From 10858a05bea53e0f46d59a0444028a73ce4965a9 Mon Sep 17 00:00:00 2001 From: "Curtis C. Ober" Date: Mon, 22 Jul 2024 13:03:59 -0600 Subject: [PATCH 01/37] Zoltan: Update License and Copyright Update the License and Copyright files to * Reflect NTESS and new contract * Utilize SPDX identifiers within files * Make copyright and license files consistent across Trilinos packages (i.e., LICENSE and COPYRIGHT) Signed-off-by: Curtis C. Ober --- packages/zoltan/COPYRIGHT | 10 ++ packages/zoltan/COPYRIGHT_AND_LICENSE | 45 --------- packages/zoltan/LICENSE | 31 ++++++ packages/zoltan/Makefile.am | 44 --------- packages/zoltan/README | 94 ------------------- packages/zoltan/README.developer | 43 --------- packages/zoltan/README.md | 50 +++++++--- packages/zoltan/configure.ac | 43 --------- packages/zoltan/docs/dev_html/zdrive.inp | 43 --------- packages/zoltan/docs/tu_html/README | 43 --------- packages/zoltan/example/C/Makefile.am | 43 --------- packages/zoltan/example/C/coloring/zcol.c | 53 ++--------- .../zoltan/example/C/coloring/zcoldriver.c | 53 ++--------- packages/zoltan/example/C/migrateGRAPH.c | 53 ++--------- packages/zoltan/example/C/problemGRAPH.c | 53 ++--------- packages/zoltan/example/C/simpleBLOCK.c | 53 ++--------- packages/zoltan/example/C/simpleGRAPH.c | 53 ++--------- packages/zoltan/example/C/simpleHIER.c | 53 ++--------- packages/zoltan/example/C/simplePHG.c | 53 ++--------- packages/zoltan/example/C/simpleRCB.c | 53 ++--------- packages/zoltan/example/CPP/Makefile.am | 43 --------- packages/zoltan/example/CPP/exampleBLOCK.cpp | 53 ++--------- packages/zoltan/example/Fortran/mpi_h.f | 45 +-------- packages/zoltan/example/Fortran/simpleRCB.f90 | 45 +-------- .../zoltan/example/Fortran/zoltanRCBmod.f90 | 45 +-------- packages/zoltan/example/README | 44 --------- packages/zoltan/src/Makefile.am | 43 --------- .../zoltan/src/Utilities/Communication/README | 43 --------- .../zoltan/src/Utilities/Communication/comm.h | 53 ++--------- 
.../src/Utilities/Communication/comm_create.c | 53 ++--------- .../Utilities/Communication/comm_default.c | 53 ++--------- .../Utilities/Communication/comm_destroy.c | 53 ++--------- .../src/Utilities/Communication/comm_do.c | 53 ++--------- .../Utilities/Communication/comm_do_reverse.c | 53 ++--------- .../Communication/comm_exchange_sizes.c | 53 ++--------- .../src/Utilities/Communication/comm_info.c | 53 ++--------- .../Utilities/Communication/comm_invert_map.c | 53 ++--------- .../Communication/comm_invert_plan.c | 53 ++--------- .../src/Utilities/Communication/comm_resize.c | 53 ++--------- .../Utilities/Communication/comm_sort_ints.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Create.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Destroy.c | 53 ++--------- .../zoltan/src/Utilities/DDirectory/DD_Find.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Hash2.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Memory.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Memory.h | 53 ++--------- .../src/Utilities/DDirectory/DD_Print.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Remove.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Set_Hash_Fn.c | 53 ++--------- .../DDirectory/DD_Set_Neighbor_Hash_Fn1.c | 53 ++--------- .../DDirectory/DD_Set_Neighbor_Hash_Fn2.c | 53 ++--------- .../DDirectory/DD_Set_Neighbor_Hash_Fn3.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Stats.c | 53 ++--------- .../src/Utilities/DDirectory/DD_Update.c | 53 ++--------- .../zoltan/src/Utilities/DDirectory/README | 43 --------- .../Utilities/DDirectory/zoltan_dd_const.h | 53 ++--------- packages/zoltan/src/Utilities/Memory/README | 43 --------- packages/zoltan/src/Utilities/Memory/mem.c | 53 ++--------- packages/zoltan/src/Utilities/README | 43 --------- packages/zoltan/src/Utilities/Timer/README | 43 --------- packages/zoltan/src/Utilities/Timer/timer.c | 53 ++--------- packages/zoltan/src/Utilities/Timer/timer.h | 53 ++--------- 
.../zoltan/src/Utilities/Timer/zoltan_timer.c | 53 ++--------- .../src/Utilities/shared/zoltan_align.c | 53 ++--------- .../zoltan/src/Utilities/shared/zoltan_id.c | 53 ++--------- .../zoltan/src/Utilities/shared/zoltan_id.h | 53 ++--------- .../zoltan/src/Utilities/shared/zoltan_util.h | 53 ++--------- packages/zoltan/src/all/README | 43 --------- packages/zoltan/src/all/all_allo.c | 53 ++--------- packages/zoltan/src/all/all_allo_const.h | 53 ++--------- packages/zoltan/src/ch/README | 43 --------- packages/zoltan/src/ch/ch_dist_graph.c | 53 ++--------- packages/zoltan/src/ch/ch_init_dist.c | 53 ++--------- packages/zoltan/src/ch/ch_init_dist_const.h | 53 ++--------- packages/zoltan/src/ch/ch_input_assign.c | 53 ++--------- packages/zoltan/src/ch/ch_input_const.h | 53 ++--------- packages/zoltan/src/ch/ch_input_geom.c | 53 ++--------- packages/zoltan/src/ch/ch_input_graph.c | 53 ++--------- packages/zoltan/src/ch/ch_input_read.c | 53 ++--------- packages/zoltan/src/coloring/bucket.c | 53 ++--------- packages/zoltan/src/coloring/bucket.h | 53 ++--------- packages/zoltan/src/coloring/color_test.c | 53 ++--------- packages/zoltan/src/coloring/coloring.c | 53 ++--------- packages/zoltan/src/coloring/coloring.h | 53 ++--------- packages/zoltan/src/coloring/coloring_const.h | 53 ++--------- packages/zoltan/src/coloring/g2l_hash.c | 53 ++--------- packages/zoltan/src/coloring/g2l_hash.h | 53 ++--------- packages/zoltan/src/driver/Makefile.am | 43 --------- packages/zoltan/src/driver/README | 43 --------- packages/zoltan/src/driver/dr_chaco_io.c | 53 ++--------- .../zoltan/src/driver/dr_chaco_io.c.shockstem | 53 ++--------- packages/zoltan/src/driver/dr_compress.c | 53 ++--------- .../zoltan/src/driver/dr_compress_const.h | 53 ++--------- packages/zoltan/src/driver/dr_const.h | 53 ++--------- packages/zoltan/src/driver/dr_dd.c | 53 ++--------- packages/zoltan/src/driver/dr_dd.h | 53 ++--------- packages/zoltan/src/driver/dr_ddCPP.cpp | 53 ++--------- 
packages/zoltan/src/driver/dr_elem.c | 53 ++--------- packages/zoltan/src/driver/dr_elem_const.h | 53 ++--------- packages/zoltan/src/driver/dr_elem_util.c | 53 ++--------- .../zoltan/src/driver/dr_elem_util_const.h | 53 ++--------- packages/zoltan/src/driver/dr_err.c | 53 ++--------- packages/zoltan/src/driver/dr_err_const.h | 53 ++--------- packages/zoltan/src/driver/dr_eval.c | 53 ++--------- packages/zoltan/src/driver/dr_eval_const.h | 53 ++--------- packages/zoltan/src/driver/dr_exoII_io.c | 53 ++--------- packages/zoltan/src/driver/dr_exoII_ioCPP.cpp | 53 ++--------- packages/zoltan/src/driver/dr_externs.h | 53 ++--------- packages/zoltan/src/driver/dr_gnuplot.c | 53 ++--------- packages/zoltan/src/driver/dr_hg_io.c | 53 ++--------- packages/zoltan/src/driver/dr_hg_readfile.c | 53 ++--------- packages/zoltan/src/driver/dr_hg_readfile.h | 53 ++--------- packages/zoltan/src/driver/dr_input.c | 53 ++--------- .../zoltan/src/driver/dr_input.c.shockstem | 53 ++--------- packages/zoltan/src/driver/dr_input_const.h | 53 ++--------- .../src/driver/dr_input_const.h.shockstem | 53 ++--------- packages/zoltan/src/driver/dr_loadbal.c | 53 ++--------- packages/zoltan/src/driver/dr_loadbalCPP.cpp | 53 ++--------- packages/zoltan/src/driver/dr_loadbal_const.h | 53 ++--------- packages/zoltan/src/driver/dr_main.c | 53 ++--------- .../zoltan/src/driver/dr_main.c.shockstem | 53 ++--------- packages/zoltan/src/driver/dr_mainCPP.cpp | 53 ++--------- packages/zoltan/src/driver/dr_maps.c | 53 ++--------- packages/zoltan/src/driver/dr_mapsCPP.cpp | 53 ++--------- packages/zoltan/src/driver/dr_maps_const.h | 53 ++--------- packages/zoltan/src/driver/dr_migrate.c | 53 ++--------- .../zoltan/src/driver/dr_migrate.c.shockstem | 53 ++--------- packages/zoltan/src/driver/dr_migrateCPP.cpp | 53 ++--------- packages/zoltan/src/driver/dr_mm_readfile.c | 53 ++--------- packages/zoltan/src/driver/dr_mmio.c | 8 ++ packages/zoltan/src/driver/dr_mmio.h | 8 ++ 
packages/zoltan/src/driver/dr_output.c | 53 ++--------- packages/zoltan/src/driver/dr_output_const.h | 53 ++--------- packages/zoltan/src/driver/dr_par_util.c | 53 ++--------- .../zoltan/src/driver/dr_par_util_const.h | 53 ++--------- packages/zoltan/src/driver/dr_param_file.c | 53 ++--------- packages/zoltan/src/driver/dr_param_file.h | 53 ++--------- .../zoltan/src/driver/dr_param_fileCPP.cpp | 53 ++--------- packages/zoltan/src/driver/dr_param_fileCPP.h | 53 ++--------- packages/zoltan/src/driver/dr_random_io.c | 53 ++--------- packages/zoltan/src/driver/dr_setfixed.c | 53 ++--------- packages/zoltan/src/driver/dr_util.c | 53 ++--------- packages/zoltan/src/driver/dr_util_const.h | 53 ++--------- packages/zoltan/src/driver/order_test.c | 53 ++--------- packages/zoltan/src/fdriver/Makefile.am | 43 --------- packages/zoltan/src/fdriver/README.mpich | 43 --------- packages/zoltan/src/fdriver/farg_nagf95.f | 45 +-------- packages/zoltan/src/fdriver/farg_typical.f | 45 +-------- packages/zoltan/src/fdriver/fdr_chaco_io.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_const.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_input.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_loadbal.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_main.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_migrate.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_mm_io.f90 | 45 +-------- .../zoltan/src/fdriver/fdr_param_file.f90 | 45 +-------- packages/zoltan/src/fdriver/fdr_sort.f90 | 45 +-------- packages/zoltan/src/fdriver/makefile | 43 --------- packages/zoltan/src/fdriver/mmio.f | 8 ++ packages/zoltan/src/fdriver/mpi_h.f | 45 +-------- .../src/fdriver/zoltan_user_data.f90.old | 45 +-------- packages/zoltan/src/fort/README | 43 --------- packages/zoltan/src/fort/cwrap.c | 53 ++--------- packages/zoltan/src/fort/cwrap_fmangle.h | 53 ++--------- packages/zoltan/src/fort/fwrap.f90 | 45 +-------- packages/zoltan/src/fort/zoltan_user_data.f90 | 45 +-------- 
packages/zoltan/src/fort/ztypes.f90 | 45 +-------- packages/zoltan/src/graph/graph.c | 53 ++--------- packages/zoltan/src/graph/graph.h | 53 ++--------- packages/zoltan/src/graph/graph_const.h | 53 ++--------- packages/zoltan/src/graph/graph_params.h | 53 ++--------- packages/zoltan/src/ha/README | 43 --------- packages/zoltan/src/ha/divide_machine.c | 53 ++--------- packages/zoltan/src/ha/get_processor_name.c | 53 ++--------- packages/zoltan/src/ha/ha_const.h | 53 ++--------- packages/zoltan/src/ha/ha_ovis.c | 53 ++--------- packages/zoltan/src/ha/ha_ovis.h | 53 ++--------- packages/zoltan/src/hier/README | 51 ++-------- packages/zoltan/src/hier/hier.c | 53 ++--------- packages/zoltan/src/hier/hier.h | 53 ++--------- packages/zoltan/src/hier/hier_free_struct.c | 53 ++--------- packages/zoltan/src/hsfc/hsfc.c | 53 ++--------- packages/zoltan/src/hsfc/hsfc.h | 53 ++--------- packages/zoltan/src/hsfc/hsfc_box_assign.c | 53 ++--------- packages/zoltan/src/hsfc/hsfc_const.h | 53 ++--------- packages/zoltan/src/hsfc/hsfc_hilbert.c | 53 ++--------- packages/zoltan/src/hsfc/hsfc_hilbert_const.h | 53 ++--------- packages/zoltan/src/hsfc/hsfc_params.h | 53 ++--------- packages/zoltan/src/hsfc/hsfc_point_assign.c | 53 ++--------- packages/zoltan/src/include/README | 43 --------- packages/zoltan/src/include/lbi_const.h | 53 ++--------- packages/zoltan/src/include/zoltan.h | 53 ++--------- packages/zoltan/src/include/zoltan_align.h | 53 ++--------- packages/zoltan/src/include/zoltan_comm.h | 53 ++--------- packages/zoltan/src/include/zoltan_comm_cpp.h | 53 ++--------- packages/zoltan/src/include/zoltan_cpp.h | 53 ++--------- packages/zoltan/src/include/zoltan_dd.h | 53 ++--------- packages/zoltan/src/include/zoltan_dd_cpp.h | 53 ++--------- packages/zoltan/src/include/zoltan_eval.h | 53 ++--------- packages/zoltan/src/include/zoltan_mem.h | 53 ++--------- .../src/include/zoltan_partition_tree.h | 53 ++--------- packages/zoltan/src/include/zoltan_timer.h | 53 ++--------- 
.../zoltan/src/include/zoltan_timer_cpp.h | 53 ++--------- packages/zoltan/src/include/zoltan_types.h | 53 ++--------- packages/zoltan/src/lb/README | 43 --------- packages/zoltan/src/lb/lb_balance.c | 53 ++--------- packages/zoltan/src/lb/lb_box_assign.c | 53 ++--------- packages/zoltan/src/lb/lb_const.h | 53 ++--------- packages/zoltan/src/lb/lb_copy.c | 53 ++--------- packages/zoltan/src/lb/lb_eval.c | 53 ++--------- packages/zoltan/src/lb/lb_free.c | 53 ++--------- packages/zoltan/src/lb/lb_init.c | 53 ++--------- packages/zoltan/src/lb/lb_init_const.h | 53 ++--------- packages/zoltan/src/lb/lb_invert.c | 53 ++--------- packages/zoltan/src/lb/lb_migrate.c | 53 ++--------- packages/zoltan/src/lb/lb_part2proc.c | 53 ++--------- packages/zoltan/src/lb/lb_point_assign.c | 53 ++--------- packages/zoltan/src/lb/lb_remap.c | 53 ++--------- packages/zoltan/src/lb/lb_set_fn.c | 53 ++--------- packages/zoltan/src/lb/lb_set_method.c | 53 ++--------- packages/zoltan/src/lb/lb_set_part_sizes.c | 53 ++--------- packages/zoltan/src/lb/low_mem_lb_migrate.c | 53 ++--------- packages/zoltan/src/matlab/README | 43 --------- packages/zoltan/src/matrix/matrix_build.c | 53 ++--------- .../zoltan/src/matrix/matrix_distribute.c | 53 ++--------- .../zoltan/src/matrix/matrix_operations.c | 53 ++--------- packages/zoltan/src/matrix/matrix_sym.c | 53 ++--------- packages/zoltan/src/matrix/matrix_utils.c | 53 ++--------- packages/zoltan/src/matrix/zoltan_matrix.h | 53 ++--------- packages/zoltan/src/order/README | 43 --------- packages/zoltan/src/order/hsfcOrder.c | 53 ++--------- packages/zoltan/src/order/hsfcOrder.h | 53 ++--------- packages/zoltan/src/order/hund.c | 53 ++--------- packages/zoltan/src/order/order.c | 53 ++--------- packages/zoltan/src/order/order_const.h | 53 ++--------- packages/zoltan/src/order/order_params.h | 53 ++--------- packages/zoltan/src/order/order_struct.c | 53 ++--------- packages/zoltan/src/order/order_tools.c | 53 ++--------- 
packages/zoltan/src/order/perm.c | 53 ++--------- packages/zoltan/src/par/README | 43 --------- packages/zoltan/src/par/par_average.c | 53 ++--------- packages/zoltan/src/par/par_average_const.h | 53 ++--------- packages/zoltan/src/par/par_bisect.c | 53 ++--------- packages/zoltan/src/par/par_bisect_const.h | 53 ++--------- packages/zoltan/src/par/par_const.h | 53 ++--------- packages/zoltan/src/par/par_median.c | 53 ++--------- packages/zoltan/src/par/par_median_const.h | 53 ++--------- .../zoltan/src/par/par_median_randomized.c | 53 ++--------- packages/zoltan/src/par/par_stats.c | 53 ++--------- packages/zoltan/src/par/par_sync.c | 53 ++--------- packages/zoltan/src/par/par_tflops_special.c | 53 ++--------- .../zoltan/src/par/par_tflops_special_const.h | 53 ++--------- packages/zoltan/src/params/README | 43 --------- .../zoltan/src/params/assign_param_vals.c | 53 ++--------- packages/zoltan/src/params/bind_param.c | 53 ++--------- packages/zoltan/src/params/check_param.c | 53 ++--------- packages/zoltan/src/params/free_params.c | 53 ++--------- packages/zoltan/src/params/key_params.c | 53 ++--------- packages/zoltan/src/params/key_params.h | 53 ++--------- packages/zoltan/src/params/params_const.h | 53 ++--------- packages/zoltan/src/params/print_params.c | 53 ++--------- packages/zoltan/src/params/set_param.c | 53 ++--------- packages/zoltan/src/phg/README | 43 --------- packages/zoltan/src/phg/phg.c | 53 ++--------- packages/zoltan/src/phg/phg.h | 53 ++--------- packages/zoltan/src/phg/phg_Vcycle.c | 53 ++--------- packages/zoltan/src/phg/phg_build.c | 53 ++--------- .../phg/phg_build.c.improved_calculate_cuts | 53 ++--------- packages/zoltan/src/phg/phg_build_calls.c | 53 ++--------- packages/zoltan/src/phg/phg_coarse.c | 53 ++--------- packages/zoltan/src/phg/phg_comm.c | 53 ++--------- packages/zoltan/src/phg/phg_comm.h | 53 ++--------- packages/zoltan/src/phg/phg_const.h | 53 ++--------- packages/zoltan/src/phg/phg_distrib.c | 53 ++--------- 
packages/zoltan/src/phg/phg_distrib.h | 53 ++--------- packages/zoltan/src/phg/phg_gather.c | 53 ++--------- packages/zoltan/src/phg/phg_hypergraph.c | 53 ++--------- packages/zoltan/src/phg/phg_hypergraph.h | 53 ++--------- packages/zoltan/src/phg/phg_lookup.c | 53 ++--------- packages/zoltan/src/phg/phg_lookup.h | 53 ++--------- packages/zoltan/src/phg/phg_match.c | 53 ++--------- packages/zoltan/src/phg/phg_order.c | 53 ++--------- packages/zoltan/src/phg/phg_params.h | 53 ++--------- packages/zoltan/src/phg/phg_parkway.c | 53 ++--------- packages/zoltan/src/phg/phg_partition_tree.c | 53 ++--------- packages/zoltan/src/phg/phg_patoh.c | 53 ++--------- packages/zoltan/src/phg/phg_plot.c | 53 ++--------- packages/zoltan/src/phg/phg_rdivide.c | 53 ++--------- packages/zoltan/src/phg/phg_refinement.c | 53 ++--------- packages/zoltan/src/phg/phg_scale.c | 53 ++--------- packages/zoltan/src/phg/phg_serialpartition.c | 53 ++--------- packages/zoltan/src/phg/phg_tree.c | 53 ++--------- packages/zoltan/src/phg/phg_tree.h | 53 ++--------- packages/zoltan/src/phg/phg_two_ways.c | 53 ++--------- packages/zoltan/src/phg/phg_util.c | 53 ++--------- packages/zoltan/src/phg/phg_util.h | 53 ++--------- packages/zoltan/src/phg/phg_verbose.c | 53 ++--------- packages/zoltan/src/phg/phg_verbose.h | 53 ++--------- packages/zoltan/src/rcb/README | 43 --------- packages/zoltan/src/rcb/box_assign.c | 53 ++--------- packages/zoltan/src/rcb/create_proc_list.c | 53 ++--------- .../zoltan/src/rcb/create_proc_list_const.h | 53 ++--------- packages/zoltan/src/rcb/inertial.h | 53 ++--------- packages/zoltan/src/rcb/inertial1d.c | 53 ++--------- packages/zoltan/src/rcb/inertial2d.c | 53 ++--------- packages/zoltan/src/rcb/inertial3d.c | 53 ++--------- packages/zoltan/src/rcb/point_assign.c | 53 ++--------- packages/zoltan/src/rcb/rcb.c | 53 ++--------- packages/zoltan/src/rcb/rcb.h | 53 ++--------- packages/zoltan/src/rcb/rcb_box.c | 53 ++--------- packages/zoltan/src/rcb/rcb_const.h | 53 
++--------- packages/zoltan/src/rcb/rcb_params.h | 53 ++--------- packages/zoltan/src/rcb/rcb_partition_tree.c | 53 ++--------- packages/zoltan/src/rcb/rcb_util.c | 53 ++--------- packages/zoltan/src/rcb/rib.c | 53 ++--------- packages/zoltan/src/rcb/rib.h | 53 ++--------- packages/zoltan/src/rcb/rib_const.h | 53 ++--------- packages/zoltan/src/rcb/rib_params.h | 53 ++--------- packages/zoltan/src/rcb/rib_util.c | 53 ++--------- packages/zoltan/src/rcb/shared.c | 53 ++--------- packages/zoltan/src/rcb/shared.h | 53 ++--------- packages/zoltan/src/reftree/README | 24 ----- packages/zoltan/src/reftree/reftree.h | 35 ++----- packages/zoltan/src/reftree/reftree_build.c | 35 ++----- .../zoltan/src/reftree/reftree_coarse_path.c | 35 ++----- packages/zoltan/src/reftree/reftree_const.h | 35 ++----- packages/zoltan/src/reftree/reftree_hash.c | 35 ++----- packages/zoltan/src/reftree/reftree_part.c | 35 ++----- packages/zoltan/src/simple/README | 43 --------- packages/zoltan/src/simple/block.c | 53 ++--------- packages/zoltan/src/simple/cyclic.c | 53 ++--------- packages/zoltan/src/simple/random.c | 53 ++--------- packages/zoltan/src/simple/simple_const.h | 53 ++--------- packages/zoltan/src/timer/README | 43 --------- packages/zoltan/src/timer/timer_const.h | 53 ++--------- packages/zoltan/src/timer/timer_params.c | 53 ++--------- packages/zoltan/src/tpls/README | 43 --------- packages/zoltan/src/tpls/build_graph.c | 53 ++--------- packages/zoltan/src/tpls/graph_util.h | 53 ++--------- packages/zoltan/src/tpls/parmetis_interface.c | 53 ++--------- packages/zoltan/src/tpls/parmetis_interface.h | 53 ++--------- .../src/tpls/parmetis_interface_params.h | 53 ++--------- packages/zoltan/src/tpls/postprocessing.c | 53 ++--------- packages/zoltan/src/tpls/preprocessing.c | 53 ++--------- packages/zoltan/src/tpls/scatter_graph.c | 53 ++--------- packages/zoltan/src/tpls/scotch_interface.c | 53 ++--------- packages/zoltan/src/tpls/scotch_interface.h | 53 ++--------- 
.../zoltan/src/tpls/scotch_interface_params.h | 53 ++--------- packages/zoltan/src/tpls/third_library.c | 53 ++--------- packages/zoltan/src/tpls/third_library.h | 53 ++--------- .../zoltan/src/tpls/third_library_const.h | 53 ++--------- .../zoltan/src/tpls/third_library_params.h | 53 ++--------- .../zoltan/src/tpls/third_library_tools.h | 53 ++--------- packages/zoltan/src/tpls/verify_graph.c | 53 ++--------- .../src/util/generate_miniFElike_grids.cpp | 53 ++--------- packages/zoltan/src/util/memory_usage/README | 43 --------- .../zoltan/src/util/memory_usage/commdup.c | 53 ++--------- .../zoltan/src/util/memory_usage/commsplit.c | 53 ++--------- .../src/util/memory_usage/get_heap_usage.h | 53 ++--------- .../zoltan/src/util/memory_usage/rcblike.c | 53 ++--------- .../src/util/network_topology/MPI/README | 43 --------- .../src/util/network_topology/MPI/test32.c | 53 ++--------- .../util/network_topology/MPI/topologyTest.c | 53 ++--------- .../util/network_topology/MPI/topologyVis.c | 53 ++--------- .../src/util/network_topology/hwloc/README | 43 --------- .../network_topology/hwloc/node_topology.c | 53 ++--------- .../hwloc/zoltan_get_topology.c | 53 ++--------- packages/zoltan/src/util/vtk_view.cpp | 53 ++--------- packages/zoltan/src/zz/README | 43 --------- packages/zoltan/src/zz/zz_back_trace.c | 53 ++--------- packages/zoltan/src/zz/zz_const.h | 53 ++--------- packages/zoltan/src/zz/zz_coord.c | 53 ++--------- packages/zoltan/src/zz/zz_gen_files.c | 53 ++--------- packages/zoltan/src/zz/zz_hash.c | 53 ++--------- packages/zoltan/src/zz/zz_hash.h | 53 ++--------- packages/zoltan/src/zz/zz_heap.c | 53 ++--------- packages/zoltan/src/zz/zz_heap.h | 53 ++--------- packages/zoltan/src/zz/zz_id_const.h | 53 ++--------- packages/zoltan/src/zz/zz_init.c | 53 ++--------- packages/zoltan/src/zz/zz_map.c | 53 ++--------- packages/zoltan/src/zz/zz_obj_list.c | 53 ++--------- packages/zoltan/src/zz/zz_rand.c | 53 ++--------- packages/zoltan/src/zz/zz_rand.h | 53 
++--------- packages/zoltan/src/zz/zz_set_fn.c | 53 ++--------- packages/zoltan/src/zz/zz_sort.c | 53 ++--------- packages/zoltan/src/zz/zz_sort.h | 53 ++--------- packages/zoltan/src/zz/zz_struct.c | 53 ++--------- packages/zoltan/src/zz/zz_util.c | 53 ++--------- packages/zoltan/src/zz/zz_util_const.h | 53 ++--------- packages/zoltan/test/Large_Data/Makefile.am | 43 --------- .../zoltan/test/Large_Data/stressTestColor.c | 53 ++--------- .../zoltan/test/Large_Data/stressTestGRAPH.c | 53 ++--------- .../zoltan/test/Large_Data/stressTestPHG.c | 53 ++--------- .../zoltan/test/Large_Data/stressTestRCB.c | 53 ++--------- .../zoltan/test/Large_Data/stressTestRIB.c | 53 ++--------- packages/zoltan/test/README | 43 --------- .../zoltan/test/TestMPI/canarySelfMessages.c | 8 ++ packages/zoltan/test/TestMPI/mpiMinLoc.c | 8 ++ .../Utilities_Tests/Communication/comm_main.c | 53 ++--------- .../Communication/comm_main_2.cpp | 53 ++--------- .../Utilities_Tests/Communication/test/README | 43 --------- .../test/Utilities_Tests/DDirectory/DD_Main.c | 53 ++--------- .../Utilities_Tests/DDirectory/DD_Main_2.cpp | 53 ++--------- .../test/Utilities_Tests/Memory/mem_main.c | 53 ++--------- .../test/Utilities_Tests/Timer/timer_main.c | 53 ++--------- .../Utilities_Tests/Timer/timer_main_2.cpp | 53 ++--------- packages/zoltan/test/ch_brack2_3/README | 43 --------- packages/zoltan/test/ch_degenerate/README | 43 --------- packages/zoltan/test/ch_degenerateAA/README | 43 --------- packages/zoltan/test/ch_drake/README | 43 --------- packages/zoltan/test/ch_ewgt/README | 43 --------- packages/zoltan/test/ch_grid20x19/README | 43 --------- packages/zoltan/test/ch_hammond/README | 43 --------- packages/zoltan/test/ch_nograph/README | 43 --------- packages/zoltan/test/ch_simple/README | 43 --------- packages/zoltan/test/ch_vwgt/README | 43 --------- packages/zoltan/test/hg_cage10/README | 43 --------- packages/zoltan/test/hg_felix/README | 43 --------- packages/zoltan/test/hg_vwgt/README | 43 
--------- .../simple_local_HSFC_order.cpp | 53 ++--------- .../simple_local_HSFC_order_0block.cpp | 53 ++--------- packages/zoltan/test/misc_tests/copyZZ.c | 8 ++ packages/zoltan/test/misc_tests/copyZZ.cpp | 8 ++ .../test/misc_tests/test_get_callbacks.c | 8 ++ packages/zoltan/test/test_zoltan | 43 --------- packages/zoltan/test/test_zoltan_new | 43 --------- 426 files changed, 2935 insertions(+), 18399 deletions(-) create mode 100644 packages/zoltan/COPYRIGHT delete mode 100644 packages/zoltan/COPYRIGHT_AND_LICENSE create mode 100644 packages/zoltan/LICENSE delete mode 100644 packages/zoltan/README diff --git a/packages/zoltan/COPYRIGHT b/packages/zoltan/COPYRIGHT new file mode 100644 index 000000000000..0dbfc43b8a4b --- /dev/null +++ b/packages/zoltan/COPYRIGHT @@ -0,0 +1,10 @@ + + ??: description + Copyright (c) 20?? NTESS + +Copyright 20?? National Technology & Engineering Solutions of Sandia, +LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the +U.S. Government retains certain rights in this software. + +Copyright the ?? contributors. + diff --git a/packages/zoltan/COPYRIGHT_AND_LICENSE b/packages/zoltan/COPYRIGHT_AND_LICENSE deleted file mode 100644 index 0af71a431861..000000000000 --- a/packages/zoltan/COPYRIGHT_AND_LICENSE +++ /dev/null @@ -1,45 +0,0 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ diff --git a/packages/zoltan/LICENSE b/packages/zoltan/LICENSE new file mode 100644 index 000000000000..c1537e083987 --- /dev/null +++ b/packages/zoltan/LICENSE @@ -0,0 +1,31 @@ +SPDX-License-Identifier: BSD-3-Clause + +Copyright (c) 2012 NTESS and the Zoltan contributors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + 3. Neither the name of the copyright holder nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/packages/zoltan/Makefile.am b/packages/zoltan/Makefile.am index 77d47b6e0e73..0d03c27ec167 100644 --- a/packages/zoltan/Makefile.am +++ b/packages/zoltan/Makefile.am @@ -1,47 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? 
Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER - ## ####################################################################### ## Options to automake (rarely used - don't worry about it) diff --git a/packages/zoltan/README b/packages/zoltan/README deleted file mode 100644 index 6eaf09490aa3..000000000000 --- a/packages/zoltan/README +++ /dev/null @@ -1,94 +0,0 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER -@HEADER - -********************************************************************** - - Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - Copyright 2012 Sandia Corporation - -Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -the U.S. Government retains certain rights in this software. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the Corporation nor the names of the -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Questions? Contact Karen Devine kddevin@sandia.gov - Erik Boman egboman@sandia.gov - -@HEADER - -############################################################################## - -INSTALLATION ------------- -Instructions for building and installing Zoltan using CMAKE or Autotools -are at the following web site: - - http://www.cs.sandia.gov/zoltan/ug_html/ug_usage.html - diff --git a/packages/zoltan/README.developer b/packages/zoltan/README.developer index 7dfa09d60c44..05f35e194c45 100644 --- a/packages/zoltan/README.developer +++ b/packages/zoltan/README.developer @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER lriesen@sandia.gov November 29, 2010 diff --git a/packages/zoltan/README.md b/packages/zoltan/README.md index 74cb91974c30..4769d507ae0f 100644 --- a/packages/zoltan/README.md +++ b/packages/zoltan/README.md @@ -1,23 +1,53 @@ # Zoltan Zoltan Dynamic Load Balancing and Graph Algorithm Toolkit -- Distribution site -The most up-to-date version of Zoltan is in the Trilinos framework at https://github.com/trilinos/Trilinos. +The most up-to-date version of Zoltan is in the Trilinos framework +at https://github.com/trilinos/Trilinos. -This site provides stand-alone releases of Zoltan, separate from Trilinos: https://github.com/sandialabs/Zoltan/releases. Stand-alone releases of Zoltan may lag the Trilinos repo code. +This site provides stand-alone releases of Zoltan, separate from +Trilinos: https://github.com/sandialabs/Zoltan/releases. 
Stand-alone +releases of Zoltan may lag the Trilinos repo code. -You can download individual releases of Zoltan from this site, or clone the Trilinos repository -https://github.com/trilinos/Trilinos to get the most up-to-date version of Zoltan. Individual releases are tarballs that can be unzipped and built with autotools/make. +You can download individual releases of Zoltan from this site, or +clone the Trilinos repository https://github.com/trilinos/Trilinos +to get the most up-to-date version of Zoltan. Individual releases +are tarballs that can be unzipped and built with autotools/make. -Trilinos clones include Zoltan in directory Trilinos/packages/zoltan. In this directory, you can -build Zoltan separately from Trilinos using autotools/make. Or in the Trilinos repository, -you can build Zoltan using Trilinos' cmake system. +Trilinos clones include Zoltan in directory Trilinos/packages/zoltan. +In this directory, you can build Zoltan separately from Trilinos +using autotools/make. Or in the Trilinos repository, you can build +Zoltan using Trilinos' cmake system. -See https://htmlpreview.github.io/?https://github.com/sandialabs/zoltan/blob/master/doc/Zoltan_html/ug_html/ug_usage.html +See +https://htmlpreview.github.io/?https://github.com/sandialabs/zoltan/blob/master/doc/Zoltan_html/ug_html/ug_usage.html for details on building Zoltan. The main Zoltan page is http://cs.sandia.gov/Zoltan. -Release history: https://htmlpreview.github.io/?https://github.com/sandialabs/zoltan/blob/master/doc/Zoltan_html/ug_html/ug_release.html + +## INSTALLATION + +Instructions for building and installing Zoltan using CMAKE or Autotools +are at the following web site: + + http://www.cs.sandia.gov/zoltan/ug_html/ug_usage.html + + +## Copyright and License +See zoltan/COPYRIGHT, zoltan/LICENSE, https://trilinos.github.io/license.html and individual file headers for additional information. + + +## Questions? 
+Contact lead developers: + +* Zoltan team (GitHub handle: @trilinos/zoltan) +* Erik Boman (GitHub handle: [@egboman](https://github.com/egboman); email: egboman@sandia.gov) + + +## Release History + +https://htmlpreview.github.io/?https://github.com/sandialabs/zoltan/blob/master/doc/Zoltan_html/ug_html/ug_release.html + * Version 3.90 (4/08/21; as in Trilinos v13; d328e0e2a8a5c48a4e01d6541cd8c0eb7f364823) * Version 3.83 (1/28/16; as in Trilinos v12.6; aaf328db7e43001ee2d3148f72f12147e51c3293) * Version 3.82 (5/1/15; as in Trilinos v12) @@ -34,5 +64,3 @@ Release history: https://htmlpreview.github.io/?https://github.com/sandialabs/z * Version 1.5 (5/29/03) * Version 1.4 (6/18/02) * Version 1.3 (3/27/02) -- the Original - -Questions? Email zoltan-dev@software.sandia.gov diff --git a/packages/zoltan/configure.ac b/packages/zoltan/configure.ac index 37509602b966..1b62bda34e69 100644 --- a/packages/zoltan/configure.ac +++ b/packages/zoltan/configure.ac @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # ------------------------------------------------------------------------ # Process this file with autoconf to produce a configure script. # ------------------------------------------------------------------------ diff --git a/packages/zoltan/docs/dev_html/zdrive.inp b/packages/zoltan/docs/dev_html/zdrive.inp index bd2bf40542be..6eae2950af71 100644 --- a/packages/zoltan/docs/dev_html/zdrive.inp +++ b/packages/zoltan/docs/dev_html/zdrive.inp @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ############################################################################## # # EXAMPLE OF zdrive.inp INPUT FILE FOR zdrive AND zfdrive. 
diff --git a/packages/zoltan/docs/tu_html/README b/packages/zoltan/docs/tu_html/README index f5a76909a8a1..8b94251bdbac 100644 --- a/packages/zoltan/docs/tu_html/README +++ b/packages/zoltan/docs/tu_html/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER The Zoltan tutorial was written in LaTeX, but can be converted to html. The Makefile assumes latex2html is available. Just type 'make' and you should get diff --git a/packages/zoltan/example/C/Makefile.am b/packages/zoltan/example/C/Makefile.am index 7df6eef47b1c..172b4957861b 100644 --- a/packages/zoltan/example/C/Makefile.am +++ b/packages/zoltan/example/C/Makefile.am @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # The following line helps the test harness recover from build errors. diff --git a/packages/zoltan/example/C/coloring/zcol.c b/packages/zoltan/example/C/coloring/zcol.c index 5f8f241a6d87..d7ba0fb86ee9 100644 --- a/packages/zoltan/example/C/coloring/zcol.c +++ b/packages/zoltan/example/C/coloring/zcol.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*************************************************************** File : zcoldrive.c Date : diff --git a/packages/zoltan/example/C/coloring/zcoldriver.c b/packages/zoltan/example/C/coloring/zcoldriver.c index a555a4ca745b..99f95c0f252d 100644 --- a/packages/zoltan/example/C/coloring/zcoldriver.c +++ b/packages/zoltan/example/C/coloring/zcoldriver.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*************************************************************** File : zcoldrive.c Date : diff --git a/packages/zoltan/example/C/migrateGRAPH.c b/packages/zoltan/example/C/migrateGRAPH.c index 7be1c67f9cec..e0ffb902f3d9 100644 --- a/packages/zoltan/example/C/migrateGRAPH.c +++ b/packages/zoltan/example/C/migrateGRAPH.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************** * An expansion of simpleGRAPH.c. 
* diff --git a/packages/zoltan/example/C/problemGRAPH.c b/packages/zoltan/example/C/problemGRAPH.c index 59c82d050c62..ec644167d5b0 100644 --- a/packages/zoltan/example/C/problemGRAPH.c +++ b/packages/zoltan/example/C/problemGRAPH.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************** * Basic example of using Zoltan to partition a graph. ***************************************************************/ diff --git a/packages/zoltan/example/C/simpleBLOCK.c b/packages/zoltan/example/C/simpleBLOCK.c index 399fc5a6a3fc..c59a0027e1b4 100644 --- a/packages/zoltan/example/C/simpleBLOCK.c +++ b/packages/zoltan/example/C/simpleBLOCK.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Basic example of using Zoltan to compute a quick partitioning ** of a set of objects. ***************************************************************/ diff --git a/packages/zoltan/example/C/simpleGRAPH.c b/packages/zoltan/example/C/simpleGRAPH.c index 98933bc5002b..7c8c4b3700f2 100644 --- a/packages/zoltan/example/C/simpleGRAPH.c +++ b/packages/zoltan/example/C/simpleGRAPH.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************** * Basic example of using Zoltan to partition a graph. ***************************************************************/ diff --git a/packages/zoltan/example/C/simpleHIER.c b/packages/zoltan/example/C/simpleHIER.c index 018f26ee66b2..2b24013634e3 100644 --- a/packages/zoltan/example/C/simpleHIER.c +++ b/packages/zoltan/example/C/simpleHIER.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************** * Basic example of using Zoltan to partition a graph. ***************************************************************/ diff --git a/packages/zoltan/example/C/simplePHG.c b/packages/zoltan/example/C/simplePHG.c index 379ea367bce2..a5bf28d25827 100644 --- a/packages/zoltan/example/C/simplePHG.c +++ b/packages/zoltan/example/C/simplePHG.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************** * Basic example of using Zoltan to partition a hypergraph. * diff --git a/packages/zoltan/example/C/simpleRCB.c b/packages/zoltan/example/C/simpleRCB.c index e881ec4857da..01c351100157 100644 --- a/packages/zoltan/example/C/simpleRCB.c +++ b/packages/zoltan/example/C/simpleRCB.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*************************************************************** ** Basic example of using Zoltan to compute an RCB partitioning ** of a very simple mesh or graph. diff --git a/packages/zoltan/example/CPP/Makefile.am b/packages/zoltan/example/CPP/Makefile.am index 49c4dc393f90..cd2d14132dbb 100644 --- a/packages/zoltan/example/CPP/Makefile.am +++ b/packages/zoltan/example/CPP/Makefile.am @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # The following line helps the test harness recover from build errors. diff --git a/packages/zoltan/example/CPP/exampleBLOCK.cpp b/packages/zoltan/example/CPP/exampleBLOCK.cpp index 18764ad380b9..4a2095648b0d 100644 --- a/packages/zoltan/example/CPP/exampleBLOCK.cpp +++ b/packages/zoltan/example/CPP/exampleBLOCK.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER // // Basic C++ example of using Zoltan to compute a quick partitioning // of a set of objects. diff --git a/packages/zoltan/example/Fortran/mpi_h.f b/packages/zoltan/example/Fortran/mpi_h.f index bbd78ec6c31f..71556826ca15 100644 --- a/packages/zoltan/example/Fortran/mpi_h.f +++ b/packages/zoltan/example/Fortran/mpi_h.f @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! 
***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! 
-!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !-------------------------------------------------------------------------- ! Purpose: Provide the MPI include file as a module. diff --git a/packages/zoltan/example/Fortran/simpleRCB.f90 b/packages/zoltan/example/Fortran/simpleRCB.f90 index e53e8da0d00c..775028eb13d6 100644 --- a/packages/zoltan/example/Fortran/simpleRCB.f90 +++ b/packages/zoltan/example/Fortran/simpleRCB.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! 
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! // !! File: driver.cc // diff --git a/packages/zoltan/example/Fortran/zoltanRCBmod.f90 b/packages/zoltan/example/Fortran/zoltanRCBmod.f90 index fda0efd35269..581f6ae0edd5 100644 --- a/packages/zoltan/example/Fortran/zoltanRCBmod.f90 +++ b/packages/zoltan/example/Fortran/zoltanRCBmod.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! 
met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! // !! 
File: driver.cc // diff --git a/packages/zoltan/example/README b/packages/zoltan/example/README index 14690cd8cf9b..9a68198b8060 100644 --- a/packages/zoltan/example/README +++ b/packages/zoltan/example/README @@ -1,47 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER -$Id$ Some simple C examples may be found in the "C" directory, and C++ examples in the "CPP" directory. Edit the classicMakefile found diff --git a/packages/zoltan/src/Makefile.am b/packages/zoltan/src/Makefile.am index 152e8702ed86..b64c53f26e15 100644 --- a/packages/zoltan/src/Makefile.am +++ b/packages/zoltan/src/Makefile.am @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # The following line helps the test harness recover from build errors. diff --git a/packages/zoltan/src/Utilities/Communication/README b/packages/zoltan/src/Utilities/Communication/README index 9f61ec3c4eb3..b6f162388426 100644 --- a/packages/zoltan/src/Utilities/Communication/README +++ b/packages/zoltan/src/Utilities/Communication/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER COMM DIRECTORY -- Package to efficiently perform unstructured communication operations. 
The same pattern can be created once and diff --git a/packages/zoltan/src/Utilities/Communication/comm.h b/packages/zoltan/src/Utilities/Communication/comm.h index 4ae5cd21f8e7..7ad6fea5f2d1 100644 --- a/packages/zoltan/src/Utilities/Communication/comm.h +++ b/packages/zoltan/src/Utilities/Communication/comm.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __COMM_H diff --git a/packages/zoltan/src/Utilities/Communication/comm_create.c b/packages/zoltan/src/Utilities/Communication/comm_create.c index fb248c778619..1078c9d0808e 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_create.c +++ b/packages/zoltan/src/Utilities/Communication/comm_create.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include "comm.h" diff --git a/packages/zoltan/src/Utilities/Communication/comm_default.c b/packages/zoltan/src/Utilities/Communication/comm_default.c index f980739592f8..4f0a2569892c 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_default.c +++ b/packages/zoltan/src/Utilities/Communication/comm_default.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "comm.h" #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_destroy.c b/packages/zoltan/src/Utilities/Communication/comm_destroy.c index 73649c0026b4..55d2d0c02d7d 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_destroy.c +++ b/packages/zoltan/src/Utilities/Communication/comm_destroy.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/Utilities/Communication/comm_do.c b/packages/zoltan/src/Utilities/Communication/comm_do.c index 7361912b985f..d1eec942e13c 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_do.c +++ b/packages/zoltan/src/Utilities/Communication/comm_do.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_do_reverse.c b/packages/zoltan/src/Utilities/Communication/comm_do_reverse.c index 2dac835d863b..9152bed08e34 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_do_reverse.c +++ b/packages/zoltan/src/Utilities/Communication/comm_do_reverse.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_exchange_sizes.c b/packages/zoltan/src/Utilities/Communication/comm_exchange_sizes.c index 62b62054e220..c1e614d92fc4 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_exchange_sizes.c +++ b/packages/zoltan/src/Utilities/Communication/comm_exchange_sizes.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_info.c b/packages/zoltan/src/Utilities/Communication/comm_info.c index 48f5a81a3300..944a24237ade 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_info.c +++ b/packages/zoltan/src/Utilities/Communication/comm_info.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_invert_map.c b/packages/zoltan/src/Utilities/Communication/comm_invert_map.c index c95db453d1d2..04f9868a80c1 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_invert_map.c +++ b/packages/zoltan/src/Utilities/Communication/comm_invert_map.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_invert_plan.c b/packages/zoltan/src/Utilities/Communication/comm_invert_plan.c index e8c9a6fcb958..ecc2f056eab5 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_invert_plan.c +++ b/packages/zoltan/src/Utilities/Communication/comm_invert_plan.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include "comm.h" diff --git a/packages/zoltan/src/Utilities/Communication/comm_resize.c b/packages/zoltan/src/Utilities/Communication/comm_resize.c index ff84fc65d1d6..6bebbade0ec4 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_resize.c +++ b/packages/zoltan/src/Utilities/Communication/comm_resize.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/Utilities/Communication/comm_sort_ints.c b/packages/zoltan/src/Utilities/Communication/comm_sort_ints.c index f0e3535ddf56..754eb57631ff 100644 --- a/packages/zoltan/src/Utilities/Communication/comm_sort_ints.c +++ b/packages/zoltan/src/Utilities/Communication/comm_sort_ints.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include "comm.h" diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Create.c b/packages/zoltan/src/Utilities/DDirectory/DD_Create.c index 2626250d151b..dd811ee1b060 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Create.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Create.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Destroy.c b/packages/zoltan/src/Utilities/DDirectory/DD_Destroy.c index a20b04934e48..6fe65c1faa0a 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Destroy.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Destroy.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Find.c b/packages/zoltan/src/Utilities/DDirectory/DD_Find.c index c3bd7083ea8c..ec11679d9880 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Find.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Find.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Hash2.c b/packages/zoltan/src/Utilities/DDirectory/DD_Hash2.c index 2c827860d6bb..1a1d498afed5 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Hash2.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Hash2.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Memory.c b/packages/zoltan/src/Utilities/DDirectory/DD_Memory.c index 4a9f9344f610..17dc118d38b6 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Memory.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Memory.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Memory.h b/packages/zoltan/src/Utilities/DDirectory/DD_Memory.h index 7f003c5e1643..9569be812eba 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Memory.h +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Memory.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_DD_MEMORY_H_ #define ZOLTAN_DD_MEMORY_H_ diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Print.c b/packages/zoltan/src/Utilities/DDirectory/DD_Print.c index 226b57c0b590..5948a2481b59 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Print.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Print.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Remove.c b/packages/zoltan/src/Utilities/DDirectory/DD_Remove.c index ffdba6f28201..6c00519d89f4 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Remove.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Remove.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Hash_Fn.c b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Hash_Fn.c index 35c5e9896803..ccf404b5f040 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Hash_Fn.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Hash_Fn.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn1.c b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn1.c index c9a61da02c70..bce1d5bd5326 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn1.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn1.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn2.c b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn2.c index df784873e310..c1e0578ad284 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn2.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn2.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn3.c b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn3.c index 0ab6e0960dec..dba1f9ef4cd3 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn3.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Set_Neighbor_Hash_Fn3.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Stats.c b/packages/zoltan/src/Utilities/DDirectory/DD_Stats.c index 9ec73bcbc1a6..30374a30dd29 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Stats.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Stats.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/DD_Update.c b/packages/zoltan/src/Utilities/DDirectory/DD_Update.c index dc9d17324ba8..19f31fa110e9 100644 --- a/packages/zoltan/src/Utilities/DDirectory/DD_Update.c +++ b/packages/zoltan/src/Utilities/DDirectory/DD_Update.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/Utilities/DDirectory/README b/packages/zoltan/src/Utilities/DDirectory/README index 1bdd561a1b57..659daaca708f 100644 --- a/packages/zoltan/src/Utilities/DDirectory/README +++ b/packages/zoltan/src/Utilities/DDirectory/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ZOLTAN Distributed Directory (DD) Software Documentation diff --git a/packages/zoltan/src/Utilities/DDirectory/zoltan_dd_const.h b/packages/zoltan/src/Utilities/DDirectory/zoltan_dd_const.h index 521040ff3e13..40bf8758afd6 100644 --- a/packages/zoltan/src/Utilities/DDirectory/zoltan_dd_const.h +++ b/packages/zoltan/src/Utilities/DDirectory/zoltan_dd_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT(INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_DD_H #define ZOLTAN_DD_H diff --git a/packages/zoltan/src/Utilities/Memory/README b/packages/zoltan/src/Utilities/Memory/README index 24d1e18a5c89..211efb8b719b 100644 --- a/packages/zoltan/src/Utilities/Memory/README +++ b/packages/zoltan/src/Utilities/Memory/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER MEMORY UTILITIES -- Routines for memory allocation and deallocation. ---------------------------------------------------------------- diff --git a/packages/zoltan/src/Utilities/Memory/mem.c b/packages/zoltan/src/Utilities/Memory/mem.c index c1f5aa50faf7..ec8c9cd9e7f1 100644 --- a/packages/zoltan/src/Utilities/Memory/mem.c +++ b/packages/zoltan/src/Utilities/Memory/mem.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/Utilities/README b/packages/zoltan/src/Utilities/README index f721007f11cb..51bad6485acd 100644 --- a/packages/zoltan/src/Utilities/README +++ b/packages/zoltan/src/Utilities/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Zoltan Utilities diff --git a/packages/zoltan/src/Utilities/Timer/README b/packages/zoltan/src/Utilities/Timer/README index 9a2c5c410dc8..e624cf1d6f4d 100644 --- a/packages/zoltan/src/Utilities/Timer/README +++ b/packages/zoltan/src/Utilities/Timer/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER TIMER UTILITY -- Routines to implement Zoltan_Time and Zoltan_Timer objects. ---------------------------------------------------------------------------- diff --git a/packages/zoltan/src/Utilities/Timer/timer.c b/packages/zoltan/src/Utilities/Timer/timer.c index 4903d1ef5dc5..54ff01cab1af 100644 --- a/packages/zoltan/src/Utilities/Timer/timer.c +++ b/packages/zoltan/src/Utilities/Timer/timer.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/Utilities/Timer/timer.h b/packages/zoltan/src/Utilities/Timer/timer.h index 2048cab7cc37..d7904c64c02b 100644 --- a/packages/zoltan/src/Utilities/Timer/timer.h +++ b/packages/zoltan/src/Utilities/Timer/timer.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __TIMER_H #define __TIMER_H diff --git a/packages/zoltan/src/Utilities/Timer/zoltan_timer.c b/packages/zoltan/src/Utilities/Timer/zoltan_timer.c index c8605d625674..88188d41c8c5 100644 --- a/packages/zoltan/src/Utilities/Timer/zoltan_timer.c +++ b/packages/zoltan/src/Utilities/Timer/zoltan_timer.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "zoltan_timer.h" diff --git a/packages/zoltan/src/Utilities/shared/zoltan_align.c b/packages/zoltan/src/Utilities/shared/zoltan_align.c index 87b31ff5022f..187428f70d5f 100644 --- a/packages/zoltan/src/Utilities/shared/zoltan_align.c +++ b/packages/zoltan/src/Utilities/shared/zoltan_align.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/Utilities/shared/zoltan_id.c b/packages/zoltan/src/Utilities/shared/zoltan_id.c index 2ad778ea3a79..d039d2f5789e 100644 --- a/packages/zoltan/src/Utilities/shared/zoltan_id.c +++ b/packages/zoltan/src/Utilities/shared/zoltan_id.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/Utilities/shared/zoltan_id.h b/packages/zoltan/src/Utilities/shared/zoltan_id.h index ab6220bea5e1..b6b119c31277 100644 --- a/packages/zoltan/src/Utilities/shared/zoltan_id.h +++ b/packages/zoltan/src/Utilities/shared/zoltan_id.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_ID_H #define __ZOLTAN_ID_H diff --git a/packages/zoltan/src/Utilities/shared/zoltan_util.h b/packages/zoltan/src/Utilities/shared/zoltan_util.h index 2a7518f96afc..25c2fc4fc14e 100644 --- a/packages/zoltan/src/Utilities/shared/zoltan_util.h +++ b/packages/zoltan/src/Utilities/shared/zoltan_util.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_UTIL_H #define __ZOLTAN_UTIL_H diff --git a/packages/zoltan/src/all/README b/packages/zoltan/src/all/README index 002bc75fe667..228b516cc810 100644 --- a/packages/zoltan/src/all/README +++ b/packages/zoltan/src/all/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ALL DIRECTORY -- Routines for memory allocation and deallocation. ---------------------------------------------------------------- diff --git a/packages/zoltan/src/all/all_allo.c b/packages/zoltan/src/all/all_allo.c index 0b4ca589b34f..01bbb54d85dd 100644 --- a/packages/zoltan/src/all/all_allo.c +++ b/packages/zoltan/src/all/all_allo.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/all/all_allo_const.h b/packages/zoltan/src/all/all_allo_const.h index 7272b1a20adb..e8f0707f80fc 100644 --- a/packages/zoltan/src/all/all_allo_const.h +++ b/packages/zoltan/src/all/all_allo_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ALL_ALLO_H diff --git a/packages/zoltan/src/ch/README b/packages/zoltan/src/ch/README index 67ce535594b7..5548c251852b 100644 --- a/packages/zoltan/src/ch/README +++ b/packages/zoltan/src/ch/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER CH DIRECTORY -- code for reading Chaco input files. 
--------------------------------------------------- diff --git a/packages/zoltan/src/ch/ch_dist_graph.c b/packages/zoltan/src/ch/ch_dist_graph.c index 19ac90d6f42f..00e869d0e9ca 100644 --- a/packages/zoltan/src/ch/ch_dist_graph.c +++ b/packages/zoltan/src/ch/ch_dist_graph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/ch/ch_init_dist.c b/packages/zoltan/src/ch/ch_init_dist.c index a908e237bd0d..680703ba4eb0 100644 --- a/packages/zoltan/src/ch/ch_init_dist.c +++ b/packages/zoltan/src/ch/ch_init_dist.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/ch/ch_init_dist_const.h b/packages/zoltan/src/ch/ch_init_dist_const.h index 067fc11bdc59..1cdee675e7ed 100644 --- a/packages/zoltan/src/ch/ch_init_dist_const.h +++ b/packages/zoltan/src/ch/ch_init_dist_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef CH_INIT_DIST_CONST_H diff --git a/packages/zoltan/src/ch/ch_input_assign.c b/packages/zoltan/src/ch/ch_input_assign.c index 2e2210d4a024..76184aaa3b1f 100644 --- a/packages/zoltan/src/ch/ch_input_assign.c +++ b/packages/zoltan/src/ch/ch_input_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* This software was developed by Bruce Hendrickson and Robert Leland * * at Sandia National Laboratories under US Department of Energy * diff --git a/packages/zoltan/src/ch/ch_input_const.h b/packages/zoltan/src/ch/ch_input_const.h index 253a4e1a983e..df9f1809beac 100644 --- a/packages/zoltan/src/ch/ch_input_const.h +++ b/packages/zoltan/src/ch/ch_input_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __CH_INPUT_CONST_H diff --git a/packages/zoltan/src/ch/ch_input_geom.c b/packages/zoltan/src/ch/ch_input_geom.c index 0a850f437218..b849981cf728 100644 --- a/packages/zoltan/src/ch/ch_input_geom.c +++ b/packages/zoltan/src/ch/ch_input_geom.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/ch/ch_input_graph.c b/packages/zoltan/src/ch/ch_input_graph.c index f5b047a34c76..43d66e803e32 100644 --- a/packages/zoltan/src/ch/ch_input_graph.c +++ b/packages/zoltan/src/ch/ch_input_graph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/ch/ch_input_read.c b/packages/zoltan/src/ch/ch_input_read.c index 049024be18fa..0a473ff73189 100644 --- a/packages/zoltan/src/ch/ch_input_read.c +++ b/packages/zoltan/src/ch/ch_input_read.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* This software was developed by Bruce Hendrickson and Robert Leland * diff --git a/packages/zoltan/src/coloring/bucket.c b/packages/zoltan/src/coloring/bucket.c index bc403f666bde..e1ff4f6a9727 100644 --- a/packages/zoltan/src/coloring/bucket.c +++ b/packages/zoltan/src/coloring/bucket.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/coloring/bucket.h b/packages/zoltan/src/coloring/bucket.h index a9aecc10cd50..758862b10295 100644 --- a/packages/zoltan/src/coloring/bucket.h +++ b/packages/zoltan/src/coloring/bucket.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __BUCKET__H #define __BUCKET__H diff --git a/packages/zoltan/src/coloring/color_test.c b/packages/zoltan/src/coloring/color_test.c index 465a6a97ed6d..515f8a3f37c0 100644 --- a/packages/zoltan/src/coloring/color_test.c +++ b/packages/zoltan/src/coloring/color_test.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/coloring/coloring.c b/packages/zoltan/src/coloring/coloring.c index edef3fecf251..45ee13c3eaee 100644 --- a/packages/zoltan/src/coloring/coloring.c +++ b/packages/zoltan/src/coloring/coloring.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/coloring/coloring.h b/packages/zoltan/src/coloring/coloring.h index b31db02cee3b..3b04680a26a1 100644 --- a/packages/zoltan/src/coloring/coloring.h +++ b/packages/zoltan/src/coloring/coloring.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __COLORING_H #define __COLORING_H diff --git a/packages/zoltan/src/coloring/coloring_const.h b/packages/zoltan/src/coloring/coloring_const.h index 473fd12fbe05..ef518f125c4f 100644 --- a/packages/zoltan/src/coloring/coloring_const.h +++ b/packages/zoltan/src/coloring/coloring_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __COLORING_CONST_H diff --git a/packages/zoltan/src/coloring/g2l_hash.c b/packages/zoltan/src/coloring/g2l_hash.c index 9a0f8b51d95b..a4cb7f27fb1d 100644 --- a/packages/zoltan/src/coloring/g2l_hash.c +++ b/packages/zoltan/src/coloring/g2l_hash.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/coloring/g2l_hash.h b/packages/zoltan/src/coloring/g2l_hash.h index 1787aec5cc2a..70253df3cce4 100644 --- a/packages/zoltan/src/coloring/g2l_hash.h +++ b/packages/zoltan/src/coloring/g2l_hash.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _G2L_HASH_H_ #define _G2L_HASH_H_ diff --git a/packages/zoltan/src/driver/Makefile.am b/packages/zoltan/src/driver/Makefile.am index 57d5cdfb4f2b..fb6316ca0e14 100644 --- a/packages/zoltan/src/driver/Makefile.am +++ b/packages/zoltan/src/driver/Makefile.am @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER include $(top_builddir)/Makefile.export.zoltan diff --git a/packages/zoltan/src/driver/README b/packages/zoltan/src/driver/README index 72ba401d6335..d7422e5d5677 100644 --- a/packages/zoltan/src/driver/README +++ b/packages/zoltan/src/driver/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER DRIVER DIRECTORY -- Zoltan test driver application source code. ------------------------------------------------------------------------ diff --git a/packages/zoltan/src/driver/dr_chaco_io.c b/packages/zoltan/src/driver/dr_chaco_io.c index 2872a9d9ffa8..dba8713667a8 100644 --- a/packages/zoltan/src/driver/dr_chaco_io.c +++ b/packages/zoltan/src/driver/dr_chaco_io.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_chaco_io.c.shockstem b/packages/zoltan/src/driver/dr_chaco_io.c.shockstem index 570b14bbd295..c7792dbdbd84 100644 --- a/packages/zoltan/src/driver/dr_chaco_io.c.shockstem +++ b/packages/zoltan/src/driver/dr_chaco_io.c.shockstem @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_compress.c b/packages/zoltan/src/driver/dr_compress.c index 6694fd58219c..f5fc9464bb54 100644 --- a/packages/zoltan/src/driver/dr_compress.c +++ b/packages/zoltan/src/driver/dr_compress.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/driver/dr_compress_const.h b/packages/zoltan/src/driver/dr_compress_const.h index 6a90f37bf0d5..91d8b42c71d7 100644 --- a/packages/zoltan/src/driver/dr_compress_const.h +++ b/packages/zoltan/src/driver/dr_compress_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_COMPRESS_CONST_H #define _DR_COMPRESS_CONST_H diff --git a/packages/zoltan/src/driver/dr_const.h b/packages/zoltan/src/driver/dr_const.h index 66d8e514efe6..258c6620da33 100644 --- a/packages/zoltan/src/driver/dr_const.h +++ b/packages/zoltan/src/driver/dr_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_CONST_H #define _DR_CONST_H diff --git a/packages/zoltan/src/driver/dr_dd.c b/packages/zoltan/src/driver/dr_dd.c index 3353634e09e0..119a4f5272ab 100644 --- a/packages/zoltan/src/driver/dr_dd.c +++ b/packages/zoltan/src/driver/dr_dd.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" diff --git a/packages/zoltan/src/driver/dr_dd.h b/packages/zoltan/src/driver/dr_dd.h index c89756a5afdf..8a1d63f531e7 100644 --- a/packages/zoltan/src/driver/dr_dd.h +++ b/packages/zoltan/src/driver/dr_dd.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __DR_DD_H #define __DR_DD_H diff --git a/packages/zoltan/src/driver/dr_ddCPP.cpp b/packages/zoltan/src/driver/dr_ddCPP.cpp index 918343d637c6..0dd3b3b2d06a 100644 --- a/packages/zoltan/src/driver/dr_ddCPP.cpp +++ b/packages/zoltan/src/driver/dr_ddCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" #include "dr_util_const.h" diff --git a/packages/zoltan/src/driver/dr_elem.c b/packages/zoltan/src/driver/dr_elem.c index 571878465758..3ee0b7adee39 100644 --- a/packages/zoltan/src/driver/dr_elem.c +++ b/packages/zoltan/src/driver/dr_elem.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/driver/dr_elem_const.h b/packages/zoltan/src/driver/dr_elem_const.h index 7d215b3360a8..f5c6072d3147 100644 --- a/packages/zoltan/src/driver/dr_elem_const.h +++ b/packages/zoltan/src/driver/dr_elem_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_ELM_CONST_H diff --git a/packages/zoltan/src/driver/dr_elem_util.c b/packages/zoltan/src/driver/dr_elem_util.c index 945932c782c2..f04737113a05 100644 --- a/packages/zoltan/src/driver/dr_elem_util.c +++ b/packages/zoltan/src/driver/dr_elem_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" diff --git a/packages/zoltan/src/driver/dr_elem_util_const.h b/packages/zoltan/src/driver/dr_elem_util_const.h index ea5d9734811c..96be2d589b89 100644 --- a/packages/zoltan/src/driver/dr_elem_util_const.h +++ b/packages/zoltan/src/driver/dr_elem_util_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_ELEM_UTIL_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_err.c b/packages/zoltan/src/driver/dr_err.c index dd1dc881b7e4..147942e3cbdb 100644 --- a/packages/zoltan/src/driver/dr_err.c +++ b/packages/zoltan/src/driver/dr_err.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/driver/dr_err_const.h b/packages/zoltan/src/driver/dr_err_const.h index eb7c433b7bac..3fcdd7c805ee 100644 --- a/packages/zoltan/src/driver/dr_err_const.h +++ b/packages/zoltan/src/driver/dr_err_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_ERR_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_eval.c b/packages/zoltan/src/driver/dr_eval.c index 5dc6b93fc7e1..08e675b9c7eb 100644 --- a/packages/zoltan/src/driver/dr_eval.c +++ b/packages/zoltan/src/driver/dr_eval.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" #include "dr_eval_const.h" diff --git a/packages/zoltan/src/driver/dr_eval_const.h b/packages/zoltan/src/driver/dr_eval_const.h index 2f3771bbd0ee..7a722e3c40a9 100644 --- a/packages/zoltan/src/driver/dr_eval_const.h +++ b/packages/zoltan/src/driver/dr_eval_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_EVAL_H_ diff --git a/packages/zoltan/src/driver/dr_exoII_io.c b/packages/zoltan/src/driver/dr_exoII_io.c index f57cb46d891f..4c84e7d99e56 100644 --- a/packages/zoltan/src/driver/dr_exoII_io.c +++ b/packages/zoltan/src/driver/dr_exoII_io.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_exoII_ioCPP.cpp b/packages/zoltan/src/driver/dr_exoII_ioCPP.cpp index 92fd1c04b04b..d8ffbddc450b 100644 --- a/packages/zoltan/src/driver/dr_exoII_ioCPP.cpp +++ b/packages/zoltan/src/driver/dr_exoII_ioCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include // must appear before stdio or iostream diff --git a/packages/zoltan/src/driver/dr_externs.h b/packages/zoltan/src/driver/dr_externs.h index ee6c624a7cd2..68fdc653b2b0 100644 --- a/packages/zoltan/src/driver/dr_externs.h +++ b/packages/zoltan/src/driver/dr_externs.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_EXTERNS_H #define _DR_EXTERNS_H diff --git a/packages/zoltan/src/driver/dr_gnuplot.c b/packages/zoltan/src/driver/dr_gnuplot.c index 435138ca3fc4..9cce4f20ca05 100644 --- a/packages/zoltan/src/driver/dr_gnuplot.c +++ b/packages/zoltan/src/driver/dr_gnuplot.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" #include "dr_externs.h" diff --git a/packages/zoltan/src/driver/dr_hg_io.c b/packages/zoltan/src/driver/dr_hg_io.c index 8d41dd9d93ad..0add0bce3f67 100644 --- a/packages/zoltan/src/driver/dr_hg_io.c +++ b/packages/zoltan/src/driver/dr_hg_io.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/driver/dr_hg_readfile.c b/packages/zoltan/src/driver/dr_hg_readfile.c index af1a5226b626..cab9fec0cd7d 100644 --- a/packages/zoltan/src/driver/dr_hg_readfile.c +++ b/packages/zoltan/src/driver/dr_hg_readfile.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_hg_readfile.h b/packages/zoltan/src/driver/dr_hg_readfile.h index 079b51699584..c3cd8baf9048 100644 --- a/packages/zoltan/src/driver/dr_hg_readfile.h +++ b/packages/zoltan/src/driver/dr_hg_readfile.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _ZOLTAN_HG_READFILE_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_input.c b/packages/zoltan/src/driver/dr_input.c index 1e5e206c55ed..c9d66e3a1e42 100644 --- a/packages/zoltan/src/driver/dr_input.c +++ b/packages/zoltan/src/driver/dr_input.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_input.c.shockstem b/packages/zoltan/src/driver/dr_input.c.shockstem index efcc6e427ba6..67cf86c5980c 100644 --- a/packages/zoltan/src/driver/dr_input.c.shockstem +++ b/packages/zoltan/src/driver/dr_input.c.shockstem @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_input_const.h b/packages/zoltan/src/driver/dr_input_const.h index 3f2b4ee48b84..fee1ac4731f9 100644 --- a/packages/zoltan/src/driver/dr_input_const.h +++ b/packages/zoltan/src/driver/dr_input_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_INPUT_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_input_const.h.shockstem b/packages/zoltan/src/driver/dr_input_const.h.shockstem index 04375a440e58..8f37d9b3a405 100644 --- a/packages/zoltan/src/driver/dr_input_const.h.shockstem +++ b/packages/zoltan/src/driver/dr_input_const.h.shockstem @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_INPUT_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_loadbal.c b/packages/zoltan/src/driver/dr_loadbal.c index c24911016e41..1fe676e4f90f 100644 --- a/packages/zoltan/src/driver/dr_loadbal.c +++ b/packages/zoltan/src/driver/dr_loadbal.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #ifdef TIMER_CALLBACKS diff --git a/packages/zoltan/src/driver/dr_loadbalCPP.cpp b/packages/zoltan/src/driver/dr_loadbalCPP.cpp index 87f67061aff4..84bbfd23611e 100644 --- a/packages/zoltan/src/driver/dr_loadbalCPP.cpp +++ b/packages/zoltan/src/driver/dr_loadbalCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef TIMER_CALLBACKS /* Code that times how much time is spent in the callback functions. 
diff --git a/packages/zoltan/src/driver/dr_loadbal_const.h b/packages/zoltan/src/driver/dr_loadbal_const.h index 4924c7c66eca..56685cf84c81 100644 --- a/packages/zoltan/src/driver/dr_loadbal_const.h +++ b/packages/zoltan/src/driver/dr_loadbal_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_LOADBAL_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_main.c b/packages/zoltan/src/driver/dr_main.c index 8670ecbb9476..2ebdd42b3705 100644 --- a/packages/zoltan/src/driver/dr_main.c +++ b/packages/zoltan/src/driver/dr_main.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include /*--------------------------------------------------------------------------*/ diff --git a/packages/zoltan/src/driver/dr_main.c.shockstem b/packages/zoltan/src/driver/dr_main.c.shockstem index 5d656257a3c0..83134ce60004 100644 --- a/packages/zoltan/src/driver/dr_main.c.shockstem +++ b/packages/zoltan/src/driver/dr_main.c.shockstem @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include /*--------------------------------------------------------------------------*/ diff --git a/packages/zoltan/src/driver/dr_mainCPP.cpp b/packages/zoltan/src/driver/dr_mainCPP.cpp index e50641e460df..ccab70e228f6 100644 --- a/packages/zoltan/src/driver/dr_mainCPP.cpp +++ b/packages/zoltan/src/driver/dr_mainCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include // must appear before stdio or iostream diff --git a/packages/zoltan/src/driver/dr_maps.c b/packages/zoltan/src/driver/dr_maps.c index 83d98449f47e..211f7300a67c 100644 --- a/packages/zoltan/src/driver/dr_maps.c +++ b/packages/zoltan/src/driver/dr_maps.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" #include "dr_externs.h" #include "dr_maps_const.h" diff --git a/packages/zoltan/src/driver/dr_mapsCPP.cpp b/packages/zoltan/src/driver/dr_mapsCPP.cpp index 60c8c7d4299c..3404178019bc 100644 --- a/packages/zoltan/src/driver/dr_mapsCPP.cpp +++ b/packages/zoltan/src/driver/dr_mapsCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" #include "dr_externs.h" diff --git a/packages/zoltan/src/driver/dr_maps_const.h b/packages/zoltan/src/driver/dr_maps_const.h index d8000abd8750..7c8d04e4c1aa 100644 --- a/packages/zoltan/src/driver/dr_maps_const.h +++ b/packages/zoltan/src/driver/dr_maps_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_MAPS_CONST_H diff --git a/packages/zoltan/src/driver/dr_migrate.c b/packages/zoltan/src/driver/dr_migrate.c index eb3a53806a8c..f065d4f969eb 100644 --- a/packages/zoltan/src/driver/dr_migrate.c +++ b/packages/zoltan/src/driver/dr_migrate.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*--------------------------------------------------------------------------*/ /* Purpose: Call Zoltan to migrate elements. */ diff --git a/packages/zoltan/src/driver/dr_migrate.c.shockstem b/packages/zoltan/src/driver/dr_migrate.c.shockstem index 4429a2531f9e..813319c7de9b 100644 --- a/packages/zoltan/src/driver/dr_migrate.c.shockstem +++ b/packages/zoltan/src/driver/dr_migrate.c.shockstem @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /****************************************************************************/ /* ALTERNATE FILE FOR SHOCKSTEM ADAPTIVE MESH REFINEMENT EXPERIMENTS ONLY. */ diff --git a/packages/zoltan/src/driver/dr_migrateCPP.cpp b/packages/zoltan/src/driver/dr_migrateCPP.cpp index 3d2fe01bd48f..729408267e5c 100644 --- a/packages/zoltan/src/driver/dr_migrateCPP.cpp +++ b/packages/zoltan/src/driver/dr_migrateCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include // must appear before stdio or iostream diff --git a/packages/zoltan/src/driver/dr_mm_readfile.c b/packages/zoltan/src/driver/dr_mm_readfile.c index 787a8bda2ca6..f416f2e966f2 100644 --- a/packages/zoltan/src/driver/dr_mm_readfile.c +++ b/packages/zoltan/src/driver/dr_mm_readfile.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include "zoltan.h" diff --git a/packages/zoltan/src/driver/dr_mmio.c b/packages/zoltan/src/driver/dr_mmio.c index c78dd4344f51..8091222b0974 100644 --- a/packages/zoltan/src/driver/dr_mmio.c +++ b/packages/zoltan/src/driver/dr_mmio.c @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* * Matrix Market I/O library for ANSI C * diff --git a/packages/zoltan/src/driver/dr_mmio.h b/packages/zoltan/src/driver/dr_mmio.h index db529d114ffd..54e2d66be6c9 100644 --- a/packages/zoltan/src/driver/dr_mmio.h +++ b/packages/zoltan/src/driver/dr_mmio.h @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* * Matrix Market I/O library for ANSI C * diff --git a/packages/zoltan/src/driver/dr_output.c b/packages/zoltan/src/driver/dr_output.c index c63b9156b4c8..16b4a9a2cf64 100644 --- a/packages/zoltan/src/driver/dr_output.c +++ b/packages/zoltan/src/driver/dr_output.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "dr_const.h" #include "dr_externs.h" diff --git a/packages/zoltan/src/driver/dr_output_const.h b/packages/zoltan/src/driver/dr_output_const.h index 82d6f9401b21..12941b0bd374 100644 --- a/packages/zoltan/src/driver/dr_output_const.h +++ b/packages/zoltan/src/driver/dr_output_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_OUTPUT_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_par_util.c b/packages/zoltan/src/driver/dr_par_util.c index a60564f22774..62298d40f0c7 100644 --- a/packages/zoltan/src/driver/dr_par_util.c +++ b/packages/zoltan/src/driver/dr_par_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_par_util_const.h b/packages/zoltan/src/driver/dr_par_util_const.h index 64ca181574d2..84c3ed4a4b53 100644 --- a/packages/zoltan/src/driver/dr_par_util_const.h +++ b/packages/zoltan/src/driver/dr_par_util_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_PAR_UTIL_CONST_H_ diff --git a/packages/zoltan/src/driver/dr_param_file.c b/packages/zoltan/src/driver/dr_param_file.c index 42dee8b41e26..0740e62f1167 100644 --- a/packages/zoltan/src/driver/dr_param_file.c +++ b/packages/zoltan/src/driver/dr_param_file.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Code imported to Zoltan zdrive from diff --git a/packages/zoltan/src/driver/dr_param_file.h b/packages/zoltan/src/driver/dr_param_file.h index d80715e75b7c..5e51eee3289c 100644 --- a/packages/zoltan/src/driver/dr_param_file.h +++ b/packages/zoltan/src/driver/dr_param_file.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Code imported to Zoltan zdrive from diff --git a/packages/zoltan/src/driver/dr_param_fileCPP.cpp b/packages/zoltan/src/driver/dr_param_fileCPP.cpp index 15f97ffe744b..e405ce4eb086 100644 --- a/packages/zoltan/src/driver/dr_param_fileCPP.cpp +++ b/packages/zoltan/src/driver/dr_param_fileCPP.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Code imported to Zoltan zdrive from diff --git a/packages/zoltan/src/driver/dr_param_fileCPP.h b/packages/zoltan/src/driver/dr_param_fileCPP.h index 34e79faf40e4..879750ef57f1 100644 --- a/packages/zoltan/src/driver/dr_param_fileCPP.h +++ b/packages/zoltan/src/driver/dr_param_fileCPP.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Code imported to Zoltan zdrive from diff --git a/packages/zoltan/src/driver/dr_random_io.c b/packages/zoltan/src/driver/dr_random_io.c index eab01154b789..7b616a5da249 100644 --- a/packages/zoltan/src/driver/dr_random_io.c +++ b/packages/zoltan/src/driver/dr_random_io.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_setfixed.c b/packages/zoltan/src/driver/dr_setfixed.c index db8395475423..d0d6cfeba3a3 100644 --- a/packages/zoltan/src/driver/dr_setfixed.c +++ b/packages/zoltan/src/driver/dr_setfixed.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/driver/dr_util.c b/packages/zoltan/src/driver/dr_util.c index 40b3621ddd30..fc12dd51cd6b 100644 --- a/packages/zoltan/src/driver/dr_util.c +++ b/packages/zoltan/src/driver/dr_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include diff --git a/packages/zoltan/src/driver/dr_util_const.h b/packages/zoltan/src/driver/dr_util_const.h index 504f37fea560..c67b8a56d04f 100644 --- a/packages/zoltan/src/driver/dr_util_const.h +++ b/packages/zoltan/src/driver/dr_util_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _DR_UTIL_CONST_H_ diff --git a/packages/zoltan/src/driver/order_test.c b/packages/zoltan/src/driver/order_test.c index aeed5f3deb8b..659c7f278d6b 100644 --- a/packages/zoltan/src/driver/order_test.c +++ b/packages/zoltan/src/driver/order_test.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/fdriver/Makefile.am b/packages/zoltan/src/fdriver/Makefile.am index 3c6200d4e4cc..49f6ed8ecff1 100644 --- a/packages/zoltan/src/fdriver/Makefile.am +++ b/packages/zoltan/src/fdriver/Makefile.am @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER include $(top_builddir)/Makefile.export.zoltan diff --git a/packages/zoltan/src/fdriver/README.mpich b/packages/zoltan/src/fdriver/README.mpich index 19b5851f7898..2828090cc81b 100644 --- a/packages/zoltan/src/fdriver/README.mpich +++ b/packages/zoltan/src/fdriver/README.mpich @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER I had the darndest time getting MPICH to work on Linux because they use command line arguments, which is not standard in Fortran. I do not yet know how much this effects other operating systems with one f90. Under the default build of diff --git a/packages/zoltan/src/fdriver/farg_nagf95.f b/packages/zoltan/src/fdriver/farg_nagf95.f index 9e6585a7c8f2..3b418a114a67 100644 --- a/packages/zoltan/src/fdriver/farg_nagf95.f +++ b/packages/zoltan/src/fdriver/farg_nagf95.f @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! 
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! 
***************************************************************************** !! @HEADER - !! +!! ! Command line argument functions for NAGWare f95 4.0 diff --git a/packages/zoltan/src/fdriver/farg_typical.f b/packages/zoltan/src/fdriver/farg_typical.f index e3e169f3d5a2..51c7e973df86 100644 --- a/packages/zoltan/src/fdriver/farg_typical.f +++ b/packages/zoltan/src/fdriver/farg_typical.f @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! 
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! ! Command line argument functions for typical iargc, getarg implementations diff --git a/packages/zoltan/src/fdriver/fdr_chaco_io.f90 b/packages/zoltan/src/fdriver/fdr_chaco_io.f90 index 8b92ef5b5704..ac45fe81f0a0 100644 --- a/packages/zoltan/src/fdriver/fdr_chaco_io.f90 +++ b/packages/zoltan/src/fdriver/fdr_chaco_io.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! 
notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module dr_chaco_io use zoltan diff --git a/packages/zoltan/src/fdriver/fdr_const.f90 b/packages/zoltan/src/fdriver/fdr_const.f90 index b259610841b9..4552e8087dee 100644 --- a/packages/zoltan/src/fdriver/fdr_const.f90 +++ b/packages/zoltan/src/fdriver/fdr_const.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! 
Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! 
SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !-------------------------------------------------------------------- ! File dr_const.h translated to Fortran by William F. Mitchell ! diff --git a/packages/zoltan/src/fdriver/fdr_input.f90 b/packages/zoltan/src/fdriver/fdr_input.f90 index 87091c6ad8a0..2f977cc48658 100644 --- a/packages/zoltan/src/fdriver/fdr_input.f90 +++ b/packages/zoltan/src/fdriver/fdr_input.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! 
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module dr_input use zoltan use dr_const diff --git a/packages/zoltan/src/fdriver/fdr_loadbal.f90 b/packages/zoltan/src/fdriver/fdr_loadbal.f90 index 4e9dc4810dc1..9f37e77e6efe 100644 --- a/packages/zoltan/src/fdriver/fdr_loadbal.f90 +++ b/packages/zoltan/src/fdriver/fdr_loadbal.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! 
notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module dr_loadbal use zoltan use zoltan_user_data diff --git a/packages/zoltan/src/fdriver/fdr_main.f90 b/packages/zoltan/src/fdriver/fdr_main.f90 index 2e2a3fa65389..70be65a453c3 100644 --- a/packages/zoltan/src/fdriver/fdr_main.f90 +++ b/packages/zoltan/src/fdriver/fdr_main.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! 
Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! 
SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !-------------------------------------------------------------------------- ! Purpose: Driver for dynamic load-balance library, ZOLTAN. diff --git a/packages/zoltan/src/fdriver/fdr_migrate.f90 b/packages/zoltan/src/fdriver/fdr_migrate.f90 index f85c329fdca3..f1ac82f7b19c 100644 --- a/packages/zoltan/src/fdriver/fdr_migrate.f90 +++ b/packages/zoltan/src/fdriver/fdr_migrate.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! 
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module dr_migrate diff --git a/packages/zoltan/src/fdriver/fdr_mm_io.f90 b/packages/zoltan/src/fdriver/fdr_mm_io.f90 index 9a30fb0e6d39..e7234c0d0b94 100644 --- a/packages/zoltan/src/fdriver/fdr_mm_io.f90 +++ b/packages/zoltan/src/fdriver/fdr_mm_io.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! 
notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module dr_mm_io use zoltan diff --git a/packages/zoltan/src/fdriver/fdr_param_file.f90 b/packages/zoltan/src/fdriver/fdr_param_file.f90 index 180776f22136..57aa97592be0 100644 --- a/packages/zoltan/src/fdriver/fdr_param_file.f90 +++ b/packages/zoltan/src/fdriver/fdr_param_file.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! 
Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! 
SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! ! *************************************************************************** ! ! Code imported to Zoltan zdrive from diff --git a/packages/zoltan/src/fdriver/fdr_sort.f90 b/packages/zoltan/src/fdriver/fdr_sort.f90 index b4a76530a182..52dd02f02667 100644 --- a/packages/zoltan/src/fdriver/fdr_sort.f90 +++ b/packages/zoltan/src/fdriver/fdr_sort.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! 
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !-------------------------------------------------------------------------- ! Purpose: Driver for dynamic load-balance library, ZOLTAN. diff --git a/packages/zoltan/src/fdriver/makefile b/packages/zoltan/src/fdriver/makefile index 68b2f0905dde..bd24be13d4e9 100644 --- a/packages/zoltan/src/fdriver/makefile +++ b/packages/zoltan/src/fdriver/makefile @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ZOD = ../$(ZOLTAN_OBJ_DIR) ZOM = $(F90_MODULE_PREFIX)$(ZOD) diff --git a/packages/zoltan/src/fdriver/mmio.f b/packages/zoltan/src/fdriver/mmio.f index 7f29bf5be0ac..339f834690e1 100644 --- a/packages/zoltan/src/fdriver/mmio.f +++ b/packages/zoltan/src/fdriver/mmio.f @@ -1,3 +1,11 @@ +c @HEADER +c ***************************************************************************** +c Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +c +c Copyright 2012 NTESS and the Zoltan contributors. 
+c SPDX-License-Identifier: BSD-3-Clause +c ***************************************************************************** +c @HEADER subroutine mmread(iunit,rep,field,symm,rows,cols,nnz,nnzmax, * indx,jndx,ival,rval,cval) ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc diff --git a/packages/zoltan/src/fdriver/mpi_h.f b/packages/zoltan/src/fdriver/mpi_h.f index bbd78ec6c31f..71556826ca15 100644 --- a/packages/zoltan/src/fdriver/mpi_h.f +++ b/packages/zoltan/src/fdriver/mpi_h.f @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! 
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !-------------------------------------------------------------------------- ! Purpose: Provide the MPI include file as a module. diff --git a/packages/zoltan/src/fdriver/zoltan_user_data.f90.old b/packages/zoltan/src/fdriver/zoltan_user_data.f90.old index 6557521726fb..d3a80467c07a 100644 --- a/packages/zoltan/src/fdriver/zoltan_user_data.f90.old +++ b/packages/zoltan/src/fdriver/zoltan_user_data.f90.old @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! 
notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! 
module zoltan_user_data use zoltan_types implicit none diff --git a/packages/zoltan/src/fort/README b/packages/zoltan/src/fort/README index 319036d7136f..c6b6cc44960a 100644 --- a/packages/zoltan/src/fort/README +++ b/packages/zoltan/src/fort/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER FORT DIRECTORY -- F90 interface. diff --git a/packages/zoltan/src/fort/cwrap.c b/packages/zoltan/src/fort/cwrap.c index 771925e366cd..3c09a3800e52 100644 --- a/packages/zoltan/src/fort/cwrap.c +++ b/packages/zoltan/src/fort/cwrap.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /***************************************************************************** * KDD: 2/7/11 * KDD: This interface uses pointer aliasing to translate C pointers to diff --git a/packages/zoltan/src/fort/cwrap_fmangle.h b/packages/zoltan/src/fort/cwrap_fmangle.h index fdc23f8d3cb7..9ae56edd3bb5 100644 --- a/packages/zoltan/src/fort/cwrap_fmangle.h +++ b/packages/zoltan/src/fort/cwrap_fmangle.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef CWRAPFMANGLEH_ #define CWRAPFMANGLEH_ diff --git a/packages/zoltan/src/fort/fwrap.f90 b/packages/zoltan/src/fort/fwrap.f90 index 964f8fe197c3..327a8ceb4845 100644 --- a/packages/zoltan/src/fort/fwrap.f90 +++ b/packages/zoltan/src/fort/fwrap.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. 
Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! !-------------------------------------------------------------------------- ! 
preprocessor directives to handle special case compilers diff --git a/packages/zoltan/src/fort/zoltan_user_data.f90 b/packages/zoltan/src/fort/zoltan_user_data.f90 index 47ab1e2cc18c..7f44da974152 100644 --- a/packages/zoltan/src/fort/zoltan_user_data.f90 +++ b/packages/zoltan/src/fort/zoltan_user_data.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! 
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module zoltan_user_data use zoltan_types diff --git a/packages/zoltan/src/fort/ztypes.f90 b/packages/zoltan/src/fort/ztypes.f90 index 0cc11a7deeaa..edb2023bd6d6 100644 --- a/packages/zoltan/src/fort/ztypes.f90 +++ b/packages/zoltan/src/fort/ztypes.f90 @@ -1,48 +1,13 @@ !! !! @HEADER -!! -!!!!********************************************************************** -!! +!! ***************************************************************************** !! Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -!! Copyright 2012 Sandia Corporation -!! -!! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!! the U.S. Government retains certain rights in this software. -!! -!! Redistribution and use in source and binary forms, with or without -!! modification, are permitted provided that the following conditions are -!! met: -!! -!! 1. Redistributions of source code must retain the above copyright -!! notice, this list of conditions and the following disclaimer. -!! -!! 2. Redistributions in binary form must reproduce the above copyright -!! notice, this list of conditions and the following disclaimer in the -!! documentation and/or other materials provided with the distribution. -!! -!! 3. Neither the name of the Corporation nor the names of the -!! 
contributors may be used to endorse or promote products derived from -!! this software without specific prior written permission. -!! -!! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!! -!! Questions? Contact Karen Devine kddevin@sandia.gov -!! Erik Boman egboman@sandia.gov -!! -!!!!********************************************************************** !! +!! Copyright 2012 NTESS and the Zoltan contributors. +!! SPDX-License-Identifier: BSD-3-Clause +!! ***************************************************************************** !! @HEADER - !! +!! module zoltan_types diff --git a/packages/zoltan/src/graph/graph.c b/packages/zoltan/src/graph/graph.c index cd606e8365f2..d1f03aeaefb6 100644 --- a/packages/zoltan/src/graph/graph.c +++ b/packages/zoltan/src/graph/graph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/graph/graph.h b/packages/zoltan/src/graph/graph.h index a8dc3d64763a..13d5f02e16fa 100644 --- a/packages/zoltan/src/graph/graph.h +++ b/packages/zoltan/src/graph/graph.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __GRAPH_H #define __GRAPH_H diff --git a/packages/zoltan/src/graph/graph_const.h b/packages/zoltan/src/graph/graph_const.h index fb3b6b8ee5d4..0f9319a59ec3 100644 --- a/packages/zoltan/src/graph/graph_const.h +++ b/packages/zoltan/src/graph/graph_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __GRAPH_CONST_H diff --git a/packages/zoltan/src/graph/graph_params.h b/packages/zoltan/src/graph/graph_params.h index eb87973117e1..f3dbffcd08c1 100644 --- a/packages/zoltan/src/graph/graph_params.h +++ b/packages/zoltan/src/graph/graph_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __GRAPH_PARAMS_H #define __GRAPH_PARAMS_H diff --git a/packages/zoltan/src/ha/README b/packages/zoltan/src/ha/README index bff0837db66e..c550678a512b 100644 --- a/packages/zoltan/src/ha/README +++ b/packages/zoltan/src/ha/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER HA DIRECTORY -- Routines to support heterogeneous architectures diff --git a/packages/zoltan/src/ha/divide_machine.c b/packages/zoltan/src/ha/divide_machine.c index fbd5746fdeec..f5b44eabb275 100644 --- a/packages/zoltan/src/ha/divide_machine.c +++ b/packages/zoltan/src/ha/divide_machine.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. 
Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/ha/get_processor_name.c b/packages/zoltan/src/ha/get_processor_name.c index b8682b195ba3..7dd10b5a1edc 100644 --- a/packages/zoltan/src/ha/get_processor_name.c +++ b/packages/zoltan/src/ha/get_processor_name.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/ha/ha_const.h b/packages/zoltan/src/ha/ha_const.h index 9935a07c528b..001b07211019 100644 --- a/packages/zoltan/src/ha/ha_const.h +++ b/packages/zoltan/src/ha/ha_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __HA_CONST_H #define __HA_CONST_H diff --git a/packages/zoltan/src/ha/ha_ovis.c b/packages/zoltan/src/ha/ha_ovis.c index fca87b084686..1a1eaac2f419 100644 --- a/packages/zoltan/src/ha/ha_ovis.c +++ b/packages/zoltan/src/ha/ha_ovis.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/ha/ha_ovis.h b/packages/zoltan/src/ha/ha_ovis.h index 48837e94ee8e..27eb08cbf54b 100644 --- a/packages/zoltan/src/ha/ha_ovis.h +++ b/packages/zoltan/src/ha/ha_ovis.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __HA_OVIS_H #define __HA_OVIS_H diff --git a/packages/zoltan/src/hier/README b/packages/zoltan/src/hier/README index b0aa2ca0d42a..a59617358274 100644 --- a/packages/zoltan/src/hier/README +++ b/packages/zoltan/src/hier/README @@ -1,46 +1,11 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER HIER DIRECTORY -- Hierarchical partitioning and rebalancing support diff --git a/packages/zoltan/src/hier/hier.c b/packages/zoltan/src/hier/hier.c index c21ced55957c..49d1562daaa4 100644 --- a/packages/zoltan/src/hier/hier.c +++ b/packages/zoltan/src/hier/hier.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/hier/hier.h b/packages/zoltan/src/hier/hier.h index 3623629ce052..d8e12159b6d3 100644 --- a/packages/zoltan/src/hier/hier.h +++ b/packages/zoltan/src/hier/hier.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __HIER_H #define __HIER_H diff --git a/packages/zoltan/src/hier/hier_free_struct.c b/packages/zoltan/src/hier/hier_free_struct.c index a4a284c88774..067d3be40c71 100644 --- a/packages/zoltan/src/hier/hier_free_struct.c +++ b/packages/zoltan/src/hier/hier_free_struct.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/hsfc/hsfc.c b/packages/zoltan/src/hsfc/hsfc.c index c0a82b4668fd..2dcd799d7ad2 100644 --- a/packages/zoltan/src/hsfc/hsfc.c +++ b/packages/zoltan/src/hsfc/hsfc.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/hsfc/hsfc.h b/packages/zoltan/src/hsfc/hsfc.h index d61b991ed3dd..635569c1940b 100644 --- a/packages/zoltan/src/hsfc/hsfc.h +++ b/packages/zoltan/src/hsfc/hsfc.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_HSFC_H #define ZOLTAN_HSFC_H diff --git a/packages/zoltan/src/hsfc/hsfc_box_assign.c b/packages/zoltan/src/hsfc/hsfc_box_assign.c index 90ada22fcbac..9e7cd27d6944 100644 --- a/packages/zoltan/src/hsfc/hsfc_box_assign.c +++ b/packages/zoltan/src/hsfc/hsfc_box_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/hsfc/hsfc_const.h b/packages/zoltan/src/hsfc/hsfc_const.h index 109797c199a2..fe6effc4e4d2 100644 --- a/packages/zoltan/src/hsfc/hsfc_const.h +++ b/packages/zoltan/src/hsfc/hsfc_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_HSFC_CONST_H #define ZOLTAN_HSFC_CONST_H diff --git a/packages/zoltan/src/hsfc/hsfc_hilbert.c b/packages/zoltan/src/hsfc/hsfc_hilbert.c index 1a1fd4f4ae9a..a39bb20c5259 100644 --- a/packages/zoltan/src/hsfc/hsfc_hilbert.c +++ b/packages/zoltan/src/hsfc/hsfc_hilbert.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/hsfc/hsfc_hilbert_const.h b/packages/zoltan/src/hsfc/hsfc_hilbert_const.h index 6283e5a0b285..15e05d5290ca 100644 --- a/packages/zoltan/src/hsfc/hsfc_hilbert_const.h +++ b/packages/zoltan/src/hsfc/hsfc_hilbert_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __HSFC_HILBERT_CONST_H #define __HSFC_HILBERT_CONST_H diff --git a/packages/zoltan/src/hsfc/hsfc_params.h b/packages/zoltan/src/hsfc/hsfc_params.h index d83460bb2196..2a6f2c799dd7 100644 --- a/packages/zoltan/src/hsfc/hsfc_params.h +++ b/packages/zoltan/src/hsfc/hsfc_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __HSFC_PARAMS_H #define __HSFC_PARAMS_H diff --git a/packages/zoltan/src/hsfc/hsfc_point_assign.c b/packages/zoltan/src/hsfc/hsfc_point_assign.c index 8a5c163af879..abfc53221d38 100644 --- a/packages/zoltan/src/hsfc/hsfc_point_assign.c +++ b/packages/zoltan/src/hsfc/hsfc_point_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/include/README b/packages/zoltan/src/include/README index 61336918242a..5d21d7d8af42 100644 --- a/packages/zoltan/src/include/README +++ b/packages/zoltan/src/include/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Include files for the Zoltan interface. diff --git a/packages/zoltan/src/include/lbi_const.h b/packages/zoltan/src/include/lbi_const.h index 5e04c34fe6e6..8a93cbd11aa3 100644 --- a/packages/zoltan/src/include/lbi_const.h +++ b/packages/zoltan/src/include/lbi_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __LBI_CONST_H #define __LBI_CONST_H diff --git a/packages/zoltan/src/include/zoltan.h b/packages/zoltan/src/include/zoltan.h index df9e2974d269..58d818358cac 100644 --- a/packages/zoltan/src/include/zoltan.h +++ b/packages/zoltan/src/include/zoltan.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_H #define __ZOLTAN_H diff --git a/packages/zoltan/src/include/zoltan_align.h b/packages/zoltan/src/include/zoltan_align.h index f52f3077626f..2e6874e558cd 100644 --- a/packages/zoltan/src/include/zoltan_align.h +++ b/packages/zoltan/src/include/zoltan_align.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_ALIGN_H #define __ZOLTAN_ALIGN_H diff --git a/packages/zoltan/src/include/zoltan_comm.h b/packages/zoltan/src/include/zoltan_comm.h index 7f17506b7fb0..cc3a5cd84689 100644 --- a/packages/zoltan/src/include/zoltan_comm.h +++ b/packages/zoltan/src/include/zoltan_comm.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __COMM_CONST_H diff --git a/packages/zoltan/src/include/zoltan_comm_cpp.h b/packages/zoltan/src/include/zoltan_comm_cpp.h index 27360389db92..f78aeebc9fda 100644 --- a/packages/zoltan/src/include/zoltan_comm_cpp.h +++ b/packages/zoltan/src/include/zoltan_comm_cpp.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER // // ************************************************************************ // diff --git a/packages/zoltan/src/include/zoltan_cpp.h b/packages/zoltan/src/include/zoltan_cpp.h index 1438244f04f5..895628d26e23 100644 --- a/packages/zoltan/src/include/zoltan_cpp.h +++ b/packages/zoltan/src/include/zoltan_cpp.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER // ************************************************************************ // C++ class representing a Zoltan_Struct object. diff --git a/packages/zoltan/src/include/zoltan_dd.h b/packages/zoltan/src/include/zoltan_dd.h index 4faf5293bdcd..100f38c98558 100644 --- a/packages/zoltan/src/include/zoltan_dd.h +++ b/packages/zoltan/src/include/zoltan_dd.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT(INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_DD_DDIRECTORY_H #define ZOLTAN_DD_DDIRECTORY_H diff --git a/packages/zoltan/src/include/zoltan_dd_cpp.h b/packages/zoltan/src/include/zoltan_dd_cpp.h index 07549863e0de..2df604d94297 100644 --- a/packages/zoltan/src/include/zoltan_dd_cpp.h +++ b/packages/zoltan/src/include/zoltan_dd_cpp.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER // ************************************************************************ // diff --git a/packages/zoltan/src/include/zoltan_eval.h b/packages/zoltan/src/include/zoltan_eval.h index 4397f2e8b7ce..452816accc9c 100644 --- a/packages/zoltan/src/include/zoltan_eval.h +++ b/packages/zoltan/src/include/zoltan_eval.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_EVAL_H #define __ZOLTAN_EVAL_H diff --git a/packages/zoltan/src/include/zoltan_mem.h b/packages/zoltan/src/include/zoltan_mem.h index 642a27e737fc..a7a3d64cd8fd 100644 --- a/packages/zoltan/src/include/zoltan_mem.h +++ b/packages/zoltan/src/include/zoltan_mem.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __MEM_CONST_H #define __MEM_CONST_H diff --git a/packages/zoltan/src/include/zoltan_partition_tree.h b/packages/zoltan/src/include/zoltan_partition_tree.h index 07746b09c73c..917a30f87029 100644 --- a/packages/zoltan/src/include/zoltan_partition_tree.h +++ b/packages/zoltan/src/include/zoltan_partition_tree.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_PARTITION_TREE_H #define ZOLTAN_PARTITION_TREE_H diff --git a/packages/zoltan/src/include/zoltan_timer.h b/packages/zoltan/src/include/zoltan_timer.h index aa66e675d04f..a2d3a2e747ea 100644 --- a/packages/zoltan/src/include/zoltan_timer.h +++ b/packages/zoltan/src/include/zoltan_timer.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTANTIMER_H #define __ZOLTANTIMER_H diff --git a/packages/zoltan/src/include/zoltan_timer_cpp.h b/packages/zoltan/src/include/zoltan_timer_cpp.h index e11fad2921cf..536daeb15afa 100644 --- a/packages/zoltan/src/include/zoltan_timer_cpp.h +++ b/packages/zoltan/src/include/zoltan_timer_cpp.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER // // ************************************************************************ // diff --git a/packages/zoltan/src/include/zoltan_types.h b/packages/zoltan/src/include/zoltan_types.h index 0b9ea08dacfd..fbbec89eee64 100644 --- a/packages/zoltan/src/include/zoltan_types.h +++ b/packages/zoltan/src/include/zoltan_types.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_TYPES_H #define __ZOLTAN_TYPES_H diff --git a/packages/zoltan/src/lb/README b/packages/zoltan/src/lb/README index a1b4ab968cba..bd7a2cfa25b2 100644 --- a/packages/zoltan/src/lb/README +++ b/packages/zoltan/src/lb/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER LB DIRECTORY -- Zoltan load-balancing tools. diff --git a/packages/zoltan/src/lb/lb_balance.c b/packages/zoltan/src/lb/lb_balance.c index a30e0e429286..c6a7b9368850 100644 --- a/packages/zoltan/src/lb/lb_balance.c +++ b/packages/zoltan/src/lb/lb_balance.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_box_assign.c b/packages/zoltan/src/lb/lb_box_assign.c index 721387956c69..73e506bac3a5 100644 --- a/packages/zoltan/src/lb/lb_box_assign.c +++ b/packages/zoltan/src/lb/lb_box_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_const.h b/packages/zoltan/src/lb/lb_const.h index 54494d387e29..7e811d934a9f 100644 --- a/packages/zoltan/src/lb/lb_const.h +++ b/packages/zoltan/src/lb/lb_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __LB_CONST_H diff --git a/packages/zoltan/src/lb/lb_copy.c b/packages/zoltan/src/lb/lb_copy.c index 7418d4cfdf4e..b0cc26f16523 100644 --- a/packages/zoltan/src/lb/lb_copy.c +++ b/packages/zoltan/src/lb/lb_copy.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/lb/lb_eval.c b/packages/zoltan/src/lb/lb_eval.c index 9946dea5a442..68d187332cf3 100644 --- a/packages/zoltan/src/lb/lb_eval.c +++ b/packages/zoltan/src/lb/lb_eval.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_free.c b/packages/zoltan/src/lb/lb_free.c index bffd13b7d641..04d78d0a9967 100644 --- a/packages/zoltan/src/lb/lb_free.c +++ b/packages/zoltan/src/lb/lb_free.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/lb/lb_init.c b/packages/zoltan/src/lb/lb_init.c index 5f080cb8b46d..e90998f872b4 100644 --- a/packages/zoltan/src/lb/lb_init.c +++ b/packages/zoltan/src/lb/lb_init.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_init_const.h b/packages/zoltan/src/lb/lb_init_const.h index 1fab38dd9685..b57e539088e1 100644 --- a/packages/zoltan/src/lb/lb_init_const.h +++ b/packages/zoltan/src/lb/lb_init_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __LB_INIT_CONST_H diff --git a/packages/zoltan/src/lb/lb_invert.c b/packages/zoltan/src/lb/lb_invert.c index f2d9eb429c5b..4fcdd21ba615 100644 --- a/packages/zoltan/src/lb/lb_invert.c +++ b/packages/zoltan/src/lb/lb_invert.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_migrate.c b/packages/zoltan/src/lb/lb_migrate.c index 39b42107c112..d4102ec3e8aa 100644 --- a/packages/zoltan/src/lb/lb_migrate.c +++ b/packages/zoltan/src/lb/lb_migrate.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_part2proc.c b/packages/zoltan/src/lb/lb_part2proc.c index 01a29cdd4b5a..80f1cdbfd906 100644 --- a/packages/zoltan/src/lb/lb_part2proc.c +++ b/packages/zoltan/src/lb/lb_part2proc.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/lb/lb_point_assign.c b/packages/zoltan/src/lb/lb_point_assign.c index 4baa1a2d47f0..059e60f4ed4c 100644 --- a/packages/zoltan/src/lb/lb_point_assign.c +++ b/packages/zoltan/src/lb/lb_point_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_remap.c b/packages/zoltan/src/lb/lb_remap.c index 379925e1d2c4..5cc5fd79ccf1 100644 --- a/packages/zoltan/src/lb/lb_remap.c +++ b/packages/zoltan/src/lb/lb_remap.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_set_fn.c b/packages/zoltan/src/lb/lb_set_fn.c index ca5c2ebd1cb7..1947e702fb17 100644 --- a/packages/zoltan/src/lb/lb_set_fn.c +++ b/packages/zoltan/src/lb/lb_set_fn.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_set_method.c b/packages/zoltan/src/lb/lb_set_method.c index 39df9d502104..c66ee87db593 100644 --- a/packages/zoltan/src/lb/lb_set_method.c +++ b/packages/zoltan/src/lb/lb_set_method.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/lb_set_part_sizes.c b/packages/zoltan/src/lb/lb_set_part_sizes.c index 81d6c28bef17..6e2477e11ebe 100644 --- a/packages/zoltan/src/lb/lb_set_part_sizes.c +++ b/packages/zoltan/src/lb/lb_set_part_sizes.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/lb/low_mem_lb_migrate.c b/packages/zoltan/src/lb/low_mem_lb_migrate.c index 3415b492702f..972d4375df7c 100644 --- a/packages/zoltan/src/lb/low_mem_lb_migrate.c +++ b/packages/zoltan/src/lb/low_mem_lb_migrate.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/matlab/README b/packages/zoltan/src/matlab/README index 26bb1191ca68..4db76a2318cc 100644 --- a/packages/zoltan/src/matlab/README +++ b/packages/zoltan/src/matlab/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER This directory contains a crude Matlab interface to Zoltan for partitioning sparse matrices (by rows or columns). This is not a finished product, but work in progress that you diff --git a/packages/zoltan/src/matrix/matrix_build.c b/packages/zoltan/src/matrix/matrix_build.c index b93cf3ffd585..7a978be789c8 100644 --- a/packages/zoltan/src/matrix/matrix_build.c +++ b/packages/zoltan/src/matrix/matrix_build.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/matrix/matrix_distribute.c b/packages/zoltan/src/matrix/matrix_distribute.c index 750b06453a1a..e87a940c04f0 100644 --- a/packages/zoltan/src/matrix/matrix_distribute.c +++ b/packages/zoltan/src/matrix/matrix_distribute.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/matrix/matrix_operations.c b/packages/zoltan/src/matrix/matrix_operations.c index 4f946d25cadd..85fd1da9cf5f 100644 --- a/packages/zoltan/src/matrix/matrix_operations.c +++ b/packages/zoltan/src/matrix/matrix_operations.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/matrix/matrix_sym.c b/packages/zoltan/src/matrix/matrix_sym.c index d1f4babec125..097ff5cc9982 100644 --- a/packages/zoltan/src/matrix/matrix_sym.c +++ b/packages/zoltan/src/matrix/matrix_sym.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/matrix/matrix_utils.c b/packages/zoltan/src/matrix/matrix_utils.c index d836ac1df6dd..9fab2cd79570 100644 --- a/packages/zoltan/src/matrix/matrix_utils.c +++ b/packages/zoltan/src/matrix/matrix_utils.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/matrix/zoltan_matrix.h b/packages/zoltan/src/matrix/zoltan_matrix.h index c4aa8f4bf108..6c15be73eaf4 100644 --- a/packages/zoltan/src/matrix/zoltan_matrix.h +++ b/packages/zoltan/src/matrix/zoltan_matrix.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /***************************************************************************** * This module wants to be an "abstract" view of user data before being * "specialized" into real Zoltan datastructure like HyperGraph or Graph. diff --git a/packages/zoltan/src/order/README b/packages/zoltan/src/order/README index 39ef47ba4939..efc069405b62 100644 --- a/packages/zoltan/src/order/README +++ b/packages/zoltan/src/order/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ORDER DIRECTORY -- Zoltan ordering tools. 
diff --git a/packages/zoltan/src/order/hsfcOrder.c b/packages/zoltan/src/order/hsfcOrder.c index 131de35ae308..6998d0acfd7a 100644 --- a/packages/zoltan/src/order/hsfcOrder.c +++ b/packages/zoltan/src/order/hsfcOrder.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/order/hsfcOrder.h b/packages/zoltan/src/order/hsfcOrder.h index 1b6db7df082c..b69c361305f4 100644 --- a/packages/zoltan/src/order/hsfcOrder.h +++ b/packages/zoltan/src/order/hsfcOrder.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER int Zoltan_LocalHSFC_Order( ZZ *zz, /* Zoltan structure */ diff --git a/packages/zoltan/src/order/hund.c b/packages/zoltan/src/order/hund.c index e6b80ed9f1d0..0be1d9a89959 100644 --- a/packages/zoltan/src/order/hund.c +++ b/packages/zoltan/src/order/hund.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/order/order.c b/packages/zoltan/src/order/order.c index 8d83b2c9f3c4..5bea8f1ec806 100644 --- a/packages/zoltan/src/order/order.c +++ b/packages/zoltan/src/order/order.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/order/order_const.h b/packages/zoltan/src/order/order_const.h index a0c701c21e49..cb2817420619 100644 --- a/packages/zoltan/src/order/order_const.h +++ b/packages/zoltan/src/order/order_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ORDER_CONST_H diff --git a/packages/zoltan/src/order/order_params.h b/packages/zoltan/src/order/order_params.h index 61e86bff93d4..35a771a3c938 100644 --- a/packages/zoltan/src/order/order_params.h +++ b/packages/zoltan/src/order/order_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ORDER_PARAMS_H #define __ORDER_PARAMS_H diff --git a/packages/zoltan/src/order/order_struct.c b/packages/zoltan/src/order/order_struct.c index d2e4a25c6310..e469feb6e003 100644 --- a/packages/zoltan/src/order/order_struct.c +++ b/packages/zoltan/src/order/order_struct.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/order/order_tools.c b/packages/zoltan/src/order/order_tools.c index 7867d6ed1468..dd9354ace5f4 100644 --- a/packages/zoltan/src/order/order_tools.c +++ b/packages/zoltan/src/order/order_tools.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/order/perm.c b/packages/zoltan/src/order/perm.c index 318de0139c59..547deed0a65f 100644 --- a/packages/zoltan/src/order/perm.c +++ b/packages/zoltan/src/order/perm.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/par/README b/packages/zoltan/src/par/README index e76794cc81f9..09a536858b7b 100644 --- a/packages/zoltan/src/par/README +++ b/packages/zoltan/src/par/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER PAR DIRECTORY -- Parallel computing utilities diff --git a/packages/zoltan/src/par/par_average.c b/packages/zoltan/src/par/par_average.c index 3d0e64425e85..70b1b1047cdc 100644 --- a/packages/zoltan/src/par/par_average.c +++ b/packages/zoltan/src/par/par_average.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. 
Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/par/par_average_const.h b/packages/zoltan/src/par/par_average_const.h index 3515e48b9ac9..358e542bdda3 100644 --- a/packages/zoltan/src/par/par_average_const.h +++ b/packages/zoltan/src/par/par_average_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PAR_AVERAGE_CONST_H diff --git a/packages/zoltan/src/par/par_bisect.c b/packages/zoltan/src/par/par_bisect.c index 4d2849b126ed..188cf1cc2010 100644 --- a/packages/zoltan/src/par/par_bisect.c +++ b/packages/zoltan/src/par/par_bisect.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/par/par_bisect_const.h b/packages/zoltan/src/par/par_bisect_const.h index 91af5c25dd0c..f672f6a00bcc 100644 --- a/packages/zoltan/src/par/par_bisect_const.h +++ b/packages/zoltan/src/par/par_bisect_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PAR_BISECT_CONST_H diff --git a/packages/zoltan/src/par/par_const.h b/packages/zoltan/src/par/par_const.h index e119b44970a8..73ccd2743d0b 100644 --- a/packages/zoltan/src/par/par_const.h +++ b/packages/zoltan/src/par/par_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PAR_CONST_H diff --git a/packages/zoltan/src/par/par_median.c b/packages/zoltan/src/par/par_median.c index 1bb73fca7037..1103829783ab 100644 --- a/packages/zoltan/src/par/par_median.c +++ b/packages/zoltan/src/par/par_median.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/par/par_median_const.h b/packages/zoltan/src/par/par_median_const.h index 587204c4186f..ddadcefdca44 100644 --- a/packages/zoltan/src/par/par_median_const.h +++ b/packages/zoltan/src/par/par_median_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PAR_MEDIAN_CONST_H diff --git a/packages/zoltan/src/par/par_median_randomized.c b/packages/zoltan/src/par/par_median_randomized.c index d0c87d052f41..c35acf67e46b 100644 --- a/packages/zoltan/src/par/par_median_randomized.c +++ b/packages/zoltan/src/par/par_median_randomized.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/par/par_stats.c b/packages/zoltan/src/par/par_stats.c index f414ea0d03d0..af5b3bb73189 100644 --- a/packages/zoltan/src/par/par_stats.c +++ b/packages/zoltan/src/par/par_stats.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/par/par_sync.c b/packages/zoltan/src/par/par_sync.c index 89b8b62040a8..e9ef5f19c1e1 100644 --- a/packages/zoltan/src/par/par_sync.c +++ b/packages/zoltan/src/par/par_sync.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/par/par_tflops_special.c b/packages/zoltan/src/par/par_tflops_special.c index e27909fabc14..1423e924119a 100644 --- a/packages/zoltan/src/par/par_tflops_special.c +++ b/packages/zoltan/src/par/par_tflops_special.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/src/par/par_tflops_special_const.h b/packages/zoltan/src/par/par_tflops_special_const.h index 1927e888ee2f..0fb3a8c7aa53 100644 --- a/packages/zoltan/src/par/par_tflops_special_const.h +++ b/packages/zoltan/src/par/par_tflops_special_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PAR_TFLOPS_SPECIAL_H diff --git a/packages/zoltan/src/params/README b/packages/zoltan/src/params/README index 02599a4c033f..1e0bc06fba71 100644 --- a/packages/zoltan/src/params/README +++ b/packages/zoltan/src/params/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER PARAMS DIRECTORY -- Routines for setting parameter values diff --git a/packages/zoltan/src/params/assign_param_vals.c b/packages/zoltan/src/params/assign_param_vals.c index 9de1723a3f2a..1c5866f838c2 100644 --- a/packages/zoltan/src/params/assign_param_vals.c +++ b/packages/zoltan/src/params/assign_param_vals.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. 
Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/params/bind_param.c b/packages/zoltan/src/params/bind_param.c index fe128f19550a..71f8b7bca83f 100644 --- a/packages/zoltan/src/params/bind_param.c +++ b/packages/zoltan/src/params/bind_param.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/params/check_param.c b/packages/zoltan/src/params/check_param.c index eca9eca52960..30f25506fb89 100644 --- a/packages/zoltan/src/params/check_param.c +++ b/packages/zoltan/src/params/check_param.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/params/free_params.c b/packages/zoltan/src/params/free_params.c index 94c0fe18b1a9..3f01cd7b93f3 100644 --- a/packages/zoltan/src/params/free_params.c +++ b/packages/zoltan/src/params/free_params.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/params/key_params.c b/packages/zoltan/src/params/key_params.c index 6f634ac51e1f..9f0d059d970b 100644 --- a/packages/zoltan/src/params/key_params.c +++ b/packages/zoltan/src/params/key_params.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/params/key_params.h b/packages/zoltan/src/params/key_params.h index 11ab188deed1..4ecc6b76c7ed 100644 --- a/packages/zoltan/src/params/key_params.h +++ b/packages/zoltan/src/params/key_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __KEY_PARAMS_H diff --git a/packages/zoltan/src/params/params_const.h b/packages/zoltan/src/params/params_const.h index 8400fc765b6c..919589f04568 100644 --- a/packages/zoltan/src/params/params_const.h +++ b/packages/zoltan/src/params/params_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include diff --git a/packages/zoltan/src/params/print_params.c b/packages/zoltan/src/params/print_params.c index d85ebbe0149d..8e49691ec508 100644 --- a/packages/zoltan/src/params/print_params.c +++ b/packages/zoltan/src/params/print_params.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/params/set_param.c b/packages/zoltan/src/params/set_param.c index 415ddaeafb29..47da457a0ae3 100644 --- a/packages/zoltan/src/params/set_param.c +++ b/packages/zoltan/src/params/set_param.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/README b/packages/zoltan/src/phg/README index ff38e36f9072..2628ef6d9c88 100644 --- a/packages/zoltan/src/phg/README +++ b/packages/zoltan/src/phg/README @@ -1,44 +1 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Source files for parallel hypergraph partitioning. diff --git a/packages/zoltan/src/phg/phg.c b/packages/zoltan/src/phg/phg.c index 1cbce39bb288..a4df761c60ee 100644 --- a/packages/zoltan/src/phg/phg.c +++ b/packages/zoltan/src/phg/phg.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg.h b/packages/zoltan/src/phg/phg.h index 99b1bf667673..5a0c54637000 100644 --- a/packages/zoltan/src/phg/phg.h +++ b/packages/zoltan/src/phg/phg.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_PHG_H #define __ZOLTAN_PHG_H diff --git a/packages/zoltan/src/phg/phg_Vcycle.c b/packages/zoltan/src/phg/phg_Vcycle.c index dd077e92f139..1e17459452aa 100644 --- a/packages/zoltan/src/phg/phg_Vcycle.c +++ b/packages/zoltan/src/phg/phg_Vcycle.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_build.c b/packages/zoltan/src/phg/phg_build.c index c37a0f650011..9496a352fa19 100644 --- a/packages/zoltan/src/phg/phg_build.c +++ b/packages/zoltan/src/phg/phg_build.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/phg/phg_build.c.improved_calculate_cuts b/packages/zoltan/src/phg/phg_build.c.improved_calculate_cuts index f4c99dabd2eb..ff9b2bfa47e3 100644 --- a/packages/zoltan/src/phg/phg_build.c.improved_calculate_cuts +++ b/packages/zoltan/src/phg/phg_build.c.improved_calculate_cuts @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_build_calls.c b/packages/zoltan/src/phg/phg_build_calls.c index fc2908415dfc..c90513f98ca9 100644 --- a/packages/zoltan/src/phg/phg_build_calls.c +++ b/packages/zoltan/src/phg/phg_build_calls.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/phg/phg_coarse.c b/packages/zoltan/src/phg/phg_coarse.c index 096b797002b0..a9a7c487714d 100644 --- a/packages/zoltan/src/phg/phg_coarse.c +++ b/packages/zoltan/src/phg/phg_coarse.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/phg/phg_comm.c b/packages/zoltan/src/phg/phg_comm.c index f9331c49307c..309753c1de87 100644 --- a/packages/zoltan/src/phg/phg_comm.c +++ b/packages/zoltan/src/phg/phg_comm.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus extern "C" { diff --git a/packages/zoltan/src/phg/phg_comm.h b/packages/zoltan/src/phg/phg_comm.h index 9e23e1376359..fb2111d53a57 100644 --- a/packages/zoltan/src/phg/phg_comm.h +++ b/packages/zoltan/src/phg/phg_comm.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_PHG_COMM_H #define __ZOLTAN_PHG_COMM_H diff --git a/packages/zoltan/src/phg/phg_const.h b/packages/zoltan/src/phg/phg_const.h index 0594c342b70c..047f941c2846 100644 --- a/packages/zoltan/src/phg/phg_const.h +++ b/packages/zoltan/src/phg/phg_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PHG_CONST_H #define __PHG_CONST_H diff --git a/packages/zoltan/src/phg/phg_distrib.c b/packages/zoltan/src/phg/phg_distrib.c index 1a0b0da62110..c91a6fc1421f 100644 --- a/packages/zoltan/src/phg/phg_distrib.c +++ b/packages/zoltan/src/phg/phg_distrib.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/phg/phg_distrib.h b/packages/zoltan/src/phg/phg_distrib.h index e7e0513c14c3..b08594d94f5b 100644 --- a/packages/zoltan/src/phg/phg_distrib.h +++ b/packages/zoltan/src/phg/phg_distrib.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_PHG_DISTRIB_H #define __ZOLTAN_PHG_DISTRIB_H diff --git a/packages/zoltan/src/phg/phg_gather.c b/packages/zoltan/src/phg/phg_gather.c index df95e1502b9e..61afddd1d620 100644 --- a/packages/zoltan/src/phg/phg_gather.c +++ b/packages/zoltan/src/phg/phg_gather.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_hypergraph.c b/packages/zoltan/src/phg/phg_hypergraph.c index 441c479d4d91..2082dae64a8c 100644 --- a/packages/zoltan/src/phg/phg_hypergraph.c +++ b/packages/zoltan/src/phg/phg_hypergraph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/phg/phg_hypergraph.h b/packages/zoltan/src/phg/phg_hypergraph.h index f586863cb4d0..c1d9e2a6adc0 100644 --- a/packages/zoltan/src/phg/phg_hypergraph.h +++ b/packages/zoltan/src/phg/phg_hypergraph.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PHG_HYPERGRAPH_H #define __PHG_HYPERGRAPH_H diff --git a/packages/zoltan/src/phg/phg_lookup.c b/packages/zoltan/src/phg/phg_lookup.c index 60fe06143927..72c3d4e6dcee 100644 --- a/packages/zoltan/src/phg/phg_lookup.c +++ b/packages/zoltan/src/phg/phg_lookup.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_lookup.h b/packages/zoltan/src/phg/phg_lookup.h index 51cffd980e15..6e4f16f1b9c9 100644 --- a/packages/zoltan/src/phg/phg_lookup.h +++ b/packages/zoltan/src/phg/phg_lookup.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_PHG_LOOKUP_H #define __ZOLTAN_PHG_LOOKUP_H diff --git a/packages/zoltan/src/phg/phg_match.c b/packages/zoltan/src/phg/phg_match.c index ac2f06f30f73..f8f508094a2b 100644 --- a/packages/zoltan/src/phg/phg_match.c +++ b/packages/zoltan/src/phg/phg_match.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_order.c b/packages/zoltan/src/phg/phg_order.c index e8e18263a2bc..15fa196f3add 100644 --- a/packages/zoltan/src/phg/phg_order.c +++ b/packages/zoltan/src/phg/phg_order.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_params.h b/packages/zoltan/src/phg/phg_params.h index 17b653a31738..cd6369710362 100644 --- a/packages/zoltan/src/phg/phg_params.h +++ b/packages/zoltan/src/phg/phg_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PHG_PARAMS_H #define __PHG_PARAMS_H diff --git a/packages/zoltan/src/phg/phg_parkway.c b/packages/zoltan/src/phg/phg_parkway.c index 9444eccc3a46..4334438d2363 100644 --- a/packages/zoltan/src/phg/phg_parkway.c +++ b/packages/zoltan/src/phg/phg_parkway.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/phg/phg_partition_tree.c b/packages/zoltan/src/phg/phg_partition_tree.c index ba998a77c80e..e9bd00068e1c 100644 --- a/packages/zoltan/src/phg/phg_partition_tree.c +++ b/packages/zoltan/src/phg/phg_partition_tree.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_patoh.c b/packages/zoltan/src/phg/phg_patoh.c index 029806aa0167..d85c8cb82f4b 100644 --- a/packages/zoltan/src/phg/phg_patoh.c +++ b/packages/zoltan/src/phg/phg_patoh.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_plot.c b/packages/zoltan/src/phg/phg_plot.c index e595ec6de45b..4924b32e37c3 100644 --- a/packages/zoltan/src/phg/phg_plot.c +++ b/packages/zoltan/src/phg/phg_plot.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_rdivide.c b/packages/zoltan/src/phg/phg_rdivide.c index 3612aff3e431..d17b8cf1ef7b 100644 --- a/packages/zoltan/src/phg/phg_rdivide.c +++ b/packages/zoltan/src/phg/phg_rdivide.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_refinement.c b/packages/zoltan/src/phg/phg_refinement.c index 99d6e6631cb9..5c54656fe898 100644 --- a/packages/zoltan/src/phg/phg_refinement.c +++ b/packages/zoltan/src/phg/phg_refinement.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_scale.c b/packages/zoltan/src/phg/phg_scale.c index a9e35bd97286..3baf9432bff7 100644 --- a/packages/zoltan/src/phg/phg_scale.c +++ b/packages/zoltan/src/phg/phg_scale.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_serialpartition.c b/packages/zoltan/src/phg/phg_serialpartition.c index f5784e98dd45..33f40a52a0fa 100644 --- a/packages/zoltan/src/phg/phg_serialpartition.c +++ b/packages/zoltan/src/phg/phg_serialpartition.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_tree.c b/packages/zoltan/src/phg/phg_tree.c index 27214effa609..987e944e668c 100644 --- a/packages/zoltan/src/phg/phg_tree.c +++ b/packages/zoltan/src/phg/phg_tree.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_tree.h b/packages/zoltan/src/phg/phg_tree.h index 9b8161762a69..c98b799b433b 100644 --- a/packages/zoltan/src/phg/phg_tree.h +++ b/packages/zoltan/src/phg/phg_tree.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_PHG_TREE_H #define __ZOLTAN_PHG_TREE_H diff --git a/packages/zoltan/src/phg/phg_two_ways.c b/packages/zoltan/src/phg/phg_two_ways.c index 5133795b54ad..6ff02fcb04fb 100644 --- a/packages/zoltan/src/phg/phg_two_ways.c +++ b/packages/zoltan/src/phg/phg_two_ways.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_util.c b/packages/zoltan/src/phg/phg_util.c index 57e1835204e7..6a4f98fef8ee 100644 --- a/packages/zoltan/src/phg/phg_util.c +++ b/packages/zoltan/src/phg/phg_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/phg/phg_util.h b/packages/zoltan/src/phg/phg_util.h index 1d47d762f9f3..4f99326bf3dc 100644 --- a/packages/zoltan/src/phg/phg_util.h +++ b/packages/zoltan/src/phg/phg_util.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PHG_UTIL_H #define __PHG_UTIL_H diff --git a/packages/zoltan/src/phg/phg_verbose.c b/packages/zoltan/src/phg/phg_verbose.c index 10665534cc35..fc83c0cf71ce 100644 --- a/packages/zoltan/src/phg/phg_verbose.c +++ b/packages/zoltan/src/phg/phg_verbose.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/phg/phg_verbose.h b/packages/zoltan/src/phg/phg_verbose.h index be0513b46c14..97cc5aac9ee4 100644 --- a/packages/zoltan/src/phg/phg_verbose.h +++ b/packages/zoltan/src/phg/phg_verbose.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_PHG_VERBOSE_H #define __ZOLTAN_PHG_VERBOSE_H diff --git a/packages/zoltan/src/rcb/README b/packages/zoltan/src/rcb/README index e7fc7a4b0c91..197ac055f1ea 100644 --- a/packages/zoltan/src/rcb/README +++ b/packages/zoltan/src/rcb/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER RCB DIRECTORY -- Implementation of Recursive Coordinate Bisection. Also diff --git a/packages/zoltan/src/rcb/box_assign.c b/packages/zoltan/src/rcb/box_assign.c index d3f9270ecdfc..c153528628a5 100644 --- a/packages/zoltan/src/rcb/box_assign.c +++ b/packages/zoltan/src/rcb/box_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/create_proc_list.c b/packages/zoltan/src/rcb/create_proc_list.c index d3bc08c54ada..b158a820341a 100644 --- a/packages/zoltan/src/rcb/create_proc_list.c +++ b/packages/zoltan/src/rcb/create_proc_list.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/create_proc_list_const.h b/packages/zoltan/src/rcb/create_proc_list_const.h index 1845237c47a0..4363e3b61ff4 100644 --- a/packages/zoltan/src/rcb/create_proc_list_const.h +++ b/packages/zoltan/src/rcb/create_proc_list_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __CREATE_PROC_LIST_CONST_H diff --git a/packages/zoltan/src/rcb/inertial.h b/packages/zoltan/src/rcb/inertial.h index 03d5148e298f..5718ec793a55 100644 --- a/packages/zoltan/src/rcb/inertial.h +++ b/packages/zoltan/src/rcb/inertial.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __INERTIAL_H #define __INERTIAL_H diff --git a/packages/zoltan/src/rcb/inertial1d.c b/packages/zoltan/src/rcb/inertial1d.c index b89a8017e0a1..d4b9fc987c2a 100644 --- a/packages/zoltan/src/rcb/inertial1d.c +++ b/packages/zoltan/src/rcb/inertial1d.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/inertial2d.c b/packages/zoltan/src/rcb/inertial2d.c index 434582281bbc..70487dd1d08f 100644 --- a/packages/zoltan/src/rcb/inertial2d.c +++ b/packages/zoltan/src/rcb/inertial2d.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/inertial3d.c b/packages/zoltan/src/rcb/inertial3d.c index e259c1d68ef6..236505d4e9cb 100644 --- a/packages/zoltan/src/rcb/inertial3d.c +++ b/packages/zoltan/src/rcb/inertial3d.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/point_assign.c b/packages/zoltan/src/rcb/point_assign.c index 9c3f00b867ca..12176b84ef72 100644 --- a/packages/zoltan/src/rcb/point_assign.c +++ b/packages/zoltan/src/rcb/point_assign.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/rcb.c b/packages/zoltan/src/rcb/rcb.c index 57b3ae19117c..6474bb2de7fe 100644 --- a/packages/zoltan/src/rcb/rcb.c +++ b/packages/zoltan/src/rcb/rcb.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/rcb.h b/packages/zoltan/src/rcb/rcb.h index 7498792602a7..c4d0d9ef18fd 100644 --- a/packages/zoltan/src/rcb/rcb.h +++ b/packages/zoltan/src/rcb/rcb.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __RCB_H diff --git a/packages/zoltan/src/rcb/rcb_box.c b/packages/zoltan/src/rcb/rcb_box.c index ecde21a34935..96327ca700cf 100644 --- a/packages/zoltan/src/rcb/rcb_box.c +++ b/packages/zoltan/src/rcb/rcb_box.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/rcb_const.h b/packages/zoltan/src/rcb/rcb_const.h index 73eaccaa4156..ab0d86afb545 100644 --- a/packages/zoltan/src/rcb/rcb_const.h +++ b/packages/zoltan/src/rcb/rcb_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __RCB_CONST_H diff --git a/packages/zoltan/src/rcb/rcb_params.h b/packages/zoltan/src/rcb/rcb_params.h index 10938e2ec25e..d2cce4d23347 100644 --- a/packages/zoltan/src/rcb/rcb_params.h +++ b/packages/zoltan/src/rcb/rcb_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __RCB_PARAMS_H #define __RCB_PARAMS_H diff --git a/packages/zoltan/src/rcb/rcb_partition_tree.c b/packages/zoltan/src/rcb/rcb_partition_tree.c index 15bfd9be84af..7e273d29811c 100644 --- a/packages/zoltan/src/rcb/rcb_partition_tree.c +++ b/packages/zoltan/src/rcb/rcb_partition_tree.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/rcb/rcb_util.c b/packages/zoltan/src/rcb/rcb_util.c index 08a50c5f0f47..73480eaae801 100644 --- a/packages/zoltan/src/rcb/rcb_util.c +++ b/packages/zoltan/src/rcb/rcb_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "zz_const.h" #include "rcb.h" diff --git a/packages/zoltan/src/rcb/rib.c b/packages/zoltan/src/rcb/rib.c index 2f29b33cd682..8f8c93d7961a 100644 --- a/packages/zoltan/src/rcb/rib.c +++ b/packages/zoltan/src/rcb/rib.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/rib.h b/packages/zoltan/src/rcb/rib.h index 176d60e9b953..171602cbf305 100644 --- a/packages/zoltan/src/rcb/rib.h +++ b/packages/zoltan/src/rcb/rib.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __RIB_H diff --git a/packages/zoltan/src/rcb/rib_const.h b/packages/zoltan/src/rcb/rib_const.h index c25eebea9317..4604a562ba4a 100644 --- a/packages/zoltan/src/rcb/rib_const.h +++ b/packages/zoltan/src/rcb/rib_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __RIB_CONST_H diff --git a/packages/zoltan/src/rcb/rib_params.h b/packages/zoltan/src/rcb/rib_params.h index cc2bbe3069f3..37f275461a49 100644 --- a/packages/zoltan/src/rcb/rib_params.h +++ b/packages/zoltan/src/rcb/rib_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __RIB_PARAMS_H diff --git a/packages/zoltan/src/rcb/rib_util.c b/packages/zoltan/src/rcb/rib_util.c index d28a84084524..d13d32c958ef 100644 --- a/packages/zoltan/src/rcb/rib_util.c +++ b/packages/zoltan/src/rcb/rib_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "zz_const.h" #include "rib.h" diff --git a/packages/zoltan/src/rcb/shared.c b/packages/zoltan/src/rcb/shared.c index ce3387a1df81..acc371560e63 100644 --- a/packages/zoltan/src/rcb/shared.c +++ b/packages/zoltan/src/rcb/shared.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/rcb/shared.h b/packages/zoltan/src/rcb/shared.h index a854fa164b06..d76bb322bc3e 100644 --- a/packages/zoltan/src/rcb/shared.h +++ b/packages/zoltan/src/rcb/shared.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __SHARED_CONST_H diff --git a/packages/zoltan/src/reftree/README b/packages/zoltan/src/reftree/README index 621dec008d18..b0dd0bde0709 100644 --- a/packages/zoltan/src/reftree/README +++ b/packages/zoltan/src/reftree/README @@ -1,27 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER REFTREE DIRECTORY -- Routines for refinement-tree partitioning. diff --git a/packages/zoltan/src/reftree/reftree.h b/packages/zoltan/src/reftree/reftree.h index 160ff1ba63de..4a36099b3945 100644 --- a/packages/zoltan/src/reftree/reftree.h +++ b/packages/zoltan/src/reftree/reftree.h @@ -1,30 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __REFTREE_H #define __REFTREE_H diff --git a/packages/zoltan/src/reftree/reftree_build.c b/packages/zoltan/src/reftree/reftree_build.c index 14d4a580ba6f..1911fc71763c 100644 --- a/packages/zoltan/src/reftree/reftree_build.c +++ b/packages/zoltan/src/reftree/reftree_build.c @@ -1,30 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/reftree/reftree_coarse_path.c b/packages/zoltan/src/reftree/reftree_coarse_path.c index c58c777b8e72..670d951cfc6c 100644 --- a/packages/zoltan/src/reftree/reftree_coarse_path.c +++ b/packages/zoltan/src/reftree/reftree_coarse_path.c @@ -1,30 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/reftree/reftree_const.h b/packages/zoltan/src/reftree/reftree_const.h index d451440021b6..2387f41f81c4 100644 --- a/packages/zoltan/src/reftree/reftree_const.h +++ b/packages/zoltan/src/reftree/reftree_const.h @@ -1,30 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __REFTREE_CONST_H #define __REFTREE_CONST_H diff --git a/packages/zoltan/src/reftree/reftree_hash.c b/packages/zoltan/src/reftree/reftree_hash.c index 3baef46e6da5..4db7f34fb059 100644 --- a/packages/zoltan/src/reftree/reftree_hash.c +++ b/packages/zoltan/src/reftree/reftree_hash.c @@ -1,30 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/reftree/reftree_part.c b/packages/zoltan/src/reftree/reftree_part.c index 47c2edbc5ede..c6454c1de29f 100644 --- a/packages/zoltan/src/reftree/reftree_part.c +++ b/packages/zoltan/src/reftree/reftree_part.c @@ -1,30 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/simple/README b/packages/zoltan/src/simple/README index e4a7b95f3577..8f2cc2c87f50 100644 --- a/packages/zoltan/src/simple/README +++ b/packages/zoltan/src/simple/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER This module (directory) contains a couple very simple partitioning algorithms. They are instructional and mainly useful for testing, not for real work. 
diff --git a/packages/zoltan/src/simple/block.c b/packages/zoltan/src/simple/block.c index ff59963fb629..103095e75ff7 100644 --- a/packages/zoltan/src/simple/block.c +++ b/packages/zoltan/src/simple/block.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/simple/cyclic.c b/packages/zoltan/src/simple/cyclic.c index 60a88640a44d..da04939ef8b4 100644 --- a/packages/zoltan/src/simple/cyclic.c +++ b/packages/zoltan/src/simple/cyclic.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/simple/random.c b/packages/zoltan/src/simple/random.c index 488dea21825d..e09378732b1a 100644 --- a/packages/zoltan/src/simple/random.c +++ b/packages/zoltan/src/simple/random.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/simple/simple_const.h b/packages/zoltan/src/simple/simple_const.h index 670683f2cebb..6129099e85bd 100644 --- a/packages/zoltan/src/simple/simple_const.h +++ b/packages/zoltan/src/simple/simple_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __SIMPLE_CONST_H diff --git a/packages/zoltan/src/timer/README b/packages/zoltan/src/timer/README index 0c0724062874..7cdd17567072 100644 --- a/packages/zoltan/src/timer/README +++ b/packages/zoltan/src/timer/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER TIMER DIRECTORY -- Routines to provide access to timing utility. diff --git a/packages/zoltan/src/timer/timer_const.h b/packages/zoltan/src/timer/timer_const.h index 1ec666065490..3a8b4b728954 100644 --- a/packages/zoltan/src/timer/timer_const.h +++ b/packages/zoltan/src/timer/timer_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __TIMER_CONST_H diff --git a/packages/zoltan/src/timer/timer_params.c b/packages/zoltan/src/timer/timer_params.c index ffa4f3e7d651..b9ef1f8b07e9 100644 --- a/packages/zoltan/src/timer/timer_params.c +++ b/packages/zoltan/src/timer/timer_params.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/tpls/README b/packages/zoltan/src/tpls/README index 640698c2a637..a26949509adb 100644 --- a/packages/zoltan/src/tpls/README +++ b/packages/zoltan/src/tpls/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER PARMETIS DIRECTORY -- Routines to providing the interface to the diff --git a/packages/zoltan/src/tpls/build_graph.c b/packages/zoltan/src/tpls/build_graph.c index 6a66a64c4497..2d7304f1a4c6 100644 --- a/packages/zoltan/src/tpls/build_graph.c +++ b/packages/zoltan/src/tpls/build_graph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. 
Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/tpls/graph_util.h b/packages/zoltan/src/tpls/graph_util.h index 2017491bc6f3..1f61501905ba 100644 --- a/packages/zoltan/src/tpls/graph_util.h +++ b/packages/zoltan/src/tpls/graph_util.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __COMMON_H diff --git a/packages/zoltan/src/tpls/parmetis_interface.c b/packages/zoltan/src/tpls/parmetis_interface.c index 63caadfd1fd6..45f6dcc1a323 100644 --- a/packages/zoltan/src/tpls/parmetis_interface.c +++ b/packages/zoltan/src/tpls/parmetis_interface.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/tpls/parmetis_interface.h b/packages/zoltan/src/tpls/parmetis_interface.h index 6ff064f8dea8..2f0b2d6e7c05 100644 --- a/packages/zoltan/src/tpls/parmetis_interface.h +++ b/packages/zoltan/src/tpls/parmetis_interface.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PARMETIS_INTERFACE_H diff --git a/packages/zoltan/src/tpls/parmetis_interface_params.h b/packages/zoltan/src/tpls/parmetis_interface_params.h index a40638405a38..07ed7ff32367 100644 --- a/packages/zoltan/src/tpls/parmetis_interface_params.h +++ b/packages/zoltan/src/tpls/parmetis_interface_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __PARMETIS_INTERFACE_PARAMS_H #define __PARMETIS_INTERFACE_PARAMS_H diff --git a/packages/zoltan/src/tpls/postprocessing.c b/packages/zoltan/src/tpls/postprocessing.c index d657006c3ae1..edd11727975e 100644 --- a/packages/zoltan/src/tpls/postprocessing.c +++ b/packages/zoltan/src/tpls/postprocessing.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER diff --git a/packages/zoltan/src/tpls/preprocessing.c b/packages/zoltan/src/tpls/preprocessing.c index cff1a06223c7..2c74eb63fb29 100644 --- a/packages/zoltan/src/tpls/preprocessing.c +++ b/packages/zoltan/src/tpls/preprocessing.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/tpls/scatter_graph.c b/packages/zoltan/src/tpls/scatter_graph.c index 512b45c1fb42..f50be8c30ec5 100644 --- a/packages/zoltan/src/tpls/scatter_graph.c +++ b/packages/zoltan/src/tpls/scatter_graph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/tpls/scotch_interface.c b/packages/zoltan/src/tpls/scotch_interface.c index e77313a02761..46aef4921b80 100644 --- a/packages/zoltan/src/tpls/scotch_interface.c +++ b/packages/zoltan/src/tpls/scotch_interface.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/tpls/scotch_interface.h b/packages/zoltan/src/tpls/scotch_interface.h index 3f12806e7619..4ad7bbeba061 100644 --- a/packages/zoltan/src/tpls/scotch_interface.h +++ b/packages/zoltan/src/tpls/scotch_interface.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __SCOTCH_INTERFACE_H diff --git a/packages/zoltan/src/tpls/scotch_interface_params.h b/packages/zoltan/src/tpls/scotch_interface_params.h index 16bb4477c96f..e06168562ddd 100644 --- a/packages/zoltan/src/tpls/scotch_interface_params.h +++ b/packages/zoltan/src/tpls/scotch_interface_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __SCOTCH_INTERFACE_PARAMS_H #define __SCOTCH_INTERFACE_PARAMS_H diff --git a/packages/zoltan/src/tpls/third_library.c b/packages/zoltan/src/tpls/third_library.c index b066451117b3..695f003eaf24 100644 --- a/packages/zoltan/src/tpls/third_library.c +++ b/packages/zoltan/src/tpls/third_library.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/tpls/third_library.h b/packages/zoltan/src/tpls/third_library.h index 6ea4b5635173..f5fd69a259bf 100644 --- a/packages/zoltan/src/tpls/third_library.h +++ b/packages/zoltan/src/tpls/third_library.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __THIRD_LIBRARY_H diff --git a/packages/zoltan/src/tpls/third_library_const.h b/packages/zoltan/src/tpls/third_library_const.h index 6a9e66cfa295..95e50e27544d 100644 --- a/packages/zoltan/src/tpls/third_library_const.h +++ b/packages/zoltan/src/tpls/third_library_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __THIRD_LIBRARY_CONST_H diff --git a/packages/zoltan/src/tpls/third_library_params.h b/packages/zoltan/src/tpls/third_library_params.h index 03a584bae0b6..4807ff342274 100644 --- a/packages/zoltan/src/tpls/third_library_params.h +++ b/packages/zoltan/src/tpls/third_library_params.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __THIRD_LIBRARY_PARAMS_H #define __THIRD_LIBRARY_PARAMS_H diff --git a/packages/zoltan/src/tpls/third_library_tools.h b/packages/zoltan/src/tpls/third_library_tools.h index bce9c253e821..8bc9027d08b9 100644 --- a/packages/zoltan/src/tpls/third_library_tools.h +++ b/packages/zoltan/src/tpls/third_library_tools.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __THIRD_LIBRARY_TOOLS_H diff --git a/packages/zoltan/src/tpls/verify_graph.c b/packages/zoltan/src/tpls/verify_graph.c index b15591b49edb..5633096a8220 100644 --- a/packages/zoltan/src/tpls/verify_graph.c +++ b/packages/zoltan/src/tpls/verify_graph.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/util/generate_miniFElike_grids.cpp b/packages/zoltan/src/util/generate_miniFElike_grids.cpp index c446da08d683..983ec45bfb09 100644 --- a/packages/zoltan/src/util/generate_miniFElike_grids.cpp +++ b/packages/zoltan/src/util/generate_miniFElike_grids.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER // Program to generate uniform-grid input files for Zoltan's zdrive program. // Compile: // c++ generate_miniFElike_grids.cpp diff --git a/packages/zoltan/src/util/memory_usage/README b/packages/zoltan/src/util/memory_usage/README index fc4a5569653c..67259ae31724 100644 --- a/packages/zoltan/src/util/memory_usage/README +++ b/packages/zoltan/src/util/memory_usage/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Utilities to measure memory usage. 
diff --git a/packages/zoltan/src/util/memory_usage/commdup.c b/packages/zoltan/src/util/memory_usage/commdup.c index c043d9b7f913..f532856e2fae 100644 --- a/packages/zoltan/src/util/memory_usage/commdup.c +++ b/packages/zoltan/src/util/memory_usage/commdup.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /////////////////////////////////////////////////////////////// // Program to test MPI_Comm_dup memory usage on thunderbird. // diff --git a/packages/zoltan/src/util/memory_usage/commsplit.c b/packages/zoltan/src/util/memory_usage/commsplit.c index b8bd5b988396..b4bc785e7940 100644 --- a/packages/zoltan/src/util/memory_usage/commsplit.c +++ b/packages/zoltan/src/util/memory_usage/commsplit.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER ///////////////////////////////////////////////////////////////// // Program to test MPI_Comm_split memory usage on thunderbird. // diff --git a/packages/zoltan/src/util/memory_usage/get_heap_usage.h b/packages/zoltan/src/util/memory_usage/get_heap_usage.h index 63cdf69999b3..dadfe4dd0719 100644 --- a/packages/zoltan/src/util/memory_usage/get_heap_usage.h +++ b/packages/zoltan/src/util/memory_usage/get_heap_usage.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER ////////////////////////////////////////////////////////////////////// // Subroutine to measure heap usage on several different platforms. // diff --git a/packages/zoltan/src/util/memory_usage/rcblike.c b/packages/zoltan/src/util/memory_usage/rcblike.c index bb14b4476797..196394f481fe 100644 --- a/packages/zoltan/src/util/memory_usage/rcblike.c +++ b/packages/zoltan/src/util/memory_usage/rcblike.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Program to test MPI_Comm_split, etc., on thunderbird */ diff --git a/packages/zoltan/src/util/network_topology/MPI/README b/packages/zoltan/src/util/network_topology/MPI/README index 61b3adb43f24..1704ac9495b0 100644 --- a/packages/zoltan/src/util/network_topology/MPI/README +++ b/packages/zoltan/src/util/network_topology/MPI/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER August 10, 2010 lriesen@sandia.gov diff --git a/packages/zoltan/src/util/network_topology/MPI/test32.c b/packages/zoltan/src/util/network_topology/MPI/test32.c index 0b6ce98159cd..804bb2a284f7 100644 --- a/packages/zoltan/src/util/network_topology/MPI/test32.c +++ b/packages/zoltan/src/util/network_topology/MPI/test32.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include diff --git a/packages/zoltan/src/util/network_topology/MPI/topologyTest.c b/packages/zoltan/src/util/network_topology/MPI/topologyTest.c index b45d9d421935..692f0f684a1b 100644 --- a/packages/zoltan/src/util/network_topology/MPI/topologyTest.c +++ b/packages/zoltan/src/util/network_topology/MPI/topologyTest.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include diff --git a/packages/zoltan/src/util/network_topology/MPI/topologyVis.c b/packages/zoltan/src/util/network_topology/MPI/topologyVis.c index af4f78880a09..625d2519fe93 100644 --- a/packages/zoltan/src/util/network_topology/MPI/topologyVis.c +++ b/packages/zoltan/src/util/network_topology/MPI/topologyVis.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include diff --git a/packages/zoltan/src/util/network_topology/hwloc/README b/packages/zoltan/src/util/network_topology/hwloc/README index b97c9cf5a9c8..32e091c26afe 100644 --- a/packages/zoltan/src/util/network_topology/hwloc/README +++ b/packages/zoltan/src/util/network_topology/hwloc/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Discover hardware topology using hwloc. Tested with hwloc 1.0.2: http://www.open-mpi.org/software/hwloc diff --git a/packages/zoltan/src/util/network_topology/hwloc/node_topology.c b/packages/zoltan/src/util/network_topology/hwloc/node_topology.c index 8a713b7748aa..ffa0dabbb030 100644 --- a/packages/zoltan/src/util/network_topology/hwloc/node_topology.c +++ b/packages/zoltan/src/util/network_topology/hwloc/node_topology.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* * Discover hardware topology using hwloc. 
* Tested with hwloc 1.0.2: http://www.open-mpi.org/software/hwloc diff --git a/packages/zoltan/src/util/network_topology/hwloc/zoltan_get_topology.c b/packages/zoltan/src/util/network_topology/hwloc/zoltan_get_topology.c index 1bc6005a8f4e..6f3d9285997a 100644 --- a/packages/zoltan/src/util/network_topology/hwloc/zoltan_get_topology.c +++ b/packages/zoltan/src/util/network_topology/hwloc/zoltan_get_topology.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* * Discover hardware topology using hwloc. * Tested with hwloc 1.0.2: http://www.open-mpi.org/software/hwloc diff --git a/packages/zoltan/src/util/vtk_view.cpp b/packages/zoltan/src/util/vtk_view.cpp index 6e4d081ea8a7..1b09122ed922 100644 --- a/packages/zoltan/src/util/vtk_view.cpp +++ b/packages/zoltan/src/util/vtk_view.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER //-------------------------------------------------------------------------- // This source file builds two applications: vtk_view and vtk_write. diff --git a/packages/zoltan/src/zz/README b/packages/zoltan/src/zz/README index 00417ee4bcb5..c0fa7c6b0728 100644 --- a/packages/zoltan/src/zz/README +++ b/packages/zoltan/src/zz/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Directory zz Zoltan Interface Routines Includes general Zoltan interface, callback registry, and utilities. diff --git a/packages/zoltan/src/zz/zz_back_trace.c b/packages/zoltan/src/zz/zz_back_trace.c index e69600db6581..649f104df6e6 100644 --- a/packages/zoltan/src/zz/zz_back_trace.c +++ b/packages/zoltan/src/zz/zz_back_trace.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/zz/zz_const.h b/packages/zoltan/src/zz/zz_const.h index 96ae1ff246dc..eda62194c215 100644 --- a/packages/zoltan/src/zz/zz_const.h +++ b/packages/zoltan/src/zz/zz_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_CONST_H diff --git a/packages/zoltan/src/zz/zz_coord.c b/packages/zoltan/src/zz/zz_coord.c index 73f66fa5894b..d31da18accd3 100644 --- a/packages/zoltan/src/zz/zz_coord.c +++ b/packages/zoltan/src/zz/zz_coord.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_gen_files.c b/packages/zoltan/src/zz/zz_gen_files.c index 7186b1204dd4..6043446627a4 100644 --- a/packages/zoltan/src/zz/zz_gen_files.c +++ b/packages/zoltan/src/zz/zz_gen_files.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { diff --git a/packages/zoltan/src/zz/zz_hash.c b/packages/zoltan/src/zz/zz_hash.c index 8a66695dc967..ac5b7645f925 100644 --- a/packages/zoltan/src/zz/zz_hash.c +++ b/packages/zoltan/src/zz/zz_hash.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_hash.h b/packages/zoltan/src/zz/zz_hash.h index 415cdf52d9cb..468418d52ca1 100644 --- a/packages/zoltan/src/zz/zz_hash.h +++ b/packages/zoltan/src/zz/zz_hash.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_HASH_H diff --git a/packages/zoltan/src/zz/zz_heap.c b/packages/zoltan/src/zz/zz_heap.c index 57f2a16d7333..120c0c7b2afa 100644 --- a/packages/zoltan/src/zz/zz_heap.c +++ b/packages/zoltan/src/zz/zz_heap.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/zz/zz_heap.h b/packages/zoltan/src/zz/zz_heap.h index 6893a7b12b50..21d50972b32b 100644 --- a/packages/zoltan/src/zz/zz_heap.h +++ b/packages/zoltan/src/zz/zz_heap.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_HEAP_H #define ZOLTAN_HEAP_H diff --git a/packages/zoltan/src/zz/zz_id_const.h b/packages/zoltan/src/zz/zz_id_const.h index 5ce9c461625d..1812b3b8b74a 100644 --- a/packages/zoltan/src/zz/zz_id_const.h +++ b/packages/zoltan/src/zz/zz_id_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_ID_CONST_H #define __ZOLTAN_ID_CONST_H diff --git a/packages/zoltan/src/zz/zz_init.c b/packages/zoltan/src/zz/zz_init.c index a0cf60958840..a502dbe3486d 100644 --- a/packages/zoltan/src/zz/zz_init.c +++ b/packages/zoltan/src/zz/zz_init.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_map.c b/packages/zoltan/src/zz/zz_map.c index f25a64240ff6..23c44f261709 100644 --- a/packages/zoltan/src/zz/zz_map.c +++ b/packages/zoltan/src/zz/zz_map.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_obj_list.c b/packages/zoltan/src/zz/zz_obj_list.c index d8ea70abc776..2be571431fc3 100644 --- a/packages/zoltan/src/zz/zz_obj_list.c +++ b/packages/zoltan/src/zz/zz_obj_list.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_rand.c b/packages/zoltan/src/zz/zz_rand.c index a924035cc411..4fd876a13ac5 100644 --- a/packages/zoltan/src/zz/zz_rand.c +++ b/packages/zoltan/src/zz/zz_rand.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/zz/zz_rand.h b/packages/zoltan/src/zz/zz_rand.h index 69ae4e011e37..8e3c47d48a3a 100644 --- a/packages/zoltan/src/zz/zz_rand.h +++ b/packages/zoltan/src/zz/zz_rand.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZZ_RAND_H #define __ZZ_RAND_H diff --git a/packages/zoltan/src/zz/zz_set_fn.c b/packages/zoltan/src/zz/zz_set_fn.c index 03b8ef23a57c..95d0bfd7401e 100644 --- a/packages/zoltan/src/zz/zz_set_fn.c +++ b/packages/zoltan/src/zz/zz_set_fn.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_sort.c b/packages/zoltan/src/zz/zz_sort.c index 9d8a595d56f6..572a6d78a0aa 100644 --- a/packages/zoltan/src/zz/zz_sort.c +++ b/packages/zoltan/src/zz/zz_sort.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/src/zz/zz_sort.h b/packages/zoltan/src/zz/zz_sort.h index be530bb04926..a78a4bda31f1 100644 --- a/packages/zoltan/src/zz/zz_sort.h +++ b/packages/zoltan/src/zz/zz_sort.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN_SORT_H #define ZOLTAN_SORT_H diff --git a/packages/zoltan/src/zz/zz_struct.c b/packages/zoltan/src/zz/zz_struct.c index 564d234ec350..0ec92a8cb370 100644 --- a/packages/zoltan/src/zz/zz_struct.c +++ b/packages/zoltan/src/zz/zz_struct.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_util.c b/packages/zoltan/src/zz/zz_util.c index 3d4600e6e124..ec98ca55ce22 100644 --- a/packages/zoltan/src/zz/zz_util.c +++ b/packages/zoltan/src/zz/zz_util.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus diff --git a/packages/zoltan/src/zz/zz_util_const.h b/packages/zoltan/src/zz/zz_util_const.h index 45559a2bbd2b..61b6d6309dea 100644 --- a/packages/zoltan/src/zz/zz_util_const.h +++ b/packages/zoltan/src/zz/zz_util_const.h @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef __ZOLTAN_UTIL_CONST_H diff --git a/packages/zoltan/test/Large_Data/Makefile.am b/packages/zoltan/test/Large_Data/Makefile.am index d71d024fbe55..4bd35d0dee0f 100644 --- a/packages/zoltan/test/Large_Data/Makefile.am +++ b/packages/zoltan/test/Large_Data/Makefile.am @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? 
Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER diff --git a/packages/zoltan/test/Large_Data/stressTestColor.c b/packages/zoltan/test/Large_Data/stressTestColor.c index 6177b9b8c550..5d8bc5b190b4 100644 --- a/packages/zoltan/test/Large_Data/stressTestColor.c +++ b/packages/zoltan/test/Large_Data/stressTestColor.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************ * This is called a stress test because it builds an * arbitrarily large graph. It tests the HIER_ASSIST diff --git a/packages/zoltan/test/Large_Data/stressTestGRAPH.c b/packages/zoltan/test/Large_Data/stressTestGRAPH.c index e922c4571fe9..457e3c272e36 100644 --- a/packages/zoltan/test/Large_Data/stressTestGRAPH.c +++ b/packages/zoltan/test/Large_Data/stressTestGRAPH.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************ * This is called a stress test because it builds an * arbitrarily large graph. It tests the HIER_ASSIST diff --git a/packages/zoltan/test/Large_Data/stressTestPHG.c b/packages/zoltan/test/Large_Data/stressTestPHG.c index b779c6c99a12..407681957cf4 100644 --- a/packages/zoltan/test/Large_Data/stressTestPHG.c +++ b/packages/zoltan/test/Large_Data/stressTestPHG.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /************************************************************** * Stress test that can create a very large hypergraph to test * the large memory problems. diff --git a/packages/zoltan/test/Large_Data/stressTestRCB.c b/packages/zoltan/test/Large_Data/stressTestRCB.c index 7119deb3a932..66d122ff6431 100644 --- a/packages/zoltan/test/Large_Data/stressTestRCB.c +++ b/packages/zoltan/test/Large_Data/stressTestRCB.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*************************************************************** ** Example of using Zoltan to compute an RCB partitioning ** of a possibly very large collection of vertices and weights. diff --git a/packages/zoltan/test/Large_Data/stressTestRIB.c b/packages/zoltan/test/Large_Data/stressTestRIB.c index 293345fbb86a..d609c3a21cb2 100644 --- a/packages/zoltan/test/Large_Data/stressTestRIB.c +++ b/packages/zoltan/test/Large_Data/stressTestRIB.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*************************************************************** ** Example of using Zoltan to compute an RIB partitioning ** of a possibly very large collection of vertices and weights. diff --git a/packages/zoltan/test/README b/packages/zoltan/test/README index db43c0e8ef72..0092535d8722 100644 --- a/packages/zoltan/test/README +++ b/packages/zoltan/test/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER The test directory contains a series of tests used for regression testing. Each test is in its own directory. Within each directory, - file README gives a brief description of the particular test. 
diff --git a/packages/zoltan/test/TestMPI/canarySelfMessages.c b/packages/zoltan/test/TestMPI/canarySelfMessages.c index 6585d4d4ef7f..9c2617d05391 100644 --- a/packages/zoltan/test/TestMPI/canarySelfMessages.c +++ b/packages/zoltan/test/TestMPI/canarySelfMessages.c @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* MPI distributions in some version of Ubuntu and FreeBSD do not handle * self messages correctly. Errors then appear in Zoltan in the Comm package, * or in PHG's building of the hypergraph. If this test fails, there is diff --git a/packages/zoltan/test/TestMPI/mpiMinLoc.c b/packages/zoltan/test/TestMPI/mpiMinLoc.c index 4ff05aff9514..c2e6d848223a 100644 --- a/packages/zoltan/test/TestMPI/mpiMinLoc.c +++ b/packages/zoltan/test/TestMPI/mpiMinLoc.c @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include diff --git a/packages/zoltan/test/Utilities_Tests/Communication/comm_main.c b/packages/zoltan/test/Utilities_Tests/Communication/comm_main.c index 5863c1532a5b..6165598ebc24 100644 --- a/packages/zoltan/test/Utilities_Tests/Communication/comm_main.c +++ b/packages/zoltan/test/Utilities_Tests/Communication/comm_main.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/test/Utilities_Tests/Communication/comm_main_2.cpp b/packages/zoltan/test/Utilities_Tests/Communication/comm_main_2.cpp index af010e154dab..0f11cf43f0ac 100644 --- a/packages/zoltan/test/Utilities_Tests/Communication/comm_main_2.cpp +++ b/packages/zoltan/test/Utilities_Tests/Communication/comm_main_2.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/test/Utilities_Tests/Communication/test/README b/packages/zoltan/test/Utilities_Tests/Communication/test/README index 88e835f2e5a5..f41408de220d 100644 --- a/packages/zoltan/test/Utilities_Tests/Communication/test/README +++ b/packages/zoltan/test/Utilities_Tests/Communication/test/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER Test example for the communication library. To run the test, do the following (this example is for gcc, adapt to your platform): diff --git a/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main.c b/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main.c index 199772c72d2a..2f4ac2abcce2 100644 --- a/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main.c +++ b/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Program tests the Zoltan Distributed Directory software (stand-alone mode). 
diff --git a/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main_2.cpp b/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main_2.cpp index d03b704e4ac6..d1fbffc8bb58 100644 --- a/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main_2.cpp +++ b/packages/zoltan/test/Utilities_Tests/DDirectory/DD_Main_2.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Program tests the Zoltan Distributed Directory software (stand-alone mode). */ diff --git a/packages/zoltan/test/Utilities_Tests/Memory/mem_main.c b/packages/zoltan/test/Utilities_Tests/Memory/mem_main.c index 863ae8237f1d..76faa228a3e1 100644 --- a/packages/zoltan/test/Utilities_Tests/Memory/mem_main.c +++ b/packages/zoltan/test/Utilities_Tests/Memory/mem_main.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ diff --git a/packages/zoltan/test/Utilities_Tests/Timer/timer_main.c b/packages/zoltan/test/Utilities_Tests/Timer/timer_main.c index b278baf04a31..924c46793169 100644 --- a/packages/zoltan/test/Utilities_Tests/Timer/timer_main.c +++ b/packages/zoltan/test/Utilities_Tests/Timer/timer_main.c @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "zoltan_timer.h" #include diff --git a/packages/zoltan/test/Utilities_Tests/Timer/timer_main_2.cpp b/packages/zoltan/test/Utilities_Tests/Timer/timer_main_2.cpp index dacf53980159..fa1d66d42422 100644 --- a/packages/zoltan/test/Utilities_Tests/Timer/timer_main_2.cpp +++ b/packages/zoltan/test/Utilities_Tests/Timer/timer_main_2.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include diff --git a/packages/zoltan/test/ch_brack2_3/README b/packages/zoltan/test/ch_brack2_3/README index b93562409591..d18a648f783a 100644 --- a/packages/zoltan/test/ch_brack2_3/README +++ b/packages/zoltan/test/ch_brack2_3/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER brack2 is a 3d finite element mesh with 62K nodes. brack2_3 is a variation with three weights at each node, and it was obtained from Kirk Schloegel (UMN) to test multiconstraint partitioning. diff --git a/packages/zoltan/test/ch_degenerate/README b/packages/zoltan/test/ch_degenerate/README index 125890bc17c5..bc6648d3a4d3 100644 --- a/packages/zoltan/test/ch_degenerate/README +++ b/packages/zoltan/test/ch_degenerate/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER "degenerate" is a box of 1000 points with dimensions 9 x 45 x 450. It is not axis-aligned. 
diff --git a/packages/zoltan/test/ch_degenerateAA/README b/packages/zoltan/test/ch_degenerateAA/README index 5c52ba39ff54..af286a57b581 100644 --- a/packages/zoltan/test/ch_degenerateAA/README +++ b/packages/zoltan/test/ch_degenerateAA/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER degenerateAA is an axis-aligned box of 1000 points with dimensions 9 x 45 x 450. diff --git a/packages/zoltan/test/ch_drake/README b/packages/zoltan/test/ch_drake/README index fba35769eb6f..4f5c20542e66 100644 --- a/packages/zoltan/test/ch_drake/README +++ b/packages/zoltan/test/ch_drake/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_drake diff --git a/packages/zoltan/test/ch_ewgt/README b/packages/zoltan/test/ch_ewgt/README index 5bad560fbda3..7b509e9a457d 100644 --- a/packages/zoltan/test/ch_ewgt/README +++ b/packages/zoltan/test/ch_ewgt/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? 
Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_ewgt diff --git a/packages/zoltan/test/ch_grid20x19/README b/packages/zoltan/test/ch_grid20x19/README index e6a187a2811f..6f96cd410ae2 100644 --- a/packages/zoltan/test/ch_grid20x19/README +++ b/packages/zoltan/test/ch_grid20x19/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_grid20x19 diff --git a/packages/zoltan/test/ch_hammond/README b/packages/zoltan/test/ch_hammond/README index 397cd1bbed4e..1d163795f26a 100644 --- a/packages/zoltan/test/ch_hammond/README +++ b/packages/zoltan/test/ch_hammond/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_hammond diff --git a/packages/zoltan/test/ch_nograph/README b/packages/zoltan/test/ch_nograph/README index 996cb9f2d516..8fca149f2227 100644 --- a/packages/zoltan/test/ch_nograph/README +++ b/packages/zoltan/test/ch_nograph/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? 
Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_nograph diff --git a/packages/zoltan/test/ch_simple/README b/packages/zoltan/test/ch_simple/README index 5d0e761b079a..1bfed4a220c1 100644 --- a/packages/zoltan/test/ch_simple/README +++ b/packages/zoltan/test/ch_simple/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_simple diff --git a/packages/zoltan/test/ch_vwgt/README b/packages/zoltan/test/ch_vwgt/README index 5a93810de2cd..1d4e63b8e634 100644 --- a/packages/zoltan/test/ch_vwgt/README +++ b/packages/zoltan/test/ch_vwgt/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ------------------------------------------------------------------------------- ch_vwgt diff --git a/packages/zoltan/test/hg_cage10/README b/packages/zoltan/test/hg_cage10/README index 7781aee8f9ba..6f986b15da40 100644 --- a/packages/zoltan/test/hg_cage10/README +++ b/packages/zoltan/test/hg_cage10/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER KDDKDD 5/11/07: Round-off error in the computation of v2Col and n2Row in phg_distrib.c can lead to different answers on different platforms. 
diff --git a/packages/zoltan/test/hg_felix/README b/packages/zoltan/test/hg_felix/README index 392aab4a55f2..25518fe2175e 100644 --- a/packages/zoltan/test/hg_felix/README +++ b/packages/zoltan/test/hg_felix/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER This is a small hypergraph with 28 edges and 72 vertices. It has one weight per vertex and one weight per edge. It also has some vertices which are not part of any hyperedge. diff --git a/packages/zoltan/test/hg_vwgt/README b/packages/zoltan/test/hg_vwgt/README index 80513518f92d..435c4ee71a55 100644 --- a/packages/zoltan/test/hg_vwgt/README +++ b/packages/zoltan/test/hg_vwgt/README @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER hg_vwgt is the same 5x5 grid as ch_vwgt, and with the same weights. Only difference is that it's given in MM+ format with an initial distribution. diff --git a/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order.cpp b/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order.cpp index e117270a8f84..57925f17a256 100644 --- a/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order.cpp +++ b/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER ////////////////////////////////////////////////////////////////////////////// // // // File: driver.cc // diff --git a/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order_0block.cpp b/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order_0block.cpp index 2c0c11fb09ca..e7b3e1c9c113 100644 --- a/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order_0block.cpp +++ b/packages/zoltan/test/loc_hsfc_order/simple_local_HSFC_order_0block.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER ////////////////////////////////////////////////////////////////////////////// // // // File: driver.cc // diff --git a/packages/zoltan/test/misc_tests/copyZZ.c b/packages/zoltan/test/misc_tests/copyZZ.c index 2617a74b09e1..08ed831d839a 100644 --- a/packages/zoltan/test/misc_tests/copyZZ.c +++ b/packages/zoltan/test/misc_tests/copyZZ.c @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include #include #include "zoltan.h" diff --git a/packages/zoltan/test/misc_tests/copyZZ.cpp b/packages/zoltan/test/misc_tests/copyZZ.cpp index 28eca809701d..c2b1c5c05d71 100644 --- a/packages/zoltan/test/misc_tests/copyZZ.cpp +++ b/packages/zoltan/test/misc_tests/copyZZ.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "zoltan_cpp.h" /****************************************************************************/ diff --git a/packages/zoltan/test/misc_tests/test_get_callbacks.c b/packages/zoltan/test/misc_tests/test_get_callbacks.c index 3f3cdacb4a77..ee38a1d52052 100644 --- a/packages/zoltan/test/misc_tests/test_get_callbacks.c +++ b/packages/zoltan/test/misc_tests/test_get_callbacks.c @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring +// +// Copyright 2012 NTESS and the Zoltan contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /* Test for the Zoltan_Get_Fn interface function */ #include diff --git a/packages/zoltan/test/test_zoltan b/packages/zoltan/test/test_zoltan index 9f4f96e5a5ee..acca1f4702b8 100644 --- a/packages/zoltan/test/test_zoltan +++ b/packages/zoltan/test/test_zoltan @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER #!/bin/csh -f # # This C-shell script runs all the test examples diff --git a/packages/zoltan/test/test_zoltan_new b/packages/zoltan/test/test_zoltan_new index 63678b8e4bdf..89dfb34d2058 100644 --- a/packages/zoltan/test/test_zoltan_new +++ b/packages/zoltan/test/test_zoltan_new @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER #!/bin/csh -f ############################################################################## # From 960ae4578ed1563f10b9228bf5af166cec16707f Mon Sep 17 00:00:00 2001 From: "Curtis C. Ober" Date: Mon, 22 Jul 2024 14:01:30 -0600 Subject: [PATCH 02/37] Zoltan2: Update License and Copyright Update the License and Copyright files to * Reflect NTESS and new contract * Utilize SPDX identifiers within files * Make copyright and license files consistent across Trilinos packages (i.e., LICENSE and COPYRIGHT) Signed-off-by: Curtis C. 
Ober --- packages/zoltan2/COPYRIGHT | 10 +++ packages/zoltan2/LICENSE | 31 +++++++ packages/zoltan2/README.md | 12 +++ .../Zoltan2_TpetraCrsColorer.hpp | 9 ++ .../Zoltan2_TpetraCrsColorerUtils.hpp | 9 ++ .../Zoltan2_TpetraCrsColorer_Zoltan.hpp | 9 ++ .../Zoltan2_TpetraCrsColorer_Zoltan2.hpp | 9 ++ .../algorithms/Zoltan2_AlgForTestingOnly.hpp | 45 ++-------- .../core/src/algorithms/Zoltan2_Algorithm.hpp | 44 +--------- .../core/src/algorithms/Zoltan2_TPLTraits.hpp | 44 +--------- .../algorithms/color/Zoltan2_AlgHybrid2GL.hpp | 9 ++ .../color/Zoltan2_AlgHybridD1-2GL.hpp | 9 ++ .../algorithms/color/Zoltan2_AlgHybridD1.hpp | 9 ++ .../algorithms/color/Zoltan2_AlgHybridD2.hpp | 9 ++ .../algorithms/color/Zoltan2_AlgHybridPD2.hpp | 9 ++ .../color/Zoltan2_AlgSerialGreedy.hpp | 45 ++-------- .../color/Zoltan2_ColoringAlgorithms.hpp | 45 ++-------- .../color/Zoltan2_RebalanceColoring.hpp | 45 ++-------- .../map/Zoltan2_AlgBlockMapping.hpp | 45 ++-------- .../map/Zoltan2_AlgDefaultMapping.hpp | 45 ++-------- .../map/Zoltan2_AlgSparseMapping.hpp | 45 ++-------- .../map/Zoltan2_AlgTpetraMapping.hpp | 45 ++-------- .../algorithms/match/Zoltan2_GreedyMWM.hpp | 44 +--------- .../src/algorithms/order/Zoltan2_AlgAMD.hpp | 44 +--------- .../src/algorithms/order/Zoltan2_AlgMetis.hpp | 44 +--------- .../src/algorithms/order/Zoltan2_AlgND.hpp | 44 +--------- .../algorithms/order/Zoltan2_AlgNatural.hpp | 45 ++-------- .../src/algorithms/order/Zoltan2_AlgRCM.hpp | 45 ++-------- .../algorithms/order/Zoltan2_AlgRandom.hpp | 45 ++-------- .../order/Zoltan2_AlgSortedDegree.hpp | 45 ++-------- .../algorithms/order/Zoltan2_AlgSpectral.hpp | 45 ++-------- .../order/Zoltan2_MatcherHelper.hpp | 9 ++ .../order/Zoltan2_OrderingAlgorithms.hpp | 45 ++-------- .../src/algorithms/order/Zoltan2_Sort.hpp | 45 ++-------- .../algorithms/partition/Zoltan2_AlgBlock.hpp | 45 ++-------- .../partition/Zoltan2_AlgMultiJagged.hpp | 45 ++-------- .../algorithms/partition/Zoltan2_AlgParMA.hpp | 45 ++-------- 
.../partition/Zoltan2_AlgParMETIS.hpp | 45 ++-------- .../algorithms/partition/Zoltan2_AlgPuLP.hpp | 45 ++-------- .../partition/Zoltan2_AlgQuotient.hpp | 45 ++-------- .../partition/Zoltan2_AlgRCB.hpp.not_compiled | 44 +--------- .../Zoltan2_AlgRCB_methods.hpp.not_compiled | 44 +--------- .../algorithms/partition/Zoltan2_AlgSarma.hpp | 9 ++ .../partition/Zoltan2_AlgScotch.hpp | 45 ++-------- .../Zoltan2_CoordinatePartitioningGraph.hpp | 44 +--------- .../Zoltan2_MatrixPartitioningAlgs.hpp | 45 ++-------- .../Zoltan2_MultiJagged_ReductionOps.hpp | 44 +--------- .../Zoltan2_PartitioningAlgorithms.hpp | 45 ++-------- .../partition/Zoltan2_TaskMapping.hpp | 8 ++ .../algorithms/zoltan/Zoltan2_AlgZoltan.hpp | 45 ++-------- .../zoltan/Zoltan2_AlgZoltanCallbacks.hpp | 45 ++-------- .../core/src/directory/Zoltan2_Directory.hpp | 53 ++--------- .../src/directory/Zoltan2_Directory_Comm.cpp | 53 ++--------- .../src/directory/Zoltan2_Directory_Comm.hpp | 53 ++--------- .../src/directory/Zoltan2_Directory_Impl.hpp | 53 ++--------- .../src/environment/Zoltan2_DebugManager.hpp | 44 +--------- .../src/environment/Zoltan2_Environment.cpp | 45 ++-------- .../src/environment/Zoltan2_Environment.hpp | 44 +--------- .../src/environment/Zoltan2_Exceptions.hpp | 44 +--------- .../environment/Zoltan2_IntegerRangeList.hpp | 44 +--------- .../core/src/environment/Zoltan2_Machine.hpp | 9 ++ .../Zoltan2_MachineDragonflyRCA.hpp | 9 ++ .../Zoltan2_MachineDragonflyRCAForTesting.hpp | 9 ++ .../environment/Zoltan2_MachineForTesting.hpp | 9 ++ .../Zoltan2_MachineRepresentation.hpp | 9 ++ .../environment/Zoltan2_MachineTorusLDMS.hpp | 9 ++ .../environment/Zoltan2_MachineTorusRCA.hpp | 9 ++ .../Zoltan2_MachineTorusRCAForTesting.hpp | 9 ++ .../Zoltan2_MachineTorusTopoMgr.hpp | 9 ++ .../Zoltan2_MachineTorusTopoMgrForTesting.hpp | 9 ++ .../Zoltan2_MetricOutputManager.hpp | 44 +--------- .../src/environment/Zoltan2_Parameters.cpp | 44 +--------- .../src/environment/Zoltan2_Parameters.hpp | 42 ++------- 
.../src/environment/Zoltan2_TimerManager.cpp | 44 +--------- .../src/environment/Zoltan2_TimerManager.hpp | 44 +--------- .../core/src/input/Zoltan2_APFMeshAdapter.hpp | 44 +--------- .../core/src/input/Zoltan2_Adapter.hpp | 44 +--------- .../input/Zoltan2_BasicIdentifierAdapter.hpp | 44 +--------- .../Zoltan2_BasicKokkosIdentifierAdapter.hpp | 44 +--------- .../src/input/Zoltan2_BasicVectorAdapter.hpp | 44 +--------- .../core/src/input/Zoltan2_GraphAdapter.hpp | 44 +--------- .../src/input/Zoltan2_IdentifierAdapter.hpp | 44 +--------- .../core/src/input/Zoltan2_InputTraits.hpp | 44 +--------- .../core/src/input/Zoltan2_MatrixAdapter.hpp | 44 +--------- .../core/src/input/Zoltan2_MeshAdapter.hpp | 44 +--------- .../src/input/Zoltan2_PamgenMeshAdapter.hpp | 44 +--------- .../input/Zoltan2_TpetraCrsGraphAdapter.hpp | 44 +--------- .../input/Zoltan2_TpetraCrsMatrixAdapter.hpp | 44 +--------- .../input/Zoltan2_TpetraRowGraphAdapter.hpp | 44 +--------- .../input/Zoltan2_TpetraRowMatrixAdapter.hpp | 44 +--------- .../core/src/input/Zoltan2_VectorAdapter.hpp | 44 +--------- .../input/Zoltan2_XpetraCrsGraphAdapter.hpp | 44 +--------- .../input/Zoltan2_XpetraCrsMatrixAdapter.hpp | 44 +--------- .../Zoltan2_XpetraMultiVectorAdapter.hpp | 44 +--------- .../core/src/input/Zoltan2_XpetraTraits.hpp | 44 +--------- .../src/models/Zoltan2_CommGraphModel.hpp | 44 +--------- .../src/models/Zoltan2_CoordinateModel.hpp | 44 +--------- .../core/src/models/Zoltan2_GraphModel.hpp | 44 +--------- .../src/models/Zoltan2_HyperGraphModel.hpp | 44 +--------- .../src/models/Zoltan2_IdentifierModel.hpp | 44 +--------- .../zoltan2/core/src/models/Zoltan2_Model.hpp | 44 +--------- .../core/src/models/Zoltan2_ModelHelpers.hpp | 44 +--------- .../src/problems/Zoltan2_ColoringProblem.hpp | 44 +--------- .../src/problems/Zoltan2_ColoringSolution.hpp | 44 +--------- .../src/problems/Zoltan2_MappingProblem.hpp | 44 +--------- .../src/problems/Zoltan2_MappingSolution.hpp | 44 +--------- 
.../src/problems/Zoltan2_MatchingProblem.hpp | 44 +--------- .../src/problems/Zoltan2_MatchingSolution.hpp | 44 +--------- .../Zoltan2_MatrixPartitioningProblem.hpp | 44 +--------- .../Zoltan2_MatrixPartitioningSolution.hpp | 44 +--------- .../src/problems/Zoltan2_OrderingProblem.hpp | 44 +--------- .../src/problems/Zoltan2_OrderingSolution.hpp | 44 +--------- .../src/problems/Zoltan2_PartitionMapping.hpp | 44 +--------- .../problems/Zoltan2_PartitioningHelpers.hpp | 44 +--------- .../problems/Zoltan2_PartitioningProblem.hpp | 44 +--------- .../problems/Zoltan2_PartitioningSolution.cpp | 44 +--------- .../problems/Zoltan2_PartitioningSolution.hpp | 44 +--------- .../core/src/problems/Zoltan2_Problem.hpp | 44 +--------- .../core/src/problems/Zoltan2_Solution.hpp | 44 +--------- .../core/src/util/Zoltan2_AlltoAll.cpp | 44 +--------- .../core/src/util/Zoltan2_AlltoAll.hpp | 44 +--------- .../src/util/Zoltan2_BaseClassMetrics.hpp | 44 +--------- .../src/util/Zoltan2_EvaluateBaseClass.hpp | 44 +--------- .../core/src/util/Zoltan2_EvaluateMapping.hpp | 88 +------------------ .../src/util/Zoltan2_EvaluateOrdering.hpp | 44 +--------- .../src/util/Zoltan2_EvaluatePartition.hpp | 88 +------------------ .../core/src/util/Zoltan2_GraphMetrics.hpp | 44 +--------- .../src/util/Zoltan2_GraphMetricsUtility.hpp | 44 +--------- packages/zoltan2/core/src/util/Zoltan2_IO.cpp | 44 +--------- packages/zoltan2/core/src/util/Zoltan2_IO.hpp | 44 +--------- .../src/util/Zoltan2_ImbalanceMetrics.hpp | 44 +--------- .../util/Zoltan2_ImbalanceMetricsUtility.hpp | 44 +--------- .../core/src/util/Zoltan2_MetricUtility.hpp | 44 +--------- .../core/src/util/Zoltan2_Standards.hpp | 44 +--------- .../core/src/util/Zoltan2_StridedData.hpp | 45 ++-------- .../zoltan2/core/src/util/Zoltan2_Util.cpp | 44 +--------- .../zoltan2/core/src/util/Zoltan2_Util.hpp | 44 +--------- .../zoltan2/core/src/util/Zoltan2_Version.cpp | 45 ++-------- .../zoltan2/core/src/util/Zoltan2_Version.hpp | 45 ++-------- 
.../src/util/Zoltan2_componentMetrics.hpp | 44 +--------- .../core/src/util/Zoltan2_findUniqueGids.hpp | 44 +--------- packages/zoltan2/doc/COPYRIGHT_AND_LICENSE | 44 ---------- packages/zoltan2/example/block/block.cpp | 44 +--------- .../zoltan2/example/block/kokkosBlock.cpp | 44 +--------- packages/zoltan2/example/geometric/rcb_C.cpp | 44 +--------- packages/zoltan2/example/graph/graph.cpp | 45 ++-------- packages/zoltan2/scripts/copyright.txt | 44 ---------- packages/zoltan2/sphynx/COPYRIGHT | 10 +++ packages/zoltan2/sphynx/LICENSE | 75 +++++++--------- packages/zoltan2/sphynx/README.md | 12 +++ .../zoltan2/sphynx/src/Zoltan2_Sphynx.hpp | 46 +--------- .../sphynx/src/Zoltan2_SphynxProblem.hpp | 47 ++-------- .../sphynx/src/Zoltan2_SphynxVersion.cpp | 46 +--------- .../test/core/TpetraCrsColorer/Bug9500.cpp | 8 ++ .../TpetraCrsColorer/TpetraCrsColorer.cpp | 8 ++ .../zoltan2/test/core/color/coloring1.cpp | 45 ++-------- .../test/core/correctness/zoltanCompare.cpp | 44 +--------- .../core/directory/directoryTest_Impl.hpp | 44 +--------- .../core/directory/directoryTest_Kokkos.cpp | 44 +--------- .../directory/directoryTest_KokkosSimple.cpp | 44 +--------- .../directoryTest_findUniqueGids.cpp | 44 +--------- .../driver/Zoltan2_MeshCoordinateTest.hpp | 9 ++ .../core/driver/Zoltan2_TestInterface.hpp | 9 ++ .../test/core/driver/Zoltan2_Tests.hpp | 9 ++ .../zoltan2/test/core/driver/test_driver.cpp | 44 +--------- .../test/core/helpers/AdapterForTests.hpp | 44 +--------- .../core/helpers/ErrorHandlingForTests.hpp | 44 +--------- .../test/core/helpers/GeometricGenerator.hpp | 45 ++-------- .../zoltan2/test/core/helpers/PrintData.hpp | 44 +--------- .../test/core/helpers/UserInputForTests.hpp | 44 +--------- .../core/helpers/Zoltan2_ComparisonHelper.hpp | 44 +--------- .../core/helpers/Zoltan2_EvaluateFactory.hpp | 44 +--------- .../core/helpers/Zoltan2_MetricAnalyzer.hpp | 44 +--------- .../helpers/Zoltan2_PamgenMeshStructure.hpp | 44 +--------- 
.../core/helpers/Zoltan2_ProblemFactory.hpp | 44 +--------- .../test/core/helpers/Zoltan2_TestHelpers.hpp | 44 +--------- .../test/core/helpers/Zoltan2_Typedefs.hpp | 44 +--------- packages/zoltan2/test/core/order/nd.cpp | 45 ++-------- .../zoltan2/test/core/order/ordering1.cpp | 45 ++-------- .../zoltan2/test/core/order/orderingAMD.cpp | 45 ++-------- .../zoltan2/test/core/order/orderingMetis.cpp | 45 ++-------- .../test/core/order/orderingScotch.cpp | 45 ++-------- .../core/partition/APFMeshAdapterTest.cpp | 44 +--------- .../test/core/partition/MultiJaggedTest.cpp | 44 +--------- .../core/partition/PartitionAndParMATest.cpp | 44 +--------- .../core/partition/TaskMappingProblemTest.cpp | 8 ++ .../core/partition/TaskMappingSimulate.cpp | 8 ++ .../test/core/partition/TaskMappingTest.cpp | 8 ++ .../test/core/partition/TaskMappingTest3.cpp | 9 ++ .../zoltan2/test/core/partition/blockTest.cpp | 45 ++-------- .../zoltan2/test/core/partition/fix2010.cpp | 45 ++-------- .../zoltan2/test/core/partition/fix4785.cpp | 44 +--------- .../test/core/partition/mj_backwardcompat.cpp | 44 +--------- .../zoltan2/test/core/partition/mj_epetra.cpp | 44 +--------- .../core/partition/mj_int_coordinates.cpp | 44 +--------- .../core/partition/pamgenMeshAdapterTest.cpp | 44 +--------- .../test/core/partition/partition2DMatrix.cpp | 45 +--------- .../test/core/partition/partition_sarma.cpp | 9 ++ .../test/core/partition/partitioning1.cpp | 45 ++-------- .../test/core/partition/partitioningTree.cpp | 45 ++-------- .../test/core/partition/rcbPerformanceZ1.cpp | 44 +--------- .../zoltan2/test/core/partition/rcbTest.cpp | 44 +--------- .../test/core/scaling/rcbPerformance.cpp | 44 +--------- .../test/core/scaling/rcbPerformanceZ1.cpp | 44 +--------- .../test/core/temp/XpetraEpetraMap.cpp | 9 ++ .../test/core/temp/XpetraEpetraMatrix.cpp | 9 ++ .../test/core/temp/absdefinitiontest.cpp | 9 ++ .../zoltan2/test/core/temp/ddirectoryTest.cpp | 44 +--------- 
.../zoltan2/test/core/temp/mapMemoryLeak.cpp | 8 ++ .../zoltan2/test/core/temp/mapOneToOne.cpp | 9 ++ .../zoltan2/test/core/temp/mapRemotes.cpp | 9 ++ .../test/core/temp/multivectorTest.cpp | 9 +- packages/zoltan2/test/core/temp/paramTest.cpp | 44 +--------- .../zoltan2/test/core/temp/paramToXML.cpp | 44 +--------- packages/zoltan2/test/core/temp/testfail.cpp | 9 ++ .../test/core/temp/teuchosCommTest.cpp | 9 ++ .../zoltan2/test/core/temp/vecWithCopies.cpp | 9 ++ .../core/unit/environment/AllParameters.cpp | 45 ++-------- .../core/unit/environment/DebugManager.cpp | 45 ++-------- .../core/unit/environment/Environment.cpp | 45 ++-------- .../test/core/unit/environment/Machine.cpp | 45 ++-------- .../unit/environment/MetricOutputManager.cpp | 45 ++-------- .../test/core/unit/environment/Parameters.cpp | 45 ++-------- .../core/unit/environment/TimerManager.cpp | 45 ++-------- .../test/core/unit/input/APFMeshInput.cpp | 45 ++-------- .../core/unit/input/BasicCoordinateInput.cpp | 45 ++-------- .../core/unit/input/BasicIdentifierInput.cpp | 45 ++-------- .../unit/input/BasicKokkosIdentifierInput.cpp | 45 ++-------- .../core/unit/input/BasicVectorAdapter.cpp | 45 ++-------- .../test/core/unit/input/BasicVectorInput.cpp | 45 ++-------- .../test/core/unit/input/InputTraitsBad.cpp | 45 ++-------- .../test/core/unit/input/InputTraitsGood.cpp | 45 ++-------- .../test/core/unit/input/MatrixAdapter.cpp | 45 ++-------- .../test/core/unit/input/PamgenMeshInput.cpp | 45 ++-------- .../core/unit/input/TpetraCrsMatrixInput.cpp | 45 ++-------- .../core/unit/input/TpetraRowGraphInput.cpp | 45 ++-------- .../unit/input/TpetraRowGraphInputKokkos.cpp | 45 ++-------- .../core/unit/input/TpetraRowMatrixInput.cpp | 45 ++-------- .../core/unit/input/XpetraCrsGraphInput.cpp | 45 ++-------- .../core/unit/input/XpetraCrsMatrixInput.cpp | 45 ++-------- .../unit/input/XpetraMultiVectorInput.cpp | 45 ++-------- .../test/core/unit/input/XpetraTraits.cpp | 45 ++-------- 
.../core/unit/input/XpetraVectorInput.cpp | 45 ++-------- .../test/core/unit/models/CoordinateModel.cpp | 45 ++-------- .../test/core/unit/models/GraphModel.cpp | 45 ++-------- .../unit/models/GraphModel2ndAdjsFromAdjs.cpp | 45 ++-------- .../test/core/unit/models/HyperGraphModel.cpp | 45 ++-------- .../test/core/unit/models/IdentifierModel.cpp | 45 ++-------- .../test/core/unit/problems/Mapping.cpp | 45 ++-------- .../unit/problems/PartitioningSolution.cpp | 45 ++-------- .../zoltan2/test/core/unit/util/AlltoAll.cpp | 44 +--------- .../zoltan2/test/core/unit/util/Metric.cpp | 45 ++-------- .../test/core/unit/util/StridedData.cpp | 45 ++-------- .../zoltan2/test/core/unit/util/TPLTraits.cpp | 45 +--------- .../test/core/unit/util/componentMetrics.cpp | 45 ++-------- .../test/core/unit/util/findUniqueGids.cpp | 44 +--------- .../test/sphynx/Sphynx_Research_Driver.cpp | 45 +--------- packages/zoltan2/test/sphynx/Test_Sphynx.cpp | 46 ++-------- .../test/sphynx/largestComponent2Binary.cpp | 44 ++-------- .../test/sphynx/readMatrixFromBinaryFile.hpp | 43 +-------- .../zoltan2/util/xmlToHeaderDefinition.cpp | 44 +--------- 261 files changed, 1417 insertions(+), 8687 deletions(-) create mode 100644 packages/zoltan2/COPYRIGHT create mode 100644 packages/zoltan2/LICENSE create mode 100644 packages/zoltan2/README.md delete mode 100644 packages/zoltan2/doc/COPYRIGHT_AND_LICENSE delete mode 100644 packages/zoltan2/scripts/copyright.txt create mode 100644 packages/zoltan2/sphynx/COPYRIGHT create mode 100644 packages/zoltan2/sphynx/README.md diff --git a/packages/zoltan2/COPYRIGHT b/packages/zoltan2/COPYRIGHT new file mode 100644 index 000000000000..796c89031da5 --- /dev/null +++ b/packages/zoltan2/COPYRIGHT @@ -0,0 +1,10 @@ + + Zoltan2: A package of combinatorial algorithms for scientific computing + Copyright (c) 2012 NTESS + +Copyright 2012 National Technology & Engineering Solutions of Sandia, +LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the +U.S. 
Government retains certain rights in this software. + +Copyright the Zoltan2 contributors. + diff --git a/packages/zoltan2/LICENSE b/packages/zoltan2/LICENSE new file mode 100644 index 000000000000..1d55a80d286d --- /dev/null +++ b/packages/zoltan2/LICENSE @@ -0,0 +1,31 @@ +SPDX-License-Identifier: BSD-3-Clause + +Copyright (c) 2012 NTESS and the Zoltan2 contributors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + 3. Neither the name of the copyright holder nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/packages/zoltan2/README.md b/packages/zoltan2/README.md new file mode 100644 index 000000000000..cfbc13f472bb --- /dev/null +++ b/packages/zoltan2/README.md @@ -0,0 +1,12 @@ +# Zoltan2: A package of combinatorial algorithms for scientific computing + + +## Copyright and License +See zoltan2/COPYRIGHT, zoltan2/LICENSE, https://trilinos.github.io/license.html and individual file headers for additional information. + + +## Questions? +Contact lead developers: + +* Zoltan2 team (GitHub handle: @trilinos/zoltan2) +* Erik Boman (GitHub handle: [egboman](https://github.com/egboman) or egboman@sandia.gov) diff --git a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer.hpp b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer.hpp index f026b9fc21ac..2a3b84390b0c 100644 --- a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer.hpp +++ b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #pragma once #include diff --git a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorerUtils.hpp b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorerUtils.hpp index 82dc5fcd3a50..0e4444f4a6e8 100644 --- a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorerUtils.hpp +++ b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorerUtils.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #pragma once #include "Teuchos_RCP.hpp" diff --git a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan.hpp b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan.hpp index ccee504d7496..153cc97f0c70 100644 --- a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan.hpp +++ b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #pragma once #include "Zoltan2_TpetraCrsColorerUtils.hpp" diff --git a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan2.hpp b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan2.hpp index 9052214affeb..1b14fde35135 100644 --- a/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan2.hpp +++ b/packages/zoltan2/core/src/TpetraCrsColorer/Zoltan2_TpetraCrsColorer_Zoltan2.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #pragma once #include "Teuchos_ArrayRCP.hpp" diff --git a/packages/zoltan2/core/src/algorithms/Zoltan2_AlgForTestingOnly.hpp b/packages/zoltan2/core/src/algorithms/Zoltan2_AlgForTestingOnly.hpp index a97815a50da3..93b9bbce7e1a 100644 --- a/packages/zoltan2/core/src/algorithms/Zoltan2_AlgForTestingOnly.hpp +++ b/packages/zoltan2/core/src/algorithms/Zoltan2_AlgForTestingOnly.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGFORTESTINGONLY_HPP_ #define _ZOLTAN2_ALGFORTESTINGONLY_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/Zoltan2_Algorithm.hpp b/packages/zoltan2/core/src/algorithms/Zoltan2_Algorithm.hpp index f8c19aabbef1..b938840eb89a 100644 --- a/packages/zoltan2/core/src/algorithms/Zoltan2_Algorithm.hpp +++ b/packages/zoltan2/core/src/algorithms/Zoltan2_Algorithm.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_BaseAdapter.hpp diff --git a/packages/zoltan2/core/src/algorithms/Zoltan2_TPLTraits.hpp b/packages/zoltan2/core/src/algorithms/Zoltan2_TPLTraits.hpp index 95b9d8894a43..40d4e82744b9 100644 --- a/packages/zoltan2/core/src/algorithms/Zoltan2_TPLTraits.hpp +++ b/packages/zoltan2/core/src/algorithms/Zoltan2_TPLTraits.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef _ZOLTAN2_TPLTRAITS_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp index 59d0dbb51736..eda33f33a854 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_2GHOSTLAYER_HPP_ #define _ZOLTAN2_2GHOSTLAYER_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp index 411b462415ac..f3d11e98cfcc 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_DISTANCE1_2GHOSTLAYER_HPP_ #define _ZOLTAN2_DISTANCE1_2GHOSTLAYER_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp index be182462a6eb..f3b3a6819aaa 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_ALGHYBRIDD1_HPP_ #define _ZOLTAN2_ALGHYBRIDD1_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp index 2a146f5ee112..542548d4e6aa 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_DISTANCE2_HPP_ #define _ZOLTAN2_DISTANCE2_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp index b96aea413e4f..b4da8f7d1825 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_PDISTANCE2_HPP_ #define _ZOLTAN2_PDISTANCE2_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgSerialGreedy.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgSerialGreedy.hpp index e0436e650d55..ee23e27bc15f 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgSerialGreedy.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgSerialGreedy.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGSERIALGREEDY_HPP_ #define _ZOLTAN2_ALGSERIALGREEDY_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_ColoringAlgorithms.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_ColoringAlgorithms.hpp index ed3f69ef2535..7a356ab0f038 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_ColoringAlgorithms.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_ColoringAlgorithms.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_COLORINGALGORITHMS_HPP_ #define _ZOLTAN2_COLORINGALGORITHMS_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_RebalanceColoring.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_RebalanceColoring.hpp index 8e5fb5596668..164079091ea1 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_RebalanceColoring.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_RebalanceColoring.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_REBALANCECOLORING_HPP_ #define _ZOLTAN2_REBALANCECOLORING_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgBlockMapping.hpp b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgBlockMapping.hpp index 5434195eb08d..d758ada1f684 100644 --- a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgBlockMapping.hpp +++ b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgBlockMapping.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGBLOCKMAPPING_HPP_ #define _ZOLTAN2_ALGBLOCKMAPPING_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgDefaultMapping.hpp b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgDefaultMapping.hpp index 98fb3dc6e6b6..b4df6ef676c4 100644 --- a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgDefaultMapping.hpp +++ b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgDefaultMapping.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGDEFAULTMAPPING_HPP_ #define _ZOLTAN2_ALGDEFAULTMAPPING_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgSparseMapping.hpp b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgSparseMapping.hpp index 2a131978b419..b32a5a359eb3 100644 --- a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgSparseMapping.hpp +++ b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgSparseMapping.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGDEFAULTMAPPING_HPP_ #define _ZOLTAN2_ALGDEFAULTMAPPING_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgTpetraMapping.hpp b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgTpetraMapping.hpp index 2a131978b419..b32a5a359eb3 100644 --- a/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgTpetraMapping.hpp +++ b/packages/zoltan2/core/src/algorithms/map/Zoltan2_AlgTpetraMapping.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGDEFAULTMAPPING_HPP_ #define _ZOLTAN2_ALGDEFAULTMAPPING_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/match/Zoltan2_GreedyMWM.hpp b/packages/zoltan2/core/src/algorithms/match/Zoltan2_GreedyMWM.hpp index ed1d56b47293..8419e015f93d 100644 --- a/packages/zoltan2/core/src/algorithms/match/Zoltan2_GreedyMWM.hpp +++ b/packages/zoltan2/core/src/algorithms/match/Zoltan2_GreedyMWM.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_GreedyMWM.hpp diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgAMD.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgAMD.hpp index 2cbae91d3cc1..b1106d439d42 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgAMD.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgAMD.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_AlgAMD.hpp diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgMetis.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgMetis.hpp index ff5c33b4012d..cec2bf5469fa 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgMetis.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgMetis.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_AlgMetis.hpp diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgND.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgND.hpp index f813d9ad92d9..70a46892f179 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgND.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgND.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER //MMW need to specify that this requires Zoltan diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgNatural.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgNatural.hpp index c3df1c441244..e27d464b817a 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgNatural.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgNatural.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGNATURAL_HPP_ #define _ZOLTAN2_ALGNATURAL_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRCM.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRCM.hpp index c381cda9f1c0..0eaa364c033f 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRCM.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRCM.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGRCM_HPP_ #define _ZOLTAN2_ALGRCM_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRandom.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRandom.hpp index 489717fc4aa8..8aad714fc592 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRandom.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgRandom.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGRANDOM_HPP_ #define _ZOLTAN2_ALGRANDOM_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSortedDegree.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSortedDegree.hpp index 72aa35a6a2c7..9c24ec8f6836 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSortedDegree.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSortedDegree.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_SORTEDDEGREE_HPP_ #define _ZOLTAN2_SORTEDDEGREE_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSpectral.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSpectral.hpp index bc085c5bd5a9..23d4abd239b8 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSpectral.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_AlgSpectral.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGSPECTRAL_HPP_ #define _ZOLTAN2_ALGSPECTRAL_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_MatcherHelper.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_MatcherHelper.hpp index feb6b1bff1e3..c77e987b64dd 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_MatcherHelper.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_MatcherHelper.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MatcherHelper_hpp_ #define _ZOLTAN2_MatcherHelper_hpp_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_OrderingAlgorithms.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_OrderingAlgorithms.hpp index 9ed249a10f18..ebe346ba7f81 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_OrderingAlgorithms.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_OrderingAlgorithms.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ORDERINGALGORITHMS_HPP_ #define _ZOLTAN2_ORDERINGALGORITHMS_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/order/Zoltan2_Sort.hpp b/packages/zoltan2/core/src/algorithms/order/Zoltan2_Sort.hpp index 7a7a434d0bbe..01a6ca240d51 100644 --- a/packages/zoltan2/core/src/algorithms/order/Zoltan2_Sort.hpp +++ b/packages/zoltan2/core/src/algorithms/order/Zoltan2_Sort.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_SORT_HPP_ #define _ZOLTAN2_SORT_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgBlock.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgBlock.hpp index ea0de3eb2df4..b772f2830528 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgBlock.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgBlock.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGBLOCK_HPP_ #define _ZOLTAN2_ALGBLOCK_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp index da9ce69e9b1f..90aed30d381f 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + /*! \file Zoltan2_AlgMultiJagged.hpp \brief Contains the Multi-jagged algorthm. */ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMA.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMA.hpp index c6d7327c746b..8cc17bcd2d3e 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMA.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMA.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGPARMA_HPP_ #define _ZOLTAN2_ALGPARMA_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp index dac972d54bfb..7c91de4871c7 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGPARMETIS_HPP_ #define _ZOLTAN2_ALGPARMETIS_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgPuLP.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgPuLP.hpp index f0aa6440faae..70f8a6961f34 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgPuLP.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgPuLP.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGPULP_HPP_ #define _ZOLTAN2_ALGPULP_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp index ca2697550bf3..cab2a8e621d0 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGQUOTIENT_HPP_ #define _ZOLTAN2_ALGQUOTIENT_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB.hpp.not_compiled b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB.hpp.not_compiled index 97836b52f2b6..581b87a775c7 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB.hpp.not_compiled +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB.hpp.not_compiled @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_AlgRCB.hpp diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB_methods.hpp.not_compiled b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB_methods.hpp.not_compiled index bed90e021f3a..40694d4e7211 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB_methods.hpp.not_compiled +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgRCB_methods.hpp.not_compiled @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_AlgRCB_methods.hpp diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgSarma.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgSarma.hpp index 16405cd14948..159cb22c7409 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgSarma.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgSarma.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + /** * Created by mbenlioglu on Aug 31, 2020. 
*/ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgScotch.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgScotch.hpp index 223ab8cc7a7c..d5dffd647d4d 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgScotch.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgScotch.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGSCOTCH_HPP_ #define _ZOLTAN2_ALGSCOTCH_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_CoordinatePartitioningGraph.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_CoordinatePartitioningGraph.hpp index 240235bd01eb..d28dbf920acc 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_CoordinatePartitioningGraph.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_CoordinatePartitioningGraph.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef _ZOLTAN2_COORDCOMMGRAPH_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MatrixPartitioningAlgs.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MatrixPartitioningAlgs.hpp index 46a5dd4cadd4..de0a7a3b431c 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MatrixPartitioningAlgs.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MatrixPartitioningAlgs.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGMATRIX_HPP_ #define _ZOLTAN2_ALGMATRIX_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MultiJagged_ReductionOps.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MultiJagged_ReductionOps.hpp index 3f8c139365c1..1dfd98701b76 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MultiJagged_ReductionOps.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_MultiJagged_ReductionOps.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MultiJagged_ReductionOps.hpp diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp index 986ce1c6664a..5159400eccc3 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_PARTITIONINGALGORITHMS_HPP_ #define _ZOLTAN2_PARTITIONINGALGORITHMS_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_TaskMapping.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_TaskMapping.hpp index e20c65230e26..28477f5bd35e 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_TaskMapping.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_TaskMapping.hpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltan.hpp b/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltan.hpp index ad6dfd088dca..45e63c612322 100644 --- a/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltan.hpp +++ b/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltan.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGZOLTAN_HPP_ #define _ZOLTAN2_ALGZOLTAN_HPP_ diff --git a/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltanCallbacks.hpp b/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltanCallbacks.hpp index 457ffdf9c175..379953a7af42 100644 --- a/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltanCallbacks.hpp +++ b/packages/zoltan2/core/src/algorithms/zoltan/Zoltan2_AlgZoltanCallbacks.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_ALGZOLTANCALLBACKS_HPP_ #define _ZOLTAN2_ALGZOLTANCALLBACKS_HPP_ diff --git a/packages/zoltan2/core/src/directory/Zoltan2_Directory.hpp b/packages/zoltan2/core/src/directory/Zoltan2_Directory.hpp index cc63391e1557..9e3a5baa6aca 100644 --- a/packages/zoltan2/core/src/directory/Zoltan2_Directory.hpp +++ b/packages/zoltan2/core/src/directory/Zoltan2_Directory.hpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan2 Directory for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN2_DIRECTORY_H_ #define ZOLTAN2_DIRECTORY_H_ diff --git a/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.cpp b/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.cpp index 3b155dffbfe4..b760b7dd00fd 100644 --- a/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.cpp +++ b/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.cpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan2 Directory for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of theremove_local - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Zoltan2_Directory_Comm.hpp" #include diff --git a/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.hpp b/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.hpp index 4d666bfc1280..0dea5d0c1863 100644 --- a/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.hpp +++ b/packages/zoltan2/core/src/directory/Zoltan2_Directory_Comm.hpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan2 Directory for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of the - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN2_DIRECTORY_COMM_H_ #define ZOLTAN2_DIRECTORY_COMM_H_ diff --git a/packages/zoltan2/core/src/directory/Zoltan2_Directory_Impl.hpp b/packages/zoltan2/core/src/directory/Zoltan2_Directory_Impl.hpp index 8aee1bbe268f..a4133b3c83eb 100644 --- a/packages/zoltan2/core/src/directory/Zoltan2_Directory_Impl.hpp +++ b/packages/zoltan2/core/src/directory/Zoltan2_Directory_Impl.hpp @@ -1,48 +1,11 @@ -/* - * @HEADER - * - * *********************************************************************** - * - * Zoltan2 Directory for Load-balancing, Partitioning, Ordering and Coloring - * Copyright 2012 Sandia Corporation - * - * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, - * the U.S. Government retains certain rights in this software. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the Corporation nor the names of theremove_local - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Questions? Contact Karen Devine kddevin@sandia.gov - * Erik Boman egboman@sandia.gov - * - * *********************************************************************** - * - * @HEADER - */ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #ifndef ZOLTAN2_DIRECTORY_IMPL_H_ #define ZOLTAN2_DIRECTORY_IMPL_H_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_DebugManager.hpp b/packages/zoltan2/core/src/environment/Zoltan2_DebugManager.hpp index 22746816f460..66b2f1088afa 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_DebugManager.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_DebugManager.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_DebugManager.hpp diff --git a/packages/zoltan2/core/src/environment/Zoltan2_Environment.cpp b/packages/zoltan2/core/src/environment/Zoltan2_Environment.cpp index 025ec3dab4d6..26486dd98684 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_Environment.cpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_Environment.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + /*! \file Zoltan2_Environment.cpp \brief The definition of the Environment object. 
*/ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_Environment.hpp b/packages/zoltan2/core/src/environment/Zoltan2_Environment.hpp index 2dc52ccfddec..425c84380778 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_Environment.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_Environment.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_Environment.hpp diff --git a/packages/zoltan2/core/src/environment/Zoltan2_Exceptions.hpp b/packages/zoltan2/core/src/environment/Zoltan2_Exceptions.hpp index 39970515be09..b600326036e8 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_Exceptions.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_Exceptions.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef _ZOLTAN2_EXCEPTIONS_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_IntegerRangeList.hpp b/packages/zoltan2/core/src/environment/Zoltan2_IntegerRangeList.hpp index 67b97553f697..78babadb48f1 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_IntegerRangeList.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_IntegerRangeList.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_IntegerRangeList.hpp diff --git a/packages/zoltan2/core/src/environment/Zoltan2_Machine.hpp b/packages/zoltan2/core/src/environment/Zoltan2_Machine.hpp index 2ff46a4a15e2..3880f85f53ef 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_Machine.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_Machine.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_HPP_ #define _ZOLTAN2_MACHINE_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCA.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCA.hpp index c288223dc497..f23f1bbeb101 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCA.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCA.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_ #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCAForTesting.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCAForTesting.hpp index ec798606b8ae..5c20949d08ab 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCAForTesting.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineDragonflyRCAForTesting.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_ #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineForTesting.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineForTesting.hpp index 8bd2b9a32e0d..71be2e6448a2 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineForTesting.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineForTesting.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINEFORTESTING_HPP_ #define _ZOLTAN2_MACHINEFORTESTING_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineRepresentation.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineRepresentation.hpp index f01b200b5a79..2b12f98c9c37 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineRepresentation.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineRepresentation.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINEREPRESENTATION_HPP_ #define _ZOLTAN2_MACHINEREPRESENTATION_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusLDMS.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusLDMS.hpp index f6b3fc3b4eea..5d1f9d5acb08 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusLDMS.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusLDMS.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINEDEFAULT_HPP_ #define _ZOLTAN2_MACHINEDEFAULT_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCA.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCA.hpp index 89bea5b28577..bdfb6adbb0cc 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCA.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCA.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_ #define _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCAForTesting.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCAForTesting.hpp index 9b29044c0a30..32cfdb128d3c 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCAForTesting.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusRCAForTesting.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_ #define _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgr.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgr.hpp index 0bb51e6211b9..16d033e7804e 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgr.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgr.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_TORUS_TOPOMANAGER_HPP_ #define _ZOLTAN2_MACHINE_TORUS_TOPOMANAGER_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgrForTesting.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgrForTesting.hpp index 7c5911a62d3f..ac86171fe677 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgrForTesting.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MachineTorusTopoMgrForTesting.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #ifndef _ZOLTAN2_MACHINE_TORUS_TOPOMANAGERTEST_HPP_ #define _ZOLTAN2_MACHINE_TORUS_TOPOMANAGERTEST_HPP_ diff --git a/packages/zoltan2/core/src/environment/Zoltan2_MetricOutputManager.hpp b/packages/zoltan2/core/src/environment/Zoltan2_MetricOutputManager.hpp index 3bb6e8e92410..226b68291299 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_MetricOutputManager.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_MetricOutputManager.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MetricOutputManager.hpp diff --git a/packages/zoltan2/core/src/environment/Zoltan2_Parameters.cpp b/packages/zoltan2/core/src/environment/Zoltan2_Parameters.cpp index 4600bcbc5108..9eaafab61fe1 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_Parameters.cpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_Parameters.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_Parameters.cpp diff --git a/packages/zoltan2/core/src/environment/Zoltan2_Parameters.hpp b/packages/zoltan2/core/src/environment/Zoltan2_Parameters.hpp index e354b64a6622..a86bd4bb47af 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_Parameters.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_Parameters.hpp @@ -1,41 +1,11 @@ -// +// @HEADER +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER /*! \file Zoltan2_Parameters.hpp \brief Defines Parameter related enumerators, declares functions. 
diff --git a/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.cpp b/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.cpp index f793470d5fd0..84a2845c1c84 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.cpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_TimerManager.cpp diff --git a/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.hpp b/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.hpp index dd570874e88f..ae11c15ab080 100644 --- a/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.hpp +++ b/packages/zoltan2/core/src/environment/Zoltan2_TimerManager.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_TimerManager.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_APFMeshAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_APFMeshAdapter.hpp index ff56b9b1b17d..d0f035641878 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_APFMeshAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_APFMeshAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_APFMeshAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_Adapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_Adapter.hpp index ffd8ae6aa9d6..322c8448afab 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_Adapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_Adapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_BaseAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_BasicIdentifierAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_BasicIdentifierAdapter.hpp index 763eec83c568..dded64e5f691 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_BasicIdentifierAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_BasicIdentifierAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_BasicIdentifierAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp index f3be8e714711..dd3b2aaf6c4a 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_BasicKokkosIdentifierAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_BasicVectorAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_BasicVectorAdapter.hpp index 0b655f50b106..322d6b947b1c 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_BasicVectorAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_BasicVectorAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_BasicVectorAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_GraphAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_GraphAdapter.hpp index 59fa2be31c19..10e859b875d3 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_GraphAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_GraphAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_GraphAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_IdentifierAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_IdentifierAdapter.hpp index 3f6c00db09b5..1169aa54f54c 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_IdentifierAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_IdentifierAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_IdentifierAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_InputTraits.hpp b/packages/zoltan2/core/src/input/Zoltan2_InputTraits.hpp index ff482f7d7fd9..e25468e24985 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_InputTraits.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_InputTraits.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_InputTraits.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_MatrixAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_MatrixAdapter.hpp index bbf974db57e5..3a3659ff1b76 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_MatrixAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_MatrixAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_MatrixAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_MeshAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_MeshAdapter.hpp index fd64329c44ca..3c02b025b396 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_MeshAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_MeshAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MeshAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_PamgenMeshAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_PamgenMeshAdapter.hpp index 7179c91dfb1a..0015dcc57e2e 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_PamgenMeshAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_PamgenMeshAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_PamgenMeshAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsGraphAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsGraphAdapter.hpp index 1ef2e766216f..9ca73d94e7bd 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsGraphAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsGraphAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_TpetraCrsGraphAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsMatrixAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsMatrixAdapter.hpp index ee2092e676a2..34116978f7d0 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsMatrixAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_TpetraCrsMatrixAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_XpetraCrsMatrixAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_TpetraRowGraphAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_TpetraRowGraphAdapter.hpp index 2f18a2a69f8c..9b4b7cbedf29 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_TpetraRowGraphAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_TpetraRowGraphAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_TpetraRowGraphAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_TpetraRowMatrixAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_TpetraRowMatrixAdapter.hpp index 79816d1c9bdd..3d1fd08667b4 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_TpetraRowMatrixAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_TpetraRowMatrixAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_TpetraRowMatrixAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_VectorAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_VectorAdapter.hpp index ec51abdda691..ad9e953bf49b 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_VectorAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_VectorAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_VectorAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsGraphAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsGraphAdapter.hpp index f53570a485aa..c06ae5950ddb 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsGraphAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsGraphAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_XpetraCrsGraphAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsMatrixAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsMatrixAdapter.hpp index 8627b72ca98b..84dc00549b67 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsMatrixAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_XpetraCrsMatrixAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_XpetraCrsMatrixAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_XpetraMultiVectorAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_XpetraMultiVectorAdapter.hpp index b066bf1d1840..783c60c40099 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_XpetraMultiVectorAdapter.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_XpetraMultiVectorAdapter.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_XpetraMultiVectorAdapter.hpp diff --git a/packages/zoltan2/core/src/input/Zoltan2_XpetraTraits.hpp b/packages/zoltan2/core/src/input/Zoltan2_XpetraTraits.hpp index 60eb19db8c8a..62390607de08 100644 --- a/packages/zoltan2/core/src/input/Zoltan2_XpetraTraits.hpp +++ b/packages/zoltan2/core/src/input/Zoltan2_XpetraTraits.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_XpetraTraits.hpp diff --git a/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp b/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp index 39f2c902975f..bf87e66f353c 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! CommGraphModel creates a graph representing the communication topology of diff --git a/packages/zoltan2/core/src/models/Zoltan2_CoordinateModel.hpp b/packages/zoltan2/core/src/models/Zoltan2_CoordinateModel.hpp index 72199c65780d..0e544fcf7ea7 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_CoordinateModel.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_CoordinateModel.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_CoordinateModel.hpp diff --git a/packages/zoltan2/core/src/models/Zoltan2_GraphModel.hpp b/packages/zoltan2/core/src/models/Zoltan2_GraphModel.hpp index 6481d4041db0..eb8053a44e64 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_GraphModel.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_GraphModel.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_GraphModel.hpp diff --git a/packages/zoltan2/core/src/models/Zoltan2_HyperGraphModel.hpp b/packages/zoltan2/core/src/models/Zoltan2_HyperGraphModel.hpp index e5928fcb3f4f..a041447d835e 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_HyperGraphModel.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_HyperGraphModel.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_HyperGraphModel.hpp diff --git a/packages/zoltan2/core/src/models/Zoltan2_IdentifierModel.hpp b/packages/zoltan2/core/src/models/Zoltan2_IdentifierModel.hpp index 5f7474cd9861..fa82e9cc6fb7 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_IdentifierModel.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_IdentifierModel.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_IdentifierModel.hpp diff --git a/packages/zoltan2/core/src/models/Zoltan2_Model.hpp b/packages/zoltan2/core/src/models/Zoltan2_Model.hpp index a4c83d32f0a5..de829c8641dd 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_Model.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_Model.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_Model.hpp diff --git a/packages/zoltan2/core/src/models/Zoltan2_ModelHelpers.hpp b/packages/zoltan2/core/src/models/Zoltan2_ModelHelpers.hpp index 48a36e5ec44d..ef27748471a8 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_ModelHelpers.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_ModelHelpers.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_ModelHelpers.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp index b53d479a84a5..54db2125cb10 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_ColoringProblem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_ColoringSolution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_ColoringSolution.hpp index 2e1a7b14b4d6..1de78cb61018 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_ColoringSolution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_ColoringSolution.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_ColoringSolution.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_MappingProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_MappingProblem.hpp index 83cf4dff024b..cc91953a23fe 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_MappingProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_MappingProblem.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MappingProblem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_MappingSolution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_MappingSolution.hpp index e19bcd990bd8..66087b180498 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_MappingSolution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_MappingSolution.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_MappingSolution.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_MatchingProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_MatchingProblem.hpp index 5a50e99ac9a0..c49d5c339bb9 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_MatchingProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_MatchingProblem.hpp @@ -1,47 +1,11 @@ #if 0 // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MatchingProblem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_MatchingSolution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_MatchingSolution.hpp index bd2f5c19290a..2446fea17891 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_MatchingSolution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_MatchingSolution.hpp @@ -1,47 +1,11 @@ #if 0 // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_MatchingSolution.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningProblem.hpp index 01a82a92aa36..6822415797ca 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningProblem.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MatrixPartitioningProblem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningSolution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningSolution.hpp index 99269e1d661e..4390cf3c362a 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningSolution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_MatrixPartitioningSolution.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_PartitioningSolution.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_OrderingProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_OrderingProblem.hpp index d73cb0d46154..e8cb85c6cf39 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_OrderingProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_OrderingProblem.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_OrderingProblem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_OrderingSolution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_OrderingSolution.hpp index e672a3af405b..4703642b890b 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_OrderingSolution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_OrderingSolution.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_OrderingSolution.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_PartitionMapping.hpp b/packages/zoltan2/core/src/problems/Zoltan2_PartitionMapping.hpp index 104d427d74c4..f26ca1fb6131 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_PartitionMapping.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_PartitionMapping.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_PartMapping.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningHelpers.hpp b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningHelpers.hpp index 6b7febfd23c3..00e38d145794 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningHelpers.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningHelpers.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_PartitioningHelpers.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp index acb3a255d71d..e5f807a858d7 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_PartitioningProblem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.cpp b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.cpp index 0bc3fdbcc80b..3c609764ede5 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.cpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_PartitioningSolution.cpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.hpp index 0fc52f4ee2cc..c04e591a41de 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningSolution.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_PartitioningSolution.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp index c03fb84953c9..04f7837927ba 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_Problem.hpp diff --git a/packages/zoltan2/core/src/problems/Zoltan2_Solution.hpp b/packages/zoltan2/core/src/problems/Zoltan2_Solution.hpp index 3bcd6520805f..44facfd1f0d3 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_Solution.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_Solution.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_Solution.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.cpp b/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.cpp index e65efc686645..ba6fe2b61dae 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.cpp +++ b/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_AlltoAll.cpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.hpp b/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.hpp index 98cd8ec78f6c..1abd04b58c9f 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_AlltoAll.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_AlltoAll.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_BaseClassMetrics.hpp b/packages/zoltan2/core/src/util/Zoltan2_BaseClassMetrics.hpp index dd8294af08f1..d03f6696598e 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_BaseClassMetrics.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_BaseClassMetrics.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_BaseClassMetrics.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_EvaluateBaseClass.hpp b/packages/zoltan2/core/src/util/Zoltan2_EvaluateBaseClass.hpp index 1f27bbbf0ca8..8f982de8844b 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_EvaluateBaseClass.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_EvaluateBaseClass.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_EvaluateBaseClass.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_EvaluateMapping.hpp b/packages/zoltan2/core/src/util/Zoltan2_EvaluateMapping.hpp index 8869efad6103..7352a5f49b79 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_EvaluateMapping.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_EvaluateMapping.hpp @@ -1,90 +1,10 @@ // @HEADER -// -// *********************************************************************** -// -// Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** -// -// @HEADER -// @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_EvaluatePartition.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_EvaluateOrdering.hpp b/packages/zoltan2/core/src/util/Zoltan2_EvaluateOrdering.hpp index 03fe35c73d3a..cccfa9c0af81 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_EvaluateOrdering.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_EvaluateOrdering.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_EvaluateOrdering.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_EvaluatePartition.hpp b/packages/zoltan2/core/src/util/Zoltan2_EvaluatePartition.hpp index f6e12a1471c2..e04a328c7572 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_EvaluatePartition.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_EvaluatePartition.hpp @@ -1,90 +1,10 @@ // @HEADER -// -// *********************************************************************** -// -// Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** -// -// @HEADER -// @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_EvaluatePartition.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_GraphMetrics.hpp b/packages/zoltan2/core/src/util/Zoltan2_GraphMetrics.hpp index 8bcd140aac5f..91e5c8e070e6 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_GraphMetrics.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_GraphMetrics.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_GraphMetrics.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_GraphMetricsUtility.hpp b/packages/zoltan2/core/src/util/Zoltan2_GraphMetricsUtility.hpp index 93372b41657a..8c35e787e0b7 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_GraphMetricsUtility.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_GraphMetricsUtility.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_GraphMetricValuesUtility.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_IO.cpp b/packages/zoltan2/core/src/util/Zoltan2_IO.cpp index f7af66917ed1..b95a65444ac5 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_IO.cpp +++ b/packages/zoltan2/core/src/util/Zoltan2_IO.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_IO.cpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_IO.hpp b/packages/zoltan2/core/src/util/Zoltan2_IO.hpp index a20f2f404752..7a99289a3a9a 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_IO.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_IO.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_IO.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetrics.hpp b/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetrics.hpp index 8e65949a5303..bd995a7f958f 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetrics.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetrics.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_ImbalanceMetrics.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetricsUtility.hpp b/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetricsUtility.hpp index f3f64ba97c0d..4b5914c9396a 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetricsUtility.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_ImbalanceMetricsUtility.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_ImbalanceMetricsUtility.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_MetricUtility.hpp b/packages/zoltan2/core/src/util/Zoltan2_MetricUtility.hpp index d8ae525ce647..d4ccaf89233c 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_MetricUtility.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_MetricUtility.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_MetricFunctions.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_Standards.hpp b/packages/zoltan2/core/src/util/Zoltan2_Standards.hpp index 36fbe753200c..1fd48af6f99a 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_Standards.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_Standards.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_Standards.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_StridedData.hpp b/packages/zoltan2/core/src/util/Zoltan2_StridedData.hpp index f8350bc247e8..50e205ed5e15 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_StridedData.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_StridedData.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_STRIDEDDATA_HPP_ #define _ZOLTAN2_STRIDEDDATA_HPP_ diff --git a/packages/zoltan2/core/src/util/Zoltan2_Util.cpp b/packages/zoltan2/core/src/util/Zoltan2_Util.cpp index aca7cc33671c..7a571fd05876 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_Util.cpp +++ b/packages/zoltan2/core/src/util/Zoltan2_Util.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_Util.cpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_Util.hpp b/packages/zoltan2/core/src/util/Zoltan2_Util.hpp index 649a9ecf728e..543553393374 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_Util.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_Util.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_Util.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_Version.cpp b/packages/zoltan2/core/src/util/Zoltan2_Version.cpp index 51ff4d83cc6a..e115634c72b8 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_Version.cpp +++ b/packages/zoltan2/core/src/util/Zoltan2_Version.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + //@HEADER // ************************************************************************ // copyright diff --git a/packages/zoltan2/core/src/util/Zoltan2_Version.hpp b/packages/zoltan2/core/src/util/Zoltan2_Version.hpp index a90b71eeb3fd..ca0b7c2f34b5 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_Version.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_Version.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + //@HEADER // ************************************************************************ // copyright diff --git a/packages/zoltan2/core/src/util/Zoltan2_componentMetrics.hpp b/packages/zoltan2/core/src/util/Zoltan2_componentMetrics.hpp index c9b93b6831a3..699b3d005d86 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_componentMetrics.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_componentMetrics.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_componentMetrics.hpp diff --git a/packages/zoltan2/core/src/util/Zoltan2_findUniqueGids.hpp b/packages/zoltan2/core/src/util/Zoltan2_findUniqueGids.hpp index 0d7ffec76f2a..3087c4559920 100644 --- a/packages/zoltan2/core/src/util/Zoltan2_findUniqueGids.hpp +++ b/packages/zoltan2/core/src/util/Zoltan2_findUniqueGids.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_findUniqueGids.hpp diff --git a/packages/zoltan2/doc/COPYRIGHT_AND_LICENSE b/packages/zoltan2/doc/COPYRIGHT_AND_LICENSE deleted file mode 100644 index 9a355000c813..000000000000 --- a/packages/zoltan2/doc/COPYRIGHT_AND_LICENSE +++ /dev/null @@ -1,44 +0,0 @@ -// @HEADER -// -// *********************************************************************** -// -// Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** -// -// @HEADER diff --git a/packages/zoltan2/example/block/block.cpp b/packages/zoltan2/example/block/block.cpp index bffad27726b5..7db1fcb81de8 100644 --- a/packages/zoltan2/example/block/block.cpp +++ b/packages/zoltan2/example/block/block.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file block.cpp diff --git a/packages/zoltan2/example/block/kokkosBlock.cpp b/packages/zoltan2/example/block/kokkosBlock.cpp index 9bf66d60f809..f2252920df65 100644 --- a/packages/zoltan2/example/block/kokkosBlock.cpp +++ b/packages/zoltan2/example/block/kokkosBlock.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file kokkosBlock.cpp diff --git a/packages/zoltan2/example/geometric/rcb_C.cpp b/packages/zoltan2/example/geometric/rcb_C.cpp index 05b97734d491..dc335bc7ebc7 100644 --- a/packages/zoltan2/example/geometric/rcb_C.cpp +++ b/packages/zoltan2/example/geometric/rcb_C.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file rcb_C.cpp diff --git a/packages/zoltan2/example/graph/graph.cpp b/packages/zoltan2/example/graph/graph.cpp index 691c0eded790..6bce019df4dc 100644 --- a/packages/zoltan2/example/graph/graph.cpp +++ b/packages/zoltan2/example/graph/graph.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/scripts/copyright.txt b/packages/zoltan2/scripts/copyright.txt deleted file mode 100644 index 9a355000c813..000000000000 --- a/packages/zoltan2/scripts/copyright.txt +++ /dev/null @@ -1,44 +0,0 @@ -// @HEADER -// -// *********************************************************************** -// -// Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** -// -// @HEADER diff --git a/packages/zoltan2/sphynx/COPYRIGHT b/packages/zoltan2/sphynx/COPYRIGHT new file mode 100644 index 000000000000..148bed2742c1 --- /dev/null +++ b/packages/zoltan2/sphynx/COPYRIGHT @@ -0,0 +1,10 @@ + + Sphynx + Copyright (c) 2020 NTESS + +Copyright 2020 National Technology & Engineering Solutions of Sandia, +LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the +U.S. Government retains certain rights in this software. + +Copyright the Sphynx contributors. 
+ diff --git a/packages/zoltan2/sphynx/LICENSE b/packages/zoltan2/sphynx/LICENSE index da68d76e0c80..28bff628ec0a 100644 --- a/packages/zoltan2/sphynx/LICENSE +++ b/packages/zoltan2/sphynx/LICENSE @@ -1,44 +1,31 @@ -// *********************************************************************** -// -// Sphynx -// Copyright 2020 National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Sphynx is licensed under 3-clause BSD terms of use: -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// Karen Devine (kddevin@sandia.gov) -// -// *********************************************************************** +SPDX-License-Identifier: BSD-3-Clause + +Copyright (c) 2020 NTESS and the Sphynx contributors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + 3. Neither the name of the copyright holder nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/packages/zoltan2/sphynx/README.md b/packages/zoltan2/sphynx/README.md new file mode 100644 index 000000000000..61329079ea47 --- /dev/null +++ b/packages/zoltan2/sphynx/README.md @@ -0,0 +1,12 @@ +# Sphynx + + +## Copyright and License +See sphynx/COPYRIGHT, sphynx/LICENSE, https://trilinos.github.io/license.html and individual file headers for additional information. + + +## Questions? +Contact lead developers: + +* Zoltan2 team (GitHub handle: @trilinos/zoltan2) +* Erik Boman (GitHub handle: [egboman](https://github.com/egboman) or egboman@sandia.gov) diff --git a/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp b/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp index fb1a6ade30ed..823b592a028d 100644 --- a/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp +++ b/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp @@ -1,48 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Sphynx -// Copyright 2020 National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// Karen Devine (kddevin@sandia.gov) -// -// *********************************************************************** // +// Copyright 2020 NTESS and the Sphynx contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef _ZOLTAN2_SPHYNXALGORITHM_HPP_ diff --git a/packages/zoltan2/sphynx/src/Zoltan2_SphynxProblem.hpp b/packages/zoltan2/sphynx/src/Zoltan2_SphynxProblem.hpp index eab485f5b848..68c6083295cd 100644 --- a/packages/zoltan2/sphynx/src/Zoltan2_SphynxProblem.hpp +++ b/packages/zoltan2/sphynx/src/Zoltan2_SphynxProblem.hpp @@ -1,49 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Sphynx -// Copyright 2020 National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// Karen Devine (kddevin@sandia.gov) -// -// *********************************************************************** // +// Copyright 2020 NTESS and the Sphynx contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef _ZOLTAN2_SPHYNXPROBLEM_HPP_ #define _ZOLTAN2_SPHYNXPROBLEM_HPP_ diff --git a/packages/zoltan2/sphynx/src/Zoltan2_SphynxVersion.cpp b/packages/zoltan2/sphynx/src/Zoltan2_SphynxVersion.cpp index 5c4f3d74e1c9..8e787bad604b 100644 --- a/packages/zoltan2/sphynx/src/Zoltan2_SphynxVersion.cpp +++ b/packages/zoltan2/sphynx/src/Zoltan2_SphynxVersion.cpp @@ -1,48 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Sphynx -// Copyright 2020 National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// Karen Devine (kddevin@sandia.gov) -// -// *********************************************************************** // +// Copyright 2020 NTESS and the Sphynx contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #include "Trilinos_version.h" diff --git a/packages/zoltan2/test/core/TpetraCrsColorer/Bug9500.cpp b/packages/zoltan2/test/core/TpetraCrsColorer/Bug9500.cpp index 0cc4d3e26899..74f9028d732d 100644 --- a/packages/zoltan2/test/core/TpetraCrsColorer/Bug9500.cpp +++ b/packages/zoltan2/test/core/TpetraCrsColorer/Bug9500.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Tpetra_Core.hpp" #include "Kokkos_Random.hpp" diff --git a/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp b/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp index c4e190bb38ec..2b176710d0e9 100644 --- a/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp +++ b/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Tpetra_Core.hpp" #include "Kokkos_Random.hpp" diff --git a/packages/zoltan2/test/core/color/coloring1.cpp b/packages/zoltan2/test/core/color/coloring1.cpp index d25beefeb073..47c56cb75df0 100644 --- a/packages/zoltan2/test/core/color/coloring1.cpp +++ b/packages/zoltan2/test/core/color/coloring1.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/correctness/zoltanCompare.cpp b/packages/zoltan2/test/core/correctness/zoltanCompare.cpp index e8a5d1eb742f..8913b2874a28 100644 --- a/packages/zoltan2/test/core/correctness/zoltanCompare.cpp +++ b/packages/zoltan2/test/core/correctness/zoltanCompare.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file zoltanCompare.cpp diff --git a/packages/zoltan2/test/core/directory/directoryTest_Impl.hpp b/packages/zoltan2/test/core/directory/directoryTest_Impl.hpp index caaa4c0bf592..2315b4af2df9 100644 --- a/packages/zoltan2/test/core/directory/directoryTest_Impl.hpp +++ b/packages/zoltan2/test/core/directory/directoryTest_Impl.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #include diff --git a/packages/zoltan2/test/core/directory/directoryTest_Kokkos.cpp b/packages/zoltan2/test/core/directory/directoryTest_Kokkos.cpp index 29db98930533..f075806a20ca 100644 --- a/packages/zoltan2/test/core/directory/directoryTest_Kokkos.cpp +++ b/packages/zoltan2/test/core/directory/directoryTest_Kokkos.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #include "directoryTest_Impl.hpp" diff --git a/packages/zoltan2/test/core/directory/directoryTest_KokkosSimple.cpp b/packages/zoltan2/test/core/directory/directoryTest_KokkosSimple.cpp index 0e7ebe391fe8..83ca55a5b9f5 100644 --- a/packages/zoltan2/test/core/directory/directoryTest_KokkosSimple.cpp +++ b/packages/zoltan2/test/core/directory/directoryTest_KokkosSimple.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #include "Zoltan2_Directory_Impl.hpp" diff --git a/packages/zoltan2/test/core/directory/directoryTest_findUniqueGids.cpp b/packages/zoltan2/test/core/directory/directoryTest_findUniqueGids.cpp index 66762a2bdf43..4a50ddb72258 100644 --- a/packages/zoltan2/test/core/directory/directoryTest_findUniqueGids.cpp +++ b/packages/zoltan2/test/core/directory/directoryTest_findUniqueGids.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER // Program to testing Zoltan2::findUniqueGids capability diff --git a/packages/zoltan2/test/core/driver/Zoltan2_MeshCoordinateTest.hpp b/packages/zoltan2/test/core/driver/Zoltan2_MeshCoordinateTest.hpp index e2c19b28cbf4..3a5fc0e2715c 100644 --- a/packages/zoltan2/test/core/driver/Zoltan2_MeshCoordinateTest.hpp +++ b/packages/zoltan2/test/core/driver/Zoltan2_MeshCoordinateTest.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // // Zoltan2_MeshCoordinateTest.h // Zoltan2TestDriver diff --git a/packages/zoltan2/test/core/driver/Zoltan2_TestInterface.hpp b/packages/zoltan2/test/core/driver/Zoltan2_TestInterface.hpp index 18137cad53fc..2dc67e6bbb03 100644 --- a/packages/zoltan2/test/core/driver/Zoltan2_TestInterface.hpp +++ b/packages/zoltan2/test/core/driver/Zoltan2_TestInterface.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // // Zoltan2_TestInterface.h // Zoltan2TestDriver diff --git a/packages/zoltan2/test/core/driver/Zoltan2_Tests.hpp b/packages/zoltan2/test/core/driver/Zoltan2_Tests.hpp index ecba4794eadc..dc653bb73043 100644 --- a/packages/zoltan2/test/core/driver/Zoltan2_Tests.hpp +++ b/packages/zoltan2/test/core/driver/Zoltan2_Tests.hpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // // Zoltan2_Tests.h // Zoltan2TestDriver diff --git a/packages/zoltan2/test/core/driver/test_driver.cpp b/packages/zoltan2/test/core/driver/test_driver.cpp index 6248d25e40f2..b766c3e94cff 100644 --- a/packages/zoltan2/test/core/driver/test_driver.cpp +++ b/packages/zoltan2/test/core/driver/test_driver.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /* \file test_driver.cpp diff --git a/packages/zoltan2/test/core/helpers/AdapterForTests.hpp b/packages/zoltan2/test/core/helpers/AdapterForTests.hpp index a88d5eec4d8e..886d77858715 100644 --- a/packages/zoltan2/test/core/helpers/AdapterForTests.hpp +++ b/packages/zoltan2/test/core/helpers/AdapterForTests.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file AdapterForTests.hpp diff --git a/packages/zoltan2/test/core/helpers/ErrorHandlingForTests.hpp b/packages/zoltan2/test/core/helpers/ErrorHandlingForTests.hpp index 6c76cd4bea8c..1ecd9edb7f7d 100644 --- a/packages/zoltan2/test/core/helpers/ErrorHandlingForTests.hpp +++ b/packages/zoltan2/test/core/helpers/ErrorHandlingForTests.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef ERRORHANDLINGFORTESTS_HPP diff --git a/packages/zoltan2/test/core/helpers/GeometricGenerator.hpp b/packages/zoltan2/test/core/helpers/GeometricGenerator.hpp index 325108d802bb..592e334c9be6 100644 --- a/packages/zoltan2/test/core/helpers/GeometricGenerator.hpp +++ b/packages/zoltan2/test/core/helpers/GeometricGenerator.hpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #ifndef GEOMETRICGENERATOR #define GEOMETRICGENERATOR diff --git a/packages/zoltan2/test/core/helpers/PrintData.hpp b/packages/zoltan2/test/core/helpers/PrintData.hpp index a9b37079b1af..c67396720eb4 100644 --- a/packages/zoltan2/test/core/helpers/PrintData.hpp +++ b/packages/zoltan2/test/core/helpers/PrintData.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef PRINTDATA_HPP diff --git a/packages/zoltan2/test/core/helpers/UserInputForTests.hpp b/packages/zoltan2/test/core/helpers/UserInputForTests.hpp index aced677d2aa7..5b44775f670d 100644 --- a/packages/zoltan2/test/core/helpers/UserInputForTests.hpp +++ b/packages/zoltan2/test/core/helpers/UserInputForTests.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file UserInputForTests.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_ComparisonHelper.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_ComparisonHelper.hpp index 2fffcb971fc7..0a77f7960e44 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_ComparisonHelper.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_ComparisonHelper.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_ComparisonHelper.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_EvaluateFactory.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_EvaluateFactory.hpp index 971a8a804642..e296e53c2aa7 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_EvaluateFactory.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_EvaluateFactory.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_EvaluateFactory.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_MetricAnalyzer.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_MetricAnalyzer.hpp index 26397932786d..9953da8b2bb0 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_MetricAnalyzer.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_MetricAnalyzer.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /* \file Zoltan2_MetricAnalyzer.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_PamgenMeshStructure.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_PamgenMeshStructure.hpp index d80cea99ab40..7afd7ced9952 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_PamgenMeshStructure.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_PamgenMeshStructure.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /* \file Zoltan2_PamgenMeshStructure.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_ProblemFactory.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_ProblemFactory.hpp index 2f1c386f6dbc..84b1a411479f 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_ProblemFactory.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_ProblemFactory.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_TestFactory.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_TestHelpers.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_TestHelpers.hpp index 3e830b257819..afa40a8d49d6 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_TestHelpers.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_TestHelpers.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file Zoltan2_TestHelpers.hpp diff --git a/packages/zoltan2/test/core/helpers/Zoltan2_Typedefs.hpp b/packages/zoltan2/test/core/helpers/Zoltan2_Typedefs.hpp index ab9783a30a4e..af1177cb0ae8 100644 --- a/packages/zoltan2/test/core/helpers/Zoltan2_Typedefs.hpp +++ b/packages/zoltan2/test/core/helpers/Zoltan2_Typedefs.hpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file Zoltan2_Typedefs.hpp diff --git a/packages/zoltan2/test/core/order/nd.cpp b/packages/zoltan2/test/core/order/nd.cpp index 025da178a148..7e8d74a28d06 100644 --- a/packages/zoltan2/test/core/order/nd.cpp +++ b/packages/zoltan2/test/core/order/nd.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/order/ordering1.cpp b/packages/zoltan2/test/core/order/ordering1.cpp index c9b188daa1cd..98dde20aa409 100644 --- a/packages/zoltan2/test/core/order/ordering1.cpp +++ b/packages/zoltan2/test/core/order/ordering1.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/order/orderingAMD.cpp b/packages/zoltan2/test/core/order/orderingAMD.cpp index e34bd856e2c6..a7ed43bff87e 100644 --- a/packages/zoltan2/test/core/order/orderingAMD.cpp +++ b/packages/zoltan2/test/core/order/orderingAMD.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/order/orderingMetis.cpp b/packages/zoltan2/test/core/order/orderingMetis.cpp index c40ec02b46c6..b8176408d601 100644 --- a/packages/zoltan2/test/core/order/orderingMetis.cpp +++ b/packages/zoltan2/test/core/order/orderingMetis.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/order/orderingScotch.cpp b/packages/zoltan2/test/core/order/orderingScotch.cpp index 8a092d5abe98..35c3a9ef1645 100644 --- a/packages/zoltan2/test/core/order/orderingScotch.cpp +++ b/packages/zoltan2/test/core/order/orderingScotch.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/partition/APFMeshAdapterTest.cpp b/packages/zoltan2/test/core/partition/APFMeshAdapterTest.cpp index 13cdaed7480b..7b7a6807aef3 100644 --- a/packages/zoltan2/test/core/partition/APFMeshAdapterTest.cpp +++ b/packages/zoltan2/test/core/partition/APFMeshAdapterTest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file APFMeshAdapterTest.cpp diff --git a/packages/zoltan2/test/core/partition/MultiJaggedTest.cpp b/packages/zoltan2/test/core/partition/MultiJaggedTest.cpp index e4c781d2a6f0..9f54c6545efc 100644 --- a/packages/zoltan2/test/core/partition/MultiJaggedTest.cpp +++ b/packages/zoltan2/test/core/partition/MultiJaggedTest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file MultiJagged.cpp diff --git a/packages/zoltan2/test/core/partition/PartitionAndParMATest.cpp b/packages/zoltan2/test/core/partition/PartitionAndParMATest.cpp index eac7d68f3036..61d64a9b0fcd 100644 --- a/packages/zoltan2/test/core/partition/PartitionAndParMATest.cpp +++ b/packages/zoltan2/test/core/partition/PartitionAndParMATest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file PartitionAndParMA.cpp diff --git a/packages/zoltan2/test/core/partition/TaskMappingProblemTest.cpp b/packages/zoltan2/test/core/partition/TaskMappingProblemTest.cpp index 828f935ef6d5..4e64eb87c2e5 100644 --- a/packages/zoltan2/test/core/partition/TaskMappingProblemTest.cpp +++ b/packages/zoltan2/test/core/partition/TaskMappingProblemTest.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Zoltan2_TaskMapping.hpp" #include diff --git a/packages/zoltan2/test/core/partition/TaskMappingSimulate.cpp b/packages/zoltan2/test/core/partition/TaskMappingSimulate.cpp index 4f77b1dd4111..e11aadada5ec 100644 --- a/packages/zoltan2/test/core/partition/TaskMappingSimulate.cpp +++ b/packages/zoltan2/test/core/partition/TaskMappingSimulate.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Zoltan2_TaskMapping.hpp" #include diff --git a/packages/zoltan2/test/core/partition/TaskMappingTest.cpp b/packages/zoltan2/test/core/partition/TaskMappingTest.cpp index 9e31677b4093..e7b4b7ec8a3a 100644 --- a/packages/zoltan2/test/core/partition/TaskMappingTest.cpp +++ b/packages/zoltan2/test/core/partition/TaskMappingTest.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Zoltan2_TaskMapping.hpp" #include diff --git a/packages/zoltan2/test/core/partition/TaskMappingTest3.cpp b/packages/zoltan2/test/core/partition/TaskMappingTest3.cpp index 100102d03106..c79e5fc91177 100644 --- a/packages/zoltan2/test/core/partition/TaskMappingTest3.cpp +++ b/packages/zoltan2/test/core/partition/TaskMappingTest3.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #include "Zoltan2_TaskMapping.hpp" #include "Zoltan2_TestHelpers.hpp" #include "Tpetra_MultiVector_decl.hpp" diff --git a/packages/zoltan2/test/core/partition/blockTest.cpp b/packages/zoltan2/test/core/partition/blockTest.cpp index 3db364ff2fcf..b16be6665fe4 100644 --- a/packages/zoltan2/test/core/partition/blockTest.cpp +++ b/packages/zoltan2/test/core/partition/blockTest.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/partition/fix2010.cpp b/packages/zoltan2/test/core/partition/fix2010.cpp index 5ce3f133df7e..0dc7d467c305 100644 --- a/packages/zoltan2/test/core/partition/fix2010.cpp +++ b/packages/zoltan2/test/core/partition/fix2010.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/partition/fix4785.cpp b/packages/zoltan2/test/core/partition/fix4785.cpp index d0641a1b289e..2612fce8a94f 100644 --- a/packages/zoltan2/test/core/partition/fix4785.cpp +++ b/packages/zoltan2/test/core/partition/fix4785.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file mj_imbalanced.cpp diff --git a/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp b/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp index ac1c2c1e343e..7f54e5824eec 100644 --- a/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp +++ b/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file mj_backwardcompat.cpp diff --git a/packages/zoltan2/test/core/partition/mj_epetra.cpp b/packages/zoltan2/test/core/partition/mj_epetra.cpp index 0093868db5a0..c3267bc3d60a 100644 --- a/packages/zoltan2/test/core/partition/mj_epetra.cpp +++ b/packages/zoltan2/test/core/partition/mj_epetra.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file mj_epetra diff --git a/packages/zoltan2/test/core/partition/mj_int_coordinates.cpp b/packages/zoltan2/test/core/partition/mj_int_coordinates.cpp index 616b1981e6ee..7b4c7a37552a 100644 --- a/packages/zoltan2/test/core/partition/mj_int_coordinates.cpp +++ b/packages/zoltan2/test/core/partition/mj_int_coordinates.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file mj_int_coordinates.cpp diff --git a/packages/zoltan2/test/core/partition/pamgenMeshAdapterTest.cpp b/packages/zoltan2/test/core/partition/pamgenMeshAdapterTest.cpp index b3e94ee98a91..5a9e4ca75634 100644 --- a/packages/zoltan2/test/core/partition/pamgenMeshAdapterTest.cpp +++ b/packages/zoltan2/test/core/partition/pamgenMeshAdapterTest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file pamgenMeshAdapterTest.cpp diff --git a/packages/zoltan2/test/core/partition/partition2DMatrix.cpp b/packages/zoltan2/test/core/partition/partition2DMatrix.cpp index f12a1eb51010..904fca3eae4f 100644 --- a/packages/zoltan2/test/core/partition/partition2DMatrix.cpp +++ b/packages/zoltan2/test/core/partition/partition2DMatrix.cpp @@ -1,49 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER - #include #include // #include diff --git a/packages/zoltan2/test/core/partition/partition_sarma.cpp b/packages/zoltan2/test/core/partition/partition_sarma.cpp index 4eacaf54f487..80d195a5bc4a 100644 --- a/packages/zoltan2/test/core/partition/partition_sarma.cpp +++ b/packages/zoltan2/test/core/partition/partition_sarma.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + /* * Created by mbenlioglu on Nov 10, 2020. */ diff --git a/packages/zoltan2/test/core/partition/partitioning1.cpp b/packages/zoltan2/test/core/partition/partitioning1.cpp index 34a518ded1ae..d95b4b3103e8 100644 --- a/packages/zoltan2/test/core/partition/partitioning1.cpp +++ b/packages/zoltan2/test/core/partition/partitioning1.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/partition/partitioningTree.cpp b/packages/zoltan2/test/core/partition/partitioningTree.cpp index 9083a184e105..caa57072a3ed 100644 --- a/packages/zoltan2/test/core/partition/partitioningTree.cpp +++ b/packages/zoltan2/test/core/partition/partitioningTree.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/partition/rcbPerformanceZ1.cpp b/packages/zoltan2/test/core/partition/rcbPerformanceZ1.cpp index 5f22991f6d9a..6b924a5a3308 100644 --- a/packages/zoltan2/test/core/partition/rcbPerformanceZ1.cpp +++ b/packages/zoltan2/test/core/partition/rcbPerformanceZ1.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file rcbPerformanceZ1.cpp diff --git a/packages/zoltan2/test/core/partition/rcbTest.cpp b/packages/zoltan2/test/core/partition/rcbTest.cpp index b1cdcb45a8e0..a8327f120427 100644 --- a/packages/zoltan2/test/core/partition/rcbTest.cpp +++ b/packages/zoltan2/test/core/partition/rcbTest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file rcb.cpp diff --git a/packages/zoltan2/test/core/scaling/rcbPerformance.cpp b/packages/zoltan2/test/core/scaling/rcbPerformance.cpp index ac23563893ab..94a047990fde 100644 --- a/packages/zoltan2/test/core/scaling/rcbPerformance.cpp +++ b/packages/zoltan2/test/core/scaling/rcbPerformance.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\file rcbPerformance.cpp diff --git a/packages/zoltan2/test/core/scaling/rcbPerformanceZ1.cpp b/packages/zoltan2/test/core/scaling/rcbPerformanceZ1.cpp index 8db575e8e217..a6653246dbc6 100644 --- a/packages/zoltan2/test/core/scaling/rcbPerformanceZ1.cpp +++ b/packages/zoltan2/test/core/scaling/rcbPerformanceZ1.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file rcbPerformanceZ1.cpp diff --git a/packages/zoltan2/test/core/temp/XpetraEpetraMap.cpp b/packages/zoltan2/test/core/temp/XpetraEpetraMap.cpp index cb92e0928528..f330844ca0c1 100644 --- a/packages/zoltan2/test/core/temp/XpetraEpetraMap.cpp +++ b/packages/zoltan2/test/core/temp/XpetraEpetraMap.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // Program to debug segfaults being reported in CDASH when // -D KokkosClassic_DefaultNode:STRING=Tpetra::KokkosCompat::KokkosOpenMPWrapperNode // -D Trilinos_ENABLE_OpenMP:BOOL=ON diff --git a/packages/zoltan2/test/core/temp/XpetraEpetraMatrix.cpp b/packages/zoltan2/test/core/temp/XpetraEpetraMatrix.cpp index c4e9ba6b8246..8b4a054c2a89 100644 --- a/packages/zoltan2/test/core/temp/XpetraEpetraMatrix.cpp +++ b/packages/zoltan2/test/core/temp/XpetraEpetraMatrix.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // Program to debug segfaults being reported in CDASH when // -D Kokkos_ENABLE_THREADS:BOOL=ON // -D Tpetra_INST_PTHREAD:BOOL=ON diff --git a/packages/zoltan2/test/core/temp/absdefinitiontest.cpp b/packages/zoltan2/test/core/temp/absdefinitiontest.cpp index b2f07381a7a2..ccaec640a61e 100644 --- a/packages/zoltan2/test/core/temp/absdefinitiontest.cpp +++ b/packages/zoltan2/test/core/temp/absdefinitiontest.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #include "Zoltan2_PartitioningProblem.hpp" #include diff --git a/packages/zoltan2/test/core/temp/ddirectoryTest.cpp b/packages/zoltan2/test/core/temp/ddirectoryTest.cpp index c15d7ff8e126..cf37ead5828c 100644 --- a/packages/zoltan2/test/core/temp/ddirectoryTest.cpp +++ b/packages/zoltan2/test/core/temp/ddirectoryTest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER // Program that demonstrates how to emulate distributed directories that diff --git a/packages/zoltan2/test/core/temp/mapMemoryLeak.cpp b/packages/zoltan2/test/core/temp/mapMemoryLeak.cpp index 68260e50e010..f44245ad5126 100644 --- a/packages/zoltan2/test/core/temp/mapMemoryLeak.cpp +++ b/packages/zoltan2/test/core/temp/mapMemoryLeak.cpp @@ -1,3 +1,11 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER #include "Teuchos_CommHelpers.hpp" #include "Teuchos_DefaultComm.hpp" diff --git a/packages/zoltan2/test/core/temp/mapOneToOne.cpp b/packages/zoltan2/test/core/temp/mapOneToOne.cpp index 8eb4d12ceca9..519d9795fc1e 100644 --- a/packages/zoltan2/test/core/temp/mapOneToOne.cpp +++ b/packages/zoltan2/test/core/temp/mapOneToOne.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + /// Small test program showing how to take GIDs that may have /// duplicates across processors (e.g., mesh vertices that are copied /// at part boundaries in an element-based decomposition) and assign diff --git a/packages/zoltan2/test/core/temp/mapRemotes.cpp b/packages/zoltan2/test/core/temp/mapRemotes.cpp index cbf91d2d1a84..7c6a98b52493 100644 --- a/packages/zoltan2/test/core/temp/mapRemotes.cpp +++ b/packages/zoltan2/test/core/temp/mapRemotes.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // Small test program showing how to locate off-process GIDs // Trying to use Tpetra::Map like the Zoltan DDirectory // diff --git a/packages/zoltan2/test/core/temp/multivectorTest.cpp b/packages/zoltan2/test/core/temp/multivectorTest.cpp index 7591eedb9841..28737f1428f9 100644 --- a/packages/zoltan2/test/core/temp/multivectorTest.cpp +++ b/packages/zoltan2/test/core/temp/multivectorTest.cpp @@ -1,7 +1,10 @@ // @HEADER -// *********************************************************************** -// Copyright message goes here. -// *********************************************************************** +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER // Create a Tpetra::MultiVector, and time the following: diff --git a/packages/zoltan2/test/core/temp/paramTest.cpp b/packages/zoltan2/test/core/temp/paramTest.cpp index 7060d54d9068..64abcb8f6d96 100644 --- a/packages/zoltan2/test/core/temp/paramTest.cpp +++ b/packages/zoltan2/test/core/temp/paramTest.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! 
\brief test that verifies zoltan2 parameters can be written to diff --git a/packages/zoltan2/test/core/temp/paramToXML.cpp b/packages/zoltan2/test/core/temp/paramToXML.cpp index 98a3d7712e6c..24e49a1b44a6 100644 --- a/packages/zoltan2/test/core/temp/paramToXML.cpp +++ b/packages/zoltan2/test/core/temp/paramToXML.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \brief utility that creates an initial XML file of parameters. diff --git a/packages/zoltan2/test/core/temp/testfail.cpp b/packages/zoltan2/test/core/temp/testfail.cpp index 80a57f9386a7..9fbb803551c2 100644 --- a/packages/zoltan2/test/core/temp/testfail.cpp +++ b/packages/zoltan2/test/core/temp/testfail.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + #include int main(int narg, char **arg) diff --git a/packages/zoltan2/test/core/temp/teuchosCommTest.cpp b/packages/zoltan2/test/core/temp/teuchosCommTest.cpp index 0a6575c9e134..c378209ab1cd 100644 --- a/packages/zoltan2/test/core/temp/teuchosCommTest.cpp +++ b/packages/zoltan2/test/core/temp/teuchosCommTest.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + // Test to evaluate the performance of Teuchos::Comm's reduceAll versus // direct MPI invocation of MPI_Allreduce for common data types int, double. diff --git a/packages/zoltan2/test/core/temp/vecWithCopies.cpp b/packages/zoltan2/test/core/temp/vecWithCopies.cpp index 37848d8fa1ce..9630f8c83e90 100644 --- a/packages/zoltan2/test/core/temp/vecWithCopies.cpp +++ b/packages/zoltan2/test/core/temp/vecWithCopies.cpp @@ -1,3 +1,12 @@ +// @HEADER +// ***************************************************************************** +// Zoltan2: A package of combinatorial algorithms for scientific computing +// +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + /// Small test program showing how to take GIDs that may have /// duplicates across processors (e.g., mesh vertices that are copied /// at part boundaries in an element-based decomposition) and assign diff --git a/packages/zoltan2/test/core/unit/environment/AllParameters.cpp b/packages/zoltan2/test/core/unit/environment/AllParameters.cpp index e2ce7294c097..a695aa202321 100644 --- a/packages/zoltan2/test/core/unit/environment/AllParameters.cpp +++ b/packages/zoltan2/test/core/unit/environment/AllParameters.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing parameters. Serial test. diff --git a/packages/zoltan2/test/core/unit/environment/DebugManager.cpp b/packages/zoltan2/test/core/unit/environment/DebugManager.cpp index 71820d5d44b4..4bb59db2ec78 100644 --- a/packages/zoltan2/test/core/unit/environment/DebugManager.cpp +++ b/packages/zoltan2/test/core/unit/environment/DebugManager.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing the DebugManager object. // diff --git a/packages/zoltan2/test/core/unit/environment/Environment.cpp b/packages/zoltan2/test/core/unit/environment/Environment.cpp index ffbf6febd33f..9b7b9830a23a 100644 --- a/packages/zoltan2/test/core/unit/environment/Environment.cpp +++ b/packages/zoltan2/test/core/unit/environment/Environment.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing Zoltan2::Environment /*! \todo test timer diff --git a/packages/zoltan2/test/core/unit/environment/Machine.cpp b/packages/zoltan2/test/core/unit/environment/Machine.cpp index 23c79c5a60c5..0bde39373eac 100644 --- a/packages/zoltan2/test/core/unit/environment/Machine.cpp +++ b/packages/zoltan2/test/core/unit/environment/Machine.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing Zoltan2::MachineRepresentation diff --git a/packages/zoltan2/test/core/unit/environment/MetricOutputManager.cpp b/packages/zoltan2/test/core/unit/environment/MetricOutputManager.cpp index e717e08c4a43..73de3da7c720 100644 --- a/packages/zoltan2/test/core/unit/environment/MetricOutputManager.cpp +++ b/packages/zoltan2/test/core/unit/environment/MetricOutputManager.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing the MetricOutputManager object. // diff --git a/packages/zoltan2/test/core/unit/environment/Parameters.cpp b/packages/zoltan2/test/core/unit/environment/Parameters.cpp index e2127c8c0988..b574bf76aaf0 100644 --- a/packages/zoltan2/test/core/unit/environment/Parameters.cpp +++ b/packages/zoltan2/test/core/unit/environment/Parameters.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing integer range list parameters. Serial test. 
diff --git a/packages/zoltan2/test/core/unit/environment/TimerManager.cpp b/packages/zoltan2/test/core/unit/environment/TimerManager.cpp index 46962a9c8da5..7a276c1d1230 100644 --- a/packages/zoltan2/test/core/unit/environment/TimerManager.cpp +++ b/packages/zoltan2/test/core/unit/environment/TimerManager.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing the TimerManager class. // TODO we only test that it doesn't crash. diff --git a/packages/zoltan2/test/core/unit/input/APFMeshInput.cpp b/packages/zoltan2/test/core/unit/input/APFMeshInput.cpp index 1d434b2b8d47..bbc6a2e54b6a 100644 --- a/packages/zoltan2/test/core/unit/input/APFMeshInput.cpp +++ b/packages/zoltan2/test/core/unit/input/APFMeshInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::APFMeshAdapter diff --git a/packages/zoltan2/test/core/unit/input/BasicCoordinateInput.cpp b/packages/zoltan2/test/core/unit/input/BasicCoordinateInput.cpp index fb5af1fe2544..3246c99b16e5 100644 --- a/packages/zoltan2/test/core/unit/input/BasicCoordinateInput.cpp +++ b/packages/zoltan2/test/core/unit/input/BasicCoordinateInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test for Zoltan2::BasicVectorAdapter for coordinate-based problems diff --git a/packages/zoltan2/test/core/unit/input/BasicIdentifierInput.cpp b/packages/zoltan2/test/core/unit/input/BasicIdentifierInput.cpp index e8109c9a8bb0..85231ad44c9f 100644 --- a/packages/zoltan2/test/core/unit/input/BasicIdentifierInput.cpp +++ b/packages/zoltan2/test/core/unit/input/BasicIdentifierInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::BasicIdentifierAdapter diff --git a/packages/zoltan2/test/core/unit/input/BasicKokkosIdentifierInput.cpp b/packages/zoltan2/test/core/unit/input/BasicKokkosIdentifierInput.cpp index f0bc36c6ffde..afbe6593ad05 100644 --- a/packages/zoltan2/test/core/unit/input/BasicKokkosIdentifierInput.cpp +++ b/packages/zoltan2/test/core/unit/input/BasicKokkosIdentifierInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::BasicKokkosIdentifierAdapter diff --git a/packages/zoltan2/test/core/unit/input/BasicVectorAdapter.cpp b/packages/zoltan2/test/core/unit/input/BasicVectorAdapter.cpp index 624c0decb96c..21ab869bcddd 100644 --- a/packages/zoltan2/test/core/unit/input/BasicVectorAdapter.cpp +++ b/packages/zoltan2/test/core/unit/input/BasicVectorAdapter.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test for Zoltan2::BasicVectorAdapter diff --git a/packages/zoltan2/test/core/unit/input/BasicVectorInput.cpp b/packages/zoltan2/test/core/unit/input/BasicVectorInput.cpp index ca649181cbaf..ee3ea72b1a16 100644 --- a/packages/zoltan2/test/core/unit/input/BasicVectorInput.cpp +++ b/packages/zoltan2/test/core/unit/input/BasicVectorInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test for Zoltan2::BasicVectorAdapter diff --git a/packages/zoltan2/test/core/unit/input/InputTraitsBad.cpp b/packages/zoltan2/test/core/unit/input/InputTraitsBad.cpp index 613cb3e3521c..f5c5756d8211 100644 --- a/packages/zoltan2/test/core/unit/input/InputTraitsBad.cpp +++ b/packages/zoltan2/test/core/unit/input/InputTraitsBad.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test for Zoltan2::InputTraitsBad diff --git a/packages/zoltan2/test/core/unit/input/InputTraitsGood.cpp b/packages/zoltan2/test/core/unit/input/InputTraitsGood.cpp index 576929fe5900..30075114bbe3 100644 --- a/packages/zoltan2/test/core/unit/input/InputTraitsGood.cpp +++ b/packages/zoltan2/test/core/unit/input/InputTraitsGood.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test for Zoltan2::InputTraitsGood diff --git a/packages/zoltan2/test/core/unit/input/MatrixAdapter.cpp b/packages/zoltan2/test/core/unit/input/MatrixAdapter.cpp index 4f0fe9e600d2..24a072491f2b 100644 --- a/packages/zoltan2/test/core/unit/input/MatrixAdapter.cpp +++ b/packages/zoltan2/test/core/unit/input/MatrixAdapter.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing of GraphAdapter built from Xpetra matrix input adapters. 
// diff --git a/packages/zoltan2/test/core/unit/input/PamgenMeshInput.cpp b/packages/zoltan2/test/core/unit/input/PamgenMeshInput.cpp index d7e9add30f62..ec6d12ba7da7 100644 --- a/packages/zoltan2/test/core/unit/input/PamgenMeshInput.cpp +++ b/packages/zoltan2/test/core/unit/input/PamgenMeshInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::PamgenMeshAdapter diff --git a/packages/zoltan2/test/core/unit/input/TpetraCrsMatrixInput.cpp b/packages/zoltan2/test/core/unit/input/TpetraCrsMatrixInput.cpp index e112687401d5..7b70fdda8068 100644 --- a/packages/zoltan2/test/core/unit/input/TpetraCrsMatrixInput.cpp +++ b/packages/zoltan2/test/core/unit/input/TpetraCrsMatrixInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::TpetraCrsMatrixAdapter diff --git a/packages/zoltan2/test/core/unit/input/TpetraRowGraphInput.cpp b/packages/zoltan2/test/core/unit/input/TpetraRowGraphInput.cpp index 27b3838ce0d5..5b19976f76de 100644 --- a/packages/zoltan2/test/core/unit/input/TpetraRowGraphInput.cpp +++ b/packages/zoltan2/test/core/unit/input/TpetraRowGraphInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::TpetraRowGraphAdapter /*! \file TpetraRowGraphAdapter.cpp diff --git a/packages/zoltan2/test/core/unit/input/TpetraRowGraphInputKokkos.cpp b/packages/zoltan2/test/core/unit/input/TpetraRowGraphInputKokkos.cpp index 14764c59fbd4..8347eae5f564 100644 --- a/packages/zoltan2/test/core/unit/input/TpetraRowGraphInputKokkos.cpp +++ b/packages/zoltan2/test/core/unit/input/TpetraRowGraphInputKokkos.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::TpetraRowGraphAdapter /*! 
\file TpetraRowGraphAdapter.cpp diff --git a/packages/zoltan2/test/core/unit/input/TpetraRowMatrixInput.cpp b/packages/zoltan2/test/core/unit/input/TpetraRowMatrixInput.cpp index 1e861fb1ebe3..e725caff7390 100644 --- a/packages/zoltan2/test/core/unit/input/TpetraRowMatrixInput.cpp +++ b/packages/zoltan2/test/core/unit/input/TpetraRowMatrixInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::TpetraRowMatrixAdapter diff --git a/packages/zoltan2/test/core/unit/input/XpetraCrsGraphInput.cpp b/packages/zoltan2/test/core/unit/input/XpetraCrsGraphInput.cpp index 058317b77c53..c76081f51d3c 100644 --- a/packages/zoltan2/test/core/unit/input/XpetraCrsGraphInput.cpp +++ b/packages/zoltan2/test/core/unit/input/XpetraCrsGraphInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::XpetraCrsGraphAdapter /*! 
\file XpetraCrsGraphInput.cpp diff --git a/packages/zoltan2/test/core/unit/input/XpetraCrsMatrixInput.cpp b/packages/zoltan2/test/core/unit/input/XpetraCrsMatrixInput.cpp index 7ca4d83af9f1..a1aadea4c660 100644 --- a/packages/zoltan2/test/core/unit/input/XpetraCrsMatrixInput.cpp +++ b/packages/zoltan2/test/core/unit/input/XpetraCrsMatrixInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::XpetraCrsMatrixAdapter diff --git a/packages/zoltan2/test/core/unit/input/XpetraMultiVectorInput.cpp b/packages/zoltan2/test/core/unit/input/XpetraMultiVectorInput.cpp index d86a88b06f1e..a1e65be112ae 100644 --- a/packages/zoltan2/test/core/unit/input/XpetraMultiVectorInput.cpp +++ b/packages/zoltan2/test/core/unit/input/XpetraMultiVectorInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // @HEADER // *********************************************************************** // Zoltan2: Sandia Partitioning Ordering & Coloring Library diff --git a/packages/zoltan2/test/core/unit/input/XpetraTraits.cpp b/packages/zoltan2/test/core/unit/input/XpetraTraits.cpp index ca8caa652baf..8e1bf7dfeba2 100644 --- a/packages/zoltan2/test/core/unit/input/XpetraTraits.cpp +++ b/packages/zoltan2/test/core/unit/input/XpetraTraits.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic test of the XpetraTraits definitions. // diff --git a/packages/zoltan2/test/core/unit/input/XpetraVectorInput.cpp b/packages/zoltan2/test/core/unit/input/XpetraVectorInput.cpp index 48fe55651df7..7db4d050a9b8 100644 --- a/packages/zoltan2/test/core/unit/input/XpetraVectorInput.cpp +++ b/packages/zoltan2/test/core/unit/input/XpetraVectorInput.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // /*! 
\file XpetraVectorInput.cpp diff --git a/packages/zoltan2/test/core/unit/models/CoordinateModel.cpp b/packages/zoltan2/test/core/unit/models/CoordinateModel.cpp index 3de2c2a8d19c..441289dd0cff 100644 --- a/packages/zoltan2/test/core/unit/models/CoordinateModel.cpp +++ b/packages/zoltan2/test/core/unit/models/CoordinateModel.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing of CoordinateModel // diff --git a/packages/zoltan2/test/core/unit/models/GraphModel.cpp b/packages/zoltan2/test/core/unit/models/GraphModel.cpp index facfcccf7d95..4128a0b1ae70 100644 --- a/packages/zoltan2/test/core/unit/models/GraphModel.cpp +++ b/packages/zoltan2/test/core/unit/models/GraphModel.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing of GraphModel built from Xpetra matrix input adapters. 
// diff --git a/packages/zoltan2/test/core/unit/models/GraphModel2ndAdjsFromAdjs.cpp b/packages/zoltan2/test/core/unit/models/GraphModel2ndAdjsFromAdjs.cpp index 7320855a5899..dcf959382bb9 100644 --- a/packages/zoltan2/test/core/unit/models/GraphModel2ndAdjsFromAdjs.cpp +++ b/packages/zoltan2/test/core/unit/models/GraphModel2ndAdjsFromAdjs.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Basic testing of Zoltan2::PamgenMeshAdapter diff --git a/packages/zoltan2/test/core/unit/models/HyperGraphModel.cpp b/packages/zoltan2/test/core/unit/models/HyperGraphModel.cpp index 5ab375cdd9e1..a46f5d6d22dd 100644 --- a/packages/zoltan2/test/core/unit/models/HyperGraphModel.cpp +++ b/packages/zoltan2/test/core/unit/models/HyperGraphModel.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing of HyperGraphModel built from APF mesh adapters. 
// diff --git a/packages/zoltan2/test/core/unit/models/IdentifierModel.cpp b/packages/zoltan2/test/core/unit/models/IdentifierModel.cpp index 3301645c243f..34fbc5dffa22 100644 --- a/packages/zoltan2/test/core/unit/models/IdentifierModel.cpp +++ b/packages/zoltan2/test/core/unit/models/IdentifierModel.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Testing of IdentifierModel // diff --git a/packages/zoltan2/test/core/unit/problems/Mapping.cpp b/packages/zoltan2/test/core/unit/problems/Mapping.cpp index 85d11644e68a..2f892c9ca81c 100644 --- a/packages/zoltan2/test/core/unit/problems/Mapping.cpp +++ b/packages/zoltan2/test/core/unit/problems/Mapping.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test the MappingProblem and MappingSolution classes. 
// diff --git a/packages/zoltan2/test/core/unit/problems/PartitioningSolution.cpp b/packages/zoltan2/test/core/unit/problems/PartitioningSolution.cpp index 9a8fb240253b..e526fa10b436 100644 --- a/packages/zoltan2/test/core/unit/problems/PartitioningSolution.cpp +++ b/packages/zoltan2/test/core/unit/problems/PartitioningSolution.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test the PartitioningSolution class. // diff --git a/packages/zoltan2/test/core/unit/util/AlltoAll.cpp b/packages/zoltan2/test/core/unit/util/AlltoAll.cpp index a2df800ba931..026b06f15e2e 100644 --- a/packages/zoltan2/test/core/unit/util/AlltoAll.cpp +++ b/packages/zoltan2/test/core/unit/util/AlltoAll.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER // TODO: doxygen comments diff --git a/packages/zoltan2/test/core/unit/util/Metric.cpp b/packages/zoltan2/test/core/unit/util/Metric.cpp index bb8c90d991c1..b1c798c1b0fa 100644 --- a/packages/zoltan2/test/core/unit/util/Metric.cpp +++ b/packages/zoltan2/test/core/unit/util/Metric.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // Test the following: // EvaluatePartition class diff --git a/packages/zoltan2/test/core/unit/util/StridedData.cpp b/packages/zoltan2/test/core/unit/util/StridedData.cpp index a870f4032fee..9b867df22fe9 100644 --- a/packages/zoltan2/test/core/unit/util/StridedData.cpp +++ b/packages/zoltan2/test/core/unit/util/StridedData.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // // This is another test where you need to look at the output to // know if it's right. This should be fixed. 
diff --git a/packages/zoltan2/test/core/unit/util/TPLTraits.cpp b/packages/zoltan2/test/core/unit/util/TPLTraits.cpp index db2b90dcf866..bc5f48391fbf 100644 --- a/packages/zoltan2/test/core/unit/util/TPLTraits.cpp +++ b/packages/zoltan2/test/core/unit/util/TPLTraits.cpp @@ -1,49 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER - // Unit test for Zoltan2_TPLTraits.hpp // Passes various zgno_t types to ASSIGN. // Some combinations should work without error; diff --git a/packages/zoltan2/test/core/unit/util/componentMetrics.cpp b/packages/zoltan2/test/core/unit/util/componentMetrics.cpp index 8d0a5dccee3c..b40ebeb5b981 100644 --- a/packages/zoltan2/test/core/unit/util/componentMetrics.cpp +++ b/packages/zoltan2/test/core/unit/util/componentMetrics.cpp @@ -1,47 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/core/unit/util/findUniqueGids.cpp b/packages/zoltan2/test/core/unit/util/findUniqueGids.cpp index b307bd52c5d3..8fcba217c56e 100644 --- a/packages/zoltan2/test/core/unit/util/findUniqueGids.cpp +++ b/packages/zoltan2/test/core/unit/util/findUniqueGids.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER // Program to testing Zoltan2::findUniqueGids capability diff --git a/packages/zoltan2/test/sphynx/Sphynx_Research_Driver.cpp b/packages/zoltan2/test/sphynx/Sphynx_Research_Driver.cpp index f1974126171b..e869bc856bef 100644 --- a/packages/zoltan2/test/sphynx/Sphynx_Research_Driver.cpp +++ b/packages/zoltan2/test/sphynx/Sphynx_Research_Driver.cpp @@ -1,47 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// Jennifer Loe (jloe@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #include "Teuchos_CommandLineProcessor.hpp" diff --git a/packages/zoltan2/test/sphynx/Test_Sphynx.cpp b/packages/zoltan2/test/sphynx/Test_Sphynx.cpp index 0112c928ed0d..d18a10f782b5 100644 --- a/packages/zoltan2/test/sphynx/Test_Sphynx.cpp +++ b/packages/zoltan2/test/sphynx/Test_Sphynx.cpp @@ -1,48 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// Karen Devine (kddevin@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + #include #include #include diff --git a/packages/zoltan2/test/sphynx/largestComponent2Binary.cpp b/packages/zoltan2/test/sphynx/largestComponent2Binary.cpp index 6ca948ba652c..02ac70d8f987 100644 --- a/packages/zoltan2/test/sphynx/largestComponent2Binary.cpp +++ b/packages/zoltan2/test/sphynx/largestComponent2Binary.cpp @@ -1,46 +1,12 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER + // ///////////////////////////////////////////////////////////////////////////// // Written by Seher Acer, 2019 diff --git a/packages/zoltan2/test/sphynx/readMatrixFromBinaryFile.hpp b/packages/zoltan2/test/sphynx/readMatrixFromBinaryFile.hpp index 3d5d3f19500b..e4437ba32c98 100644 --- a/packages/zoltan2/test/sphynx/readMatrixFromBinaryFile.hpp +++ b/packages/zoltan2/test/sphynx/readMatrixFromBinaryFile.hpp @@ -1,45 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Seher Acer (sacer@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER #ifndef __READMATRIXFROMBINARYFILE_HPP diff --git a/packages/zoltan2/util/xmlToHeaderDefinition.cpp b/packages/zoltan2/util/xmlToHeaderDefinition.cpp index 96b21a5c7fd7..371998f49f46 100644 --- a/packages/zoltan2/util/xmlToHeaderDefinition.cpp +++ b/packages/zoltan2/util/xmlToHeaderDefinition.cpp @@ -1,46 +1,10 @@ // @HEADER -// -// *********************************************************************** -// +// ***************************************************************************** // Zoltan2: A package of combinatorial algorithms for scientific computing -// Copyright 2012 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Karen Devine (kddevin@sandia.gov) -// Erik Boman (egboman@sandia.gov) -// Siva Rajamanickam (srajama@sandia.gov) -// -// *********************************************************************** // +// Copyright 2012 NTESS and the Zoltan2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** // @HEADER /*! \file xmlToHeaderDefinition.cpp From 24f1e829e38a2a11e42ccb5141fec68ab892c847 Mon Sep 17 00:00:00 2001 From: "Curtis C. Ober" Date: Mon, 22 Jul 2024 15:20:48 -0600 Subject: [PATCH 03/37] Zoltan: Missed some files Signed-off-by: Curtis C. 
Ober --- packages/zoltan/Makefile.in | 43 ------------------ .../NEA_docs/developer_html/dev_hybrid.html | 44 ------------------- packages/zoltan/docs/Zoltan.html | 44 ------------------- packages/zoltan/docs/Zoltan_FAQ.html | 44 ------------------- packages/zoltan/docs/Zoltan_bugreport.html | 44 ------------------- packages/zoltan/docs/Zoltan_cite.html | 44 ------------------- packages/zoltan/docs/Zoltan_download.html | 44 ------------------- packages/zoltan/docs/Zoltan_phil.html | 44 ------------------- packages/zoltan/docs/Zoltan_pubs.html | 44 ------------------- packages/zoltan/docs/dev_html/dev.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_add.html | 44 ------------------- .../docs/dev_html/dev_add_interface.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_add_lb.html | 44 ------------------- .../zoltan/docs/dev_html/dev_add_memory.html | 44 ------------------- .../zoltan/docs/dev_html/dev_add_params.html | 44 ------------------- .../zoltan/docs/dev_html/dev_add_remap.html | 44 ------------------- .../zoltan/docs/dev_html/dev_add_struct.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_cpp.html | 44 ------------------- .../zoltan/docs/dev_html/dev_degenerate.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_dist.html | 44 ------------------- .../docs/dev_html/dev_dist_compile.html | 44 ------------------- .../zoltan/docs/dev_html/dev_dist_cvs.html | 44 ------------------- .../zoltan/docs/dev_html/dev_dist_dir.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_driver.html | 44 ------------------- .../zoltan/docs/dev_html/dev_fortran.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_hier.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_hsfc.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_intro.html | 44 ------------------- .../docs/dev_html/dev_intro_coding.html | 44 ------------------- .../docs/dev_html/dev_intro_philosophy.html | 44 
------------------- .../zoltan/docs/dev_html/dev_intro_sqe.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_lb.html | 44 ------------------- .../docs/dev_html/dev_lb_interface.html | 44 ------------------- .../zoltan/docs/dev_html/dev_lb_structs.html | 44 ------------------- .../zoltan/docs/dev_html/dev_lb_types.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_mig.html | 44 ------------------- .../zoltan/docs/dev_html/dev_parmetis.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_phg.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_rcb.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_refs.html | 44 ------------------- .../zoltan/docs/dev_html/dev_reftree.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_rib.html | 44 ------------------- .../zoltan/docs/dev_html/dev_services.html | 44 ------------------- .../docs/dev_html/dev_services_debug.html | 44 ------------------- .../docs/dev_html/dev_services_hash.html | 44 ------------------- .../docs/dev_html/dev_services_objlist.html | 44 ------------------- .../docs/dev_html/dev_services_parallel.html | 44 ------------------- .../docs/dev_html/dev_services_params.html | 44 ------------------- .../docs/dev_html/dev_services_time.html | 44 ------------------- .../dev_html/dev_services_zoltantimer.html | 44 ------------------- .../zoltan/docs/dev_html/dev_test_script.html | 44 ------------------- packages/zoltan/docs/dev_html/dev_view.html | 44 ------------------- packages/zoltan/docs/tu_html/tutorial.html | 44 ------------------- packages/zoltan/docs/ug_html/ug.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_block.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_cyclic.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg_geom.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_graph.html | 44 ------------------- 
packages/zoltan/docs/ug_html/ug_alg_hier.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg_hsfc.html | 44 ------------------- .../docs/ug_html/ug_alg_hypergraph.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_jostle.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg_oct.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_parkway.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_parmetis.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_patoh.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg_phg.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_ptscotch.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_random.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg_rcb.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_reftree.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_alg_rib.html | 44 ------------------- .../zoltan/docs/ug_html/ug_alg_simple.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_backward.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_color.html | 44 ------------------- .../docs/ug_html/ug_color_parallel.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_cpp.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_examples.html | 44 ------------------- .../zoltan/docs/ug_html/ug_examples_init.html | 44 ------------------- .../zoltan/docs/ug_html/ug_examples_lb.html | 44 ------------------- .../zoltan/docs/ug_html/ug_examples_mig.html | 44 ------------------- .../docs/ug_html/ug_examples_query.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_fortran.html | 44 ------------------- .../zoltan/docs/ug_html/ug_graph_build.html | 44 ------------------- .../zoltan/docs/ug_html/ug_graph_vs_hg.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_index.html | 44 ------------------- .../zoltan/docs/ug_html/ug_interface.html | 44 ------------------- 
.../docs/ug_html/ug_interface_augment.html | 44 ------------------- .../docs/ug_html/ug_interface_color.html | 44 ------------------- .../docs/ug_html/ug_interface_init.html | 44 ------------------- .../zoltan/docs/ug_html/ug_interface_lb.html | 44 ------------------- .../zoltan/docs/ug_html/ug_interface_mig.html | 44 ------------------- .../docs/ug_html/ug_interface_order.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_intro.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_order.html | 44 ------------------- .../docs/ug_html/ug_order_local_hsfc.html | 44 ------------------- .../docs/ug_html/ug_order_parmetis.html | 44 ------------------- .../docs/ug_html/ug_order_ptscotch.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_param.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_query.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_query_lb.html | 44 ------------------- .../zoltan/docs/ug_html/ug_query_mig.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_refs.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_release.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_usage.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_util.html | 44 ------------------- .../zoltan/docs/ug_html/ug_util_comm.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_util_dd.html | 44 ------------------- packages/zoltan/docs/ug_html/ug_util_mem.html | 44 ------------------- packages/zoltan/example/C/Makefile.in | 43 ------------------ .../zoltan/example/C/coloring/makefile.old | 43 ------------------ packages/zoltan/example/CPP/Makefile.in | 44 ------------------- packages/zoltan/example/Makefile.am | 44 ------------------- packages/zoltan/example/Makefile.in | 44 ------------------- packages/zoltan/src/Makefile.in | 43 ------------------ packages/zoltan/src/driver/Makefile.in | 43 ------------------ packages/zoltan/src/fdriver/Makefile.in | 43 ------------------ 
packages/zoltan/src/fort/makefile | 43 ------------------ packages/zoltan/test/Large_Data/Makefile.in | 43 ------------------ packages/zoltan/test/Large_Data/README.txt | 43 ------------------ .../Utilities_Tests/Communication/makefile | 43 ------------------ 122 files changed, 5358 deletions(-) diff --git a/packages/zoltan/Makefile.in b/packages/zoltan/Makefile.in index 7cd0fba68c0d..8f74c72e00ac 100644 --- a/packages/zoltan/Makefile.in +++ b/packages/zoltan/Makefile.in @@ -15,49 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ diff --git a/packages/zoltan/doc/NEA_docs/developer_html/dev_hybrid.html b/packages/zoltan/doc/NEA_docs/developer_html/dev_hybrid.html index 121993a9437c..ae480f25496e 100644 --- a/packages/zoltan/doc/NEA_docs/developer_html/dev_hybrid.html +++ b/packages/zoltan/doc/NEA_docs/developer_html/dev_hybrid.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan.html b/packages/zoltan/docs/Zoltan.html index 154377ae653c..9ed671e77eae 100644 --- a/packages/zoltan/docs/Zoltan.html +++ b/packages/zoltan/docs/Zoltan.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan_FAQ.html b/packages/zoltan/docs/Zoltan_FAQ.html index 45c332b8342b..066cc568a511 100644 --- a/packages/zoltan/docs/Zoltan_FAQ.html +++ b/packages/zoltan/docs/Zoltan_FAQ.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan_bugreport.html b/packages/zoltan/docs/Zoltan_bugreport.html index 77ecab7b34b5..2fd089e2080e 100644 --- a/packages/zoltan/docs/Zoltan_bugreport.html +++ b/packages/zoltan/docs/Zoltan_bugreport.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan_cite.html b/packages/zoltan/docs/Zoltan_cite.html index a47a1aa6cb6b..54dd416245c3 100644 --- 
a/packages/zoltan/docs/Zoltan_cite.html +++ b/packages/zoltan/docs/Zoltan_cite.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan_download.html b/packages/zoltan/docs/Zoltan_download.html index 6c723c5352e7..c97ccf7d61c9 100644 --- a/packages/zoltan/docs/Zoltan_download.html +++ b/packages/zoltan/docs/Zoltan_download.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan_phil.html b/packages/zoltan/docs/Zoltan_phil.html index 78e8dd7785bb..5f470f171ab6 100644 --- a/packages/zoltan/docs/Zoltan_phil.html +++ b/packages/zoltan/docs/Zoltan_phil.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/Zoltan_pubs.html b/packages/zoltan/docs/Zoltan_pubs.html index c4a051060eef..7b4dd6bac0fa 100644 --- a/packages/zoltan/docs/Zoltan_pubs.html +++ b/packages/zoltan/docs/Zoltan_pubs.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev.html b/packages/zoltan/docs/dev_html/dev.html index 02137a4f6557..4cee3454d985 100644 --- a/packages/zoltan/docs/dev_html/dev.html +++ b/packages/zoltan/docs/dev_html/dev.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add.html b/packages/zoltan/docs/dev_html/dev_add.html index da445d75ce06..2b6740be8e46 100644 --- a/packages/zoltan/docs/dev_html/dev_add.html +++ b/packages/zoltan/docs/dev_html/dev_add.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add_interface.html b/packages/zoltan/docs/dev_html/dev_add_interface.html index 640943fe8763..aa4905178ca3 100644 --- a/packages/zoltan/docs/dev_html/dev_add_interface.html +++ b/packages/zoltan/docs/dev_html/dev_add_interface.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add_lb.html b/packages/zoltan/docs/dev_html/dev_add_lb.html index 73c016cc5228..e85f742e0c9c 100644 --- a/packages/zoltan/docs/dev_html/dev_add_lb.html +++ b/packages/zoltan/docs/dev_html/dev_add_lb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add_memory.html 
b/packages/zoltan/docs/dev_html/dev_add_memory.html index 2ce07f0bcdd0..a34b762d313c 100644 --- a/packages/zoltan/docs/dev_html/dev_add_memory.html +++ b/packages/zoltan/docs/dev_html/dev_add_memory.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add_params.html b/packages/zoltan/docs/dev_html/dev_add_params.html index af08d10a3e01..aa3a568e0a6f 100644 --- a/packages/zoltan/docs/dev_html/dev_add_params.html +++ b/packages/zoltan/docs/dev_html/dev_add_params.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add_remap.html b/packages/zoltan/docs/dev_html/dev_add_remap.html index 569bae7f59ef..3bfffb645a9e 100644 --- a/packages/zoltan/docs/dev_html/dev_add_remap.html +++ b/packages/zoltan/docs/dev_html/dev_add_remap.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_add_struct.html b/packages/zoltan/docs/dev_html/dev_add_struct.html index 7220a3b4b2ae..7359c18bf738 100644 --- a/packages/zoltan/docs/dev_html/dev_add_struct.html +++ b/packages/zoltan/docs/dev_html/dev_add_struct.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_cpp.html b/packages/zoltan/docs/dev_html/dev_cpp.html index 19efcb9aa8a8..90526bc22c6b 100644 --- a/packages/zoltan/docs/dev_html/dev_cpp.html +++ b/packages/zoltan/docs/dev_html/dev_cpp.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_degenerate.html b/packages/zoltan/docs/dev_html/dev_degenerate.html index bc55e39b0224..950c8e65d056 100644 --- a/packages/zoltan/docs/dev_html/dev_degenerate.html +++ b/packages/zoltan/docs/dev_html/dev_degenerate.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_dist.html b/packages/zoltan/docs/dev_html/dev_dist.html index e077988f087b..695c169c7309 100644 --- a/packages/zoltan/docs/dev_html/dev_dist.html +++ b/packages/zoltan/docs/dev_html/dev_dist.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_dist_compile.html 
b/packages/zoltan/docs/dev_html/dev_dist_compile.html index e9fc2bc768a1..b64dfb339633 100644 --- a/packages/zoltan/docs/dev_html/dev_dist_compile.html +++ b/packages/zoltan/docs/dev_html/dev_dist_compile.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_dist_cvs.html b/packages/zoltan/docs/dev_html/dev_dist_cvs.html index c9d614959e1a..4cf0bbec53a7 100644 --- a/packages/zoltan/docs/dev_html/dev_dist_cvs.html +++ b/packages/zoltan/docs/dev_html/dev_dist_cvs.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_dist_dir.html b/packages/zoltan/docs/dev_html/dev_dist_dir.html index bb0ddff1a1ab..dc7d74d3587b 100644 --- a/packages/zoltan/docs/dev_html/dev_dist_dir.html +++ b/packages/zoltan/docs/dev_html/dev_dist_dir.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_driver.html b/packages/zoltan/docs/dev_html/dev_driver.html index 73162aa171ea..0a02518e1bc5 100644 --- a/packages/zoltan/docs/dev_html/dev_driver.html +++ b/packages/zoltan/docs/dev_html/dev_driver.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_fortran.html b/packages/zoltan/docs/dev_html/dev_fortran.html index c6ee6313ebe1..7a9a5e3d1e27 100644 --- a/packages/zoltan/docs/dev_html/dev_fortran.html +++ b/packages/zoltan/docs/dev_html/dev_fortran.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_hier.html b/packages/zoltan/docs/dev_html/dev_hier.html index 64db15be007c..b18086bd16ac 100644 --- a/packages/zoltan/docs/dev_html/dev_hier.html +++ b/packages/zoltan/docs/dev_html/dev_hier.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_hsfc.html b/packages/zoltan/docs/dev_html/dev_hsfc.html index b54d59f7c01e..6776d5a279b6 100644 --- a/packages/zoltan/docs/dev_html/dev_hsfc.html +++ b/packages/zoltan/docs/dev_html/dev_hsfc.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_intro.html b/packages/zoltan/docs/dev_html/dev_intro.html index 8d96ac04e7bd..3578fb70a498 100644 
--- a/packages/zoltan/docs/dev_html/dev_intro.html +++ b/packages/zoltan/docs/dev_html/dev_intro.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_intro_coding.html b/packages/zoltan/docs/dev_html/dev_intro_coding.html index 7f6b2c5e944e..32ad42c2619b 100644 --- a/packages/zoltan/docs/dev_html/dev_intro_coding.html +++ b/packages/zoltan/docs/dev_html/dev_intro_coding.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_intro_philosophy.html b/packages/zoltan/docs/dev_html/dev_intro_philosophy.html index b73b2b24bb5b..c05d13b69fa1 100644 --- a/packages/zoltan/docs/dev_html/dev_intro_philosophy.html +++ b/packages/zoltan/docs/dev_html/dev_intro_philosophy.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_intro_sqe.html b/packages/zoltan/docs/dev_html/dev_intro_sqe.html index dca3acfb8bc4..62b456cdab27 100644 --- a/packages/zoltan/docs/dev_html/dev_intro_sqe.html +++ b/packages/zoltan/docs/dev_html/dev_intro_sqe.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_lb.html b/packages/zoltan/docs/dev_html/dev_lb.html index 8e7d8f055b1a..0d2d01910957 100644 --- a/packages/zoltan/docs/dev_html/dev_lb.html +++ b/packages/zoltan/docs/dev_html/dev_lb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_lb_interface.html b/packages/zoltan/docs/dev_html/dev_lb_interface.html index cb29cb6ba2be..60cf874ee4ab 100644 --- a/packages/zoltan/docs/dev_html/dev_lb_interface.html +++ b/packages/zoltan/docs/dev_html/dev_lb_interface.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_lb_structs.html b/packages/zoltan/docs/dev_html/dev_lb_structs.html index f6c06f5dc50b..c24006b5283c 100644 --- a/packages/zoltan/docs/dev_html/dev_lb_structs.html +++ b/packages/zoltan/docs/dev_html/dev_lb_structs.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_lb_types.html b/packages/zoltan/docs/dev_html/dev_lb_types.html index ade5ae6bf24d..301182a9255d 100644 --- 
a/packages/zoltan/docs/dev_html/dev_lb_types.html +++ b/packages/zoltan/docs/dev_html/dev_lb_types.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_mig.html b/packages/zoltan/docs/dev_html/dev_mig.html index 971ada412d30..5cfa72f2cec9 100644 --- a/packages/zoltan/docs/dev_html/dev_mig.html +++ b/packages/zoltan/docs/dev_html/dev_mig.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_parmetis.html b/packages/zoltan/docs/dev_html/dev_parmetis.html index 6be236b67da8..b400434cc07e 100644 --- a/packages/zoltan/docs/dev_html/dev_parmetis.html +++ b/packages/zoltan/docs/dev_html/dev_parmetis.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_phg.html b/packages/zoltan/docs/dev_html/dev_phg.html index 9fb62145ce3d..bc1168362091 100644 --- a/packages/zoltan/docs/dev_html/dev_phg.html +++ b/packages/zoltan/docs/dev_html/dev_phg.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_rcb.html b/packages/zoltan/docs/dev_html/dev_rcb.html index fbd04e0a19d5..0b988bb07645 100644 --- a/packages/zoltan/docs/dev_html/dev_rcb.html +++ b/packages/zoltan/docs/dev_html/dev_rcb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_refs.html b/packages/zoltan/docs/dev_html/dev_refs.html index 654f0d417316..6b46cdba9078 100644 --- a/packages/zoltan/docs/dev_html/dev_refs.html +++ b/packages/zoltan/docs/dev_html/dev_refs.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_reftree.html b/packages/zoltan/docs/dev_html/dev_reftree.html index 32cbc873065b..ab2b57112b2f 100644 --- a/packages/zoltan/docs/dev_html/dev_reftree.html +++ b/packages/zoltan/docs/dev_html/dev_reftree.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_rib.html b/packages/zoltan/docs/dev_html/dev_rib.html index a68842aef85f..15df6bec7e12 100644 --- a/packages/zoltan/docs/dev_html/dev_rib.html +++ b/packages/zoltan/docs/dev_html/dev_rib.html @@ -1,47 +1,3 @@ - diff --git 
a/packages/zoltan/docs/dev_html/dev_services.html b/packages/zoltan/docs/dev_html/dev_services.html index 70d316667313..f3c137f6e209 100644 --- a/packages/zoltan/docs/dev_html/dev_services.html +++ b/packages/zoltan/docs/dev_html/dev_services.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_debug.html b/packages/zoltan/docs/dev_html/dev_services_debug.html index 81af948df00a..5cd2714e8fad 100644 --- a/packages/zoltan/docs/dev_html/dev_services_debug.html +++ b/packages/zoltan/docs/dev_html/dev_services_debug.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_hash.html b/packages/zoltan/docs/dev_html/dev_services_hash.html index 55638b3675d8..b95cfa50d9da 100644 --- a/packages/zoltan/docs/dev_html/dev_services_hash.html +++ b/packages/zoltan/docs/dev_html/dev_services_hash.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_objlist.html b/packages/zoltan/docs/dev_html/dev_services_objlist.html index e2d9c327f86e..b6ce66c7e384 100644 --- a/packages/zoltan/docs/dev_html/dev_services_objlist.html +++ b/packages/zoltan/docs/dev_html/dev_services_objlist.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_parallel.html b/packages/zoltan/docs/dev_html/dev_services_parallel.html index c524ba20f71d..11d8f088cd53 100644 --- a/packages/zoltan/docs/dev_html/dev_services_parallel.html +++ b/packages/zoltan/docs/dev_html/dev_services_parallel.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_params.html b/packages/zoltan/docs/dev_html/dev_services_params.html index 2712706e41af..bc3cebfb5fb8 100644 --- a/packages/zoltan/docs/dev_html/dev_services_params.html +++ b/packages/zoltan/docs/dev_html/dev_services_params.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_time.html b/packages/zoltan/docs/dev_html/dev_services_time.html index e65bf983cdae..864208be5ef2 100644 --- 
a/packages/zoltan/docs/dev_html/dev_services_time.html +++ b/packages/zoltan/docs/dev_html/dev_services_time.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_services_zoltantimer.html b/packages/zoltan/docs/dev_html/dev_services_zoltantimer.html index ca195864ebb8..a1cac198e73c 100644 --- a/packages/zoltan/docs/dev_html/dev_services_zoltantimer.html +++ b/packages/zoltan/docs/dev_html/dev_services_zoltantimer.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_test_script.html b/packages/zoltan/docs/dev_html/dev_test_script.html index 17c2e48ca8c9..77a454b6a1ed 100644 --- a/packages/zoltan/docs/dev_html/dev_test_script.html +++ b/packages/zoltan/docs/dev_html/dev_test_script.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/dev_html/dev_view.html b/packages/zoltan/docs/dev_html/dev_view.html index f918fef862a4..d379704727f9 100644 --- a/packages/zoltan/docs/dev_html/dev_view.html +++ b/packages/zoltan/docs/dev_html/dev_view.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/tu_html/tutorial.html b/packages/zoltan/docs/tu_html/tutorial.html index a9e59c4fe722..407daa88fe58 100644 --- a/packages/zoltan/docs/tu_html/tutorial.html +++ b/packages/zoltan/docs/tu_html/tutorial.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug.html b/packages/zoltan/docs/ug_html/ug.html index 9f636d22087d..7fa3a8b62ef8 100644 --- a/packages/zoltan/docs/ug_html/ug.html +++ b/packages/zoltan/docs/ug_html/ug.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg.html b/packages/zoltan/docs/ug_html/ug_alg.html index dc386de3788a..95eecc9976ed 100644 --- a/packages/zoltan/docs/ug_html/ug_alg.html +++ b/packages/zoltan/docs/ug_html/ug_alg.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_block.html b/packages/zoltan/docs/ug_html/ug_alg_block.html index 24bb64172fb9..fc2d43c80e87 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_block.html +++ 
b/packages/zoltan/docs/ug_html/ug_alg_block.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_cyclic.html b/packages/zoltan/docs/ug_html/ug_alg_cyclic.html index c48f36523e0d..4cd3f49f417d 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_cyclic.html +++ b/packages/zoltan/docs/ug_html/ug_alg_cyclic.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_geom.html b/packages/zoltan/docs/ug_html/ug_alg_geom.html index 73e27c520093..c9bfe01a9e3e 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_geom.html +++ b/packages/zoltan/docs/ug_html/ug_alg_geom.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_graph.html b/packages/zoltan/docs/ug_html/ug_alg_graph.html index 0331129aec9d..7e46728ca7e8 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_graph.html +++ b/packages/zoltan/docs/ug_html/ug_alg_graph.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_hier.html b/packages/zoltan/docs/ug_html/ug_alg_hier.html index 5c60996970d6..bd9b3a978a66 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_hier.html +++ b/packages/zoltan/docs/ug_html/ug_alg_hier.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_hsfc.html b/packages/zoltan/docs/ug_html/ug_alg_hsfc.html index 2eae28e5d002..8ee32f898c22 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_hsfc.html +++ b/packages/zoltan/docs/ug_html/ug_alg_hsfc.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_hypergraph.html b/packages/zoltan/docs/ug_html/ug_alg_hypergraph.html index ae2aa6c9c0ed..4ec3e7e1202f 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_hypergraph.html +++ b/packages/zoltan/docs/ug_html/ug_alg_hypergraph.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_jostle.html b/packages/zoltan/docs/ug_html/ug_alg_jostle.html index 77c8809e6fa6..5b3e6fc43777 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_jostle.html +++ b/packages/zoltan/docs/ug_html/ug_alg_jostle.html @@ -1,47 +1,3 @@ 
- diff --git a/packages/zoltan/docs/ug_html/ug_alg_oct.html b/packages/zoltan/docs/ug_html/ug_alg_oct.html index 6d3b0b664f74..8ebd2f420250 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_oct.html +++ b/packages/zoltan/docs/ug_html/ug_alg_oct.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_parkway.html b/packages/zoltan/docs/ug_html/ug_alg_parkway.html index 4d4f7e7e7271..085106829ad6 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_parkway.html +++ b/packages/zoltan/docs/ug_html/ug_alg_parkway.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_parmetis.html b/packages/zoltan/docs/ug_html/ug_alg_parmetis.html index 3281aa450c14..56dfc032075a 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_parmetis.html +++ b/packages/zoltan/docs/ug_html/ug_alg_parmetis.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_patoh.html b/packages/zoltan/docs/ug_html/ug_alg_patoh.html index b7a4a9a080d5..e0a14d3d4fcf 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_patoh.html +++ b/packages/zoltan/docs/ug_html/ug_alg_patoh.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_phg.html b/packages/zoltan/docs/ug_html/ug_alg_phg.html index 441ffd5f438b..f97d6166620c 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_phg.html +++ b/packages/zoltan/docs/ug_html/ug_alg_phg.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_ptscotch.html b/packages/zoltan/docs/ug_html/ug_alg_ptscotch.html index 715d29f5cf60..e75401defb2d 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_ptscotch.html +++ b/packages/zoltan/docs/ug_html/ug_alg_ptscotch.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_random.html b/packages/zoltan/docs/ug_html/ug_alg_random.html index 1c96758da3c2..d941fb5a788e 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_random.html +++ b/packages/zoltan/docs/ug_html/ug_alg_random.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_rcb.html 
b/packages/zoltan/docs/ug_html/ug_alg_rcb.html index f65bb4d57325..a8cb119e1687 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_rcb.html +++ b/packages/zoltan/docs/ug_html/ug_alg_rcb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_reftree.html b/packages/zoltan/docs/ug_html/ug_alg_reftree.html index 3dea63584a8c..e34a60f2f353 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_reftree.html +++ b/packages/zoltan/docs/ug_html/ug_alg_reftree.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_rib.html b/packages/zoltan/docs/ug_html/ug_alg_rib.html index a3cc8028b9dd..3cc396d2a843 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_rib.html +++ b/packages/zoltan/docs/ug_html/ug_alg_rib.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_alg_simple.html b/packages/zoltan/docs/ug_html/ug_alg_simple.html index e6c8d21f8bd3..a3c99b4fc853 100644 --- a/packages/zoltan/docs/ug_html/ug_alg_simple.html +++ b/packages/zoltan/docs/ug_html/ug_alg_simple.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_backward.html b/packages/zoltan/docs/ug_html/ug_backward.html index 97af92601c06..f6f9fee2ab58 100644 --- a/packages/zoltan/docs/ug_html/ug_backward.html +++ b/packages/zoltan/docs/ug_html/ug_backward.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_color.html b/packages/zoltan/docs/ug_html/ug_color.html index 0a66b59022fd..cc679d0f4c33 100644 --- a/packages/zoltan/docs/ug_html/ug_color.html +++ b/packages/zoltan/docs/ug_html/ug_color.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_color_parallel.html b/packages/zoltan/docs/ug_html/ug_color_parallel.html index cdedb98b7a43..f1b0a2fd4474 100644 --- a/packages/zoltan/docs/ug_html/ug_color_parallel.html +++ b/packages/zoltan/docs/ug_html/ug_color_parallel.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_cpp.html b/packages/zoltan/docs/ug_html/ug_cpp.html index 7bb59d46e4dd..4505b672617f 100644 --- 
a/packages/zoltan/docs/ug_html/ug_cpp.html +++ b/packages/zoltan/docs/ug_html/ug_cpp.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_examples.html b/packages/zoltan/docs/ug_html/ug_examples.html index 6e311e2b589b..ea6526f1e524 100644 --- a/packages/zoltan/docs/ug_html/ug_examples.html +++ b/packages/zoltan/docs/ug_html/ug_examples.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_examples_init.html b/packages/zoltan/docs/ug_html/ug_examples_init.html index 130c42789984..6c718f1ea0aa 100644 --- a/packages/zoltan/docs/ug_html/ug_examples_init.html +++ b/packages/zoltan/docs/ug_html/ug_examples_init.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_examples_lb.html b/packages/zoltan/docs/ug_html/ug_examples_lb.html index 9d2231da3d09..bea47d5141a2 100644 --- a/packages/zoltan/docs/ug_html/ug_examples_lb.html +++ b/packages/zoltan/docs/ug_html/ug_examples_lb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_examples_mig.html b/packages/zoltan/docs/ug_html/ug_examples_mig.html index 1beeeacf53b6..b1b6effb1934 100644 --- a/packages/zoltan/docs/ug_html/ug_examples_mig.html +++ b/packages/zoltan/docs/ug_html/ug_examples_mig.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_examples_query.html b/packages/zoltan/docs/ug_html/ug_examples_query.html index f3cdfa2df75d..68c0bd3d1fb5 100644 --- a/packages/zoltan/docs/ug_html/ug_examples_query.html +++ b/packages/zoltan/docs/ug_html/ug_examples_query.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_fortran.html b/packages/zoltan/docs/ug_html/ug_fortran.html index 95416d3cc371..10d8e56feb15 100644 --- a/packages/zoltan/docs/ug_html/ug_fortran.html +++ b/packages/zoltan/docs/ug_html/ug_fortran.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_graph_build.html b/packages/zoltan/docs/ug_html/ug_graph_build.html index 8a09ac552ce1..9a9283d6ce8a 100644 --- 
a/packages/zoltan/docs/ug_html/ug_graph_build.html +++ b/packages/zoltan/docs/ug_html/ug_graph_build.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_graph_vs_hg.html b/packages/zoltan/docs/ug_html/ug_graph_vs_hg.html index 62f52a573bca..a41fcfca3901 100644 --- a/packages/zoltan/docs/ug_html/ug_graph_vs_hg.html +++ b/packages/zoltan/docs/ug_html/ug_graph_vs_hg.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_index.html b/packages/zoltan/docs/ug_html/ug_index.html index 998b13bf01ad..ce7e054ebddc 100644 --- a/packages/zoltan/docs/ug_html/ug_index.html +++ b/packages/zoltan/docs/ug_html/ug_index.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface.html b/packages/zoltan/docs/ug_html/ug_interface.html index 59af504806fb..75c03e1871b7 100644 --- a/packages/zoltan/docs/ug_html/ug_interface.html +++ b/packages/zoltan/docs/ug_html/ug_interface.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface_augment.html b/packages/zoltan/docs/ug_html/ug_interface_augment.html index e4ba8bca179e..b4ed2cb9aa86 100644 --- a/packages/zoltan/docs/ug_html/ug_interface_augment.html +++ b/packages/zoltan/docs/ug_html/ug_interface_augment.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface_color.html b/packages/zoltan/docs/ug_html/ug_interface_color.html index 38e82874ed48..e625d212377c 100644 --- a/packages/zoltan/docs/ug_html/ug_interface_color.html +++ b/packages/zoltan/docs/ug_html/ug_interface_color.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface_init.html b/packages/zoltan/docs/ug_html/ug_interface_init.html index 68279c04fd87..d77a5c8c3783 100644 --- a/packages/zoltan/docs/ug_html/ug_interface_init.html +++ b/packages/zoltan/docs/ug_html/ug_interface_init.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface_lb.html b/packages/zoltan/docs/ug_html/ug_interface_lb.html index ac3f32a51c8e..436014a93b21 100644 --- 
a/packages/zoltan/docs/ug_html/ug_interface_lb.html +++ b/packages/zoltan/docs/ug_html/ug_interface_lb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface_mig.html b/packages/zoltan/docs/ug_html/ug_interface_mig.html index 48d335091fff..67381808a7c9 100644 --- a/packages/zoltan/docs/ug_html/ug_interface_mig.html +++ b/packages/zoltan/docs/ug_html/ug_interface_mig.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_interface_order.html b/packages/zoltan/docs/ug_html/ug_interface_order.html index ab892ee45fb7..16e452bdc166 100644 --- a/packages/zoltan/docs/ug_html/ug_interface_order.html +++ b/packages/zoltan/docs/ug_html/ug_interface_order.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_intro.html b/packages/zoltan/docs/ug_html/ug_intro.html index 3dcc6b893d9a..636fd3f338ce 100644 --- a/packages/zoltan/docs/ug_html/ug_intro.html +++ b/packages/zoltan/docs/ug_html/ug_intro.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_order.html b/packages/zoltan/docs/ug_html/ug_order.html index 8f06ec7a933f..f99526c6f175 100644 --- a/packages/zoltan/docs/ug_html/ug_order.html +++ b/packages/zoltan/docs/ug_html/ug_order.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_order_local_hsfc.html b/packages/zoltan/docs/ug_html/ug_order_local_hsfc.html index db1143b02ab8..7dc349b9b01a 100644 --- a/packages/zoltan/docs/ug_html/ug_order_local_hsfc.html +++ b/packages/zoltan/docs/ug_html/ug_order_local_hsfc.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_order_parmetis.html b/packages/zoltan/docs/ug_html/ug_order_parmetis.html index 39d8855818d2..00c741715c81 100644 --- a/packages/zoltan/docs/ug_html/ug_order_parmetis.html +++ b/packages/zoltan/docs/ug_html/ug_order_parmetis.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_order_ptscotch.html b/packages/zoltan/docs/ug_html/ug_order_ptscotch.html index 8d62d7a1b032..5f4c748952ee 100644 --- 
a/packages/zoltan/docs/ug_html/ug_order_ptscotch.html +++ b/packages/zoltan/docs/ug_html/ug_order_ptscotch.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_param.html b/packages/zoltan/docs/ug_html/ug_param.html index ff28749255bc..25e1fc436ec8 100644 --- a/packages/zoltan/docs/ug_html/ug_param.html +++ b/packages/zoltan/docs/ug_html/ug_param.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_query.html b/packages/zoltan/docs/ug_html/ug_query.html index 4f5f54eb662a..c9fa08393d41 100644 --- a/packages/zoltan/docs/ug_html/ug_query.html +++ b/packages/zoltan/docs/ug_html/ug_query.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_query_lb.html b/packages/zoltan/docs/ug_html/ug_query_lb.html index 61bfed1ac379..cbd4587d806f 100644 --- a/packages/zoltan/docs/ug_html/ug_query_lb.html +++ b/packages/zoltan/docs/ug_html/ug_query_lb.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_query_mig.html b/packages/zoltan/docs/ug_html/ug_query_mig.html index c5dd925e934b..56fcf9b73adc 100644 --- a/packages/zoltan/docs/ug_html/ug_query_mig.html +++ b/packages/zoltan/docs/ug_html/ug_query_mig.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_refs.html b/packages/zoltan/docs/ug_html/ug_refs.html index 04b1de0d433e..2a17ba3250a0 100644 --- a/packages/zoltan/docs/ug_html/ug_refs.html +++ b/packages/zoltan/docs/ug_html/ug_refs.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_release.html b/packages/zoltan/docs/ug_html/ug_release.html index d192375c45e0..e4319c847b11 100644 --- a/packages/zoltan/docs/ug_html/ug_release.html +++ b/packages/zoltan/docs/ug_html/ug_release.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_usage.html b/packages/zoltan/docs/ug_html/ug_usage.html index 1df893a9a5e0..979e7eab839b 100644 --- a/packages/zoltan/docs/ug_html/ug_usage.html +++ b/packages/zoltan/docs/ug_html/ug_usage.html @@ -1,47 +1,3 @@ - diff --git 
a/packages/zoltan/docs/ug_html/ug_util.html b/packages/zoltan/docs/ug_html/ug_util.html index 085ba7da109e..a4062876a1bd 100644 --- a/packages/zoltan/docs/ug_html/ug_util.html +++ b/packages/zoltan/docs/ug_html/ug_util.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_util_comm.html b/packages/zoltan/docs/ug_html/ug_util_comm.html index 10371913bce1..c8a232b1d2f3 100644 --- a/packages/zoltan/docs/ug_html/ug_util_comm.html +++ b/packages/zoltan/docs/ug_html/ug_util_comm.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_util_dd.html b/packages/zoltan/docs/ug_html/ug_util_dd.html index 5d235e946e9d..6b74cc16c3f0 100644 --- a/packages/zoltan/docs/ug_html/ug_util_dd.html +++ b/packages/zoltan/docs/ug_html/ug_util_dd.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/docs/ug_html/ug_util_mem.html b/packages/zoltan/docs/ug_html/ug_util_mem.html index 22810ffe28a0..1e84c9d4353e 100644 --- a/packages/zoltan/docs/ug_html/ug_util_mem.html +++ b/packages/zoltan/docs/ug_html/ug_util_mem.html @@ -1,47 +1,3 @@ - diff --git a/packages/zoltan/example/C/Makefile.in b/packages/zoltan/example/C/Makefile.in index 5b97bf46f9e4..c5e55bdad2c6 100644 --- a/packages/zoltan/example/C/Makefile.in +++ b/packages/zoltan/example/C/Makefile.in @@ -15,49 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # The following line helps the test harness recover from build errors. diff --git a/packages/zoltan/example/C/coloring/makefile.old b/packages/zoltan/example/C/coloring/makefile.old index 934db66e06b8..0f9bf8ec84e5 100644 --- a/packages/zoltan/example/C/coloring/makefile.old +++ b/packages/zoltan/example/C/coloring/makefile.old @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. 
Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? 
Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER CC = mpicc CFLAGS = -Wall -Wstrict-prototypes -O3 -m64 # CFLAGS = -Wall -Wstrict-prototypes -g -m64 -DZOLTANV31 diff --git a/packages/zoltan/example/CPP/Makefile.in b/packages/zoltan/example/CPP/Makefile.in index 108c45bf204e..8fa33bfd1250 100644 --- a/packages/zoltan/example/CPP/Makefile.in +++ b/packages/zoltan/example/CPP/Makefile.in @@ -15,50 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER - # The following line helps the test harness recover from build errors. VPATH = @srcdir@ diff --git a/packages/zoltan/example/Makefile.am b/packages/zoltan/example/Makefile.am index 49f69bbf7049..c129b1b0be03 100644 --- a/packages/zoltan/example/Makefile.am +++ b/packages/zoltan/example/Makefile.am @@ -1,47 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER - # The following line helps the test harness recover from build errors. all-local: diff --git a/packages/zoltan/example/Makefile.in b/packages/zoltan/example/Makefile.in index 65388bacb122..269fafa34b21 100644 --- a/packages/zoltan/example/Makefile.in +++ b/packages/zoltan/example/Makefile.in @@ -15,50 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER - # The following line helps the test harness recover from build errors. 
VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ diff --git a/packages/zoltan/src/Makefile.in b/packages/zoltan/src/Makefile.in index 9155beb2f37e..a42c81aa1a67 100644 --- a/packages/zoltan/src/Makefile.in +++ b/packages/zoltan/src/Makefile.in @@ -15,49 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # The following line helps the test harness recover from build errors. diff --git a/packages/zoltan/src/driver/Makefile.in b/packages/zoltan/src/driver/Makefile.in index 8d2349b3a99d..2f2d8828ac4a 100644 --- a/packages/zoltan/src/driver/Makefile.in +++ b/packages/zoltan/src/driver/Makefile.in @@ -15,49 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ diff --git a/packages/zoltan/src/fdriver/Makefile.in b/packages/zoltan/src/fdriver/Makefile.in index 2d7d0f0068cb..59434d85baf4 100644 --- a/packages/zoltan/src/fdriver/Makefile.in +++ b/packages/zoltan/src/fdriver/Makefile.in @@ -15,49 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? 
Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ diff --git a/packages/zoltan/src/fort/makefile b/packages/zoltan/src/fort/makefile index 65c23bce446c..7059ac30dfb9 100644 --- a/packages/zoltan/src/fort/makefile +++ b/packages/zoltan/src/fort/makefile @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER ZOD = ../$(ZOLTAN_OBJ_DIR) ZOM = $(F90_MODULE_PREFIX)$(ZOD) -I../fort diff --git a/packages/zoltan/test/Large_Data/Makefile.in b/packages/zoltan/test/Large_Data/Makefile.in index fc195b120b53..0fd328b968be 100644 --- a/packages/zoltan/test/Large_Data/Makefile.in +++ b/packages/zoltan/test/Large_Data/Makefile.in @@ -15,49 +15,6 @@ @SET_MAKE@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ diff --git a/packages/zoltan/test/Large_Data/README.txt b/packages/zoltan/test/Large_Data/README.txt index d226461ece0f..71ee30c446da 100644 --- a/packages/zoltan/test/Large_Data/README.txt +++ b/packages/zoltan/test/Large_Data/README.txt @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER The tests in this directory create in parallel an arbitrarily large problem, run Zoltan on the problem, and report the results. 
diff --git a/packages/zoltan/test/Utilities_Tests/Communication/makefile b/packages/zoltan/test/Utilities_Tests/Communication/makefile index 088d9fbd1ec5..e3b3acda67d5 100644 --- a/packages/zoltan/test/Utilities_Tests/Communication/makefile +++ b/packages/zoltan/test/Utilities_Tests/Communication/makefile @@ -1,46 +1,3 @@ -# @HEADER -# -######################################################################## -# -# Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring -# Copyright 2012 Sandia Corporation -# -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Karen Devine kddevin@sandia.gov -# Erik Boman egboman@sandia.gov -# -######################################################################## -# -# @HEADER # # a simple makefile to build comm_main.c # From ae6107af367f461ca8d426f0c94b6316bcbdb673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=92=D0=BB=D0=B0=D0=B4=D0=B8=D1=81=D0=BB=D0=B0=D0=B2=20?= =?UTF-8?q?=D0=A1=D0=B5=D0=BC=D1=8B=D0=BA=D0=B8=D0=BD?= Date: Sun, 4 Aug 2024 20:14:51 +0300 Subject: [PATCH 04/37] Corrected functions to compile with VS2022 --- packages/kokkos/core/src/Kokkos_View.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp index 484a0e6f62e4..2c5ade5cae4f 100644 --- a/packages/kokkos/core/src/Kokkos_View.hpp +++ b/packages/kokkos/core/src/Kokkos_View.hpp @@ -944,8 +944,8 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0)), + (Kokkos::Impl::always_true::value && + (2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0) && std::is_integral::value && std::is_integral::value), reference_type> operator()(I0 i0, I1 i1) const { check_operator_parens_valid_args(i0, i1); @@ -955,8 +955,8 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION 
std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0)), + (Kokkos::Impl::always_true::value && + (2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0) && std::is_integral::value && std::is_integral::value), reference_type> operator()(I0 i0, I1 i1) const { check_operator_parens_valid_args(i0, i1); @@ -1088,7 +1088,7 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic == 0)), + is_default_map && is_layout_right && (rank_dynamic == 0) && std::is_integral::value && std::is_integral::value), reference_type> access(I0 i0, I1 i1, Is... extra) const { check_access_member_function_valid_args(i0, i1, extra...); @@ -1099,7 +1099,7 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic != 0)), + is_default_map && is_layout_right && (rank_dynamic != 0) && std::is_integral::value && std::is_integral::value), reference_type> access(I0 i0, I1 i1, Is... extra) const { check_access_member_function_valid_args(i0, i1, extra...); From 451d422d9a8f788838dc425386f8b7da21c86bb2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 22:47:56 +0000 Subject: [PATCH 05/37] Bump step-security/harden-runner from 2.9.0 to 2.9.1 Bumps [step-security/harden-runner](https://github.com/step-security/harden-runner) from 2.9.0 to 2.9.1. 
- [Release notes](https://github.com/step-security/harden-runner/releases) - [Commits](https://github.com/step-security/harden-runner/compare/0d381219ddf674d61a7572ddd19d7941e271515c...5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde) --- updated-dependencies: - dependency-name: step-security/harden-runner dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dependency-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index f072eabac8d4..bf2dcfbae9fd 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@0d381219ddf674d61a7572ddd19d7941e271515c # v2.9.0 + uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 with: egress-policy: audit From 00faf09250f2aa087d471723ade7dd8522298955 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 22:48:01 +0000 Subject: [PATCH 06/37] Bump actions/upload-artifact from 4.3.4 to 4.3.5 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.4 to 4.3.5. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/0b2256b8c012f0828dc542b3febcab082c67f72b...89ef406dd8d7e03cfd12d9e0a4a378f454709029) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/clang_format.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index 75830f15649a..fdcc82067cab 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -22,7 +22,7 @@ jobs: - run: git diff HEAD > format_patch.txt - run: if [ "$(cat format_patch.txt)" == "" ] ; then rm format_patch.txt ; else cat format_patch.txt; fi - - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 id: upload-artf if: ${{ hashFiles('format_patch.txt') != '' }} with: diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 7c8fbc5870c4..52b22251f53a 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -58,7 +58,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. 
- name: "Upload artifact" - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 with: name: SARIF file path: results.sarif From 82e97a40baa25d14e041e848b731ea4fdcd80d47 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Wed, 7 Aug 2024 14:55:47 +0200 Subject: [PATCH 07/37] Fix Trilinos issue 13292 --- .../test/CrsMatrix/CrsMatrix_MatvecFence.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecFence.cpp b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecFence.cpp index c6f1cfc6c9b2..c1c06d2b3b30 100644 --- a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecFence.cpp +++ b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecFence.cpp @@ -212,10 +212,21 @@ namespace { } else { expectedGlobalCount = 6 * iter_num; } - if (Tpetra::Details::Behavior::debug()) { - expectedInstanceCount = 5*iter_num; - } else { - expectedInstanceCount = 3*iter_num; +#ifdef HAVE_TPETRA_INST_HIP + if constexpr (std::is_same_v) { + if (Tpetra::Details::Behavior::debug()) { + expectedInstanceCount = 4*iter_num; + } else { + expectedInstanceCount = 2*iter_num; + } + } else +#endif + { + if (Tpetra::Details::Behavior::debug()) { + expectedInstanceCount = 5*iter_num; + } else { + expectedInstanceCount = 3*iter_num; + } } } } From e62773963c6281b7d1e2eef5c09eb2ea6a52f75e Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 7 Aug 2024 17:37:43 -0600 Subject: [PATCH 08/37] Amesos2 : add an option to specify solver name in SimpleSolve_File.cpp --- packages/amesos2/example/SimpleSolve_File.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/amesos2/example/SimpleSolve_File.cpp b/packages/amesos2/example/SimpleSolve_File.cpp index 9b354d8d7444..7d7150c2c621 100644 --- a/packages/amesos2/example/SimpleSolve_File.cpp +++ b/packages/amesos2/example/SimpleSolve_File.cpp 
@@ -91,9 +91,11 @@ int main(int argc, char *argv[]) { bool allprint = false; bool verbose = (myRank==0); std::string filename("arc130.mtx"); + std::string solvername("Superlu"); Teuchos::CommandLineProcessor cmdp(false,true); cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); cmdp.setOption("filename",&filename,"Filename for Matrix-Market test matrix."); + cmdp.setOption("solvername",&solvername,"Name of solver."); cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it."); cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve."); cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics"); @@ -149,12 +151,12 @@ int main(int argc, char *argv[]) { // Constructor from Factory RCP > solver; - if( !Amesos2::query("Superlu") ){ - *fos << "SuperLU solver not enabled. Exiting..." << std::endl; + if( !Amesos2::query(solvername) ){ + *fos << solvername << " solver not enabled. Exiting..." 
<< std::endl; return EXIT_SUCCESS; } - solver = Amesos2::create("Superlu", A, X, B); + solver = Amesos2::create(solvername, A, X, B); solver->symbolicFactorization().numericFactorization().solve(); From 2085d1a38f61eda2ad3a9bde2a3b7d9c21d4a580 Mon Sep 17 00:00:00 2001 From: Kim Liegeois Date: Tue, 30 Jul 2024 04:02:46 -0600 Subject: [PATCH 09/37] add fences and timers --- .../Ifpack2_BlockComputeResidualVector.hpp | 6 +-- packages/ifpack2/src/Ifpack2_BlockHelper.hpp | 10 +--- .../src/Ifpack2_BlockHelper_Timers.hpp | 36 ++++++++++++++ .../src/Ifpack2_BlockRelaxation_def.hpp | 31 +++++++++--- .../src/Ifpack2_BlockTriDiContainer_def.hpp | 2 +- .../src/Ifpack2_BlockTriDiContainer_impl.hpp | 49 +++++++++++-------- .../src/Tpetra_BlockCrsMatrix_Helpers_def.hpp | 12 ++--- 7 files changed, 101 insertions(+), 45 deletions(-) create mode 100644 packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp diff --git a/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp b/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp index 4f76cd46f2d6..eff2119a1075 100644 --- a/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp @@ -790,7 +790,7 @@ namespace Ifpack2 { const MultiVectorLocalViewTypeB &b_, const MultiVectorLocalViewTypeX &x_) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ComputeResidual::"); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", execution_space); y = y_; b = b_; x = x_; if constexpr (is_device::value) { @@ -818,7 +818,7 @@ namespace Ifpack2 { const MultiVectorLocalViewTypeX &x_, const MultiVectorLocalViewTypeX_Remote &x_remote_) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ComputeResidual::"); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", execution_space); b = b_; x = x_; x_remote = x_remote_; if constexpr (is_device::value) { @@ -892,7 +892,7 @@ 
namespace Ifpack2 { const MultiVectorLocalViewTypeX_Remote &x_remote_, const bool compute_owned) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ComputeResidual::"); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", execution_space); b = b_; x = x_; x_remote = x_remote_; if constexpr (is_device::value) { diff --git a/packages/ifpack2/src/Ifpack2_BlockHelper.hpp b/packages/ifpack2/src/Ifpack2_BlockHelper.hpp index a3b8a2afd766..5681e7dee0ea 100644 --- a/packages/ifpack2/src/Ifpack2_BlockHelper.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockHelper.hpp @@ -10,6 +10,7 @@ #ifndef IFPACK2_BLOCKHELPER_IMPL_HPP #define IFPACK2_BLOCKHELPER_IMPL_HPP +#include "Ifpack2_BlockHelper_Timers.hpp" namespace Ifpack2 { @@ -155,15 +156,6 @@ namespace Ifpack2 { }; #endif - -#if defined(HAVE_IFPACK2_BLOCKTRIDICONTAINER_TIMERS) -#define IFPACK2_BLOCKHELPER_TIMER(label) TEUCHOS_FUNC_TIME_MONITOR(label); -#define IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) execution_space().fence(); -#else -#define IFPACK2_BLOCKHELPER_TIMER(label) -#define IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) -#endif - #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_ENABLE_PROFILE) #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN \ KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart()); diff --git a/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp b/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp new file mode 100644 index 000000000000..a42039d921ce --- /dev/null +++ b/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp @@ -0,0 +1,36 @@ +// @HEADER +// ***************************************************************************** +// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package +// +// Copyright 2009 NTESS and the Ifpack2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef IFPACK2_BLOCKHELPER_TIMERS_HPP +#define IFPACK2_BLOCKHELPER_TIMERS_HPP + + +namespace Ifpack2 { + + namespace BlockHelperDetails { + +#if defined(HAVE_IFPACK2_BLOCKTRIDICONTAINER_TIMERS) +#define IFPACK2_BLOCKHELPER_TIMER(label) TEUCHOS_FUNC_TIME_MONITOR(label); +#define IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) execution_space().fence(); +#define IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE() Kokkos::DefaultExecutionSpace().fence(); +#else +#define IFPACK2_BLOCKHELPER_TIMER(label) +#define IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) +#define IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE() +#endif + +#define IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE(label, execution_space) \ + IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) \ + IFPACK2_BLOCKHELPER_TIMER(label) + + } // namespace BlockHelperDetails + +} // namespace Ifpack2 + +#endif diff --git a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp index e5503876df6c..4d27076edb9a 100644 --- a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp @@ -22,6 +22,7 @@ #include "Teuchos_TimeMonitor.hpp" #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp" #include "Tpetra_Import_Util.hpp" +#include "Ifpack2_BlockHelper_Timers.hpp" namespace Ifpack2 { @@ -606,7 +607,7 @@ initialize () Teuchos::RCP graph = A_->getGraph (); if(!hasBlockCrsMatrix_ && List_.isParameter("relaxation: container") && List_.get("relaxation: container") == "BlockTriDi" ) { - TEUCHOS_FUNC_TIME_MONITOR("Ifpack2::BlockRelaxation::initialize::convertToBlockCrsMatrix"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::convertToBlockCrsMatrix"); int block_size = List_.get("partitioner: block size"); bool use_explicit_conversion = List_.isParameter("partitioner: explicit convert to 
BlockCrs") && List_.get("partitioner: explicit convert to BlockCrs"); TEUCHOS_TEST_FOR_EXCEPT_MSG @@ -627,7 +628,7 @@ initialize () } graph = Tpetra::getBlockCrsGraph(*Teuchos::rcp_dynamic_cast(A_), block_size, true); } - Kokkos::DefaultExecutionSpace().fence(); + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } NumLocalRows_ = A_->getLocalNumRows (); @@ -640,15 +641,22 @@ initialize () Partitioner_ = Teuchos::null; if (PartitionerType_ == "linear") { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::linear"); Partitioner_ = rcp (new Ifpack2::LinearPartitioner (graph)); + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else if (PartitionerType_ == "line") { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::line"); Partitioner_ = rcp (new Ifpack2::LinePartitioner (graph)); + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else if (PartitionerType_ == "user") { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::user"); Partitioner_ = rcp (new Ifpack2::Details::UserPartitioner (graph ) ); + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else if (PartitionerType_ == "zoltan2") { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::zoltan2"); #if defined(HAVE_IFPACK2_ZOLTAN2) if (graph->getComm ()->getSize () == 1) { // Only one MPI, so call zoltan2 with global graph @@ -664,6 +672,7 @@ initialize () TEUCHOS_TEST_FOR_EXCEPTION (true, std::logic_error, "Ifpack2::BlockRelaxation::initialize: Zoltan2 not enabled."); #endif + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else { // We should have checked for this in setParameters(), so it's a // logic_error, not an invalid_argument or runtime_error. 
@@ -674,8 +683,12 @@ initialize () } // need to partition the graph of A - Partitioner_->setParameters (List_); - Partitioner_->compute (); + { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::Partitioner"); + Partitioner_->setParameters (List_); + Partitioner_->compute (); + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); + } // get actual number of partitions NumLocalBlocks_ = Partitioner_->numLocalParts (); @@ -696,7 +709,12 @@ initialize () "NumSweeps_ = " << NumSweeps_ << " < 0."); // Extract the submatrices - ExtractSubmatricesStructure (); + { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::ExtractSubmatricesStructure"); + ExtractSubmatricesStructure (); + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); + } + // Compute the weight vector if we're doing overlapped Jacobi (and // only if we're doing overlapped Jacobi). @@ -724,6 +742,7 @@ initialize () // only needed when Schwarz combine mode is ADD as opposed to ZERO (which is RAS) if (schwarzCombineMode_ == "ADD") { + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::ADD"); typedef Tpetra::MultiVector< typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type,typename MatrixType::node_type> scMV; Teuchos::RCP theImport = A_->getGraph()->getImporter(); if (!theImport.is_null()) { @@ -737,7 +756,7 @@ initialize () nonOverLapW.doExport (*W_, *theImport, Tpetra::ADD); W_->doImport( nonOverLapW, *theImport, Tpetra::INSERT); } - + IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } W_->reciprocal (*W_); } diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp index 3adb66d5fd89..c98b5e564d81 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp @@ -53,7 +53,7 @@ namespace Ifpack2 { const int block_size, const bool explicitConversion) { - 
IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initInternal"); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", typename BlockHelperDetails::ImplType::execution_space); // create pointer of impl { diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp index d4b47f72aee7..5842854402be 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp @@ -860,6 +860,7 @@ namespace Ifpack2 { template Teuchos::RCP > createBlockCrsAsyncImporter(const Teuchos::RCP::tpetra_row_matrix_type> &A) { + IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter"); using impl_type = BlockHelperDetails::ImplType; using tpetra_map_type = typename impl_type::tpetra_map_type; using local_ordinal_type = typename impl_type::local_ordinal_type; @@ -882,30 +883,36 @@ namespace Ifpack2 { std::vector gids; bool separate_remotes = true, found_first = false, need_owned_permutation = false; - for (size_t i=0;igetLocalNumElements();++i) { - const global_ordinal_type gid = column_map->getGlobalElement(i); - if (!domain_map->isNodeGlobalElement(gid)) { - found_first = true; - gids.push_back(gid); - } else if (found_first) { - separate_remotes = false; - break; - } - if (!need_owned_permutation && - domain_map->getLocalElement(gid) != static_cast(i)) { - // The owned part of the domain and column maps are different - // orderings. We *could* do a super efficient impl of this case in the - // num_sweeps > 1 case by adding complexity to PermuteAndRepack. But, - // really, if a caller cares about speed, they wouldn't make different - // local permutations like this. So we punt on the best impl and go for - // a pretty good one: the permutation is done in place in - // compute_b_minus_Rx for the pure-owned part of the MVP. The only cost - // is the presumably worse memory access pattern of the input vector. 
- need_owned_permutation = true; + { + IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::loop_over_local_elements"); + // This loop is relatively expensive + for (size_t i=0;igetLocalNumElements();++i) { + const global_ordinal_type gid = column_map->getGlobalElement(i); + if (!domain_map->isNodeGlobalElement(gid)) { + found_first = true; + gids.push_back(gid); + } else if (found_first) { + separate_remotes = false; + break; + } + if (!need_owned_permutation && + domain_map->getLocalElement(gid) != static_cast(i)) { + // The owned part of the domain and column maps are different + // orderings. We *could* do a super efficient impl of this case in the + // num_sweeps > 1 case by adding complexity to PermuteAndRepack. But, + // really, if a caller cares about speed, they wouldn't make different + // local permutations like this. So we punt on the best impl and go for + // a pretty good one: the permutation is done in place in + // compute_b_minus_Rx for the pure-owned part of the MVP. The only cost + // is the presumably worse memory access pattern of the input vector. 
+ need_owned_permutation = true; + } } + IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } if (separate_remotes) { + IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::separate_remotes"); const auto invalid = Teuchos::OrdinalTraits::invalid(); const auto parsimonious_col_map = Teuchos::rcp(new tpetra_map_type(invalid, gids.data(), gids.size(), 0, domain_map->getComm())); @@ -919,9 +926,11 @@ namespace Ifpack2 { dm2cm_host(i) = domain_map->getLocalElement(column_map->getGlobalElement(i)); Kokkos::deep_copy(dm2cm, dm2cm_host); } + IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) return Teuchos::rcp(new AsyncableImport(domain_map, parsimonious_col_map, blocksize, dm2cm)); } } + IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) return Teuchos::null; } diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index 9d8cbabe7d67..9c6e9e37392d 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -265,7 +265,7 @@ namespace Tpetra { Teuchos::RCP > getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { - + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph"); /* ASSUMPTIONS: @@ -313,7 +313,7 @@ namespace Tpetra { const offset_type bs2 = blockSize * blockSize; if (use_LID) { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph"); + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph::LID"); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; @@ -347,7 +347,7 @@ namespace Tpetra { Kokkos::DefaultExecutionSpace().fence(); } else { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillCrsGraph"); + 
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph::GID"); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; @@ -401,7 +401,7 @@ namespace Tpetra { Teuchos::RCP > convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { - + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix"); /* ASSUMPTIONS: @@ -434,7 +434,7 @@ namespace Tpetra { auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID); if (use_LID) { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::LID"); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; @@ -466,7 +466,7 @@ namespace Tpetra { Kokkos::DefaultExecutionSpace().fence(); } else { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix"); + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::GID"); auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap(); auto localPointColMap = pointMatrix.getColMap()->getLocalMap(); From dd2c47502ec939e19d36f38149974996dafedb4f Mon Sep 17 00:00:00 2001 From: Kim Liegeois Date: Thu, 1 Aug 2024 03:30:43 -0600 Subject: [PATCH 10/37] Add a createMeshMap implementation that uses LID ordering that can run on device --- .../Tpetra_BlockCrsMatrix_Helpers_decl.hpp | 2 +- .../src/Tpetra_BlockCrsMatrix_Helpers_def.hpp | 80 ++++++++++++------- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp index b52ecab0193b..63c8c688e61e 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_decl.hpp @@ 
-113,7 +113,7 @@ namespace Tpetra { /// Important! It's assumed that point GIDs associated with a single mesh GID appear consecutively in pointMap. template Teuchos::RCP> - createMeshMap(LO const &blockSize, const Tpetra::Map &pointMap); + createMeshMap(LO const &blockSize, const Tpetra::Map &pointMap, bool use_local_ID=false); /// \brief Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix. /// diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index 9c6e9e37392d..807707a3ea03 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -292,22 +292,27 @@ namespace Tpetra { using range_type = Kokkos::RangePolicy; const map_type &pointRowMap = *(pointMatrix.getRowMap()); - RCP meshRowMap = createMeshMap(blockSize, pointRowMap); + RCP meshRowMap, meshColMap, meshDomainMap, meshRangeMap; const map_type &pointColMap = *(pointMatrix.getColMap()); - RCP meshColMap = createMeshMap(blockSize, pointColMap); + const map_type &pointDomainMap = *(pointMatrix.getDomainMap()); + const map_type &pointRangeMap = *(pointMatrix.getRangeMap()); + + { + TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph::createMeshMaps"); + meshRowMap = createMeshMap(blockSize, pointRowMap, use_LID); + meshColMap = createMeshMap(blockSize, pointColMap, use_LID); + meshDomainMap = createMeshMap(blockSize, pointDomainMap, use_LID); + meshRangeMap = createMeshMap(blockSize, pointRangeMap, use_LID); + Kokkos::DefaultExecutionSpace().fence(); + } + if(meshColMap.is_null()) throw std::runtime_error("ERROR: Cannot create mesh colmap"); auto localMeshColMap = meshColMap->getLocalMap(); auto localPointColMap = pointColMap.getLocalMap(); auto localPointRowMap = pointRowMap.getLocalMap(); - const map_type &pointDomainMap = *(pointMatrix.getDomainMap()); - RCP meshDomainMap = createMeshMap(blockSize, 
pointDomainMap); - - const map_type &pointRangeMap = *(pointMatrix.getRangeMap()); - RCP meshRangeMap = createMeshMap(blockSize, pointRangeMap); - RCP meshCrsGraph; const offset_type bs2 = blockSize * blockSize; @@ -890,32 +895,49 @@ namespace Tpetra { template Teuchos::RCP > - createMeshMap (const LO& blockSize, const Tpetra::Map& pointMap) + createMeshMap (const LO& blockSize, const Tpetra::Map& pointMap, bool use_LID) { typedef Teuchos::OrdinalTraits TOT; typedef Tpetra::Map map_type; - //calculate mesh GIDs - Teuchos::ArrayView pointGids = pointMap.getLocalElementList(); - Teuchos::Array meshGids; - GO indexBase = pointMap.getIndexBase(); - - // Use hash table to track whether we've encountered this GID previously. This will happen - // when striding through the point DOFs in a block. It should not happen otherwise. - // I don't use sort/make unique because I don't want to change the ordering. - meshGids.reserve(pointGids.size()); - Tpetra::Details::HashTable hashTable(pointGids.size()); - for (int i=0; i; - Teuchos::RCP meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) ); - return meshMap; + auto pointGlobalID = pointMap.getMyGlobalIndicesDevice(); + LO block_rows = pointGlobalID.extent(0)/blockSize; + Kokkos::View meshGlobalID("meshGlobalID", block_rows); + Kokkos::parallel_for("fillMeshMap",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) { + meshGlobalID(i) = pointGlobalID(i*blockSize)/blockSize; + }); + + Teuchos::RCP meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGlobalID, 0, pointMap.getComm()) ); + return meshMap; + } + else { + //calculate mesh GIDs + Teuchos::ArrayView pointGids = pointMap.getLocalElementList(); + Teuchos::Array meshGids; + GO indexBase = pointMap.getIndexBase(); + + // Use hash table to track whether we've encountered this GID previously. This will happen + // when striding through the point DOFs in a block. It should not happen otherwise. 
+ // I don't use sort/make unique because I don't want to change the ordering. + meshGids.reserve(pointGids.size()); + Tpetra::Details::HashTable hashTable(pointGids.size()); + for (int i=0; i meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) ); + return meshMap; + } } @@ -1075,7 +1097,7 @@ namespace Tpetra { // Explicit instantiation macro for createMeshMap / createPointMap // #define TPETRA_CREATEMESHMAP_INSTANT(LO,GO,NODE) \ - template Teuchos::RCP > createMeshMap (const LO& blockSize, const Map& pointMap); \ + template Teuchos::RCP > createMeshMap (const LO& blockSize, const Map& pointMap, bool use_local_ID); \ template Teuchos::RCP > createPointMap (const LO& blockSize, const Map& blockMap); #endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP From 4b90f56c1d8b1904ed01ddc5e063e9e41367f236 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Thu, 8 Aug 2024 10:59:32 -0600 Subject: [PATCH 11/37] Wrap git commands with retry operator One of the internal GitLab sites has been a bit flaky recently (high load suspected to be the root cause). Wrap all of the git operations with an operator that retries them after 1m, then 5m to try and get builds to report instead of failing. Signed-off-by: Samuel E. Browne --- packages/framework/get_dependencies.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/framework/get_dependencies.sh b/packages/framework/get_dependencies.sh index 36b4751728d1..74c26e4eb601 100755 --- a/packages/framework/get_dependencies.sh +++ b/packages/framework/get_dependencies.sh @@ -19,6 +19,11 @@ function tril_genconfig_assert_pwd_is_git_repo() { fi } +function retry_command() { + cmd=$1 + ${cmd} || { echo "Retrying after 1m..." ; sleep 60 ; ${cmd} ; } || { echo "Retrying after 5m..." 
; sleep 300 ; ${cmd} ; } +} + function tril_genconfig_clone_or_update_repo() { git_url=$1 sub_dir=$2 @@ -33,10 +38,12 @@ function tril_genconfig_clone_or_update_repo() { echo "STATUS: ${sub_dir}: Fetching remote repo" cd ${sub_dir} tril_genconfig_assert_pwd_is_git_repo - git fetch + cmd="git fetch" + retry_command "${cmd}" else echo "STATUS: ${sub_dir}: Cloning from '${git_url}'" - git clone ${git_url} ${sub_dir} + cmd="git clone ${git_url} ${sub_dir}" + retry_command "${cmd}" cd ${sub_dir} fi @@ -52,7 +59,8 @@ function tril_genconfig_clone_or_update_repo() { if [[ "${has_submodules}" == "has-submodules" ]] ; then echo echo "STATUS: ${sub_dir}: Update submodules" - git submodule update --force --init --recursive + cmd="git submodule update --force --init --recursive" + retry_command "${cmd}" cd - > /dev/null elif [[ "${has_submodules}" != "" ]] ; then echo "ERROR: argument '${has_submodules}' not allowed! Only 'has-submodules' or ''!" From d562a04fc2417f4fa36fb7aed0412530d0fd2884 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 8 Aug 2024 17:13:19 -0600 Subject: [PATCH 12/37] Amesos2 : add new options "--check-solution" and "--rhs_filename" to SimpleSolve_File --- packages/amesos2/example/SimpleSolve_File.cpp | 57 +++++++++++++------ 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/packages/amesos2/example/SimpleSolve_File.cpp b/packages/amesos2/example/SimpleSolve_File.cpp index 7d7150c2c621..60760d65d31d 100644 --- a/packages/amesos2/example/SimpleSolve_File.cpp +++ b/packages/amesos2/example/SimpleSolve_File.cpp @@ -87,17 +87,21 @@ int main(int argc, char *argv[]) { bool printMatrix = false; bool printSolution = false; + bool checkSolution = false; bool printTiming = false; bool allprint = false; bool verbose = (myRank==0); - std::string filename("arc130.mtx"); + std::string mat_filename("arc130.mtx"); + std::string rhs_filename(""); std::string solvername("Superlu"); Teuchos::CommandLineProcessor cmdp(false,true); 
cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); - cmdp.setOption("filename",&filename,"Filename for Matrix-Market test matrix."); + cmdp.setOption("filename",&mat_filename,"Filename for Matrix-Market test matrix."); + cmdp.setOption("rhs_filename",&rhs_filename,"Filename for Matrix-Market right-hand-side."); cmdp.setOption("solvername",&solvername,"Name of solver."); cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it."); cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve."); + cmdp.setOption("check-solution","no-check-solution",&checkSolution,"Check solution vector after solve."); cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics"); cmdp.setOption("all-print","root-print",&allprint,"All processors print to out"); if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { @@ -112,7 +116,7 @@ int main(int argc, char *argv[]) { const size_t numVectors = 1; - RCP A = Tpetra::MatrixMarket::Reader::readSparseFile(filename, comm); + RCP A = Tpetra::MatrixMarket::Reader::readSparseFile(mat_filename, comm); if( printMatrix ){ A->describe(*fos, Teuchos::VERB_EXTREME); } @@ -134,20 +138,23 @@ int main(int argc, char *argv[]) { RCP X = rcp(new MV(dmnmap,numVectors)); X->randomize(); - /* Create B - * - * Use RHS: - * - * [[10] - * [10] - * [10] - * [10] - * [10] - * [10]] - */ + // Create B RCP B = rcp(new MV(rngmap,numVectors)); - B->putScalar(10); - + if (rhs_filename == "") { + /* + * Use RHS: + * + * [[10] + * [10] + * [10] + * [10] + * [10] + * [10]] + */ + B->putScalar(10); + } else { + B = Tpetra::MatrixMarket::Reader::readDenseFile (rhs_filename, comm, rngmap); + } // Constructor from Factory RCP > solver; @@ -176,6 +183,24 @@ int main(int argc, char *argv[]) { } } + if( checkSolution ){ + const Scalar one = Teuchos::ScalarTraits::one (); + RCP R = rcp(new MV(rngmap,numVectors)); + 
A->apply(*X, *R); + R->update(one, *B, -one); + for (size_t j = 0; j < numVectors; ++j) { + auto Rj = R->getVector(j); + auto Bj = B->getVector(j); + auto r_norm = Rj->norm2(); + auto b_norm = Bj->norm2(); + if (myRank == 0) { + *fos << "Relative Residual norm = " << r_norm << " / " << b_norm << " = " + << r_norm / b_norm << std::endl; + } + } + if (myRank == 0) *fos << std::endl; + } + if( printTiming ){ // Print some timing statistics solver->printTiming(*fos); From f8ff2ad41462ea8af664241df5044928799e5984 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 7 Aug 2024 16:39:21 -0600 Subject: [PATCH 13/37] stk: modify test to prevent allocation in parallel region modify NgpMeshTest.volatileFastSharedCommMap to prevent allocation in a parallel region, which can result in deadlock with kokkos version 4.4 address issue #13328 Co-authored-by: Christian Trott Signed-off-by: Nathan Ellingwood --- .../stk_mesh/ngp/NgpMeshTest.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp index 5ab49ea6f829..e03bd5360f07 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp @@ -151,16 +151,20 @@ TEST(StkVectorGpuTest, gpu_runs) void check_volatile_fast_shared_comm_map_values_on_device(const stk::mesh::NgpMesh & ngpMesh, int proc, const stk::mesh::DeviceCommMapIndices & deviceCommMapIndicesGold) { - Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), - KOKKOS_LAMBDA(size_t i) - { - stk::mesh::DeviceCommMapIndices deviceCommMapIndices = ngpMesh.volatile_fast_shared_comm_map(stk::topology::NODE_RANK, proc); - - for (size_t entry = 0; entry < deviceCommMapIndices.size(); ++entry) { - NGP_EXPECT_EQ(deviceCommMapIndicesGold[entry].bucket_id, deviceCommMapIndices[entry].bucket_id); - NGP_EXPECT_EQ(deviceCommMapIndicesGold[entry].bucket_ord, 
deviceCommMapIndices[entry].bucket_ord); - } - }); + auto test_lambda = KOKKOS_LAMBDA(size_t i) + { + stk::mesh::DeviceCommMapIndices deviceCommMapIndices = ngpMesh.volatile_fast_shared_comm_map(stk::topology::NODE_RANK, proc); + + for (size_t entry = 0; entry < deviceCommMapIndices.size(); ++entry) { + NGP_EXPECT_EQ(deviceCommMapIndicesGold[entry].bucket_id, deviceCommMapIndices[entry].bucket_id); + NGP_EXPECT_EQ(deviceCommMapIndicesGold[entry].bucket_ord, deviceCommMapIndices[entry].bucket_ord); + } + }; + if constexpr (std::is_same_v) { + test_lambda(0); + } else { + Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), test_lambda); + } } using HostCommMapIndices = Kokkos::View; From a5eb4d4e1436e5594ce73ffe62e1cb0f460c99b0 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 8 Aug 2024 15:37:54 -0600 Subject: [PATCH 14/37] Snapshot of kokkos.git from commit 948c1346301ff9b42b136a8c72eed91c839e3105 From repository at git@github.com:kokkos/kokkos.git At commit: commit 948c1346301ff9b42b136a8c72eed91c839e3105 Author: Nathan Ellingwood Date: Thu Aug 8 14:54:40 2024 -0600 update master_history.txt for 4.4.00 Signed-off-by: Nathan Ellingwood --- packages/kokkos/.jenkins | 25 +- packages/kokkos/.jenkins_nightly | 54 +- packages/kokkos/.olcf-gitlab-ci.yml | 12 + packages/kokkos/CHANGELOG.md | 78 +- packages/kokkos/CITATION.cff | 65 + packages/kokkos/CMakeLists.txt | 4 +- packages/kokkos/Makefile.kokkos | 54 +- packages/kokkos/Makefile.targets | 2 +- packages/kokkos/README.md | 63 +- .../src/sorting/impl/Kokkos_SortByKeyImpl.hpp | 55 +- .../src/std_algorithms/Kokkos_ForEach.hpp | 56 +- .../impl/Kokkos_AdjacentDifference.hpp | 10 + .../impl/Kokkos_Constraints.hpp | 61 +- .../src/std_algorithms/impl/Kokkos_CopyIf.hpp | 5 +- .../impl/Kokkos_ForEachForEachN.hpp | 20 +- .../impl/Kokkos_RandomAccessIterator.hpp | 31 + .../std_algorithms/impl/Kokkos_UniqueCopy.hpp | 5 +- .../unit_tests/TestRandomAccessIterator.cpp | 38 + .../algorithms/unit_tests/TestSortByKey.hpp | 
14 +- .../TestStdAlgorithmsConstraints.cpp | 109 + .../TestStdAlgorithmsTeamExclusiveScan.cpp | 6 +- .../TestStdAlgorithmsTeamIsSorted.cpp | 4 +- .../TestStdAlgorithmsTeamIsSortedUntil.cpp | 4 +- .../TestStdAlgorithmsTeamMaxElement.cpp | 4 +- .../TestStdAlgorithmsTeamMinElement.cpp | 4 +- .../TestStdAlgorithmsTeamMinMaxElement.cpp | 4 +- .../TestStdAlgorithmsTeamReduce.cpp | 2 +- ...tdAlgorithmsTeamTransformExclusiveScan.cpp | 2 +- ...tdAlgorithmsTeamTransformInclusiveScan.cpp | 2 +- .../TestStdAlgorithmsTeamTransformReduce.cpp | 2 +- packages/kokkos/appveyor.yml | 2 +- packages/kokkos/benchmarks/CMakeLists.txt | 2 +- .../view_copy_constructor/CMakeLists.txt | 4 + .../benchmarks/view_copy_constructor/Makefile | 46 + .../view_copy_constructor.cpp | 310 +++ packages/kokkos/bin/nvcc_wrapper | 2 +- packages/kokkos/cmake/Dependencies.cmake | 1 - .../kokkos/cmake/KokkosConfigCommon.cmake.in | 7 +- packages/kokkos/cmake/KokkosCore_config.h.in | 3 + .../kokkos/cmake/Modules/FindTPLCUDA.cmake | 57 +- packages/kokkos/cmake/deps/CUDA.cmake | 1 - packages/kokkos/cmake/deps/CUSPARSE.cmake | 26 - packages/kokkos/cmake/fake_tribits.cmake | 8 - packages/kokkos/cmake/kokkos_arch.cmake | 80 +- .../kokkos/cmake/kokkos_compiler_id.cmake | 83 +- .../kokkos/cmake/kokkos_enable_options.cmake | 6 +- packages/kokkos/cmake/kokkos_functions.cmake | 7 +- .../kokkos/cmake/kokkos_test_cxx_std.cmake | 8 +- packages/kokkos/cmake/kokkos_tpls.cmake | 10 +- packages/kokkos/cmake/kokkos_tribits.cmake | 47 +- .../kokkos/cmake/tpls/FindTPLCUSPARSE.cmake | 26 - .../kokkos/containers/src/Kokkos_DualView.hpp | 58 +- .../containers/src/Kokkos_DynRankView.hpp | 410 ++-- .../containers/src/Kokkos_DynamicView.hpp | 333 ++-- .../containers/src/Kokkos_OffsetView.hpp | 263 ++- .../containers/src/Kokkos_UnorderedMap.hpp | 114 +- .../containers/unit_tests/TestDualView.hpp | 140 +- .../unit_tests/TestUnorderedMap.hpp | 7 +- .../containers/unit_tests/TestVector.hpp | 4 +- .../kokkos/core/perf_test/test_atomic.cpp | 
3 +- .../perf_test/test_atomic_minmax_simple.cpp | 8 +- packages/kokkos/core/src/Cuda/Kokkos_Cuda.hpp | 15 +- .../kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 67 +- .../core/src/Cuda/Kokkos_Cuda_Error.hpp | 47 - .../core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp | 9 +- .../core/src/Cuda/Kokkos_Cuda_Instance.cpp | 8 + .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 30 +- .../src/Cuda/Kokkos_Cuda_Parallel_Team.hpp | 38 +- .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 25 +- packages/kokkos/core/src/HIP/Kokkos_HIP.hpp | 15 +- .../kokkos/core/src/HIP/Kokkos_HIP_Error.hpp | 37 - .../core/src/HIP/Kokkos_HIP_Graph_Impl.hpp | 4 +- .../core/src/HIP/Kokkos_HIP_Instance.cpp | 16 + .../core/src/HIP/Kokkos_HIP_Instance.hpp | 26 +- .../HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp | 1 + .../HIP/Kokkos_HIP_ParallelReduce_Team.hpp | 3 +- .../kokkos/core/src/HIP/Kokkos_HIP_Space.cpp | 17 +- packages/kokkos/core/src/HPX/Kokkos_HPX.cpp | 4 +- packages/kokkos/core/src/HPX/Kokkos_HPX.hpp | 18 +- packages/kokkos/core/src/Kokkos_Array.hpp | 102 +- .../Kokkos_Atomics_Desul_Volatile_Wrapper.hpp | 1 - .../core/src/Kokkos_Atomics_Desul_Wrapper.hpp | 2 - packages/kokkos/core/src/Kokkos_Complex.hpp | 76 + packages/kokkos/core/src/Kokkos_CopyViews.hpp | 527 ++--- .../kokkos/core/src/Kokkos_ExecPolicy.hpp | 81 +- packages/kokkos/core/src/Kokkos_Extents.hpp | 68 +- packages/kokkos/core/src/Kokkos_Graph.hpp | 3 + packages/kokkos/core/src/Kokkos_HostSpace.hpp | 2 - packages/kokkos/core/src/Kokkos_Layout.hpp | 120 +- packages/kokkos/core/src/Kokkos_Macros.hpp | 53 + .../core/src/Kokkos_MathematicalFunctions.hpp | 8 + packages/kokkos/core/src/Kokkos_Pair.hpp | 22 +- packages/kokkos/core/src/Kokkos_Parallel.hpp | 33 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 77 +- packages/kokkos/core/src/Kokkos_View.hpp | 262 +-- .../core/src/OpenACC/Kokkos_OpenACCSpace.cpp | 11 +- .../Kokkos_OpenACC_ParallelFor_Team.hpp | 8 +- .../kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp | 21 +- .../kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp | 18 +- 
.../src/OpenMP/Kokkos_OpenMP_Instance.cpp | 38 +- .../src/OpenMP/Kokkos_OpenMP_Instance.hpp | 22 +- .../src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp | 12 +- .../OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp | 20 +- .../OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp | 10 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 13 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 11 +- .../src/OpenMPTarget/Kokkos_OpenMPTarget.hpp | 3 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 6 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 40 - .../Kokkos_OpenMPTarget_Instance.cpp | 1 - .../Kokkos_OpenMPTarget_MDRangePolicy.hpp | 5 + .../Kokkos_OpenMPTarget_Parallel.hpp | 4 - ...okkos_OpenMPTarget_ParallelFor_MDRange.hpp | 383 ++++ ...s_OpenMPTarget_ParallelReduce_MDRange.hpp} | 631 +++--- ...kkos_OpenMPTarget_ParallelReduce_Range.hpp | 9 +- ...okkos_OpenMPTarget_ParallelReduce_Team.hpp | 10 +- ...Kokkos_OpenMPTarget_ParallelScan_Range.hpp | 8 + packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp | 25 + .../src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp | 157 ++ .../src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp | 56 + .../core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp | 174 ++ .../core/src/SYCL/Kokkos_SYCL_Instance.cpp | 34 +- .../core/src/SYCL/Kokkos_SYCL_Instance.hpp | 34 +- .../SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp | 26 +- .../SYCL/Kokkos_SYCL_ParallelFor_Range.hpp | 61 +- .../src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp | 73 +- .../Kokkos_SYCL_ParallelReduce_MDRange.hpp | 64 +- .../SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp | 67 +- .../SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp | 134 +- .../SYCL/Kokkos_SYCL_ParallelScan_Range.hpp | 223 ++- .../core/src/SYCL/Kokkos_SYCL_Space.cpp | 89 +- .../kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp | 111 +- .../SYCL/Kokkos_SYCL_WorkgroupReduction.hpp | 103 +- .../kokkos/core/src/Serial/Kokkos_Serial.cpp | 41 +- .../kokkos/core/src/Serial/Kokkos_Serial.hpp | 29 +- .../Serial/Kokkos_Serial_Parallel_MDRange.hpp | 17 +- .../Serial/Kokkos_Serial_Parallel_Range.hpp | 28 +- 
.../Serial/Kokkos_Serial_Parallel_Team.hpp | 16 +- .../core/src/Threads/Kokkos_Threads_Team.hpp | 7 +- .../kokkos/core/src/View/Kokkos_ViewAlloc.hpp | 318 +++ .../View/MDSpan/Kokkos_MDSpan_Accessor.hpp | 220 +++ .../src/View/MDSpan/Kokkos_MDSpan_Extents.hpp | 19 +- .../src/View/MDSpan/Kokkos_MDSpan_Layout.hpp | 156 ++ .../core/src/decl/Kokkos_Declare_SYCL.hpp | 3 + packages/kokkos/core/src/impl/Kokkos_Core.cpp | 34 +- .../src/impl/Kokkos_Default_Graph_Impl.hpp | 7 +- .../Kokkos_DesulAtomicsConfig.hpp} | 12 +- .../kokkos/core/src/impl/Kokkos_Error.cpp | 121 +- .../kokkos/core/src/impl/Kokkos_Error.hpp | 107 +- .../kokkos/core/src/impl/Kokkos_HostSpace.cpp | 23 +- .../core/src/impl/Kokkos_HostThreadTeam.hpp | 23 +- .../kokkos/core/src/impl/Kokkos_Profiling.cpp | 78 - .../kokkos/core/src/impl/Kokkos_Profiling.hpp | 59 +- .../src/impl/Kokkos_Profiling_C_Interface.h | 8 + .../core/src/impl/Kokkos_SharedAlloc.cpp | 35 - .../core/src/impl/Kokkos_SharedAlloc.hpp | 86 +- .../kokkos/core/src/impl/Kokkos_ViewArray.hpp | 622 ------ .../core/src/impl/Kokkos_ViewLayoutTiled.hpp | 1425 ------------- .../core/src/impl/Kokkos_ViewMapping.hpp | 544 ++--- .../core/src/setup/Kokkos_Setup_Cuda.hpp | 2 + .../core/src/setup/Kokkos_Setup_HIP.hpp | 2 + .../core/src/setup/Kokkos_Setup_SYCL.hpp | 17 + packages/kokkos/core/unit_test/CMakeLists.txt | 125 +- packages/kokkos/core/unit_test/Makefile | 22 +- .../kokkos/core/unit_test/TestAggregate.hpp | 108 - packages/kokkos/core/unit_test/TestArray.cpp | 193 +- .../kokkos/core/unit_test/TestArrayOps.hpp | 29 + .../core/unit_test/TestAtomicOperations.hpp | 8 +- .../unit_test/TestBitManipulationBuiltins.hpp | 6 - .../kokkos/core/unit_test/TestComplex.hpp | 166 +- .../unit_test/TestExecSpaceThreadSafety.hpp | 327 +++ .../core/unit_test/TestExecutionSpace.hpp | 56 + packages/kokkos/core/unit_test/TestGraph.hpp | 71 +- .../core/unit_test/TestLocalDeepCopy.hpp | 28 +- packages/kokkos/core/unit_test/TestMDSpan.hpp | 8 +- 
.../unit_test/TestMDSpanAtomicAccessor.hpp | 112 ++ .../core/unit_test/TestMDSpanConversion.hpp | 507 +++++ .../unit_test/TestMathematicalConstants.hpp | 3 +- .../unit_test/TestMathematicalFunctions.hpp | 80 +- .../kokkos/core/unit_test/TestMultiGPU.hpp | 184 ++ .../core/unit_test/TestNestedReducerCTAD.cpp | 246 +++ .../core/unit_test/TestNumericTraits.hpp | 105 +- packages/kokkos/core/unit_test/TestOther.hpp | 5 - .../unit_test/TestRangePolicyConstructors.hpp | 40 + .../kokkos/core/unit_test/TestRealloc.hpp | 13 + packages/kokkos/core/unit_test/TestResize.hpp | 13 + .../core/unit_test/TestSpaceAwareAccessor.hpp | 156 ++ .../TestSpaceAwareAccessorAccessViolation.hpp | 128 ++ .../unit_test/TestTeamMDRangePolicyCTAD.cpp | 199 ++ .../core/unit_test/TestTeamPolicyCTAD.cpp | 135 ++ .../kokkos/core/unit_test/TestTeamVector.hpp | 7 +- .../core/unit_test/TestTeamVectorRange.hpp | 6 - .../kokkos/core/unit_test/TestViewAPI.hpp | 98 +- .../kokkos/core/unit_test/TestViewAPI_c.hpp | 1 + .../kokkos/core/unit_test/TestViewAPI_d.hpp | 18 - .../core/unit_test/TestViewBadAlloc.hpp | 86 + .../kokkos/core/unit_test/TestViewCopy_c.hpp | 434 ++++ .../core/unit_test/TestViewLayoutTiled.hpp | 1756 ----------------- .../kokkos/core/unit_test/TestViewOfViews.hpp | 75 + .../kokkos/core/unit_test/TestViewSubview.hpp | 5 +- .../core/unit_test/UnitTest_ScopeGuard.cpp | 155 ++ .../category_files/TestHPX_Category.hpp | 1 + .../category_files/TestOpenACC_Category.hpp | 1 + .../TestOpenMPTarget_Category.hpp | 1 + .../category_files/TestSYCL_Category.hpp | 1 + .../category_files/TestThreads_Category.hpp | 1 + .../core/unit_test/cuda/TestCuda_Graph.cpp | 18 - .../cuda/TestCuda_InterOp_StreamsMultiGPU.cpp | 162 +- .../headers_self_contained/CMakeLists.txt | 3 +- .../hip/TestHIP_Memory_Requirements.cpp | 3 - .../incremental/Test01_execspace.hpp | 2 + .../unit_test/openmp/TestOpenMP_Graph.cpp | 18 - .../unit_test/serial/TestSerial_Graph.cpp | 18 - .../sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp | 64 + 
.../view/TestExtentsDatatypeConversion.cpp | 11 +- packages/kokkos/example/README | 4 +- .../build_cmake_installed/CMakeLists.txt | 1 + .../tutorial/01_hello_world/hello_world.cpp | 19 +- .../hello_world_lambda.cpp | 14 +- .../simple_reduce_lambda.cpp | 5 +- packages/kokkos/master_history.txt | 1 + .../scripts/docker/Dockerfile.openmptarget | 5 +- .../kokkos/scripts/docker/Dockerfile.sycl | 17 +- packages/kokkos/simd/src/Kokkos_SIMD.hpp | 9 +- packages/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp | 1011 +++++++++- .../kokkos/simd/src/Kokkos_SIMD_AVX512.hpp | 1234 +++++++++++- packages/kokkos/simd/src/Kokkos_SIMD_NEON.hpp | 835 +++++++- .../kokkos/simd/unit_tests/CMakeLists.txt | 12 +- .../unit_tests/include/SIMDTesting_Ops.hpp | 2 + .../include/SIMDTesting_Utilities.hpp | 12 +- .../unit_tests/include/TestSIMD_Condition.hpp | 66 +- .../include/TestSIMD_Conversions.hpp | 142 +- .../include/TestSIMD_GeneratorCtors.hpp | 124 +- .../unit_tests/include/TestSIMD_MaskOps.hpp | 80 +- .../unit_tests/include/TestSIMD_MathOps.hpp | 89 +- .../include/TestSIMD_Reductions.hpp | 40 +- .../unit_tests/include/TestSIMD_ShiftOps.hpp | 86 +- .../include/TestSIMD_WhereExpressions.hpp | 158 +- .../desul/include/desul/atomics/Adapt_HIP.hpp | 77 + .../include/desul/atomics/Atomic_Ref.hpp | 554 +----- .../desul/atomics/Compare_Exchange_HIP.hpp | 145 +- .../include/desul/atomics/Fetch_Op_CUDA.hpp | 54 +- .../desul/atomics/Fetch_Op_Generic.hpp | 92 +- .../include/desul/atomics/Fetch_Op_HIP.hpp | 167 +- .../atomics/Operator_Function_Objects.hpp | 34 +- packages/kokkos/tpls/gtest/gtest/gtest.h | 2 +- .../experimental/__p0009_bits/config.hpp | 2 +- .../experimental/__p0009_bits/extents.hpp | 95 +- .../experimental/__p0009_bits/layout_left.hpp | 26 +- .../__p0009_bits/layout_right.hpp | 25 +- .../__p0009_bits/layout_stride.hpp | 185 +- .../experimental/__p0009_bits/macros.hpp | 70 +- .../experimental/__p0009_bits/mdspan.hpp | 4 +- .../experimental/__p0009_bits/utility.hpp | 72 + 
.../experimental/__p2389_bits/dims.hpp} | 14 +- .../__p2630_bits/submdspan_mapping.hpp | 684 +++++-- .../__p2642_bits/layout_padded.hpp | 536 ++--- .../__p2642_bits/layout_padded_fwd.hpp | 62 +- .../tpls/mdspan/include/mdspan/mdspan.hpp | 1 + 259 files changed, 14381 insertions(+), 9966 deletions(-) create mode 100644 packages/kokkos/.olcf-gitlab-ci.yml create mode 100644 packages/kokkos/CITATION.cff create mode 100644 packages/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt create mode 100644 packages/kokkos/benchmarks/view_copy_constructor/Makefile create mode 100644 packages/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp delete mode 100644 packages/kokkos/cmake/deps/CUSPARSE.cmake delete mode 100644 packages/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake create mode 100644 packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelFor_MDRange.hpp rename packages/kokkos/core/src/OpenMPTarget/{Kokkos_OpenMPTarget_Parallel_MDRange.hpp => Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp} (61%) create mode 100644 packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp create mode 100644 packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp create mode 100644 packages/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp create mode 100644 packages/kokkos/core/src/View/Kokkos_ViewAlloc.hpp create mode 100644 packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp create mode 100644 packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp rename packages/kokkos/core/src/{Kokkos_Atomics_Desul_Config.hpp => impl/Kokkos_DesulAtomicsConfig.hpp} (72%) delete mode 100644 packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp delete mode 100644 packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp delete mode 100644 packages/kokkos/core/unit_test/TestAggregate.hpp create mode 100644 packages/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp create mode 100644 packages/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp create mode 
100644 packages/kokkos/core/unit_test/TestMDSpanConversion.hpp create mode 100644 packages/kokkos/core/unit_test/TestMultiGPU.hpp create mode 100644 packages/kokkos/core/unit_test/TestNestedReducerCTAD.cpp create mode 100644 packages/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp create mode 100644 packages/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp create mode 100644 packages/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp create mode 100644 packages/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp create mode 100644 packages/kokkos/core/unit_test/TestViewBadAlloc.hpp create mode 100644 packages/kokkos/core/unit_test/TestViewCopy_c.hpp delete mode 100644 packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp create mode 100644 packages/kokkos/core/unit_test/TestViewOfViews.hpp create mode 100644 packages/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp delete mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp delete mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp delete mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp create mode 100644 packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp create mode 100644 packages/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp create mode 100644 packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp rename packages/kokkos/{core/unit_test/hip/TestHIP_Graph.cpp => tpls/mdspan/include/experimental/__p2389_bits/dims.hpp} (59%) diff --git a/packages/kokkos/.jenkins b/packages/kokkos/.jenkins index ae3bffd92d72..0393ff06fb5e 100644 --- a/packages/kokkos/.jenkins +++ b/packages/kokkos/.jenkins @@ -58,7 +58,7 @@ pipeline { make -j8 && ctest --verbose''' } } - stage('CUDA-12.2-NVHPC') { + stage('CUDA-12.2-NVHPC-AS-HOST-COMPILER') { agent { dockerfile { filename 'Dockerfile.nvhpc' @@ -82,7 +82,7 @@ pipeline { -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=nvc++ \ -DCMAKE_CXX_STANDARD=17 \ - 
-DCMAKE_CXX_FLAGS="--diag_suppress=implicit_return_from_non_void_function,no_device_stack" \ + -DCMAKE_CXX_FLAGS="--diag_suppress=implicit_return_from_non_void_function" \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ @@ -90,8 +90,6 @@ pipeline { -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_OPENMP=ON \ - -DKokkos_ENABLE_IMPL_MDSPAN=ON \ - -DKokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON \ .. && \ make -j8 && ctest --verbose''' } @@ -113,6 +111,7 @@ pipeline { -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ + -DCMAKE_PREFIX_PATH="$ONE_DPL_DIR" \ -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ARCH_AMPERE80=ON \ @@ -166,6 +165,7 @@ pipeline { -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_HIP=ON \ -DKokkos_ENABLE_OPENMP=ON \ + -DKokkos_ENABLE_IMPL_MDSPAN=OFF \ -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON \ .. 
&& \ make -j8 && ctest --verbose''' @@ -181,7 +181,7 @@ pipeline { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.6-complete' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-22.04:5.6-complete' label 'rocm-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } @@ -190,6 +190,7 @@ pipeline { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ + -DBUILD_SHARED_LIBS=ON \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=hipcc \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument" \ @@ -345,7 +346,7 @@ pipeline { sh '''rm -rf build && mkdir -p build && cd build && \ ../gnu_generate_makefile.bash \ --with-options=compiler_warnings \ - --cxxflags="-Werror" \ + --cxxflags="-Werror -Werror all-warnings -Xcudafe --diag_suppress=20208" \ --cxxstandard=c++17 \ --with-cuda \ --with-cuda-options=enable_lambda \ @@ -365,7 +366,7 @@ pipeline { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.0.3-devel-ubuntu18.04 --build-arg ADDITIONAL_PACKAGES="g++-8 gfortran clang" --build-arg CMAKE_VERSION=3.17.3' - label 'nvidia-docker' + label 'nvidia-docker && (volta || ampere)' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } @@ -396,7 +397,6 @@ pipeline { -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ - -DKokkos_ENABLE_IMPL_MDSPAN=ON \ -DCMAKE_INSTALL_PREFIX=${PWD}/../install \ .. 
&& \ make -j8 install && \ @@ -437,7 +437,7 @@ pipeline { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.6.2-devel-ubuntu20.04' - label 'nvidia-docker' + label 'nvidia-docker && (volta || ampere)' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } @@ -445,10 +445,11 @@ pipeline { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ + -DBUILD_SHARED_LIBS=ON \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ - -DCMAKE_CXX_FLAGS=-Werror \ + -DCMAKE_CXX_FLAGS="-Werror -Werror all-warnings -Xcudafe --diag_suppress=20208" \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ @@ -460,7 +461,7 @@ pipeline { -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_LIBDL=OFF \ - -DKokkos_ENABLE_IMPL_MDSPAN=ON \ + -DKokkos_ENABLE_IMPL_MDSPAN=OFF \ -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF \ .. 
&& \ make -j8 && ctest --verbose && \ @@ -497,7 +498,7 @@ pipeline { -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ - -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_OPENMP=ON \ diff --git a/packages/kokkos/.jenkins_nightly b/packages/kokkos/.jenkins_nightly index 5d5858178913..8dd02e9f028a 100644 --- a/packages/kokkos/.jenkins_nightly +++ b/packages/kokkos/.jenkins_nightly @@ -70,38 +70,74 @@ pipeline { ''' } } - stage('GCC-13') { + stage('GCC-14') { agent { docker { - image 'gcc:13.1' + image 'gcc:14.1' label 'docker' } } steps { sh ''' - DEBIAN_FRONTEND=noninteractive && \ - apt-get update && apt-get upgrade -y && apt-get install -y \ - cmake \ - && \ - apt-get clean && rm -rf /var/lib/apt/lists/* + wget https://github.com/Kitware/CMake/releases/download/v3.30.0/cmake-3.30.0-linux-x86_64.sh && \ + chmod +x cmake-3.30.0-linux-x86_64.sh && ./cmake-3.30.0-linux-x86_64.sh --skip-license --prefix=/usr mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_STANDARD=23 \ + -DCMAKE_CXX_STANDARD=26 \ -DCMAKE_CXX_FLAGS=-Werror \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_TESTS=ON \ - -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=ON \ -DKokkos_ENABLE_SERIAL=ON \ .. 
&& \ make -j8 && ctest --verbose ''' } } + stage('HIP-ROCM-6.1') { + agent { + dockerfile { + filename 'Dockerfile.hipcc' + dir 'scripts/docker' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-22.04:6.1.2-complete' + label 'rocm-docker && AMD_Radeon_Instinct_MI210' + args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' + } + } + environment { + // FIXME Test returns a wrong value + GTEST_FILTER = '-hip_hostpinned.view_allocation_large_rank' + } + steps { + sh 'ccache --zero-stats' + sh '''rm -rf build && mkdir -p build && cd build && \ + cmake \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_CXX_COMPILER=hipcc \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument" \ + -DCMAKE_CXX_STANDARD=20 \ + -DKokkos_ARCH_NATIVE=ON \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DKokkos_ENABLE_HIP=ON \ + .. 
&& \ + make -j8 && ctest --verbose''' + } + post { + always { + sh 'ccache --show-stats' + } + } + } } } } diff --git a/packages/kokkos/.olcf-gitlab-ci.yml b/packages/kokkos/.olcf-gitlab-ci.yml new file mode 100644 index 000000000000..4e737cc536ec --- /dev/null +++ b/packages/kokkos/.olcf-gitlab-ci.yml @@ -0,0 +1,12 @@ +test: + stage: test + tags: [frontier, shell] + id_tokens: + OLCF_ID_TOKEN: + aud: https://code.olcf.ornl.gov + script: + - module load rocm/6.0 + - cmake -B build -DCMAKE_CXX_COMPILER=hipcc -DKokkos_ENABLE_HIP=ON -DKokkos_ENABLE_TESTS=ON + - cmake --build build -j48 + - cd build + - ctest -E Kokkos_CoreUnitTest_DeviceAndThreads -V diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 4fbc9002973e..78225f9e6c27 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,12 +1,88 @@ # CHANGELOG +## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00) + +### Features: +* Add `Kokkos::View` conversions from and to [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) [\#6830](https://github.com/kokkos/kokkos/pull/6830) [\#7069](https://github.com/kokkos/kokkos/pull/7069) + +### Backend and Architecture Enhancements: + +#### CUDA: +* `nvcc_wrapper`: Adding ability to process `--disable-warnings` flag [\#6936](https://github.com/kokkos/kokkos/issues/6936) +* Use recommended/max team size functions in Cuda ParallelFor and Reduce constructors [\#6891](https://github.com/kokkos/kokkos/issues/6891) +* Improve compile-times when building with `Kokkos_ENABLE_DEBUG_BOUNDS_CHECK` in Cuda [\#7013](https://github.com/kokkos/kokkos/pull/7013) + +#### HIP: +* Use HIP builtin atomics [\#6882](https://github.com/kokkos/kokkos/pull/6882) [\#7000](https://github.com/kokkos/kokkos/pull/7000) +* Enable user-specified compiler and linker flags for AMD GPUs [\#7127](https://github.com/kokkos/kokkos/pull/7127) + +#### SYCL: +* Add 
support for Graphs [\#6912](https://github.com/kokkos/kokkos/pull/6912) +* Fix multi-GPU support [\#6887](https://github.com/kokkos/kokkos/pull/6887) +* Improve performance of reduction and scan operations [\#6562](https://github.com/kokkos/kokkos/pull/6562), [\#6750](https://github.com/kokkos/kokkos/pull/6750) +* Fix lock for guarding scratch space in `TeamPolicy` `parallel_reduce` [\#6988](https://github.com/kokkos/kokkos/pull/6988) +* Include submission command queue property information into `SYCL::print_configuration()` [\#7004](https://github.com/kokkos/kokkos/pull/7004) + +#### OpenACC: +* Make `TeamPolicy` `parallel_for` execute on the correct async queue [\#7012](https://github.com/kokkos/kokkos/pull/7012) + +#### OpenMPTarget: +* Honor user requested loop ordering in `MDRange` policy [\#6925](https://github.com/kokkos/kokkos/pull/6925) +* Prevent data races by guarding the scratch space used in `parallel_scan` [\#6998](https://github.com/kokkos/kokkos/pull/6998) + +#### HPX: +* Workaround issue with template argument deduction to support compilation with NVCC [\#7015](https://github.com/kokkos/kokkos/pull/7015) + +### General Enhancements +* Improve performance of view copies in host parallel regions [\#6730](https://github.com/kokkos/kokkos/pull/6730) +* Harmonize convertibility rules of `Kokkos::RandomAccessIterator` with `View`s [\#6929](https://github.com/kokkos/kokkos/pull/6929) +* Add a precondition check for non-overlapping ranges in the `adjacent_difference` algorithm in debug mode [\#6922](https://github.com/kokkos/kokkos/pull/6922) +* Add deduction guides for `TeamPolicy` [\#7030](https://github.com/kokkos/kokkos/pull/7030) +* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802) +* Updates for `Kokkos::Array`: add `kokkos_swap(Array)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array`
equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148) +* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040) + +### Build System Changes +* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965) +* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895) +* Fix use of OpenMP with Cuda or HIP as compile language [\#6972](https://github.com/kokkos/kokkos/pull/6972) +* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123) +* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158) +* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896) +* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987) +* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062) + +### Incompatibilities (i.e. 
breaking changes) +* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906) +* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145) + +### Deprecations +* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907) +* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076) +* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947) +* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812) +* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934) +* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151) +* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156) + +### Bug Fixes +* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910) +* Fix `realloc` on views of non-default constructible element types [\#6993](https://github.com/kokkos/kokkos/pull/6993) +* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014) +* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059) +* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065) +* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289) +* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098) +* Thread safety fixes for 
the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151) + ## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) ### Backend and Architecture Enhancements: #### HIP: -* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877) +* MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877) ### Bug Fixes * Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) diff --git a/packages/kokkos/CITATION.cff b/packages/kokkos/CITATION.cff new file mode 100644 index 000000000000..28c674c451bf --- /dev/null +++ b/packages/kokkos/CITATION.cff @@ -0,0 +1,65 @@ +cff-version: 1.2.0 +title: Kokkos +message: >- + If you use this software, please cite the overview paper +type: software +authors: + - name: The Kokkos authors + website: https://kokkos.org/community/team/ +identifiers: + - type: url + website: https://kokkos.org/kokkos-core-wiki/citation.html +repository-code: 'https://github.com/kokkos/kokkos' +url: 'https://kokkos.org/' +license: Apache-2.0 +preferred-citation: + type: article + authors: + - given-names: Christian R. + family-names: Trott + - given-names: Damien + family-names: Lebrun-Grandié + - given-names: Daniel + family-names: Arndt + - family-names: Ciesko + given-names: Jan + - given-names: Vinh + family-names: Dang + - family-names: Ellingwood + given-names: Nathan + - given-names: Rahulkumar + family-names: Gayatri + - given-names: Evan + family-names: Harvey + - given-names: Daisy S. 
+ family-names: Hollman + - given-names: Dan + family-names: Ibanez + - given-names: Nevin + family-names: Liber + - given-names: Jonathan + family-names: Madsen + - given-names: Jeff + family-names: Miles + - given-names: David + family-names: Poliakoff + - given-names: Amy + family-names: Powell + - given-names: Sivasankaran + family-names: Rajamanickam + - given-names: Mikael + family-names: Simberg + - given-names: Dan + family-names: Sunderland + - given-names: Bruno + family-names: Turcksin + - given-names: Jeremiah + family-names: Wilke + doi: 10.1109/TPDS.2021.3097283 + journal: IEEE Transactions on Parallel and Distributed Systems + start: 805 + end: 817 + title: "Kokkos 3: Programming Model Extensions for the Exascale Era" + volume: 33 + issue: 4 + year: 2022 diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index 76f2183db8ac..054de2c1dae8 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -150,8 +150,8 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 3) -set(Kokkos_VERSION_PATCH 1) +set(Kokkos_VERSION_MINOR 4) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index d9be7901a382..15f24f30732a 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -1,8 +1,8 @@ # Default settings common options. 
KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 3 -KOKKOS_VERSION_PATCH = 1 +KOKKOS_VERSION_MINOR = 4 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -11,11 +11,11 @@ KOKKOS_DEVICES ?= "Threads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 -# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX +# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 -# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC +# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" @@ -318,12 +318,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL) KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) +# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter +# matches the CMake option but we also accept the former for backward-compatibility. 
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP) +endif +KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) +endif +KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \ + + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \ + + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP)) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen) + endif +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP) +endif +# Traditionally the architecture was called PVC 
instead of Intel_PVC. This +# version makes us accept IntelPVC and Intel_PVC as well. KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) # NVIDIA based. @@ -384,7 +415,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8 KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2) KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX) -KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc)) +KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace) +KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc)) # IBM based. 
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8) @@ -747,6 +779,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") + + KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 + KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") @@ -1200,6 +1240,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0) endif tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN") +tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY") + KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) diff --git a/packages/kokkos/Makefile.targets b/packages/kokkos/Makefile.targets index e6900a822a89..e8e429e02750 100644 --- a/packages/kokkos/Makefile.targets +++ b/packages/kokkos/Makefile.targets @@ -81,7 +81,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) diff --git a/packages/kokkos/README.md b/packages/kokkos/README.md index 
19793bb82d94..c8c6f8f7cf50 100644 --- a/packages/kokkos/README.md +++ b/packages/kokkos/README.md @@ -1,4 +1,4 @@ -![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) +[![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)](https://kokkos.org) # Kokkos: Core Libraries @@ -10,43 +10,66 @@ hierarchies and multiple types of execution resources. It currently can use CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other backends in development. -**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** +**Kokkos Core is part of the [Kokkos C++ Performance Portability Programming Ecosystem](https://kokkos.org/about/abstract/).** -For the complete documentation, click below: +Kokkos is a [Linux Foundation](https://linuxfoundation.org) project. -# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) - -# Learning about Kokkos +## Learning about Kokkos To start learning about Kokkos: -- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. +- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. -- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. +- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. 
- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). +- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html). -- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. +- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. -For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue. +## Obtaining Kokkos -For non-public questions send an email to: *crtrott(at)sandia.gov* +The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). + +The current release is [4.3.01](https://github.com/kokkos/kokkos/releases/tag/4.3.01).
+ +```bash +curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz +# Or with wget +wget https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz +``` + +To clone the latest development version of Kokkos from GitHub: + +```bash +git clone -b develop https://github.com/kokkos/kokkos.git +``` -# Contributing to Kokkos +### Building Kokkos -Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. +To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html). -# Requirements, Building and Installing +Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html). + +You can also install Kokkos using [Spack](https://spack.io/): `spack install kokkos`. [Available configuration options](https://packages.spack.io/package.html?name=kokkos) can be displayed using `spack info kokkos`. + +## For the complete documentation: [kokkos.org/kokkos-core-wiki/](https://kokkos.org/kokkos-core-wiki/) + +## Support + +For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue. + +For non-public questions send an email to: *crtrott(at)sandia.gov* -All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). +## Contributing -Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). +Please see [this page](https://kokkos.org/kokkos-core-wiki/contributing.html) for details on how to contribute. -# Citing Kokkos +## Citing Kokkos -Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html). +Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.html). 
-# License +## License [![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html) diff --git a/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp b/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp index 36deccdfb1e2..f11f80704843 100644 --- a/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp +++ b/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp @@ -189,6 +189,33 @@ void applyPermutation(const ExecutionSpace& space, KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); }); } +// FIXME_NVCC: nvcc has trouble compiling lambdas inside a function with +// variadic templates (sort_by_key_via_sort). Switch to using functors instead. +template +struct IotaFunctor { + Permute _permute; + KOKKOS_FUNCTION void operator()(int i) const { _permute(i) = i; } +}; +template +struct LessFunctor { + Keys _keys; + KOKKOS_FUNCTION bool operator()(int i, int j) const { + return _keys(i) < _keys(j); + } +}; + +// FIXME_NVCC+MSVC: We can't use a lambda instead of a functor which gave us +// "For this host platform/dialect, an extended lambda cannot be defined inside +// the 'if' or 'else' block of a constexpr if statement" +template +struct KeyComparisonFunctor { + Keys m_keys; + Comparator m_comparator; + KOKKOS_FUNCTION bool operator()(int i, int j) const { + return m_comparator(m_keys(i), m_keys(j)); + } +}; + template @@ -207,10 +234,9 @@ void sort_by_key_via_sort( n); // iota - Kokkos::parallel_for( - "Kokkos::sort_by_key_via_sort::iota", - Kokkos::RangePolicy(exec, 0, n), - KOKKOS_LAMBDA(int i) { permute(i) = i; }); + Kokkos::parallel_for("Kokkos::sort_by_key_via_sort::iota", + Kokkos::RangePolicy(exec, 0, n), + IotaFunctor{permute}); using Layout = typename Kokkos::View::array_layout; @@ -228,16 +254,15 @@ void sort_by_key_via_sort( Kokkos::DefaultHostExecutionSpace host_exec; if constexpr (sizeof...(MaybeComparator) == 0) 
{ - Kokkos::sort( - host_exec, host_permute, - KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); }); + Kokkos::sort(host_exec, host_permute, + LessFunctor{host_keys}); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( - host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) { - return keys_comparator(host_keys(i), host_keys(j)); - }); + host_exec, host_permute, + KeyComparisonFunctor{ + host_keys, keys_comparator}); } host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort"); Kokkos::deep_copy(exec, permute, host_permute); @@ -262,16 +287,14 @@ void sort_by_key_via_sort( } #else if constexpr (sizeof...(MaybeComparator) == 0) { - Kokkos::sort( - exec, permute, - KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); }); + Kokkos::sort(exec, permute, LessFunctor{keys}); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( - exec, permute, KOKKOS_LAMBDA(int i, int j) { - return keys_comparator(keys(i), keys(j)); - }); + exec, permute, + KeyComparisonFunctor{ + keys, keys_comparator}); } #endif } diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index 6215b325afc7..05969be463a5 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -29,49 +29,46 @@ namespace Experimental { template < class ExecutionSpace, class IteratorType, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { - return Impl::for_each_exespace_impl(label, ex, first, last, - std::move(functor)); +void for_each(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, UnaryFunctorType functor) { + 
Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor)); } template < class ExecutionSpace, class IteratorType, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", - ex, first, last, std::move(functor)); +void for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, + UnaryFunctorType functor) { + Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex, + first, last, std::move(functor)); } template < class ExecutionSpace, class DataType, class... Properties, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +void for_each(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); } template < class ExecutionSpace, class DataType, class... 
Properties, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const ExecutionSpace& ex, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +void for_each(const ExecutionSpace& ex, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), std::move(functor)); } // @@ -82,24 +79,23 @@ UnaryFunctorType for_each(const ExecutionSpace& ex, template , int> = 0> -KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { - return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); +KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); } template , int> = 0> -KOKKOS_FUNCTION UnaryFunctorType -for_each(const TeamHandleType& teamHandle, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), + std::move(functor)); } } // namespace Experimental diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index a8171fa068d1..9f7fcf94fe0b 100644 --- 
a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -82,6 +82,11 @@ OutputIteratorType adjacent_difference_exespace_impl( return first_dest; } +#ifdef KOKKOS_ENABLE_DEBUG + // check for overlapping iterators + Impl::expect_no_overlap(first_from, last_from, first_dest); +#endif + // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); @@ -114,6 +119,11 @@ KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( return first_dest; } +#ifdef KOKKOS_ENABLE_DEBUG + // check for overlapping iterators + Impl::expect_no_overlap(first_from, last_from, first_dest); +#endif + // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 27ce5a6fad6e..54bb13e25b9e 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -24,18 +24,21 @@ namespace Kokkos { namespace Experimental { namespace Impl { +template +class RandomAccessIterator; + template struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; template struct is_admissible_to_kokkos_std_algorithms< - T, std::enable_if_t< ::Kokkos::is_view::value && T::rank() == 1 && - (std::is_same::value || - std::is_same::value || - std::is_same::value)> > + T, std::enable_if_t<::Kokkos::is_view::value && T::rank() == 1 && + (std::is_same::value || + std::is_same::value || + std::is_same::value)>> : std::true_type {}; template @@ -58,6 +61,18 @@ using is_iterator = Kokkos::is_detected; template inline constexpr bool is_iterator_v = is_iterator::value; +template +struct is_kokkos_iterator : std::false_type {}; + +template +struct is_kokkos_iterator> { + static 
constexpr bool value = + is_admissible_to_kokkos_std_algorithms::value; +}; + +template +inline constexpr bool is_kokkos_iterator_v = is_kokkos_iterator::value; + // // are_iterators // @@ -215,6 +230,38 @@ KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, (void)last; } +// +// Check if kokkos iterators are overlapping +// +template +KOKKOS_INLINE_FUNCTION void expect_no_overlap( + [[maybe_unused]] IteratorType1 first, [[maybe_unused]] IteratorType1 last, + [[maybe_unused]] IteratorType2 s_first) { + if constexpr (is_kokkos_iterator_v && + is_kokkos_iterator_v) { + auto const view1 = first.view(); + auto const view2 = s_first.view(); + + std::size_t stride1 = view1.stride(0); + std::size_t stride2 = view2.stride(0); + ptrdiff_t first_diff = view1.data() - view2.data(); + + // FIXME If strides are not identical, checks may not be made + // with the cost of O(1) + // Currently, checks are made only if strides are identical + // If first_diff == 0, there is already an overlap + if (stride1 == stride2 || first_diff == 0) { + [[maybe_unused]] bool is_no_overlap = (first_diff % stride1); + auto* first_pointer1 = view1.data(); + auto* first_pointer2 = view2.data(); + [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first); + [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first); + KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 || + last_pointer1 <= first_pointer2 || is_no_overlap); + } + } +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index 3c1e2474bc9c..ad7b8bb8cab6 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -150,8 +150,9 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl( return d_first + count; } -#if defined 
KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) __builtin_unreachable(); #endif } diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index d3be3b7f6670..99cc4a1cf3a6 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -42,10 +42,9 @@ struct StdForEachFunctor { }; template -UnaryFunctorType for_each_exespace_impl(const std::string& label, - const HandleType& handle, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { +void for_each_exespace_impl(const std::string& label, const HandleType& handle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); @@ -56,8 +55,6 @@ UnaryFunctorType for_each_exespace_impl(const std::string& label, label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); handle.fence("Kokkos::for_each: fence after operation"); - - return functor; } template -KOKKOS_FUNCTION UnaryFunctorType -for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, - IteratorType last, UnaryFunctorType functor) { +KOKKOS_FUNCTION void for_each_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); @@ -96,7 +93,6 @@ for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, TeamThreadRange(teamHandle, 0, num_elements), StdForEachFunctor(first, functor)); 
teamHandle.team_barrier(); - return functor; } template > { ptrdiff_t current_index) : m_view(view), m_current_index(current_index) {} +#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond + template + requires(std::is_constructible_v) KOKKOS_FUNCTION + explicit(!std::is_convertible_v) + RandomAccessIterator(const RandomAccessIterator& other) + : m_view(other.m_view), m_current_index(other.m_current_index) {} +#else + template < + class OtherViewType, + std::enable_if_t && + !std::is_convertible_v, + int> = 0> + KOKKOS_FUNCTION explicit RandomAccessIterator( + const RandomAccessIterator& other) + : m_view(other.m_view), m_current_index(other.m_current_index) {} + + template , + int> = 0> + KOKKOS_FUNCTION RandomAccessIterator( + const RandomAccessIterator& other) + : m_view(other.m_view), m_current_index(other.m_current_index) {} +#endif + KOKKOS_FUNCTION iterator_type& operator++() { ++m_current_index; @@ -152,9 +176,16 @@ class RandomAccessIterator< ::Kokkos::View > { KOKKOS_FUNCTION reference operator*() const { return m_view(m_current_index); } + KOKKOS_FUNCTION + view_type view() const { return m_view; } + private: view_type m_view; ptrdiff_t m_current_index = 0; + + // Needed for the converting constructor accepting another iterator + template + friend class RandomAccessIterator; }; } // namespace Impl diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp index c7c293027862..710d04805d8f 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp @@ -175,8 +175,9 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl( d_first + count); } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + 
!defined(KOKKOS_COMPILER_MSVC)) __builtin_unreachable(); #endif } diff --git a/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp index 282d85548c55..7d484136b6dd 100644 --- a/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -46,6 +46,44 @@ TEST_F(random_access_iterator_test, constructor) { EXPECT_TRUE(true); } +TEST_F(random_access_iterator_test, constructiblity) { + auto first_d = KE::begin(m_dynamic_view); + auto cfirst_d = KE::cbegin(m_dynamic_view); + + static_assert(std::is_constructible_v); + static_assert( + !std::is_constructible_v); + [[maybe_unused]] decltype(cfirst_d) tmp_cfirst_d(first_d); + + auto first_s = KE::begin(m_static_view); + auto cfirst_s = KE::cbegin(m_static_view); + + static_assert(std::is_constructible_v); + static_assert( + !std::is_constructible_v); + [[maybe_unused]] decltype(cfirst_s) tmp_cfirst_s(first_s); + + auto first_st = KE::begin(m_strided_view); + auto cfirst_st = KE::cbegin(m_strided_view); + + static_assert( + std::is_constructible_v); + static_assert( + !std::is_constructible_v); + [[maybe_unused]] decltype(cfirst_st) tmp_cfirst_st(first_st); + + // [FIXME] Better to have tests for the explicit specifier with an expression. + // As soon as View converting constructors are re-implemented with a + // conditional explicit, we may add those tests. 
+ static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + EXPECT_TRUE(true); +} + template void test_random_access_it_verify(IteratorType it, ValueType gold_value) { using view_t = Kokkos::View; diff --git a/packages/kokkos/algorithms/unit_tests/TestSortByKey.hpp b/packages/kokkos/algorithms/unit_tests/TestSortByKey.hpp index 16f68eaaf267..9e5bd4a57487 100644 --- a/packages/kokkos/algorithms/unit_tests/TestSortByKey.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestSortByKey.hpp @@ -69,7 +69,7 @@ void iota(ExecutionSpace const &space, ViewType const &v, typename ViewType::value_type value = 0) { using ValueType = typename ViewType::value_type; Kokkos::parallel_for( - "ArborX::Algorithms::iota", + "Kokkos::Algorithms::iota", Kokkos::RangePolicy(space, 0, v.extent(0)), KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; }); } @@ -87,6 +87,18 @@ TEST(TEST_CATEGORY, SortByKeyEmptyView) { Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values)); } +// Test #7036 +TEST(TEST_CATEGORY, SortByKeyEmptyViewHost) { + using ExecutionSpace = Kokkos::DefaultHostExecutionSpace; + + // does not matter if we use int or something else + Kokkos::View keys("keys", 0); + Kokkos::View values("values", 0); + + ASSERT_NO_THROW( + Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values)); +} + TEST(TEST_CATEGORY, SortByKey) { using ExecutionSpace = TEST_EXECSPACE; using MemorySpace = typename ExecutionSpace::memory_space; diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 386d533f7a83..2a4525a8c332 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -81,5 
+81,114 @@ TEST(std_algorithms, is_admissible_to_std_algorithms) { strided_view_3d_t>::value); } +TEST(std_algorithms, expect_no_overlap) { + namespace KE = Kokkos::Experimental; + using value_type = double; + + static constexpr size_t extent0 = 13; + + //------------- + // 1d views + //------------- + using static_view_1d_t = Kokkos::View; + [[maybe_unused]] static_view_1d_t static_view_1d{ + "std-algo-test-1d-contiguous-view-static"}; + + using dyn_view_1d_t = Kokkos::View; + [[maybe_unused]] dyn_view_1d_t dynamic_view_1d{ + "std-algo-test-1d-contiguous-view-dynamic", extent0}; + + using strided_view_1d_t = Kokkos::View; + Kokkos::LayoutStride layout1d{extent0, 2}; + strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d}; + +// Overlapping because iterators are identical +#if defined(KOKKOS_ENABLE_DEBUG) + auto first_s = KE::begin(static_view_1d); + auto last_s = first_s + extent0; + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s, last_s, first_s); }, + "Kokkos contract violation:.*"); + + auto first_d = KE::begin(dynamic_view_1d); + auto last_d = first_d + extent0; + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d, last_d, first_d); }, + "Kokkos contract violation:.*"); + + auto first_st = KE::begin(strided_view_1d); + auto last_st = first_st + extent0; + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_st, last_st, first_st); }, + "Kokkos contract violation:.*"); +#endif + + // Ranges are overlapped + static constexpr size_t sub_extent0 = 6, offset0 = 3; + std::pair range0(0, sub_extent0), + range1(offset0, offset0 + sub_extent0); +#if defined(KOKKOS_ENABLE_DEBUG) + auto static_view_1d_0 = Kokkos::subview(static_view_1d, range0); + auto static_view_1d_1 = Kokkos::subview(static_view_1d, range1); + auto first_s0 = KE::begin(static_view_1d_0); // [0, 6) + auto last_s0 = first_s0 + static_view_1d_0.extent(0); + auto first_s1 = KE::begin(static_view_1d_1); // [3, 9) + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s0, last_s0, 
first_s1); }, + "Kokkos contract violation:.*"); + + auto dynamic_view_1d_0 = Kokkos::subview(dynamic_view_1d, range0); + auto dynamic_view_1d_1 = Kokkos::subview(dynamic_view_1d, range1); + auto first_d0 = KE::begin(dynamic_view_1d_0); // [0, 6) + auto last_d0 = first_d0 + dynamic_view_1d_0.extent(0); + auto first_d1 = KE::begin(dynamic_view_1d_1); // [3, 9) + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d0, last_d0, first_d1); }, + "Kokkos contract violation:.*"); +#endif + + auto strided_view_1d_0 = Kokkos::subview(strided_view_1d, range0); + auto strided_view_1d_1 = Kokkos::subview(strided_view_1d, range1); + auto first_st0 = KE::begin(strided_view_1d_0); // [0, 12) + auto last_st0 = first_st0 + strided_view_1d_0.extent(0); + auto first_st1 = KE::begin(strided_view_1d_1); // [3, 15) + // Does not overlap since offset (=3) is not divisible by stride (=2) + EXPECT_NO_THROW( + { KE::Impl::expect_no_overlap(first_st0, last_st0, first_st1); }); + + // Iterating over the same range without overlapping + Kokkos::View static_view_2d{ + "std-algo-test-2d-contiguous-view-static"}; + auto sub_static_view_1d_0 = Kokkos::subview(static_view_2d, 0, Kokkos::ALL); + auto sub_static_view_1d_1 = Kokkos::subview(static_view_2d, 1, Kokkos::ALL); + auto sub_first_s0 = KE::begin(sub_static_view_1d_0); // 0, 2, 4, ... + auto sub_last_s0 = sub_first_s0 + sub_static_view_1d_0.extent(0); + auto sub_first_s1 = KE::begin(sub_static_view_1d_1); // 1, 3, 5, ... + + EXPECT_NO_THROW({ + KE::Impl::expect_no_overlap(sub_first_s0, sub_last_s0, sub_first_s1); + }); + + Kokkos::View dynamic_view_2d{ + "std-algo-test-2d-contiguous-view-dynamic", 2, extent0}; + auto sub_dynamic_view_1d_0 = Kokkos::subview(dynamic_view_2d, 0, Kokkos::ALL); + auto sub_dynamic_view_1d_1 = Kokkos::subview(dynamic_view_2d, 1, Kokkos::ALL); + auto sub_first_d0 = KE::begin(sub_dynamic_view_1d_0); // 0, 2, 4, ... 
+ auto sub_last_d0 = sub_first_d0 + sub_dynamic_view_1d_0.extent(0); + auto sub_first_d1 = KE::begin(sub_dynamic_view_1d_1); // 1, 3, 5, ... + + EXPECT_NO_THROW({ + KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1); + }); + + Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3}; + Kokkos::View strided_view_2d{ + "std-algo-test-2d-contiguous-view-strided", layout2d}; + auto sub_strided_view_1d_0 = Kokkos::subview(strided_view_2d, 0, Kokkos::ALL); + auto sub_strided_view_1d_1 = Kokkos::subview(strided_view_2d, 1, Kokkos::ALL); + auto sub_first_st0 = KE::begin(sub_strided_view_1d_0); // 0, 6, 12, ... + auto sub_last_st0 = sub_first_st0 + sub_strided_view_1d_0.extent(0); + auto sub_first_st1 = KE::begin(sub_strided_view_1d_1); // 1, 7, 13, ... + + EXPECT_NO_THROW({ + KE::Impl::expect_no_overlap(sub_first_st0, sub_last_st0, sub_first_st1); + }); +} + } // namespace stdalgos } // namespace Test diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp index 2c8fee02f473..7cb9851087a1 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp @@ -85,7 +85,7 @@ struct TestFunctorA { break; } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET case 2: { auto it = KE::exclusive_scan( @@ -213,7 +213,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET case 2: case 3: { auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), @@ -242,7 +242,7 @@ template void run_all_scenarios() { for (int numTeams : teamSizesToTest) { for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET for (int apiId : {0, 1, 2, 3}) { #else 
for (int apiId : {0, 1}) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp index f9adeb0654b8..850e80dde1e0 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp @@ -52,7 +52,7 @@ struct TestFunctorA { Kokkos::single(Kokkos::PerTeam(member), [=, *this]() { m_returnsView(myRowIndex) = result; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; result = KE::is_sorted(member, KE::cbegin(myRowView), KE::cend(myRowView), @@ -179,7 +179,7 @@ template void run_all_scenarios(bool makeDataSortedOnPurpose) { for (int numTeams : teamSizesToTest) { for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 5153}) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET for (int apiId : {0, 1, 2, 3}) { #else for (int apiId : {0, 1}) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp index 33af5f99def6..e3b95527c77f 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp @@ -73,7 +73,7 @@ struct TestFunctorA { m_distancesView(myRowIndex) = resultDist; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto it = KE::is_sorted_until(member, KE::cbegin(myRowView), @@ -226,7 +226,7 @@ template void run_all_scenarios(const std::string& name, const std::vector& cols) { for (int numTeams : teamSizesToTest) { for (const auto& numCols : cols) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef 
KOKKOS_ENABLE_OPENMPTARGET for (int apiId : {0, 1, 2, 3}) { #else for (int apiId : {0, 1}) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp index fb891a8780fb..283525dbd10f 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp @@ -59,7 +59,7 @@ struct TestFunctorA { m_distancesView(myRowIndex) = resultDist; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto it = @@ -170,7 +170,7 @@ void run_all_scenarios() { } TEST(std_algorithms_max_element_team_test, test) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp index 4ba1b6f968bc..8579b48315d8 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp @@ -59,7 +59,7 @@ struct TestFunctorA { m_distancesView(myRowIndex) = resultDist; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto it = @@ -169,7 +169,7 @@ void run_all_scenarios() { } TEST(std_algorithms_min_element_team_test, test) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp index 
17562a55727b..51010fdff59b 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp @@ -66,7 +66,7 @@ struct TestFunctorA { m_distancesView(myRowIndex, 1) = resultDist2; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto itPair = @@ -188,7 +188,7 @@ void run_all_scenarios() { } TEST(std_algorithms_minmax_element_team_test, test) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp index 94c2a8f1f9a7..eb00d9e083a2 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp index 60fa369af180..1c438543819d 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp index 10454d65515b..0b0d798fd801 100644 --- 
a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp index b0a3241ec4bf..17ded226aae0 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/packages/kokkos/appveyor.yml b/packages/kokkos/appveyor.yml index c0b6e9cab9f7..d0a5645ef7b6 100644 --- a/packages/kokkos/appveyor.yml +++ b/packages/kokkos/appveyor.yml @@ -5,6 +5,6 @@ build_script: - cmd: >- mkdir build && cd build && - cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF && + cmake c:\projects\source -DKokkos_ENABLE_IMPL_MDSPAN=OFF -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF && cmake --build . 
--target install && ctest -C Debug --output-on-failure diff --git a/packages/kokkos/benchmarks/CMakeLists.txt b/packages/kokkos/benchmarks/CMakeLists.txt index abf502835947..529ef393d994 100644 --- a/packages/kokkos/benchmarks/CMakeLists.txt +++ b/packages/kokkos/benchmarks/CMakeLists.txt @@ -4,7 +4,7 @@ KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather) KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups) KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency) KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream) - +KOKKOS_ADD_BENCHMARK_DIRECTORIES(view_copy_constructor) #FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow. IF(NOT Kokkos_ENABLE_OPENMPTARGET) KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance) diff --git a/packages/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt b/packages/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt new file mode 100644 index 000000000000..50a331b2b354 --- /dev/null +++ b/packages/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt @@ -0,0 +1,4 @@ +KOKKOS_ADD_EXECUTABLE( + view_copy_constructor + SOURCES view_copy_constructor.cpp +) diff --git a/packages/kokkos/benchmarks/view_copy_constructor/Makefile b/packages/kokkos/benchmarks/view_copy_constructor/Makefile new file mode 100644 index 000000000000..70c6d517e0d3 --- /dev/null +++ b/packages/kokkos/benchmarks/view_copy_constructor/Makefile @@ -0,0 +1,46 @@ +KOKKOS_DEVICES=Serial +KOKKOS_ARCH = "" + + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef KOKKOS_PATH + KOKKOS_PATH = $(MAKEFILE_PATH)../.. 
+endif + +SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) +HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) + +vpath %.cpp $(sort $(dir $(SRC))) + +default: build + echo "Start Build" + +CXX = clang++ +EXE = view_copy_constructor.exe + +CXXFLAGS ?= -Ofast +override CXXFLAGS += -I$(MAKEFILE_PATH) + +DEPFLAGS = -M +LINK = ${CXX} +LINKFLAGS = -Ofast +KOKKOS_CXX_STANDARD=c++20 + +OBJ = $(notdir $(SRC:.cpp=.o)) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o view_copy_constructor.cuda view_copy_constructor.exe + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/packages/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp b/packages/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp new file mode 100644 index 000000000000..63c49f09c01e --- /dev/null +++ b/packages/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp @@ -0,0 +1,310 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// The function "test_view_collection" exposes the copy constructor +// and destructor overheads in Kokkos View objects +// Please see the lines marked by "NOTE". 
+ +#include +#include +#include +#include +#include +#include +#include + +// NVIEWS is the number of Kokkos View objects in our ViewCollection object +// We have chosen a large value of 40 to make it easier to see performance +// differences when using the likelihood attribute +#define NVIEWS 40 + +class ViewCollection { + public: + Kokkos::View v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, + v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40; + double m_expected_sum; + double m_side_effect; + int m_N; + + ViewCollection(int N) + : v1("v1", N), + v2("v2", N), + v3("v3", N), + v4("v4", N), + v5("v5", N), + v6("v6", N), + v7("v7", N), + v8("v8", N), + v9("v9", N), + v10("v10", N), + v11("v11", N), + v12("v12", N), + v13("v13", N), + v14("v14", N), + v15("v15", N), + v16("v16", N), + v17("v17", N), + v18("v18", N), + v19("v19", N), + v20("v20", N), + v21("v21", N), + v22("v22", N), + v23("v23", N), + v24("v24", N), + v25("v25", N), + v26("v26", N), + v27("v27", N), + v28("v28", N), + v29("v29", N), + v30("v30", N), + v31("v31", N), + v32("v32", N), + v33("v33", N), + v34("v34", N), + v35("v35", N), + v36("v36", N), + v37("v37", N), + v38("v38", N), + v39("v39", N), + v40("v40", N), + m_expected_sum(N * NVIEWS), + m_side_effect(0.0), + m_N(N) { + for (int i = 0; i < N; ++i) { + v1(i) = 1; + v2(i) = 1; + v3(i) = 1; + v4(i) = 1; + v5(i) = 1; + v6(i) = 1; + v7(i) = 1; + v8(i) = 1; + v9(i) = 1; + v10(i) = 1; + v11(i) = 1; + v12(i) = 1; + v13(i) = 1; + v14(i) = 1; + v15(i) = 1; + v16(i) = 1; + v17(i) = 1; + v18(i) = 1; + v19(i) = 1; + v20(i) = 1; + v21(i) = 1; + v22(i) = 1; + v23(i) = 1; + v24(i) = 1; + v25(i) = 1; + v26(i) = 1; + v27(i) = 1; + v28(i) = 1; + v29(i) = 1; + v30(i) = 1; + v31(i) = 1; + v32(i) = 1; + v33(i) = 1; + v34(i) = 1; + v35(i) = 1; + v36(i) = 1; + v37(i) = 1; + v38(i) = 1; + v39(i) = 1; + v40(i) = 1; + } + } + +// The ADD_COPY_CONSTRUCTOR macro is helpful to 
compare time in the copy +// constructor between compilers. We have found that the GNU compiler +// is sometimes able to inline the default copy constructor. +#ifdef ADD_COPY_CONSTRUCTOR + __attribute__((noinline)) ViewCollection(const ViewCollection& other) + : v1(other.v1), + v2(other.v2), + v3(other.v3), + v4(other.v4), + v5(other.v5), + v6(other.v6), + v7(other.v7), + v8(other.v8), + v9(other.v9), + v10(other.v10), + v11(other.v11), + v12(other.v12), + v13(other.v13), + v14(other.v14), + v15(other.v15), + v16(other.v16), + v17(other.v17), + v18(other.v18), + v19(other.v19), + v20(other.v20), + v21(other.v21), + v22(other.v22), + v23(other.v23), + v24(other.v24), + v25(other.v25), + v26(other.v26), + v27(other.v27), + v28(other.v28), + v29(other.v29), + v30(other.v30), + v31(other.v31), + v32(other.v32), + v33(other.v33), + v34(other.v34), + v35(other.v35), + v36(other.v36), + v37(other.v37), + v38(other.v38), + v39(other.v39), + v40(other.v40), + m_expected_sum(other.m_expected_sum), + m_side_effect(other.m_side_effect), + m_N(other.m_N) {} +#endif + + KOKKOS_INLINE_FUNCTION + double sum_views(int ii, bool execute_kernel) { + double result = 0.0; + if (execute_kernel) { + // This code is only executed when using the command line option -k + // The computation references all Kokkos views. 
This may help our + // effort to stop compilers from optimizing away the Kokkos views + for (int i = 0; i < m_N; ++i) { + result += v1(i) + v2(i) + v3(i) + v4(i) + v5(i) + v6(i) + v7(i) + + v8(i) + v9(i) + v10(i) + v11(i) + v12(i) + v13(i) + v14(i) + + v15(i) + v16(i) + v17(i) + v18(i) + v19(i) + v20(i) + v21(i) + + v22(i) + v23(i) + v24(i) + v25(i) + v26(i) + v27(i) + v28(i) + + v29(i) + v30(i) + v31(i) + v32(i) + v33(i) + v34(i) + v35(i) + + v36(i) + v37(i) + v38(i) + v39(i) + v40(i); + } + } else { + result = m_expected_sum; + } + // This statement introduces a side effect that may help our effort to + // stop compilers from optimizing away the temporary ViewCollection object + m_side_effect = result * (ii + 1); + return result; + } +}; + +void test_view_collection_kk(int N, int num_iter, bool execute_kernel) { + ViewCollection view_collection(N); + + Kokkos::Timer view_collection_timer; + double max_value = 0.0; + // Max Reduction boilerplate code taken from slide 53 of + // kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf + Kokkos::parallel_reduce( + "collection-reduction", num_iter, + KOKKOS_LAMBDA(int i, double& valueToUpdate) { + // NOTE: The following lines expose the Kokkos View overheads + ViewCollection tmp_view_collection = view_collection; + double my_value = tmp_view_collection.sum_views(i, execute_kernel); + if (my_value > valueToUpdate) valueToUpdate = my_value; + }, + Kokkos::Max(max_value)); + double view_collection_time = view_collection_timer.seconds(); + + bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6; + std::cout << "View Time = " << view_collection_time << " seconds" + << std::endl; + if (success) { + std::cout << "Kokkos run:" << std::endl; + std::cout << "SUCCESS" << std::endl; + } else { + std::cout << "FAILURE" << std::endl; + } +} + +void test_view_collection_serial(int N, int num_iter, bool execute_kernel) { + ViewCollection view_collection(N); + + Kokkos::Timer view_collection_timer; + double max_value = 
0.0; + // Max Reduction boilerplate code taken from slide 53 of + // kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf + for (int i = 0; i < num_iter; ++i) { + // NOTE: The following lines expose the Kokkos View overheads + ViewCollection tmp_view_collection = view_collection; + double my_value = tmp_view_collection.sum_views(i, execute_kernel); + if (my_value > max_value) max_value = my_value; + } + double view_collection_time = view_collection_timer.seconds(); + + bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6; + std::cout << "View Time 2 = " << view_collection_time << " seconds" + << std::endl; + if (success) { + std::cout << "Serial run:" << std::endl; + std::cout << "SUCCESS" << std::endl; + } else { + std::cout << "FAILURE" << std::endl; + } +} + +int main(int argc, char* argv[]) { + // The benchmark is only testing reference counting for views on host. +#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_SERIAL) || \ + defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_HPX) + int N = 1; + int num_iter = 1 << 27; + bool execute_kernel = false; + + for (int i = 0; i < argc; i++) { + if ((strcmp(argv[i], "-N") == 0)) { + N = atoi(argv[++i]); + if (N < 1) { + std::cout << "Array extent must be >= 1" << std::endl; + exit(1); + } + } else if (strcmp(argv[i], "-i") == 0) { + num_iter = atoi(argv[++i]); + if (num_iter < 1) { + std::cout << "Number of iterations must be >= 1" << std::endl; + exit(1); + } + } else if (strcmp(argv[i], "-k") == 0) { + execute_kernel = true; + } else if ((strcmp(argv[i], "-h") == 0)) { + printf(" Options:\n"); + printf(" -N : Array extent\n"); + printf(" -i : Number of iterations\n"); + printf(" -k: Execute the summation kernel\n"); + printf(" -h: Print this message\n\n"); + exit(1); + } + } + + std::cout << "Array extent = " << N << std::endl; + std::cout << "Iterations = " << num_iter << std::endl; + std::cout << "Execute summation kernel = " << std::boolalpha << execute_kernel + << 
std::noboolalpha << std::endl; + + // Test inside a Kokkos kernel. + Kokkos::initialize(argc, argv); + { test_view_collection_kk(N, num_iter, execute_kernel); } + + // Test outside Kokkos kernel. + test_view_collection_serial(N, num_iter, execute_kernel); + + Kokkos::finalize(); +#endif + + return 0; +} diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper index dbfef2267fec..d58645f98ad6 100755 --- a/packages/kokkos/bin/nvcc_wrapper +++ b/packages/kokkos/bin/nvcc_wrapper @@ -233,7 +233,7 @@ do cuda_args="$cuda_args $1" ;; #Handle more known nvcc args - --extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler) + --extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler|--disable-warnings) cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument diff --git a/packages/kokkos/cmake/Dependencies.cmake b/packages/kokkos/cmake/Dependencies.cmake index 611c089b2e3f..fb1e73b5799c 100644 --- a/packages/kokkos/cmake/Dependencies.cmake +++ b/packages/kokkos/cmake/Dependencies.cmake @@ -1,6 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib - TEST_OPTIONAL_TPLS CUSPARSE ) TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) diff --git a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in index 8d5ef0de42f9..d3ac39ffa31a 100644 --- a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in +++ b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -225,8 +225,13 @@ FUNCTION(kokkos_compilation) # if built w/o CUDA support, we want to basically make this a no-op SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@) + + IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17) + SET(MAYBE_CURRENT_INSTALLATION_ROOT 
"${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..") + ENDIF() + # search relative first and then absolute - SET(_HINTS "${CMAKE_CURRENT_LIST_DIR}/../.." "@CMAKE_INSTALL_PREFIX@") + SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@") # find kokkos_launch_compiler FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in index 3ab39cd6abf0..7997aa3707c6 100644 --- a/packages/kokkos/cmake/KokkosCore_config.h.in +++ b/packages/kokkos/cmake/KokkosCore_config.h.in @@ -52,6 +52,8 @@ #cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated #cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION #cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN +#cmakedefine KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY +#cmakedefine KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND #cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS /* TPL Settings */ @@ -65,6 +67,7 @@ #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX #cmakedefine KOKKOS_ARCH_ARMV81 #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2 +#cmakedefine KOKKOS_ARCH_ARMV9_GRACE #cmakedefine KOKKOS_ARCH_A64FX #cmakedefine KOKKOS_ARCH_AVX #cmakedefine KOKKOS_ARCH_AVX2 diff --git a/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake b/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake index 5a62c530fce6..445f4e93a592 100644 --- a/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake +++ b/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake @@ -7,37 +7,38 @@ IF (NOT CUDAToolkit_ROOT) ENDIF() ENDIF() -# FIXME CMake 3.28.4 creates more targets than we export -IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4") - find_package(CUDAToolkit) -ELSE() - include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake) -ENDIF() - - -IF (TARGET CUDA::cudart) - SET(FOUND_CUDART TRUE) - KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart) -ELSE() - SET(FOUND_CUDART FALSE) +IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.20.1") 
+ MESSAGE(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1") ENDIF() -IF (TARGET CUDA::cuda_driver) - SET(FOUND_CUDA_DRIVER TRUE) - KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver) -ELSE() - SET(FOUND_CUDA_DRIVER FALSE) -ENDIF() - -include(FindPackageHandleStandardArgs) -IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) - SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1") -ELSE() - SET(KOKKOS_CUDA_ERROR DEFAULT_MSG) -ENDIF() -FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${KOKKOS_CUDA_ERROR} FOUND_CUDART FOUND_CUDA_DRIVER) -IF (FOUND_CUDA_DRIVER AND FOUND_CUDART) +IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0") + find_package(CUDAToolkit REQUIRED) KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart ) + KOKKOS_EXPORT_CMAKE_TPL(CUDAToolkit REQUIRED) +ELSE() + include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake) + + IF (TARGET CUDA::cudart) + SET(FOUND_CUDART TRUE) + KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart) + ELSE() + SET(FOUND_CUDART FALSE) + ENDIF() + + IF (TARGET CUDA::cuda_driver) + SET(FOUND_CUDA_DRIVER TRUE) + KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver) + ELSE() + SET(FOUND_CUDA_DRIVER FALSE) + ENDIF() + + include(FindPackageHandleStandardArgs) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER) + IF (FOUND_CUDA_DRIVER AND FOUND_CUDART) + KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE + LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart + ) + ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/deps/CUDA.cmake b/packages/kokkos/cmake/deps/CUDA.cmake index 68bf5b3d5798..5b6afd61512d 100644 --- a/packages/kokkos/cmake/deps/CUDA.cmake +++ b/packages/kokkos/cmake/deps/CUDA.cmake @@ -35,7 +35,6 @@ IF(NOT _CUDA_FAILURE) GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) - KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) 
ELSE() SET(TPL_ENABLE_CUDA OFF) ENDIF() diff --git a/packages/kokkos/cmake/deps/CUSPARSE.cmake b/packages/kokkos/cmake/deps/CUSPARSE.cmake deleted file mode 100644 index b016971ab915..000000000000 --- a/packages/kokkos/cmake/deps/CUSPARSE.cmake +++ /dev/null @@ -1,26 +0,0 @@ -#@HEADER -# ************************************************************************ -# -# Kokkos v. 4.0 -# Copyright (2022) National Technology & Engineering -# Solutions of Sandia, LLC (NTESS). -# -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -# -# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -# -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ************************************************************************ -# @HEADER - -#include(${TRIBITS_DEPS_DIR}/CUDA.cmake) - -#IF (TPL_ENABLE_CUDA) -# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) -# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) -# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) -# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) -#ENDIF() - diff --git a/packages/kokkos/cmake/fake_tribits.cmake b/packages/kokkos/cmake/fake_tribits.cmake index 4c5331ec793b..a18d2ac518a6 100644 --- a/packages/kokkos/cmake/fake_tribits.cmake +++ b/packages/kokkos/cmake/fake_tribits.cmake @@ -118,14 +118,6 @@ FUNCTION(KOKKOS_ADD_TEST) ENDIF() ENDFUNCTION() -FUNCTION(KOKKOS_ADD_ADVANCED_TEST) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_ADVANCED_TEST(${ARGN}) - else() - # TODO Write this - endif() -ENDFUNCTION() - MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) diff --git a/packages/kokkos/cmake/kokkos_arch.cmake b/packages/kokkos/cmake/kokkos_arch.cmake index 34e9f05986fc..a581d9f94571 100644 --- a/packages/kokkos/cmake/kokkos_arch.cmake +++ b/packages/kokkos/cmake/kokkos_arch.cmake @@ -28,6 +28,7 @@ 
KOKKOS_CHECK_DEPRECATED_OPTIONS( #------------------------------------------------------------------------------- SET(KOKKOS_ARCH_LIST) +include(CheckCXXCompilerFlag) KOKKOS_DEPRECATED_LIST(ARCH ARCH) @@ -49,6 +50,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU") DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU") DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU") DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support") +DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU") DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs") DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs") DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs") @@ -189,12 +191,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) ELSEIF(CUDAToolkit_BIN_DIR) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..) ENDIF() -ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - SET(CUDA_ARCH_FLAG "-gpu") - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda) - IF (KOKKOS_ENABLE_CUDA) # FIXME ideally unreachable when CUDA not enabled - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -cuda) - ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) SET(CUDA_ARCH_FLAG "-arch") ENDIF() @@ -209,6 +205,11 @@ ENDIF() #------------------------------- KOKKOS_HIP_OPTIONS --------------------------- +KOKKOS_OPTION(IMPL_AMDGPU_FLAGS "" STRING "Set compiler flags for AMD GPUs") +KOKKOS_OPTION(IMPL_AMDGPU_LINK "" STRING "Set linker flags for AMD GPUs") +MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_FLAGS) +MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_LINK) + #clear anything that might be in the cache GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) IF(KOKKOS_ENABLE_HIP) @@ -301,6 +302,20 @@ IF (KOKKOS_ARCH_A64FX) ) ENDIF() +IF (KOKKOS_ARCH_ARMV9_GRACE) + SET(KOKKOS_ARCH_ARM_NEON ON) + check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2) + check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS) + IF 
(COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128 + ) + ELSE() + MESSAGE(WARNING "Compiler does not support ARMv9 Grace architecture") + ENDIF() +ENDIF() + IF (KOKKOS_ARCH_ZEN) COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID @@ -535,17 +550,17 @@ IF (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) SET(KOKKOS_ARCH_AVX512XEON OFF) ENDIF() +# FIXME_NVCC nvcc doesn't seem to support Arm Neon. +IF(KOKKOS_ARCH_ARM_NEON AND KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + UNSET(KOKKOS_ARCH_ARM_NEON) +ENDIF() + IF (NOT KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA) IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) COMPILER_SPECIFIC_FLAGS( Clang -fcuda-rdc NVIDIA --relocatable-device-code=true - NVHPC -gpu=rdc ) - ELSEIF(KOKKOS_ENABLE_CUDA) - COMPILER_SPECIFIC_FLAGS( - NVHPC -gpu=nordc - ) ENDIF() ENDIF() @@ -571,7 +586,7 @@ IF (KOKKOS_ENABLE_HIP) COMPILER_SPECIFIC_FLAGS( DEFAULT -fgpu-rdc ) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT KOKKOS_IMPL_AMDGPU_FLAGS) COMPILER_SPECIFIC_LINK_OPTIONS( DEFAULT --hip-link ) @@ -654,15 +669,9 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) SET(CMAKE_CUDA_ARCHITECTURES ${KOKKOS_CUDA_ARCHITECTURES} PARENT_SCOPE) ELSE() - IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${FLAG}) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}") - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}") - ELSE() - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") - ENDIF() + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR 
KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") ENDIF() ENDIF() ENDIF() @@ -704,14 +713,16 @@ FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG) MESSAGE(WARNING "Given AMD GPU architecture ${ARCH}, but Kokkos_ENABLE_HIP, Kokkos_ENABLE_SYCL, Kokkos_ENABLE_OPENACC, and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) ELSE() - IF(KOKKOS_ENABLE_HIP) - SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE) - ENDIF() - SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) - GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") - ENDIF() + IF(KOKKOS_ENABLE_HIP) + SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE) + ENDIF() + IF(NOT KOKKOS_IMPL_AMDGPU_FLAGS) + SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") + ENDIF() + IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") + ENDIF() ENDIF() ENDIF() ENDFUNCTION() @@ -724,6 +735,15 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) CHECK_AMDGPU_ARCH(${ARCH} ${FLAG}) ENDFOREACH() +IF(KOKKOS_IMPL_AMDGPU_FLAGS) + IF (NOT AMDGPU_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "When IMPL_AMDGPU_FLAGS is set the architecture autodectection is disabled. 
" + "Please explicitly set the GPU architecture.") + ENDIF() + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${KOKKOS_IMPL_AMDGPU_FLAGS}") + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${KOKKOS_IMPL_AMDGPU_LINK}") +ENDIF() + MACRO(SET_AND_CHECK_AMD_ARCH ARCH FLAG) KOKKOS_SET_OPTION(ARCH_${ARCH} ON) CHECK_AMDGPU_ARCH(${ARCH} ${FLAG}) @@ -984,7 +1004,7 @@ IF (KOKKOS_ARCH_HOPPER90) ENDIF() #HIP detection of gpu arch -IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED) +IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_AMDGPU_FLAGS) FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator) IF(NOT ROCM_ENUMERATOR) MESSAGE(FATAL_ERROR "Autodetection of AMD GPU architecture not possible as " diff --git a/packages/kokkos/cmake/kokkos_compiler_id.cmake b/packages/kokkos/cmake/kokkos_compiler_id.cmake index 9135ca2b41c0..e8bfadb64ebe 100644 --- a/packages/kokkos/cmake/kokkos_compiler_id.cmake +++ b/packages/kokkos/cmake/kokkos_compiler_id.cmake @@ -42,12 +42,8 @@ IF(Kokkos_ENABLE_CUDA) # If launcher was found and nvcc_wrapper was not specified as # compiler and `CMAKE_CXX_COMPILIER_LAUNCHER` is not set, set to use launcher. # Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper - IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang - AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) + IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang) IF(CMAKE_CXX_COMPILER_LAUNCHER) - IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - MESSAGE(STATUS "Using nvc++ as device compiler requires Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON!") - ENDIF() MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e. 
nvcc_wrapper or clang++!") ENDIF() # the first argument to launcher is always the C++ compiler defined by cmake @@ -149,56 +145,85 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Fujitsu) ENDIF() # Enforce the minimum compilers supported by Kokkos. -SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) 2023.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 11.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 5.2.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI 22.3 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC 19.29 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported") +IF(NOT CMAKE_CXX_STANDARD) + SET(CMAKE_CXX_STANDARD 17) +ENDIF() +IF(CMAKE_CXX_STANDARD EQUAL 17) + SET(KOKKOS_CLANG_CPU_MINIMUM 8.0.0) + SET(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0) + SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0) + SET(KOKKOS_GCC_MINIMUM 8.2.0) + SET(KOKKOS_INTEL_MINIMUM 19.0.5) + SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1) + SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0) + SET(KOKKOS_NVCC_MINIMUM 11.0.0) + SET(KOKKOS_HIPCC_MINIMUM 5.2.0) + SET(KOKKOS_NVHPC_MINIMUM 22.3) + SET(KOKKOS_MSVC_MINIMUM 19.29) +ELSE() + SET(KOKKOS_CLANG_CPU_MINIMUM 14.0.0) + SET(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0) + SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0) + SET(KOKKOS_GCC_MINIMUM 10.1.0) + SET(KOKKOS_INTEL_MINIMUM 
"not supported") + SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0) + SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0) + SET(KOKKOS_NVCC_MINIMUM 12.0.0) + SET(KOKKOS_HIPCC_MINIMUM 5.2.0) + SET(KOKKOS_NVHPC_MINIMUM 22.3) + SET(KOKKOS_MSVC_MINIMUM 19.30) +ENDIF() + +SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos for C++${CMAKE_CXX_STANDARD}. Required minimum compiler versions:") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKOS_CLANG_CPU_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC ${KOKKOS_HIPCC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI ${KOKKOS_NVHPC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC ${KOKKOS_MSVC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported") SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n") IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT Kokkos_ENABLE_CUDA) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CPU_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_CUDA) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 
${KOKKOS_CLANG_CUDA_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.2.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_GCC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.0.5) + IF((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM})) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.1.1) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2023.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVCC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 5.2.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_HIPCC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 22.3) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVHPC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() # Treat PGI internally as NVHPC to simplify handling both compilers. 
@@ -206,13 +231,13 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NV # backward-compatible to pgc++. SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.29) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_MSVC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS KOKKOS_CLANG_OPENMPTARGET_MINIMUM) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_enable_options.cmake b/packages/kokkos/cmake/kokkos_enable_options.cmake index 32788e7aa0f6..b900c4a232ea 100644 --- a/packages/kokkos/cmake/kokkos_enable_options.cmake +++ b/packages/kokkos/cmake/kokkos_enable_options.cmake @@ -75,8 +75,12 @@ KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified me # This option will go away eventually, but allows fallback to old implementation when needed. 
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases") +KOKKOS_ENABLE_OPTION(IMPL_REF_COUNT_BRANCH_UNLIKELY ON "Whether to use the C++20 `[[unlikely]]` attribute in the view reference counting") +mark_as_advanced(Kokkos_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY) +KOKKOS_ENABLE_OPTION(IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND OFF "Whether to enable a workaround for invalid use of View of Views that causes program hang on destruction.") +mark_as_advanced(Kokkos_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND) -KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support") +KOKKOS_ENABLE_OPTION(IMPL_MDSPAN ON "Whether to enable experimental mdspan support") KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan") KOKKOS_ENABLE_OPTION(IMPL_SKIP_COMPILER_MDSPAN ON BOOL "Whether to use an internal version of mdspan even if the compiler supports mdspan") mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN) diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake index 9dab1ca00ea4..d1f1e0d7a785 100644 --- a/packages/kokkos/cmake/kokkos_functions.cmake +++ b/packages/kokkos/cmake/kokkos_functions.cmake @@ -709,7 +709,12 @@ MACRO(kokkos_find_imported NAME) ENDIF() IF (NOT TPL_LIBRARY_SUFFIXES) - SET(TPL_LIBRARY_SUFFIXES lib lib64) + SET(TPL_LIBRARY_SUFFIXES lib) + IF(KOKKOS_IMPL_32BIT) + LIST(APPEND TPL_LIBRARY_SUFFIXES lib32) + ELSE() + LIST(APPEND TPL_LIBRARY_SUFFIXES lib64) + ENDIF() ENDIF() SET(${NAME}_INCLUDE_DIRS) diff --git a/packages/kokkos/cmake/kokkos_test_cxx_std.cmake b/packages/kokkos/cmake/kokkos_test_cxx_std.cmake index b075a3e36b56..5b45674e0570 100644 --- a/packages/kokkos/cmake/kokkos_test_cxx_std.cmake +++ 
b/packages/kokkos/cmake/kokkos_test_cxx_std.cmake @@ -124,12 +124,8 @@ IF(KOKKOS_ENABLE_CUDA) ELSEIF(CMAKE_CXX_EXTENSIONS) MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF") ENDIF() - ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) - IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. To allow nvc++ as Cuda compiler, Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON must be set!") - ELSE() - MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or NVC++ or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") - ENDIF() + ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_tpls.cmake b/packages/kokkos/cmake/kokkos_tpls.cmake index 6ef3b79bde25..cda9e0d6004a 100644 --- a/packages/kokkos/cmake/kokkos_tpls.cmake +++ b/packages/kokkos/cmake/kokkos_tpls.cmake @@ -103,13 +103,19 @@ if (Kokkos_ENABLE_IMPL_MDSPAN AND Kokkos_ENABLE_MDSPAN_EXTERNAL) endif() IF (Kokkos_ENABLE_OPENMP) - find_package(OpenMP REQUIRED) + find_package(OpenMP REQUIRED COMPONENTS CXX) # FIXME_TRILINOS Trilinos doesn't allow for Kokkos to use find_dependency # so we just append the flags here instead of linking with the OpenMP target. 
IF(KOKKOS_HAS_TRILINOS) COMPILER_SPECIFIC_FLAGS(DEFAULT ${OpenMP_CXX_FLAGS}) ELSE() - KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED) + KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED COMPONENTS CXX) + ENDIF() + IF(Kokkos_ENABLE_HIP AND KOKKOS_COMPILE_LANGUAGE STREQUAL HIP) + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS ${OpenMP_CXX_FLAGS}) + ENDIF() + IF(Kokkos_ENABLE_CUDA AND KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -Xcompiler ${OpenMP_CXX_FLAGS}) ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_tribits.cmake b/packages/kokkos/cmake/kokkos_tribits.cmake index 060a7a8472c7..6da543a2c85b 100644 --- a/packages/kokkos/cmake/kokkos_tribits.cmake +++ b/packages/kokkos/cmake/kokkos_tribits.cmake @@ -160,6 +160,12 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) ) ENDIF() ENDIF() + # We noticed problems with -fvisibility=hidden for inline static variables + # if Kokkos was built as shared library. + IF(BUILD_SHARED_LIBS) + SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY VISIBILITY_INLINES_HIDDEN ON) + SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY CXX_VISIBILITY_PRESET hidden) + ENDIF() ENDFUNCTION() FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) @@ -241,34 +247,6 @@ MACRO(KOKKOS_CONFIGURE_CORE) KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}") - SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace") - KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space") - KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space") - KOKKOS_OPTION(DEFAULT_DEVICE_EXECUTION_SPACE "" STRING 
"Override default device execution space") - KOKKOS_OPTION(DEFAULT_HOST_PARALLEL_EXECUTION_SPACE "" STRING "Override default host parallel execution space") - IF (NOT Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE STREQUAL "") - SET(_DEVICE_PARALLEL ${Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE}) - MESSAGE(STATUS "Override default device execution space: ${_DEVICE_PARALLEL}") - SET(KOKKOS_DEVICE_SPACE_ACTIVE ON) - ELSE() - IF (_DEVICE_PARALLEL STREQUAL "NoTypeDefined") - SET(KOKKOS_DEVICE_SPACE_ACTIVE OFF) - ELSE() - SET(KOKKOS_DEVICE_SPACE_ACTIVE ON) - ENDIF() - ENDIF() - IF (NOT Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE STREQUAL "") - SET(_HOST_PARALLEL ${Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE}) - MESSAGE(STATUS "Override default host parallel execution space: ${_HOST_PARALLEL}") - SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON) - ELSE() - IF (_HOST_PARALLEL STREQUAL "NoTypeDefined") - SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE OFF) - ELSE() - SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON) - ENDIF() - ENDIF() - #We are ready to configure the header CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) ENDMACRO() @@ -484,15 +462,10 @@ ENDFUNCTION() FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) - IF(KOKKOS_HAS_TRILINOS) - #ignore the target, tribits doesn't do anything directly with targets - TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) - ELSE() #append to a list for later - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - FOREACH(DIR ${ARGN}) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) - ENDFOREACH() - ENDIF() + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + FOREACH(DIR ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) + ENDFOREACH() ENDFUNCTION() FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) diff --git a/packages/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/packages/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake deleted file mode 100644 index 4709f8002b11..000000000000 --- a/packages/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake +++ /dev/null @@ -1,26 +0,0 @@ -#@HEADER -# 
************************************************************************ -# -# Kokkos v. 4.0 -# Copyright (2022) National Technology & Engineering -# Solutions of Sandia, LLC (NTESS). -# -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -# -# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -# -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#@HEADER - -# Check for CUDA support - -IF (NOT TPL_ENABLE_CUDA) - MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA") -ELSE() - GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) - GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) - GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) -ENDIF() - diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp b/packages/kokkos/containers/src/Kokkos_DualView.hpp index e821570a8d5f..a37a2bdcebd9 100644 --- a/packages/kokkos/containers/src/Kokkos_DualView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp @@ -944,13 +944,13 @@ class DualView : public ViewTraits { if (sizeMismatch) { ::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7); - if (alloc_prop_input::initialize) { + if constexpr (alloc_prop_input::initialize) { h_view = create_mirror_view(typename t_host::memory_space(), d_view); } else { h_view = create_mirror_view(Kokkos::WithoutInitializing, typename t_host::memory_space(), d_view); } - } else if (alloc_prop_input::initialize) { + } else if constexpr (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::has_execution_space) { const auto& exec_space = Impl::get_property(arg_prop); @@ -1038,12 +1038,10 @@ class DualView : public ViewTraits { /* Resize on Device */ if (sizeMismatch) { ::Kokkos::resize(properties, d_view, n0, n1, n2, n3, n4, n5, n6, n7); - if (alloc_prop_input::initialize) { - h_view = create_mirror_view(typename t_host::memory_space(), d_view); - } else { - h_view = create_mirror_view(Kokkos::WithoutInitializing, - 
typename t_host::memory_space(), d_view); - } + // this part of the lambda was relocated in a method as it contains a + // `if constexpr`. In some cases, both branches were evaluated + // leading to a compile error + resync_host(properties); /* Mark Device copy as modified */ ++modified_flags(1); @@ -1054,13 +1052,10 @@ class DualView : public ViewTraits { /* Resize on Host */ if (sizeMismatch) { ::Kokkos::resize(properties, h_view, n0, n1, n2, n3, n4, n5, n6, n7); - if (alloc_prop_input::initialize) { - d_view = create_mirror_view(typename t_dev::memory_space(), h_view); - - } else { - d_view = create_mirror_view(Kokkos::WithoutInitializing, - typename t_dev::memory_space(), h_view); - } + // this part of the lambda was relocated in a method as it contains a + // `if constexpr`. In some cases, both branches were evaluated + // leading to a compile error + resync_device(properties); /* Mark Host copy as modified */ ++modified_flags(0); @@ -1099,6 +1094,39 @@ class DualView : public ViewTraits { } } + private: + // resync host mirror from device + // this code was relocated from a lambda as it contains a `if constexpr`. 
+ // In some cases, both branches were evaluated, leading to a compile error + template + inline void resync_host(Impl::ViewCtorProp const&) { + using alloc_prop_input = Impl::ViewCtorProp; + + if constexpr (alloc_prop_input::initialize) { + h_view = create_mirror_view(typename t_host::memory_space(), d_view); + } else { + h_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_host::memory_space(), d_view); + } + } + + // resync device mirror from host + // this code was relocated from a lambda as it contains a `if constexpr` + // In some cases, both branches were evaluated leading to a compile error + template + inline void resync_device(Impl::ViewCtorProp const&) { + using alloc_prop_input = Impl::ViewCtorProp; + + if constexpr (alloc_prop_input::initialize) { + d_view = create_mirror_view(typename t_dev::memory_space(), h_view); + + } else { + d_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_dev::memory_space(), h_view); + } + } + + public: void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp index 5fa59f1b7cdf..5f7fcaf69e7f 100644 --- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -1657,8 +1657,7 @@ KOKKOS_FUNCTION auto as_view_of_rank_n( if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v || - is_layouttiled::value) { + std::is_same_v) { for (int i = N; i < 7; ++i) layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG; } @@ -1933,254 +1932,155 @@ struct MirrorDRVType { } // namespace Impl namespace Impl { -template -inline typename DynRankView::HostMirror create_mirror( - const DynRankView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using src_type = DynRankView; - 
using dst_type = typename src_type::HostMirror; - using alloc_prop_input = Impl::ViewCtorProp; - - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc +template +inline auto create_mirror(const DynRankView& src, + const Impl::ViewCtorProp& arg_prop) { + check_view_ctor_args_create_mirror(); auto prop_copy = Impl::with_properties_if_unset( arg_prop, std::string(src.label()).append("_mirror")); - return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); -} - -template -inline auto create_mirror( - const DynRankView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using dst_type = typename Impl::MirrorDRVType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type; - - using alloc_prop_input = Impl::ViewCtorProp; - - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using dst_type = typename Impl::MirrorDRVType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type; - auto prop_copy = 
Impl::with_properties_if_unset( - arg_prop, std::string(src.label()).append("_mirror")); + return dst_type(prop_copy, + Impl::reconstructLayout(src.layout(), src.rank())); + } else { + using src_type = DynRankView; + using dst_type = typename src_type::HostMirror; - return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); + return dst_type(prop_copy, + Impl::reconstructLayout(src.layout(), src.rank())); + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } } // namespace Impl -// Create a mirror in host space -template -inline typename DynRankView::HostMirror create_mirror( - const DynRankView& src, - std::enable_if_t::specialize, - void>::value>* = nullptr) { - return Impl::create_mirror(src, Kokkos::Impl::ViewCtorProp<>{}); +// public interface +template ::specialize>>> +inline auto create_mirror(const DynRankView& src) { + return Impl::create_mirror(src, Kokkos::view_alloc()); } -template -inline typename DynRankView::HostMirror create_mirror( - Kokkos::Impl::WithoutInitializing_t wi, const DynRankView& src, - std::enable_if_t::specialize, - void>::value>* = nullptr) { +// public interface that accepts a without initializing flag +template ::specialize>>> +inline auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, + const DynRankView& src) { return Impl::create_mirror(src, Kokkos::view_alloc(wi)); } -template -inline typename DynRankView::HostMirror create_mirror( - const Impl::ViewCtorProp& arg_prop, - const DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::ViewCtorProp::has_memory_space>* = nullptr) { - return Impl::create_mirror(src, arg_prop); -} - -// Create a mirror in a new space +// public interface that accepts a space template ::value && - std::is_void::specialize>::value>> -typename Impl::MirrorDRVType::view_type create_mirror( - const Space&, const 
Kokkos::DynRankView& src) { + std::is_void_v::specialize>>> +auto create_mirror(const Space&, const Kokkos::DynRankView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template -typename Impl::MirrorDRVType::view_type create_mirror( - Kokkos::Impl::WithoutInitializing_t wi, const Space&, - const Kokkos::DynRankView& src, - std::enable_if_t::specialize, - void>::value>* = nullptr) { +// public interface that accepts a space and a without initializing flag +template ::value && + std::is_void_v::specialize>>> +auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, + const Kokkos::DynRankView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(wi, typename Space::memory_space{})); } -template -inline auto create_mirror( - const Impl::ViewCtorProp& arg_prop, - const DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::ViewCtorProp::has_memory_space>* = nullptr) { - using ReturnType = typename Impl::MirrorDRVType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type; - return ReturnType{Impl::create_mirror(src, arg_prop)}; +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> +inline auto create_mirror(const Impl::ViewCtorProp& arg_prop, + const DynRankView& src) { + return Impl::create_mirror(src, arg_prop); } namespace Impl { -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same< - typename DynRankView::data_type, - typename DynRankView::HostMirror::data_type>::value, - typename DynRankView::HostMirror> -create_mirror_view(const DynRankView& src, - const typename Impl::ViewCtorProp&) { - return src; -} +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline 
std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same< - typename DynRankView::data_type, - typename DynRankView::HostMirror::data_type>::value), - typename DynRankView::HostMirror> -create_mirror_view( +inline auto create_mirror_view( const DynRankView& src, - const typename Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} - -template ::has_memory_space>> -inline std::enable_if_t< - Kokkos::is_space< - typename Impl::ViewCtorProp::memory_space>::value && - Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace, - typename Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type> -create_mirror_view(const Kokkos::DynRankView& src, - const typename Impl::ViewCtorProp&) { - return src; + [[maybe_unused]] const typename Impl::ViewCtorProp& + arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename DynRankView< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename DynRankView< + T, P...>::HostMirror::data_type>::value) { + return typename DynRankView::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorDRViewType::memory_space, + T, P...>::is_same_memspace) { + return typename Impl::MirrorDRViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template ::has_memory_space>> -inline std::enable_if_t< - Kokkos::is_space< - typename 
Impl::ViewCtorProp::memory_space>::value && - !Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace, - typename Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type> -create_mirror_view( - const Kokkos::DynRankView& src, - const typename Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} } // namespace Impl -// Create a mirror view in host space +// public interface template -inline std::enable_if_t< - (std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same::data_type, - typename DynRankView::HostMirror::data_type>::value), - typename DynRankView::HostMirror> -create_mirror_view(const Kokkos::DynRankView& src) { - return src; -} - -template -inline std::enable_if_t< - !(std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same< - typename DynRankView::data_type, - typename DynRankView::HostMirror::data_type>::value), - typename DynRankView::HostMirror> -create_mirror_view(const Kokkos::DynRankView& src) { - return Kokkos::create_mirror(src); +inline auto create_mirror_view(const Kokkos::DynRankView& src) { + return Impl::create_mirror_view(src, Kokkos::view_alloc()); } +// public interface that accepts a without initializing flag template inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, const DynRankView& src) { return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); } -// Create a mirror view in a new space -// FIXME_C++17 Improve SFINAE here. 
+// public interface that accepts a space template ::value>> -inline typename Impl::MirrorDRViewType::view_type -create_mirror_view( - const Space&, const Kokkos::DynRankView& src, - std::enable_if_t< - Impl::MirrorDRViewType::is_same_memspace>* = nullptr) { - return src; +inline auto create_mirror_view(const Space&, + const Kokkos::DynRankView& src) { + return Impl::create_mirror_view( + src, Kokkos::view_alloc(typename Space::memory_space())); } -// FIXME_C++17 Improve SFINAE here. +// public interface that accepts a space and a without initializing flag template ::value>> -inline typename Impl::MirrorDRViewType::view_type -create_mirror_view( - const Space& space, const Kokkos::DynRankView& src, - std::enable_if_t< - !Impl::MirrorDRViewType::is_same_memspace>* = nullptr) { - return Kokkos::create_mirror(space, src); -} - -template + typename Enable = std::enable_if_t::value>> inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::DynRankView& src) { @@ -2188,6 +2088,8 @@ inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); } +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template inline auto create_mirror_view( const typename Impl::ViewCtorProp& arg_prop, @@ -2195,75 +2097,51 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, arg_prop); } -template +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>::value>> auto create_mirror_view_and_copy( - const Impl::ViewCtorProp&, - const Kokkos::DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop, + const Kokkos::DynRankView& src) { using 
alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - - // same behavior as deep_copy(src, src) - if (!alloc_prop_input::has_execution_space) - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); - return src; -} -template -auto create_mirror_view_and_copy( - const Impl::ViewCtorProp& arg_prop, - const Kokkos::DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - using Space = typename alloc_prop_input::memory_space; - using Mirror = typename Impl::MirrorDRViewType::view_type; - - auto arg_prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string{}, WithoutInitializing, - typename Space::execution_space{}); - - std::string& label = Impl::get_property(arg_prop_copy); - if (label.empty()) label = src.label(); - auto mirror = typename 
Mirror::non_const_type{ - arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())}; - if constexpr (alloc_prop_input::has_execution_space) { - deep_copy(Impl::get_property(arg_prop_copy), - mirror, src); - } else - deep_copy(mirror, src); - return mirror; + Impl::check_view_ctor_args_create_mirror_view_and_copy(); + + if constexpr (Impl::MirrorDRViewType< + typename Impl::ViewCtorProp::memory_space, + T, P...>::is_same_memspace) { + // same behavior as deep_copy(src, src) + if constexpr (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src " + "view"); + return src; + } else { + using Space = typename alloc_prop_input::memory_space; + using Mirror = typename Impl::MirrorDRViewType::view_type; + + auto arg_prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string{}, WithoutInitializing, + typename Space::execution_space{}); + + std::string& label = Impl::get_property(arg_prop_copy); + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type{ + arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())}; + if constexpr (alloc_prop_input::has_execution_space) { + deep_copy(Impl::get_property(arg_prop_copy), + mirror, src); + } else + deep_copy(mirror, src); + return mirror; + } +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) + __builtin_unreachable(); +#endif } template diff --git a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp index 12885edbae92..a4b74e246e0d 100644 --- a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -590,96 +590,81 @@ struct MirrorDynamicViewType { } // namespace Impl namespace Impl { + +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline auto create_mirror( - const 
Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { +inline auto create_mirror(const Kokkos::Experimental::DynamicView& src, + const Impl::ViewCtorProp& arg_prop) { using alloc_prop_input = Impl::ViewCtorProp; - - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); + check_view_ctor_args_create_mirror(); auto prop_copy = Impl::with_properties_if_unset( arg_prop, std::string(src.label()).append("_mirror")); - auto ret = typename Kokkos::Experimental::DynamicView::HostMirror( - prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); - - ret.resize_serial(src.extent(0)); - - return ret; -} + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using MemorySpace = typename alloc_prop_input::memory_space; -template -inline auto create_mirror( - const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; + auto ret = typename Kokkos::Impl::MirrorDynamicViewType< + MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(), + src.chunk_max() * src.chunk_size()); - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to 
Kokkos::create_mirror must " - "not explicitly allow padding!"); - - using MemorySpace = typename alloc_prop_input::memory_space; - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string(src.label()).append("_mirror")); + ret.resize_serial(src.extent(0)); - auto ret = typename Kokkos::Impl::MirrorDynamicViewType< - MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(), - src.chunk_max() * src.chunk_size()); + return ret; + } else { + auto ret = typename Kokkos::Experimental::DynamicView::HostMirror( + prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); - ret.resize_serial(src.extent(0)); + ret.resize_serial(src.extent(0)); - return ret; + return ret; + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } + } // namespace Impl -// Create a mirror in host space -template +// public interface +template ::specialize>>> inline auto create_mirror( const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); } -template +// public interface that accepts a without initializing flag +template ::specialize>>> inline auto create_mirror( Kokkos::Impl::WithoutInitializing_t wi, const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror(src, Kokkos::view_alloc(wi)); } -// Create a mirror in a new space -template +// public interface that accepts a space +template ::value && + std::is_void_v::specialize>>> inline auto create_mirror( const Space&, const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value && + std::is_void_v::specialize>>> typename Kokkos::Impl::MirrorDynamicViewType::view_type create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const 
Kokkos::Experimental::DynamicView& src) { @@ -687,7 +672,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, src, Kokkos::view_alloc(wi, typename Space::memory_space{})); } -template +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> inline auto create_mirror( const Impl::ViewCtorProp& arg_prop, const Kokkos::Experimental::DynamicView& src) { @@ -696,76 +685,56 @@ inline auto create_mirror( namespace Impl { +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - (std::is_same< - typename Kokkos::Experimental::DynamicView::memory_space, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::DynamicView::data_type, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::DynamicView::HostMirror> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp&) { - return src; -} - -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same< - typename Kokkos::Experimental::DynamicView::memory_space, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::DynamicView::data_type, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::DynamicView::HostMirror> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::create_mirror(arg_prop, src); -} - -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorDynamicViewType< - typename 
Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp&) { - return src; +inline auto create_mirror_view( + const Kokkos::Experimental::DynamicView& src, + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::data_type>::value) { + return + typename Kokkos::Experimental::DynamicView::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp< + ViewCtorArgs...>::memory_space, + T, P...>::is_same_memspace) { + return typename Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} } // namespace Impl -// Create a mirror view in host space +// public interface template inline auto create_mirror_view( const typename Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); } +// public interface that accepts a without initializing flag template inline auto 
create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, @@ -773,15 +742,18 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); } -// Create a mirror in a new space -template +// public interface that accepts a space +template ::value>> inline auto create_mirror_view( const Space&, const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror_view(src, view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value>> inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::Experimental::DynamicView& src) { @@ -789,6 +761,8 @@ inline auto create_mirror_view( src, Kokkos::view_alloc(wi, typename Space::memory_space{})); } +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template inline auto create_mirror_view( const Impl::ViewCtorProp& arg_prop, @@ -985,80 +959,57 @@ struct ViewCopy, } // namespace Impl -template +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>::value>> auto create_mirror_view_and_copy( - const Impl::ViewCtorProp&, - const Kokkos::Experimental::DynamicView& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop, + const Kokkos::Experimental::DynamicView& src) { using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a 
pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - - // same behavior as deep_copy(src, src) - if (!alloc_prop_input::has_execution_space) - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); - return src; -} -template -auto create_mirror_view_and_copy( - const Impl::ViewCtorProp& arg_prop, - const Kokkos::Experimental::DynamicView& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - using Space = typename alloc_prop_input::memory_space; - using Mirror = - typename Impl::MirrorDynamicViewType::view_type; - - auto arg_prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string{}, WithoutInitializing, - typename Space::execution_space{}); - - std::string& label = Impl::get_property(arg_prop_copy); - if (label.empty()) label = src.label(); - auto mirror = typename Mirror::non_const_type( - arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); - mirror.resize_serial(src.extent(0)); - if constexpr (alloc_prop_input::has_execution_space) { - deep_copy(Impl::get_property(arg_prop_copy), - mirror, src); - } else - deep_copy(mirror, src); - return mirror; + 
Impl::check_view_ctor_args_create_mirror_view_and_copy(); + + if constexpr (Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp::memory_space, + T, P...>::is_same_memspace) { + // same behavior as deep_copy(src, src) + if constexpr (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src " + "view"); + return src; + } else { + using Space = typename alloc_prop_input::memory_space; + using Mirror = + typename Impl::MirrorDynamicViewType::view_type; + + auto arg_prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string{}, WithoutInitializing, + typename Space::execution_space{}); + + std::string& label = Impl::get_property(arg_prop_copy); + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type( + arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); + mirror.resize_serial(src.extent(0)); + if constexpr (alloc_prop_input::has_execution_space) { + deep_copy(Impl::get_property(arg_prop_copy), + mirror, src); + } else + deep_copy(mirror, src); + return mirror; + } +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) + __builtin_unreachable(); +#endif } -template +template ::value>> auto create_mirror_view_and_copy( const Space&, const Kokkos::Experimental::DynamicView& src, std::string const& name = "") { diff --git a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp index 91a7e4a92732..3adc70b19049 100644 --- a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp +++ b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -471,62 +471,31 @@ class OffsetView : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_left && (traits::rank_dynamic == 0)), + is_default_map && + (is_layout_left || is_layout_right || is_layout_stride)), 
reference_type> operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_dim.N0 * j1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_left && (traits::rank_dynamic != 0)), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_stride * j1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_right && (traits::rank_dynamic == 0)), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_dim.N1 * j0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_right && (traits::rank_dynamic != 0)), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_stride * j0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t<(Kokkos::Impl::are_integral::value && - (2 == Rank) && is_default_map && is_layout_stride), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, 
m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j0 * m_map.m_impl_offset.m_stride.S0 + - j1 * m_map.m_impl_offset.m_stride.S1]; + if constexpr (is_layout_left) { + if constexpr (traits::rank_dynamic == 0) + return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_dim.N0 * j1]; + else + return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_stride * j1]; + } else if constexpr (is_layout_right) { + if constexpr (traits::rank_dynamic == 0) + return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_dim.N1 * j0]; + else + return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_stride * j0]; + } else { + static_assert(is_layout_stride); + return m_map.m_impl_handle[j0 * m_map.m_impl_offset.m_stride.S0 + + j1 * m_map.m_impl_offset.m_stride.S1]; + } +#if defined(KOKKOS_COMPILER_INTEL) + __builtin_unreachable(); +#endif } //------------------------------ @@ -1841,71 +1810,73 @@ struct MirrorOffsetType { } // namespace Impl namespace Impl { -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space, - typename Kokkos::Experimental::OffsetView::HostMirror> -create_mirror(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp& arg_prop) { - return typename Kokkos::Experimental::OffsetView::HostMirror( - Kokkos::create_mirror(arg_prop, src.view()), src.begins()); -} -template ::has_memory_space>> +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc +template inline auto create_mirror(const Kokkos::Experimental::OffsetView& src, const Impl::ViewCtorProp& arg_prop) { - using alloc_prop_input = Impl::ViewCtorProp; - using Space = typename Impl::ViewCtorProp::memory_space; + check_view_ctor_args_create_mirror(); - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view 
constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using Space = typename Impl::ViewCtorProp::memory_space; - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string(src.label()).append("_mirror")); + auto prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string(src.label()).append("_mirror")); - return typename Kokkos::Impl::MirrorOffsetType::view_type( - prop_copy, src.layout(), - {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), - src.begin(5), src.begin(6), src.begin(7)}); + return typename Kokkos::Impl::MirrorOffsetType::view_type( + prop_copy, src.layout(), + {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + src.begin(5), src.begin(6), src.begin(7)}); + } else { + return typename Kokkos::Experimental::OffsetView::HostMirror( + Kokkos::create_mirror(arg_prop, src.view()), src.begins()); + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } + } // namespace Impl -// Create a mirror in host space -template +// public interface +template ::specialize>>> inline auto create_mirror( const Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); } -template +// public interface that accepts a without initializing flag +template ::specialize>>> inline auto create_mirror( Kokkos::Impl::WithoutInitializing_t wi, const Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror(src, Kokkos::view_alloc(wi)); } -// Create a mirror in a new space +// public interface that accepts a space template ::value>> + typename Enable = std::enable_if_t< + Kokkos::is_space::value && 
+ std::is_void_v::specialize>>> inline auto create_mirror( const Space&, const Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value && + std::is_void_v::specialize>>> typename Kokkos::Impl::MirrorOffsetType::view_type create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::Experimental::OffsetView& src) { @@ -1913,7 +1884,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); } -template +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> inline auto create_mirror( const Impl::ViewCtorProp& arg_prop, const Kokkos::Experimental::OffsetView& src) { @@ -1921,76 +1896,56 @@ inline auto create_mirror( } namespace Impl { -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - (std::is_same< - typename Kokkos::Experimental::OffsetView::memory_space, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::OffsetView::data_type, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::OffsetView::HostMirror> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp&) { - return src; -} +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same< - typename Kokkos::Experimental::OffsetView::memory_space, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::OffsetView::data_type, - 
typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::OffsetView::HostMirror> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::create_mirror(arg_prop, src); -} - -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorOffsetViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp&) { - return src; +inline auto create_mirror_view( + const Kokkos::Experimental::OffsetView& src, + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::data_type>::value) { + return + typename Kokkos::Experimental::OffsetView::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorOffsetViewType::memory_space, + T, P...>::is_same_memspace) { + return typename Impl::MirrorOffsetViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorOffsetViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp& arg_prop) { - 
return Kokkos::Impl::create_mirror(src, arg_prop); -} } // namespace Impl -// Create a mirror view in host space +// public interface template inline auto create_mirror_view( const typename Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); } +// public interface that accepts a without initializing flag template inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, @@ -1998,7 +1953,7 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); } -// Create a mirror view in a new space +// public interface that accepts a space template ::value>> inline auto create_mirror_view( @@ -2007,7 +1962,9 @@ inline auto create_mirror_view( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value>> inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::Experimental::OffsetView& src) { @@ -2015,6 +1972,8 @@ inline auto create_mirror_view( src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); } +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template inline auto create_mirror_view( const Impl::ViewCtorProp& arg_prop, @@ -2022,7 +1981,9 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, arg_prop); } -// Create a mirror view and deep_copy in a new space +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template typename Kokkos::Impl::MirrorOffsetViewType< typename Impl::ViewCtorProp::memory_space, T, diff --git a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 78a6a238ece1..c3a8b67df8df 100644 --- a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ 
b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -805,56 +805,94 @@ class UnorderedMap { return *this; } + // Re-allocate the views of the calling UnorderedMap according to src + // capacity, and deep copy the src data. template std::enable_if_t, key_type>::value && std::is_same, value_type>::value> create_copy_view( UnorderedMap const &src) { if (m_hash_lists.data() != src.m_hash_lists.data()) { - insertable_map_type tmp; - - tmp.m_bounded_insert = src.m_bounded_insert; - tmp.m_hasher = src.m_hasher; - tmp.m_equal_to = src.m_equal_to; - tmp.m_size() = src.m_size(); - tmp.m_available_indexes = bitset_type(src.capacity()); - tmp.m_hash_lists = size_type_view( - view_alloc(WithoutInitializing, "UnorderedMap hash list"), - src.m_hash_lists.extent(0)); - tmp.m_next_index = size_type_view( - view_alloc(WithoutInitializing, "UnorderedMap next index"), - src.m_next_index.extent(0)); - tmp.m_keys = - key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"), - src.m_keys.extent(0)); - tmp.m_values = value_type_view( - view_alloc(WithoutInitializing, "UnorderedMap values"), - src.m_values.extent(0)); - tmp.m_scalars = scalars_view("UnorderedMap scalars"); - - Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes); + allocate_view(src); + deep_copy_view(src); + } + } + + // Allocate views of the calling UnorderedMap with the same capacity as the + // src. 
+ template + std::enable_if_t, key_type>::value && + std::is_same, value_type>::value> + allocate_view( + UnorderedMap const &src) { + insertable_map_type tmp; + + tmp.m_bounded_insert = src.m_bounded_insert; + tmp.m_hasher = src.m_hasher; + tmp.m_equal_to = src.m_equal_to; + tmp.m_size() = src.m_size(); + tmp.m_available_indexes = bitset_type(src.capacity()); + tmp.m_hash_lists = size_type_view( + view_alloc(WithoutInitializing, "UnorderedMap hash list"), + src.m_hash_lists.extent(0)); + tmp.m_next_index = size_type_view( + view_alloc(WithoutInitializing, "UnorderedMap next index"), + src.m_next_index.extent(0)); + tmp.m_keys = + key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"), + src.m_keys.extent(0)); + tmp.m_values = + value_type_view(view_alloc(WithoutInitializing, "UnorderedMap values"), + src.m_values.extent(0)); + tmp.m_scalars = scalars_view("UnorderedMap scalars"); + + *this = tmp; + } + + // Deep copy view data from src. This requires that the src capacity is + // identical to the capacity of the calling UnorderedMap. + template + std::enable_if_t, key_type>::value && + std::is_same, value_type>::value> + deep_copy_view( + UnorderedMap const &src) { +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4 + // To deep copy UnorderedMap, capacity must be identical + KOKKOS_EXPECTS(capacity() == src.capacity()); +#else + if (capacity() != src.capacity()) { + allocate_view(src); +#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS + Kokkos::Impl::log_warning( + "Warning: deep_copy_view() allocating views is deprecated. 
Must call " + "with UnorderedMaps of identical capacity, or use " + "create_copy_view().\n"); +#endif + } +#endif + + if (m_hash_lists.data() != src.m_hash_lists.data()) { + Kokkos::deep_copy(m_available_indexes, src.m_available_indexes); using raw_deep_copy = Kokkos::Impl::DeepCopy; - raw_deep_copy(tmp.m_hash_lists.data(), src.m_hash_lists.data(), + raw_deep_copy(m_hash_lists.data(), src.m_hash_lists.data(), sizeof(size_type) * src.m_hash_lists.extent(0)); - raw_deep_copy(tmp.m_next_index.data(), src.m_next_index.data(), + raw_deep_copy(m_next_index.data(), src.m_next_index.data(), sizeof(size_type) * src.m_next_index.extent(0)); - raw_deep_copy(tmp.m_keys.data(), src.m_keys.data(), + raw_deep_copy(m_keys.data(), src.m_keys.data(), sizeof(key_type) * src.m_keys.extent(0)); if (!is_set) { - raw_deep_copy(tmp.m_values.data(), src.m_values.data(), + raw_deep_copy(m_values.data(), src.m_values.data(), sizeof(impl_value_type) * src.m_values.extent(0)); } - raw_deep_copy(tmp.m_scalars.data(), src.m_scalars.data(), + raw_deep_copy(m_scalars.data(), src.m_scalars.data(), sizeof(int) * num_scalars); Kokkos::fence( - "Kokkos::UnorderedMap::create_copy_view: fence after copy to tmp"); - - *this = tmp; + "Kokkos::UnorderedMap::deep_copy_view: fence after copy to dst."); } } @@ -932,13 +970,25 @@ class UnorderedMap { friend struct Impl::UnorderedMapPrint; }; -// Specialization of deep_copy for two UnorderedMap objects. +// Specialization of deep_copy() for two UnorderedMap objects. template inline void deep_copy( UnorderedMap &dst, const UnorderedMap &src) { - dst.create_copy_view(src); + dst.deep_copy_view(src); +} + +// Specialization of create_mirror() for an UnorderedMap object. 
+template +typename UnorderedMap::HostMirror +create_mirror( + const UnorderedMap &src) { + typename UnorderedMap::HostMirror + dst; + dst.allocate_view(src); + return dst; } } // namespace Kokkos diff --git a/packages/kokkos/containers/unit_tests/TestDualView.hpp b/packages/kokkos/containers/unit_tests/TestDualView.hpp index a15e5fa29972..2512cb5c4915 100644 --- a/packages/kokkos/containers/unit_tests/TestDualView.hpp +++ b/packages/kokkos/containers/unit_tests/TestDualView.hpp @@ -55,8 +55,8 @@ struct test_dualview_alloc { bool result = false; test_dualview_alloc(unsigned int size) { - result = run_me >( - size, 3); + result = + run_me>(size, 3); } }; @@ -154,7 +154,7 @@ struct test_dualview_combinations { } test_dualview_combinations(unsigned int size, bool with_init) { - result = run_me >( + result = run_me>( size, 3, with_init); } }; @@ -253,21 +253,18 @@ struct test_dual_view_deep_copy { } // end run_me test_dual_view_deep_copy() { - run_me >(10, 5, - true); - run_me >(10, 5, - false); + run_me>(10, 5, true); + run_me>(10, 5, + false); // Test zero length but allocated (a.d_view.data!=nullptr but // a.d_view.span()==0) - run_me >(0, 5, true); - run_me >(0, 5, - false); + run_me>(0, 5, true); + run_me>(0, 5, false); // Test default constructed view - run_me >(-1, 5, - true); - run_me >(-1, 5, - false); + run_me>(-1, 5, true); + run_me>(-1, 5, + false); } }; @@ -282,15 +279,20 @@ struct test_dualview_resize { const unsigned int m = 5; const unsigned int factor = 2; - ViewType a("A", n, m); + ViewType a; + if constexpr (Initialize) + a = ViewType("A", n, m); + else + a = ViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "A"), n, m); + Kokkos::deep_copy(a.d_view, 1); /* Covers case "Resize on Device" */ a.modify_device(); - if (Initialize) - Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, factor * m); - else + if constexpr (Initialize) Kokkos::resize(a, factor * n, factor * m); + else + Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, 
factor * m); ASSERT_EQ(a.extent(0), n * factor); ASSERT_EQ(a.extent(1), m * factor); @@ -298,33 +300,38 @@ struct test_dualview_resize { a.sync_host(); // Check device view is initialized as expected - scalar_type a_d_sum = 0; // Execute on the execution_space associated with t_dev's memory space using t_dev_exec_space = typename ViewType::t_dev::memory_space::execution_space; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, a.d_view.extent(0)), - SumViewEntriesFunctor(a.d_view), - a_d_sum); + Kokkos::View errors_d( + "errors"); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>( + {0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}), + KOKKOS_LAMBDA(int i, int j) { + if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data()); + }); + int errors_d_scalar; + Kokkos::deep_copy(errors_d_scalar, errors_d); // Check host view is synced as expected - scalar_type a_h_sum = 0; + int errors_h_scalar = 0; for (size_t i = 0; i < a.h_view.extent(0); ++i) for (size_t j = 0; j < a.h_view.extent(1); ++j) { - a_h_sum += a.h_view(i, j); + if (a.h_view(i, j) != 1) ++errors_h_scalar; } // Check - ASSERT_EQ(a_h_sum, a_d_sum); - ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1))); + ASSERT_EQ(errors_d_scalar, 0); + ASSERT_EQ(errors_h_scalar, 0); /* Covers case "Resize on Host" */ a.modify_host(); - if (Initialize) - Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor); - else + if constexpr (Initialize) Kokkos::resize(a, n / factor, m / factor); + else + Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor); ASSERT_EQ(a.extent(0), n / factor); ASSERT_EQ(a.extent(1), m / factor); @@ -332,30 +339,33 @@ struct test_dualview_resize { a.sync_device(Kokkos::DefaultExecutionSpace{}); // Check device view is initialized as expected - a_d_sum = 0; + Kokkos::deep_copy(errors_d, 0); // Execute on the execution_space associated with t_dev's memory space using t_dev_exec_space = typename ViewType::t_dev::memory_space::execution_space; - 
Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, a.d_view.extent(0)), - SumViewEntriesFunctor(a.d_view), - a_d_sum); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>( + {0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}), + KOKKOS_LAMBDA(int i, int j) { + if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data()); + }); + Kokkos::deep_copy(errors_d_scalar, errors_d); // Check host view is synced as expected - a_h_sum = 0; + errors_h_scalar = 0; for (size_t i = 0; i < a.h_view.extent(0); ++i) for (size_t j = 0; j < a.h_view.extent(1); ++j) { - a_h_sum += a.h_view(i, j); + if (a.h_view(i, j) != 1) ++errors_h_scalar; } // Check - ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1))); - ASSERT_EQ(a_h_sum, a_d_sum); + ASSERT_EQ(errors_d_scalar, 0); + ASSERT_EQ(errors_h_scalar, 0); } // end run_me test_dualview_resize() { - run_me >(); + run_me>(); } }; @@ -369,40 +379,51 @@ struct test_dualview_realloc { const unsigned int n = 10; const unsigned int m = 5; - ViewType a("A", n, m); - if (Initialize) - Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m); - else + ViewType a; + if constexpr (Initialize) { + a = ViewType("A", n, m); Kokkos::realloc(a, n, m); + } else { + a = ViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "A"), n, m); + Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m); + } + ASSERT_EQ(a.extent(0), n); + ASSERT_EQ(a.extent(1), m); Kokkos::deep_copy(a.d_view, 1); + a.modify_device(); a.sync_host(); // Check device view is initialized as expected - scalar_type a_d_sum = 0; // Execute on the execution_space associated with t_dev's memory space using t_dev_exec_space = typename ViewType::t_dev::memory_space::execution_space; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, a.d_view.extent(0)), - SumViewEntriesFunctor(a.d_view), - a_d_sum); + Kokkos::View errors_d( + "errors"); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>( + {0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}), + KOKKOS_LAMBDA(int i, int j) { + if (a.d_view(i, j) != 1) 
Kokkos::atomic_inc(errors_d.data()); + }); + int errors_d_scalar; + Kokkos::deep_copy(errors_d_scalar, errors_d); // Check host view is synced as expected - scalar_type a_h_sum = 0; + int errors_h_scalar = 0; for (size_t i = 0; i < a.h_view.extent(0); ++i) for (size_t j = 0; j < a.h_view.extent(1); ++j) { - a_h_sum += a.h_view(i, j); + if (a.h_view(i, j) != 1) ++errors_h_scalar; } // Check - ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1))); - ASSERT_EQ(a_h_sum, a_d_sum); + ASSERT_EQ(errors_d_scalar, 0); + ASSERT_EQ(errors_h_scalar, 0); } // end run_me test_dualview_realloc() { - run_me >(); + run_me>(); } }; @@ -463,12 +484,23 @@ TEST(TEST_CATEGORY, dualview_deep_copy) { test_dualview_deep_copy(); } +struct NoDefaultConstructor { + NoDefaultConstructor(int i_) : i(i_) {} + KOKKOS_FUNCTION operator int() const { return i; } + + int i; +}; + TEST(TEST_CATEGORY, dualview_realloc) { test_dualview_realloc(); + Impl::test_dualview_realloc(); } TEST(TEST_CATEGORY, dualview_resize) { test_dualview_resize(); + Impl::test_dualview_resize(); } namespace { diff --git a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp index f63f1c6afe37..4a7e826ecbe4 100644 --- a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -68,7 +68,7 @@ struct TestInsert { } while (rehash_on_fail && failed_count > 0u); // Trigger the m_size mutable bug. 
- typename map_type::HostMirror map_h; + auto map_h = create_mirror(map); execution_space().fence(); Kokkos::deep_copy(map_h, map); execution_space().fence(); @@ -367,7 +367,7 @@ void test_deep_copy(uint32_t num_nodes) { } } - host_map_type hmap; + auto hmap = create_mirror(map); Kokkos::deep_copy(hmap, map); ASSERT_EQ(map.size(), hmap.size()); @@ -380,6 +380,7 @@ void test_deep_copy(uint32_t num_nodes) { } map_type mmap; + mmap.allocate_view(hmap); Kokkos::deep_copy(mmap, hmap); const_map_type cmap = mmap; @@ -424,7 +425,7 @@ TEST(TEST_CATEGORY, UnorderedMap_valid_empty) { Map n{}; n = Map{m.capacity()}; n.rehash(m.capacity()); - Kokkos::deep_copy(n, m); + n.create_copy_view(m); ASSERT_TRUE(m.is_allocated()); ASSERT_TRUE(n.is_allocated()); } diff --git a/packages/kokkos/containers/unit_tests/TestVector.hpp b/packages/kokkos/containers/unit_tests/TestVector.hpp index a7d341b789d6..abed2676d76d 100644 --- a/packages/kokkos/containers/unit_tests/TestVector.hpp +++ b/packages/kokkos/containers/unit_tests/TestVector.hpp @@ -21,6 +21,8 @@ #include #include #include +#include +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() #include namespace Test { @@ -231,7 +233,7 @@ void test_vector_allocate(unsigned int size) { TEST(TEST_CATEGORY, vector_combination) { test_vector_allocate(10); test_vector_combinations(10); - test_vector_combinations(3057); + test_vector_combinations(3057); } TEST(TEST_CATEGORY, vector_insert) { diff --git a/packages/kokkos/core/perf_test/test_atomic.cpp b/packages/kokkos/core/perf_test/test_atomic.cpp index ce3059f47d32..af74723e7e01 100644 --- a/packages/kokkos/core/perf_test/test_atomic.cpp +++ b/packages/kokkos/core/perf_test/test_atomic.cpp @@ -390,7 +390,7 @@ static void Test_Atomic(benchmark::State& state) { static constexpr int LOOP = 100'000; -BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); +BENCHMARK(Test_Atomic)->Arg(30'000)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); 
BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); @@ -398,4 +398,3 @@ BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); -BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); diff --git a/packages/kokkos/core/perf_test/test_atomic_minmax_simple.cpp b/packages/kokkos/core/perf_test/test_atomic_minmax_simple.cpp index b838c8eccf02..bc35d1c776f8 100644 --- a/packages/kokkos/core/perf_test/test_atomic_minmax_simple.cpp +++ b/packages/kokkos/core/perf_test/test_atomic_minmax_simple.cpp @@ -183,7 +183,8 @@ double atomic_contentious_max_replacement(benchmark::State& state, Kokkos::parallel_reduce( con_length, KOKKOS_LAMBDA(const int i, T& inner) { - inner = Kokkos::atomic_max_fetch(&(input(0)), inner + 1); + inner = Kokkos::atomic_max_fetch(&(input(0)), + Kokkos::min(inner, max - 1) + 1); if (i == con_length - 1) { Kokkos::atomic_max_fetch(&(input(0)), max); inner = max; @@ -223,7 +224,8 @@ double atomic_contentious_min_replacement(benchmark::State& state, Kokkos::parallel_reduce( con_length, KOKKOS_LAMBDA(const int i, T& inner) { - inner = Kokkos::atomic_min_fetch(&(input(0)), inner - 1); + inner = Kokkos::atomic_min_fetch(&(input(0)), + Kokkos::max(inner, min + 1) - 1); if (i == con_length - 1) { Kokkos::atomic_min_fetch(&(input(0)), min); inner = min; @@ -246,7 +248,7 @@ static void Atomic_ContentiousMinReplacements(benchmark::State& state) { auto inp = prepare_input(1, std::numeric_limits::max()); for (auto _ : state) { - const auto time = atomic_contentious_max_replacement(state, inp, length); + const auto time = atomic_contentious_min_replacement(state, inp, length); state.SetIterationTime(time); } diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda.hpp index 276d03da2657..fd86976d3ba6 100644 --- 
a/packages/kokkos/core/src/Cuda/Kokkos_Cuda.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda.hpp @@ -166,8 +166,17 @@ class Cuda { Cuda(); - Cuda(cudaStream_t stream, - Impl::ManageStream manage_stream = Impl::ManageStream::no); + explicit Cuda(cudaStream_t stream) : Cuda(stream, Impl::ManageStream::no) {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "Cuda execution space should be constructed explicitly.") + Cuda(cudaStream_t stream) + : Cuda(stream) {} +#endif + + Cuda(cudaStream_t stream, Impl::ManageStream manage_stream); KOKKOS_DEPRECATED Cuda(cudaStream_t stream, bool manage_stream); @@ -186,7 +195,7 @@ class Cuda { /// /// This matches the __CUDA_ARCH__ specification. KOKKOS_DEPRECATED static size_type device_arch() { - const cudaDeviceProp& cudaProp = Cuda().cuda_device_prop(); + const cudaDeviceProp cudaProp = Cuda().cuda_device_prop(); return cudaProp.major * 100 + cudaProp.minor; } diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 0944937e1bf6..75318aff7781 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -59,12 +59,6 @@ const std::unique_ptr &Kokkos::Impl::cuda_get_deep_copy_space( namespace Kokkos { namespace Impl { -namespace { - -static std::atomic num_uvm_allocations(0); - -} // namespace - void DeepCopyCuda(void *dst, const void *src, size_t n) { KOKKOS_IMPL_CUDA_SAFE_CALL((CudaInternal::singleton().cuda_memcpy_wrapper( dst, src, n, cudaMemcpyDefault))); @@ -204,10 +198,7 @@ void *impl_allocate_common(const int device_id, // we should do here since we're turning it into an // exception here cudaGetLastError(); - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - CudaMalloc); + Kokkos::Impl::throw_bad_alloc(arg_handle.name, arg_alloc_size, arg_label); } if 
(Kokkos::Profiling::profileLibraryLoaded()) { @@ -252,8 +243,6 @@ void *CudaUVMSpace::impl_allocate( Cuda::impl_static_fence( "Kokkos::CudaUVMSpace::impl_allocate: Pre UVM Allocation"); if (arg_alloc_size > 0) { - Kokkos::Impl::num_uvm_allocations++; - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); cudaError_t error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal); @@ -263,10 +252,7 @@ void *CudaUVMSpace::impl_allocate( // we should do here since we're turning it into an // exception here cudaGetLastError(); - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - CudaMallocManaged); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST @@ -307,10 +293,7 @@ void *CudaHostPinnedSpace::impl_allocate( // we should do here since we're turning it into an // exception here cudaGetLastError(); - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - CudaHostAlloc); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -341,27 +324,24 @@ void CudaSpace::impl_deallocate( Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, reported_size); } - try { #ifndef CUDART_VERSION #error CUDART_VERSION undefined! 
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) - if (arg_alloc_size >= memory_threshold_g) { - Impl::cuda_device_synchronize( - "Kokkos::Cuda: backend fence before async free"); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeAsync(arg_alloc_ptr, m_stream)); - Impl::cuda_device_synchronize( - "Kokkos::Cuda: backend fence after async free"); - } else { - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); - } -#else + if (arg_alloc_size >= memory_threshold_g) { + Impl::cuda_device_synchronize( + "Kokkos::Cuda: backend fence before async free"); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeAsync(arg_alloc_ptr, m_stream)); + Impl::cuda_device_synchronize( + "Kokkos::Cuda: backend fence after async free"); + } else { KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); -#endif - } catch (...) { } +#else + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); +#endif } void CudaUVMSpace::deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const { @@ -387,13 +367,9 @@ void CudaUVMSpace::impl_deallocate( Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, reported_size); } - try { - if (arg_alloc_ptr != nullptr) { - Kokkos::Impl::num_uvm_allocations--; - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); - } - } catch (...) 
{ + if (arg_alloc_ptr != nullptr) { + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); } Cuda::impl_static_fence( "Kokkos::CudaUVMSpace::impl_deallocate: Post UVM Deallocation"); @@ -420,11 +396,8 @@ void CudaHostPinnedSpace::impl_deallocate( Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, reported_size); } - try { - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(arg_alloc_ptr)); - } catch (...) { - } + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(arg_alloc_ptr)); } } // namespace Kokkos diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp index c4458c910ca7..66656fefda5d 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp @@ -22,7 +22,6 @@ #include #include -#include namespace Kokkos { namespace Impl { @@ -69,52 +68,6 @@ inline void cuda_internal_safe_call(cudaError e, const char* name, Kokkos::Impl::cuda_internal_safe_call(call, #call, __FILE__, __LINE__) } // namespace Impl - -namespace Experimental { - -class CudaRawMemoryAllocationFailure : public RawMemoryAllocationFailure { - private: - using base_t = RawMemoryAllocationFailure; - - cudaError_t m_error_code = cudaSuccess; - - static FailureMode get_failure_mode(cudaError_t error_code) { - switch (error_code) { - case cudaErrorMemoryAllocation: return FailureMode::OutOfMemoryError; - case cudaErrorInvalidValue: return FailureMode::InvalidAllocationSize; - // TODO handle cudaErrorNotSupported for cudaMallocManaged - default: return FailureMode::Unknown; - } - } - - public: - // using base_t::base_t; - // would trigger - // - // error: cannot determine the exception specification of the default - // constructor due to a circular dependency - // - // using NVCC 9.1 and gcc 7.4 - 
CudaRawMemoryAllocationFailure( - size_t arg_attempted_size, size_t arg_attempted_alignment, - FailureMode arg_failure_mode = FailureMode::OutOfMemoryError, - AllocationMechanism arg_mechanism = - AllocationMechanism::StdMalloc) noexcept - : base_t(arg_attempted_size, arg_attempted_alignment, arg_failure_mode, - arg_mechanism) {} - - CudaRawMemoryAllocationFailure(size_t arg_attempted_size, - cudaError_t arg_error_code, - AllocationMechanism arg_mechanism) noexcept - : base_t(arg_attempted_size, /* CudaSpace doesn't handle alignment? */ 1, - get_failure_mode(arg_error_code), arg_mechanism), - m_error_code(arg_error_code) {} - - void append_additional_error_information(std::ostream& o) const override; -}; - -} // end namespace Experimental - } // namespace Kokkos #endif // KOKKOS_ENABLE_CUDA diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp index fcc3ff04ff58..625d8c317a1c 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp @@ -72,7 +72,7 @@ struct GraphImpl { GraphNodeImpl; - // Not moveable or copyable; it spends its whole life as a shared_ptr in the + // Not movable or copyable; it spends its whole life as a shared_ptr in the // Graph object GraphImpl() = delete; GraphImpl(GraphImpl const&) = delete; @@ -115,12 +115,9 @@ struct GraphImpl { template // requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl - // Also requires that the kernel has the graph node tag in it's policy + // Also requires that the kernel has the graph node tag in its policy void add_node(std::shared_ptr const& arg_node_ptr) { - static_assert( - NodeImpl::kernel_type::Policy::is_graph_kernel::value, - "Something has gone horribly wrong, but it's too complicated to " - "explain here. 
Buy Daisy a coffee and she'll explain it to you."); + static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value); KOKKOS_EXPECTS(bool(arg_node_ptr)); // The Kernel launch from the execute() method has been shimmed to insert // the node into the graph diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index 849e8b3b30e8..89a00028969b 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -737,6 +737,14 @@ namespace Impl { int g_cuda_space_factory_initialized = initialize_space_factory("150_Cuda"); +int CudaInternal::m_cudaArch = -1; +cudaDeviceProp CudaInternal::m_deviceProp; +std::set CudaInternal::cuda_devices = {}; +std::map CudaInternal::constantMemHostStagingPerDevice = + {}; +std::map CudaInternal::constantMemReusablePerDevice = {}; +std::map CudaInternal::constantMemMutexPerDevice = {}; + } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp index 24f4af310190..ffaa0f54749f 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp @@ -91,10 +91,10 @@ class CudaInternal { int m_cudaDev = -1; // Device Properties - inline static int m_cudaArch = -1; + static int m_cudaArch; static int concurrency(); - inline static cudaDeviceProp m_deviceProp; + static cudaDeviceProp m_deviceProp; // Scratch Spaces for Reductions mutable std::size_t m_scratchSpaceCount; @@ -120,11 +120,10 @@ class CudaInternal { bool was_initialized = false; bool was_finalized = false; - inline static std::set cuda_devices = {}; - inline static std::map constantMemHostStagingPerDevice = - {}; - inline static std::map constantMemReusablePerDevice = {}; - inline static std::map constantMemMutexPerDevice = {}; + static std::set cuda_devices; + static std::map 
constantMemHostStagingPerDevice; + static std::map constantMemReusablePerDevice; + static std::map constantMemMutexPerDevice; static CudaInternal& singleton(); @@ -421,23 +420,6 @@ class CudaInternal { return cudaStreamSynchronize(stream); } - // The following are only available for cuda 11.2 and greater -#if (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) - template - cudaError_t cuda_malloc_async_wrapper(void** devPtr, size_t size, - cudaStream_t hStream = nullptr) const { - if constexpr (setCudaDevice) set_cuda_device(); - return cudaMallocAsync(devPtr, size, get_input_stream(hStream)); - } - - template - cudaError_t cuda_free_async_wrapper(void* devPtr, - cudaStream_t hStream = nullptr) const { - if constexpr (setCudaDevice) set_cuda_device(); - return cudaFreeAsync(devPtr, get_input_stream(hStream)); - } -#endif - // C++ API routines template cudaError_t cuda_func_get_attributes_wrapper(cudaFuncAttributes* attr, diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp index 9f7be45c839b..71e775182106 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp @@ -539,17 +539,9 @@ class ParallelFor, m_vector_size(arg_policy.impl_vector_length()) { auto internal_space_instance = m_policy.space().impl_internal_space_instance(); - cudaFuncAttributes attr = - CudaParallelLaunch::get_cuda_func_attributes( - internal_space_instance->m_cudaDev); - m_team_size = - m_team_size >= 0 - ? m_team_size - : Kokkos::Impl::cuda_get_opt_block_size( - internal_space_instance, attr, m_functor, m_vector_size, - m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; + m_team_size = m_team_size >= 0 ? 
m_team_size + : arg_policy.team_size_recommended( + arg_functor, ParallelForTag()); m_shmem_begin = (sizeof(double) * (m_team_size + 2)); m_shmem_size = @@ -585,13 +577,7 @@ class ParallelFor, "Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); } - if (int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size( - internal_space_instance, attr, arg_functor, - arg_policy.impl_vector_length(), - arg_policy.team_scratch_size(0), - arg_policy.thread_scratch_size(0)) / - arg_policy.impl_vector_length())) { + if (m_team_size > arg_policy.team_size_max(arg_functor, ParallelForTag())) { Kokkos::Impl::throw_runtime_exception(std::string( "Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); } @@ -909,17 +895,11 @@ class ParallelReduce:: - get_cuda_func_attributes(internal_space_instance->m_cudaDev); - m_team_size = - m_team_size >= 0 - ? m_team_size - : Kokkos::Impl::cuda_get_opt_block_size( - internal_space_instance, attr, - m_functor_reducer.get_functor(), m_vector_size, - m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; + m_team_size = m_team_size >= 0 ? m_team_size + : arg_policy.team_size_recommended( + arg_functor_reducer.get_functor(), + arg_functor_reducer.get_reducer(), + ParallelReduceTag()); m_team_begin = UseShflReduction diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp index c8d6641d1ee7..18aca15065ea 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp @@ -28,35 +28,20 @@ extern "C" { /* Cuda runtime function, declared in * Requires capability 2.x or better. 
*/ -extern __device__ void __assertfail(const void *message, const void *file, - unsigned int line, const void *function, - size_t charsize); +[[noreturn]] __device__ void __assertfail(const void *message, const void *file, + unsigned int line, + const void *function, + size_t charsize); } namespace Kokkos { namespace Impl { -// required to workaround failures in random number generator unit tests with -// pre-volta architectures -#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) -__device__ inline void cuda_abort(const char *const message) { -#else -[[noreturn]] __device__ inline void cuda_abort(const char *const message) { -#endif +[[noreturn]] __device__ static void cuda_abort(const char *const message) { const char empty[] = ""; __assertfail((const void *)message, (const void *)empty, (unsigned int)0, (const void *)empty, sizeof(char)); - - // This loop is never executed. It's intended to suppress warnings that the - // function returns, even though it does not. This is necessary because - // __assertfail is not marked as [[noreturn]], even though it does not return. 
- // Disable with KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK to workaround failures - // in random number generator unit tests with pre-volta architectures -#if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) - while (true) - ; -#endif } } // namespace Impl diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP.hpp index 3a88e97ee3dd..439075fc6cc5 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP.hpp @@ -48,8 +48,19 @@ class HIP { using scratch_memory_space = ScratchMemorySpace; HIP(); - HIP(hipStream_t stream, - Impl::ManageStream manage_stream = Impl::ManageStream::no); + + explicit HIP(hipStream_t stream) : HIP(stream, Impl::ManageStream::no) {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "HIP execution space should be constructed explicitly.") + HIP(hipStream_t stream) + : HIP(stream) {} +#endif + + HIP(hipStream_t stream, Impl::ManageStream manage_stream); + KOKKOS_DEPRECATED HIP(hipStream_t stream, bool manage_stream); //@} diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp index 43d63c090b37..fa45dcfec315 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp @@ -22,8 +22,6 @@ #include -#include - namespace Kokkos { namespace Impl { @@ -44,39 +42,4 @@ inline void hip_internal_safe_call(hipError_t e, const char* name, #define KOKKOS_IMPL_HIP_SAFE_CALL(call) \ Kokkos::Impl::hip_internal_safe_call(call, #call, __FILE__, __LINE__) -namespace Kokkos { -namespace Experimental { - -class HIPRawMemoryAllocationFailure : public RawMemoryAllocationFailure { - private: - hipError_t m_error_code = hipSuccess; - - static FailureMode get_failure_mode(hipError_t error_code) { - switch (error_code) { - case hipErrorMemoryAllocation: return FailureMode::OutOfMemoryError; - case hipErrorInvalidValue: return 
FailureMode::InvalidAllocationSize; - default: return FailureMode::Unknown; - } - } - - public: - HIPRawMemoryAllocationFailure(size_t arg_attempted_size, - hipError_t arg_error_code, - AllocationMechanism arg_mechanism) noexcept - : RawMemoryAllocationFailure( - arg_attempted_size, /* HIPSpace doesn't handle alignment? */ 1, - get_failure_mode(arg_error_code), arg_mechanism), - m_error_code(arg_error_code) {} - - void append_additional_error_information(std::ostream& o) const override { - if (m_error_code != hipSuccess) { - o << " The HIP allocation returned the error code \"" - << hipGetErrorName(m_error_code) << "\"."; - } - } -}; - -} // namespace Experimental -} // namespace Kokkos - #endif diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp index 7cc06d02fbed..a0989fe67111 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp @@ -40,7 +40,7 @@ class GraphImpl { GraphNodeImpl; - // Not moveable or copyable; it spends its whole life as a shared_ptr in the + // Not movable or copyable; it spends its whole life as a shared_ptr in the // Graph object. 
GraphImpl() = delete; GraphImpl(GraphImpl const&) = delete; @@ -108,7 +108,7 @@ inline void GraphImpl::add_node( } // Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl -// Also requires that the kernel has the graph node tag in it's policy +// Also requires that the kernel has the graph node tag in its policy template inline void GraphImpl::add_node( std::shared_ptr const& arg_node_ptr) { diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp index 22c0db047f61..e0b25c69399a 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -353,6 +353,22 @@ void HIPInternal::finalize() { m_num_scratch_locks = 0; } +int HIPInternal::m_hipDev = -1; +unsigned HIPInternal::m_multiProcCount = 0; +unsigned HIPInternal::m_maxWarpCount = 0; +std::array HIPInternal::m_maxBlock = {0, 0, 0}; +unsigned HIPInternal::m_maxWavesPerCU = 0; +int HIPInternal::m_shmemPerSM = 0; +int HIPInternal::m_maxShmemPerBlock = 0; +int HIPInternal::m_maxThreadsPerSM = 0; + +hipDeviceProp_t HIPInternal::m_deviceProp; + +std::mutex HIPInternal::scratchFunctorMutex; +unsigned long *HIPInternal::constantMemHostStaging = nullptr; +hipEvent_t HIPInternal::constantMemReusable = nullptr; +std::mutex HIPInternal::constantMemMutex; + //---------------------------------------------------------------------------- Kokkos::HIP::size_type hip_internal_multiprocessor_count() { diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp index 142008124af9..19349e90bb16 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -70,16 +70,16 @@ class HIPInternal { public: using size_type = ::Kokkos::HIP::size_type; - inline static int m_hipDev = -1; - inline static unsigned m_multiProcCount = 0; - inline static unsigned m_maxWarpCount = 
0; - inline static std::array m_maxBlock = {0, 0, 0}; - inline static unsigned m_maxWavesPerCU = 0; - inline static int m_shmemPerSM = 0; - inline static int m_maxShmemPerBlock = 0; - inline static int m_maxThreadsPerSM = 0; + static int m_hipDev; + static unsigned m_multiProcCount; + static unsigned m_maxWarpCount; + static std::array m_maxBlock; + static unsigned m_maxWavesPerCU; + static int m_shmemPerSM; + static int m_maxShmemPerBlock; + static int m_maxThreadsPerSM; - inline static hipDeviceProp_t m_deviceProp; + static hipDeviceProp_t m_deviceProp; static int concurrency(); @@ -92,7 +92,7 @@ class HIPInternal { size_type *m_scratchFlags = nullptr; mutable size_type *m_scratchFunctor = nullptr; mutable size_type *m_scratchFunctorHost = nullptr; - inline static std::mutex scratchFunctorMutex; + static std::mutex scratchFunctorMutex; hipStream_t m_stream = nullptr; uint32_t m_instance_id = @@ -111,9 +111,9 @@ class HIPInternal { // FIXME_HIP: these want to be per-device, not per-stream... 
use of 'static' // here will break once there are multiple devices though - inline static unsigned long *constantMemHostStaging = nullptr; - inline static hipEvent_t constantMemReusable = nullptr; - inline static std::mutex constantMemMutex; + static unsigned long *constantMemHostStaging; + static hipEvent_t constantMemReusable; + static std::mutex constantMemMutex; static HIPInternal &singleton(); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp index 55b6218d1c88..162951164626 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp @@ -50,6 +50,7 @@ class ParallelReduce class ParallelReduce, HIP> { public: - using Policy = TeamPolicyInternal; + using Policy = TeamPolicy; using FunctorType = typename CombinedFunctorReducerType::functor_type; using ReducerType = typename CombinedFunctorReducerType::reducer_type; @@ -46,6 +46,7 @@ class ParallelReduce is_first_hip_managed_allocation(true); @@ -66,7 +67,6 @@ void* HIPSpace::allocate( return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); } void* HIPSpace::impl_allocate( - const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle arg_handle) const { @@ -77,10 +77,7 @@ void* HIPSpace::impl_allocate( // This is the only way to clear the last error, which we should do here // since we're turning it into an exception here (void)hipGetLastError(); - throw Experimental::HIPRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - HIPMalloc); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -111,10 +108,7 @@ void* HIPHostPinnedSpace::impl_allocate( // This is the only way to clear the last error, 
which we should do here // since we're turning it into an exception here (void)hipGetLastError(); - throw Experimental::HIPRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - HIPHostMalloc); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -178,10 +172,7 @@ Kokkos::HIP::runtime WARNING: Kokkos did not find an environment variable 'HSA_X // This is the only way to clear the last error, which we should do here // since we're turning it into an exception here (void)hipGetLastError(); - throw Experimental::HIPRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - HIPMallocManaged); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } KOKKOS_IMPL_HIP_SAFE_CALL(hipMemAdvise( ptr, arg_alloc_size, hipMemAdviseSetCoarseGrain, m_device)); diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp index 6d541a64148a..1f3d0783449f 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp @@ -153,7 +153,7 @@ void HPX::impl_instance_fence_locked(const std::string &name) const { auto &s = impl_get_sender(); hpx::this_thread::experimental::sync_wait(std::move(s)); - s = hpx::execution::experimental::unique_any_sender( + s = hpx::execution::experimental::unique_any_sender<>( hpx::execution::experimental::just()); }); } @@ -184,7 +184,7 @@ void HPX::impl_static_fence(const std::string &name) { } hpx::this_thread::experimental::sync_wait(std::move(s)); - s = hpx::execution::experimental::unique_any_sender( + s = hpx::execution::experimental::unique_any_sender<>( hpx::execution::experimental::just()); }); } diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX.hpp b/packages/kokkos/core/src/HPX/Kokkos_HPX.hpp index 
26181a7c05d3..245dc128ca86 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX.hpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX.hpp @@ -168,17 +168,31 @@ class HPX { : m_instance_data(Kokkos::Impl::HostSharedPtr( &m_default_instance_data, &default_instance_deleter)) {} ~HPX() = default; - HPX(instance_mode mode) + explicit HPX(instance_mode mode) : m_instance_data( mode == instance_mode::independent ? (Kokkos::Impl::HostSharedPtr( new instance_data(m_next_instance_id++))) : Kokkos::Impl::HostSharedPtr( &m_default_instance_data, &default_instance_deleter)) {} - HPX(hpx::execution::experimental::unique_any_sender<> &&sender) + explicit HPX(hpx::execution::experimental::unique_any_sender<> &&sender) : m_instance_data(Kokkos::Impl::HostSharedPtr( new instance_data(m_next_instance_id++, std::move(sender)))) {} +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "HPX execution space should be constructed explicitly.") + HPX(instance_mode mode) + : HPX(mode) {} + + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "HPX execution space should be constructed explicitly.") + HPX(hpx::execution::experimental::unique_any_sender<> &&sender) + : HPX(std::move(sender)) {} +#endif + HPX(HPX &&other) = default; HPX(const HPX &other) = default; diff --git a/packages/kokkos/core/src/Kokkos_Array.hpp b/packages/kokkos/core/src/Kokkos_Array.hpp index ba1626bb72e6..4d905fbc5538 100644 --- a/packages/kokkos/core/src/Kokkos_Array.hpp +++ b/packages/kokkos/core/src/Kokkos_Array.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include namespace Kokkos { @@ -80,7 +79,11 @@ struct ArrayBoundsCheck { /**\brief Derived from the C++17 'std::array'. * Dropping the iterator interface. 
*/ +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 template +#else +template +#endif struct Array { public: /** @@ -129,10 +132,38 @@ struct Array { KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const { return &m_internal_implementation_private_member_data[0]; } + + friend KOKKOS_FUNCTION constexpr bool operator==(Array const& lhs, + Array const& rhs) noexcept { + for (size_t i = 0; i != N; ++i) + if (lhs[i] != rhs[i]) return false; + return true; + } + + friend KOKKOS_FUNCTION constexpr bool operator!=(Array const& lhs, + Array const& rhs) noexcept { + return !(lhs == rhs); + } + + private: + template + friend KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + Impl::is_swappable::value> + kokkos_swap(Array& a, + Array& b) noexcept(Impl::is_nothrow_swappable_v) { + for (std::size_t i = 0; i < N; ++i) { + kokkos_swap(a[i], b[i]); + } + } }; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 template struct Array { +#else +template +struct Array { +#endif public: using reference = T&; using const_reference = std::add_const_t&; @@ -167,25 +198,35 @@ struct Array { KOKKOS_INLINE_FUNCTION pointer data() { return nullptr; } KOKKOS_INLINE_FUNCTION const_pointer data() const { return nullptr; } - KOKKOS_DEFAULTED_FUNCTION ~Array() = default; - KOKKOS_DEFAULTED_FUNCTION Array() = default; - KOKKOS_DEFAULTED_FUNCTION Array(const Array&) = default; - KOKKOS_DEFAULTED_FUNCTION Array& operator=(const Array&) = default; + friend KOKKOS_FUNCTION constexpr bool operator==(Array const&, + Array const&) noexcept { + return true; + } + friend KOKKOS_FUNCTION constexpr bool operator!=(Array const&, + Array const&) noexcept { + return false; + } - // Some supported compilers are not sufficiently C++11 compliant - // for default move constructor and move assignment operator. 
- // Array( Array && ) = default ; - // Array & operator = ( Array && ) = default ; + private: + friend KOKKOS_INLINE_FUNCTION constexpr void kokkos_swap( + Array&, Array&) noexcept {} }; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +namespace Impl { +struct KokkosArrayContiguous {}; +struct KokkosArrayStrided {}; +} // namespace Impl + template <> -struct Array { - struct contiguous {}; - struct strided {}; +struct KOKKOS_DEPRECATED Array { + using contiguous = Impl::KokkosArrayContiguous; + using strided = Impl::KokkosArrayStrided; }; template -struct Array::contiguous> { +struct KOKKOS_DEPRECATED + Array { private: T* m_elem; size_t m_size; @@ -253,7 +294,8 @@ struct Array::contiguous> { }; template -struct Array::strided> { +struct KOKKOS_DEPRECATED + Array { private: T* m_elem; size_t m_size; @@ -320,10 +362,37 @@ struct Array::strided> { size_type arg_stride) : m_elem(arg_ptr), m_size(arg_size), m_stride(arg_stride) {} }; +#endif template Array(T, Us...)->Array; +namespace Impl { + +template +KOKKOS_FUNCTION constexpr Array, N> to_array_impl( + T (&a)[N], std::index_sequence) { + return {{a[I]...}}; +} + +template +KOKKOS_FUNCTION constexpr Array, N> to_array_impl( + T(&&a)[N], std::index_sequence) { + return {{std::move(a[I])...}}; +} + +} // namespace Impl + +template +KOKKOS_FUNCTION constexpr auto to_array(T (&a)[N]) { + return Impl::to_array_impl(a, std::make_index_sequence{}); +} + +template +KOKKOS_FUNCTION constexpr auto to_array(T(&&a)[N]) { + return Impl::to_array_impl(std::move(a), std::make_index_sequence{}); +} + } // namespace Kokkos // @@ -333,6 +402,7 @@ struct std::tuple_size> template struct std::tuple_element> { + static_assert(I < N); using type = T; }; @@ -340,21 +410,25 @@ namespace Kokkos { template KOKKOS_FUNCTION constexpr T& get(Array& a) noexcept { + static_assert(I < N); return a[I]; } template KOKKOS_FUNCTION constexpr T const& get(Array const& a) noexcept { + static_assert(I < N); return a[I]; } template KOKKOS_FUNCTION constexpr T&& 
get(Array&& a) noexcept { + static_assert(I < N); return std::move(a[I]); } template KOKKOS_FUNCTION constexpr T const&& get(Array const&& a) noexcept { + static_assert(I < N); return std::move(a[I]); } diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp index 9acacef901a7..bf57dcae650e 100644 --- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp +++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp @@ -22,7 +22,6 @@ static_assert(false, #ifndef KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_ #define KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_ #include -#include #include #ifdef KOKKOS_ENABLE_ATOMICS_BYPASS diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp index eebdd20f15d4..26db69ac1f11 100644 --- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp +++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp @@ -22,8 +22,6 @@ static_assert(false, #ifndef KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_ #define KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_ #include - -#include #include #include diff --git a/packages/kokkos/core/src/Kokkos_Complex.hpp b/packages/kokkos/core/src/Kokkos_Complex.hpp index 4d405116ccff..7dd2a9ddbb71 100644 --- a/packages/kokkos/core/src/Kokkos_Complex.hpp +++ b/packages/kokkos/core/src/Kokkos_Complex.hpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace Kokkos { @@ -256,6 +257,12 @@ class return *this; } + template + friend constexpr const RT& get(const complex&) noexcept; + + template + friend constexpr const RT&& get(const complex&&) noexcept; + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 //! Copy constructor from volatile. 
template < @@ -423,6 +430,75 @@ class #endif // KOKKOS_ENABLE_DEPRECATED_CODE_4 }; +} // namespace Kokkos + +// Tuple protocol for complex based on https://wg21.link/P2819R2 (voted into +// the C++26 working draft on 2023-11) + +template +struct std::tuple_size> + : std::integral_constant {}; + +template +struct std::tuple_element> { + static_assert(I < 2); + using type = RealType; +}; + +namespace Kokkos { + +// get<...>(...) defined here so as not to be hidden friends, as per P2819R2 + +template +KOKKOS_FUNCTION constexpr RealType& get(complex& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return z.real(); + else + return z.imag(); +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_FUNCTION constexpr RealType&& get(complex&& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return std::move(z.real()); + else + return std::move(z.imag()); +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_FUNCTION constexpr const RealType& get( + const complex& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return z.re_; + else + return z.im_; +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_FUNCTION constexpr const RealType&& get( + const complex&& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return std::move(z.re_); + else + return std::move(z.im_); +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + //============================================================================== // {{{1 diff --git a/packages/kokkos/core/src/Kokkos_CopyViews.hpp b/packages/kokkos/core/src/Kokkos_CopyViews.hpp index 08f6ba8d696a..e856b1924719 100644 --- a/packages/kokkos/core/src/Kokkos_CopyViews.hpp +++ b/packages/kokkos/core/src/Kokkos_CopyViews.hpp @@ -221,10 +221,12 @@ struct ViewFill { ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, const ExecSpace& space) : a(a_), 
val(val_) { + // MDRangePolicy is not supported for 7D views + // Iterate separately over extent(2) Kokkos::parallel_for("Kokkos::ViewFill-7D", policy_type(space, {0, 0, 0, 0, 0, 0}, - {a.extent(0), a.extent(1), a.extent(2), - a.extent(3), a.extent(5), a.extent(6)}), + {a.extent(0), a.extent(1), a.extent(3), + a.extent(4), a.extent(5), a.extent(6)}), *this); } @@ -249,6 +251,8 @@ struct ViewFill { ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, const ExecSpace& space) : a(a_), val(val_) { + // MDRangePolicy is not supported for 8D views + // Iterate separately over extent(2) and extent(4) Kokkos::parallel_for("Kokkos::ViewFill-8D", policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), @@ -293,9 +297,11 @@ struct ViewCopy { ViewTypeA a; ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -323,9 +329,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -354,9 +362,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - 
Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<4, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -386,9 +396,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<5, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -418,9 +430,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -450,9 +464,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + 
Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -461,6 +477,8 @@ struct ViewCopy { ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, const ExecSpace space = ExecSpace()) : a(a_), b(b_) { + // MDRangePolicy is not supported for 7D views + // Iterate separately over extent(2) Kokkos::parallel_for("Kokkos::ViewCopy-7D", policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), @@ -483,9 +501,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -494,6 +514,8 @@ struct ViewCopy { ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, const ExecSpace space = ExecSpace()) : a(a_), b(b_) { + // MDRangePolicy is not supported for 8D views + // Iterate separately over extent(2) and extent(4) Kokkos::parallel_for("Kokkos::ViewCopy-8D", policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), @@ -539,11 +561,8 @@ void view_copy(const ExecutionSpace& space, const DstType& dst, int64_t strides[DstType::rank + 1]; dst.stride(strides); Kokkos::Iterate iterate; - if (Kokkos::is_layouttiled::value) { - iterate = Kokkos::layout_iterate_type_selector< - typename DstType::array_layout>::outer_iteration_pattern; - } else if (std::is_same::value) { + if (std::is_same::value) { iterate = Kokkos::Iterate::Right; } else if (std::is_same::value) { @@ -630,11 +649,8 @@ void view_copy(const DstType& dst, const SrcType& src) 
{ int64_t strides[DstType::rank + 1]; dst.stride(strides); Kokkos::Iterate iterate; - if (Kokkos::is_layouttiled::value) { - iterate = Kokkos::layout_iterate_type_selector< - typename DstType::array_layout>::outer_iteration_pattern; - } else if (std::is_same::value) { + if (std::is_same::value) { iterate = Kokkos::Iterate::Right; } else if (std::is_same::value) { @@ -3092,8 +3108,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value> + Kokkos::LayoutStride>::value> impl_resize(const Impl::ViewCtorProp& arg_prop, Kokkos::View& v, const typename Kokkos::View::array_layout& layout) { @@ -3139,8 +3154,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value)> + Kokkos::LayoutStride>::value)> impl_resize(const Impl::ViewCtorProp& arg_prop, Kokkos::View& v, const typename Kokkos::View::array_layout& layout) { @@ -3235,7 +3249,10 @@ impl_realloc(Kokkos::View& v, const size_t n0, const size_t n1, v = view_type(); // Best effort to deallocate in case no other view refers // to the shared allocation v = view_type(arg_prop_copy, n0, n1, n2, n3, n4, n5, n6, n7); - } else if (alloc_prop_input::initialize) { + return; + } + + if constexpr (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::has_execution_space) { const auto& exec_space = Impl::get_property(arg_prop); @@ -3308,8 +3325,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value> + Kokkos::LayoutStride>::value> impl_realloc(Kokkos::View& v, const typename Kokkos::View::array_layout& layout, const Impl::ViewCtorProp& arg_prop) { @@ -3331,7 +3347,10 @@ impl_realloc(Kokkos::View& v, if (v.layout() != layout) { v = 
view_type(); // Deallocate first, if the only view to allocation v = view_type(arg_prop, layout); - } else if (alloc_prop_input::initialize) { + return; + } + + if constexpr (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::has_execution_space) { const auto& exec_space = Impl::get_property(arg_prop); @@ -3351,8 +3370,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value)> + Kokkos::LayoutStride>::value)> impl_realloc(Kokkos::View& v, const typename Kokkos::View::array_layout& layout, const Impl::ViewCtorProp& arg_prop) { @@ -3452,6 +3470,7 @@ struct MirrorType { using view_type = Kokkos::View; }; +// collection of static asserts for create_mirror and create_mirror_view template void check_view_ctor_args_create_mirror() { using alloc_prop_input = Impl::ViewCtorProp; @@ -3470,232 +3489,231 @@ void check_view_ctor_args_create_mirror() { "not explicitly allow padding!"); } +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t::has_memory_space, - typename Kokkos::View::HostMirror> -create_mirror(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - using src_type = View; - using dst_type = typename src_type::HostMirror; - +inline auto create_mirror(const Kokkos::View& src, + const Impl::ViewCtorProp& arg_prop) { check_view_ctor_args_create_mirror(); auto prop_copy = Impl::with_properties_if_unset( arg_prop, std::string(src.label()).append("_mirror")); - return dst_type(prop_copy, src.layout()); -} - -// Create a mirror in a new space (specialization for different space) -template ::has_memory_space>> -auto create_mirror(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - check_view_ctor_args_create_mirror(); - - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, 
std::string(src.label()).append("_mirror")); - using alloc_prop = decltype(prop_copy); - - return typename Impl::MirrorType::view_type(prop_copy, src.layout()); + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using memory_space = typename decltype(prop_copy)::memory_space; + using dst_type = + typename Impl::MirrorType::view_type; + return dst_type(prop_copy, src.layout()); + } else { + using dst_type = typename View::HostMirror; + return dst_type(prop_copy, src.layout()); + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } } // namespace Impl -template -std::enable_if_t::specialize>::value, - typename Kokkos::View::HostMirror> -create_mirror(Kokkos::View const& v) { - return Impl::create_mirror(v, Impl::ViewCtorProp<>{}); +// public interface +template ::specialize>>> +auto create_mirror(Kokkos::View const& src) { + return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); } -template -std::enable_if_t::specialize>::value, - typename Kokkos::View::HostMirror> -create_mirror(Kokkos::Impl::WithoutInitializing_t wi, - Kokkos::View const& v) { - return Impl::create_mirror(v, view_alloc(wi)); +// public interface that accepts a without initializing flag +template ::specialize>>> +auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, + Kokkos::View const& src) { + return Impl::create_mirror(src, view_alloc(wi)); } +// public interface that accepts a space template ::value>> -std::enable_if_t::specialize>::value, - typename Impl::MirrorType::view_type> -create_mirror(Space const&, Kokkos::View const& v) { - return Impl::create_mirror(v, view_alloc(typename Space::memory_space{})); + typename Enable = std::enable_if_t< + Kokkos::is_space::value && + std::is_void_v::specialize>>> +auto create_mirror(Space const&, Kokkos::View const& src) { + return Impl::create_mirror(src, view_alloc(typename Space::memory_space{})); } +// 
public interface that accepts arbitrary view constructor args passed by a +// view_alloc template ::specialize>::value && - Impl::ViewCtorProp::has_memory_space>> + typename = std::enable_if_t< + std::is_void_v::specialize>>> auto create_mirror(Impl::ViewCtorProp const& arg_prop, - Kokkos::View const& v) { - return Impl::create_mirror(v, arg_prop); -} - -template -std::enable_if_t< - std::is_void::specialize>::value && - !Impl::ViewCtorProp::has_memory_space, - typename Kokkos::View::HostMirror> -create_mirror(Impl::ViewCtorProp const& arg_prop, - Kokkos::View const& v) { - return Impl::create_mirror(v, arg_prop); + Kokkos::View const& src) { + return Impl::create_mirror(src, arg_prop); } +// public interface that accepts a space and a without initializing flag template ::value>> -std::enable_if_t::specialize>::value, - typename Impl::MirrorType::view_type> -create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&, - Kokkos::View const& v) { - return Impl::create_mirror(v, view_alloc(typename Space::memory_space{}, wi)); + typename Enable = std::enable_if_t< + Kokkos::is_space::value && + std::is_void_v::specialize>>> +auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&, + Kokkos::View const& src) { + return Impl::create_mirror(src, + view_alloc(typename Space::memory_space{}, wi)); } namespace Impl { -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - (std::is_same< - typename Kokkos::View::memory_space, - typename Kokkos::View::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value), - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp&) { - check_view_ctor_args_create_mirror(); - return src; -} +// choose a `Kokkos::create_mirror` adapted for the provided view and the +// provided arguments +template +inline auto choose_create_mirror( + const View& src, const 
Impl::ViewCtorProp& arg_prop) { + // Due to the fact that users can overload `Kokkos::create_mirror`, but also + // that they may not have implemented all of its different possible + // variations, this function chooses the correct private or public version of + // it to call. + // This helper should be used by any overload of + // `Kokkos::Impl::create_mirror_view`. + + if constexpr (std::is_void_v) { + // if the view is not specialized, just call the Impl function + + // using ADL to find the later defined overload of the function + using namespace Kokkos::Impl; + + return create_mirror(src, arg_prop); + } else { + // otherwise, recreate the public call + using ViewProp = Impl::ViewCtorProp; + + // using ADL to find the later defined overload of the function + using namespace Kokkos; + + if constexpr (sizeof...(ViewCtorArgs) == 0) { + // if there are no view constructor args, call the specific public + // function + return create_mirror(src); + } else if constexpr (sizeof...(ViewCtorArgs) == 1 && + ViewProp::has_memory_space) { + // if there is one view constructor arg and it has a memory space, call + // the specific public function + return create_mirror(typename ViewProp::memory_space{}, src); + } else if constexpr (sizeof...(ViewCtorArgs) == 1 && + !ViewProp::initialize) { + // if there is one view constructor arg and it has a without initializing + // mark, call the specific public function + return create_mirror(typename Kokkos::Impl::WithoutInitializing_t{}, src); + } else if constexpr (sizeof...(ViewCtorArgs) == 2 && + ViewProp::has_memory_space && !ViewProp::initialize) { + // if there is two view constructor args and they have a memory space and + // a without initializing mark, call the specific public function + return create_mirror(typename Kokkos::Impl::WithoutInitializing_t{}, + typename ViewProp::memory_space{}, src); + } else { + // if there are other constructor args, call the generic public function -template -inline std::enable_if_t< - 
!Impl::ViewCtorProp::has_memory_space && - !(std::is_same::memory_space, - typename Kokkos::View< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value), - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} - -// Create a mirror view in a new space (specialization for same space) -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp&) { - check_view_ctor_args_create_mirror(); - return src; -} + // Beware, there are some libraries using Kokkos that don't implement + // this overload (hence the reason for this present function to exist). + return create_mirror(arg_prop, src); + } + } -// Create a mirror view in a new space (specialization for different space) -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -} // namespace Impl -template -std::enable_if_t< - std::is_same< - typename Kokkos::View::memory_space, - typename Kokkos::View::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value, - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src) { - return src; +// 
create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc +template +inline auto create_mirror_view( + const Kokkos::View& src, + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename Kokkos::View< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename Kokkos::View< + T, P...>::HostMirror::data_type>::value) { + check_view_ctor_args_create_mirror(); + return typename Kokkos::View::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorViewType::memory_space, + T, P...>::is_same_memspace) { + check_view_ctor_args_create_mirror(); + return typename Impl::MirrorViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } +} // namespace Impl +// public interface template -std::enable_if_t< - !(std::is_same< - typename Kokkos::View::memory_space, - typename Kokkos::View::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value), - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src) { - return Kokkos::create_mirror(src); +auto create_mirror_view(const Kokkos::View& src) { + return Impl::create_mirror_view(src, view_alloc()); } +// public interface that accepts a without initializing flag template -typename Kokkos::View::HostMirror create_mirror_view( - Kokkos::Impl::WithoutInitializing_t wi, Kokkos::View const& v) { - return Impl::create_mirror_view(v, view_alloc(wi)); +auto 
create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, + Kokkos::View const& src) { + return Impl::create_mirror_view(src, view_alloc(wi)); } -// FIXME_C++17 Improve SFINAE here. +// public interface that accepts a space template ::value>> -typename Impl::MirrorViewType::view_type create_mirror_view( - const Space&, const Kokkos::View& src, - std::enable_if_t::is_same_memspace>* = - nullptr) { - return src; -} - -// FIXME_C++17 Improve SFINAE here. -template ::value>> -typename Impl::MirrorViewType::view_type create_mirror_view( - const Space& space, const Kokkos::View& src, - std::enable_if_t::is_same_memspace>* = - nullptr) { - return Kokkos::create_mirror(space, src); +auto create_mirror_view(const Space&, const Kokkos::View& src) { + return Impl::create_mirror_view(src, + view_alloc(typename Space::memory_space())); } +// public interface that accepts a space and a without initializing flag template ::value>> -typename Impl::MirrorViewType::view_type create_mirror_view( - Kokkos::Impl::WithoutInitializing_t wi, Space const&, - Kokkos::View const& v) { +auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, Space const&, + Kokkos::View const& src) { return Impl::create_mirror_view( - v, view_alloc(typename Space::memory_space{}, wi)); + src, view_alloc(typename Space::memory_space{}, wi)); } -template +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> auto create_mirror_view(const Impl::ViewCtorProp& arg_prop, - const Kokkos::View& v) { - return Impl::create_mirror_view(v, arg_prop); + const Kokkos::View& src) { + return Impl::create_mirror_view(src, arg_prop); } -template -auto create_mirror_view_and_copy( - const Impl::ViewCtorProp&, - const Kokkos::View& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { +namespace Impl { + +// collection of static asserts 
for create_mirror_view_and_copy +template +void check_view_ctor_args_create_mirror_view_and_copy() { using alloc_prop_input = Impl::ViewCtorProp; + static_assert( alloc_prop_input::has_memory_space, "The view constructor arguments passed to " @@ -3708,52 +3726,53 @@ auto create_mirror_view_and_copy( "The view constructor arguments passed to " "Kokkos::create_mirror_view_and_copy must " "not explicitly allow padding!"); - - // same behavior as deep_copy(src, src) - if (!alloc_prop_input::has_execution_space) - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); - return src; } -template +} // namespace Impl + +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> auto create_mirror_view_and_copy( - const Impl::ViewCtorProp& arg_prop, - const Kokkos::View& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop, + const Kokkos::View& src) { using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - using Space = typename alloc_prop_input::memory_space; - using Mirror = typename Impl::MirrorViewType::view_type; - - auto arg_prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string{}, WithoutInitializing, - typename Space::execution_space{}); - - 
std::string& label = Impl::get_property(arg_prop_copy); - if (label.empty()) label = src.label(); - auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()}; - if constexpr (alloc_prop_input::has_execution_space) { - deep_copy(Impl::get_property(arg_prop_copy), - mirror, src); - } else - deep_copy(mirror, src); - return mirror; + + Impl::check_view_ctor_args_create_mirror_view_and_copy(); + + if constexpr (Impl::MirrorViewType::is_same_memspace) { + // same behavior as deep_copy(src, src) + if constexpr (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src " + "view"); + return src; + } else { + using Space = typename alloc_prop_input::memory_space; + using Mirror = typename Impl::MirrorViewType::view_type; + + auto arg_prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string{}, WithoutInitializing, + typename Space::execution_space{}); + + std::string& label = Impl::get_property(arg_prop_copy); + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()}; + if constexpr (alloc_prop_input::has_execution_space) { + deep_copy(Impl::get_property(arg_prop_copy), + mirror, src); + } else + deep_copy(mirror, src); + return mirror; + } +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) + __builtin_unreachable(); +#endif } // Previously when using auto here, the intel compiler 19.3 would diff --git a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp index 5f251eeb26ac..b8d7f77deb30 100644 --- a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -40,7 +40,12 @@ struct ParallelReduceTag {}; struct ChunkSize { int value; + explicit ChunkSize(int value_) : value(value_) {} +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT("ChunkSize should be 
constructed explicitly.") ChunkSize(int value_) : value(value_) {} +#endif }; /** \brief Execution policy for work over a range of an integral type. @@ -714,6 +719,58 @@ class TeamPolicy } }; +// Execution space not provided deduces to TeamPolicy<> + +TeamPolicy()->TeamPolicy<>; + +TeamPolicy(int, int)->TeamPolicy<>; +TeamPolicy(int, int, int)->TeamPolicy<>; +TeamPolicy(int, Kokkos::AUTO_t const&)->TeamPolicy<>; +TeamPolicy(int, Kokkos::AUTO_t const&, int)->TeamPolicy<>; +TeamPolicy(int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)->TeamPolicy<>; +TeamPolicy(int, int, Kokkos::AUTO_t const&)->TeamPolicy<>; + +// DefaultExecutionSpace deduces to TeamPolicy<> + +TeamPolicy(DefaultExecutionSpace const&, int, int)->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, int, int)->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&) + ->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, int) + ->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, + Kokkos::AUTO_t const&) + ->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, int, Kokkos::AUTO_t const&) + ->TeamPolicy<>; + +// ES != DefaultExecutionSpace deduces to TeamPolicy + +template >> +TeamPolicy(ES const&, int, int)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, int, int)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, Kokkos::AUTO_t const&)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, int)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&) + ->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, int, Kokkos::AUTO_t const&)->TeamPolicy; + namespace Impl { template @@ -968,9 +1025,9 @@ struct TeamThreadMDRange, TeamHandle> { static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector; static constexpr Iterate direction = - OuterDir == Iterate::Default - ? 
layout_iterate_type_selector::outer_iteration_pattern - : iter; + OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector< + ArrayLayout>::outer_iteration_pattern + : iter; template KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args) @@ -983,7 +1040,7 @@ struct TeamThreadMDRange, TeamHandle> { }; template -TeamThreadMDRange(TeamHandle const&, Args&&...) +KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle const&, Args&&...) ->TeamThreadMDRange, TeamHandle>; template @@ -1004,9 +1061,9 @@ struct ThreadVectorMDRange, TeamHandle> { static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector; static constexpr Iterate direction = - OuterDir == Iterate::Default - ? layout_iterate_type_selector::outer_iteration_pattern - : iter; + OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector< + ArrayLayout>::outer_iteration_pattern + : iter; template KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_, @@ -1020,7 +1077,7 @@ struct ThreadVectorMDRange, TeamHandle> { }; template -ThreadVectorMDRange(TeamHandle const&, Args&&...) +KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle const&, Args&&...) ->ThreadVectorMDRange, TeamHandle>; template @@ -1041,9 +1098,9 @@ struct TeamVectorMDRange, TeamHandle> { static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector; static constexpr Iterate direction = - iter == Iterate::Default - ? layout_iterate_type_selector::outer_iteration_pattern - : iter; + iter == Iterate::Default ? Impl::layout_iterate_type_selector< + ArrayLayout>::outer_iteration_pattern + : iter; template KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_, @@ -1057,7 +1114,7 @@ struct TeamVectorMDRange, TeamHandle> { }; template -TeamVectorMDRange(TeamHandle const&, Args&&...) +KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle const&, Args&&...) 
->TeamVectorMDRange, TeamHandle>; template #include #include +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN +#include +#else +#include +#endif namespace Kokkos { -namespace Experimental { -constexpr ptrdiff_t dynamic_extent = -1; +#ifndef KOKKOS_ENABLE_IMPL_MDSPAN +constexpr size_t dynamic_extent = std::numeric_limits::max(); +#endif -template +namespace Experimental { + +template struct Extents { /* TODO @enhancement flesh this out more */ }; -template +template struct PrependExtent; -template +template struct PrependExtent, NewExtent> { using type = Extents; }; -template +template struct AppendExtent; -template +template struct AppendExtent, NewExtent> { using type = Extents; }; - } // end namespace Experimental namespace Impl { @@ -75,33 +82,32 @@ struct _parse_impl { // We have to treat the case of int**[x] specially, since it *doesn't* go // backwards -template +template struct _parse_impl, std::enable_if_t<_all_remaining_extents_dynamic::value>> - : _parse_impl> { -}; + : _parse_impl> {}; // int*(*[x])[y] should still work also (meaning int[][x][][y]) -template +template struct _parse_impl< T*, Kokkos::Experimental::Extents, std::enable_if_t::value>> { using _next = Kokkos::Experimental::AppendExtent< typename _parse_impl, void>::type, - Kokkos::Experimental::dynamic_extent>; + Kokkos::dynamic_extent>; using type = typename _next::type; }; -template +template struct _parse_impl, void> - : _parse_impl< - T, Kokkos::Experimental::Extents // TODO @pedantic this - // could be a - // narrowing cast - > {}; + : _parse_impl // TODO @pedantic + // this could be a + // narrowing cast + > {}; } // end namespace _parse_view_extents_impl @@ -111,38 +117,34 @@ struct ParseViewExtents { DataType, Kokkos::Experimental::Extents<>>::type; }; -template +template struct ApplyExtent { using type = ValueType[Ext]; }; template -struct ApplyExtent { +struct ApplyExtent { using type = ValueType*; }; -template +template struct ApplyExtent { using type = typename ApplyExtent::type[N]; }; -template 
+template struct ApplyExtent { using type = ValueType * [Ext]; }; template -struct ApplyExtent { - using type = - typename ApplyExtent::type*; +struct ApplyExtent { + using type = typename ApplyExtent::type*; }; template -struct ApplyExtent { - using type = - typename ApplyExtent::type[N]; +struct ApplyExtent { + using type = typename ApplyExtent::type[N]; }; } // end namespace Impl diff --git a/packages/kokkos/core/src/Kokkos_Graph.hpp b/packages/kokkos/core/src/Kokkos_Graph.hpp index 643bdcc02ccc..9cc6650e26ed 100644 --- a/packages/kokkos/core/src/Kokkos_Graph.hpp +++ b/packages/kokkos/core/src/Kokkos_Graph.hpp @@ -167,6 +167,9 @@ Graph create_graph(Closure&& arg_closure) { #include #endif #endif +#ifdef SYCL_EXT_ONEAPI_GRAPH +#include +#endif #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH #undef KOKKOS_IMPL_PUBLIC_INCLUDE #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH diff --git a/packages/kokkos/core/src/Kokkos_HostSpace.hpp b/packages/kokkos/core/src/Kokkos_HostSpace.hpp index a1fb0f5a677d..8b5f29f95b21 100644 --- a/packages/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_HostSpace.hpp @@ -113,7 +113,6 @@ class HostSpace { const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; - private: void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0, const Kokkos::Tools::SpaceHandle = @@ -124,7 +123,6 @@ class HostSpace { const Kokkos::Tools::SpaceHandle = Kokkos::Tools::make_space_handle(name())) const; - public: /**\brief Return Name of the MemorySpace */ static constexpr const char* name() { return m_name; } diff --git a/packages/kokkos/core/src/Kokkos_Layout.hpp b/packages/kokkos/core/src/Kokkos_Layout.hpp index ca4d956784c9..37b80e54a85f 100644 --- a/packages/kokkos/core/src/Kokkos_Layout.hpp +++ b/packages/kokkos/core/src/Kokkos_Layout.hpp @@ -217,81 +217,12 @@ enum class Iterate { Right // Right indices stride fastest }; -// To check for LayoutTiled -// This is 
to hide extra compile-time 'identifier' info within the LayoutTiled -// class by not relying on template specialization to include the ArgN*'s -template -struct is_layouttiled : std::false_type {}; - -template -struct is_layouttiled> - : std::true_type {}; - -namespace Experimental { - -/// LayoutTiled -// Must have Rank >= 2 -template < - Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, - unsigned ArgN1, unsigned ArgN2 = 0, unsigned ArgN3 = 0, unsigned ArgN4 = 0, - unsigned ArgN5 = 0, unsigned ArgN6 = 0, unsigned ArgN7 = 0, - bool IsPowerOfTwo = - (Kokkos::Impl::is_integral_power_of_two(ArgN0) && - Kokkos::Impl::is_integral_power_of_two(ArgN1) && - (Kokkos::Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0)))> -struct LayoutTiled { - static_assert(IsPowerOfTwo, - "LayoutTiled must be given power-of-two tile dimensions"); - - using array_layout = LayoutTiled; - static constexpr Iterate outer_pattern = OuterP; - static constexpr Iterate inner_pattern = InnerP; - - enum { N0 = ArgN0 }; - enum { N1 = ArgN1 }; - enum { N2 = ArgN2 }; - enum { N3 = ArgN3 }; - enum { N4 = ArgN4 }; - enum { N5 = ArgN5 }; - enum { N6 = ArgN6 }; - enum { N7 = ArgN7 }; - - size_t dimension[ARRAY_LAYOUT_MAX_RANK]; - - enum : bool { is_extent_constructible = true }; - - LayoutTiled(LayoutTiled const&) = default; - LayoutTiled(LayoutTiled&&) = default; - LayoutTiled& operator=(LayoutTiled const&) = default; - LayoutTiled& operator=(LayoutTiled&&) = default; - - KOKKOS_INLINE_FUNCTION - explicit constexpr LayoutTiled(size_t argN0 = 0, size_t argN1 = 0, - size_t argN2 = 0, size_t argN3 = 0, - size_t argN4 = 0, size_t argN5 = 0, - size_t 
argN6 = 0, size_t argN7 = 0) - : dimension{argN0, argN1, argN2, argN3, argN4, argN5, argN6, argN7} {} - - friend bool operator==(const LayoutTiled& left, const LayoutTiled& right) { - for (unsigned int rank = 0; rank < ARRAY_LAYOUT_MAX_RANK; ++rank) - if (left.dimension[rank] != right.dimension[rank]) return false; - return true; - } - - friend bool operator!=(const LayoutTiled& left, const LayoutTiled& right) { - return !(left == right); - } -}; - -} // namespace Experimental +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +template +struct KOKKOS_DEPRECATED is_layouttiled : std::false_type {}; +#endif +namespace Impl { // For use with view_copy template struct layout_iterate_type_selector { @@ -320,42 +251,13 @@ struct layout_iterate_type_selector { static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Default; }; +} // namespace Impl -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left; -}; - -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left; -}; - -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right; -}; - -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right; -}; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +template +using layout_iterate_type_selector KOKKOS_DEPRECATED = + Impl::layout_iterate_type_selector; +#endif } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp 
index b255d2a51950..0a0acd303f52 100644 --- a/packages/kokkos/core/src/Kokkos_Macros.hpp +++ b/packages/kokkos/core/src/Kokkos_Macros.hpp @@ -55,9 +55,22 @@ #ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H #include +#include #include #endif +#if !defined(KOKKOS_ENABLE_CXX17) +#if __has_include() +#include +#else +#include +#endif +#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 10 +#error \ + "Compiling with support for C++20 or later requires a libstdc++ version later than 9" +#endif +#endif + //---------------------------------------------------------------------------- /** Pick up compiler specific #define macros: * @@ -332,6 +345,10 @@ #define KOKKOS_DEFAULTED_FUNCTION #endif +#if !defined(KOKKOS_DEDUCTION_GUIDE) +#define KOKKOS_DEDUCTION_GUIDE +#endif + #if !defined(KOKKOS_IMPL_HOST_FUNCTION) #define KOKKOS_IMPL_HOST_FUNCTION #endif @@ -562,8 +579,44 @@ static constexpr bool kokkos_omp_on_host() { return false; } #define KOKKOS_IMPL_WARNING(desc) KOKKOS_IMPL_DO_PRAGMA(message(#desc)) #endif +// clang-format off +#if defined(__NVCOMPILER) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("diag_suppress 1216") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("diag_default 1216") +#elif defined(__EDG__) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("warning push") \ + _Pragma("warning disable 1478") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("warning pop") +#elif defined(__GNUC__) || defined(__clang__) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("GCC diagnostic pop") +#elif defined(_MSC_VER) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("warning(push)") \ + _Pragma("warning(disable: 4996)") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("warning(pop)") 
+#else + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() +#endif +// clang-format on + #define KOKKOS_ATTRIBUTE_NODISCARD [[nodiscard]] +#ifndef KOKKOS_ENABLE_CXX17 +#define KOKKOS_IMPL_ATTRIBUTE_UNLIKELY [[unlikely]] +#else +#define KOKKOS_IMPL_ATTRIBUTE_UNLIKELY +#endif + #if (defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) || \ defined(KOKKOS_COMPILER_INTEL) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \ defined(KOKKOS_COMPILER_NVHPC)) && \ diff --git a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp index 3fead8dd2936..19967782e5ee 100644 --- a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp +++ b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp @@ -277,12 +277,20 @@ KOKKOS_INLINE_FUNCTION long long abs(long long n) { #endif } KOKKOS_INLINE_FUNCTION float abs(float x) { +#ifdef KOKKOS_ENABLE_SYCL + return sycl::fabs(x); // sycl::abs is only provided for integral types +#else using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(x); +#endif } KOKKOS_INLINE_FUNCTION double abs(double x) { +#ifdef KOKKOS_ENABLE_SYCL + return sycl::fabs(x); // sycl::abs is only provided for integral types +#else using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(x); +#endif } inline long double abs(long double x) { using std::abs; diff --git a/packages/kokkos/core/src/Kokkos_Pair.hpp b/packages/kokkos/core/src/Kokkos_Pair.hpp index 9be8d8d7aa19..e569fefc14df 100644 --- a/packages/kokkos/core/src/Kokkos_Pair.hpp +++ b/packages/kokkos/core/src/Kokkos_Pair.hpp @@ -413,12 +413,13 @@ KOKKOS_FORCEINLINE_FUNCTION pair tie(T1& x, T2& y) { return (pair(x, y)); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 // // Specialization of Kokkos::pair for a \c void second argument. This // is not actually a "pair"; it only contains one element, the first. 
// template -struct pair { +struct KOKKOS_DEPRECATED pair { using first_type = T1; using second_type = void; @@ -448,41 +449,48 @@ struct pair { // Specialization of relational operators for Kokkos::pair. // +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 1110) +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() +#endif template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==( const pair& lhs, const pair& rhs) { return lhs.first == rhs.first; } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!=( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!=( const pair& lhs, const pair& rhs) { return !(lhs == rhs); } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<( const pair& lhs, const pair& rhs) { return lhs.first < rhs.first; } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<=( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<=( const pair& lhs, const pair& rhs) { return !(rhs < lhs); } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>( const pair& lhs, const pair& rhs) { return rhs < lhs; } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>=( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>=( const pair& lhs, const pair& rhs) { return !(lhs < rhs); } +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 1110) +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() +#endif +#endif namespace Impl { template diff --git a/packages/kokkos/core/src/Kokkos_Parallel.hpp b/packages/kokkos/core/src/Kokkos_Parallel.hpp index 484f6c0d5f4c..122239df7908 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel.hpp @@ -137,9 +137,9 @@ inline void parallel_for(const std::string& 
str, const ExecPolicy& policy, ExecPolicy inner_policy = policy; Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID); - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelFor closure(functor, inner_policy); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelFor>(functor, inner_policy); closure.execute(); @@ -352,10 +352,10 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, ExecutionPolicy inner_policy = policy; Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID); - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelScan closure(functor, - inner_policy); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelScan>(functor, + inner_policy); closure.execute(); @@ -398,18 +398,19 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID); if constexpr (Kokkos::is_view::value) { - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelScanWithTotal - closure(functor, inner_policy, return_value); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelScanWithTotal>( + functor, inner_policy, return_value); closure.execute(); } else { - Kokkos::Impl::shared_allocation_tracking_disable(); Kokkos::View view(&return_value); - Impl::ParallelScanWithTotal - closure(functor, inner_policy, view); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelScanWithTotal>(functor, inner_policy, + view); closure.execute(); } diff --git 
a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index d499eba6dcca..53913266f130 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -72,7 +72,7 @@ struct Sum { }; template -Sum(View const&) +KOKKOS_DEDUCTION_GUIDE Sum(View const&) ->Sum::memory_space>; template @@ -117,7 +117,7 @@ struct Prod { }; template -Prod(View const&) +KOKKOS_DEDUCTION_GUIDE Prod(View const&) ->Prod::memory_space>; template @@ -164,7 +164,7 @@ struct Min { }; template -Min(View const&) +KOKKOS_DEDUCTION_GUIDE Min(View const&) ->Min::memory_space>; template @@ -212,7 +212,7 @@ struct Max { }; template -Max(View const&) +KOKKOS_DEDUCTION_GUIDE Max(View const&) ->Max::memory_space>; template @@ -258,7 +258,7 @@ struct LAnd { }; template -LAnd(View const&) +KOKKOS_DEDUCTION_GUIDE LAnd(View const&) ->LAnd::memory_space>; template @@ -305,7 +305,7 @@ struct LOr { }; template -LOr(View const&) +KOKKOS_DEDUCTION_GUIDE LOr(View const&) ->LOr::memory_space>; template @@ -352,7 +352,7 @@ struct BAnd { }; template -BAnd(View const&) +KOKKOS_DEDUCTION_GUIDE BAnd(View const&) ->BAnd::memory_space>; template @@ -399,7 +399,7 @@ struct BOr { }; template -BOr(View const&) +KOKKOS_DEDUCTION_GUIDE BOr(View const&) ->BOr::memory_space>; template @@ -458,7 +458,8 @@ struct MinLoc { }; template -MinLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinLoc( + View, Properties...> const&) ->MinLoc, Properties...>::memory_space>; @@ -513,7 +514,8 @@ struct MaxLoc { }; template -MaxLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MaxLoc( + View, Properties...> const&) ->MaxLoc, Properties...>::memory_space>; @@ -577,7 +579,7 @@ struct MinMax { }; template -MinMax(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinMax(View, Properties...> const&) ->MinMax, Properties...>::memory_space>; @@ -646,7 +648,8 @@ struct MinMaxLoc { }; template -MinMaxLoc(View, 
Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinMaxLoc( + View, Properties...> const&) ->MinMaxLoc, Properties...>::memory_space>; @@ -713,7 +716,8 @@ struct MaxFirstLoc { }; template -MaxFirstLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MaxFirstLoc( + View, Properties...> const&) ->MaxFirstLoc, Properties...>::memory_space>; @@ -782,7 +786,7 @@ struct MaxFirstLocCustomComparator { template -MaxFirstLocCustomComparator( +KOKKOS_DEDUCTION_GUIDE MaxFirstLocCustomComparator( View, Properties...> const&, ComparatorType) ->MaxFirstLocCustomComparator, @@ -846,7 +850,8 @@ struct MinFirstLoc { }; template -MinFirstLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinFirstLoc( + View, Properties...> const&) ->MinFirstLoc, Properties...>::memory_space>; @@ -915,7 +920,7 @@ struct MinFirstLocCustomComparator { template -MinFirstLocCustomComparator( +KOKKOS_DEDUCTION_GUIDE MinFirstLocCustomComparator( View, Properties...> const&, ComparatorType) ->MinFirstLocCustomComparator, @@ -990,7 +995,8 @@ struct MinMaxFirstLastLoc { }; template -MinMaxFirstLastLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinMaxFirstLastLoc( + View, Properties...> const&) ->MinMaxFirstLastLoc, Properties...>::memory_space>; @@ -1069,7 +1075,7 @@ struct MinMaxFirstLastLocCustomComparator { template -MinMaxFirstLastLocCustomComparator( +KOKKOS_DEDUCTION_GUIDE MinMaxFirstLastLocCustomComparator( View, Properties...> const&, ComparatorType) ->MinMaxFirstLastLocCustomComparator< Scalar, Index, ComparatorType, @@ -1133,7 +1139,8 @@ struct FirstLoc { }; template -FirstLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE FirstLoc( + View, Properties...> const&) ->FirstLoc, Properties...>::memory_space>; @@ -1194,7 +1201,7 @@ struct LastLoc { }; template -LastLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE LastLoc(View, Properties...> const&) ->LastLoc, Properties...>::memory_space>; @@ -1261,7 +1268,8 @@ struct StdIsPartitioned { }; template -StdIsPartitioned(View, 
Properties...> const&) +KOKKOS_DEDUCTION_GUIDE StdIsPartitioned( + View, Properties...> const&) ->StdIsPartitioned, Properties...>::memory_space>; @@ -1323,7 +1331,8 @@ struct StdPartitionPoint { }; template -StdPartitionPoint(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE StdPartitionPoint( + View, Properties...> const&) ->StdPartitionPoint, Properties...>::memory_space>; @@ -1502,18 +1511,18 @@ struct ParallelReduceAdaptor { using Analysis = FunctorAnalysis; - Kokkos::Impl::shared_allocation_tracking_disable(); - CombinedFunctorReducer functor_reducer( - functor, typename Analysis::Reducer( - ReducerSelector::select(functor, return_value))); - - // FIXME Remove "Wrapper" once all backends implement the new interface - Impl::ParallelReduce::execution_space> - closure(functor_reducer, inner_policy, - return_value_adapter::return_value(return_value, functor)); - Kokkos::Impl::shared_allocation_tracking_enable(); + + using CombinedFunctorReducerType = + CombinedFunctorReducer; + auto closure = construct_with_shared_allocation_tracking_disabled< + Impl::ParallelReduce::execution_space>>( + CombinedFunctorReducerType( + functor, typename Analysis::Reducer( + ReducerSelector::select(functor, return_value))), + inner_policy, + return_value_adapter::return_value(return_value, functor)); closure.execute(); Kokkos::Tools::Impl::end_parallel_reduce( diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp index 484a0e6f62e4..820a40a5f558 100644 --- a/packages/kokkos/core/src/Kokkos_View.hpp +++ b/packages/kokkos/core/src/Kokkos_View.hpp @@ -38,6 +38,8 @@ static_assert(false, #ifdef KOKKOS_ENABLE_IMPL_MDSPAN #include +#include +#include #endif #include @@ -372,6 +374,35 @@ struct ViewTraits { //------------------------------------ }; +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN +namespace Impl { +struct UnsupportedKokkosArrayLayout; + +template +struct MDSpanViewTraits { + using mdspan_type = UnsupportedKokkosArrayLayout; +}; + +// "Natural" 
mdspan for a view if the View's ArrayLayout is supported. +template +struct MDSpanViewTraits::type>> { + using index_type = std::size_t; + using extents_type = + typename Impl::ExtentsFromDataType::type; + using mdspan_layout_type = + typename Impl::LayoutFromArrayLayout::type; + using accessor_type = Impl::SpaceAwareAccessor< + typename Traits::memory_space, + Kokkos::default_accessor>; + using mdspan_type = mdspan; +}; +} // namespace Impl +#endif // KOKKOS_ENABLE_IMPL_MDSPAN + /** \class View * \brief View to an array of data. * @@ -522,7 +553,6 @@ constexpr bool is_assignable(const Kokkos::View& dst, //---------------------------------------------------------------------------- #include -#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -923,57 +953,30 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_left && (rank_dynamic == 0)), + (2 == rank) && is_default_map && + (is_layout_left || is_layout_right || is_layout_stride)), reference_type> operator()(I0 i0, I1 i1) const { check_operator_parens_valid_args(i0, i1); KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_left && (rank_dynamic != 0)), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0)), - 
reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0)), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t<(Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_stride), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + - i1 * m_map.m_impl_offset.m_stride.S1]; + if constexpr (is_layout_left) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; + else + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; + } else if constexpr (is_layout_right) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; + else + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; + } else { + static_assert(is_layout_stride); + return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1]; + } +#if defined KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif } // Rank 0 -> 8 operator() except for rank-1 and rank-2 with default map which @@ -1066,57 +1069,30 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && 
is_layout_left && (rank_dynamic == 0)), - reference_type> - access(I0 i0, I1 i1, Is... extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_left && (rank_dynamic != 0)), + is_default_map && + (is_layout_left || is_layout_right || is_layout_stride)), reference_type> access(I0 i0, I1 i1, Is... extra) const { check_access_member_function_valid_args(i0, i1, extra...); KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic == 0)), - reference_type> - access(I0 i0, I1 i1, Is... extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic != 0)), - reference_type> - access(I0 i0, I1 i1, Is... extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t<(Kokkos::Impl::always_true::value && - (2 == rank) && is_default_map && is_layout_stride), - reference_type> - access(I0 i0, I1 i1, Is... 
extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + - i1 * m_map.m_impl_offset.m_stride.S1]; + if constexpr (is_layout_left) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; + else + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; + } else if constexpr (is_layout_right) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; + else + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; + } else { + static_assert(is_layout_stride); + return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1]; + } +#if defined KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif } //------------------------------ @@ -1442,8 +1418,7 @@ class View : public ViewTraits { std::is_same_v || std::is_same_v || - is_layouttiled::value) { + Kokkos::LayoutStride>) { size_t i0 = arg_layout.dimension[0]; size_t i1 = arg_layout.dimension[1]; size_t i2 = arg_layout.dimension[2]; @@ -1495,8 +1470,7 @@ class View : public ViewTraits { std::is_same_v || std::is_same_v || - is_layouttiled::value) { + Kokkos::LayoutStride>) { size_t i0 = arg_layout.dimension[0]; size_t i1 = arg_layout.dimension[1]; size_t i2 = arg_layout.dimension[2]; @@ -1725,6 +1699,79 @@ class View : public ViewTraits { "Layout is not constructible from extent arguments. 
Use " "overload taking a layout object instead."); } + + //---------------------------------------- + // MDSpan converting constructors +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN + template ::mdspan_type> + KOKKOS_INLINE_FUNCTION +#ifndef KOKKOS_ENABLE_CXX17 + explicit(traits::is_managed) +#endif + View(const typename Impl::MDSpanViewTraits::mdspan_type& mds, + std::enable_if_t< + !std::is_same_v>* = + nullptr) + : View(mds.data_handle(), + Impl::array_layout_from_mapping< + typename traits::array_layout, + typename Impl::MDSpanViewTraits::mdspan_type>( + mds.mapping())) { + } + + template + KOKKOS_INLINE_FUNCTION +#ifndef KOKKOS_ENABLE_CXX17 + explicit(!std::is_convertible_v< + Kokkos::mdspan, + typename Impl::MDSpanViewTraits::mdspan_type>) +#endif + View(const Kokkos::mdspan& mds) + : View(typename Impl::MDSpanViewTraits::mdspan_type(mds)) { + } + + //---------------------------------------- + // Conversion to MDSpan + template ::mdspan_type, + typename = std::enable_if_t, + std::false_type, + std::is_assignable, + ImplNaturalMDSpanType>>::value>> + KOKKOS_INLINE_FUNCTION constexpr operator mdspan< + OtherElementType, OtherExtents, OtherLayoutPolicy, OtherAccessor>() { + using mdspan_type = typename Impl::MDSpanViewTraits::mdspan_type; + return mdspan_type{data(), + Impl::mapping_from_view_mapping(m_map)}; + } + + template >, + typename = std::enable_if_t>> + KOKKOS_INLINE_FUNCTION constexpr auto to_mdspan( + const OtherAccessorType& other_accessor = + typename Impl::MDSpanViewTraits::accessor_type()) { + using mdspan_type = typename Impl::MDSpanViewTraits::mdspan_type; + using ret_mdspan_type = + mdspan; + return ret_mdspan_type{data(), + Impl::mapping_from_view_mapping(m_map), + other_accessor}; + } +#endif // KOKKOS_ENABLE_IMPL_MDSPAN }; template @@ -1878,23 +1925,6 @@ KOKKOS_INLINE_FUNCTION bool operator!=(const View& lhs, namespace Kokkos { namespace Impl { -inline void shared_allocation_tracking_disable() { - 
Kokkos::Impl::SharedAllocationRecord::tracking_disable(); -} - -inline void shared_allocation_tracking_enable() { - Kokkos::Impl::SharedAllocationRecord::tracking_enable(); -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - template struct CommonViewValueType; diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp index acc0dcd3c6e2..c8a5d28ba83f 100644 --- a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp @@ -67,16 +67,7 @@ void *Kokkos::Experimental::OpenACCSpace::impl_allocate( ptr = acc_malloc(arg_alloc_size); if (!ptr) { - size_t alignment = 1; // OpenACC does not handle alignment - using Kokkos::Experimental::RawMemoryAllocationFailure; - auto failure_mode = - arg_alloc_size > 0 - ? 
RawMemoryAllocationFailure::FailureMode::OutOfMemoryError - : RawMemoryAllocationFailure::FailureMode::InvalidAllocationSize; - auto alloc_mechanism = - RawMemoryAllocationFailure::AllocationMechanism::OpenACCMalloc; - throw RawMemoryAllocationFailure(arg_alloc_size, alignment, failure_mode, - alloc_mechanism); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp index 4fce680aef09..2b98018e3bb9 100644 --- a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp @@ -44,10 +44,12 @@ class Kokkos::Impl::ParallelFor, auto team_size = m_policy.team_size(); auto vector_length = m_policy.impl_vector_length(); + int const async_arg = m_policy.space().acc_async_queue(); + auto const a_functor(m_functor); #pragma acc parallel loop gang vector num_gangs(league_size) \ - vector_length(team_size* vector_length) copyin(a_functor) + vector_length(team_size* vector_length) copyin(a_functor) async(async_arg) for (int i = 0; i < league_size * team_size * vector_length; i++) { int league_id = i / (team_size * vector_length); typename Policy::member_type team(league_id, league_size, team_size, @@ -145,10 +147,12 @@ class Kokkos::Impl::ParallelFor, auto team_size = m_policy.team_size(); auto vector_length = m_policy.impl_vector_length(); + int const async_arg = m_policy.space().acc_async_queue(); + auto const a_functor(m_functor); #pragma acc parallel loop gang num_gangs(league_size) num_workers(team_size) \ - vector_length(vector_length) copyin(a_functor) + vector_length(vector_length) copyin(a_functor) async(async_arg) for (int i = 0; i < league_size; i++) { int league_id = i; typename Policy::member_type team(league_id, league_size, team_size, diff --git 
a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp index 81f2c5c30560..82199d0d72d5 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp @@ -72,9 +72,28 @@ int OpenMP::concurrency(OpenMP const &instance) { int OpenMP::concurrency() const { return impl_thread_pool_size(); } #endif +void OpenMP::impl_static_fence(std::string const &name) { + Kokkos::Tools::Experimental::Impl::profile_fence_event( + name, + Kokkos::Tools::Experimental::SpecialSynchronizationCases:: + GlobalDeviceSynchronization, + []() { + std::lock_guard lock_all_instances( + Impl::OpenMPInternal::all_instances_mutex); + for (auto *instance_ptr : Impl::OpenMPInternal::all_instances) { + std::lock_guard lock_instance( + instance_ptr->m_instance_mutex); + } + }); +} + void OpenMP::fence(const std::string &name) const { Kokkos::Tools::Experimental::Impl::profile_fence_event( - name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {}); + name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, + [this]() { + auto *internal_instance = this->impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); + }); } bool OpenMP::impl_is_initialized() noexcept { diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp index 11292af84ad4..a403909f677c 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp @@ -67,7 +67,15 @@ class OpenMP { OpenMP(); - OpenMP(int pool_size); + explicit OpenMP(int pool_size); + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "OpenMP execution space should be constructed explicitly.") + OpenMP(int pool_size) + : OpenMP(pool_size) {} +#endif /// \brief Print configuration information to the given output stream. 
void print_configuration(std::ostream& os, bool verbose = false) const; @@ -146,14 +154,6 @@ inline int OpenMP::impl_thread_pool_rank() noexcept { KOKKOS_IF_ON_DEVICE((return -1;)) } -inline void OpenMP::impl_static_fence(std::string const& name) { - Kokkos::Tools::Experimental::Impl::profile_fence_event( - name, - Kokkos::Tools::Experimental::SpecialSynchronizationCases:: - GlobalDeviceSynchronization, - []() {}); -} - inline bool OpenMP::is_asynchronous(OpenMP const& /*instance*/) noexcept { return false; } diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp index 32172fbc6c73..0f4c7d605240 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp @@ -34,18 +34,8 @@ namespace Kokkos { namespace Impl { -void OpenMPInternal::acquire_lock() { - while (1 == desul::atomic_compare_exchange(&m_pool_mutex, 0, 1, - desul::MemoryOrderAcquire(), - desul::MemoryScopeDevice())) { - // do nothing - } -} - -void OpenMPInternal::release_lock() { - desul::atomic_store(&m_pool_mutex, 0, desul::MemoryOrderRelease(), - desul::MemoryScopeDevice()); -} +std::vector OpenMPInternal::all_instances; +std::mutex OpenMPInternal::all_instances_mutex; void OpenMPInternal::clear_thread_data() { const size_t member_bytes = @@ -123,17 +113,11 @@ void OpenMPInternal::resize_thread_data(size_t pool_reduce_bytes, if (nullptr != m_pool[rank]) { m_pool[rank]->disband_pool(); - space.deallocate(m_pool[rank], old_alloc_bytes); + // impl_deallocate to not fence here + space.impl_deallocate("[unlabeled]", m_pool[rank], old_alloc_bytes); } - void *ptr = nullptr; - try { - ptr = space.allocate(alloc_bytes); - } catch ( - Kokkos::Experimental::RawMemoryAllocationFailure const &failure) { - // For now, just rethrow the error message the existing way - Kokkos::Impl::throw_runtime_exception(failure.get_error_message()); - } + void *ptr = 
space.allocate("Kokkos::OpenMP::scratch_mem", alloc_bytes); m_pool[rank] = new (ptr) HostThreadTeamData(); @@ -304,6 +288,18 @@ void OpenMPInternal::finalize() { } m_initialized = false; + + // guard erasing from all_instances + { + std::scoped_lock lock(all_instances_mutex); + + auto it = std::find(all_instances.begin(), all_instances.end(), this); + if (it == all_instances.end()) + Kokkos::abort( + "Execution space instance to be removed couldn't be found!"); + *it = all_instances.back(); + all_instances.pop_back(); + } } void OpenMPInternal::print_configuration(std::ostream &s) const { diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp index 35b9aa93ba7c..f4a0d3e20123 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp @@ -56,7 +56,13 @@ struct OpenMPTraits { class OpenMPInternal { private: OpenMPInternal(int arg_pool_size) - : m_pool_size{arg_pool_size}, m_level{omp_get_level()}, m_pool() {} + : m_pool_size{arg_pool_size}, m_level{omp_get_level()}, m_pool() { + // guard pushing to all_instances + { + std::scoped_lock lock(all_instances_mutex); + all_instances.push_back(this); + } + } ~OpenMPInternal() { clear_thread_data(); } @@ -66,7 +72,6 @@ class OpenMPInternal { int m_pool_size; int m_level; - int m_pool_mutex = 0; HostThreadTeamData* m_pool[OpenMPTraits::MAX_THREAD_COUNT]; @@ -83,12 +88,6 @@ class OpenMPInternal { int thread_pool_size() const { return m_pool_size; } - // Acquire lock used to protect access to m_pool - void acquire_lock(); - - // Release lock used to protect access to m_pool - void release_lock(); - void resize_thread_data(size_t pool_reduce_bytes, size_t team_reduce_bytes, size_t team_shared_bytes, size_t thread_local_bytes); @@ -107,6 +106,11 @@ class OpenMPInternal { bool verify_is_initialized(const char* const label) const; void print_configuration(std::ostream& s) const; 
+ + std::mutex m_instance_mutex; + + static std::vector all_instances; + static std::mutex all_instances_mutex; }; inline bool execute_in_serial(OpenMP const& space = OpenMP()) { @@ -157,7 +161,7 @@ inline std::vector create_OpenMP_instances( "Kokkos::abort: Partition not enough resources left to create the last " "instance."); } - instances[weights.size() - 1] = resources_left; + instances[weights.size() - 1] = OpenMP(resources_left); return instances; } diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp index 823a7e668e57..79d7d295c0e6 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp @@ -108,6 +108,8 @@ class ParallelFor, Kokkos::OpenMP> { public: inline void execute() const { + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); if (execute_in_serial(m_policy.space())) { exec_range(m_functor, m_policy.begin(), m_policy.end()); return; @@ -202,6 +204,9 @@ class ParallelFor, public: inline void execute() const { + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); + #ifndef KOKKOS_COMPILER_INTEL if (execute_in_serial(m_iter.m_rp.space())) { exec_range(0, m_iter.m_rp.m_num_tiles); @@ -333,7 +338,8 @@ class ParallelFor, const size_t team_shared_size = m_shmem_size; const size_t thread_local_size = 0; // Never shrinks - m_instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -343,8 +349,6 @@ class ParallelFor, m_functor, *(m_instance->get_thread_data()), 0, m_policy.league_size(), m_policy.league_size()); - m_instance->release_lock(); - return; } @@ -383,8 +387,6 @@ class 
ParallelFor, data.disband_team(); } - - m_instance->release_lock(); } inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp index 05fd1c9dce3c..d22e1e7eda0b 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp @@ -83,7 +83,8 @@ class ParallelReduce, const size_t pool_reduce_bytes = reducer.value_size(); - m_instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , @@ -106,6 +107,7 @@ class ParallelReduce, update); reducer.final(ptr); + return; } const int pool_size = m_instance->thread_pool_size(); @@ -157,8 +159,6 @@ class ParallelReduce, m_result_ptr[j] = ptr[j]; } } - - m_instance->release_lock(); } //---------------------------------------- @@ -218,7 +218,8 @@ class ParallelReduceacquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , @@ -241,8 +242,6 @@ class ParallelReducerelease_lock(); - return; } #endif @@ -299,8 +298,6 @@ class ParallelReducerelease_lock(); } //---------------------------------------- @@ -415,7 +412,8 @@ class ParallelReduceacquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -433,8 +431,6 @@ class ParallelReducerelease_lock(); - return; } @@ -510,8 +506,6 @@ class ParallelReducerelease_lock(); } //---------------------------------------- diff --git 
a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp index f843aef3a84c..b9ce25d3ee56 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp @@ -70,6 +70,9 @@ class ParallelScan, const int value_count = Analysis::value_count(m_functor); const size_t pool_reduce_bytes = 2 * Analysis::value_size(m_functor); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); + m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , 0 // team_shared_bytes @@ -193,7 +196,8 @@ class ParallelScanWithTotal, const int value_count = Analysis::value_count(m_functor); const size_t pool_reduce_bytes = 2 * Analysis::value_size(m_functor); - m_instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , @@ -213,8 +217,6 @@ class ParallelScanWithTotal, *m_result_ptr = update; - m_instance->release_lock(); - return; } @@ -266,8 +268,6 @@ class ParallelScanWithTotal, *m_result_ptr = update_base; } } - - m_instance->release_lock(); } //---------------------------------------- diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 3e67d8d62527..54c1574d71d3 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -52,18 +52,7 @@ HostThreadTeamDataSingleton::HostThreadTeamDataSingleton() num_pool_reduce_bytes, num_team_reduce_bytes, num_team_shared_bytes, num_thread_local_bytes); - void* ptr = nullptr; - try { - ptr = space.allocate(alloc_bytes); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& f) { - // For now, just rethrow the error 
message with a note - // Note that this could, in turn, trigger an out of memory exception, - // but it's pretty unlikely, so we won't worry about it for now. - // TODO reasonable error message when `std::string` causes OOM error - Kokkos::Impl::throw_runtime_exception( - std::string("Failure to allocate scratch memory: ") + - f.get_error_message()); - } + void* ptr = space.allocate("Kokkos::Impl::HostThreadTeamData", alloc_bytes); HostThreadTeamData::scratch_assign( ptr, alloc_bytes, num_pool_reduce_bytes, num_team_reduce_bytes, diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 01b66948654c..2877d940fafc 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -73,7 +73,8 @@ class TaskQueueSpecialization> { execution_space().impl_internal_space_instance(); const int pool_size = get_max_team_count(scheduler.get_execution_space()); - instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(instance->m_instance_mutex); // TODO @tasking @new_feature DSH allow team sizes other than 1 const int team_size = 1; // Threads per core @@ -152,8 +153,6 @@ class TaskQueueSpecialization> { } self.disband_team(); } // end pragma omp parallel - - instance->release_lock(); } static uint32_t get_max_team_count(execution_space const& espace) { @@ -238,7 +237,8 @@ class TaskQueueSpecializationConstrained< execution_space().impl_internal_space_instance(); const int pool_size = instance->thread_pool_size(); - instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(instance->m_instance_mutex); const int team_size = 1; // Threads per core instance->resize_thread_data(0 /* global reduce buffer */ @@ -250,6 +250,7 @@ class TaskQueueSpecializationConstrained< 0 /* thread local buffer */ ); assert(pool_size % team_size == 0); + auto& 
queue = scheduler.queue(); queue.initialize_team_queues(pool_size / team_size); @@ -343,8 +344,6 @@ class TaskQueueSpecializationConstrained< } self.disband_team(); } // end pragma omp parallel - - instance->release_lock(); } template diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp index ea4e7f6baba2..84c7b85f11d1 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp @@ -146,7 +146,8 @@ struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> { /*--------------------------------------------------------------------------*/ #include -#include +#include +#include #include /*--------------------------------------------------------------------------*/ diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index a414b34d7c68..635b0e0504fc 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -54,9 +54,11 @@ void* OpenMPTargetSpace::impl_allocate( static_assert(sizeof(void*) == sizeof(uintptr_t), "Error sizeof(void*) != sizeof(uintptr_t)"); - void* ptr; + void* ptr = omp_target_alloc(arg_alloc_size, omp_get_default_device()); - ptr = omp_target_alloc(arg_alloc_size, omp_get_default_device()); + if (!ptr) { + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); + } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp index b39f5aca3533..6c5eb048e34e 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -71,8 +71,6 
@@ void OpenMPTargetExec::verify_initialized(const char* const label) { void* OpenMPTargetExec::m_scratch_ptr = nullptr; int64_t OpenMPTargetExec::m_scratch_size = 0; -int* OpenMPTargetExec::m_lock_array = nullptr; -uint64_t OpenMPTargetExec::m_lock_size = 0; uint32_t* OpenMPTargetExec::m_uniquetoken_ptr = nullptr; int OpenMPTargetExec::MAX_ACTIVE_THREADS = 0; std::mutex OpenMPTargetExec::m_mutex_scratch_ptr; @@ -84,15 +82,6 @@ void OpenMPTargetExec::clear_scratch() { m_scratch_size = 0; } -void OpenMPTargetExec::clear_lock_array() { - if (m_lock_array != nullptr) { - Kokkos::Experimental::OpenMPTargetSpace space; - space.deallocate(m_lock_array, m_lock_size); - m_lock_array = nullptr; - m_lock_size = 0; - } -} - void* OpenMPTargetExec::get_scratch_ptr() { return m_scratch_ptr; } void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, @@ -135,35 +124,6 @@ void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, } } -int* OpenMPTargetExec::get_lock_array(int num_teams) { - Kokkos::Experimental::OpenMPTargetSpace space; - int max_active_league_size = MAX_ACTIVE_THREADS / 32; - int lock_array_elem = - (num_teams > max_active_league_size) ? num_teams : max_active_league_size; - if (m_lock_size < (lock_array_elem * sizeof(int))) { - space.deallocate(m_lock_array, m_lock_size); - m_lock_size = lock_array_elem * sizeof(int); - m_lock_array = static_cast(space.allocate(m_lock_size)); - - // FIXME_OPENMPTARGET - Creating a target region here to initialize the - // lock_array with 0's fails. Hence creating an equivalent host array to - // achieve the same. Value of host array are then copied to the lock_array. 
- int* h_lock_array = static_cast( - omp_target_alloc(m_lock_size, omp_get_initial_device())); - - for (int i = 0; i < lock_array_elem; ++i) h_lock_array[i] = 0; - - if (0 < m_lock_size) - KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy( - m_lock_array, h_lock_array, m_lock_size, 0, 0, - omp_get_default_device(), omp_get_initial_device())); - - omp_target_free(h_lock_array, omp_get_initial_device()); - } - - return m_lock_array; -} - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp index 3387108da395..44e9119ea886 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -106,7 +106,6 @@ void OpenMPTargetInternal::print_configuration(std::ostream& os, void OpenMPTargetInternal::impl_finalize() { m_is_initialized = false; Kokkos::Impl::OpenMPTargetExec space; - if (space.m_lock_array != nullptr) space.clear_lock_array(); if (space.m_uniquetoken_ptr != nullptr) Kokkos::kokkos_free( diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp index d718f56d38b0..e353676b6178 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp @@ -22,6 +22,10 @@ namespace Kokkos { namespace Impl { +using OpenMPTargetIterateLeft = std::integral_constant; +using OpenMPTargetIterateRight = + std::integral_constant; + template struct ThreadAndVectorNestLevel +#include +#include "Kokkos_OpenMPTarget_MDRangePolicy.hpp" + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + 
+template +class ParallelFor, + Kokkos::Experimental::OpenMPTarget> { + private: + using Policy = Kokkos::MDRangePolicy; + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using Index = typename Policy::index_type; + + const FunctorType m_functor; + const Policy m_policy; + + public: + inline void execute() const { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + FunctorType functor(m_functor); + Policy policy = m_policy; + + typename Policy::point_type unused; + static_assert(1 < Policy::rank && Policy::rank < 7); + static_assert(Policy::inner_direction == Iterate::Left || + Policy::inner_direction == Iterate::Right); + + execute_tile( + unused, functor, policy, + std::integral_constant()); + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + +#pragma omp target teams distribute parallel for collapse(2) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) + for (auto i1 = begin_1; i1 < end_1; ++i1) { + if constexpr (std::is_void::value) + functor(i0, i1); + else + functor(typename Policy::work_tag(), i0, i1); + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + +#pragma 
omp target teams distribute parallel for collapse(3) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + if constexpr (std::is_void::value) + functor(i0, i1, i2); + else + functor(typename Policy::work_tag(), i0, i1, i2); + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + +#pragma omp target teams distribute parallel for collapse(4) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, i3); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3); + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + +#pragma omp target teams distribute parallel for collapse(5) map(to : functor) + for (auto i0 
= begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + } + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = policy.m_upper[5]; + +#pragma omp target teams distribute parallel for collapse(6) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i5 = begin_5; i5 < end_5; ++i5) { + { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5); + } + } + } + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + +#pragma omp target 
teams distribute parallel for collapse(2) map(to : functor) + for (auto i1 = begin_1; i1 < end_1; ++i1) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1); + else + functor(typename Policy::work_tag(), i0, i1); + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + +#pragma omp target teams distribute parallel for collapse(3) map(to : functor) + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2); + else + functor(typename Policy::work_tag(), i0, i1, i2); + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + +#pragma omp target teams distribute parallel for collapse(4) map(to : functor) + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, i3); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3); + } + 
} + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + +#pragma omp target teams distribute parallel for collapse(5) map(to : functor) + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + } + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = policy.m_upper[5]; + +#pragma omp target teams distribute parallel for collapse(6) map(to : functor) + for (auto i5 = begin_5; i5 < end_5; ++i5) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 
< end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5); + } + } + } + } + } + } + } + } + + inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} + // TODO DZP: based on a conversation with Christian, we're using 256 as a + // heuristic here. We need something better once we can query these kinds of + // properties + template + static int max_tile_size_product(const Policy&, const Functor&) { + return 256; + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* KOKKOS_OPENMPTARGET_PARALLELFOR_MDRANGE_HPP */ diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp similarity index 61% rename from packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp rename to packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp index 6878531730d9..e86a12197497 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp @@ -14,128 +14,120 @@ // //@HEADER -#ifndef KOKKOS_OPENMPTARGET_PARALLEL_MDRANGE_HPP -#define KOKKOS_OPENMPTARGET_PARALLEL_MDRANGE_HPP +#ifndef KOKKOS_OPENMPTARGET_PARALLELREDUCE_MDRANGE_HPP +#define KOKKOS_OPENMPTARGET_PARALLELREDUCE_MDRANGE_HPP #include #include -#include +#include "Kokkos_OpenMPTarget_MDRangePolicy.hpp" #include -// WORKAROUND OPENMPTARGET: sometimes tile sizes don't make it correctly, -// this was tracked down to a bug in clang with regards of mapping structs -// with arrays of long in it. Arrays of int might be fine though ... 
-#define KOKKOS_IMPL_MDRANGE_USE_NO_TILES // undef EOF - //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { -template -class ParallelFor, - Kokkos::Experimental::OpenMPTarget> { +template +class ParallelReduce, + Kokkos::Experimental::OpenMPTarget> { private: - using Policy = Kokkos::MDRangePolicy; + using Policy = Kokkos::MDRangePolicy; + using FunctorType = typename CombinedFunctorReducerType::functor_type; + using ReducerType = typename CombinedFunctorReducerType::reducer_type; + using WorkTag = typename Policy::work_tag; using Member = typename Policy::member_type; using Index = typename Policy::index_type; - const FunctorType m_functor; + using pointer_type = typename ReducerType::pointer_type; + using reference_type = typename ReducerType::reference_type; + + static constexpr bool UseReducer = + !std::is_same_v; + + const pointer_type m_result_ptr; + const CombinedFunctorReducerType m_functor_reducer; const Policy m_policy; + using ParReduceCopy = ParallelReduceCopy; + + bool m_result_ptr_on_device; + public: inline void execute() const { - OpenMPTargetExec::verify_is_process( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - OpenMPTargetExec::verify_initialized( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - FunctorType functor(m_functor); - Policy policy = m_policy; - -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - typename Policy::point_type unused; - - execute_tile(unused, functor, policy); -#else - const int64_t begin = 0; - const int64_t end = m_policy.m_num_tiles; - -#pragma omp target teams distribute map(to : functor) num_teams(end - begin) - { - for (ptrdiff_t tile_idx = begin; tile_idx < end; ++tile_idx) { - -#pragma omp parallel - { - typename Policy::point_type offset; - if (Policy::outer_direction == Policy::Left) { - for (int i = 0; i < Policy::rank; ++i) { - offset[i] = (tile_idx % 
policy.m_tile_end[i]) * policy.m_tile[i] + - policy.m_lower[i]; - tile_idx /= policy.m_tile_end[i]; - } - } else { - for (int i = Policy::rank - 1; i >= 0; --i) { - offset[i] = (tile_idx % policy.m_tile_end[i]) * policy.m_tile[i] + - policy.m_lower[i]; - tile_idx /= policy.m_tile_end[i]; - } - } - execute_tile(offset, functor, policy); - } - } - } -#endif + // Only let one ParallelReduce instance at a time use the scratch memory. + std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); + execute_tile( + m_functor_reducer.get_functor(), m_policy, m_result_ptr, + std::integral_constant()); } - template + template + inline ParallelReduce(const CombinedFunctorReducerType& arg_functor_reducer, + Policy arg_policy, const ViewType& arg_result_view) + : m_result_ptr(arg_result_view.data()), + m_functor_reducer(arg_functor_reducer), + m_policy(arg_policy), + m_result_ptr_on_device( + MemorySpaceAccess::accessible) {} + + template inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index end_0 = policy.m_upper[0]; const Index end_1 = policy.m_upper[1]; -#pragma omp target teams distribute parallel for collapse(2) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. 
+ if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(2) map(to \ + : functor) \ + reduction(custom \ + : result) for (auto i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_void::value) - functor(i0, i1); - else - functor(typename Policy::work_tag(), i0, i1); + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, result); + else + functor(typename Policy::work_tag(), i0, i1, result); + } } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - -#pragma omp for collapse(2) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_void::value) - functor(i0, i1); - else - functor(typename Policy::work_tag(), i0, i1); + } else { +#pragma omp target teams distribute parallel for collapse(2) map(to : functor) \ +reduction(+:result) + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, result); + else + functor(typename Policy::work_tag(), i0, i1, result); + } } -#endif + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); } - template + template inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { const Index 
begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -144,107 +136,119 @@ class ParallelFor, const Index end_1 = policy.m_upper[1]; const Index end_2 = policy.m_upper[2]; -#pragma omp target teams distribute parallel for collapse(3) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_void::value) - functor(i0, i1, i2); - else - functor(typename Policy::work_tag(), i0, i1, i2); + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. + if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join( \ + omp_out, omp_in)) \ + initializer( \ + OpenMPTargetReducerWrapper ::init( \ + omp_priv)) + +#pragma omp target teams distribute parallel for collapse(3) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, result); + } } } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? 
end_2 : policy.m_upper[2]; - -#pragma omp for collapse(3) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_void::value) - functor(i0, i1, i2); - else - functor(typename Policy::work_tag(), i0, i1, i2); + } else { +#pragma omp target teams distribute parallel for collapse(3) map(to : functor) \ +reduction(+:result) + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, result); + } } -#endif + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); } - template + template inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; - const Index begin_2 = policy.m_lower[2]; - const Index begin_3 = policy.m_lower[3]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; const Index end_0 = policy.m_upper[0]; const Index end_1 = policy.m_upper[1]; const Index end_2 = policy.m_upper[2]; const Index end_3 = policy.m_upper[3]; -#pragma omp target teams distribute parallel for collapse(4) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types.
+ if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(4) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i3 = begin_3; i3 < end_3; ++i3) { for (auto i2 = begin_2; i2 < end_2; ++i2) { - for (auto i3 = begin_3; i3 < end_3; ++i3) { - if constexpr (std::is_void::value) - functor(i0, i1, i2, i3); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3); + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, result); + } } } } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - - const ptrdiff_t begin_3 = offset[3]; - ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; - end_3 = end_3 < policy.m_upper[3] ? 
end_3 : policy.m_upper[3]; - -#pragma omp for collapse(4) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) - for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) { - if constexpr (std::is_void::value) - functor(i0, i1, i2, i3); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3); + } else { +#pragma omp target teams distribute parallel for collapse(4) map(to : functor) \ +reduction(+:result) + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, result); + } } -#endif + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); } - template + template inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -257,64 +261,65 @@ class ParallelFor, const Index end_3 = policy.m_upper[3]; const Index end_4 = policy.m_upper[4]; -#pragma omp target teams distribute parallel for collapse(5) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; ++i2) { - for (auto i3 = begin_3; i3 < end_3; ++i3) { - for (auto i4 = begin_4; i4 < end_4; ++i4) { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + ValueType result = ValueType(); + + // 
FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. + if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(5) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + result); + } } } } } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - - const ptrdiff_t begin_3 = offset[3]; - ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; - end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; - - const ptrdiff_t begin_4 = offset[4]; - ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; - end_4 = end_4 < policy.m_upper[4] ? 
end_4 : policy.m_upper[4]; - -#pragma omp for collapse(5) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) - for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) - for (ptrdiff_t i4 = begin_4; i4 < end_4; ++i4) { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + } else { +#pragma omp target teams distribute parallel for collapse(5) map(to : functor) \ +reduction(+:result) + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + result); + } } -#endif + } + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); } - template + template inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -329,140 +334,69 @@ class ParallelFor, const Index end_4 = policy.m_upper[4]; const Index end_5 = policy.m_upper[5]; -#pragma omp target teams distribute parallel for collapse(6) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; ++i2) { + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication 
for different reduction types. + if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(6) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i5 = begin_5; i5 < end_5; ++i5) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { for (auto i3 = begin_3; i3 < end_3; ++i3) { - for (auto i4 = begin_4; i4 < end_4; ++i4) { - for (auto i5 = begin_5; i5 < end_5; ++i5) { - { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4, i5); + functor(i0, i1, i2, i3, i4, i5, result); else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, - i5); + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5, + result); } } } } } } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - - const ptrdiff_t begin_3 = offset[3]; - ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; - end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; - - const ptrdiff_t begin_4 = offset[4]; - ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; - end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; - - const ptrdiff_t begin_5 = offset[5]; - ptrdiff_t end_5 = begin_5 + policy.m_tile[5]; - end_5 = end_5 < policy.m_upper[5] ? 
end_5 : policy.m_upper[5]; - -#pragma omp for collapse(6) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) - for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) - for (ptrdiff_t i4 = begin_4; i4 < end_4; ++i4) - for (ptrdiff_t i5 = begin_5; i5 < end_5; ++i5) { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4, i5); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5); + } else { +#pragma omp target teams distribute parallel for collapse(6) map(to : functor) \ +reduction(+:result) + for (auto i5 = begin_5; i5 < end_5; ++i5) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5, + result); + } } -#endif - } - - inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} - // TODO DZP: based on a conversation with Christian, we're using 256 as a - // heuristic here. 
We need something better once we can query these kinds of - // properties - template - static int max_tile_size_product(const Policy&, const Functor&) { - return 256; - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template -class ParallelReduce, - Kokkos::Experimental::OpenMPTarget> { - private: - using Policy = Kokkos::MDRangePolicy; - using FunctorType = typename CombinedFunctorReducerType::functor_type; - using ReducerType = typename CombinedFunctorReducerType::reducer_type; - - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - using Index = typename Policy::index_type; - - using pointer_type = typename ReducerType::pointer_type; - using reference_type = typename ReducerType::reference_type; - - static constexpr bool UseReducer = - !std::is_same_v; - - const pointer_type m_result_ptr; - const CombinedFunctorReducerType m_functor_reducer; - const Policy m_policy; - - using ParReduceCopy = ParallelReduceCopy; - - bool m_result_ptr_on_device; - - // Only let one ParallelReduce instance at a time use the scratch memory. - // The constructor acquires the mutex which is released in the destructor. 
- std::scoped_lock m_scratch_memory_lock; + } + } + } + } + } - public: - inline void execute() const { - execute_tile( - m_functor_reducer.get_functor(), m_policy, m_result_ptr); + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); } - template - inline ParallelReduce(const CombinedFunctorReducerType& arg_functor_reducer, - Policy arg_policy, const ViewType& arg_result_view) - : m_result_ptr(arg_result_view.data()), - m_functor_reducer(arg_functor_reducer), - m_policy(arg_policy), - m_result_ptr_on_device( - MemorySpaceAccess::accessible), - m_scratch_memory_lock(OpenMPTargetExec::m_mutex_scratch_ptr) {} - template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; @@ -509,9 +443,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -567,9 +501,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[3]; @@ -630,9 +564,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& 
functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -701,9 +635,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -788,5 +722,4 @@ reduction(+:result) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#undef KOKKOS_IMPL_MDRANGE_USE_NO_TILES -#endif /* KOKKOS_OPENMPTARGET_PARALLEL_HPP */ +#endif /* KOKKOS_OPENMPTARGET_PARALLELREDUCE_MDRANGE_HPP */ diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp index caa568a89252..4a112ed11d06 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp @@ -55,13 +55,13 @@ class ParallelReduce, const pointer_type m_result_ptr; bool m_result_ptr_on_device; const int m_result_ptr_num_elems; - // Only let one ParallelReduce instance at a time use the scratch memory. - // The constructor acquires the mutex which is released in the destructor. - std::scoped_lock m_scratch_memory_lock; using TagType = typename Policy::work_tag; public: void execute() const { + // Only let one ParallelReduce instance at a time use the scratch memory. 
+ std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); const FunctorType& functor = m_functor_reducer.get_functor(); if constexpr (FunctorHasJoin) { // Enter this loop if the Functor has a init-join. @@ -108,8 +108,7 @@ class ParallelReduce, m_result_ptr_on_device( MemorySpaceAccess::accessible), - m_result_ptr_num_elems(arg_result_view.size()), - m_scratch_memory_lock(OpenMPTargetExec::m_mutex_scratch_ptr) {} + m_result_ptr_num_elems(arg_result_view.size()) {} }; } // namespace Impl diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp index 8abffa47a43e..16c0eedb8185 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp @@ -470,12 +470,11 @@ class ParallelReduce m_scratch_memory_lock; - public: void execute() const { + // Only let one ParallelReduce instance at a time use the scratch memory. 
+ std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); const FunctorType& functor = m_functor_reducer.get_functor(); if constexpr (FunctorHasJoin) { ParReduceSpecialize::execute_init_join(functor, m_policy, m_result_ptr, @@ -521,8 +520,7 @@ class ParallelReduce::value( - arg_functor_reducer.get_functor(), arg_policy.team_size())), - m_scratch_memory_lock(OpenMPTargetExec::m_mutex_scratch_ptr) {} + arg_functor_reducer.get_functor(), arg_policy.team_size())) {} }; } // namespace Impl diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp index c1f7851f4137..b0d693280243 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp @@ -177,6 +177,10 @@ class ParallelScan, const idx_type chunk_size = 128; const idx_type n_chunks = (N + chunk_size - 1) / chunk_size; + // Only let one ParallelReduce instance at a time use the scratch memory. + std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); + // This could be scratch memory per team Kokkos::View @@ -225,6 +229,10 @@ class ParallelScanWithTotal, const int64_t n_chunks = (N + chunk_size - 1) / chunk_size; if (N > 0) { + // Only let one ParallelReduce instance at a time use the scratch memory. 
+ std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); + // This could be scratch memory per team Kokkos::View diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp index 9a246f7642f9..4de6931918e4 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp @@ -110,6 +110,31 @@ void SYCL::print_configuration(std::ostream& os, bool verbose) const { #else os << "macro KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES : undefined\n"; #endif +#ifdef SYCL_EXT_ONEAPI_GRAPH + os << "macro SYCL_EXT_ONEAPI_GRAPH : defined\n"; +#else + os << "macro SYCL_EXT_ONEAPI_GRAPH : undefined\n"; +#endif +#ifdef SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST + if (sycl_queue() + .has_property< + sycl::ext::intel::property::queue::immediate_command_list>()) + os << "Immediate command lists enforced\n"; + else if (sycl_queue() + .has_property()) + os << "Standard command queue enforced\n"; + else +#endif + { + os << "Immediate command lists and standard command queue allowed.\n"; + if (const char* environment_setting = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS")) + os << "SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=" + << environment_setting << " takes precedence.\n"; + else + os << "SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS not defined.\n"; + } int counter = 0; int active_device = Kokkos::device_id(); diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp new file mode 100644 index 000000000000..9c39df941592 --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp @@ -0,0 +1,157 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SYCL_GRAPHNODEKERNEL_HPP +#define KOKKOS_SYCL_GRAPHNODEKERNEL_HPP + +#include + +#include + +#include +#include +#include + +#include + +namespace Kokkos { +namespace Impl { + +template +class GraphNodeKernelImpl + : public PatternImplSpecializationFromTag< + PatternTag, Functor, PolicyType, Args..., + Kokkos::Experimental::SYCL>::type { + public: + using Policy = PolicyType; + using graph_kernel = GraphNodeKernelImpl; + using base_t = typename PatternImplSpecializationFromTag< + PatternTag, Functor, Policy, Args..., Kokkos::Experimental::SYCL>::type; + + // TODO use the name and executionspace + template + GraphNodeKernelImpl(std::string, Kokkos::Experimental::SYCL const&, + Functor arg_functor, PolicyDeduced&& arg_policy, + ArgsDeduced&&... args) + : base_t(std::move(arg_functor), (PolicyDeduced &&) arg_policy, + (ArgsDeduced &&) args...) 
{} + + template + GraphNodeKernelImpl(Kokkos::Experimental::SYCL const& exec_space, + Functor arg_functor, PolicyDeduced&& arg_policy) + : GraphNodeKernelImpl("", exec_space, std::move(arg_functor), + (PolicyDeduced &&) arg_policy) {} + + void set_sycl_graph_ptr( + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable>* + arg_graph) { + m_graph_ptr = arg_graph; + } + + void set_sycl_graph_node_ptr( + std::optional* arg_node) { + m_graph_node_ptr = arg_node; + } + + std::optional& get_sycl_graph_node() + const { + return *m_graph_node_ptr; + } + + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable>& + get_sycl_graph() const { + return *m_graph_ptr; + } + + private: + Kokkos::ObservingRawPtr> + m_graph_ptr = nullptr; + Kokkos::ObservingRawPtr> + m_graph_node_ptr = nullptr; +}; + +struct SYCLGraphNodeAggregateKernel { + using graph_kernel = SYCLGraphNodeAggregateKernel; + + // Aggregates don't need a policy, but for the purposes of checking the static + // assertions about graph kernels, + struct Policy { + using is_graph_kernel = std::true_type; + }; +}; + +template ::type> +struct get_graph_node_kernel_type + : type_identity> {}; + +template +struct get_graph_node_kernel_type + : type_identity, + Kokkos::ParallelReduceTag>> {}; + +template +auto& get_sycl_graph_from_kernel(KernelType const& kernel) { + using graph_node_kernel_t = + typename get_graph_node_kernel_type::type; + auto const& kernel_as_graph_kernel = + static_cast(kernel); + auto& graph = kernel_as_graph_kernel.get_sycl_graph(); + + return graph; +} + +template +auto& get_sycl_graph_node_from_kernel(KernelType const& kernel) { + using graph_node_kernel_t = + typename get_graph_node_kernel_type::type; + auto const& kernel_as_graph_kernel = + static_cast(kernel); + auto& graph_node = kernel_as_graph_kernel.get_sycl_graph_node(); + + return graph_node; +} + +template +void 
sycl_attach_kernel_to_node(Kernel& kernel, const Lambda& lambda) { + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable>& graph = + Impl::get_sycl_graph_from_kernel(kernel); + std::optional& graph_node = + Impl::get_sycl_graph_node_from_kernel(kernel); + KOKKOS_ENSURES(!graph_node); + graph_node = graph.add(lambda); + KOKKOS_ENSURES(graph_node); + // FIXME_SYCL_GRAPH not yet implemented in the compiler + // KOKKOS_ENSURES(graph_node.get_type() == + // sycl::ext::oneapi::experimental::node_type::kernel) +} + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp new file mode 100644 index 000000000000..6bbe6711a2e8 --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp @@ -0,0 +1,56 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SYCL_GRAPHNODE_IMPL_HPP +#define KOKKOS_SYCL_GRAPHNODE_IMPL_HPP + +#include + +#include + +#include + +#include + +namespace Kokkos { +namespace Impl { +template <> +struct GraphNodeBackendSpecificDetails { + std::optional node; + + explicit GraphNodeBackendSpecificDetails() = default; + + explicit GraphNodeBackendSpecificDetails( + _graph_node_is_root_ctor_tag) noexcept {} +}; + +template +struct GraphNodeBackendDetailsBeforeTypeErasure { + protected: + GraphNodeBackendDetailsBeforeTypeErasure( + Kokkos::Experimental::SYCL const &, Kernel &, PredecessorRef const &, + GraphNodeBackendSpecificDetails &) noexcept {} + + GraphNodeBackendDetailsBeforeTypeErasure( + Kokkos::Experimental::SYCL const &, _graph_node_is_root_ctor_tag, + GraphNodeBackendSpecificDetails &) noexcept {} +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp new file mode 100644 index 000000000000..1dc4a9c99739 --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp @@ -0,0 +1,174 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SYCL_GRAPH_IMPL_HPP +#define KOKKOS_SYCL_GRAPH_IMPL_HPP + +#include + +#include + +#include +#include + +#include + +#include + +namespace Kokkos { +namespace Impl { +template <> +class GraphImpl { + public: + using node_details_t = + GraphNodeBackendSpecificDetails; + using root_node_impl_t = GraphNodeImpl; + using aggregate_kernel_impl_t = SYCLGraphNodeAggregateKernel; + using aggregate_node_impl_t = + GraphNodeImpl; + + // Not movable or copyable; it spends its whole life as a shared_ptr in the + // Graph object. + GraphImpl() = delete; + GraphImpl(GraphImpl const&) = delete; + GraphImpl(GraphImpl&&) = delete; + GraphImpl& operator=(GraphImpl const&) = delete; + GraphImpl& operator=(GraphImpl&&) = delete; + + ~GraphImpl(); + + explicit GraphImpl(Kokkos::Experimental::SYCL instance); + + void add_node(std::shared_ptr const& arg_node_ptr); + + template + void add_node(std::shared_ptr const& arg_node_ptr); + + template + void add_predecessor(NodeImplPtr arg_node_ptr, PredecessorRef arg_pred_ref); + + void submit(); + + Kokkos::Experimental::SYCL const& get_execution_space() const noexcept; + + auto create_root_node_ptr(); + + template + auto create_aggregate_ptr(PredecessorRefs&&...); + + private: + void instantiate_graph() { m_graph_exec = m_graph.finalize(); } + + Kokkos::Experimental::SYCL m_execution_space; + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable> + m_graph; + std::optional> + m_graph_exec; +}; + +inline GraphImpl::~GraphImpl() { + m_execution_space.fence("Kokkos::GraphImpl::~GraphImpl: Graph Destruction"); +} + +inline GraphImpl::GraphImpl( + Kokkos::Experimental::SYCL instance) + : m_execution_space(std::move(instance)), + m_graph(m_execution_space.sycl_queue().get_context(), + m_execution_space.sycl_queue().get_device()) {} + +inline void GraphImpl::add_node( + std::shared_ptr const& arg_node_ptr) { 
+ // add an empty node that needs to be set up before finalizing the graph + arg_node_ptr->node_details_t::node = m_graph.add(); +} + +// Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl +// Also requires that the kernel has the graph node tag in its policy +template +inline void GraphImpl::add_node( + std::shared_ptr const& arg_node_ptr) { + static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value); + KOKKOS_EXPECTS(arg_node_ptr); + // The Kernel launch from the execute() method has been shimmed to insert + // the node into the graph + auto& kernel = arg_node_ptr->get_kernel(); + auto& node = static_cast(arg_node_ptr.get())->node; + KOKKOS_EXPECTS(!node); + kernel.set_sycl_graph_ptr(&m_graph); + kernel.set_sycl_graph_node_ptr(&node); + kernel.execute(); + KOKKOS_ENSURES(node); +} + +// Requires PredecessorRef is a specialization of GraphNodeRef that has +// already been added to this graph and NodeImpl is a specialization of +// GraphNodeImpl that has already been added to this graph. 
+template +inline void GraphImpl::add_predecessor( + NodeImplPtr arg_node_ptr, PredecessorRef arg_pred_ref) { + KOKKOS_EXPECTS(arg_node_ptr); + auto pred_ptr = GraphAccess::get_node_ptr(arg_pred_ref); + KOKKOS_EXPECTS(pred_ptr); + + auto& pred_node = pred_ptr->node_details_t::node; + KOKKOS_EXPECTS(pred_node); + + auto& node = arg_node_ptr->node_details_t::node; + KOKKOS_EXPECTS(node); + + m_graph.make_edge(*pred_node, *node); +} + +inline void GraphImpl::submit() { + if (!m_graph_exec) { + instantiate_graph(); + } + m_execution_space.sycl_queue().ext_oneapi_graph(*m_graph_exec); +} + +inline Kokkos::Experimental::SYCL const& +GraphImpl::get_execution_space() const noexcept { + return m_execution_space; +} + +inline auto GraphImpl::create_root_node_ptr() { + KOKKOS_EXPECTS(!m_graph_exec); + auto rv = std::make_shared(get_execution_space(), + _graph_node_is_root_ctor_tag{}); + rv->node_details_t::node = m_graph.add(); + return rv; +} + +template +inline auto GraphImpl::create_aggregate_ptr( + PredecessorRefs&&...) { + // The attachment to predecessors, which is all we really need, happens + // in the generic layer, which calls through to add_predecessor for + // each predecessor ref, so all we need to do here is create the (trivial) + // aggregate node. 
+ return std::make_shared(m_execution_space, + _graph_node_kernel_ctor_tag{}, + aggregate_kernel_impl_t{}); +} +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp index 0e67adb5787d..5843dca81239 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp @@ -166,26 +166,27 @@ int SYCLInternal::acquire_team_scratch_space() { return current_team_scratch; } -sycl::device_ptr SYCLInternal::resize_team_scratch_space( +Kokkos::Impl::sycl_device_ptr SYCLInternal::resize_team_scratch_space( int scratch_pool_id, std::int64_t bytes, bool force_shrink) { // Multiple ParallelFor/Reduce Teams can call this function at the same time // and invalidate the m_team_scratch_ptr. We use a pool to avoid any race // condition. - if (m_team_scratch_current_size[scratch_pool_id] == 0) { + auto mem_space = Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue); + if (m_team_scratch_current_size[scratch_pool_id] == 0 && bytes > 0) { m_team_scratch_current_size[scratch_pool_id] = bytes; - m_team_scratch_ptr[scratch_pool_id] = - Kokkos::kokkos_malloc( - "Kokkos::Experimental::SYCLDeviceUSMSpace::TeamScratchMemory", - m_team_scratch_current_size[scratch_pool_id]); + m_team_scratch_ptr[scratch_pool_id] = mem_space.allocate( + "Kokkos::Experimental::SYCL::InternalTeamScratchMemory", + m_team_scratch_current_size[scratch_pool_id]); } if ((bytes > m_team_scratch_current_size[scratch_pool_id]) || ((bytes < m_team_scratch_current_size[scratch_pool_id]) && (force_shrink))) { + mem_space.deallocate(m_team_scratch_ptr[scratch_pool_id], + m_team_scratch_current_size[scratch_pool_id]); m_team_scratch_current_size[scratch_pool_id] = bytes; - m_team_scratch_ptr[scratch_pool_id] = - Kokkos::kokkos_realloc( - m_team_scratch_ptr[scratch_pool_id], - m_team_scratch_current_size[scratch_pool_id]); + 
m_team_scratch_ptr[scratch_pool_id] = mem_space.allocate( + "Kokkos::Experimental::SYCL::InternalTeamScratchMemory", + m_team_scratch_current_size[scratch_pool_id]); } return m_team_scratch_ptr[scratch_pool_id]; } @@ -234,8 +235,8 @@ void SYCLInternal::finalize() { for (int i = 0; i < m_n_team_scratch; ++i) { if (m_team_scratch_current_size[i] > 0) { - Kokkos::kokkos_free( - m_team_scratch_ptr[i]); + device_mem_space.deallocate(m_team_scratch_ptr[i], + m_team_scratch_current_size[i]); m_team_scratch_current_size[i] = 0; m_team_scratch_ptr[i] = nullptr; } @@ -250,7 +251,8 @@ void SYCLInternal::finalize() { m_queue.reset(); } -sycl::device_ptr SYCLInternal::scratch_space(const std::size_t size) { +Kokkos::Impl::sycl_device_ptr SYCLInternal::scratch_space( + const std::size_t size) { if (verify_is_initialized("scratch_space") && m_scratchSpaceCount < scratch_count(size)) { auto mem_space = Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue); @@ -270,7 +272,8 @@ sycl::device_ptr SYCLInternal::scratch_space(const std::size_t size) { return m_scratchSpace; } -sycl::host_ptr SYCLInternal::scratch_host(const std::size_t size) { +Kokkos::Impl::sycl_host_ptr SYCLInternal::scratch_host( + const std::size_t size) { if (verify_is_initialized("scratch_unified") && m_scratchHostCount < scratch_count(size)) { auto mem_space = Kokkos::Experimental::SYCLHostUSMSpace(*m_queue); @@ -290,7 +293,8 @@ sycl::host_ptr SYCLInternal::scratch_host(const std::size_t size) { return m_scratchHost; } -sycl::device_ptr SYCLInternal::scratch_flags(const std::size_t size) { +Kokkos::Impl::sycl_device_ptr SYCLInternal::scratch_flags( + const std::size_t size) { if (verify_is_initialized("scratch_flags") && m_scratchFlagsCount < scratch_count(size)) { auto mem_space = Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue); diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp index ab7e8ce71e06..2d784ef8a5f0 100644 --- 
a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp @@ -43,13 +43,12 @@ class SYCLInternal { SYCLInternal& operator=(SYCLInternal&&) = delete; SYCLInternal(SYCLInternal&&) = delete; - sycl::device_ptr scratch_space(const std::size_t size); - sycl::device_ptr scratch_flags(const std::size_t size); - sycl::host_ptr scratch_host(const std::size_t size); + Kokkos::Impl::sycl_device_ptr scratch_space(const std::size_t size); + Kokkos::Impl::sycl_device_ptr scratch_flags(const std::size_t size); + Kokkos::Impl::sycl_host_ptr scratch_host(const std::size_t size); int acquire_team_scratch_space(); - sycl::device_ptr resize_team_scratch_space(int scratch_pool_id, - std::int64_t bytes, - bool force_shrink = false); + Kokkos::Impl::sycl_device_ptr resize_team_scratch_space( + int scratch_pool_id, std::int64_t bytes, bool force_shrink = false); void register_team_scratch_event(int scratch_pool_id, sycl::event event); uint32_t impl_get_instance_id() const; @@ -59,21 +58,22 @@ class SYCLInternal { uint32_t m_maxConcurrency = 0; uint64_t m_maxShmemPerBlock = 0; - std::size_t m_scratchSpaceCount = 0; - sycl::device_ptr m_scratchSpace = nullptr; - std::size_t m_scratchHostCount = 0; - sycl::host_ptr m_scratchHost = nullptr; - std::size_t m_scratchFlagsCount = 0; - sycl::device_ptr m_scratchFlags = nullptr; + std::size_t m_scratchSpaceCount = 0; + Kokkos::Impl::sycl_device_ptr m_scratchSpace = nullptr; + std::size_t m_scratchHostCount = 0; + Kokkos::Impl::sycl_host_ptr m_scratchHost = nullptr; + std::size_t m_scratchFlagsCount = 0; + Kokkos::Impl::sycl_device_ptr m_scratchFlags = nullptr; // mutex to access shared memory mutable std::mutex m_mutexScratchSpace; // Team Scratch Level 1 Space - static constexpr int m_n_team_scratch = 10; - mutable int64_t m_team_scratch_current_size[m_n_team_scratch] = {}; - mutable sycl::device_ptr m_team_scratch_ptr[m_n_team_scratch] = {}; - mutable int m_current_team_scratch = 0; - 
mutable sycl::event m_team_scratch_event[m_n_team_scratch] = {}; + static constexpr int m_n_team_scratch = 10; + mutable int64_t m_team_scratch_current_size[m_n_team_scratch] = {}; + mutable Kokkos::Impl::sycl_device_ptr + m_team_scratch_ptr[m_n_team_scratch] = {}; + mutable int m_current_team_scratch = 0; + mutable sycl::event m_team_scratch_event[m_n_team_scratch] = {}; mutable std::mutex m_team_scratch_mutex; uint32_t m_instance_id = Kokkos::Tools::Experimental::Impl::idForInstance< diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp index 7fbf5420f83e..cb7b1048da35 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp @@ -120,7 +120,7 @@ class Kokkos::Impl::ParallelFor, desul::ensure_sycl_lock_arrays_on_device(q); - auto parallel_for_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { const auto range = compute_ranges(); const sycl::range<3> global_range = range.get_global_range(); const sycl::range<3> local_range = range.get_local_range(); @@ -153,12 +153,22 @@ class Kokkos::Impl::ParallelFor, {global_x, global_y, global_z}, {local_x, local_y, local_z}) .exec_range(); }); - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + return {}; + } else #endif + { + auto parallel_for_event = q.submit(cgh_lambda); - return parallel_for_event; +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); +#endif + return parallel_for_event; + } } public: @@ -181,12 +191,6 @@ class Kokkos::Impl::ParallelFor, functor_wrapper.register_event(event); } - ParallelFor(const ParallelFor&) = delete; - 
ParallelFor(ParallelFor&&) = delete; - ParallelFor& operator=(const ParallelFor&) = delete; - ParallelFor& operator=(ParallelFor&&) = delete; - ~ParallelFor() = default; - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) : m_functor(arg_functor), m_policy(arg_policy), diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp index b4de7eb89ffa..8ef43d392c6a 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp @@ -17,11 +17,15 @@ #ifndef KOKKOS_SYCL_PARALLEL_FOR_RANGE_HPP_ #define KOKKOS_SYCL_PARALLEL_FOR_RANGE_HPP_ +#ifdef SYCL_EXT_ONEAPI_AUTO_LOCAL_RANGE +#include +#endif #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES #include #endif namespace Kokkos::Impl { +#ifndef SYCL_EXT_ONEAPI_AUTO_LOCAL_RANGE template struct FunctorWrapperRangePolicyParallelFor { using WorkTag = typename Policy::work_tag; @@ -37,14 +41,15 @@ struct FunctorWrapperRangePolicyParallelFor { typename Policy::index_type m_begin; FunctorWrapper m_functor_wrapper; }; +#endif // Same as above but for a user-provided workgroup size template struct FunctorWrapperRangePolicyParallelForCustom { using WorkTag = typename Policy::work_tag; - void operator()(sycl::item<1> item) const { - const typename Policy::index_type id = item.get_linear_id(); + void operator()(sycl::nd_item<1> item) const { + const typename Policy::index_type id = item.get_global_linear_id(); if (id < m_work_size) { const auto shifted_id = id + m_begin; if constexpr (std::is_void_v) @@ -74,27 +79,47 @@ class Kokkos::Impl::ParallelFor, const Policy m_policy; template - static sycl::event sycl_direct_launch(const Policy& policy, - const Functor& functor, - const sycl::event& memcpy_event) { + sycl::event sycl_direct_launch(const Policy& policy, const Functor& functor, + const sycl::event& memcpy_event) const { // Convenience references const 
Kokkos::Experimental::SYCL& space = policy.space(); sycl::queue& q = space.sycl_queue(); desul::ensure_sycl_lock_arrays_on_device(q); - auto parallel_for_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES cgh.depends_on(memcpy_event); #else (void)memcpy_event; #endif + if (policy.chunk_size() <= 1) { +#ifdef SYCL_EXT_ONEAPI_AUTO_LOCAL_RANGE + const auto actual_range = policy.end() - policy.begin(); + FunctorWrapperRangePolicyParallelForCustom f{ + policy.begin(), functor, actual_range}; + // Round the actual range up to the closest power of two not exceeding + // the maximum workgroup size + const auto max_wgroup_size = + q.get_device().get_info(); + const auto wgroup_size_multiple = Kokkos::bit_floor( + std::min(max_wgroup_size, actual_range)); + + const auto launch_range = (actual_range + wgroup_size_multiple - 1) / + wgroup_size_multiple * wgroup_size_multiple; + sycl::nd_range<1> range( + launch_range, sycl::ext::oneapi::experimental::auto_range<1>()); + cgh.parallel_for< + FunctorWrapperRangePolicyParallelForCustom>(range, + f); +#else FunctorWrapperRangePolicyParallelFor f{policy.begin(), functor}; sycl::range<1> range(policy.end() - policy.begin()); cgh.parallel_for>( range, f); +#endif } else { // Use the chunk size as workgroup size. 
We need to make sure that the // range the kernel is launched with is a multiple of the workgroup @@ -111,12 +136,22 @@ class Kokkos::Impl::ParallelFor, FunctorWrapperRangePolicyParallelForCustom>(range, f); } - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + return {}; + } else #endif + { + auto parallel_for_event = q.submit(cgh_lambda); - return parallel_for_event; +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); +#endif + return parallel_for_event; + } } public: @@ -137,12 +172,6 @@ class Kokkos::Impl::ParallelFor, functor_wrapper.register_event(event); } - ParallelFor(const ParallelFor&) = delete; - ParallelFor(ParallelFor&&) = delete; - ParallelFor& operator=(const ParallelFor&) = delete; - ParallelFor& operator=(ParallelFor&&) = delete; - ~ParallelFor() = default; - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) : m_functor(arg_functor), m_policy(arg_policy) {} }; diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp index ecb4a863da2d..cf7f582bc79f 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp @@ -22,13 +22,14 @@ #include #include +#include #include template class Kokkos::Impl::ParallelFor, Kokkos::Experimental::SYCL> { public: - using Policy = TeamPolicyInternal; + using Policy = TeamPolicy; using functor_type = FunctorType; using size_type = ::Kokkos::Experimental::SYCL::size_type; @@ -44,24 +45,19 @@ class Kokkos::Impl::ParallelFor, size_type const m_vector_size; int m_shmem_begin; int m_shmem_size; - sycl::device_ptr m_global_scratch_ptr; size_t m_scratch_size[2]; - // Only let one 
ParallelFor instance at a time use the team scratch memory. - // The constructor acquires the mutex which is released in the destructor. - std::scoped_lock m_scratch_buffers_lock; - int m_scratch_pool_id = -1; template - sycl::event sycl_direct_launch(const Policy& policy, + sycl::event sycl_direct_launch(const sycl_device_ptr global_scratch_ptr, const FunctorWrapper& functor_wrapper, const sycl::event& memcpy_event) const { // Convenience references - const Kokkos::Experimental::SYCL& space = policy.space(); + const Kokkos::Experimental::SYCL& space = m_policy.space(); sycl::queue& q = space.sycl_queue(); desul::ensure_sycl_lock_arrays_on_device(q); - auto parallel_for_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { // FIXME_SYCL accessors seem to need a size greater than zero at least for // host queues sycl::local_accessor team_scratch_memory_L0( @@ -72,7 +68,6 @@ class Kokkos::Impl::ParallelFor, // Avoid capturing *this since it might not be trivially copyable const auto shmem_begin = m_shmem_begin; const size_t scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; - sycl::device_ptr const global_scratch_ptr = m_global_scratch_ptr; auto lambda = [=](sycl::nd_item<2> item) { const member_type team_member( @@ -114,28 +109,53 @@ class Kokkos::Impl::ParallelFor, sycl::range<2>(m_team_size, m_league_size * final_vector_size), sycl::range<2>(m_team_size, final_vector_size)), lambda); - }); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + return {}; + } else +#endif + { + auto parallel_for_event = q.submit(cgh_lambda); + #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); + q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); #endif - return parallel_for_event; + return parallel_for_event; + } } public: inline void execute() const { if (m_league_size == 0) return; - auto& 
space = *m_policy.space().impl_internal_space_instance(); + auto& instance = *m_policy.space().impl_internal_space_instance(); + + // Only let one instance at a time resize the instance's scratch memory + // allocations. + std::scoped_lock team_scratch_lock( + instance.m_team_scratch_mutex); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. + int scratch_pool_id = instance.acquire_team_scratch_space(); + const sycl_device_ptr global_scratch_ptr = + static_cast>(instance.resize_team_scratch_space( + scratch_pool_id, + static_cast(m_scratch_size[1]) * m_league_size)); + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& - indirectKernelMem = space.get_indirect_kernel_mem(); + indirectKernelMem = instance.get_indirect_kernel_mem(); auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( m_functor, indirectKernelMem); - sycl::event event = sycl_direct_launch(m_policy, functor_wrapper, + sycl::event event = sycl_direct_launch(global_scratch_ptr, functor_wrapper, functor_wrapper.get_copy_event()); functor_wrapper.register_event(event); - space.register_team_scratch_event(m_scratch_pool_id, event); + instance.register_team_scratch_event(scratch_pool_id, event); } ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy) @@ -143,10 +163,7 @@ class Kokkos::Impl::ParallelFor, m_policy(arg_policy), m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()), - m_scratch_buffers_lock(arg_policy.space() - .impl_internal_space_instance() - ->m_team_scratch_mutex) { + m_vector_size(arg_policy.impl_vector_length()) { // FIXME_SYCL optimize if (m_team_size < 0) m_team_size = @@ -159,22 +176,14 @@ class Kokkos::Impl::ParallelFor, m_scratch_size[0] = m_shmem_size; m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); - // Functor's reduce memory, team scan memory, and team shared memory depend - // upon team size. 
- auto& space = *m_policy.space().impl_internal_space_instance(); - m_scratch_pool_id = space.acquire_team_scratch_space(); - m_global_scratch_ptr = - static_cast>(space.resize_team_scratch_space( - m_scratch_pool_id, - static_cast(m_scratch_size[1]) * m_league_size)); - - if (static_cast(space.m_maxShmemPerBlock) < + const auto& instance = *m_policy.space().impl_internal_space_instance(); + if (static_cast(instance.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { std::stringstream out; out << "Kokkos::Impl::ParallelFor insufficient shared memory! " "Requested " << m_shmem_size - m_shmem_begin << " bytes but maximum is " - << space.m_maxShmemPerBlock << '\n'; + << instance.m_maxShmemPerBlock << '\n'; Kokkos::Impl::throw_runtime_exception(out.str()); } diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp index f55280e22e38..0774b24bca16 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp @@ -77,9 +77,7 @@ class Kokkos::Impl::ParallelReduce::accessible), - m_scratch_buffers_lock( - m_space.impl_internal_space_instance()->m_mutexScratchSpace) {} + typename View::memory_space>::accessible) {} private: template @@ -94,10 +92,10 @@ class Kokkos::Impl::ParallelReduce results_ptr; + sycl_device_ptr results_ptr; auto host_result_ptr = (m_result_ptr && !m_result_ptr_device_accessible) - ? static_cast>( + ? 
static_cast>( instance.scratch_host(sizeof(value_type) * value_count)) : nullptr; @@ -108,13 +106,13 @@ class Kokkos::Impl::ParallelReduce>( + results_ptr = static_cast>( instance.scratch_space(sizeof(value_type) * value_count)); auto device_accessible_result_ptr = m_result_ptr_device_accessible @@ -129,12 +127,20 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); #endif - last_reduction_event = parallel_reduce_event; + } } else { // Otherwise (when n_tiles is not zero), we perform a reduction on the // values in all workgroups separately, write the workgroup results back @@ -155,16 +161,16 @@ class Kokkos::Impl::ParallelReduce>( + results_ptr = static_cast>( instance.scratch_space(sizeof(value_type) * value_count * n_wgroups)); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? static_cast>(m_result_ptr) : static_cast>(host_result_ptr); - auto scratch_flags = static_cast>( + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { sycl::local_accessor local_mem( sycl::range<1>(wgroup_size) * value_count, cgh); sycl::local_accessor num_teams_done(1, cgh); @@ -298,12 +304,19 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); #endif - last_reduction_event = parallel_reduce_event; + } } // At this point, the reduced value is written to the entry in results_ptr @@ -311,6 +324,11 @@ class Kokkos::Impl::ParallelReduce::execute: result " "not device-accessible"); @@ -330,6 +348,12 @@ class Kokkos::Impl::ParallelReduce scratch_buffers_lock( + instance.m_mutexScratchSpace); + using IndirectKernelMem = Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -349,10 +373,6 @@ class 
Kokkos::Impl::ParallelReduce m_scratch_buffers_lock; }; #endif /* KOKKOS_SYCL_PARALLEL_REDUCE_MDRANGE_HPP */ diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp index 5333e3c8a83a..2d46ffc77dc4 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp @@ -50,9 +50,7 @@ class Kokkos::Impl::ParallelReduce::accessible), - m_scratch_buffers_lock( - p.space().impl_internal_space_instance()->m_mutexScratchSpace) {} + typename View::memory_space>::accessible) {} private: template @@ -69,10 +67,10 @@ class Kokkos::Impl::ParallelReduce results_ptr = nullptr; + sycl_device_ptr results_ptr = nullptr; auto host_result_ptr = (m_result_ptr && !m_result_ptr_device_accessible) - ? static_cast>( + ? static_cast>( instance.scratch_host(sizeof(value_type) * value_count)) : nullptr; auto device_accessible_result_ptr = @@ -88,10 +86,10 @@ class Kokkos::Impl::ParallelReduce>( + results_ptr = static_cast>( instance.scratch_space(sizeof(value_type) * value_count)); - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { const auto begin = policy.begin(); #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES cgh.depends_on(memcpy_event); @@ -114,24 +112,32 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); #endif - last_reduction_event = parallel_reduce_event; + } } else { // Otherwise (when size > 1), we perform a reduction on the values in all // workgroups separately, write the workgroup results back to global // memory and recurse until only one workgroup does the reduction and thus // gets the final value. 
- auto scratch_flags = static_cast>( + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); auto reduction_lambda_factory = [&](sycl::local_accessor local_mem, sycl::local_accessor num_teams_done, - sycl::device_ptr results_ptr, int values_per_thread) { + sycl_device_ptr results_ptr, int values_per_thread) { const auto begin = policy.begin(); auto lambda = [=](sycl::nd_item<1> item) { @@ -241,7 +247,7 @@ class Kokkos::Impl::ParallelReduce num_teams_done(1, cgh); auto dummy_reduction_lambda = @@ -302,7 +308,7 @@ class Kokkos::Impl::ParallelReduce>(instance.scratch_space( + static_cast>(instance.scratch_space( sizeof(value_type) * value_count * n_wgroups)); sycl::local_accessor local_mem( @@ -320,12 +326,20 @@ class Kokkos::Impl::ParallelReduce(n_wgroups * wgroup_size, wgroup_size), reduction_lambda); - }); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else +#endif + { + last_reduction_event = q.submit(cgh_lambda); #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier( - std::vector{parallel_reduce_event}); + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); #endif - last_reduction_event = parallel_reduce_event; + } } // At this point, the reduced value is written to the entry in results_ptr @@ -333,6 +347,11 @@ class Kokkos::Impl::ParallelReduce::execute: result " "not device-accessible"); @@ -347,6 +366,12 @@ class Kokkos::Impl::ParallelReduce scratch_buffers_lock( + instance.m_mutexScratchSpace); + using IndirectKernelMem = Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -366,10 +391,6 @@ class Kokkos::Impl::ParallelReduce m_scratch_buffers_lock; }; #endif /* KOKKOS_SYCL_PARALLEL_REDUCE_RANGE_HPP */ diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp 
b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp index 27165c59e3a9..b443bcbf9023 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp @@ -23,6 +23,7 @@ #include #include +#include #include template @@ -30,7 +31,7 @@ class Kokkos::Impl::ParallelReduce, Kokkos::Experimental::SYCL> { public: - using Policy = TeamPolicyInternal; + using Policy = TeamPolicy; using FunctorType = typename CombinedFunctorReducerType::functor_type; using ReducerType = typename CombinedFunctorReducerType::reducer_type; @@ -54,24 +55,18 @@ class Kokkos::Impl::ParallelReduce m_global_scratch_ptr; size_t m_scratch_size[2]; const size_type m_league_size; int m_team_size; const size_type m_vector_size; - // Only let one ParallelReduce instance at a time use the team scratch memory - // and the host scratch memory. The constructor acquires the mutex which is - // released in the destructor. - std::scoped_lock m_scratch_buffers_lock; - int m_scratch_pool_id = -1; - template + template sycl::event sycl_direct_launch( - const PolicyType& policy, + const sycl_device_ptr global_scratch_ptr, const CombinedFunctorReducerWrapper& functor_reducer_wrapper, const sycl::event& memcpy_event) const { // Convenience references - const Kokkos::Experimental::SYCL& space = policy.space(); + const Kokkos::Experimental::SYCL& space = m_policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = *space.impl_internal_space_instance(); sycl::queue& q = space.sycl_queue(); @@ -82,7 +77,7 @@ class Kokkos::Impl::ParallelReduce>( + ? static_cast>( instance.scratch_host(sizeof(value_type) * value_count)) : nullptr; @@ -95,14 +90,14 @@ class Kokkos::Impl::ParallelReduce>(instance.scratch_space( + static_cast>(instance.scratch_space( sizeof(value_type) * std::max(value_count, 1u))); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? 
static_cast>(m_result_ptr) : static_cast>(host_result_ptr); - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { // FIXME_SYCL accessors seem to need a size greater than zero at least // for host queues sycl::local_accessor team_scratch_memory_L0( @@ -113,7 +108,6 @@ class Kokkos::Impl::ParallelReduce const global_scratch_ptr = m_global_scratch_ptr; #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES cgh.depends_on(memcpy_event); @@ -144,19 +138,26 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); #endif - last_reduction_event = parallel_reduce_event; + } } else { // Otherwise, (if the total range has more than one element) we perform a // reduction on the values in all workgroups separately, write the // workgroup results back to global memory and recurse until only one // workgroup does the reduction and thus gets the final value. - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { - auto scratch_flags = static_cast>( + auto cgh_lambda = [&](sycl::handler& cgh) { + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); // FIXME_SYCL accessors seem to need a size greater than zero at least @@ -170,12 +171,11 @@ class Kokkos::Impl::ParallelReduce const global_scratch_ptr = m_global_scratch_ptr; sycl::local_accessor num_teams_done(1, cgh); auto team_reduction_factory = [&](sycl::local_accessor local_mem, - sycl::device_ptr results_ptr) { + sycl_device_ptr results_ptr) { auto device_accessible_result_ptr = m_result_ptr_device_accessible ? 
static_cast>(m_result_ptr) @@ -331,7 +331,7 @@ class Kokkos::Impl::ParallelReduce((size + wgroup_size - 1) / wgroup_size, 1); results_ptr = - static_cast>(instance.scratch_space( + static_cast>(instance.scratch_space( sizeof(value_type) * std::max(value_count, 1u) * init_size)); size_t max_work_groups = @@ -359,12 +359,19 @@ class Kokkos::Impl::ParallelReduce(m_team_size, n_wgroups * m_vector_size), sycl::range<2>(m_team_size, m_vector_size)), reduction_lambda); - }); + }; +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else +#endif + { + last_reduction_event = q.submit(cgh_lambda); #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier( - std::vector{parallel_reduce_event}); + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); #endif - last_reduction_event = parallel_reduce_event; + } } // At this point, the reduced value is written to the entry in results_ptr @@ -372,6 +379,11 @@ class Kokkos::Impl::ParallelReduce::execute: result not " "device-accessible"); @@ -386,6 +398,22 @@ class Kokkos::Impl::ParallelReduce scratch_buffers_lock( + instance.m_mutexScratchSpace); + std::scoped_lock team_scratch_lock( + instance.m_team_scratch_mutex); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. 
+ int scratch_pool_id = instance.acquire_team_scratch_space(); + const sycl_device_ptr global_scratch_ptr = + static_cast>(instance.resize_team_scratch_space( + scratch_pool_id, + static_cast(m_scratch_size[1]) * m_league_size)); + using IndirectKernelMem = Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -395,14 +423,24 @@ class Kokkos::Impl::ParallelReduce + ParallelReduce(CombinedFunctorReducerType const& arg_functor_reducer, + Policy const& arg_policy, ViewType const& arg_result) + : m_functor_reducer(arg_functor_reducer), + m_policy(arg_policy), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess::accessible), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { // FIXME_SYCL optimize if (m_team_size < 0) m_team_size = m_policy.team_size_recommended( @@ -423,22 +461,15 @@ class Kokkos::Impl::ParallelReduce>(space.resize_team_scratch_space( - m_scratch_pool_id, - static_cast(m_scratch_size[1]) * m_league_size)); - - if (static_cast(space.m_maxShmemPerBlock) < + const Kokkos::Experimental::Impl::SYCLInternal& instance = + *m_policy.space().impl_internal_space_instance(); + if (static_cast(instance.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { std::stringstream out; out << "Kokkos::Impl::ParallelFor insufficient shared memory! 
" "Requested " << m_shmem_size - m_shmem_begin << " bytes but maximum is " - << space.m_maxShmemPerBlock << '\n'; + << instance.m_maxShmemPerBlock << '\n'; Kokkos::Impl::throw_runtime_exception(out.str()); } @@ -448,25 +479,6 @@ class Kokkos::Impl::ParallelReduce requested too large team size."); } - - public: - template - ParallelReduce(CombinedFunctorReducerType const& arg_functor_reducer, - Policy const& arg_policy, ViewType const& arg_result) - : m_functor_reducer(arg_functor_reducer), - m_policy(arg_policy), - m_result_ptr(arg_result.data()), - m_result_ptr_device_accessible( - MemorySpaceAccess::accessible), - m_league_size(arg_policy.league_size()), - m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()), - m_scratch_buffers_lock(arg_policy.space() - .impl_internal_space_instance() - ->m_team_scratch_mutex) { - initialize(); - } }; #endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp index 977b69bc9eb7..bdb5b8837705 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp @@ -18,6 +18,7 @@ #define KOKKOS_SYCL_PARALLEL_SCAN_RANGE_HPP #include +#include #include #include @@ -35,20 +36,38 @@ void workgroup_scan(sycl::nd_item item, const FunctorType& final_reducer, auto sg = item.get_sub_group(); const int sg_group_id = sg.get_group_id()[0]; const int id_in_sg = sg.get_local_id()[0]; - - for (int stride = 1; stride < global_range; stride <<= 1) { - auto tmp = sg.shuffle_up(local_value, stride); + const int local_range = std::min(sg.get_local_range()[0], global_range); + +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine = [&](int stride) { + if (stride < local_range) { + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right(sg, local_value, + stride); + if (id_in_sg >= stride) 
final_reducer.join(&local_value, &tmp); + } + }; + shuffle_combine(1); + shuffle_combine(2); + shuffle_combine(4); + shuffle_combine(8); + shuffle_combine(16); + KOKKOS_ASSERT(local_range <= 32); +#else + for (int stride = 1; stride < local_range; stride <<= 1) { + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, local_value, stride); if (id_in_sg >= stride) final_reducer.join(&local_value, &tmp); } +#endif const int max_subgroup_size = sg.get_max_local_range()[0]; const int n_active_subgroups = (global_range + max_subgroup_size - 1) / max_subgroup_size; - const int local_range = sg.get_local_range()[0]; if (id_in_sg == local_range - 1 && sg_group_id < n_active_subgroups) local_mem[sg_group_id] = local_value; - local_value = sg.shuffle_up(local_value, 1); + local_value = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, local_value, 1); if (id_in_sg == 0) final_reducer.init(&local_value); sycl::group_barrier(item.get_group()); @@ -61,8 +80,29 @@ void workgroup_scan(sycl::nd_item item, const FunctorType& final_reducer, const auto upper_bound = std::min(local_range, n_active_subgroups - round * local_range); auto local_sg_value = local_mem[idx < n_active_subgroups ? 
idx : 0]; +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine_sg = [&](int stride) { + if (stride < upper_bound) { + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, local_sg_value, stride); + if (id_in_sg >= stride) { + if (idx < n_active_subgroups) + final_reducer.join(&local_sg_value, &tmp); + else + local_sg_value = tmp; + } + } + }; + shuffle_combine_sg(1); + shuffle_combine_sg(2); + shuffle_combine_sg(4); + shuffle_combine_sg(8); + shuffle_combine_sg(16); + KOKKOS_ASSERT(upper_bound <= 32); +#else for (int stride = 1; stride < upper_bound; stride <<= 1) { - auto tmp = sg.shuffle_up(local_sg_value, stride); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, local_sg_value, stride); if (id_in_sg >= stride) { if (idx < n_active_subgroups) final_reducer.join(&local_sg_value, &tmp); @@ -70,6 +110,7 @@ void workgroup_scan(sycl::nd_item item, const FunctorType& final_reducer, local_sg_value = tmp; } } +#endif if (idx < n_active_subgroups) { local_mem[idx] = local_sg_value; if (round > 0) @@ -111,14 +152,10 @@ class ParallelScanSYCLBase { const CombinedFunctorReducer m_functor_reducer; const Policy m_policy; - sycl::host_ptr m_scratch_host = nullptr; + sycl_host_ptr m_scratch_host = nullptr; pointer_type m_result_ptr; const bool m_result_ptr_device_accessible; - // Only let one ParallelScan instance at a time use the host scratch memory. - // The constructor acquires the mutex which is released in the destructor. 
- std::scoped_lock m_scratch_buffers_lock; - private: template sycl::event sycl_direct_launch(const FunctorWrapper& functor_wrapper, @@ -131,95 +168,93 @@ class ParallelScanSYCLBase { const auto size = m_policy.end() - m_policy.begin(); - auto scratch_flags = static_cast>( + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); const auto begin = m_policy.begin(); // Initialize global memory - auto scan_lambda_factory = - [&](sycl::local_accessor local_mem, - sycl::local_accessor num_teams_done, - sycl::device_ptr global_mem_, - sycl::device_ptr group_results_) { - auto lambda = [=](sycl::nd_item<1> item) { - auto global_mem = global_mem_; - auto group_results = group_results_; - - const CombinedFunctorReducer< - FunctorType, typename Analysis::Reducer>& functor_reducer = - functor_wrapper.get_functor(); - const FunctorType& functor = functor_reducer.get_functor(); - const typename Analysis::Reducer& reducer = - functor_reducer.get_reducer(); - - const auto n_wgroups = item.get_group_range()[0]; - const int wgroup_size = item.get_local_range()[0]; - - const int local_id = item.get_local_linear_id(); - const index_type global_id = item.get_global_linear_id(); - - // Initialize local memory - value_type local_value; - reducer.init(&local_value); - if (global_id < size) { - if constexpr (std::is_void::value) - functor(global_id + begin, local_value, false); - else - functor(WorkTag(), global_id + begin, local_value, false); - } + auto scan_lambda_factory = [&](sycl::local_accessor local_mem, + sycl::local_accessor + num_teams_done, + sycl_device_ptr global_mem_, + sycl_device_ptr group_results_) { + auto lambda = [=](sycl::nd_item<1> item) { + auto global_mem = global_mem_; + auto group_results = group_results_; + + const CombinedFunctorReducer& + functor_reducer = functor_wrapper.get_functor(); + const FunctorType& functor = functor_reducer.get_functor(); + const typename Analysis::Reducer& reducer = + functor_reducer.get_reducer(); + + 
const auto n_wgroups = item.get_group_range()[0]; + const int wgroup_size = item.get_local_range()[0]; + + const int local_id = item.get_local_linear_id(); + const index_type global_id = item.get_global_linear_id(); + + // Initialize local memory + value_type local_value; + reducer.init(&local_value); + if (global_id < size) { + if constexpr (std::is_void::value) + functor(global_id + begin, local_value, false); + else + functor(WorkTag(), global_id + begin, local_value, false); + } - workgroup_scan<>(item, reducer, local_mem, local_value, - wgroup_size); + workgroup_scan<>(item, reducer, local_mem, local_value, wgroup_size); - // Write results to global memory - if (global_id < size) global_mem[global_id] = local_value; + // Write results to global memory + if (global_id < size) global_mem[global_id] = local_value; - if (local_id == wgroup_size - 1) { - group_results[item.get_group_linear_id()] = - local_mem[item.get_sub_group().get_group_range()[0] - 1]; + if (local_id == wgroup_size - 1) { + group_results[item.get_group_linear_id()] = + local_mem[item.get_sub_group().get_group_range()[0] - 1]; - sycl::atomic_ref - scratch_flags_ref(*scratch_flags); - num_teams_done[0] = ++scratch_flags_ref; - } - item.barrier(sycl::access::fence_space::global_space); - if (num_teams_done[0] == n_wgroups) { - if (local_id == 0) *scratch_flags = 0; - value_type total; - reducer.init(&total); - - for (unsigned int offset = 0; offset < n_wgroups; - offset += wgroup_size) { - index_type id = local_id + offset; - if (id < static_cast(n_wgroups)) - local_value = group_results[id]; - else - reducer.init(&local_value); - workgroup_scan<>( - item, reducer, local_mem, local_value, - std::min(n_wgroups - offset, wgroup_size)); - if (id < static_cast(n_wgroups)) { - reducer.join(&local_value, &total); - group_results[id] = local_value; - } - reducer.join( - &total, - &local_mem[item.get_sub_group().get_group_range()[0] - 1]); - if (offset + wgroup_size < n_wgroups) - 
item.barrier(sycl::access::fence_space::global_space); - } + sycl::atomic_ref + scratch_flags_ref(*scratch_flags); + num_teams_done[0] = ++scratch_flags_ref; + } + item.barrier(sycl::access::fence_space::global_space); + if (num_teams_done[0] == n_wgroups) { + if (local_id == 0) *scratch_flags = 0; + value_type total; + reducer.init(&total); + + for (unsigned int offset = 0; offset < n_wgroups; + offset += wgroup_size) { + index_type id = local_id + offset; + if (id < static_cast(n_wgroups)) + local_value = group_results[id]; + else + reducer.init(&local_value); + workgroup_scan<>( + item, reducer, local_mem, local_value, + std::min(n_wgroups - offset, wgroup_size)); + if (id < static_cast(n_wgroups)) { + reducer.join(&local_value, &total); + group_results[id] = local_value; } - }; - return lambda; - }; + reducer.join( + &total, + &local_mem[item.get_sub_group().get_group_range()[0] - 1]); + if (offset + wgroup_size < n_wgroups) + item.barrier(sycl::access::fence_space::global_space); + } + } + }; + return lambda; + }; size_t wgroup_size; size_t n_wgroups; - sycl::device_ptr global_mem; - sycl::device_ptr group_results; + sycl_device_ptr global_mem; + sycl_device_ptr group_results; desul::ensure_sycl_lock_arrays_on_device(q); @@ -254,9 +289,9 @@ class ParallelScanSYCLBase { // FIXME_SYCL consider only storing one value per block and recreate // initial results in the end before doing the final pass global_mem = - static_cast>(instance.scratch_space( + static_cast>(instance.scratch_space( n_wgroups * (wgroup_size + 1) * sizeof(value_type))); - m_scratch_host = static_cast>( + m_scratch_host = static_cast>( instance.scratch_host(sizeof(value_type))); group_results = global_mem + n_wgroups * wgroup_size; @@ -334,6 +369,11 @@ class ParallelScanSYCLBase { auto& instance = *m_policy.space().impl_internal_space_instance(); + // Only let one instance at a time resize the instance's scratch memory + // allocations. 
+ std::scoped_lock scratch_buffers_lock( + instance.m_mutexScratchSpace); + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -352,10 +392,7 @@ class ParallelScanSYCLBase { : m_functor_reducer(arg_functor, typename Analysis::Reducer{arg_functor}), m_policy(arg_policy), m_result_ptr(arg_result_ptr), - m_result_ptr_device_accessible(arg_result_ptr_device_accessible), - m_scratch_buffers_lock(m_policy.space() - .impl_internal_space_instance() - ->m_mutexScratchSpace) {} + m_result_ptr_device_accessible(arg_result_ptr_device_accessible) {} }; } // namespace Kokkos::Impl diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp index 9cc8008cdf31..19fad29150e5 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp @@ -56,6 +56,23 @@ void DeepCopyAsyncSYCL(void* dst, const void* src, size_t n) { /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ +namespace { + +std::string_view get_memory_space_name(sycl::usm::alloc allocation_kind) { + switch (allocation_kind) { + case sycl::usm::alloc::host: + return Kokkos::Experimental::SYCLHostUSMSpace::name(); + case sycl::usm::alloc::device: + return Kokkos::Experimental::SYCLDeviceUSMSpace::name(); + case sycl::usm::alloc::shared: + return Kokkos::Experimental::SYCLSharedUSMSpace::name(); + default: + Kokkos::abort("bug: unknown sycl allocation type"); + return "unreachable"; + } +} + +} // namespace namespace Kokkos { namespace Experimental { @@ -75,17 +92,17 @@ SYCLHostUSMSpace::SYCLHostUSMSpace() SYCLHostUSMSpace::SYCLHostUSMSpace(sycl::queue queue) : m_queue(std::move(queue)) {} -void* allocate_sycl( - const char* arg_label, const size_t arg_alloc_size, - const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle 
arg_handle, - const RawMemoryAllocationFailure::AllocationMechanism failure_tag, - const sycl::usm::alloc allocation_kind, const sycl::queue& queue) { +void* allocate_sycl(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle, + const sycl::usm::alloc allocation_kind, + const sycl::queue& queue) { void* const hostPtr = sycl::malloc(arg_alloc_size, queue, allocation_kind); - if (hostPtr == nullptr) - throw RawMemoryAllocationFailure( - arg_alloc_size, 1, RawMemoryAllocationFailure::FailureMode::Unknown, - failure_tag); + if (hostPtr == nullptr) { + Kokkos::Impl::throw_bad_alloc(get_memory_space_name(allocation_kind), + arg_alloc_size, arg_label); + } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -106,12 +123,10 @@ void* SYCLDeviceUSMSpace::allocate(const Kokkos::Experimental::SYCL& exec_space, const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocDevice, - sycl::usm::alloc::device, - *exec_space.impl_internal_space_instance()->m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::device, + *exec_space.impl_internal_space_instance()->m_queue); } void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { @@ -121,11 +136,9 @@ void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { void* SYCLDeviceUSMSpace::allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocDevice, - sycl::usm::alloc::device, m_queue); + return 
allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::device, m_queue); } void* SYCLSharedUSMSpace::allocate(const SYCL& exec_space, @@ -136,12 +149,10 @@ void* SYCLSharedUSMSpace::allocate(const SYCL& exec_space, const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocShared, - sycl::usm::alloc::shared, - *exec_space.impl_internal_space_instance()->m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::shared, + *exec_space.impl_internal_space_instance()->m_queue); } void* SYCLSharedUSMSpace::allocate(const size_t arg_alloc_size) const { @@ -150,11 +161,9 @@ void* SYCLSharedUSMSpace::allocate(const size_t arg_alloc_size) const { void* SYCLSharedUSMSpace::allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocShared, - sycl::usm::alloc::shared, m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::shared, m_queue); } void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, @@ -164,12 +173,10 @@ void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocHost, - sycl::usm::alloc::host, - 
*exec_space.impl_internal_space_instance()->m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::host, + *exec_space.impl_internal_space_instance()->m_queue); } void* SYCLHostUSMSpace::allocate(const size_t arg_alloc_size) const { @@ -178,11 +185,9 @@ void* SYCLHostUSMSpace::allocate(const size_t arg_alloc_size) const { void* SYCLHostUSMSpace::allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocHost, - sycl::usm::alloc::host, m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::host, m_queue); } void sycl_deallocate(const char* arg_label, void* const arg_alloc_ptr, diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp index dbba3827581c..1e42faa5a833 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp @@ -22,6 +22,7 @@ #ifdef KOKKOS_ENABLE_SYCL #include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -133,72 +134,71 @@ class SYCLTeamMember { const unsigned int team_rank_ = team_rank(); // First combine the values in the same subgroup +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine = [&](int shift) { + if (vector_range * shift < sub_group_range) { + const value_type tmp = Kokkos::Impl::SYCLReduction::shift_group_left( + sg, value, vector_range * shift); + if (team_rank_ + shift < team_size_) reducer.join(value, tmp); + } + }; + shuffle_combine(1); + shuffle_combine(2); + 
shuffle_combine(4); + shuffle_combine(8); + shuffle_combine(16); + KOKKOS_ASSERT(sub_group_range <= 32); +#else for (unsigned int shift = 1; vector_range * shift < sub_group_range; shift <<= 1) { - const value_type tmp = sg.shuffle_down(value, vector_range * shift); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_left( + sg, value, vector_range * shift); if (team_rank_ + shift < team_size_) reducer.join(value, tmp); } - value = sg.shuffle(value, 0); +#endif + value = Kokkos::Impl::SYCLReduction::select_from_group(sg, value, 0); - const auto n_subgroups = sg.get_group_range()[0]; + const int n_subgroups = sg.get_group_range()[0]; if (n_subgroups == 1) { reducer.reference() = value; return; } - // We need to chunk up the whole reduction because we might not have - // allocated enough memory. - const unsigned int maximum_work_range = - std::min(m_team_reduce_size / sizeof(value_type), n_subgroups); + // It was found experimentally that 16 is a good value for Intel PVC. + // Since there is a maximum number of 1024 threads with subgroup size 16, + // we have a maximum of 64 subgroups per workgroup which means 64/16=4 + // rounds for loading values into the reduction_array, and 16 redundant + // reduction steps executed by every thread. + constexpr int step_width = 16; + auto tmp_alloc = sycl::ext::oneapi::group_local_memory_for_overwrite< + value_type[step_width]>(m_item.get_group()); + auto& reduction_array = *tmp_alloc; const auto id_in_sg = sg.get_local_id()[0]; - auto reduction_array = - static_cast>(m_team_reduce); - // Load values into the first maximum_work_range values of the reduction + // Load values into the first step_width values of the reduction // array in chunks. This means that only sub groups with an id in the // corresponding chunk load values. 
- const auto group_id = sg.get_group_id()[0]; - if (id_in_sg == 0 && group_id < maximum_work_range) + const int group_id = sg.get_group_id()[0]; + if (id_in_sg == 0 && group_id < step_width) reduction_array[group_id] = value; sycl::group_barrier(m_item.get_group()); - for (unsigned int start = maximum_work_range; start < n_subgroups; - start += maximum_work_range) { + for (int start = step_width; start < n_subgroups; start += step_width) { if (id_in_sg == 0 && group_id >= start && - group_id < - std::min(start + maximum_work_range, n_subgroups)) + group_id < std::min(start + step_width, n_subgroups)) reducer.join(reduction_array[group_id - start], value); sycl::group_barrier(m_item.get_group()); } - // Let the first subgroup do the final reduction - if (group_id == 0) { - const auto local_range = sg.get_local_range()[0]; - auto result = - reduction_array[id_in_sg < maximum_work_range ? id_in_sg : 0]; - // In case the maximum_work_range is larger than the range of the first - // subgroup, we first combine the items with a higher index. - for (unsigned int offset = local_range; offset < maximum_work_range; - offset += local_range) - if (id_in_sg + offset < maximum_work_range) - reducer.join(result, reduction_array[id_in_sg + offset]); - sycl::group_barrier(sg); - - // Now do the actual subgroup reduction. - const auto min_range = - std::min(maximum_work_range, local_range); - for (unsigned int stride = 1; stride < min_range; stride <<= 1) { - const auto tmp = sg.shuffle_down(result, stride); - if (id_in_sg + stride < min_range) reducer.join(result, tmp); - } - if (id_in_sg == 0) reduction_array[0] = result; - } - sycl::group_barrier(m_item.get_group()); + // Do the final reduction for all threads redundantly + value = reduction_array[0]; + for (int i = 1; i < std::min(step_width, n_subgroups); ++i) + reducer.join(value, reduction_array[i]); - reducer.reference() = reduction_array[0]; - // Make sure that the reduction array hasn't been modified in the meantime. 
- m_item.barrier(sycl::access::fence_space::local_space); + reducer.reference() = value; + // Make sure that every thread is done using the reduction array. + sycl::group_barrier(m_item.get_group()); } //-------------------------------------------------------------------------- @@ -223,7 +223,8 @@ class SYCLTeamMember { // First combine the values in the same subgroup for (unsigned int stride = 1; vector_range * stride < sub_group_range; stride <<= 1) { - auto tmp = sg.shuffle_up(value, vector_range * stride); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, value, vector_range * stride); if (id_in_sg >= vector_range * stride) value += tmp; } @@ -249,7 +250,8 @@ class SYCLTeamMember { sub_group_range, n_active_subgroups - round * sub_group_range); auto local_value = base_data[idx]; for (unsigned int stride = 1; stride < upper_bound; stride <<= 1) { - auto tmp = sg.shuffle_up(local_value, stride); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, local_value, stride); if (id_in_sg >= stride) { if (idx < n_active_subgroups) local_value += tmp; @@ -267,7 +269,8 @@ class SYCLTeamMember { } auto total = base_data[n_active_subgroups - 1]; - const auto update = sg.shuffle_up(value, vector_range); + const auto update = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, value, vector_range); Type intermediate = (group_id > 0 ? base_data[group_id - 1] : 0) + (id_in_sg >= vector_range ? update : 0); @@ -320,7 +323,7 @@ class SYCLTeamMember { typename ReducerType::value_type tmp2 = tmp; for (int i = grange1; (i >>= 1);) { - tmp2 = sg.shuffle_down(tmp, i); + tmp2 = Kokkos::Impl::SYCLReduction::shift_group_left(sg, tmp, i); if (static_cast(tidx1) < i) { reducer.join(tmp, tmp2); } @@ -331,8 +334,9 @@ class SYCLTeamMember { // because floating point summation is not associative // and thus different threads could have different results. 
- tmp2 = sg.shuffle(tmp, (sg.get_local_id() / grange1) * grange1); - value = tmp2; + tmp2 = Kokkos::Impl::SYCLReduction::select_from_group( + sg, tmp, (sg.get_local_id() / grange1) * grange1); + value = tmp2; reducer.reference() = tmp2; } @@ -342,7 +346,7 @@ class SYCLTeamMember { KOKKOS_INLINE_FUNCTION SYCLTeamMember(sycl::local_ptr shared, const std::size_t shared_begin, const std::size_t shared_size, - sycl::device_ptr scratch_level_1_ptr, + sycl_device_ptr scratch_level_1_ptr, const std::size_t scratch_level_1_size, const sycl::nd_item<2> item, const int arg_league_rank, const int arg_league_size) @@ -839,7 +843,8 @@ parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< // [t] += [t-4] if t >= 4 // ... for (int j = 1; j < static_cast(grange1); j <<= 1) { - value_type tmp = sg.shuffle_up(val, j); + value_type tmp = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, val, j); if (j <= static_cast(tidx1)) { reducer.join(val, tmp); } @@ -850,7 +855,8 @@ parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< // Update i's contribution into the val and add it to accum for next round if (i < loop_boundaries.end) closure(i, val, true); - accum = sg.shuffle(val, mask + vector_offset); + accum = Kokkos::Impl::SYCLReduction::select_from_group( + sg, val, mask + vector_offset); } reducer.reference() = accum; } @@ -927,7 +933,8 @@ KOKKOS_INLINE_FUNCTION void single( const auto grange1 = item.get_local_range(1); const auto sg = item.get_sub_group(); if (item.get_local_id(1) == 0) lambda(val); - val = sg.shuffle(val, (sg.get_local_id() / grange1) * grange1); + val = Kokkos::Impl::SYCLReduction::select_from_group( + sg, val, (sg.get_local_id() / grange1) * grange1); } template diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp index c308384af090..abf0bd8f53e8 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp +++ 
b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp @@ -21,8 +21,53 @@ namespace Kokkos::Impl::SYCLReduction { -// FIXME_SYCL It appears that using shuffles is slower than going through local -// memory. +template +struct TrivialWrapper { + std::byte array[N]; +}; + +// shuffle down +template +T shift_group_left(sycl::sub_group sg, T x, + sycl::sub_group::linear_id_type delta) { + if constexpr (std::is_trivially_copyable_v) + return sycl::shift_group_left(sg, x, delta); + else { + auto tmp = sycl::shift_group_left( + sg, reinterpret_cast&>(x), delta); + return reinterpret_cast(tmp); + } +} + +// shuffle up +template +T shift_group_right(sycl::sub_group sg, T x, + sycl::sub_group::linear_id_type delta) { + if constexpr (std::is_trivially_copyable_v) + return sycl::shift_group_right(sg, x, delta); + else { + auto tmp = sycl::shift_group_right( + sg, reinterpret_cast&>(x), delta); + return reinterpret_cast(tmp); + } +} + +// shuffle +template +T select_from_group(sycl::sub_group sg, T x, + sycl::sub_group::id_type remote_local_id) { + if constexpr (std::is_trivially_copyable_v) + return sycl::select_from_group(sg, x, remote_local_id); + else { + auto tmp = sycl::select_from_group( + sg, reinterpret_cast&>(x), remote_local_id); + return reinterpret_cast(tmp); + } +} + +// FIXME_SYCL For some types, shuffle reductions are competitive with local +// memory reductions but they are significantly slower for the value type used +// in combined reductions with multiple double arguments. 
template inline constexpr bool use_shuffle_based_algorithm = false; // std::is_reference_v; @@ -30,7 +75,7 @@ inline constexpr bool use_shuffle_based_algorithm = false; template std::enable_if_t> workgroup_reduction( sycl::nd_item& item, sycl::local_accessor local_mem, - sycl::device_ptr results_ptr, + sycl_device_ptr results_ptr, sycl::global_ptr device_accessible_result_ptr, const unsigned int value_count_, const ReducerType& final_reducer, bool final, unsigned int max_size) { @@ -102,24 +147,40 @@ std::enable_if_t> workgroup_reduction( template std::enable_if_t> workgroup_reduction( sycl::nd_item& item, sycl::local_accessor local_mem, - ValueType local_value, sycl::device_ptr results_ptr, + ValueType local_value, sycl_device_ptr results_ptr, sycl::global_ptr device_accessible_result_ptr, const ReducerType& final_reducer, bool final, unsigned int max_size) { const auto local_id = item.get_local_linear_id(); // Perform the actual workgroup reduction in each subgroup // separately. - auto sg = item.get_sub_group(); - const int id_in_sg = sg.get_local_id()[0]; - const auto local_range = - std::min(sg.get_local_range()[0], max_size); + auto sg = item.get_sub_group(); + const int id_in_sg = sg.get_local_id()[0]; + const int local_range = std::min(sg.get_local_range()[0], max_size); const auto upper_stride_bound = - std::min(local_range - id_in_sg, max_size - local_id); + std::min(local_range - id_in_sg, max_size - local_id); +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine = [&](int stride) { + if (stride < local_range) { + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_left(sg, local_value, + stride); + if (stride < upper_stride_bound) final_reducer.join(&local_value, &tmp); + } + }; + shuffle_combine(1); + shuffle_combine(2); + shuffle_combine(4); + shuffle_combine(8); + shuffle_combine(16); + KOKKOS_ASSERT(local_range <= 32); +#else for (unsigned int stride = 1; stride < local_range; stride <<= 1) { - auto tmp 
= sg.shuffle_down(local_value, stride); + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_left(sg, local_value, stride); if (stride < upper_stride_bound) final_reducer.join(&local_value, &tmp); } +#endif // Copy the subgroup results into the first positions of the // reduction array. @@ -140,7 +201,7 @@ std::enable_if_t> workgroup_reduction( // the first subgroup, we first combine the items with a higher // index. if (n_active_subgroups > local_range) { - for (unsigned int offset = local_range; offset < n_active_subgroups; + for (int offset = local_range; offset < n_active_subgroups; offset += local_range) if (id_in_sg + offset < n_active_subgroups) { final_reducer.join(&sg_value, &local_mem[(id_in_sg + offset)]); @@ -149,11 +210,29 @@ std::enable_if_t> workgroup_reduction( } // Then, we proceed as before. +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine_sg = [&](int stride) { + if (stride < local_range) { + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_left(sg, sg_value, stride); + if (id_in_sg + stride < n_active_subgroups) + final_reducer.join(&sg_value, &tmp); + } + }; + shuffle_combine_sg(1); + shuffle_combine_sg(2); + shuffle_combine_sg(4); + shuffle_combine_sg(8); + shuffle_combine_sg(16); + KOKKOS_ASSERT(local_range <= 32); +#else for (unsigned int stride = 1; stride < local_range; stride <<= 1) { - auto tmp = sg.shuffle_down(sg_value, stride); + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_left(sg, sg_value, stride); if (id_in_sg + stride < n_active_subgroups) final_reducer.join(&sg_value, &tmp); } +#endif // Finally, we copy the workgroup results back to global memory // to be used in the next iteration. 
If this is the last diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial.cpp b/packages/kokkos/core/src/Serial/Kokkos_Serial.cpp index 39b201976b5a..44d797f1cccc 100644 --- a/packages/kokkos/core/src/Serial/Kokkos_Serial.cpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial.cpp @@ -35,6 +35,9 @@ namespace Kokkos { namespace Impl { +std::vector SerialInternal::all_instances; +std::mutex SerialInternal::all_instances_mutex; + bool SerialInternal::is_initialized() { return m_is_initialized; } void SerialInternal::initialize() { @@ -43,6 +46,12 @@ void SerialInternal::initialize() { Impl::SharedAllocationRecord::tracking_enable(); m_is_initialized = true; + + // guard pushing to all_instances + { + std::scoped_lock lock(all_instances_mutex); + all_instances.push_back(this); + } } void SerialInternal::finalize() { @@ -59,6 +68,17 @@ void SerialInternal::finalize() { } m_is_initialized = false; + + // guard erasing from all_instances + { + std::scoped_lock lock(all_instances_mutex); + auto it = std::find(all_instances.begin(), all_instances.end(), this); + if (it == all_instances.end()) + Kokkos::abort( + "Execution space instance to be removed couldn't be found!"); + std::swap(*it, all_instances.back()); + all_instances.pop_back(); + } } SerialInternal& SerialInternal::singleton() { @@ -97,9 +117,12 @@ void SerialInternal::resize_thread_team_data(size_t pool_reduce_bytes, m_thread_team_data.disband_team(); m_thread_team_data.disband_pool(); - space.deallocate("Kokkos::Serial::scratch_mem", - m_thread_team_data.scratch_buffer(), - m_thread_team_data.scratch_bytes()); + // impl_deallocate doesn't fence which we try to avoid here since that + // interferes with the using the m_instance_mutex for ensuring proper + // kernel enqueuing + space.impl_deallocate("Kokkos::Serial::scratch_mem", + m_thread_team_data.scratch_buffer(), + m_thread_team_data.scratch_bytes()); } if (pool_reduce_bytes < old_pool_reduce) { @@ -119,13 +142,7 @@ void 
SerialInternal::resize_thread_team_data(size_t pool_reduce_bytes, HostThreadTeamData::scratch_size(pool_reduce_bytes, team_reduce_bytes, team_shared_bytes, thread_local_bytes); - void* ptr = nullptr; - try { - ptr = space.allocate("Kokkos::Serial::scratch_mem", alloc_bytes); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - // For now, just rethrow the error message the existing way - Kokkos::Impl::throw_runtime_exception(failure.get_error_message()); - } + void* ptr = space.allocate("Kokkos::Serial::scratch_mem", alloc_bytes); m_thread_team_data.scratch_assign(static_cast(ptr), alloc_bytes, pool_reduce_bytes, team_reduce_bytes, @@ -147,7 +164,9 @@ Serial::Serial(NewInstance) : m_space_instance(new Impl::SerialInternal, [](Impl::SerialInternal* ptr) { ptr->finalize(); delete ptr; - }) {} + }) { + m_space_instance->initialize(); +} void Serial::print_configuration(std::ostream& os, bool /*verbose*/) const { os << "Host Serial Execution Space:\n"; diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial.hpp index 43eb4992ed73..81d43b31b35b 100644 --- a/packages/kokkos/core/src/Serial/Kokkos_Serial.hpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial.hpp @@ -60,7 +60,10 @@ class SerialInternal { static SerialInternal& singleton(); - std::mutex m_thread_team_data_mutex; + std::mutex m_instance_mutex; + + static std::vector all_instances; + static std::mutex all_instances_mutex; // Resize thread team data scratch memory void resize_thread_team_data(size_t pool_reduce_bytes, @@ -113,7 +116,15 @@ class Serial { Serial(); - Serial(NewInstance); + explicit Serial(NewInstance); + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "Serial execution space should be constructed explicitly.") + Serial(NewInstance) + : Serial(NewInstance{}) {} +#endif /// \brief True if and only if this method is being called in a /// thread-parallel function. 
@@ -137,7 +148,14 @@ class Serial { name, Kokkos::Tools::Experimental::SpecialSynchronizationCases:: GlobalDeviceSynchronization, - []() {}); // TODO: correct device ID + []() { + std::lock_guard lock_all_instances( + Impl::SerialInternal::all_instances_mutex); + for (auto* instance_ptr : Impl::SerialInternal::all_instances) { + std::lock_guard lock_instance( + instance_ptr->m_instance_mutex); + } + }); // TODO: correct device ID Kokkos::memory_fence(); } @@ -145,7 +163,10 @@ class Serial { "Kokkos::Serial::fence: Unnamed Instance Fence") const { Kokkos::Tools::Experimental::Impl::profile_fence_event( name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, - []() {}); // TODO: correct device ID + [this]() { + auto* internal_instance = this->impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); + }); // TODO: correct device ID Kokkos::memory_fence(); } diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp index 67978aa3e9f7..34e115eca9b7 100644 --- a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp @@ -43,7 +43,14 @@ class ParallelFor, } public: - inline void execute() const { this->exec(); } + inline void execute() const { + // Make sure kernels are running sequentially even when using multiple + // threads + auto* internal_instance = + m_iter.m_rp.space().impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); + this->exec(); + } template static int max_tile_size_product(const Policy&, const Functor&) { /** @@ -104,9 +111,11 @@ class ParallelReduce lock( - internal_instance->m_thread_team_data_mutex); + + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); 
internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp index 91b4c5671134..80faec9041d5 100644 --- a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp @@ -49,6 +49,10 @@ class ParallelFor, Kokkos::Serial> { public: inline void execute() const { + // Make sure kernels are running sequentially even when using multiple + // threads + auto* internal_instance = m_policy.space().impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); this->template exec(); } @@ -103,9 +107,11 @@ class ParallelReduce, const size_t thread_local_size = 0; // Never shrinks auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -187,10 +193,12 @@ class ParallelScan, const size_t team_shared_size = 0; // Never shrinks const size_t thread_local_size = 0; // Never shrinks - // Need to lock resize_thread_team_data auto* internal_instance = m_policy.space().impl_internal_space_instance(); - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, 
team_shared_size, thread_local_size); @@ -253,10 +261,12 @@ class ParallelScanWithTotal, const size_t team_shared_size = 0; // Never shrinks const size_t thread_local_size = 0; // Never shrinks - // Need to lock resize_thread_team_data auto* internal_instance = m_policy.space().impl_internal_space_instance(); - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp index a25b51496eff..a523cc86c97b 100644 --- a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp @@ -247,9 +247,11 @@ class ParallelFor, const size_t thread_local_size = 0; // Never shrinks auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -319,9 +321,11 @@ class ParallelReduce lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, 
thread_local_size); diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp index fd0f221365b5..a3501a437d29 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp @@ -188,8 +188,6 @@ class ThreadsExecTeamMember { using type = typename if_c::type; - if (m_instance == nullptr) return value; - if (team_rank() != team_size() - 1) * ((volatile type*)m_instance->scratch_memory()) = value; @@ -229,8 +227,6 @@ class ThreadsExecTeamMember { using type = typename if_c::type; - if (m_instance == nullptr) return; - type* const local_value = ((type*)m_instance->scratch_memory()); // Set this thread's contribution @@ -285,8 +281,6 @@ class ThreadsExecTeamMember { using type = typename if_c::type; - if (m_instance == nullptr) return type(0); - volatile type* const work_value = ((type*)m_instance->scratch_memory()); *work_value = value; @@ -358,6 +352,7 @@ class ThreadsExecTeamMember { m_chunk_size(team.chunk_size()), m_league_chunk_end(0), m_team_alloc(team.team_alloc()) { + KOKKOS_ASSERT(m_instance != nullptr); if (team.league_size()) { // Execution is using device-team interface: diff --git a/packages/kokkos/core/src/View/Kokkos_ViewAlloc.hpp b/packages/kokkos/core/src/View/Kokkos_ViewAlloc.hpp new file mode 100644 index 000000000000..95cb6f619cce --- /dev/null +++ b/packages/kokkos/core/src/View/Kokkos_ViewAlloc.hpp @@ -0,0 +1,318 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#endif + +#ifndef KOKKOS_VIEW_ALLOC_HPP +#define KOKKOS_VIEW_ALLOC_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace Kokkos::Impl { + +template +bool is_zero_byte(const T& x) { + constexpr std::byte all_zeroes[sizeof(T)] = {}; + return std::memcmp(&x, all_zeroes, sizeof(T)) == 0; +} + +//---------------------------------------------------------------------------- + +/* + * The construction, assignment to default, and destruction + * are merged into a single functor. + * Primarily to work around an unresolved CUDA back-end bug + * that would lose the destruction cuda device function when + * called from the shared memory tracking destruction. + * Secondarily to have two fewer partial specializations. + */ +template ::value> +struct ViewValueFunctor; + +template +struct ViewValueFunctor { + using ExecSpace = typename DeviceType::execution_space; + + struct DestroyTag {}; + struct ConstructTag {}; + + ExecSpace space; + ValueType* ptr; + size_t n; + std::string name; + bool default_exec_space; + + template + KOKKOS_INLINE_FUNCTION + std::enable_if_t::value> + operator()(ConstructTag const&, const size_t i) const { + new (ptr + i) ValueType(); + } + + KOKKOS_INLINE_FUNCTION void operator()(DestroyTag const&, + const size_t i) const { + (ptr + i)->~ValueType(); + } + + ViewValueFunctor() = default; + ViewValueFunctor(const ViewValueFunctor&) = default; + ViewValueFunctor& operator=(const ViewValueFunctor&) = default; + + ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, + size_t const arg_n, std::string arg_name) + : space(arg_space), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(false) { + functor_instantiate_workaround(); + } + + ViewValueFunctor(ValueType* 
const arg_ptr, size_t const arg_n, + std::string arg_name) + : space(ExecSpace{}), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(true) { + functor_instantiate_workaround(); + } + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value> + construct_dispatch() { + ValueType value{}; +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX + if (Impl::is_zero_byte(value)) { + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + // We are not really using parallel_for here but using beginParallelFor + // instead of begin_parallel_for (and adding "via memset") is the best + // we can do to indicate that this is not supposed to be tunable (and + // doesn't really execute a parallel_for). + Kokkos::Profiling::beginParallelFor( + "Kokkos::View::initialization [" + name + "] via memset", + Kokkos::Profiling::Experimental::device_id(space), &kpID); + } + (void)ZeroMemset( + space, Kokkos::View>(ptr, n)); + + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + if (default_exec_space) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + } else { +#endif + parallel_for_implementation(); +#ifndef KOKKOS_ARCH_A64FX + } +#endif + } + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value)> + construct_dispatch() { + parallel_for_implementation(); + } + + template + void parallel_for_implementation() { + using PolicyType = + Kokkos::RangePolicy, Tag>; + PolicyType policy(space, 0, n); + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + const std::string functor_name = + (std::is_same_v + ? 
"Kokkos::View::destruction [" + name + "]" + : "Kokkos::View::initialization [" + name + "]"); + Kokkos::Profiling::beginParallelFor( + functor_name, Kokkos::Profiling::Experimental::device_id(space), + &kpID); + } + +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, + true); + } +#endif + const Kokkos::Impl::ParallelFor closure( + *this, policy); + closure.execute(); + if (default_exec_space || std::is_same_v) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + } + + void construct_shared_allocation() { construct_dispatch(); } + + void destroy_shared_allocation() { +#ifdef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND + if constexpr (std::is_same_v) + for (size_t i = 0; i < n; ++i) (ptr + i)->~ValueType(); + else +#endif + { + parallel_for_implementation(); + } + } + + // This function is to ensure that the functor with DestroyTag is instantiated + // This is a workaround to avoid "cudaErrorInvalidDeviceFunction" error later + // when the function is queried with cudaFuncGetAttributes + void functor_instantiate_workaround() { +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) + if (false) { + parallel_for_implementation(); + } +#endif + } +}; + +template +struct ViewValueFunctor { + using ExecSpace = typename DeviceType::execution_space; + using PolicyType = Kokkos::RangePolicy>; + + ExecSpace space; + ValueType* ptr; + size_t n; + std::string name; + bool default_exec_space; + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { ptr[i] = ValueType(); } + + ViewValueFunctor() = default; + ViewValueFunctor(const ViewValueFunctor&) = default; + ViewValueFunctor& operator=(const ViewValueFunctor&) = default; + + ViewValueFunctor(ExecSpace 
const& arg_space, ValueType* const arg_ptr, + size_t const arg_n, std::string arg_name) + : space(arg_space), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(false) {} + + ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, + std::string arg_name) + : space(ExecSpace{}), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(true) {} + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value> + construct_shared_allocation() { + // Shortcut for zero initialization +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX + ValueType value{}; + if (Impl::is_zero_byte(value)) { + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + // We are not really using parallel_for here but using beginParallelFor + // instead of begin_parallel_for (and adding "via memset") is the best + // we can do to indicate that this is not supposed to be tunable (and + // doesn't really execute a parallel_for). 
+ Kokkos::Profiling::beginParallelFor( + "Kokkos::View::initialization [" + name + "] via memset", + Kokkos::Profiling::Experimental::device_id(space), &kpID); + } + + (void)ZeroMemset( + space, Kokkos::View>(ptr, n)); + + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + if (default_exec_space) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + } else { +#endif + parallel_for_implementation(); +#ifndef KOKKOS_ARCH_A64FX + } +#endif + } + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value)> + construct_shared_allocation() { + parallel_for_implementation(); + } + + void parallel_for_implementation() { + PolicyType policy(space, 0, n); + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor( + "Kokkos::View::initialization [" + name + "]", + Kokkos::Profiling::Experimental::device_id(space), &kpID); + } +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, + true); + } +#endif + const Kokkos::Impl::ParallelFor closure( + *this, policy); + closure.execute(); + if (default_exec_space) + space.fence( + "Kokkos::Impl::ViewValueFunctor: Fence after setting values in " + "view"); + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + } + + void destroy_shared_allocation() {} +}; +} // namespace Kokkos::Impl + +#endif // KOKKOS_VIEW_ALLOC_HPP diff --git a/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp b/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp new file mode 100644 index 000000000000..8814cc015ef5 --- /dev/null +++ b/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp @@ -0,0 +1,220 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#endif + +#ifndef KOKKOS_MDSPAN_ACCESSOR_HPP +#define KOKKOS_MDSPAN_ACCESSOR_HPP + +#include +#include +#include +#include + +namespace Kokkos { + +// For now use the accessors in Impl namespace, as an +// implementation detail for rebasing View on mdspan +namespace Impl { + +template +struct SpaceAwareAccessor { + // Part of Accessor Requirements + using element_type = typename NestedAccessor::element_type; + using reference = typename NestedAccessor::reference; + using data_handle_type = typename NestedAccessor::data_handle_type; + using offset_policy = + SpaceAwareAccessor; + + // Specific to SpaceAwareAccessor + using memory_space = MemorySpace; + using nested_accessor_type = NestedAccessor; + + static_assert(is_memory_space_v); + + KOKKOS_DEFAULTED_FUNCTION + constexpr SpaceAwareAccessor() = default; + + template < + class OtherMemorySpace, class OtherNestedAccessorType, + std::enable_if_t< + MemorySpaceAccess::assignable && + std::is_constructible_v, + int> = 0> + KOKKOS_FUNCTION constexpr SpaceAwareAccessor( + const SpaceAwareAccessor& + other) noexcept + : nested_acc(other.nested_acc) {} + + KOKKOS_FUNCTION + SpaceAwareAccessor(const NestedAccessor& acc) : nested_acc(acc) {} + + KOKKOS_FUNCTION + explicit operator NestedAccessor() const { return nested_acc; } + + KOKKOS_FUNCTION + constexpr reference access(data_handle_type p, size_t i) const noexcept { + Kokkos::Impl::runtime_check_memory_access_violation( + 
"Kokkos::SpaceAwareAccessor ERROR: attempt to access inaccessible " + "memory space"); + return nested_acc.access(p, i); + } + + KOKKOS_FUNCTION + constexpr typename offset_policy::data_handle_type offset(data_handle_type p, + size_t i) const + noexcept { + return nested_acc.offset(p, i); + } + + // Canonical way for accessing nested accessor see ISO C++ + // [linalg.scaled.scaledaccessor] + KOKKOS_FUNCTION + constexpr const NestedAccessor& nested_accessor() const noexcept { + return nested_acc; + } + + private: +// We either compile with our custom mdspan impl +// in which case we discover inside it whether no_unique_address +// works, or we use C++23 in which case it better be available +#ifdef _MDSPAN_NO_UNIQUE_ADDRESS + _MDSPAN_NO_UNIQUE_ADDRESS +#else + [[no_unique_address]] +#endif + NestedAccessor nested_acc; + template + friend struct SpaceAwareAccessor; +}; + +template +struct SpaceAwareAccessor { + // Part of Accessor Requirements + using element_type = typename NestedAccessor::element_type; + using reference = typename NestedAccessor::reference; + using data_handle_type = typename NestedAccessor::data_handle_type; + + using offset_policy = + SpaceAwareAccessor; + + // Specific to SpaceAwareAccessor + using memory_space = AnonymousSpace; + using nested_accessor_type = NestedAccessor; + + KOKKOS_DEFAULTED_FUNCTION + constexpr SpaceAwareAccessor() = default; + + template , + int> = 0> + KOKKOS_FUNCTION constexpr SpaceAwareAccessor( + const SpaceAwareAccessor& + other) noexcept + : nested_acc(other.nested_acc) {} + + KOKKOS_FUNCTION + SpaceAwareAccessor(const NestedAccessor& acc) : nested_acc(acc) {} + + KOKKOS_FUNCTION + explicit operator NestedAccessor() const { return nested_acc; } + + KOKKOS_FUNCTION + constexpr reference access(data_handle_type p, size_t i) const noexcept { + return nested_acc.access(p, i); + } + + KOKKOS_FUNCTION + constexpr typename offset_policy::data_handle_type offset(data_handle_type p, + size_t i) const + noexcept { + return 
nested_acc.offset(p, i); + } + + // Canonical way for accessing nested accessor see ISO C++ + // [linalg.scaled.scaledaccessor] + KOKKOS_FUNCTION + constexpr const NestedAccessor& nested_accessor() const noexcept { + return nested_acc; + } + + private: +// We either compile with our custom mdspan impl +// in which case we discover inside it whether no_unique_address +// works, or we use C++23 in which case it better be available +#ifdef _MDSPAN_NO_UNIQUE_ADDRESS + _MDSPAN_NO_UNIQUE_ADDRESS +#else + [[no_unique_address]] +#endif + NestedAccessor nested_acc; + template + friend struct SpaceAwareAccessor; +}; + +// Like atomic_accessor_relaxed proposed for ISO C++26 but with +// defaulted memory scope - similar to how desul's AtomicRef has a memory scope +template +struct AtomicAccessorRelaxed { + using element_type = ElementType; + using reference = + desul::AtomicRef; + using data_handle_type = ElementType*; + using offset_policy = AtomicAccessorRelaxed; + + KOKKOS_DEFAULTED_FUNCTION + AtomicAccessorRelaxed() = default; + + // Conversions from non-const to const element type + template >* = nullptr> + KOKKOS_FUNCTION constexpr AtomicAccessorRelaxed( + Kokkos::default_accessor) noexcept {} + + template >* = nullptr> + KOKKOS_FUNCTION constexpr AtomicAccessorRelaxed( + AtomicAccessorRelaxed) noexcept {} + + template >* = nullptr> + KOKKOS_FUNCTION explicit operator default_accessor() const { + return default_accessor{}; + } + + KOKKOS_FUNCTION + reference access(data_handle_type p, size_t i) const noexcept { + return reference(p[i]); + } + + KOKKOS_FUNCTION + data_handle_type offset(data_handle_type p, size_t i) const noexcept { + return p + i; + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp b/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp index 3846b52d2396..29d1e00adfc2 100644 --- a/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp +++ 
b/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp @@ -37,9 +37,6 @@ struct ViewDimension; template struct ViewDataType; -} // namespace Kokkos::Impl - -namespace Kokkos::Experimental::Impl { // A few things to note -- // - mdspan allows for 0-rank extents similarly to View, so we don't need @@ -106,6 +103,20 @@ struct DataTypeFromExtents { // Will cause a compile error if it is malformed (i.e. dynamic after static) using type = typename ::Kokkos::Impl::ViewDataType::type; }; -} // namespace Kokkos::Experimental::Impl + +template +constexpr KOKKOS_INLINE_FUNCTION auto extents_from_view_mapping_impl( + const VM &view_mapping, std::index_sequence) { + return Extents{view_mapping.extent(Indices)...}; +} + +template +constexpr KOKKOS_INLINE_FUNCTION auto extents_from_view_mapping( + const VM &view_mapping) { + static_assert(Extents::rank() == VM::Rank); + return extents_from_view_mapping_impl( + view_mapping, std::make_index_sequence{}); +} +} // namespace Kokkos::Impl #endif // KOKKOS_EXPERIMENTAL_MDSPAN_EXTENTS_HPP diff --git a/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp b/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp new file mode 100644 index 000000000000..089628137d75 --- /dev/null +++ b/packages/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp @@ -0,0 +1,156 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#endif + +#ifndef KOKKOS_EXPERIMENTAL_MDSPAN_LAYOUT_HPP +#define KOKKOS_EXPERIMENTAL_MDSPAN_LAYOUT_HPP + +#include "Kokkos_MDSpan_Extents.hpp" +#include + +namespace Kokkos::Impl { + +template +struct LayoutFromArrayLayout; + +template <> +struct LayoutFromArrayLayout { + using type = Kokkos::Experimental::layout_left_padded; +}; + +template <> +struct LayoutFromArrayLayout { + using type = Kokkos::Experimental::layout_right_padded; +}; + +template <> +struct LayoutFromArrayLayout { + using type = layout_stride; +}; + +template +KOKKOS_INLINE_FUNCTION auto array_layout_from_mapping( + const typename MDSpanType::mapping_type &mapping) { + using mapping_type = typename MDSpanType::mapping_type; + using extents_type = typename mapping_type::extents_type; + + constexpr auto rank = extents_type::rank(); + const auto &ext = mapping.extents(); + + static_assert(rank <= ARRAY_LAYOUT_MAX_RANK, + "Unsupported rank for mdspan (must be <= 8)"); + + if constexpr (std::is_same_v) { + return Kokkos::LayoutStride{ + rank > 0 ? ext.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 0 ? mapping.stride(0) : 0, + rank > 1 ? ext.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 1 ? mapping.stride(1) : 0, + rank > 2 ? ext.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 2 ? mapping.stride(2) : 0, + rank > 3 ? ext.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 3 ? mapping.stride(3) : 0, + rank > 4 ? ext.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 4 ? mapping.stride(4) : 0, + rank > 5 ? ext.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 5 ? mapping.stride(5) : 0, + rank > 6 ? ext.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 6 ? mapping.stride(6) : 0, + rank > 7 ? ext.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 7 ? 
mapping.stride(7) : 0, + }; + } else { + // FIXME: Kokkos Layouts don't store stride (it's in the mapping) + // We could conceivably fix this by adding an extra ViewCtorProp for + // an abritrary padding. For now we will check for this. + if constexpr (rank > 1 && + (std::is_same_v> || + std::is_same_v>)) { + [[maybe_unused]] constexpr size_t strided_index = + std::is_same_v< + typename mapping_type::layout_type, + Kokkos::Experimental::layout_left_padded> + ? 1 + : rank - 2; + [[maybe_unused]] constexpr size_t extent_index = + std::is_same_v< + typename mapping_type::layout_type, + Kokkos::Experimental::layout_left_padded> + ? 0 + : rank - 1; + KOKKOS_ASSERT(mapping.stride(strided_index) == ext.extent(extent_index)); + } + + return ArrayLayout{rank > 0 ? ext.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 1 ? ext.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 2 ? ext.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 3 ? ext.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 4 ? ext.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 5 ? ext.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 6 ? ext.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 7 ? ext.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG}; + } +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_INLINE_FUNCTION auto mapping_from_view_mapping(const VM &view_mapping) { + using mapping_type = typename MDSpanType::mapping_type; + using extents_type = typename mapping_type::extents_type; + + // std::span is not available in C++17 (our current requirements), + // so we need to use the std::array constructor for layout mappings. 
+ // FIXME When C++20 is available, we can use std::span here instead + std::size_t strides[VM::Rank]; + view_mapping.stride_fill(&strides[0]); + if constexpr (std::is_same_v) { + return mapping_type(Kokkos::mdspan_non_standard, + extents_from_view_mapping(view_mapping), + strides); + } else if constexpr (VM::Rank > 1 && + std::is_same_v>) { + return mapping_type(extents_from_view_mapping(view_mapping), + strides[1]); + } else if constexpr (VM::Rank > 1 && + std::is_same_v>) { + return mapping_type(extents_from_view_mapping(view_mapping), + strides[VM::Rank - 2]); + } else { + return mapping_type(extents_from_view_mapping(view_mapping)); + } +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +} // namespace Kokkos::Impl + +#endif // KOKKOS_EXPERIMENTAL_MDSPAN_LAYOUT_HPP diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp index bd12c5c6a99f..d13c90825c5a 100644 --- a/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp +++ b/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp @@ -19,6 +19,9 @@ #if defined(KOKKOS_ENABLE_SYCL) #include +#ifdef SYCL_EXT_ONEAPI_GRAPH +#include +#endif #include #include #include diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp index 4a696526161e..6f862718bcb0 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp @@ -91,6 +91,7 @@ void combine(Kokkos::InitializationSettings& out, KOKKOS_IMPL_COMBINE_SETTING(map_device_id_by); KOKKOS_IMPL_COMBINE_SETTING(device_id); KOKKOS_IMPL_COMBINE_SETTING(disable_warnings); + KOKKOS_IMPL_COMBINE_SETTING(print_configuration); KOKKOS_IMPL_COMBINE_SETTING(tune_internals); KOKKOS_IMPL_COMBINE_SETTING(tools_help); KOKKOS_IMPL_COMBINE_SETTING(tools_libs); @@ -610,6 +611,7 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #else declare_configuration_metadata("options", 
"KOKKOS_ENABLE_LIBDL", "no"); #endif + declare_configuration_metadata("architecture", "Default Device", typeid(Kokkos::DefaultExecutionSpace).name()); @@ -785,34 +787,18 @@ void initialize_internal(const Kokkos::InitializationSettings& settings) { post_initialize_internal(settings); } -void pre_finalize_internal() { - typename decltype(finalize_hooks)::size_type numSuccessfulCalls = 0; +// declared noexcept such that std::terminate is called if any of the registered +// function throws +void call_registered_finalize_hook_functions() noexcept { while (!finalize_hooks.empty()) { - auto f = finalize_hooks.top(); - try { - f(); - } catch (...) { - std::cerr << "Kokkos::finalize: A finalize hook (set via " - "Kokkos::push_finalize_hook) threw an exception that it did " - "not catch." - " Per std::atexit rules, this results in std::terminate. " - "This is " - "finalize hook number " - << numSuccessfulCalls - << " (1-based indexing) " - "out of " - << finalize_hooks.size() - << " to call. Remember that " - "Kokkos::finalize calls finalize hooks in reverse order " - "from how they " - "were pushed." 
- << std::endl; - std::terminate(); - } + auto const& func = finalize_hooks.top(); + func(); finalize_hooks.pop(); - ++numSuccessfulCalls; } +} +void pre_finalize_internal() { + call_registered_finalize_hook_functions(); Kokkos::Profiling::finalize(); } diff --git a/packages/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp b/packages/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp index 3693dff3d465..05d485491932 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp @@ -56,7 +56,7 @@ struct GraphImpl : private ExecutionSpaceInstanceStorage { //---------------------------------------------------------------------------- // {{{2 - // Not moveable or copyable; it spends its whole live as a shared_ptr in the + // Not movable or copyable; it spends its whole live as a shared_ptr in the // Graph object GraphImpl() = default; GraphImpl(GraphImpl const&) = delete; @@ -82,10 +82,7 @@ struct GraphImpl : private ExecutionSpaceInstanceStorage { template // requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl void add_node(std::shared_ptr const& arg_node_ptr) { - static_assert( - NodeImpl::kernel_type::Policy::is_graph_kernel::value, - "Something has gone horribly wrong, but it's too complicated to " - "explain here. Buy Daisy a coffee and she'll explain it to you."); + static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value); // Since this is always called before any calls to add_predecessor involving // it, we can treat this node as a sink until we discover otherwise. 
arg_node_ptr->node_details_t::set_kernel(arg_node_ptr->get_kernel()); diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp b/packages/kokkos/core/src/impl/Kokkos_DesulAtomicsConfig.hpp similarity index 72% rename from packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp rename to packages/kokkos/core/src/impl/Kokkos_DesulAtomicsConfig.hpp index 4cf170f5f131..02ab127d5c5d 100644 --- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_DesulAtomicsConfig.hpp @@ -13,15 +13,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE -#include -static_assert(false, - "Including non-public Kokkos header files is not allowed."); -#endif -#ifndef KOKKOS_ATOMICS_DESUL_CONFIG_HPP -#define KOKKOS_ATOMICS_DESUL_CONFIG_HPP -#include +#ifndef KOKKOS_DESUL_ATOMICS_CONFIG_HPP +#define KOKKOS_DESUL_ATOMICS_CONFIG_HPP #if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) #define DESUL_CUDA_ARCH_IS_PRE_PASCAL @@ -32,4 +26,4 @@ static_assert(false, #define DESUL_CUDA_ARCH_IS_PRE_VOLTA #endif -#endif // KOKKOS_ATOMICS_DESUL_CONFIG_HPP +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.cpp b/packages/kokkos/core/src/impl/Kokkos_Error.cpp index de6e83ed1f28..0dcd5d523d3c 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Error.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Error.cpp @@ -18,133 +18,54 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE #endif -#include -#include - #include -#include #include +#include #include #include // show_warnings #include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -namespace Kokkos { -namespace Impl { - -void throw_runtime_exception(const std::string &msg) { +void Kokkos::Impl::throw_runtime_exception(const std::string &msg) { throw std::runtime_error(msg); } -void log_warning(const std::string 
&msg) { +void Kokkos::Impl::throw_bad_alloc(std::string_view memory_space_name, + std::size_t size, std::string_view label) { + std::stringstream ss; + ss << "Kokkos ERROR: " << memory_space_name + << " memory space failed to allocate " << human_memory_size(size) + << " (label=\"" << label << "\")."; + throw std::runtime_error(ss.str()); +} + +void Kokkos::Impl::log_warning(const std::string &msg) { if (show_warnings()) { std::cerr << msg << std::flush; } } -std::string human_memory_size(size_t arg_bytes) { +std::string Kokkos::Impl::human_memory_size(size_t arg_bytes) { double bytes = arg_bytes; const double K = 1024; const double M = K * 1024; const double G = M * 1024; + const double T = G * 1024; std::ostringstream out; if (bytes < K) { out << std::setprecision(4) << bytes << " B"; } else if (bytes < M) { bytes /= K; - out << std::setprecision(4) << bytes << " K"; + out << std::setprecision(4) << bytes << " KiB"; } else if (bytes < G) { bytes /= M; - out << std::setprecision(4) << bytes << " M"; - } else { + out << std::setprecision(4) << bytes << " MiB"; + } else if (bytes < T) { bytes /= G; - out << std::setprecision(4) << bytes << " G"; - } - return out.str(); -} - -} // namespace Impl - -void Experimental::RawMemoryAllocationFailure::print_error_message( - std::ostream &o) const { - o << "Allocation of size " - << ::Kokkos::Impl::human_memory_size(m_attempted_size); - o << " failed"; - switch (m_failure_mode) { - case FailureMode::OutOfMemoryError: - o << ", likely due to insufficient memory."; - break; - case FailureMode::AllocationNotAligned: - o << " because the allocation was improperly aligned."; - break; - case FailureMode::InvalidAllocationSize: - o << " because the requested allocation size is not a valid size for the" - " requested allocation mechanism (it's probably too large)."; - break; - // TODO move this to the subclass for Cuda-related things - case FailureMode::MaximumCudaUVMAllocationsExceeded: - o << " because the maximum Cuda UVM 
allocations was exceeded."; - break; - case FailureMode::Unknown: o << " because of an unknown error."; break; - } - o << " (The allocation mechanism was "; - switch (m_mechanism) { - case AllocationMechanism::StdMalloc: o << "standard malloc()."; break; - case AllocationMechanism::CudaMalloc: o << "cudaMalloc()."; break; - case AllocationMechanism::CudaMallocManaged: - o << "cudaMallocManaged()."; - break; - case AllocationMechanism::CudaHostAlloc: o << "cudaHostAlloc()."; break; - case AllocationMechanism::HIPMalloc: o << "hipMalloc()."; break; - case AllocationMechanism::HIPHostMalloc: o << "hipHostMalloc()."; break; - case AllocationMechanism::HIPMallocManaged: - o << "hipMallocManaged()."; - break; - case AllocationMechanism::SYCLMallocDevice: - o << "sycl::malloc_device()."; - break; - case AllocationMechanism::SYCLMallocShared: - o << "sycl::malloc_shared()."; - break; - case AllocationMechanism::SYCLMallocHost: - o << "sycl::malloc_host()."; - break; - default: o << "unsupported."; + out << std::setprecision(4) << bytes << " GiB"; + } else { + bytes /= T; + out << std::setprecision(4) << bytes << " TiB"; } - append_additional_error_information(o); - o << ")" << std::endl; -} - -std::string Experimental::RawMemoryAllocationFailure::get_error_message() - const { - std::ostringstream out; - print_error_message(out); return out.str(); } - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -#ifdef KOKKOS_ENABLE_CUDA -namespace Experimental { - -void CudaRawMemoryAllocationFailure::append_additional_error_information( - std::ostream &o) const { - if (m_error_code != cudaSuccess) { - o << " The Cuda allocation returned the error code \"" - << cudaGetErrorName(m_error_code) << "\"."; - } -} - -} // end namespace Experimental -#endif - -} // namespace Kokkos diff --git 
a/packages/kokkos/core/src/impl/Kokkos_Error.hpp b/packages/kokkos/core/src/impl/Kokkos_Error.hpp index 1058fd98dbf7..9a80c7b31b82 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Error.hpp @@ -18,116 +18,19 @@ #define KOKKOS_IMPL_ERROR_HPP #include -#include #include #include #include -namespace Kokkos { -namespace Impl { +namespace Kokkos::Impl { [[noreturn]] void throw_runtime_exception(const std::string &msg); - +[[noreturn]] void throw_bad_alloc(std::string_view memory_space_name, + std::size_t size, std::string_view label); void log_warning(const std::string &msg); -std::string human_memory_size(size_t arg_bytes); - -} // namespace Impl +std::string human_memory_size(size_t bytes); -namespace Experimental { +} // namespace Kokkos::Impl -class RawMemoryAllocationFailure : public std::bad_alloc { - public: - enum class FailureMode { - OutOfMemoryError, - AllocationNotAligned, - InvalidAllocationSize, - MaximumCudaUVMAllocationsExceeded, - Unknown - }; - enum class AllocationMechanism { - StdMalloc, -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - PosixMemAlign KOKKOS_DEPRECATED, - PosixMMap KOKKOS_DEPRECATED, - IntelMMAlloc KOKKOS_DEPRECATED, #endif - CudaMalloc, - CudaMallocManaged, - CudaHostAlloc, - HIPMalloc, - HIPHostMalloc, - HIPMallocManaged, - SYCLMallocDevice, - SYCLMallocShared, - SYCLMallocHost, - OpenACCMalloc, - }; - - private: - size_t m_attempted_size; - size_t m_attempted_alignment; - FailureMode m_failure_mode; - AllocationMechanism m_mechanism; - - public: - RawMemoryAllocationFailure( - size_t arg_attempted_size, size_t arg_attempted_alignment, - FailureMode arg_failure_mode = FailureMode::OutOfMemoryError, - AllocationMechanism arg_mechanism = - AllocationMechanism::StdMalloc) noexcept - : m_attempted_size(arg_attempted_size), - m_attempted_alignment(arg_attempted_alignment), - m_failure_mode(arg_failure_mode), - m_mechanism(arg_mechanism) {} - - RawMemoryAllocationFailure() noexcept = delete; - 
- RawMemoryAllocationFailure(RawMemoryAllocationFailure const &) noexcept = - default; - RawMemoryAllocationFailure(RawMemoryAllocationFailure &&) noexcept = default; - - RawMemoryAllocationFailure &operator =( - RawMemoryAllocationFailure const &) noexcept = default; - RawMemoryAllocationFailure &operator =( - RawMemoryAllocationFailure &&) noexcept = default; - - ~RawMemoryAllocationFailure() noexcept override = default; - - [[nodiscard]] const char *what() const noexcept override { - if (m_failure_mode == FailureMode::OutOfMemoryError) { - return "Memory allocation error: out of memory"; - } else if (m_failure_mode == FailureMode::AllocationNotAligned) { - return "Memory allocation error: allocation result was under-aligned"; - } - - return nullptr; // unreachable - } - - [[nodiscard]] size_t attempted_size() const noexcept { - return m_attempted_size; - } - - [[nodiscard]] size_t attempted_alignment() const noexcept { - return m_attempted_alignment; - } - - [[nodiscard]] AllocationMechanism allocation_mechanism() const noexcept { - return m_mechanism; - } - - [[nodiscard]] FailureMode failure_mode() const noexcept { - return m_failure_mode; - } - - void print_error_message(std::ostream &o) const; - [[nodiscard]] std::string get_error_message() const; - - virtual void append_additional_error_information(std::ostream &) const {} -}; - -} // end namespace Experimental - -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */ diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 1047b773d774..1c1fb67ff046 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -79,22 +79,9 @@ void *HostSpace::impl_allocate( ptr = operator new (arg_alloc_size, std::align_val_t(alignment), std::nothrow_t{}); - if ((ptr == nullptr) || (reinterpret_cast(ptr) == ~uintptr_t(0)) || + if (!ptr || (reinterpret_cast(ptr) == ~uintptr_t(0)) || 
(reinterpret_cast(ptr) & alignment_mask)) { - Experimental::RawMemoryAllocationFailure::FailureMode failure_mode = - Experimental::RawMemoryAllocationFailure::FailureMode:: - AllocationNotAligned; - if (ptr == nullptr) { - failure_mode = Experimental::RawMemoryAllocationFailure::FailureMode:: - OutOfMemoryError; - } - - Experimental::RawMemoryAllocationFailure::AllocationMechanism alloc_mec = - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - StdMalloc; - - throw Kokkos::Experimental::RawMemoryAllocationFailure( - arg_alloc_size, alignment, failure_mode, alloc_mec); + Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); @@ -109,9 +96,8 @@ void HostSpace::deallocate(void *const arg_alloc_ptr, void HostSpace::deallocate(const char *arg_label, void *const arg_alloc_ptr, const size_t arg_alloc_size, - const size_t - - arg_logical_size) const { + const size_t arg_logical_size) const { + if (arg_alloc_ptr) Kokkos::fence("HostSpace::impl_deallocate before free"); impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); } void HostSpace::impl_deallocate( @@ -119,7 +105,6 @@ void HostSpace::impl_deallocate( const size_t arg_alloc_size, const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle arg_handle) const { if (arg_alloc_ptr) { - Kokkos::fence("HostSpace::impl_deallocate before free"); size_t reported_size = (arg_logical_size > 0) ? 
arg_logical_size : arg_alloc_size; if (Kokkos::Profiling::profileLibraryLoaded()) { diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 25f09b828655..3dc68a187bef 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -106,7 +106,11 @@ class HostThreadTeamData { public: inline bool team_rendezvous() const noexcept { - int* ptr = reinterpret_cast(m_team_scratch + m_team_rendezvous); + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_rendezvous != 0: + int* ptr = m_team_scratch == nullptr + ? nullptr + : reinterpret_cast(m_team_scratch + m_team_rendezvous); HostBarrier::split_arrive(ptr, m_team_size, m_team_rendezvous_step); if (m_team_rank != 0) { HostBarrier::wait(ptr, m_team_size, m_team_rendezvous_step); @@ -130,9 +134,13 @@ class HostThreadTeamData { } inline void team_rendezvous_release() const noexcept { + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_rendezvous != 0: HostBarrier::split_release( - reinterpret_cast(m_team_scratch + m_team_rendezvous), m_team_size, - m_team_rendezvous_step); + (m_team_scratch == nullptr) + ? 
nullptr + : reinterpret_cast(m_team_scratch + m_team_rendezvous), + m_team_size, m_team_rendezvous_step); } inline int pool_rendezvous() const noexcept { @@ -271,6 +279,9 @@ class HostThreadTeamData { } int64_t* team_shared() const noexcept { + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_shared != 0 + if (m_team_scratch == nullptr) return nullptr; return m_team_scratch + m_team_shared; } @@ -400,8 +411,12 @@ class HostThreadTeamMember { int const m_league_size; public: + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_shared != 0: constexpr HostThreadTeamMember(HostThreadTeamData& arg_data) noexcept - : m_scratch(arg_data.team_shared(), arg_data.team_shared_bytes()), + : m_scratch(arg_data.team_shared(), (arg_data.team_shared() == nullptr) + ? 0 + : arg_data.team_shared_bytes()), m_data(arg_data), m_league_rank(arg_data.m_league_rank), m_league_size(arg_data.m_league_size) {} diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp index bc6197753c32..0b3465301734 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp @@ -971,84 +971,6 @@ void set_callbacks(Kokkos::Tools::Experimental::EventSet new_events) { } // namespace Experimental } // namespace Tools -namespace Profiling { -bool profileLibraryLoaded() { return Kokkos::Tools::profileLibraryLoaded(); } - -void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID) { - Kokkos::Tools::beginParallelFor(kernelPrefix, devID, kernelID); -} -void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID) { - Kokkos::Tools::beginParallelReduce(kernelPrefix, devID, kernelID); -} -void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID) { - 
Kokkos::Tools::beginParallelScan(kernelPrefix, devID, kernelID); -} -void endParallelFor(const uint64_t kernelID) { - Kokkos::Tools::endParallelFor(kernelID); -} -void endParallelReduce(const uint64_t kernelID) { - Kokkos::Tools::endParallelReduce(kernelID); -} -void endParallelScan(const uint64_t kernelID) { - Kokkos::Tools::endParallelScan(kernelID); -} - -void pushRegion(const std::string& kName) { Kokkos::Tools::pushRegion(kName); } -void popRegion() { Kokkos::Tools::popRegion(); } - -void createProfileSection(const std::string& sectionName, uint32_t* secID) { - Kokkos::Tools::createProfileSection(sectionName, secID); -} -void destroyProfileSection(const uint32_t secID) { - Kokkos::Tools::destroyProfileSection(secID); -} - -void startSection(const uint32_t secID) { Kokkos::Tools::startSection(secID); } - -void stopSection(const uint32_t secID) { Kokkos::Tools::stopSection(secID); } - -void markEvent(const std::string& eventName) { - Kokkos::Tools::markEvent(eventName); -} -void allocateData(const SpaceHandle handle, const std::string name, - const void* data, const uint64_t size) { - Kokkos::Tools::allocateData(handle, name, data, size); -} -void deallocateData(const SpaceHandle space, const std::string label, - const void* ptr, const uint64_t size) { - Kokkos::Tools::deallocateData(space, label, ptr, size); -} - -void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, - const void* dst_ptr, const SpaceHandle src_space, - const std::string src_label, const void* src_ptr, - const uint64_t size) { - Kokkos::Tools::beginDeepCopy(dst_space, dst_label, dst_ptr, src_space, - src_label, src_ptr, size); -} -void endDeepCopy() { Kokkos::Tools::endDeepCopy(); } - -void finalize() { Kokkos::Tools::finalize(); } -void initialize(const std::string& profileLibrary) { - Kokkos::Tools::initialize(profileLibrary); -} - -bool printHelp(const std::string& args) { - return Kokkos::Tools::printHelp(args); -} -void parseArgs(const std::string& args) { 
Kokkos::Tools::parseArgs(args); } -void parseArgs(int _argc, char** _argv) { - Kokkos::Tools::parseArgs(_argc, _argv); -} - -SpaceHandle make_space_handle(const char* space_name) { - return Kokkos::Tools::make_space_handle(space_name); -} -} // namespace Profiling - // Tuning namespace Tools { diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp index 025d8d3d18e6..01a41d0c3fc2 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp @@ -263,40 +263,41 @@ size_t get_current_context_id(); } // namespace Tools namespace Profiling { -bool profileLibraryLoaded(); +// don't let ClangFormat reorder the using-declarations below +// clang-format off +using Kokkos::Tools::profileLibraryLoaded; -void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID); -void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID); -void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID); -void endParallelFor(const uint64_t kernelID); -void endParallelReduce(const uint64_t kernelID); -void endParallelScan(const uint64_t kernelID); -void pushRegion(const std::string& kName); -void popRegion(); +using Kokkos::Tools::printHelp; +using Kokkos::Tools::parseArgs; -void createProfileSection(const std::string& sectionName, uint32_t* secID); -void destroyProfileSection(const uint32_t secID); -void startSection(const uint32_t secID); +using Kokkos::Tools::initialize; +using Kokkos::Tools::finalize; -void stopSection(const uint32_t secID); +using Kokkos::Tools::beginParallelFor; +using Kokkos::Tools::beginParallelReduce; +using Kokkos::Tools::beginParallelScan; +using Kokkos::Tools::endParallelFor; +using Kokkos::Tools::endParallelReduce; +using Kokkos::Tools::endParallelScan; -void markEvent(const std::string& eventName); -void allocateData(const 
SpaceHandle handle, const std::string name, - const void* data, const uint64_t size); -void deallocateData(const SpaceHandle space, const std::string label, - const void* ptr, const uint64_t size); -void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, - const void* dst_ptr, const SpaceHandle src_space, - const std::string src_label, const void* src_ptr, - const uint64_t size); -void endDeepCopy(); -void finalize(); -void initialize(const std::string& = {}); +using Kokkos::Tools::allocateData; +using Kokkos::Tools::deallocateData; + +using Kokkos::Tools::beginDeepCopy; +using Kokkos::Tools::endDeepCopy; + +using Kokkos::Tools::pushRegion; +using Kokkos::Tools::popRegion; + +using Kokkos::Tools::createProfileSection; +using Kokkos::Tools::destroyProfileSection; +using Kokkos::Tools::startSection; +using Kokkos::Tools::stopSection; + +using Kokkos::Tools::markEvent; -SpaceHandle make_space_handle(const char* space_name); +using Kokkos::Tools::make_space_handle; +// clang-format on namespace Experimental { using Kokkos::Tools::Experimental::set_allocate_data_callback; diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h b/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h index 15c466b27ed4..8c3194e43b5f 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h @@ -32,6 +32,10 @@ // Profiling +#ifdef __cplusplus +extern "C" { +#endif + struct Kokkos_Profiling_KokkosPDeviceInfo { size_t deviceID; }; @@ -267,4 +271,8 @@ struct Kokkos_Profiling_EventSet { // changing struct layout }; +#ifdef __cplusplus +} +#endif + #endif // KOKKOS_PROFILING_C_INTERFACE_HPP diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 0bc3814b3a1b..ccf3c47a1efe 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ 
-323,41 +323,6 @@ void SharedAllocationRecord::print_host_accessible_records( } #endif -void safe_throw_allocation_with_header_failure( - std::string const& space_name, std::string const& label, - Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - auto generate_failure_message = [&](std::ostream& o) { - o << "Kokkos failed to allocate memory for label \"" << label - << "\". Allocation using MemorySpace named \"" << space_name - << "\" failed with the following error: "; - failure.print_error_message(o); - if (failure.failure_mode() == - Kokkos::Experimental::RawMemoryAllocationFailure::FailureMode:: - AllocationNotAligned) { - // TODO: delete the misaligned memory? - o << "Warning: Allocation failed due to misalignment; memory may " - "be leaked.\n"; - } - o.flush(); - }; - try { - std::ostringstream sstr; - generate_failure_message(sstr); - Kokkos::Impl::throw_runtime_exception(sstr.str()); - } catch (std::bad_alloc const&) { - // Probably failed to allocate the string because we're so close to out - // of memory. Try printing to std::cerr instead - try { - generate_failure_message(std::cerr); - } catch (std::bad_alloc const&) { - // oh well, we tried... 
- } - Kokkos::Impl::throw_runtime_exception( - "Kokkos encountered an allocation failure, then another allocation " - "failure while trying to create the error message."); - } -} - void fill_host_accessible_header_info( SharedAllocationRecord* arg_record, SharedAllocationHeader& arg_header, std::string const& arg_label) { diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 99ab660213f7..da03cc49830c 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -196,36 +196,21 @@ class SharedAllocationRecord { const SharedAllocationRecord* const root, const bool detail); }; -void safe_throw_allocation_with_header_failure( - std::string const& space_name, std::string const& label, - Kokkos::Experimental::RawMemoryAllocationFailure const& failure); - template SharedAllocationHeader* checked_allocation_with_header(MemorySpace const& space, std::string const& label, size_t alloc_size) { - try { - return reinterpret_cast(space.allocate( - label.c_str(), alloc_size + sizeof(SharedAllocationHeader), - alloc_size)); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - safe_throw_allocation_with_header_failure(space.name(), label, failure); - } - return nullptr; // unreachable + return reinterpret_cast(space.allocate( + label.c_str(), alloc_size + sizeof(SharedAllocationHeader), alloc_size)); } template SharedAllocationHeader* checked_allocation_with_header( ExecutionSpace const& exec_space, MemorySpace const& space, std::string const& label, size_t alloc_size) { - try { - return reinterpret_cast(space.allocate( - exec_space, label.c_str(), alloc_size + sizeof(SharedAllocationHeader), - alloc_size)); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - safe_throw_allocation_with_header_failure(space.name(), label, failure); - } - return nullptr; // unreachable + return 
reinterpret_cast( + space.allocate(exec_space, label.c_str(), + alloc_size + sizeof(SharedAllocationHeader), alloc_size)); } void fill_host_accessible_header_info(SharedAllocationHeader& arg_header, @@ -385,11 +370,9 @@ SharedAllocationRecord template class Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon< \ MEMORY_SPACE> -namespace { - /* Taking the address of this function so make sure it is unique */ template -void deallocate(SharedAllocationRecord* record_ptr) { +inline void deallocate(SharedAllocationRecord* record_ptr) { using base_type = SharedAllocationRecord; using this_type = SharedAllocationRecord; @@ -401,8 +384,6 @@ void deallocate(SharedAllocationRecord* record_ptr) { delete ptr; } -} // namespace - /* * Memory space specialization of SharedAllocationRecord< Space , void > * requires : @@ -487,15 +468,21 @@ union SharedAllocationTracker { // pressure on compiler optimization by reducing // number of symbols and inline functions. -#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \ - KOKKOS_IF_ON_HOST((if (!(m_record_bits & DO_NOT_DEREF_FLAG)) { \ - Record::increment(m_record); \ - })) +#ifdef KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY +#define KOKKOS_IMPL_BRANCH_PROB KOKKOS_IMPL_ATTRIBUTE_UNLIKELY +#else +#define KOKKOS_IMPL_BRANCH_PROB +#endif + +#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \ + KOKKOS_IF_ON_HOST( \ + (if (!(m_record_bits & DO_NOT_DEREF_FLAG)) \ + KOKKOS_IMPL_BRANCH_PROB { Record::increment(m_record); })) -#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \ - KOKKOS_IF_ON_HOST((if (!(m_record_bits & DO_NOT_DEREF_FLAG)) { \ - Record::decrement(m_record); \ - })) +#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \ + KOKKOS_IF_ON_HOST( \ + (if (!(m_record_bits & DO_NOT_DEREF_FLAG)) \ + KOKKOS_IMPL_BRANCH_PROB { Record::decrement(m_record); })) #define KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, \ override_tracking) \ @@ -642,8 +629,41 @@ union SharedAllocationTracker { #undef 
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT +#undef KOKKOS_IMPL_BRANCH_PROB }; +struct SharedAllocationDisableTrackingGuard { + SharedAllocationDisableTrackingGuard() { + KOKKOS_ASSERT( + (Kokkos::Impl::SharedAllocationRecord::tracking_enabled())); + Kokkos::Impl::SharedAllocationRecord::tracking_disable(); + } + + SharedAllocationDisableTrackingGuard( + const SharedAllocationDisableTrackingGuard&) = delete; + SharedAllocationDisableTrackingGuard(SharedAllocationDisableTrackingGuard&&) = + delete; + + ~SharedAllocationDisableTrackingGuard() { + KOKKOS_ASSERT(( + !Kokkos::Impl::SharedAllocationRecord::tracking_enabled())); + Kokkos::Impl::SharedAllocationRecord::tracking_enable(); + } + // clang-format off + // The old version of clang format we use is particularly egregious here + SharedAllocationDisableTrackingGuard& operator=( + const SharedAllocationDisableTrackingGuard&) = delete; + SharedAllocationDisableTrackingGuard& operator=( + SharedAllocationDisableTrackingGuard&&) = delete; + // clang-format on +}; + +template +inline FunctorType construct_with_shared_allocation_tracking_disabled( + Args&&... args) { + [[maybe_unused]] auto guard = SharedAllocationDisableTrackingGuard{}; + return {std::forward(args)...}; +} } /* namespace Impl */ } /* namespace Kokkos */ #endif diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp deleted file mode 100644 index fe43b630184f..000000000000 --- a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ /dev/null @@ -1,622 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP -#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP - -#include - -namespace Kokkos { -namespace Impl { - -template -struct ViewDataAnalysis> { - private: - using array_analysis = ViewArrayAnalysis; - - static_assert(std::is_void

::value); - static_assert(std::is_same>::value); - static_assert(std::is_scalar::value, - "View of Array type must be of a scalar type"); - - public: - using specialize = Kokkos::Array<>; - - using dimension = typename array_analysis::dimension; - - private: - enum { - is_const = std::is_same::value - }; - - using array_scalar_dimension = typename dimension::template append::type; - - using scalar_type = std::conditional_t; - using non_const_scalar_type = V; - using const_scalar_type = const V; - - public: - using value_type = typename array_analysis::value_type; - using const_value_type = typename array_analysis::const_value_type; - using non_const_value_type = typename array_analysis::non_const_value_type; - - using type = typename ViewDataType::type; - using const_type = typename ViewDataType::type; - using non_const_type = - typename ViewDataType::type; - - using scalar_array_type = - typename ViewDataType::type; - using const_scalar_array_type = - typename ViewDataType::type; - using non_const_scalar_array_type = - typename ViewDataType::type; -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief View mapping for non-specialized data type and standard layout */ -template -class ViewMapping> { - private: - template - friend class ViewMapping; - template - friend class Kokkos::View; - - using offset_type = ViewOffset; - - using handle_type = typename Traits::value_type::pointer; - - handle_type m_impl_handle; - offset_type m_impl_offset; - size_t m_stride = 0; - - using scalar_type = typename Traits::value_type::value_type; - - using contiguous_reference = Kokkos::Array::contiguous>; - using strided_reference = - Kokkos::Array::strided>; - - enum { - is_contiguous_reference = - (Traits::rank == 0) || (std::is_same::value) - }; - - enum { Array_N = 
Traits::value_type::size() }; - enum { Array_S = is_contiguous_reference ? Array_N : 1 }; - - KOKKOS_INLINE_FUNCTION - ViewMapping(const handle_type &arg_handle, const offset_type &arg_offset) - : m_impl_handle(arg_handle), - m_impl_offset(arg_offset), - m_stride(is_contiguous_reference ? 0 : arg_offset.span()) {} - - public: - //---------------------------------------- - // Domain dimensions - - static constexpr unsigned Rank = Traits::dimension::rank; - - template - KOKKOS_INLINE_FUNCTION constexpr size_t extent(const iType &r) const { - return m_impl_offset.m_dim.extent(r); - } - - static KOKKOS_INLINE_FUNCTION constexpr size_t static_extent( - const unsigned r) noexcept { - using dim_type = typename offset_type::dimension_type; - return dim_type::static_extent(r); - } - - KOKKOS_INLINE_FUNCTION constexpr typename Traits::array_layout layout() - const { - return m_impl_offset.layout(); - } - - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { - return m_impl_offset.dimension_0(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { - return m_impl_offset.dimension_1(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { - return m_impl_offset.dimension_2(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { - return m_impl_offset.dimension_3(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { - return m_impl_offset.dimension_4(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { - return m_impl_offset.dimension_5(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { - return m_impl_offset.dimension_6(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { - return m_impl_offset.dimension_7(); - } - - // Is a regular layout with uniform striding for each index. 
- using is_regular = typename offset_type::is_regular; - - KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { - return m_impl_offset.stride_0(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { - return m_impl_offset.stride_1(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { - return m_impl_offset.stride_2(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { - return m_impl_offset.stride_3(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { - return m_impl_offset.stride_4(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { - return m_impl_offset.stride_5(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { - return m_impl_offset.stride_6(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { - return m_impl_offset.stride_7(); - } - - //---------------------------------------- - // Range span - - /** \brief Span of the mapped range */ - KOKKOS_INLINE_FUNCTION constexpr size_t span() const { - return m_impl_offset.span() * Array_N; - } - - /** \brief Is the mapped range span contiguous */ - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { - return m_impl_offset.span_is_contiguous(); - } - - using reference_type = - std::conditional_t; - - using pointer_type = handle_type; - - /** \brief If data references are lvalue_reference than can query pointer to - * memory */ - KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { - return m_impl_handle; - } - - //---------------------------------------- - // The View class performs all rank and bounds checking before - // calling these element reference methods. 
- - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference() const { - return reference_type(m_impl_handle + 0, Array_N, 0); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0) const { - return reference_type(m_impl_handle + m_impl_offset(i0) * Array_S, Array_N, - m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0, - const I1 &i1) const { - return reference_type(m_impl_handle + m_impl_offset(i0, i1) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0, - const I1 &i1, - const I2 &i2) const { - return reference_type(m_impl_handle + m_impl_offset(i0, i1, i2) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3) * Array_S, Array_N, - m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0, - const I1 &i1, - const I2 &i2, - const I3 &i3, - const I4 &i4) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4) * Array_S, Array_N, - m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4, i5) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, const I6 &i6) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4, i5, i6) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, const 
I6 &i6, const I7 &i7) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4, i5, i6, i7) * Array_S, - Array_N, m_stride); - } - - //---------------------------------------- - - private: - enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ }; - enum { MemorySpanSize = sizeof(scalar_type) }; - - public: - /** \brief Span, in bytes, of the referenced memory */ - KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const { - return (m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask) & - ~size_t(MemorySpanMask); - } - - //---------------------------------------- - - KOKKOS_DEFAULTED_FUNCTION ViewMapping() = default; - - //---------------------------------------- - - template - KOKKOS_INLINE_FUNCTION ViewMapping(pointer_type ptr, Args... args) - : m_impl_handle(ptr), - m_impl_offset(std::integral_constant(), args...), - m_stride(m_impl_offset.span()) {} - - //---------------------------------------- - - template - Kokkos::Impl::SharedAllocationRecord<> *allocate_shared( - Kokkos::Impl::ViewCtorProp const &arg_prop, - typename Traits::array_layout const &arg_layout, - bool execution_space_specified) { - using alloc_prop = Kokkos::Impl::ViewCtorProp; - - using execution_space = typename alloc_prop::execution_space; - using memory_space = typename Traits::memory_space; - static_assert( - SpaceAccessibility::accessible); - using functor_type = - ViewValueFunctor; - using record_type = - Kokkos::Impl::SharedAllocationRecord; - - // Query the mapping for byte-size of allocation. - using padding = std::integral_constant< - unsigned int, alloc_prop::allow_padding ? 
sizeof(scalar_type) : 0>; - - m_impl_offset = offset_type(padding(), arg_layout); - - const size_t alloc_size = - (m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask) & - ~size_t(MemorySpanMask); - const auto &alloc_name = Impl::get_property(arg_prop); - const execution_space &exec_space = - Impl::get_property(arg_prop); - const memory_space &mem_space = - Impl::get_property(arg_prop); - - // Allocate memory from the memory space and create tracking record. - record_type *const record = - execution_space_specified - ? record_type::allocate(exec_space, mem_space, alloc_name, - alloc_size) - : record_type::allocate(mem_space, alloc_name, alloc_size); - - m_impl_handle = handle_type(reinterpret_cast(record->data())); - - functor_type functor = - execution_space_specified - ? functor_type(exec_space, (pointer_type)m_impl_handle, - m_impl_offset.span() * Array_N, alloc_name) - : functor_type((pointer_type)m_impl_handle, - m_impl_offset.span() * Array_N, alloc_name); - -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ - defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) - if (false) { - // Make sure the destroy functor gets instantiated. - // This avoids "cudaErrorInvalidDeviceFunction"-type errors. - functor.destroy_shared_allocation(); - } -#endif - - // Only initialize if the allocation is non-zero. - // May be zero if one of the dimensions is zero. - if constexpr (alloc_prop::initialize) - if (alloc_size) { - // Assume destruction is only required when construction is requested. - // The ViewValueFunctor has both value construction and destruction - // operators. 
- record->m_destroy = std::move(functor); - - // Construct values - record->m_destroy.construct_shared_allocation(); - } - - return record; - } -}; - -/** \brief Assign Array to non-Array */ - -template -class ViewMapping< - DstTraits, SrcTraits, - std::enable_if_t<( - std::is_same::value && - std::is_void::value && - (std::is_same::value || - std::is_same::value || - std::is_same::value) && - std::is_same>::value && - (std::is_same::value || - std::is_same::value || - std::is_same::value))>> { - public: - // Can only convert to View::array_type - - enum { - is_assignable_data_type = - std::is_same::value && - (DstTraits::rank == SrcTraits::rank + 1) - }; - enum { - is_assignable = - std::is_same::value && - std::is_same::value - }; - - using TrackType = Kokkos::Impl::SharedAllocationTracker; - using DstType = ViewMapping; - using SrcType = ViewMapping>; - - KOKKOS_INLINE_FUNCTION - static void assign(DstType &dst, const SrcType &src, - const TrackType & /*src_track*/) { - static_assert(is_assignable, "Can only convert to array_type"); - - using dst_offset_type = typename DstType::offset_type; - - // Array dimension becomes the last dimension. - // Arguments beyond the destination rank are ignored. - if (src.span_is_contiguous()) { // not padded - dst.m_impl_offset = dst_offset_type( - std::integral_constant(), - typename DstTraits::array_layout( - (0 < SrcType::Rank ? src.dimension_0() - : SrcTraits::value_type::size()), - (1 < SrcType::Rank ? src.dimension_1() - : SrcTraits::value_type::size()), - (2 < SrcType::Rank ? src.dimension_2() - : SrcTraits::value_type::size()), - (3 < SrcType::Rank ? src.dimension_3() - : SrcTraits::value_type::size()), - (4 < SrcType::Rank ? src.dimension_4() - : SrcTraits::value_type::size()), - (5 < SrcType::Rank ? src.dimension_5() - : SrcTraits::value_type::size()), - (6 < SrcType::Rank ? src.dimension_6() - : SrcTraits::value_type::size()), - (7 < SrcType::Rank ? 
src.dimension_7() - : SrcTraits::value_type::size()))); - } else { // is padded - using padded = std::integral_constant< - unsigned int, sizeof(typename SrcTraits::value_type::value_type)>; - - dst.m_impl_offset = dst_offset_type( - padded(), typename DstTraits::array_layout( - (0 < SrcType::Rank ? src.dimension_0() - : SrcTraits::value_type::size()), - (1 < SrcType::Rank ? src.dimension_1() - : SrcTraits::value_type::size()), - (2 < SrcType::Rank ? src.dimension_2() - : SrcTraits::value_type::size()), - (3 < SrcType::Rank ? src.dimension_3() - : SrcTraits::value_type::size()), - (4 < SrcType::Rank ? src.dimension_4() - : SrcTraits::value_type::size()), - (5 < SrcType::Rank ? src.dimension_5() - : SrcTraits::value_type::size()), - (6 < SrcType::Rank ? src.dimension_6() - : SrcTraits::value_type::size()), - (7 < SrcType::Rank ? src.dimension_7() - : SrcTraits::value_type::size()))); - } - - dst.m_impl_handle = src.m_impl_handle; - } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template -class ViewMapping< - std::enable_if_t<( - std::is_same>::value && - (std::is_same::value || - std::is_same::value || - std::is_same::value))>, - SrcTraits, Args...> { - private: - static_assert(SrcTraits::rank == sizeof...(Args)); - - enum : bool { - R0 = is_integral_extent<0, Args...>::value, - R1 = is_integral_extent<1, Args...>::value, - R2 = is_integral_extent<2, Args...>::value, - R3 = is_integral_extent<3, Args...>::value, - R4 = is_integral_extent<4, Args...>::value, - R5 = is_integral_extent<5, Args...>::value, - R6 = is_integral_extent<6, Args...>::value, - R7 = is_integral_extent<7, Args...>::value - }; - - enum { - rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + - unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) - }; - - // Whether right-most rank is a range. - enum { - R0_rev = - 0 == SrcTraits::rank - ? 
false - : (1 == SrcTraits::rank - ? R0 - : (2 == SrcTraits::rank - ? R1 - : (3 == SrcTraits::rank - ? R2 - : (4 == SrcTraits::rank - ? R3 - : (5 == SrcTraits::rank - ? R4 - : (6 == SrcTraits::rank - ? R5 - : (7 == SrcTraits::rank - ? R6 - : R7))))))) - }; - - // Subview's layout - using array_layout = - std::conditional_t<((rank == 0) || - (rank <= 2 && R0 && - std::is_same::value) || - (rank <= 2 && R0_rev && - std::is_same::value)), - typename SrcTraits::array_layout, - Kokkos::LayoutStride>; - - using value_type = typename SrcTraits::value_type; - - using data_type = std::conditional_t< - rank == 0, value_type, - std::conditional_t< - rank == 1, value_type *, - std::conditional_t< - rank == 2, value_type **, - std::conditional_t< - rank == 3, value_type ***, - std::conditional_t< - rank == 4, value_type ****, - std::conditional_t< - rank == 5, value_type *****, - std::conditional_t< - rank == 6, value_type ******, - std::conditional_t>>>>>>>; - - public: - using traits_type = Kokkos::ViewTraits; - - using type = - Kokkos::View; - - KOKKOS_INLINE_FUNCTION - static void assign(ViewMapping &dst, - ViewMapping const &src, Args... 
args) { - using DstType = ViewMapping; - - using dst_offset_type = typename DstType::offset_type; - using dst_handle_type = typename DstType::handle_type; - - const SubviewExtents extents(src.m_impl_offset.m_dim, - args...); - - dst.m_impl_offset = dst_offset_type(src.m_impl_offset, extents); - dst.m_impl_handle = dst_handle_type( - src.m_impl_handle + - src.m_impl_offset(extents.domain_offset(0), extents.domain_offset(1), - extents.domain_offset(2), extents.domain_offset(3), - extents.domain_offset(4), extents.domain_offset(5), - extents.domain_offset(6), extents.domain_offset(7))); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */ diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp deleted file mode 100644 index 957717f973d1..000000000000 --- a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp +++ /dev/null @@ -1,1425 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP -#define KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP - -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -// View offset and mapping for tiled view's - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout< - Kokkos::Experimental::LayoutTiled> - : public std::true_type {}; - -template -struct is_array_layout_tiled : public std::false_type {}; - -template -struct is_array_layout_tiled> : public std::true_type { -}; // Last template parameter "true" meaning this currently only supports - // powers-of-two - -namespace Impl { - -template -struct ViewOffset< - Dimension, Layout, - std::enable_if_t<((Dimension::rank <= 8) && (Dimension::rank >= 2) && - is_array_layout::value && - is_array_layout_tiled::value)>> { - public: - static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern; - static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern; - - static constexpr int VORank = Dimension::rank; - - static constexpr unsigned SHIFT_0 = - Kokkos::Impl::integral_power_of_two(Layout::N0); - static constexpr unsigned SHIFT_1 = - Kokkos::Impl::integral_power_of_two(Layout::N1); - static constexpr unsigned SHIFT_2 = - Kokkos::Impl::integral_power_of_two(Layout::N2); - static constexpr unsigned SHIFT_3 = - Kokkos::Impl::integral_power_of_two(Layout::N3); - static constexpr unsigned SHIFT_4 = - 
Kokkos::Impl::integral_power_of_two(Layout::N4); - static constexpr unsigned SHIFT_5 = - Kokkos::Impl::integral_power_of_two(Layout::N5); - static constexpr unsigned SHIFT_6 = - Kokkos::Impl::integral_power_of_two(Layout::N6); - static constexpr unsigned SHIFT_7 = - Kokkos::Impl::integral_power_of_two(Layout::N7); - static constexpr int MASK_0 = Layout::N0 - 1; - static constexpr int MASK_1 = Layout::N1 - 1; - static constexpr int MASK_2 = Layout::N2 - 1; - static constexpr int MASK_3 = Layout::N3 - 1; - static constexpr int MASK_4 = Layout::N4 - 1; - static constexpr int MASK_5 = Layout::N5 - 1; - static constexpr int MASK_6 = Layout::N6 - 1; - static constexpr int MASK_7 = Layout::N7 - 1; - - static constexpr unsigned SHIFT_2T = SHIFT_0 + SHIFT_1; - static constexpr unsigned SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2; - static constexpr unsigned SHIFT_4T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3; - static constexpr unsigned SHIFT_5T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4; - static constexpr unsigned SHIFT_6T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5; - static constexpr unsigned SHIFT_7T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6; - static constexpr unsigned SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + - SHIFT_4 + SHIFT_5 + SHIFT_6 + SHIFT_7; - - // Is an irregular layout that does not have uniform striding for each index. 
- using is_mapping_plugin = std::true_type; - using is_regular = std::false_type; - - using size_type = size_t; - using dimension_type = Dimension; - using array_layout = Layout; - - dimension_type m_dim; - size_type m_tile_N0; // Num tiles dim 0 - size_type m_tile_N1; - size_type m_tile_N2; - size_type m_tile_N3; - size_type m_tile_N4; - size_type m_tile_N5; - size_type m_tile_N6; - size_type m_tile_N7; - - //---------------------------------------- - -#define KOKKOS_IMPL_DEBUG_OUTPUT_CHECK 0 - - // Rank 2 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, - I1 const& i1) const { - auto tile_offset = - (outer_pattern == (Kokkos::Iterate::Left)) - ? (((i0 >> SHIFT_0) + m_tile_N0 * ((i1 >> SHIFT_1))) << SHIFT_2T) - : (((m_tile_N1 * (i0 >> SHIFT_0) + (i1 >> SHIFT_1))) << SHIFT_2T); - // ( num_tiles[1] * ti0 + ti1 ) * FTD - - auto local_offset = (inner_pattern == (Kokkos::Iterate::Left)) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0)) - : (((i0 & MASK_0) << SHIFT_1) + (i1 & MASK_1)); - // ( tile_dim[1] * li0 + li1 ) - -#if KOKKOS_IMPL_DEBUG_OUTPUT_CHECK - std::cout << "Am I Outer Left? " - << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "Am I Inner Left? " - << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "i0 = " << i0 << " i1 = " << i1 - << "\ntilei0 = " << (i0 >> SHIFT_0) - << " tilei1 = " << (i1 >> SHIFT_1) - << "locali0 = " << (i0 & MASK_0) - << "\nlocali1 = " << (i1 & MASK_1) << std::endl; -#endif - - return tile_offset + local_offset; - } - - // Rank 3 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? 
(((i0 >> SHIFT_0) + - m_tile_N0 * ((i1 >> SHIFT_1) + m_tile_N1 * (i2 >> SHIFT_2))) - << SHIFT_3T) - : ((m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) - << SHIFT_3T); - - auto local_offset = (inner_pattern == Kokkos::Iterate::Left) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1))) - : (((i0 & MASK_0) << (SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_2)) + (i2 & MASK_2)); - -#if KOKKOS_IMPL_DEBUG_OUTPUT_CHECK - std::cout << "Am I Outer Left? " - << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "Am I Inner Left? " - << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "i0 = " << i0 << " i1 = " << i1 << " i2 = " << i2 - << "\ntilei0 = " << (i0 >> SHIFT_0) - << " tilei1 = " << (i1 >> SHIFT_1) - << " tilei2 = " << (i2 >> SHIFT_2) - << "\nlocali0 = " << (i0 & MASK_0) - << "locali1 = " << (i1 & MASK_1) << "locali2 = " << (i2 & MASK_2) - << std::endl; -#endif - - return tile_offset + local_offset; - } - - // Rank 4 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, - I3 const& i3) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * ((i1 >> SHIFT_1) + - m_tile_N1 * ((i2 >> SHIFT_2) + - m_tile_N2 * (i3 >> SHIFT_3)))) - << SHIFT_4T) - : ((m_tile_N3 * (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) - << SHIFT_4T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? 
((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2))) - : (((i0 & MASK_0) << (SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_3)) + (i3 & MASK_3)); - - return tile_offset + local_offset; - } - - // Rank 5 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * ((i2 >> SHIFT_2) + - m_tile_N2 * ((i3 >> SHIFT_3) + - m_tile_N3 * (i4 >> SHIFT_4))))) - << SHIFT_5T) - : ((m_tile_N4 * - (m_tile_N3 * (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) - << SHIFT_5T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3))) - : (((i0 & MASK_0) << (SHIFT_4 + SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_4 + SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_4)) + (i4 & MASK_4)); - - return tile_offset + local_offset; - } - - // Rank 6 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4, - I5 const& i5) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? 
(((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * - ((i2 >> SHIFT_2) + - m_tile_N2 * - ((i3 >> SHIFT_3) + - m_tile_N3 * ((i4 >> SHIFT_4) + - m_tile_N4 * (i5 >> SHIFT_5)))))) - << SHIFT_6T) - : ((m_tile_N5 * - (m_tile_N4 * - (m_tile_N3 * - (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) + - (i5 >> SHIFT_5)) - << SHIFT_6T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3)) + - ((i5 & MASK_5) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4))) - : (((i0 & MASK_0) - << (SHIFT_5 + SHIFT_4 + SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_5 + SHIFT_4 + SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_5 + SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_5 + SHIFT_4)) + - ((i4 & MASK_4) << (SHIFT_5)) + (i5 & MASK_5)); - - return tile_offset + local_offset; - } - - // Rank 7 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4, I5 const& i5, - I6 const& i6) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * - ((i2 >> SHIFT_2) + - m_tile_N2 * - ((i3 >> SHIFT_3) + - m_tile_N3 * - ((i4 >> SHIFT_4) + - m_tile_N4 * - ((i5 >> SHIFT_5) + - m_tile_N5 * (i6 >> SHIFT_6))))))) - << SHIFT_7T) - : ((m_tile_N6 * - (m_tile_N5 * - (m_tile_N4 * - (m_tile_N3 * - (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) + - (i5 >> SHIFT_5)) + - (i6 >> SHIFT_6)) - << SHIFT_7T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? 
((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3)) + - ((i5 & MASK_5) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4)) + - ((i6 & MASK_6) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5))) - : (((i0 & MASK_0) << (SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3 + - SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) - << (SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_6 + SHIFT_5 + SHIFT_4)) + - ((i4 & MASK_4) << (SHIFT_6 + SHIFT_5)) + - ((i5 & MASK_5) << (SHIFT_6)) + (i6 & MASK_6)); - - return tile_offset + local_offset; - } - - // Rank 8 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4, I5 const& i5, - I6 const& i6, - I7 const& i7) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * - ((i2 >> SHIFT_2) + - m_tile_N2 * - ((i3 >> SHIFT_3) + - m_tile_N3 * - ((i4 >> SHIFT_4) + - m_tile_N4 * - ((i5 >> SHIFT_5) + - m_tile_N5 * - ((i6 >> SHIFT_6) + - m_tile_N6 * (i7 >> SHIFT_7)))))))) - << SHIFT_8T) - : ((m_tile_N7 * - (m_tile_N6 * - (m_tile_N5 * - (m_tile_N4 * - (m_tile_N3 * - (m_tile_N2 * - (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) + - (i5 >> SHIFT_5)) + - (i6 >> SHIFT_6)) + - (i7 >> SHIFT_7)) - << SHIFT_8T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? 
((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3)) + - ((i5 & MASK_5) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4)) + - ((i6 & MASK_6) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + - SHIFT_4 + SHIFT_5)) + - ((i7 & MASK_7) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + - SHIFT_4 + SHIFT_5 + SHIFT_6))) - : (((i0 & MASK_0) << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4 + - SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4 + - SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) - << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4)) + - ((i4 & MASK_4) << (SHIFT_7 + SHIFT_6 + SHIFT_5)) + - ((i5 & MASK_5) << (SHIFT_7 + SHIFT_6)) + - ((i6 & MASK_6) << (SHIFT_7)) + (i7 & MASK_7)); - - return tile_offset + local_offset; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout((VORank > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), - (VORank > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), - (VORank > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), - (VORank > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), - (VORank > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), - (VORank > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), - (VORank > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), - (VORank > 7 ? 
m_dim.N7 : KOKKOS_INVALID_INDEX)); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { - return m_dim.N0; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { - return m_dim.N1; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { - return m_dim.N2; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { - return m_dim.N3; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { - return m_dim.N4; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { - return m_dim.N5; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { - return m_dim.N6; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { - return m_dim.N7; - } - - KOKKOS_INLINE_FUNCTION constexpr size_type size() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; - } - - // Strides are meaningless due to irregularity - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0; } - - // Stride with [ rank ] value is the total length - template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - s[0] = 0; - if (0 < dimension_type::rank) { - s[1] = 0; - } - if (1 < dimension_type::rank) { - s[2] = 0; - } - if (2 < dimension_type::rank) { - s[3] = 0; - } - if (3 < dimension_type::rank) { - s[4] = 0; - } - if (4 < dimension_type::rank) { - s[5] = 0; - } - if (5 < 
dimension_type::rank) { - s[6] = 0; - } - if (6 < dimension_type::rank) { - s[7] = 0; - } - if (7 < dimension_type::rank) { - s[8] = 0; - } - } - - KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - // Rank2: ( NumTile0 * ( NumTile1 ) ) * TileSize, etc - return (VORank == 2) - ? (m_tile_N0 * m_tile_N1) << SHIFT_2T - : (VORank == 3) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2) << SHIFT_3T - : (VORank == 4) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3) - << SHIFT_4T - : (VORank == 5) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2 * - m_tile_N3 * m_tile_N4) - << SHIFT_5T - : (VORank == 6) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2 * - m_tile_N3 * m_tile_N4 * m_tile_N5) - << SHIFT_6T - : (VORank == 7) - ? (m_tile_N0 * m_tile_N1 * - m_tile_N2 * m_tile_N3 * - m_tile_N4 * m_tile_N5 * - m_tile_N6) - << SHIFT_7T - : (m_tile_N0 * m_tile_N1 * - m_tile_N2 * m_tile_N3 * - m_tile_N4 * m_tile_N5 * - m_tile_N6 * m_tile_N7) - << SHIFT_8T; - } - - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { - return true; - } - - //---------------------------------------- -#ifdef KOKKOS_IMPL_WINDOWS_CUDA - KOKKOS_FUNCTION ViewOffset() {} - KOKKOS_FUNCTION ViewOffset(const ViewOffset& src) { - m_dim = src.m_dim; - m_tile_N0 = src.m_tile_N0; - m_tile_N1 = src.m_tile_N1; - m_tile_N2 = src.m_tile_N2; - m_tile_N3 = src.m_tile_N3; - m_tile_N4 = src.m_tile_N4; - m_tile_N5 = src.m_tile_N5; - m_tile_N6 = src.m_tile_N6; - m_tile_N7 = src.m_tile_N7; - } - KOKKOS_FUNCTION ViewOffset& operator=(const ViewOffset& src) { - m_dim = src.m_dim; - m_tile_N0 = src.m_tile_N0; - m_tile_N1 = src.m_tile_N1; - m_tile_N2 = src.m_tile_N2; - m_tile_N3 = src.m_tile_N3; - m_tile_N4 = src.m_tile_N4; - m_tile_N5 = src.m_tile_N5; - m_tile_N6 = src.m_tile_N6; - m_tile_N7 = src.m_tile_N7; - return *this; - } -#else - KOKKOS_DEFAULTED_FUNCTION ~ViewOffset() = default; - KOKKOS_DEFAULTED_FUNCTION ViewOffset() = default; - KOKKOS_DEFAULTED_FUNCTION ViewOffset(const ViewOffset&) = default; - KOKKOS_DEFAULTED_FUNCTION 
ViewOffset& operator=(const ViewOffset&) = default; -#endif - - template - KOKKOS_INLINE_FUNCTION constexpr ViewOffset( - std::integral_constant const&, - array_layout const arg_layout) - : m_dim(arg_layout.dimension[0], arg_layout.dimension[1], - arg_layout.dimension[2], arg_layout.dimension[3], - arg_layout.dimension[4], arg_layout.dimension[5], - arg_layout.dimension[6], arg_layout.dimension[7]), - m_tile_N0((arg_layout.dimension[0] + MASK_0) >> - SHIFT_0 /* number of tiles in first dimension */), - m_tile_N1((arg_layout.dimension[1] + MASK_1) >> SHIFT_1), - m_tile_N2((VORank > 2) ? (arg_layout.dimension[2] + MASK_2) >> SHIFT_2 - : 0), - m_tile_N3((VORank > 3) ? (arg_layout.dimension[3] + MASK_3) >> SHIFT_3 - : 0), - m_tile_N4((VORank > 4) ? (arg_layout.dimension[4] + MASK_4) >> SHIFT_4 - : 0), - m_tile_N5((VORank > 5) ? (arg_layout.dimension[5] + MASK_5) >> SHIFT_5 - : 0), - m_tile_N6((VORank > 6) ? (arg_layout.dimension[6] + MASK_6) >> SHIFT_6 - : 0), - m_tile_N7((VORank > 7) ? (arg_layout.dimension[7] + MASK_7) >> SHIFT_7 - : 0) {} -}; - -// FIXME Remove the out-of-class definitions when we require C++17 -#define KOKKOS_ITERATE_VIEW_OFFSET_ENABLE \ - std::enable_if_t<((Dimension::rank <= 8) && (Dimension::rank >= 2) && \ - is_array_layout::value && \ - is_array_layout_tiled::value)> -template -constexpr Kokkos::Iterate ViewOffset< - Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::outer_pattern; -template -constexpr Kokkos::Iterate ViewOffset< - Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::inner_pattern; -template -constexpr int - ViewOffset::VORank; -template -constexpr unsigned - ViewOffset::SHIFT_0; -template -constexpr unsigned - ViewOffset::SHIFT_1; -template -constexpr unsigned - ViewOffset::SHIFT_2; -template -constexpr unsigned - ViewOffset::SHIFT_3; -template -constexpr unsigned - ViewOffset::SHIFT_4; -template -constexpr unsigned - ViewOffset::SHIFT_5; -template -constexpr unsigned - ViewOffset::SHIFT_6; -template -constexpr unsigned 
- ViewOffset::SHIFT_7; -template -constexpr int - ViewOffset::MASK_0; -template -constexpr int - ViewOffset::MASK_1; -template -constexpr int - ViewOffset::MASK_2; -template -constexpr int - ViewOffset::MASK_3; -template -constexpr int - ViewOffset::MASK_4; -template -constexpr int - ViewOffset::MASK_5; -template -constexpr int - ViewOffset::MASK_6; -template -constexpr int - ViewOffset::MASK_7; -template -constexpr unsigned - ViewOffset::SHIFT_2T; -template -constexpr unsigned - ViewOffset::SHIFT_3T; -template -constexpr unsigned - ViewOffset::SHIFT_4T; -template -constexpr unsigned - ViewOffset::SHIFT_5T; -template -constexpr unsigned - ViewOffset::SHIFT_6T; -template -constexpr unsigned - ViewOffset::SHIFT_7T; -template -constexpr unsigned - ViewOffset::SHIFT_8T; -#undef KOKKOS_ITERATE_VIEW_OFFSET_ENABLE - -//---------------------------------------- - -// ViewMapping assign method needed in order to return a 'subview' tile as a -// proper View The outer iteration pattern determines the mapping of the pointer -// offset to the beginning of requested tile The inner iteration pattern is -// needed for the layout of the tile's View to be returned Rank 2 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T**, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1) { - using dst_map_type = ViewMapping; - using src_map_type = 
ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left ? ((i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1) - << src_offset_type::SHIFT_2T) - : ((src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1) - << src_offset_type::SHIFT_2T)) // offset to start - // of the tile - ), - dst_offset_type()); - } -}; - -// Rank 3 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T***, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + src.m_impl_offset.m_tile_N1 * i_tile2)) - << src_offset_type::SHIFT_3T) - : ((src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1) + - i_tile2) - << src_offset_type::SHIFT_3T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 4 -template -class ViewMapping< - std::enable_if_t<(N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0)> // void - , - Kokkos::ViewTraits< - T****, - Kokkos::Experimental::LayoutTiled, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + src.m_impl_offset.m_tile_N1 * - (i_tile2 + src.m_impl_offset.m_tile_N2 * - i_tile3))) - << src_offset_type::SHIFT_4T) - : ((src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) - << src_offset_type::SHIFT_4T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 5 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T*****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * i_tile4)))) - << src_offset_type::SHIFT_5T) - : ((src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) - << src_offset_type::SHIFT_5T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 6 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4, iType5> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = - Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4, - const iType5 i_tile5) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * - (i_tile4 + src.m_impl_offset.m_tile_N4 * - i_tile5))))) - << src_offset_type::SHIFT_6T) - : ((src.m_impl_offset.m_tile_N5 * - (src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) + - i_tile5) - << src_offset_type::SHIFT_6T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 7 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T*******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4, iType5, iType6> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = - Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4, - const iType5 i_tile5, const iType6 i_tile6) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * - (i_tile4 + - src.m_impl_offset.m_tile_N4 * - (i_tile5 + - src.m_impl_offset.m_tile_N5 * - i_tile6)))))) - << src_offset_type::SHIFT_7T) - : ((src.m_impl_offset.m_tile_N6 * - (src.m_impl_offset.m_tile_N5 * - (src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * - i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) + - i_tile5) + - i_tile6) - << src_offset_type::SHIFT_7T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 8 -template -class ViewMapping< - std::enable_if_t<(N0 != 0 && N1 != 0 && N2 != 0 && N3 != 0 && N4 != 0 && - N5 != 0 && N6 != 0 && N7 != 0)> // void - , - Kokkos::ViewTraits< - T********, - Kokkos::Experimental::LayoutTiled, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = - Kokkos::ViewTraits; - using type = - Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4, - const iType5 i_tile5, const iType6 i_tile6, const iType7 i_tile7) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = 
dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? ((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * - (i_tile4 + - src.m_impl_offset.m_tile_N4 * - (i_tile5 + - src.m_impl_offset.m_tile_N5 * - (i_tile6 + - src.m_impl_offset.m_tile_N6 * - i_tile7))))))) - << src_offset_type::SHIFT_8T) - : ((src.m_impl_offset.m_tile_N7 * - (src.m_impl_offset.m_tile_N6 * - (src.m_impl_offset.m_tile_N5 * - (src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * - i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) + - i_tile5) + - i_tile6) + - i_tile7) - << src_offset_type::SHIFT_8T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------- - -namespace Kokkos { - -// Rank 2 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T**, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View(src, SrcLayout(), i_tile0, - i_tile1); -} - -// Rank 3 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T***, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. - using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2); -} - -// Rank 4 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3); -} - -// Rank 5 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T*****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. - using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4); -} - -// Rank 6 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4, const size_t i_tile5) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4, i_tile5); -} - -// Rank 7 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T*******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4, const size_t i_tile5, - const size_t i_tile6) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. - using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4, i_tile5, - i_tile6); -} - -// Rank 8 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T********, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4, const size_t i_tile5, - const size_t i_tile6, const size_t i_tile7) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4, i_tile5, - i_tile6, i_tile7); -} - -} /* namespace Kokkos */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_EXPERIENTAL_VIEWLAYOUTTILE_HPP */ diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index 3217c76e3801..8919dccdb7a4 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -17,6 +17,7 @@ #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP #define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP +#include #include #include @@ -34,6 +35,7 @@ #include #include #include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -647,34 +649,60 @@ struct ViewOffset< m_dim.N5 * m_dim.N6; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. 
Preconditions: s must be an array of + // dimension_type::rank elements + // FIXME: The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - s[0] = 1; - if (0 < dimension_type::rank) { - s[1] = m_dim.N0; + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { + iType n = 1; + if constexpr (0 < dimension_type::rank) { + s[0] = n; + n *= m_dim.N0; } - if (1 < dimension_type::rank) { - s[2] = s[1] * m_dim.N1; + if constexpr (1 < dimension_type::rank) { + s[1] = n; + n *= m_dim.N1; } - if (2 < dimension_type::rank) { - s[3] = s[2] * m_dim.N2; + if constexpr (2 < dimension_type::rank) { + s[2] = n; + n *= m_dim.N2; } - if (3 < dimension_type::rank) { - s[4] = s[3] * m_dim.N3; + if constexpr (3 < dimension_type::rank) { + s[3] = n; + n *= m_dim.N3; } - if (4 < dimension_type::rank) { - s[5] = s[4] * m_dim.N4; + if constexpr (4 < dimension_type::rank) { + s[4] = n; + n *= m_dim.N4; } - if (5 < dimension_type::rank) { - s[6] = s[5] * m_dim.N5; + if constexpr (5 < dimension_type::rank) { + s[5] = n; + n *= m_dim.N5; } - if (6 < dimension_type::rank) { - s[7] = s[6] * m_dim.N6; + if constexpr (6 < dimension_type::rank) { + s[6] = n; + n *= m_dim.N6; } - if (7 < dimension_type::rank) { - s[8] = s[7] * m_dim.N7; + if constexpr (7 < dimension_type::rank) { + s[7] = n; + n *= m_dim.N7; } + return n; + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. 
Preconditions: s must be an + // array of dimension_type::rank + 1 elements Stride with [ rank ] value is + // the total length + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -935,34 +963,59 @@ struct ViewOffset< m_dim.N6; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - s[0] = 1; - if (0 < dimension_type::rank) { - s[1] = m_stride; + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { + iType n = 1; + if constexpr (0 < dimension_type::rank) { + s[0] = n; + n *= m_stride; } - if (1 < dimension_type::rank) { - s[2] = s[1] * m_dim.N1; + if constexpr (1 < dimension_type::rank) { + s[1] = n; + n *= m_dim.N1; } - if (2 < dimension_type::rank) { - s[3] = s[2] * m_dim.N2; + if constexpr (2 < dimension_type::rank) { + s[2] = n; + n *= m_dim.N2; } - if (3 < dimension_type::rank) { - s[4] = s[3] * m_dim.N3; + if constexpr (3 < dimension_type::rank) { + s[3] = n; + n *= m_dim.N3; } - if (4 < dimension_type::rank) { - s[5] = s[4] * m_dim.N4; + if constexpr (4 < dimension_type::rank) { + s[4] = n; + n *= m_dim.N4; } - if (5 < dimension_type::rank) { - s[6] = s[5] * m_dim.N5; + if constexpr (5 < dimension_type::rank) { + s[5] = n; + n *= m_dim.N5; } - if (6 < dimension_type::rank) { - s[7] = s[6] * m_dim.N6; + if constexpr (6 < dimension_type::rank) { + s[6] = n; + n *= m_dim.N6; } - if (7 < dimension_type::rank) { - s[8] = s[7] * m_dim.N7; + if constexpr (7 < dimension_type::rank) { + s[7] = n; + n *= m_dim.N7; } + return n; + } + 
// clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -1286,42 +1339,58 @@ struct ViewOffset< m_dim.N1; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { size_type n = 1; - if (7 < dimension_type::rank) { + if constexpr (7 < dimension_type::rank) { s[7] = n; n *= m_dim.N7; } - if (6 < dimension_type::rank) { + if constexpr (6 < dimension_type::rank) { s[6] = n; n *= m_dim.N6; } - if (5 < dimension_type::rank) { + if constexpr (5 < dimension_type::rank) { s[5] = n; n *= m_dim.N5; } - if (4 < dimension_type::rank) { + if constexpr (4 < dimension_type::rank) { s[4] = n; n *= m_dim.N4; } - if (3 < dimension_type::rank) { + if constexpr (3 < dimension_type::rank) { s[3] = n; n *= m_dim.N3; } - if (2 < dimension_type::rank) { + if constexpr (2 < dimension_type::rank) { s[2] = n; n *= m_dim.N2; } - if (1 < dimension_type::rank) { + if constexpr (1 < dimension_type::rank) { s[1] = n; n *= m_dim.N1; } - if (0 < dimension_type::rank) { + if constexpr (0 < dimension_type::rank) { s[0] = n; } - s[dimension_type::rank] = n * m_dim.N0; + return n * m_dim.N0; + } + // clang-format on + + // 
Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -1573,41 +1642,57 @@ struct ViewOffset< return m_stride; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { size_type n = 1; - if (7 < dimension_type::rank) { + if constexpr (7 < dimension_type::rank) { s[7] = n; n *= m_dim.N7; } - if (6 < dimension_type::rank) { + if constexpr (6 < dimension_type::rank) { s[6] = n; n *= m_dim.N6; } - if (5 < dimension_type::rank) { + if constexpr (5 < dimension_type::rank) { s[5] = n; n *= m_dim.N5; } - if (4 < dimension_type::rank) { + if constexpr (4 < dimension_type::rank) { s[4] = n; n *= m_dim.N4; } - if (3 < dimension_type::rank) { + if constexpr (3 < dimension_type::rank) { s[3] = n; n *= m_dim.N3; } - if (2 < dimension_type::rank) { + if constexpr (2 < dimension_type::rank) { s[2] = n; n *= m_dim.N2; } - if (1 < dimension_type::rank) { + if constexpr (1 < dimension_type::rank) { s[1] = n; } - if (0 < dimension_type::rank) { + if constexpr (0 < dimension_type::rank) { s[0] = m_stride; } - s[dimension_type::rank] = m_stride * m_dim.N0; + return m_stride * m_dim.N0; + } + // clang-format on + + // Fill the target 
unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -2133,34 +2218,50 @@ struct ViewOffset { return m_stride.S7; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - if (0 < dimension_type::rank) { + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { + if constexpr (0 < dimension_type::rank) { s[0] = m_stride.S0; } - if (1 < dimension_type::rank) { + if constexpr (1 < dimension_type::rank) { s[1] = m_stride.S1; } - if (2 < dimension_type::rank) { + if constexpr (2 < dimension_type::rank) { s[2] = m_stride.S2; } - if (3 < dimension_type::rank) { + if constexpr (3 < dimension_type::rank) { s[3] = m_stride.S3; } - if (4 < dimension_type::rank) { + if constexpr (4 < dimension_type::rank) { s[4] = m_stride.S4; } - if (5 < dimension_type::rank) { + if constexpr (5 < dimension_type::rank) { s[5] = m_stride.S5; } - if (6 < dimension_type::rank) { + if constexpr (6 < dimension_type::rank) { s[6] = m_stride.S6; } - if (7 < dimension_type::rank) { + if constexpr (7 < dimension_type::rank) { s[7] = m_stride.S7; } - s[dimension_type::rank] = span(); + return span(); + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. 
This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -2428,288 +2529,6 @@ struct ViewDataHandle< namespace Kokkos { namespace Impl { - -template -inline bool is_zero_byte(const T& t) { - using comparison_type = std::conditional_t< - sizeof(T) % sizeof(long long int) == 0, long long int, - std::conditional_t< - sizeof(T) % sizeof(long int) == 0, long int, - std::conditional_t< - sizeof(T) % sizeof(int) == 0, int, - std::conditional_t>>>; - const auto* const ptr = reinterpret_cast(&t); - for (std::size_t i = 0; i < sizeof(T) / sizeof(comparison_type); ++i) - if (ptr[i] != 0) return false; - return true; -} - -//---------------------------------------------------------------------------- - -/* - * The construction, assignment to default, and destruction - * are merged into a single functor. - * Primarily to work around an unresolved CUDA back-end bug - * that would lose the destruction cuda device function when - * called from the shared memory tracking destruction. - * Secondarily to have two fewer partial specializations. 
- */ -template ::value> -struct ViewValueFunctor; - -template -struct ViewValueFunctor { - using ExecSpace = typename DeviceType::execution_space; - - struct DestroyTag {}; - struct ConstructTag {}; - - ExecSpace space; - ValueType* ptr; - size_t n; - std::string name; - bool default_exec_space; - - template - KOKKOS_INLINE_FUNCTION - std::enable_if_t::value> - operator()(ConstructTag const&, const size_t i) const { - new (ptr + i) ValueType(); - } - - KOKKOS_INLINE_FUNCTION void operator()(DestroyTag const&, - const size_t i) const { - (ptr + i)->~ValueType(); - } - - ViewValueFunctor() = default; - ViewValueFunctor(const ViewValueFunctor&) = default; - ViewValueFunctor& operator=(const ViewValueFunctor&) = default; - - ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, - size_t const arg_n, std::string arg_name) - : space(arg_space), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(false) { - functor_instantiate_workaround(); - } - - ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, - std::string arg_name) - : space(ExecSpace{}), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(true) { - functor_instantiate_workaround(); - } - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value> - construct_dispatch() { - ValueType value{}; -// On A64FX memset seems to do the wrong thing with regards to first touch -// leading to the significant performance issues -#ifndef KOKKOS_ARCH_A64FX - if (Impl::is_zero_byte(value)) { - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - // We are not really using parallel_for here but using beginParallelFor - // instead of begin_parallel_for (and adding "via memset") is the best - // we can do to indicate that this is not supposed to be tunable (and - // doesn't really execute a parallel_for). 
- Kokkos::Profiling::beginParallelFor( - "Kokkos::View::initialization [" + name + "] via memset", - Kokkos::Profiling::Experimental::device_id(space), &kpID); - } - (void)ZeroMemset( - space, Kokkos::View>(ptr, n)); - - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - if (default_exec_space) - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); - } else { -#endif - parallel_for_implementation(); -#ifndef KOKKOS_ARCH_A64FX - } -#endif - } - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value)> - construct_dispatch() { - parallel_for_implementation(); - } - - template - void parallel_for_implementation() { - using PolicyType = - Kokkos::RangePolicy, Tag>; - PolicyType policy(space, 0, n); - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - const std::string functor_name = - (std::is_same_v - ? "Kokkos::View::destruction [" + name + "]" - : "Kokkos::View::initialization [" + name + "]"); - Kokkos::Profiling::beginParallelFor( - functor_name, Kokkos::Profiling::Experimental::device_id(space), - &kpID); - } - -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same::value) { - Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, - true); - } -#endif - const Kokkos::Impl::ParallelFor closure( - *this, policy); - closure.execute(); - if (default_exec_space || std::is_same_v) - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - } - - void construct_shared_allocation() { construct_dispatch(); } - - void destroy_shared_allocation() { - parallel_for_implementation(); - } - - // This function is to ensure that the functor with DestroyTag is instantiated - // This is a workaround to avoid "cudaErrorInvalidDeviceFunction" error later - // when the function is queried with cudaFuncGetAttributes - void functor_instantiate_workaround() 
{ -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ - defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) - if (false) { - parallel_for_implementation(); - } -#endif - } -}; - -template -struct ViewValueFunctor { - using ExecSpace = typename DeviceType::execution_space; - using PolicyType = Kokkos::RangePolicy>; - - ExecSpace space; - ValueType* ptr; - size_t n; - std::string name; - bool default_exec_space; - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t i) const { ptr[i] = ValueType(); } - - ViewValueFunctor() = default; - ViewValueFunctor(const ViewValueFunctor&) = default; - ViewValueFunctor& operator=(const ViewValueFunctor&) = default; - - ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, - size_t const arg_n, std::string arg_name) - : space(arg_space), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(false) {} - - ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, - std::string arg_name) - : space(ExecSpace{}), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(true) {} - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value> - construct_shared_allocation() { - // Shortcut for zero initialization -// On A64FX memset seems to do the wrong thing with regards to first touch -// leading to the significant performance issues -#ifndef KOKKOS_ARCH_A64FX - ValueType value{}; - if (Impl::is_zero_byte(value)) { - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - // We are not really using parallel_for here but using beginParallelFor - // instead of begin_parallel_for (and adding "via memset") is the best - // we can do to indicate that this is not supposed to be tunable (and - // doesn't really execute a parallel_for). 
- Kokkos::Profiling::beginParallelFor( - "Kokkos::View::initialization [" + name + "] via memset", - Kokkos::Profiling::Experimental::device_id(space), &kpID); - } - - (void)ZeroMemset( - space, Kokkos::View>(ptr, n)); - - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - if (default_exec_space) - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); - } else { -#endif - parallel_for_implementation(); -#ifndef KOKKOS_ARCH_A64FX - } -#endif - } - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value)> - construct_shared_allocation() { - parallel_for_implementation(); - } - - void parallel_for_implementation() { - PolicyType policy(0, n); - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelFor( - "Kokkos::View::initialization [" + name + "]", - Kokkos::Profiling::Experimental::device_id(space), &kpID); - } -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same::value) { - Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, - true); - } -#endif - const Kokkos::Impl::ParallelFor closure( - *this, PolicyType(0, n)); - closure.execute(); - if (default_exec_space) - space.fence( - "Kokkos::Impl::ViewValueFunctor: Fence after setting values in " - "view"); - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - } - - void destroy_shared_allocation() {} -}; - //---------------------------------------------------------------------------- /** \brief View mapping for non-specialized data type and standard layout */ template @@ -2814,11 +2633,24 @@ class ViewMapping< return m_impl_offset.stride_7(); } + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. 
Preconditions: s must be an + // array of dimension_type::rank + 1 elements template KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { m_impl_offset.stride(s); } + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. Preconditions: s must be an array of + // dimension_type::rank elements + template + KOKKOS_INLINE_FUNCTION iType stride_fill(iType* const s) const { + return m_impl_offset.stride_fill(s); + } + //---------------------------------------- // Range span @@ -3360,7 +3192,7 @@ struct SubViewDataTypeImpl> { }; /* for integral args, subview doesn't have that dimension */ -template struct SubViewDataTypeImpl< std::enable_if_t>::value>, @@ -3369,7 +3201,7 @@ struct SubViewDataTypeImpl< Kokkos::Experimental::Extents, Args...> {}; /* for ALL slice, subview has the same dimension */ -template +template struct SubViewDataTypeImpl, Kokkos::ALL_t, Args...> @@ -3380,7 +3212,7 @@ struct SubViewDataTypeImpl struct SubViewDataTypeImpl< std::enable_if_t::value>, ValueType, diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp index 1130485e841d..b2faccc52709 100644 --- a/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp +++ b/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp @@ -56,6 +56,8 @@ #define KOKKOS_LAMBDA [=] __host__ __device__ #define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__ +#define KOKKOS_DEDUCTION_GUIDE __host__ __device__ + #define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_IMPL_FORCEINLINE __forceinline__ #define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp index 7b0186610707..a3c5000b338b 100644 --- a/packages/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp +++ 
b/packages/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp @@ -27,6 +27,8 @@ #define KOKKOS_LAMBDA [=] __host__ __device__ #define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__ +#define KOKKOS_DEDUCTION_GUIDE __host__ __device__ + #define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline #define KOKKOS_IMPL_FUNCTION __device__ __host__ diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp index 30f6fa2ad23f..b117d75acb95 100644 --- a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp +++ b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp @@ -45,4 +45,21 @@ #define KOKKOS_IMPL_SYCL_GET_MULTI_PTR(accessor) accessor.get_pointer() #endif +// FIXME_SYCL Use type directly once it has stabilized in SYCL. +namespace Kokkos::Impl { +#ifndef SYCL_EXT_INTEL_USM_ADDRESS_SPACES +#error SYCL_EXT_INTEL_USM_ADDRESS_SPACES undefined! +#elif SYCL_EXT_INTEL_USM_ADDRESS_SPACES >= 2 +template +using sycl_device_ptr = sycl::ext::intel::device_ptr; +template +using sycl_host_ptr = sycl::ext::intel::host_ptr; +#else +template +using sycl_device_ptr = sycl::device_ptr; +template +using sycl_host_ptr = sycl::host_ptr; +#endif +} // namespace Kokkos::Impl + #endif diff --git a/packages/kokkos/core/unit_test/CMakeLists.txt b/packages/kokkos/core/unit_test/CMakeLists.txt index 6dfb7505c5d4..f82158187278 100644 --- a/packages/kokkos/core/unit_test/CMakeLists.txt +++ b/packages/kokkos/core/unit_test/CMakeLists.txt @@ -93,6 +93,9 @@ SET(COMPILE_ONLY_SOURCES TestViewTypeTraits.cpp TestTypeList.cpp TestMDRangePolicyCTAD.cpp + TestTeamPolicyCTAD.cpp + TestTeamMDRangePolicyCTAD.cpp + TestNestedReducerCTAD.cpp view/TestExtentsDatatypeConversion.cpp ) @@ -105,6 +108,9 @@ endif() IF(KOKKOS_HAS_TRILINOS) LIST(REMOVE_ITEM COMPILE_ONLY_SOURCES TestInterOp.cpp) ENDIF() +if(Kokkos_ENABLE_OPENMPTARGET) + list(REMOVE_ITEM COMPILE_ONLY_SOURCES 
TestNestedReducerCTAD.cpp) +endif() KOKKOS_ADD_EXECUTABLE( CoreTestCompileOnly SOURCES @@ -148,8 +154,10 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) Crs DeepCopyAlignment ExecSpacePartitioning + ExecSpaceThreadSafety ExecutionSpace FunctorAnalysis + Graph HostSharedPtr HostSharedPtrAccessOnDevice Init @@ -173,7 +181,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) endforeach() set(${Tag}_SOURCES1B) - foreach(Name + set(${Tag}_TESTNAMES1B MDRange_a MDRange_b MDRange_c @@ -184,6 +192,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) MDRangePolicyConstructors MDRangeReduce MDSpan + MDSpanAtomicAccessor + MDSpanConversion MinMaxClamp NumericTraits OccupancyControlTrait @@ -203,8 +213,19 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) Reductions Reductions_DeviceView SharedAlloc + SpaceAwareAccessorAccessViolation + SpaceAwareAccessor Swap ) + IF (NOT Kokkos_ENABLE_IMPL_MDSPAN) + LIST(REMOVE_ITEM ${Tag}_TESTNAMES1B + MDSpanAtomicAccessor + MDSpanConversion + SpaceAwareAccessorAccessViolation + SpaceAwareAccessor + ) + ENDIF() + foreach(Name IN LISTS ${Tag}_TESTNAMES1B) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
@@ -217,7 +238,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) endforeach() SET(${Tag}_SOURCES2A) - foreach(Name + SET(${TAG}_TESTNAMES2A TeamBasic TeamCombinedReducers TeamMDRange @@ -234,8 +255,10 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) ViewAPI_c ViewAPI_d ViewAPI_e + ViewBadAlloc ViewCopy_a ViewCopy_b + ViewCopy_c ViewCtorDimMatch ViewEmptyRuntimeUnmanaged ViewHooks @@ -245,11 +268,21 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) ViewMapping_subview ViewMemoryAccessViolation ViewOfClass + ViewOfViews ViewOutOfBoundsAccess ViewResize WorkGraph WithoutInitializing ) + # Workaround to internal compiler error with intel classic compilers + # when using -no-ip flag in ViewCopy_c + # See issue: https://github.com/kokkos/kokkos/issues/7084 + IF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(REMOVE_ITEM ${Tag}_TESTNAMES2A + ViewCopy_c + ) + endif() + foreach(Name IN LISTS ${Tag}_TESTNAMES2A) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
@@ -353,6 +386,7 @@ foreach(PairDeviceSpace HIP-HostPinned;HIP-Managed;Cuda-HostPinned;Cuda-UVM;SYCL ViewAPI_e ViewCopy_a ViewCopy_b + ViewCopy_c ViewMapping_a ViewMapping_b ViewMapping_subview @@ -648,12 +682,6 @@ if(Kokkos_ENABLE_SERIAL) UnitTestMainInit.cpp ${Serial_SOURCES2} ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - CoreUnitTest_SerialGraph - SOURCES - UnitTestMainInit.cpp - serial/TestSerial_Graph.cpp - ) endif() if(Kokkos_ENABLE_THREADS) @@ -681,12 +709,6 @@ if (Kokkos_ENABLE_OPENMP) UnitTestMain.cpp openmp/TestOpenMP_InterOp.cpp ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - CoreUnitTest_OpenMPGraph - SOURCES - UnitTestMainInit.cpp - openmp/TestOpenMP_Graph.cpp - ) endif() if(Kokkos_ENABLE_HPX) @@ -794,12 +816,6 @@ if(Kokkos_ENABLE_CUDA) UnitTestMainInit.cpp cuda/TestCuda_InterOp_StreamsMultiGPU.cpp ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - CoreUnitTest_CudaGraph - SOURCES - UnitTestMainInit.cpp - cuda/TestCuda_Graph.cpp - ) endif() if(Kokkos_ENABLE_HIP) @@ -827,12 +843,6 @@ if(Kokkos_ENABLE_HIP) UnitTestMain.cpp hip/TestHIP_InterOp_Streams.cpp ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_HIPGraph - SOURCES - UnitTestMainInit.cpp - hip/TestHIP_Graph.cpp - ) endif() if(Kokkos_ENABLE_SYCL) @@ -902,15 +912,21 @@ if(Kokkos_ENABLE_SYCL) KOKKOS_ADD_EXECUTABLE_AND_TEST( CoreUnitTest_SYCLInterOpInit_Context SOURCES - UnitTestMainInit.cpp + UnitTestMainInit.cpp sycl/TestSYCL_InterOp_Init_Context.cpp ) KOKKOS_ADD_EXECUTABLE_AND_TEST( CoreUnitTest_SYCLInterOpStreams SOURCES - UnitTestMain.cpp + UnitTestMain.cpp sycl/TestSYCL_InterOp_Streams.cpp ) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_SYCLInterOpStreamsMultiGPU + SOURCES + UnitTestMainInit.cpp + sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp + ) endif() SET(DEFAULT_DEVICE_SOURCES @@ -993,6 +1009,13 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_PushFinalizeHook.cpp ) +KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_ScopeGuard + SOURCES + UnitTestMain.cpp + UnitTest_ScopeGuard.cpp +) + # This test is intended for development and 
debugging by putting code # into TestDefaultDeviceDevelop.cpp. By default its empty. KOKKOS_ADD_EXECUTABLE_AND_TEST( @@ -1002,23 +1025,35 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( default/TestDefaultDeviceDevelop.cpp ) -# This test is special, because it passes exactly when it prints the -# message "PASSED: I am the custom std::terminate handler.", AND calls -# std::terminate. This means that we can't use -# KOKKOS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. - -KOKKOS_ADD_TEST_EXECUTABLE( push_finalize_hook_terminate - SOURCES UnitTest_PushFinalizeHook_terminate.cpp -) +# With MSVC, the terminate handler is called and prints the message but the +# program does not seem to exit and we get a timeout with ctest. +if (NOT WIN32) + # This test is special, because it passes exactly when it prints the + # message "PASSED: I am the custom std::terminate handler.", AND calls + # std::terminate. This means that we can't use + # KOKKOS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. + KOKKOS_ADD_TEST_EXECUTABLE( + CoreUnitTest_PushFinalizeHookTerminate + SOURCES UnitTest_PushFinalizeHook_terminate.cpp + ) + add_test( + NAME Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex + COMMAND ${CMAKE_COMMAND} -E env $ + ) + set_property( + TEST Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex + PROPERTY PASS_REGULAR_EXPRESSION "PASSED: I am the custom std::terminate handler." + ) + add_test( + NAME Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails + COMMAND ${CMAKE_COMMAND} -E env $ + ) + set_property( + TEST Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails + PROPERTY WILL_FAIL TRUE + ) +endif() -KOKKOS_ADD_ADVANCED_TEST( CoreUnitTest_PushFinalizeHook_terminate - TEST_0 - EXEC push_finalize_hook_terminate - NUM_MPI_PROCS 1 - PASS_REGULAR_EXPRESSION - "PASSED: I am the custom std::terminate handler." 
- ALWAYS_FAIL_ON_ZERO_RETURN -) if(KOKKOS_ENABLE_TUNING) KOKKOS_ADD_EXECUTABLE_AND_TEST( CoreUnitTest_TuningBuiltins @@ -1243,7 +1278,7 @@ if (NOT KOKKOS_HAS_TRILINOS) ) add_test( NAME Kokkos_CoreUnitTest_DeviceAndThreads - COMMAND ${Python3_EXECUTABLE} -m unittest -v $/TestDeviceAndThreads.py + COMMAND ${Python3_EXECUTABLE} $/TestDeviceAndThreads.py -v ) endif() endif() diff --git a/packages/kokkos/core/unit_test/Makefile b/packages/kokkos/core/unit_test/Makefile index 202809d3fc98..a4d65687e547 100644 --- a/packages/kokkos/core/unit_test/Makefile +++ b/packages/kokkos/core/unit_test/Makefile @@ -62,7 +62,7 @@ else STACK_TRACE_TERMINATE_FILTER := endif -TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other ParallelScanRangePolicy RangePolicy RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize +TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other ParallelScanRangePolicy RangePolicy 
RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewCopy_c ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ tmp2 := $(foreach test, $(TESTS), \ @@ -73,7 +73,7 @@ tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ ) \ ) -GPU_SPACE_TESTS = SharedAlloc ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewMapping_a ViewMapping_b ViewMapping_subview +GPU_SPACE_TESTS = SharedAlloc ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewCopy_c ViewMapping_a ViewMapping_b ViewMapping_subview SUBVIEW_TESTS = SubView_a SubView_b SubView_c01 SubView_c02 SubView_c03 SubView_c04 SubView_c05 SubView_c06 SubView_c07 SubView_c08 SubView_c09 SubView_c10 SubView_c11 SubView_c12 SubView_c13 @@ -110,14 +110,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA += TestCuda_Init.o OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o OBJ_CUDA += TestCuda_RangePolicy.o TestCuda_RangePolicyRequire.o - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o TestCuda_ViewCopy_a.o TestCuda_ViewCopy_b.o + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o TestCuda_ViewCopy_a.o TestCuda_ViewCopy_b.o TestCuda_ViewCopy_c.o OBJ_CUDA += TestCuda_DeepCopyAlignment.o OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewResize.o TestCuda_ViewLayoutStrideAssignment.o OBJ_CUDA += TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o - OBJ_CUDA += 
TestCudaUVM_ViewCopy_a.o TestCudaUVM_ViewCopy_b.o + OBJ_CUDA += TestCudaUVM_ViewCopy_a.o TestCudaUVM_ViewCopy_b.o TestCudaUVM_ViewCopy_c.o OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o OBJ_CUDA += TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o - OBJ_CUDA += TestCudaHostPinned_ViewCopy_a.o TestCudaHostPinned_ViewCopy_b.o + OBJ_CUDA += TestCudaHostPinned_ViewCopy_a.o TestCudaHostPinned_ViewCopy_b.o TestCudaHostPinned_ViewCopy_c.o OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o OBJ_CUDA += TestCuda_View_64bit.o OBJ_CUDA += TestCuda_ViewOfClass.o @@ -162,7 +162,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) OBJ_THREADS += TestThreads_RangePolicy.o TestThreads_RangePolicyRequire.o OBJ_THREADS += TestThreads_View_64bit.o OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o - OBJ_THREADS += TestThreads_ViewCopy_a.o TestThreads_ViewCopy_b.o + OBJ_THREADS += TestThreads_ViewCopy_a.o TestThreads_ViewCopy_b.o TestThreads_ViewCopy_c.o OBJ_THREADS += TestThreads_DeepCopyAlignment.o OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewResize.o TestThreads_ViewLayoutStrideAssignment.o OBJ_THREADS += TestThreads_ViewOfClass.o @@ -198,7 +198,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP += TestOpenMP_RangePolicy.o TestOpenMP_RangePolicyRequire.o OBJ_OPENMP += TestOpenMP_View_64bit.o OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o - OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o TestOpenMP_ViewCopy_a.o TestOpenMP_ViewCopy_b.o + OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o TestOpenMP_ViewCopy_a.o 
TestOpenMP_ViewCopy_b.o TestOpenMP_ViewCopy_c.o OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewResize.o TestOpenMP_ViewLayoutStrideAssignment.o OBJ_OPENMP += TestOpenMP_ViewOfClass.o OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o @@ -237,7 +237,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o #Some commented out code - #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_e.o TestOpenMPTarget_ViewCopy_a.o TestOpenMPTarget_ViewCopy_b.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_e.o TestOpenMPTarget_ViewCopy_a.o TestOpenMPTarget_ViewCopy_b.o TestOpenMPTarget_ViewCopy_c.o OBJ_OPENMPTARGET += TestOpenMPTarget_DeepCopyAlignment.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o @@ -292,7 +292,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) OBJ_HIP += TestHIP_Memory_Requirements.o OBJ_HIP += TestHIP_ParallelScanRangePolicy.o OBJ_HIP += TestHIPHostPinned_ViewAPI_a.o TestHIPHostPinned_ViewAPI_b.o TestHIPHostPinned_ViewAPI_c.o TestHIPHostPinned_ViewAPI_d.o TestHIPHostPinned_ViewAPI_e.o - OBJ_HIP += TestHIPHostPinned_ViewCopy_a.o TestHIPHostPinned_ViewCopy_b.o + OBJ_HIP += TestHIPHostPinned_ViewCopy_a.o TestHIPHostPinned_ViewCopy_b.o TestHIPHostPinned_ViewCopy_c.o OBJ_HIP += TestHIPHostPinned_ViewMapping_a.o TestHIPHostPinned_ViewMapping_b.o TestHIPHostPinned_ViewMapping_subview.o TARGETS += KokkosCore_UnitTest_HIP @@ -307,7 +307,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) OBJ_HPX += TestHPX_RangePolicy.o TestHPX_RangePolicyRequire.o OBJ_HPX += TestHPX_View_64bit.o OBJ_HPX += TestHPX_ViewAPI_a.o TestHPX_ViewAPI_b.o TestHPX_ViewAPI_c.o TestHPX_ViewAPI_d.o TestHPX_ViewAPI_e.o - OBJ_HPX += TestHPX_ViewCopy_a.o 
TestHPX_ViewCopy_b.o + OBJ_HPX += TestHPX_ViewCopy_a.o TestHPX_ViewCopy_b.o TestHPX_ViewCopy_c.o OBJ_HPX += TestHPX_ViewMapping_a.o TestHPX_ViewMapping_b.o TestHPX_ViewMapping_subview.o TestHPX_ViewResize.o OBJ_HPX += TestHPX_ViewOfClass.o OBJ_HPX += TestHPX_SubView_a.o TestHPX_SubView_b.o @@ -347,7 +347,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL += TestSerial_RangePolicy.o TestSerial_RangePolicyRequire.o OBJ_SERIAL += TestSerial_View_64bit.o OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o - OBJ_SERIAL += TestSerial_DeepCopyAlignment.o TestSerial_ViewCopy_a.o TestSerial_ViewCopy_b.o + OBJ_SERIAL += TestSerial_DeepCopyAlignment.o TestSerial_ViewCopy_a.o TestSerial_ViewCopy_b.o TestSerial_ViewCopy_c.o OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewResize.o TestSerial_ViewLayoutStrideAssignment.o OBJ_SERIAL += TestSerial_ViewOfClass.o OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o diff --git a/packages/kokkos/core/unit_test/TestAggregate.hpp b/packages/kokkos/core/unit_test/TestAggregate.hpp deleted file mode 100644 index f1316a7426af..000000000000 --- a/packages/kokkos/core/unit_test/TestAggregate.hpp +++ /dev/null @@ -1,108 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef TEST_AGGREGATE_HPP -#define TEST_AGGREGATE_HPP - -#include - -namespace Test { - -template -void TestViewAggregate() { - using value_type = Kokkos::Array; - using analysis_1d = - Kokkos::Impl::ViewDataAnalysis; - - static_assert( - std::is_same >::value); - - using a32_traits = Kokkos::ViewTraits; - using flat_traits = - Kokkos::ViewTraits; - - static_assert( - std::is_same >::value); - static_assert( - std::is_same::value); - static_assert(a32_traits::rank == 2); - static_assert(a32_traits::rank_dynamic == 2); - - static_assert(std::is_void::value); - static_assert(flat_traits::rank == 3); - static_assert(flat_traits::rank_dynamic == 2); - static_assert(flat_traits::dimension::N2 == 32); - - using a32_type = Kokkos::View **, DeviceType>; - using a32_flat_type = typename a32_type::array_type; - - static_assert(std::is_same::value); - static_assert(std::is_same::value); - static_assert(a32_type::rank == 2); - static_assert(a32_flat_type::rank == 3); - - a32_type x("test", 4, 5); - a32_flat_type y(x); - - ASSERT_EQ(x.extent(0), 4u); - ASSERT_EQ(x.extent(1), 5u); - ASSERT_EQ(y.extent(0), 4u); - ASSERT_EQ(y.extent(1), 5u); - ASSERT_EQ(y.extent(2), 32u); - - // Initialize arrays from brace-init-list as for std::array. - // - // Comment: Clang will issue the following warning if we don't use double - // braces here (one for initializing the Kokkos::Array and one for - // initializing the sub-aggreagate C-array data member), - // - // warning: suggest braces around initialization of subobject - // - // but single brace syntax would be valid as well. 
- Kokkos::Array aggregate_initialization_syntax_1 = {{1.41, 3.14}}; - ASSERT_FLOAT_EQ(aggregate_initialization_syntax_1[0], 1.41); - ASSERT_FLOAT_EQ(aggregate_initialization_syntax_1[1], 3.14); - - Kokkos::Array aggregate_initialization_syntax_2{ - {0, 1, 2}}; // since C++11 - for (int i = 0; i < 3; ++i) { - ASSERT_EQ(aggregate_initialization_syntax_2[i], i); - } - - // Note that this is a valid initialization. - Kokkos::Array initialized_with_one_argument_missing = {{255, 255}}; - for (int i = 0; i < 2; ++i) { - ASSERT_DOUBLE_EQ(initialized_with_one_argument_missing[i], 255); - } - // But the following line would not compile - // Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; - - // The code below must compile for zero-sized arrays. - using T = float; - - constexpr int N = 0; - Kokkos::Array a; - for (int i = 0; i < N; ++i) { - a[i] = T(); - } -} - -TEST(TEST_CATEGORY, view_aggregate) { TestViewAggregate(); } - -} // namespace Test - -#endif /* #ifndef TEST_AGGREGATE_HPP */ diff --git a/packages/kokkos/core/unit_test/TestArray.cpp b/packages/kokkos/core/unit_test/TestArray.cpp index 673d0036b716..cb713a178263 100644 --- a/packages/kokkos/core/unit_test/TestArray.cpp +++ b/packages/kokkos/core/unit_test/TestArray.cpp @@ -15,9 +15,19 @@ //@HEADER #include +#include namespace { +// nvcc errors on variables only used in static_asserts +// Passing those variables to this function should eliminate the warning +template +KOKKOS_FUNCTION constexpr void maybe_unused(Ts&&...) 
{} + +template +using equality_comparable = + decltype(std::declval() == std::declval()); + KOKKOS_FUNCTION constexpr bool test_array() { constexpr Kokkos::Array a{{1, 2}}; @@ -49,17 +59,6 @@ KOKKOS_FUNCTION constexpr bool test_array_structured_binding_support() { static_assert(test_array_structured_binding_support()); -template -KOKKOS_FUNCTION constexpr bool is_equal(L const& l, R const& r) { - if (std::size(l) != std::size(r)) return false; - - for (size_t i = 0; i != std::size(l); ++i) { - if (l[i] != r[i]) return false; - } - - return true; -} - // Disable ctad test for intel versions < 2021, see issue #6702 #if !defined(KOKKOS_COMPILER_INTEL) || KOKKOS_COMPILER_INTEL >= 2021 KOKKOS_FUNCTION constexpr bool test_array_ctad() { @@ -67,10 +66,180 @@ KOKKOS_FUNCTION constexpr bool test_array_ctad() { constexpr Kokkos::Array a{1, 2, 3, 5, x}; constexpr Kokkos::Array b{1, 2, 3, 5, x}; - return std::is_same_v && is_equal(a, b); + return std::is_same_v && a == b; } static_assert(test_array_ctad()); #endif +KOKKOS_FUNCTION constexpr bool test_array_aggregate_initialization() { + // Initialize arrays from brace-init-list as for std::array. + + Kokkos::Array aggregate_initialization_syntax_1 = {1.41f, 3.14f}; + if ((aggregate_initialization_syntax_1[0] != 1.41f) || + (aggregate_initialization_syntax_1[1] != 3.14f)) + return false; + + Kokkos::Array aggregate_initialization_syntax_2{ + {0, 1, 2}}; // since C++11 + if ((aggregate_initialization_syntax_2[0] != 0) || + (aggregate_initialization_syntax_2[1] != 1) || + (aggregate_initialization_syntax_2[2] != 2)) + return false; + + // Note that this is a valid initialization. 
+ Kokkos::Array initialized_with_one_argument_missing = {{255, 255}}; + if ((initialized_with_one_argument_missing[0] != 255) || + (initialized_with_one_argument_missing[1] != 255) || + (initialized_with_one_argument_missing[2] != 0)) + return false; + + // But the following line would not compile + // Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; + + return true; +} + +static_assert(test_array_aggregate_initialization()); + +// A few compilers, such as GCC 8.4, were erroring out when the function below +// appeared in a constant expression because +// Kokkos::Array::operator[] is non-constexpr. The issue +// disappears with GCC 9.1 (https://godbolt.org/z/TG4TEef1b). As a workaround, +// the static_assert was dropped and the [[maybe_unused]] is used as an attempt +// to silent warnings that the function is never used. +[[maybe_unused]] KOKKOS_FUNCTION void test_array_zero_sized() { + using T = float; + + // The code below must compile for zero-sized arrays. + constexpr int N = 0; + Kokkos::Array a; + for (int i = 0; i < N; ++i) { + a[i] = T(); + } +} + +constexpr bool test_array_const_qualified_element_type() { + Kokkos::Array a{255}; + return a[0] == 255; +} + +static_assert(test_array_const_qualified_element_type()); + +// User-defined type providing a sepcialization of kokkos_swap +struct MyInt { + int i; + + private: + friend constexpr KOKKOS_FUNCTION void kokkos_swap(MyInt& lhs, + MyInt& rhs) noexcept { + lhs.i = 255; + rhs.i = 127; + } +}; + +constexpr bool test_array_specialization_kokkos_swap() { + Kokkos::Array a{MyInt{1}, MyInt{2}}; + Kokkos::Array b{MyInt{11}, MyInt{22}}; + + // sanity check + if (a[0].i != 1 || a[1].i != 2 || b[0].i != 11 || b[1].i != 22) { + return false; + } + + using Kokkos::kokkos_swap; + kokkos_swap(a, b); + + // check that the user-definied kokkos_swap(MyInt) overload was called + if (a[0].i != 255 || a[1].i != 255 || b[0].i != 127 || b[1].i != 127) { + return false; + } + + return true; +} + 
+static_assert(test_array_specialization_kokkos_swap()); + +constexpr bool test_to_array() { + // copies a string literal + [[maybe_unused]] auto a1 = Kokkos::to_array("foo"); + static_assert(a1.size() == 4); + maybe_unused(a1); + + // deduces both element type and length + [[maybe_unused]] auto a2 = Kokkos::to_array({0, 2, 1, 3}); + static_assert(std::is_same_v>); + maybe_unused(a2); + +// gcc8, icc, and nvcc 11.3 do not support the implicit conversion +#if !(defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 910)) && \ + !(defined(KOKKOS_COMPILER_INTEL) && (KOKKOS_COMPILER_INTEL < 2021)) && \ + !(defined(KOKKOS_COMPILER_NVCC) && (KOKKOS_COMPILER_NVCC < 1140)) + // deduces length with element type specified + // implicit conversion happens + [[maybe_unused]] auto a3 = Kokkos::to_array({0, 1, 3}); + static_assert(std::is_same_v>); + maybe_unused(a3); +#endif + + return true; +} + +static_assert(test_to_array()); + +constexpr bool test_array_equality_comparable() { + using C0 = Kokkos::Array; + using C2 = Kokkos::Array; + using C3 = Kokkos::Array; + using I0 = Kokkos::Array; + using I2 = Kokkos::Array; + using I3 = Kokkos::Array; + + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + 
static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + + return true; +} + +static_assert(test_array_equality_comparable()); + } // namespace diff --git a/packages/kokkos/core/unit_test/TestArrayOps.hpp b/packages/kokkos/core/unit_test/TestArrayOps.hpp index 065285727147..29a452b660c1 100644 --- a/packages/kokkos/core/unit_test/TestArrayOps.hpp +++ b/packages/kokkos/core/unit_test/TestArrayOps.hpp @@ -92,6 +92,31 @@ TEST(TEST_CATEGORY, array_element_access) { ASSERT_EQ(ca.data()[index], a[index]); } +TEST(TEST_CATEGORY, array_operator_equal) { + using A = Kokkos::Array; + constexpr A a{{3, 5}}; + constexpr A b{{3, 5}}; + constexpr A c{{5, 3}}; + + static_assert(a == b); + static_assert(!(a == c)); + static_assert(a != c); + + ASSERT_TRUE(a == b); + ASSERT_FALSE(a == c); + ASSERT_TRUE(a != c); + + using E = Kokkos::Array; + constexpr E e; + constexpr E f; + + static_assert(e == f); + static_assert(!(e != f)); + + ASSERT_TRUE(e == f); + ASSERT_FALSE(e != f); +} + TEST(TEST_CATEGORY, array_zero_capacity) { using A = Kokkos::Array; A e; @@ -111,6 +136,8 @@ TEST(TEST_CATEGORY, array_zero_data_nullptr) { ASSERT_EQ(ce.data(), nullptr); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() TEST(TEST_CATEGORY, array_contiguous_capacity) { using A = Kokkos::Array::contiguous>; @@ -389,5 +416,7 @@ TEST(TEST_CATEGORY, array_strided_assignment) { 
ASSERT_EQ(e.max_size(), std::size(ee) / eStride); ASSERT_EQ(e[0], ee[0]); } +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() +#endif } // namespace diff --git a/packages/kokkos/core/unit_test/TestAtomicOperations.hpp b/packages/kokkos/core/unit_test/TestAtomicOperations.hpp index cd7ba47aa1e9..957ba9a7aa0d 100644 --- a/packages/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/packages/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -459,9 +459,11 @@ bool AtomicOperationsTestIntegralType(int old_val_in, int update_in, int test) { case 12: return true; #else case 11: - return update_in >= 0 ? atomic_op_test( - old_val, update) - : true; + return (std::make_signed_t(update_in) >= 0 && + std::make_signed_t(old_val) >= 0) + ? atomic_op_test(old_val, + update) + : true; case 12: return update_in >= 0 ? atomic_op_test( old_val, update) diff --git a/packages/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp b/packages/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp index 2f3bcfe817df..fe015404f1b3 100644 --- a/packages/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp +++ b/packages/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp @@ -827,12 +827,6 @@ struct TestBitCastFunction { } } -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if constexpr (std::is_same_v) { - return; - } -#endif struct S { int i; diff --git a/packages/kokkos/core/unit_test/TestComplex.hpp b/packages/kokkos/core/unit_test/TestComplex.hpp index 5501a35b7f0f..ef6a21cd3703 100644 --- a/packages/kokkos/core/unit_test/TestComplex.hpp +++ b/packages/kokkos/core/unit_test/TestComplex.hpp @@ -15,9 +15,26 @@ //@HEADER #include -#include #include +// Suppress "'long double' is treated as 'double' in device code" +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress 20208 +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress 20208 +#endif +#endif +#endif + 
+namespace { +template +KOKKOS_FUNCTION constexpr void maybe_unused(Ts &&...) noexcept {} +} // namespace + namespace Test { // Test construction and assignment @@ -532,4 +549,151 @@ TEST(TEST_CATEGORY, complex_operations_arithmetic_types_overloads) { Kokkos::complex>::value)); } +template +struct TestComplexStructuredBindings { + using exec_space = ExecSpace; + using value_type = double; + using complex_type = Kokkos::complex; + using device_view_type = Kokkos::View; + using host_view_type = typename device_view_type::HostMirror; + + device_view_type d_results; + host_view_type h_results; + + // tuple_size + static_assert(std::is_same_v::type, + std::integral_constant>); + + // tuple_element + static_assert( + std::is_same_v, value_type>); + static_assert( + std::is_same_v, value_type>); + + static void testgetreturnreferencetypes() { + complex_type m; + const complex_type c; + + // get lvalue + complex_type &ml = m; + static_assert(std::is_same_v(ml)), value_type &>); + static_assert(std::is_same_v(ml)), value_type &>); + + // get rvalue + complex_type &&mr = std::move(m); + static_assert( + std::is_same_v(std::move(mr))), value_type &&>); + static_assert( + std::is_same_v(std::move(mr))), value_type &&>); + + // get const lvalue + const complex_type &cl = c; + static_assert( + std::is_same_v(cl)), value_type const &>); + static_assert( + std::is_same_v(cl)), value_type const &>); + + // get const rvalue + complex_type const &&cr = std::move(c); + static_assert(std::is_same_v(std::move(cr))), + value_type const &&>); + static_assert(std::is_same_v(std::move(cr))), + value_type const &&>); + + maybe_unused(m, c, ml, mr, cl, cr); + } + + void testit() { + testgetreturnreferencetypes(); + + d_results = device_view_type("TestComplexStructuredBindings", 6); + h_results = Kokkos::create_mirror_view(d_results); + + Kokkos::parallel_for(Kokkos::RangePolicy(0, 1), *this); + Kokkos::fence(); + Kokkos::deep_copy(h_results, d_results); + + // get lvalue + 
ASSERT_FLOAT_EQ(h_results[0].real(), 2.); + ASSERT_FLOAT_EQ(h_results[0].imag(), 3.); + + // get rvalue + ASSERT_FLOAT_EQ(h_results[1].real(), 2.); + ASSERT_FLOAT_EQ(h_results[1].imag(), 3.); + + // get const lvalue + ASSERT_FLOAT_EQ(h_results[2].real(), 5.); + ASSERT_FLOAT_EQ(h_results[2].imag(), 7.); + + // get const rvalue + ASSERT_FLOAT_EQ(h_results[3].real(), 5.); + ASSERT_FLOAT_EQ(h_results[3].imag(), 7.); + + // swap real and imaginary + ASSERT_FLOAT_EQ(h_results[4].real(), 11.); + ASSERT_FLOAT_EQ(h_results[4].imag(), 13.); + ASSERT_FLOAT_EQ(h_results[5].real(), 13.); + ASSERT_FLOAT_EQ(h_results[5].imag(), 11.); + } + + KOKKOS_FUNCTION + void operator()(int) const { + complex_type m(2., 3.); + const complex_type c(5., 7.); + + // get lvalue + { + complex_type &ml = m; + auto &[mlr, mli] = ml; + d_results[0] = complex_type(mlr, mli); + } + + // get rvalue + { + complex_type &&mr = std::move(m); + auto &&[mrr, mri] = std::move(mr); + d_results[1] = complex_type(mrr, mri); + } + + // get const lvalue + { + const complex_type &cl = c; + auto &[clr, cli] = cl; + d_results[2] = complex_type(clr, cli); + } + + // get const rvalue + { + complex_type const &&cr = std::move(c); + auto &&[crr, cri] = std::move(cr); + d_results[3] = complex_type(crr, cri); + } + + // swap real and imaginary + { + complex_type z(11., 13.); + d_results[4] = z; + + auto &[zr, zi] = z; + Kokkos::kokkos_swap(zr, zi); + d_results[5] = z; + } + } +}; + +TEST(TEST_CATEGORY, complex_structured_bindings) { + TestComplexStructuredBindings test; + test.testit(); +} + } // namespace Test + +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#endif diff --git a/packages/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp b/packages/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp new file mode 100644 index 000000000000..a83355c51fea --- /dev/null +++ 
b/packages/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp @@ -0,0 +1,327 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +namespace { + +#ifdef KOKKOS_ENABLE_OPENMP +template +void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { +#pragma omp parallel num_threads(2) + { + if (omp_get_thread_num() == 0) l1(); + if (omp_get_thread_num() == 1) l2(); + } +} +// We cannot run the multithreaded test when threads or HPX is enabled because +// we cannot launch a thread from inside another thread +#elif !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_HPX) +template +void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { + std::thread t1(l1); + std::thread t2(l2); + t1.join(); + t2.join(); +} +#else +template +void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { + l1(); + l2(); +} +#endif + +// The idea for all of these tests is to access a View from kernels submitted by +// two different threads to the same execution space instance. If the kernels +// are executed concurrently, we expect to count too many increments. 
+void run_exec_space_thread_safety_range() { + constexpr int N = 10000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_for( + Kokkos::RangePolicy(exec, 0, 1), KOKKOS_LAMBDA(int) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + }); + } + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_range) { +#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail with OpenACC"; +#endif +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail for OpenMPTarget"; +#endif + run_exec_space_thread_safety_range(); +} + +void run_exec_space_thread_safety_mdrange() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_for( + Kokkos::MDRangePolicy>(exec, {0, 0}, + {1, 1}), + KOKKOS_LAMBDA(int, int) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + }); + } + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_mdrange) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail for OpenMPTarget"; +#endif + 
run_exec_space_thread_safety_mdrange(); +} + +void run_exec_space_thread_safety_team_policy() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_for( + Kokkos::TeamPolicy(exec, 1, 1, 1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy::member_type + &team_member) { + Kokkos::single(Kokkos::PerTeam(team_member), [=]() { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + }); + }); + } + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_team_policy) { +// FIXME_OPENMPTARGET +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping for OpenMPTarget since the test is designed to " + "run with vector_length=1"; +#endif + run_exec_space_thread_safety_team_policy(); +} + +void run_exec_space_thread_safety_range_reduce() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, 1), + KOKKOS_LAMBDA(int, int &update) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) ++update; + }, + error); + } + exec.fence(); + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_range_reduce) { + run_exec_space_thread_safety_range_reduce(); +} + +void 
run_exec_space_thread_safety_mdrange_reduce() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>(exec, {0, 0}, + {1, 1}), + KOKKOS_LAMBDA(int, int, int &update) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) ++update; + }, + error); + } + exec.fence(); + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_mdrange_reduce) { +// FIXME_INTEL +#if defined(KOKKOS_COMPILER_INTEL) && defined(KOKKOS_ENABLE_OPENMP) + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail for OpenMP using the " + "legacy Intel compiler"; +#endif + run_exec_space_thread_safety_mdrange_reduce(); +} + +void run_exec_space_thread_safety_team_policy_reduce() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_reduce( + Kokkos::TeamPolicy(exec, 1, 1, 1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy::member_type + &team_member, + int &update) { + Kokkos::single(Kokkos::PerTeam(team_member), [=, &update]() { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) ++update; + }); + }, + error); + } + }; + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_team_policy_reduce) { +// FIXME_OPENMPTARGET +#ifdef 
KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping for OpenMPTarget since the test is designed to " + "run with vector_length=1"; +#endif + // FIXME_SYCL +#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is know to fail with SYCL+Cuda"; +#endif + run_exec_space_thread_safety_team_policy_reduce(); +} + +void run_exec_space_thread_safety_range_scan() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_scan( + Kokkos::RangePolicy(exec, 0, 1), + KOKKOS_LAMBDA(int, int &, const bool final) { + if (final) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + } + }); + } + exec.fence(); + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_range_scan) { +#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail with OpenACC"; +#endif + run_exec_space_thread_safety_range_scan(); +} + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestExecutionSpace.hpp b/packages/kokkos/core/unit_test/TestExecutionSpace.hpp index 983a5975afd6..d4142dee18be 100644 --- a/packages/kokkos/core/unit_test/TestExecutionSpace.hpp +++ b/packages/kokkos/core/unit_test/TestExecutionSpace.hpp @@ -44,4 +44,60 @@ TEST(TEST_CATEGORY, execution_space_as_class_data_member) { } #endif +constexpr bool test_execspace_explicit_construction() { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +#ifdef KOKKOS_ENABLE_SERIAL + static_assert(std::is_convertible_v); +#endif +#ifdef 
KOKKOS_ENABLE_OPENMP + static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_CUDA + static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HIP + static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HPX + static_assert(std::is_convertible_v); + static_assert( + std::is_convertible_v&&, + Kokkos::Experimental::HPX>); +#endif +#else +#ifdef KOKKOS_ENABLE_SERIAL + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_OPENMP + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_CUDA + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HIP + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HPX + static_assert(!std::is_convertible_v); + static_assert(!std::is_convertible_v< + hpx::execution::experimental::unique_any_sender<>&&, + Kokkos::Experimental::HPX>); +#endif +#endif + +#ifdef KOKKOS_ENABLE_OPENACC + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_SYCL + static_assert( + !std::is_convertible_v); +#endif + + return true; +} + +static_assert(test_execspace_explicit_construction()); + } // namespace diff --git a/packages/kokkos/core/unit_test/TestGraph.hpp b/packages/kokkos/core/unit_test/TestGraph.hpp index 9a36d08f445a..f9dc63d30c45 100644 --- a/packages/kokkos/core/unit_test/TestGraph.hpp +++ b/packages/kokkos/core/unit_test/TestGraph.hpp @@ -21,6 +21,21 @@ namespace Test { +template +struct NoOpReduceFunctor { + KOKKOS_FUNCTION void operator()(int, ValueType&) const { + Kokkos::abort("Should never be called!"); + } + KOKKOS_FUNCTION void operator()(int, int, ValueType&) const { + Kokkos::abort("Should never be called!"); + } + KOKKOS_FUNCTION void operator()( + const typename Kokkos::TeamPolicy::member_type&, + ValueType&) const { + Kokkos::abort("Should never be called!"); + } +}; + template struct CountTestFunctor { using value_type = int; @@ -66,7 +81,7 @@ struct SetResultToViewFunctor { } }; -struct 
TEST_CATEGORY_FIXTURE(count_bugs) : public ::testing::Test { +struct TEST_CATEGORY_FIXTURE(graph) : public ::testing::Test { public: using count_functor = CountTestFunctor; using set_functor = SetViewToValueFunctor; @@ -88,7 +103,7 @@ struct TEST_CATEGORY_FIXTURE(count_bugs) : public ::testing::Test { } }; -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_one) { auto graph = Kokkos::Experimental::create_graph([&](auto root) { root.then_parallel_for(1, count_functor{count, bugs, 0, 0}); @@ -101,7 +116,7 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one) { ASSERT_EQ(0, bugs_host()); } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one_rvalue) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_one_rvalue) { Kokkos::Experimental::create_graph(ex, [&](auto root) { root.then_parallel_for(1, count_functor{count, bugs, 0, 0}); }).submit(); @@ -112,7 +127,17 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one_rvalue) { ASSERT_EQ(0, bugs_host()); } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_six) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_six) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET team_size incompatible + if (std::is_same_v) + GTEST_SKIP() << "skipping since OpenMPTarget can't use team_size 1"; +#endif +#if defined(KOKKOS_ENABLE_SYCL) && \ + !defined(SYCL_EXT_ONEAPI_GRAPH) // FIXME_SYCL + if (std::is_same_v) + GTEST_SKIP() << "skipping since test case is known to fail with SYCL"; +#endif + auto graph = Kokkos::Experimental::create_graph(ex, [&](auto root) { auto f_setup_count = root.then_parallel_for(1, set_functor{count, 0}); auto f_setup_bugs = root.then_parallel_for(1, set_functor{bugs, 0}); @@ -145,7 +170,7 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_six) { ASSERT_EQ(0, bugs_host()); } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), when_all_cycle) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), when_all_cycle) { view_type reduction_out{"reduction_out"}; view_host 
reduction_host{"reduction_host"}; Kokkos::Experimental::create_graph(ex, [&](auto root) { @@ -172,7 +197,7 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), when_all_cycle) { // This test is disabled because we don't currently support copying to host, // even asynchronously. We _may_ want to do that eventually? -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), DISABLED_repeat_chain) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), DISABLED_repeat_chain) { auto graph = Kokkos::Experimental::create_graph( ex, [&, count_host = count_host](auto root) { //---------------------------------------- @@ -198,10 +223,27 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), DISABLED_repeat_chain) { //---------------------------------------- } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), zero_work_reduce) { - auto graph = Kokkos::Experimental::create_graph(ex, [&](auto root) { - root.then_parallel_reduce(0, set_result_functor{bugs}, count); - }); +TEST_F(TEST_CATEGORY_FIXTURE(graph), zero_work_reduce) { + auto graph = Kokkos::Experimental::create_graph( + ex, [&](Kokkos::Experimental::GraphNodeRef root) { + NoOpReduceFunctor no_op_functor; + root.then_parallel_reduce(Kokkos::RangePolicy(0, 0), + no_op_functor, count) +#if !defined(KOKKOS_ENABLE_SYCL) || \ + defined(SYCL_EXT_ONEAPI_GRAPH) // FIXME_SYCL +#if !defined(KOKKOS_ENABLE_CUDA) && \ + !defined(KOKKOS_ENABLE_HIP) // FIXME_CUDA FIXME_HIP + .then_parallel_reduce( + Kokkos::MDRangePolicy>{{0, 0}, + {0, 0}}, + no_op_functor, count) +#endif + .then_parallel_reduce( + Kokkos::TeamPolicy{0, Kokkos::AUTO}, + no_op_functor, count) +#endif + ; + }); // These fences are only necessary because of the weirdness of how CUDA // UVM works on pre pascal cards. #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) && \ @@ -214,12 +256,15 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), zero_work_reduce) { // UVM works on pre pascal cards. 
#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) && \ (defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL)) - Kokkos::fence(); + if constexpr (std::is_same_v) Kokkos::fence(); +#endif +#ifdef KOKKOS_ENABLE_HPX // FIXME_HPX graph.submit() isn't properly enqueued + if constexpr (std::is_same_v) + Kokkos::fence(); #endif - graph.submit(); // should reset to 0, but doesn't + graph.submit(); Kokkos::deep_copy(ex, count_host, count); ex.fence(); ASSERT_EQ(count_host(), 0); } - } // end namespace Test diff --git a/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp b/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp index 1ee23a47c456..c6ee687cf91b 100644 --- a/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp +++ b/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp @@ -907,13 +907,7 @@ void impl_test_local_deepcopy_rangepolicy_rank_7(const int N) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutleft) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif - using ViewType = Kokkos::View; + using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_teampolicy_rank_1(8); @@ -940,13 +934,7 @@ TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutleft) { //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutleft) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif - using ViewType = Kokkos::View; + using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_rangepolicy_rank_1(8); @@ 
-973,12 +961,6 @@ TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutleft) { //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutright) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif using ViewType = Kokkos::View; { // Rank-1 @@ -1006,12 +988,6 @@ TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutright) { //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutright) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif using ViewType = Kokkos::View; diff --git a/packages/kokkos/core/unit_test/TestMDSpan.hpp b/packages/kokkos/core/unit_test/TestMDSpan.hpp index ef0bea1394a6..fa88b547a5f0 100644 --- a/packages/kokkos/core/unit_test/TestMDSpan.hpp +++ b/packages/kokkos/core/unit_test/TestMDSpan.hpp @@ -35,13 +35,19 @@ void test_mdspan_minimal_functional() { Kokkos::parallel_reduce( "CheckMinimalMDSpan", Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(int i, int& err) { +#if !defined(KOKKOS_ENABLE_OPENACC) Kokkos::mdspan> b_mds(a.data(), N); -#ifdef KOKKOS_ENABLE_CXX23 +#endif +#if !defined(KOKKOS_ENABLE_CXX17) && !defined(KOKKOS_ENABLE_CXX20) if (a_mds[i] != i) err++; +#if !defined(KOKKOS_ENABLE_OPENACC) if (b_mds[i] != i) err++; +#endif #else if (a_mds(i) != i) err++; +#if !defined(KOKKOS_ENABLE_OPENACC) if (b_mds(i) != i) err++; +#endif #endif }, errors); diff --git a/packages/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp 
b/packages/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp new file mode 100644 index 000000000000..04460e641951 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp @@ -0,0 +1,112 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#ifndef KOKKOS_ENABLE_CXX17 +#include +#endif + +template +void test_atomic_accessor() { + using value_type = std::remove_const_t; + Kokkos::View v("V", 100); + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i) { v(i) = i; }); + + int errors; + using acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + acc_t acc{}; + typename acc_t::data_handle_type ptr = v.data(); + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i, int& error) { + if (acc.access(ptr, i) != ptr[i]) error++; + if (acc.offset(ptr, i) != ptr + i) error++; + static_assert(std::is_same_v); + static_assert( + std::is_same_v>); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_nothrow_move_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_swappable_v); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_default_constructible_v); + static_assert(std::is_trivially_constructible_v); + static_assert(std::is_trivially_move_constructible_v); + 
static_assert(std::is_trivially_assignable_v); + static_assert(std::is_trivially_move_assignable_v); +#ifndef KOKKOS_ENABLE_CXX17 + static_assert(std::copyable); + static_assert(std::is_empty_v); +#endif + }, + errors); + ASSERT_EQ(errors, 0); +} + +void test_atomic_accessor_conversion() { + using ExecutionSpace = TEST_EXECSPACE; + using T = float; + using acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + using const_acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + using int_acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + using defacc_t = Kokkos::default_accessor; + using const_defacc_t = Kokkos::default_accessor; + using int_defacc_t = Kokkos::default_accessor; + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, 1), KOKKOS_LAMBDA(int) { + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + static_assert(!std::is_convertible_v); + static_assert(!std::is_convertible_v); + static_assert(!std::is_convertible_v); + }); +} + +TEST(TEST_CATEGORY, mdspan_atomic_accessor) { + using ExecutionSpace = TEST_EXECSPACE; + test_atomic_accessor(); + test_atomic_accessor(); +} diff --git a/packages/kokkos/core/unit_test/TestMDSpanConversion.hpp b/packages/kokkos/core/unit_test/TestMDSpanConversion.hpp new file mode 100644 index 000000000000..10123901c43a --- /dev/null +++ 
b/packages/kokkos/core/unit_test/TestMDSpanConversion.hpp @@ -0,0 +1,507 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#include "experimental/__p0009_bits/layout_stride.hpp" + +namespace { + +template +struct TestViewMDSpanConversion { + using value_type = T; + + template + using layout_left_padded = Kokkos::Experimental::layout_left_padded; + + template + using layout_right_padded = + Kokkos::Experimental::layout_right_padded; + + struct TestAccessor { + using offset_policy = TestAccessor; + using element_type = value_type; + using reference = element_type &; + using data_handle_type = element_type *; + + constexpr TestAccessor() noexcept = default; + constexpr reference access(data_handle_type p, std::size_t i) noexcept { + return p[i]; + } + constexpr data_handle_type offset(data_handle_type p, + std::size_t i) noexcept { + return p + i; + } + }; + + template + static void test_conversion_from_mdspan( + Kokkos::View ref, + const MDSpanLayoutMapping &mapping) { + using unmanaged_view_type = + Kokkos::View>; + using natural_mdspan_type = typename Kokkos::Impl::MDSpanViewTraits< + typename unmanaged_view_type::traits>::mdspan_type; + using mapping_type = MDSpanLayoutMapping; + using mdspan_layout_type = typename MDSpanLayoutMapping::layout_type; + using extents_type = typename mapping_type::extents_type; + using mdspan_type = + Kokkos::mdspan; + + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v == + 
std::is_convertible_v); + // Manually create an mdspan from ref so we have a valid pointer to play + // with + const auto &exts = mapping.extents(); + auto mds = mdspan_type{ref.data(), mapping}; + + auto test_view = unmanaged_view_type(mds); + + ASSERT_EQ(test_view.data(), ref.data()); + ASSERT_EQ(test_view.data(), mds.data_handle()); + ASSERT_EQ(test_view.layout(), ref.layout()); + for (std::size_t r = 0; r < mdspan_type::rank(); ++r) { + ASSERT_EQ(test_view.extent(r), ref.extent(r)); + ASSERT_EQ(test_view.extent(r), exts.extent(r)); + } + } + + template + static void test_conversion_to_mdspan( + const MDSpanLayoutMapping &ref_layout_mapping, ViewType v) { + using view_type = ViewType; + using natural_mdspan_type = typename Kokkos::Impl::MDSpanViewTraits< + typename view_type::traits>::mdspan_type; + + static_assert(natural_mdspan_type::rank() == view_type::rank); + static_assert(std::is_same_v); + constexpr bool is_strided_layout = + std::is_same_v; + if constexpr (!is_strided_layout) { + static_assert(natural_mdspan_type::mapping_type::padding_value == + Kokkos::dynamic_extent); + } + // test conversion operator to natural mdspan + { + natural_mdspan_type cvt = v; + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + + if constexpr (!is_strided_layout && natural_mdspan_type::rank() > 1) { + ASSERT_EQ(cvt.mapping().stride(1), ref_layout_mapping.stride(1)); + } + } + // test to_mdspan() returning natural mdspan + { + auto cvt = v.to_mdspan(); + static_assert(std::is_same_v); + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + } + // test conversion operator to different mdspan type + { + using element_type = const typename natural_mdspan_type::element_type; + using const_acc_type = Kokkos::Impl::SpaceAwareAccessor< + typename ViewType::memory_space, + Kokkos::default_accessor>; + using mdspan_type = Kokkos::mdspan< + element_type, + Kokkos::dextents, + typename 
natural_mdspan_type::layout_type, const_acc_type>; + mdspan_type cvt = v; + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + } + } + + template + static void test_conversion_to_mdspan_with_accessor( + const MDSpanLayoutMapping &ref_layout_mapping, ViewType v, + const AccessorType &a) { + auto cvt = v.to_mdspan(a); + static_assert(decltype(cvt)::rank() == ViewType::rank); + static_assert(std::is_same_v); + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + } + + template + using natural_mdspan_type_for_view = typename Kokkos::Impl::MDSpanViewTraits< + typename ViewType::traits>::mdspan_type; + + static void run_test() { + // Verify we can only convert to compatible mdspans + static_assert(std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + static_assert( + std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + + // Do not cast const away + static_assert(!std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + + // Mismatched dim + static_assert(!std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + + // Mismatched layouts + static_assert( + !std::is_convertible_v, + natural_mdspan_type_for_view>>); + static_assert( + !std::is_convertible_v, + natural_mdspan_type_for_view>>); + // nvcc doesn't do CTAD properly here, making this way more verbose.. 
+ // LayoutLeft + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + test_conversion_from_mdspan( + Kokkos::View("ref", + 7, 3), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7, 3)}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7, 3)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 7, 3), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + // LayoutRight + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + test_conversion_from_mdspan( + Kokkos::View("ref", + 3, 7), + typename 
layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(3, 7)}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(3, 7)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 3, 7), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + // LayoutStride + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::dextents{7}, + strides}); + } + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, {}, strides}); + } + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::dextents{7}, + strides}); + } + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::extents(), + strides}); + } + + { + const size_t strides[] = {2, 4}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, + Kokkos::dextents(7, 3), strides}); + } + { + const size_t strides[] = {2, 4}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::extents(), + strides}); + } + { + const size_t strides[] = {2, 4}; + 
test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, + Kokkos::dextents(7, 3), strides}); + } + { + const size_t strides[] = {2, 4}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::extents(), + strides}); + } + + // Conversion to mdspan + test_conversion_to_mdspan( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4)); + test_conversion_to_mdspan( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4, + 7)); + + test_conversion_to_mdspan( + layout_right_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", + 4)); + test_conversion_to_mdspan( + layout_right_padded::mapping< + Kokkos::extents>({}, 7), + Kokkos::View("v", 4, + 7)); + + { + const size_t strides[] = {5}; + test_conversion_to_mdspan( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5})); + } + { + const size_t strides[] = {5, 9}; + test_conversion_to_mdspan( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5, 7, 9})); + } + + // Aligned types (for padded layouts) + test_conversion_to_mdspan( + layout_left_padded::mapping< + Kokkos::extents>({}, 128), + Kokkos::View( + Kokkos::view_alloc("v", Kokkos::AllowPadding), 127, 7)); + + test_conversion_to_mdspan( + layout_right_padded::mapping< + Kokkos::extents>({}, 128), + Kokkos::View( + Kokkos::view_alloc("v", Kokkos::AllowPadding), 7, 127)); + + // Conversion with standard default_accessor + + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + Kokkos::default_accessor{}); + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + 
Kokkos::extents>({}, 4), + Kokkos::View("v", 4, + 7), + Kokkos::default_accessor{}); + + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + Kokkos::default_accessor{}); + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 7), + Kokkos::View("v", 4, + 7), + Kokkos::default_accessor{}); + + { + const size_t strides[] = {5}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5}), + Kokkos::default_accessor{}); + } + { + const size_t strides[] = {5, 9}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5, 7, 9}), + Kokkos::default_accessor{}); + } + + // Conversion with a test accessor + + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + TestAccessor{}); + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4, + 7), + TestAccessor{}); + + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + TestAccessor{}); + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 7), + Kokkos::View("v", 4, + 7), + TestAccessor{}); + + { + const size_t strides[] = {5}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5}), + TestAccessor{}); + } + { + const size_t strides[] = {5, 9}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5, 7, 9}), + 
TestAccessor{}); + } + } +}; + +TEST(TEST_CATEGORY, view_mdspan_conversion) { + TestViewMDSpanConversion::run_test(); + TestViewMDSpanConversion::run_test(); + TestViewMDSpanConversion::run_test(); +} + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp b/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp index e446d8132101..f52bfeaff7d9 100644 --- a/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp +++ b/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp @@ -63,8 +63,7 @@ struct TestMathematicalConstants { KOKKOS_FUNCTION void use_on_device() const { #if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET) || \ - defined(KOKKOS_ENABLE_OPENACC) || \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 + defined(KOKKOS_ENABLE_OPENACC) take_by_value(Trait::value); #else (void)take_address_of(Trait::value); diff --git a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp index ad035d4e4bf7..f996c61a527b 100644 --- a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp +++ b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp @@ -1585,34 +1585,24 @@ struct TestIsFinite { Kokkos::printf("failed isfinite(float)\n"); } #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if (!isfinite(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isfinite(quiet_NaN::value) || + if (!isfinite(static_cast(2.f)) || + isfinite(quiet_NaN::value) || isfinite(signaling_NaN::value) || - isfinite(infinity::value) -#endif - ) { + isfinite(infinity::value)) { ++e; Kokkos::printf("failed isfinite(KE::half_t)\n"); } - if (!isfinite(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isfinite(quiet_NaN::value) || + if (!isfinite(static_cast(2.f)) || + isfinite(quiet_NaN::value) || isfinite(signaling_NaN::value) || - isfinite(infinity::value) -#endif - ) { + 
isfinite(infinity::value)) { ++e; Kokkos::printf("failed isfinite(KE::bhalf_t)\n"); } #endif - if (!isfinite(3.) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isfinite(quiet_NaN::value) || + if (!isfinite(3.) || isfinite(quiet_NaN::value) || isfinite(signaling_NaN::value) || - isfinite(infinity::value) -#endif - ) { + isfinite(infinity::value)) { ++e; Kokkos::printf("failed isfinite(double)\n"); } @@ -1666,33 +1656,24 @@ struct TestIsInf { Kokkos::printf("failed isinf(float)\n"); } #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if (isinf(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isinf(quiet_NaN::value) || + if (isinf(static_cast(2.f)) || + isinf(quiet_NaN::value) || isinf(signaling_NaN::value) || - !isinf(infinity::value) -#endif - ) { + !isinf(infinity::value)) { ++e; Kokkos::printf("failed isinf(KE::half_t)\n"); } - if (isinf(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isinf(quiet_NaN::value) || + if (isinf(static_cast(2.f)) || + isinf(quiet_NaN::value) || isinf(signaling_NaN::value) || - !isinf(infinity::value) -#endif - ) { + !isinf(infinity::value)) { ++e; Kokkos::printf("failed isinf(KE::bhalf_t)\n"); } #endif - if (isinf(3.) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isinf(quiet_NaN::value) || - isinf(signaling_NaN::value) || !isinf(infinity::value) -#endif - ) { + if (isinf(3.) 
|| isinf(quiet_NaN::value) || + isinf(signaling_NaN::value) || + !isinf(infinity::value)) { ++e; Kokkos::printf("failed isinf(double)\n"); } @@ -1746,32 +1727,23 @@ struct TestIsNaN { Kokkos::printf("failed isnan(float)\n"); } #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if (isnan(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || !isnan(quiet_NaN::value) || + if (isnan(static_cast(2.f)) || + !isnan(quiet_NaN::value) || !isnan(signaling_NaN::value) || - isnan(infinity::value) -#endif - ) { + isnan(infinity::value)) { ++e; Kokkos::printf("failed isnan(KE::half_t)\n"); } - if (isnan(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || !isnan(quiet_NaN::value) || + if (isnan(static_cast(2.f)) || + !isnan(quiet_NaN::value) || !isnan(signaling_NaN::value) || - isnan(infinity::value) -#endif - ) { + isnan(infinity::value)) { ++e; Kokkos::printf("failed isnan(KE::bhalf_t)\n"); } - if (isnan(3.) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || !isnan(quiet_NaN::value) || - !isnan(signaling_NaN::value) || isnan(infinity::value) -#endif - ) { + if (isnan(3.) || !isnan(quiet_NaN::value) || + !isnan(signaling_NaN::value) || + isnan(infinity::value)) { ++e; Kokkos::printf("failed isnan(double)\n"); } diff --git a/packages/kokkos/core/unit_test/TestMultiGPU.hpp b/packages/kokkos/core/unit_test/TestMultiGPU.hpp new file mode 100644 index 000000000000..aad2fa45f492 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestMultiGPU.hpp @@ -0,0 +1,184 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +void test_policies(TEST_EXECSPACE exec0, Kokkos::View v0, + TEST_EXECSPACE exec, Kokkos::View v) { + using MemorySpace = typename TEST_EXECSPACE::memory_space; + + exec.fence(); + exec0.fence(); + + Kokkos::deep_copy(exec, v, 5); + Kokkos::deep_copy(exec0, v0, 5); + + Kokkos::deep_copy(v, v0); + + int sum; + int sum0; + + Kokkos::parallel_for("Test::Range_0", + Kokkos::RangePolicy(exec0, 0, 100), + Test::FunctorRange(v0)); + Kokkos::parallel_for("Test::Range", + Kokkos::RangePolicy(exec, 0, 100), + Test::FunctorRange(v)); + exec.fence(); + exec0.fence(); + Kokkos::parallel_reduce( + "Test::RangeReduce_0", + Kokkos::RangePolicy>(exec0, + 0, 100), + Test::FunctorRangeReduce(v0), sum0); + Kokkos::parallel_reduce( + "Test::RangeReduce", + Kokkos::RangePolicy>(exec, 0, + 100), + Test::FunctorRangeReduce(v), sum); + ASSERT_EQ(600, sum0); + ASSERT_EQ(600, sum); + + Kokkos::parallel_for("Test::MDRange_0", + Kokkos::MDRangePolicy>( + exec0, {0, 0}, {10, 10}), + Test::FunctorMDRange(v0)); + Kokkos::parallel_for("Test::MDRange", + Kokkos::MDRangePolicy>( + exec, {0, 0}, {10, 10}), + Test::FunctorMDRange(v)); + Kokkos::parallel_reduce("Test::MDRangeReduce_0", + Kokkos::MDRangePolicy, + Kokkos::LaunchBounds<128, 2>>( + exec0, {0, 0}, {10, 10}), + Test::FunctorMDRangeReduce(v0), sum0); + Kokkos::parallel_reduce("Test::MDRangeReduce", + Kokkos::MDRangePolicy, + Kokkos::LaunchBounds<128, 2>>( + exec, {0, 0}, {10, 10}), + Test::FunctorMDRangeReduce(v), sum); + ASSERT_EQ(700, sum0); + ASSERT_EQ(700, sum); + + Kokkos::parallel_for("Test::Team_0", + Kokkos::TeamPolicy(exec0, 10, 10), + Test::FunctorTeam(v0)); + Kokkos::parallel_for("Test::Team", + Kokkos::TeamPolicy(exec, 10, 10), + Test::FunctorTeam(v)); + Kokkos::parallel_reduce( + "Test::Team_0", + Kokkos::TeamPolicy>(exec0, + 10, 10), + Test::FunctorTeamReduce(v0), sum0); + Kokkos::parallel_reduce( + "Test::Team", + 
Kokkos::TeamPolicy>(exec, 10, + 10), + Test::FunctorTeamReduce(v), sum); + ASSERT_EQ(800, sum0); + ASSERT_EQ(800, sum); +} + +struct ScratchFunctor { + int scratch_size; + int R; + + ScratchFunctor(int scratch_size_, int R_) + : scratch_size(scratch_size_), R(R_) {} + + KOKKOS_FUNCTION + void operator()(const Kokkos::TeamPolicy::member_type &team, + int &error_accum) const { + Kokkos::View scratch_mem( + team.team_scratch(1), scratch_size); + + // Initialize scratch memory + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), + [&](int i) { scratch_mem(i) = 0; }); + team.team_barrier(); + + // Increment each entry in scratch memory R times + for (int r = 0; r < R; ++r) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), + [&](int i) { scratch_mem(i) += 1; }); + } + team.team_barrier(); + + // Check that each scratch entry has been incremented exactly R times + int team_error_accum; + auto R_loc = R; // avoid implicit capture of this + Kokkos::parallel_reduce( + Kokkos::TeamVectorRange(team, 0, scratch_size), + [&](int i, int &tsum) { + if (scratch_mem(i) != R_loc) { + tsum += 1; + } + }, + team_error_accum); + Kokkos::single(Kokkos::PerTeam(team), + [&]() { error_accum += team_error_accum; }); + } +}; + +void test_scratch(TEST_EXECSPACE exec0, TEST_EXECSPACE exec1) { + constexpr int N = 10; + constexpr int R = 1000; + constexpr int scratch_size = 100; + using ScratchType = Kokkos::View; + + // Test allocating and using scratch space + ScratchFunctor f(scratch_size, R); + + auto policy0 = + Kokkos::TeamPolicy(exec0, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); + auto policy1 = + Kokkos::TeamPolicy(exec1, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); + + int error0, error1; + + Kokkos::parallel_reduce("test_scratch_device_0", policy0, f, error0); + Kokkos::parallel_reduce("test_scratch_device_1", policy1, f, error1); + ASSERT_EQ(error0, 0); + 
ASSERT_EQ(error1, 0); + + // Request larger scratch size to trigger a realloc and test + const auto new_scratch_size = scratch_size + 10; + ScratchFunctor f_more_scratch(new_scratch_size, R); + + auto policy0_more_scratch = + Kokkos::TeamPolicy(exec0, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); + auto policy1_more_scratch = + Kokkos::TeamPolicy(exec1, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); + + Kokkos::parallel_reduce("test_realloc_scratch_device_0", policy0_more_scratch, + f_more_scratch, error0); + Kokkos::parallel_reduce("test_realloc_scratch_device_1", policy1_more_scratch, + f_more_scratch, error1); + ASSERT_EQ(error0, 0); + ASSERT_EQ(error1, 0); +} +} // namespace diff --git a/packages/kokkos/core/unit_test/TestNestedReducerCTAD.cpp b/packages/kokkos/core/unit_test/TestNestedReducerCTAD.cpp new file mode 100644 index 000000000000..95493a58742c --- /dev/null +++ b/packages/kokkos/core/unit_test/TestNestedReducerCTAD.cpp @@ -0,0 +1,246 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +struct TestNestedReducerCTAD { + using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space; + using ScalarType = int; + using IndexType = int; + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = TeamPolicy::member_type; + + struct FakeComparator { + template + KOKKOS_FUNCTION bool operator()(T const&, T const&) const { + return true; + } + }; + + template + struct FakeFunctor { + KOKKOS_FUNCTION void operator()(int, ValueType&) const {} + }; + + template + KOKKOS_FUNCTION static void check_types([ + [maybe_unused]] ReducerTypeToCheck const& reducer) { + static_assert(std::is_same_v); + } + + KOKKOS_FUNCTION void operator()([ + [maybe_unused]] TeamHandle const& team_handle) const { + { + using ReducerTypeExpected = Kokkos::Sum; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Sum reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::Prod; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Prod reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::Min; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Min reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::Max; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Max reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::LAnd; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::LAnd reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::LOr; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::LOr reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::BAnd; + using ValueType = ReducerTypeExpected::value_type; + 
Kokkos::View view; + Kokkos::BAnd reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::BOr; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::BOr reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MaxLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MaxLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::MinMax; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinMax reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinMaxLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinMaxLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MaxFirstLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MaxFirstLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MaxFirstLocCustomComparator; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + FakeComparator comparator; + Kokkos::MaxFirstLocCustomComparator reducer(view, comparator); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinFirstLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinFirstLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinFirstLocCustomComparator; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + FakeComparator comparator; + Kokkos::MinFirstLocCustomComparator reducer(view, comparator); + check_types(reducer); + } + + { + using ReducerTypeExpected = + 
Kokkos::MinMaxFirstLastLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinMaxFirstLastLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::MinMaxFirstLastLocCustomComparator< + ScalarType, IndexType, FakeComparator, MemorySpace>; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + FakeComparator comparator; + Kokkos::MinMaxFirstLastLocCustomComparator reducer(view, comparator); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::FirstLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::FirstLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::LastLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::LastLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::StdIsPartitioned; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::StdIsPartitioned reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::StdPartitionPoint; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::StdPartitionPoint reducer(view); + check_types(reducer); + } + } + + TestNestedReducerCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestNumericTraits.hpp b/packages/kokkos/core/unit_test/TestNumericTraits.hpp index 81a9d0a5e0dd..0c8033548837 100644 --- a/packages/kokkos/core/unit_test/TestNumericTraits.hpp +++ b/packages/kokkos/core/unit_test/TestNumericTraits.hpp @@ -21,6 +21,19 @@ #include #include "Kokkos_NumericTraits.hpp" +// Suppress "'long double' is treated as 'double' in device code" +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress 20208 +#else +#ifdef __CUDA_ARCH__ 
+#pragma diagnostic push +#pragma diag_suppress 20208 +#endif +#endif +#endif + struct extrema { #define DEFINE_EXTREMA(T, m, M) \ KOKKOS_FUNCTION static T min(T) { return m; } \ @@ -145,33 +158,25 @@ struct TestNumericTraits { KOKKOS_FUNCTION void operator()(MaxExponent10, int, int&) const { use_on_device(); } // clang-format on KOKKOS_FUNCTION void operator()(QuietNaN, int, int& e) const { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 nan using Kokkos::Experimental::quiet_NaN; constexpr auto nan = quiet_NaN::value; auto const zero = T(0); e += (int)!(nan != nan); e += (int)!(nan != zero); -#else - (void)e; -#endif use_on_device(); } KOKKOS_FUNCTION void operator()(SignalingNaN, int, int& e) const { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 nan using Kokkos::Experimental::signaling_NaN; constexpr auto nan = signaling_NaN::value; auto const zero = T(0); e += (int)!(nan != nan); e += (int)!(nan != zero); -#else - (void)e; -#endif use_on_device(); } KOKKOS_FUNCTION void use_on_device() const { -#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_COMPILER_NVHPC) || \ - defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_OPENACC) +#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET) || \ + defined(KOKKOS_ENABLE_OPENACC) take_by_value(trait::value); #else (void)take_address_of(trait::value); @@ -204,59 +209,46 @@ struct TestNumericTraits< #endif TEST(TEST_CATEGORY, numeric_traits_infinity) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_epsilon) { -#ifndef 
KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 bit_comparison_type TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_round_error) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 bit_comparison_type TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_norm_min) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 bit_comparison_type TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -264,9 +256,8 @@ TEST(TEST_CATEGORY, numeric_traits_norm_min) { TEST(TEST_CATEGORY, numeric_traits_denorm_min) { TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on 
Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -303,10 +294,8 @@ TEST(TEST_CATEGORY, numeric_traits_finite_min_max) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif @@ -329,10 +318,8 @@ TEST(TEST_CATEGORY, numeric_traits_digits) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -354,10 +341,8 @@ TEST(TEST_CATEGORY, numeric_traits_digits10) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -365,10 +350,8 @@ TEST(TEST_CATEGORY, numeric_traits_digits10) { TEST(TEST_CATEGORY, numeric_traits_max_digits10) { TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if 
(!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -389,10 +372,8 @@ TEST(TEST_CATEGORY, numeric_traits_radix) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -406,10 +387,8 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif @@ -420,31 +399,29 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent10) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_quiet_and_signaling_nan) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 +// FIXME_NVHPC +#ifdef KOKKOS_COMPILER_NVHPC + GTEST_SKIP() << "This test is known to fail with the NVHPC compiler"; +#endif TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - 
// FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif @@ -736,3 +713,13 @@ CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(signaling_NaN); #undef CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT #undef CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES + +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#endif diff --git a/packages/kokkos/core/unit_test/TestOther.hpp b/packages/kokkos/core/unit_test/TestOther.hpp index fcf0353a88ca..9daef3ca3f39 100644 --- a/packages/kokkos/core/unit_test/TestOther.hpp +++ b/packages/kokkos/core/unit_test/TestOther.hpp @@ -16,13 +16,8 @@ #ifndef KOKKOS_TEST_OTHER_HPP #define KOKKOS_TEST_OTHER_HPP -#include #include #include #include -// with VS 16.11.3 and CUDA 11.4.2 getting cudafe stackoverflow crash -#if !(defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)) -#include -#endif #endif diff --git a/packages/kokkos/core/unit_test/TestRangePolicyConstructors.hpp b/packages/kokkos/core/unit_test/TestRangePolicyConstructors.hpp index c8c1542af138..d6920beed042 100644 --- a/packages/kokkos/core/unit_test/TestRangePolicyConstructors.hpp +++ b/packages/kokkos/core/unit_test/TestRangePolicyConstructors.hpp @@ -20,6 +20,7 @@ #include #include +#include namespace { @@ -196,4 +197,43 @@ TEST(TEST_CATEGORY_DEATH, range_policy_implicitly_converted_bounds) { #endif } +constexpr bool test_chunk_size_explicit() { + using ExecutionSpace = TEST_EXECSPACE; + using Kokkos::ChunkSize; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + static_assert(std::is_convertible_v); + static_assert(std::is_constructible_v); + // Some execution 
spaces were implicitly constructible from int + // which made the constructor call ambiguous. + static_assert( + std::is_constructible_v || + std::is_constructible_v< + Kokkos::RangePolicy, int, int, int>); + static_assert(std::is_constructible_v< + Kokkos::RangePolicy, int, int, + ChunkSize>); + static_assert(std::is_constructible_v, + ExecutionSpace, int, int, int>); + static_assert(std::is_constructible_v, + ExecutionSpace, int, int, ChunkSize>); +#else + static_assert(!std::is_convertible_v); + static_assert(std::is_constructible_v); + static_assert( + !std::is_constructible_v< + Kokkos::RangePolicy, int, int, int>); + static_assert(std::is_constructible_v< + Kokkos::RangePolicy, int, int, + ChunkSize>); + static_assert(!std::is_constructible_v, + ExecutionSpace, int, int, int>); + static_assert(std::is_constructible_v, + ExecutionSpace, int, int, ChunkSize>); +#endif + return true; +} + +static_assert(test_chunk_size_explicit()); + } // namespace diff --git a/packages/kokkos/core/unit_test/TestRealloc.hpp b/packages/kokkos/core/unit_test/TestRealloc.hpp index 2c9dc5ee4732..f30c9e15e1c0 100644 --- a/packages/kokkos/core/unit_test/TestRealloc.hpp +++ b/packages/kokkos/core/unit_test/TestRealloc.hpp @@ -144,6 +144,11 @@ void impl_testRealloc() { EXPECT_EQ(oldPointer, newPointer); } } +struct NoDefaultConstructor { + int value; + KOKKOS_FUNCTION + NoDefaultConstructor(int x) : value(x) {} +}; template void testRealloc() { @@ -154,6 +159,14 @@ void testRealloc() { impl_testRealloc(); // without data initialization } + // Check #6992 fix (no default initialization in realloc without initializing) + { + using view_type = Kokkos::View; + view_type view_1d_no_default( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "view_1d_no_default"), + 5); + realloc_dispatch(WithoutInitializing{}, view_1d_no_default, 3); + } } } // namespace TestViewRealloc diff --git a/packages/kokkos/core/unit_test/TestResize.hpp b/packages/kokkos/core/unit_test/TestResize.hpp index 
13d7e16d5890..3102d2b9a168 100644 --- a/packages/kokkos/core/unit_test/TestResize.hpp +++ b/packages/kokkos/core/unit_test/TestResize.hpp @@ -358,6 +358,12 @@ void impl_testResize() { } } +struct NoDefaultConstructor { + int value; + KOKKOS_FUNCTION + NoDefaultConstructor(int x) : value(x) {} +}; + template void testResize() { { @@ -367,6 +373,13 @@ void testResize() { impl_testResize(); // without data initialization } + { + using view_type = Kokkos::View; + view_type view_1d_no_default( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "view_1d_no_default"), + 5); + resize_dispatch(WithoutInitializing{}, view_1d_no_default, 3); + } } } // namespace TestViewResize diff --git a/packages/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp b/packages/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp new file mode 100644 index 000000000000..2fad17cb8545 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp @@ -0,0 +1,156 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#ifndef KOKKOS_ENABLE_CXX17 +#include +#endif + +template +struct funky_data_handle { + T* val; + + KOKKOS_FUNCTION + operator T*() { return val; } + KOKKOS_FUNCTION + operator const T*() const { return val; } +}; + +template +struct FunkyAcc { + using element_type = ElementType; + using reference = std::conditional_t, + element_type, element_type&>; + using data_handle_type = funky_data_handle; + using offset_policy = Kokkos::default_accessor; + KOKKOS_FUNCTION + reference access(data_handle_type p, size_t i) const { return p.val[i]; } + KOKKOS_FUNCTION + element_type* offset(data_handle_type p, size_t i) const { return p.val + i; } +}; + +template +void test_space_aware_accessor() { + using memory_space_t = MemorySpace; + using value_type = std::remove_const_t; + Kokkos::View v("V", 100); + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i) { v(i) = i; }); + + int errors; + using acc_t = Kokkos::Impl::SpaceAwareAccessor>; + acc_t acc{}; + typename acc_t::data_handle_type ptr{v.data()}; + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i, int& error) { + if (acc.access(ptr, i) != ptr[i]) error++; + if (acc.offset(ptr, i) != ptr + i) error++; + static_assert(std::is_same_v); + if constexpr (std::is_const_v) { + static_assert(std::is_same_v); + } else { + static_assert(std::is_same_v); + } + static_assert(std::is_same_v>); + static_assert( + std::is_same_v>>); + if constexpr (std::is_const_v) { + static_assert(std::is_same_v>); + } else { + static_assert(std::is_same_v); + } + static_assert(std::is_same_v); + static_assert(std::is_same_v&>); + static_assert(std::is_nothrow_move_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_swappable_v); + static_assert( + std::is_same_v); + static_assert( + std::is_same_v>); +#ifndef 
KOKKOS_ENABLE_CXX17 + static_assert(std::copyable); + static_assert(std::is_empty_v); +#endif + }, + errors); + ASSERT_EQ(errors, 0); +} + +void test_space_aware_accessor_conversion() { + using ExecutionSpace = TEST_EXECSPACE; + using memory_space_t = typename ExecutionSpace::memory_space; + using T = float; + using acc_t = Kokkos::Impl::SpaceAwareAccessor>; + using const_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + using int_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + using host_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + using anon_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, 1), KOKKOS_LAMBDA(int) { + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert( + std::is_constructible_v == + Kokkos::Impl::MemorySpaceAccess::assignable); + static_assert( + std::is_constructible_v == + Kokkos::Impl::MemorySpaceAccess::assignable); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + }); +} + +TEST(TEST_CATEGORY, mdspan_space_aware_accessor) { + using ExecutionSpace = TEST_EXECSPACE; + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor_conversion(); +} diff --git a/packages/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp b/packages/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp new file mode 100644 index 000000000000..b9982d5fc450 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp @@ -0,0 +1,128 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +template +struct TestMemoryAccessViolation { + Kokkos::Impl::SpaceAwareAccessor> + acc; + + KOKKOS_FUNCTION decltype(auto) bad_access() const { + return acc.access(nullptr, 0); + } + + KOKKOS_FUNCTION void operator()(int) const { ++bad_access(); } + + TestMemoryAccessViolation(ExecutionSpace const& s, + std::string const& matcher) { + constexpr bool accessible_from_execution_space = Kokkos::SpaceAccessibility< + /*AccessSpace=*/ExecutionSpace, + /*MemorySpace=*/MemorySpace>::accessible; + EXPECT_FALSE(accessible_from_execution_space); + EXPECT_DEATH( + { + Kokkos::parallel_for(Kokkos::RangePolicy(s, 0, 1), + *this); + Kokkos::fence(); + }, + matcher); + } +}; + +template +void test_memory_access_violation(ExecutionSpace const& s, + std::string const& m) { + TestMemoryAccessViolation(s, m); +} + +template +void test_memory_access_violations_from_host() { + using memory_space_t = typename ExecutionSpace::memory_space; + using exec_space_t = Kokkos::DefaultHostExecutionSpace; + const exec_space_t exec_space{}; + std::string const message = + "Kokkos::SpaceAwareAccessor ERROR: attempt to access inaccessible memory " + "space"; + test_memory_access_violation(exec_space, + message); +} + +template +void test_memory_access_violations_from_device() { + using memory_space_t = Kokkos::HostSpace; + using exec_space_t = ExecutionSpace; + const exec_space_t exec_space{}; + std::string const message = + "Kokkos::SpaceAwareAccessor ERROR: attempt to access inaccessible memory " + "space"; + 
test_memory_access_violation(exec_space, + message); +} + +// FIXME_SYCL +#if !(defined(KOKKOS_COMPILER_INTEL_LLVM) && defined(KOKKOS_ENABLE_SYCL)) +TEST(TEST_CATEGORY_DEATH, + mdspan_space_aware_accessor_invalid_access_from_host) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using ExecutionSpace = TEST_EXECSPACE; + + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/Kokkos::HostSpace, + /*MemorySpace=*/typename ExecutionSpace::memory_space>::accessible) { + GTEST_SKIP() << "skipping since no memory access violation would occur"; + } + + test_memory_access_violations_from_host(); +} +#endif + +TEST(TEST_CATEGORY_DEATH, + mdspan_space_aware_accessor_invalid_access_from_device) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using ExecutionSpace = TEST_EXECSPACE; + + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/ExecutionSpace, + /*MemorySpace=*/Kokkos::HostSpace>::accessible) { + GTEST_SKIP() << "skipping since no memory access violation would occur"; + } + +#if defined(KOKKOS_ENABLE_SYCL) && defined(NDEBUG) // FIXME_SYCL + if (std::is_same::value) { + GTEST_SKIP() << "skipping SYCL device-side abort does not work when NDEBUG " + "is defined"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) // FIXME_OPENMPTARGET + if (std::is_same::value) { + GTEST_SKIP() << "skipping because OpenMPTarget backend is currently not " + "able to abort from the device"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC + if (std::is_same::value) { + GTEST_SKIP() << "skipping because OpenACC backend is currently not " + "able to abort from the device"; + } +#endif + + test_memory_access_violations_from_device(); +} diff --git a/packages/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp b/packages/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp new file mode 100644 index 000000000000..0de639e02e69 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp @@ -0,0 +1,199 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +struct TestTeamThreadMDRangeCTAD { + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = TeamPolicy::member_type; + + KOKKOS_FUNCTION void operator()(TeamHandle const& team_handle) const { + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + } + + TestTeamThreadMDRangeCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +struct TestTeamVectorMDRangeCTAD { + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = 
TeamPolicy::member_type; + + KOKKOS_FUNCTION void operator()(TeamHandle const& team_handle) const { + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + } + + TestTeamVectorMDRangeCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +struct TestThreadVectorMDRangeCTAD { + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = TeamPolicy::member_type; + + template + KOKKOS_FUNCTION static void check_types([ + [maybe_unused]] PolicyTypeToCheck const& team_handle) { + static_assert(std::is_same_v); + } + + KOKKOS_FUNCTION void operator()(TeamHandle const& team_handle) const { + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 
0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + } + + TestThreadVectorMDRangeCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp b/packages/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp new file mode 100644 index 000000000000..07aaeae819ef --- /dev/null +++ b/packages/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp @@ -0,0 +1,135 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +struct TestTeamPolicyCTAD { + template + static void maybe_unused(Ts&&...) 
{} + + struct SomeExecutionSpace { + using execution_space = SomeExecutionSpace; + using size_type = size_t; + }; + static_assert(Kokkos::is_execution_space_v); + + struct ImplicitlyConvertibleToDefaultExecutionSpace { + [[maybe_unused]] operator Kokkos::DefaultExecutionSpace() const { + return Kokkos::DefaultExecutionSpace(); + } + }; + static_assert(!Kokkos::is_execution_space_v< + ImplicitlyConvertibleToDefaultExecutionSpace>); + + [[maybe_unused]] static inline Kokkos::DefaultExecutionSpace des; + [[maybe_unused]] static inline ImplicitlyConvertibleToDefaultExecutionSpace + notEs; + [[maybe_unused]] static inline SomeExecutionSpace ses; + + [[maybe_unused]] static inline int i; + + // Workaround for nvc++ (CUDA-11.7-NVHPC) ignoring [[maybe_unused]] on + // ImplicitlyConvertibleToDefaultExecutionSpace::operator + // Kokkos::DefaultExecutionSpace() const + [[maybe_unused]] static inline Kokkos::DefaultExecutionSpace notEsToDes = + notEs; + + // Workaround for HIP-ROCm-5.2 warning about was declared but never referenced + TestTeamPolicyCTAD() { maybe_unused(des, notEs, ses, i, notEsToDes); } + + // Default construction deduces to TeamPolicy<> + static_assert( + std::is_same_v, decltype(Kokkos::TeamPolicy{})>); + + // Execution space not provided deduces to TeamPolicy<> + + static_assert( + std::is_same_v, decltype(Kokkos::TeamPolicy(i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(i, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(i, Kokkos::AUTO, + Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(i, i, Kokkos::AUTO))>); + + // DefaultExecutionSpace deduces to TeamPolicy<> + + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(des, 
i, i, i))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, Kokkos::AUTO, + Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, i, Kokkos::AUTO))>); + + // Convertible to DefaultExecutionSpace deduces to TeamPolicy<> + + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, i, i))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy( + notEs, i, Kokkos::AUTO, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, i, Kokkos::AUTO))>); + + // SES != DefaultExecutionSpace deduces to TeamPolicy + + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, i, i))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, Kokkos::AUTO, + Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, i, Kokkos::AUTO))>); +}; + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestTeamVector.hpp b/packages/kokkos/core/unit_test/TestTeamVector.hpp index 5e16539d652c..4d8f42720d8b 100644 --- a/packages/kokkos/core/unit_test/TestTeamVector.hpp +++ b/packages/kokkos/core/unit_test/TestTeamVector.hpp @@ -1060,11 +1060,8 @@ TEST(TEST_CATEGORY, parallel_scan_with_reducers) { constexpr int n = 
1000000; constexpr int n_vector_range = 100; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if constexpr (std::is_same_v) { - GTEST_SKIP() << "All but max inclusive scan differ at index 101"; - } +#ifdef KOKKOS_IMPL_32BIT + GTEST_SKIP() << "Failing KOKKOS_IMPL_32BIT"; // FIXME_32BIT #endif checkScan(0))); -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if constexpr (std::is_same_v) { - GTEST_SKIP() << "Disabling 2/3rd of the test for now"; - } -#endif ASSERT_TRUE((TestTeamVectorRange::Test(1))); // FIXME_OPENMPTARGET - Use of kokkos reducers currently results in runtime // memory errors. diff --git a/packages/kokkos/core/unit_test/TestViewAPI.hpp b/packages/kokkos/core/unit_test/TestViewAPI.hpp index ca098dbc2472..53c1f016789b 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI.hpp @@ -837,18 +837,15 @@ struct TestViewMirror { view_const_cast(v)); } - template + template struct CopyUnInit { - using mirror_view_type = typename Kokkos::Impl::MirrorViewType< - Space, double *, Layout, Kokkos::HostSpace, MemoryTraits>::view_type; - - mirror_view_type a_d; + View a_d; KOKKOS_INLINE_FUNCTION - CopyUnInit(mirror_view_type &a_d_) : a_d(a_d_) {} + explicit CopyUnInit(View const &a_d_) : a_d(a_d_) {} KOKKOS_INLINE_FUNCTION - void operator()(const typename Space::size_type i) const { + void operator()(const typename View::size_type i) const { a_d(i) = (double)(10 - i); } }; @@ -875,7 +872,8 @@ struct TestViewMirror { Kokkos::parallel_for( Kokkos::RangePolicy(0, int(10)), - CopyUnInit(a_d)); + // decltype required for Intel classics, that doesn't recognize the CTAD + CopyUnInit(a_d)); Kokkos::deep_copy(a_h, a_d); @@ -1339,6 +1337,40 @@ class TestViewAPI { ASSERT_EQ(dz.data(), nullptr); } + struct test_refcount_poison_copy_functor { + using view_type = Kokkos::View; + explicit test_refcount_poison_copy_functor(view_type v) : view(v) 
{} + + test_refcount_poison_copy_functor( + const test_refcount_poison_copy_functor &other) + : view(other.view) { + throw std::bad_alloc(); + } + + KOKKOS_INLINE_FUNCTION void operator()(int) const {} + + view_type view; + }; + + static void run_test_refcount_exception() { + using view_type = typename test_refcount_poison_copy_functor::view_type; + view_type original("original", N0); + ASSERT_EQ(original.use_count(), 1); + + // test_refcount_poison_copy_functor throws during copy construction + try { + Kokkos::parallel_for( + Kokkos::RangePolicy(0, N0), + test_refcount_poison_copy_functor(original)); + } catch (const std::bad_alloc &) { + } + + // Ensure refcounting is enabled, we should increment here + auto copy = original; + ASSERT_EQ(original.use_count(), 2); + ASSERT_EQ(copy.use_count(), 2); + } + static void run_test_deep_copy_empty() { // Check Deep Copy of LayoutLeft to LayoutRight { @@ -1539,56 +1571,6 @@ class TestViewAPI { typename multivector_type::const_type cmvX(cmv); typename const_multivector_type::const_type ccmvX(cmv); } - - static void run_test_error() { -#ifdef KOKKOS_ENABLE_OPENMPTARGET - if (std::is_same::value) - return; -#endif -// FIXME_MSVC_WITH_CUDA -// This test doesn't behave as expected on Windows with CUDA -#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA) - if (std::is_same::value) - return; -#endif - bool did_throw = false; - auto alloc_size = std::numeric_limits::max() - 42; - try { - auto should_always_fail = dView1("hello_world_failure", alloc_size); - } catch (std::runtime_error const &error) { - // TODO once we remove the conversion to std::runtime_error, catch the - // appropriate Kokkos error here - std::string msg = error.what(); - ASSERT_PRED_FORMAT2(::testing::IsSubstring, "hello_world_failure", msg); - ASSERT_PRED_FORMAT2(::testing::IsSubstring, - typename device::memory_space{}.name(), msg); - // Can't figure out how to make assertions either/or, so we'll just use - // an if statement here for now. 
Test failure message will be a bit - // misleading, but developers should figure out what's going on pretty - // quickly. - if (msg.find("is not a valid size") != std::string::npos) { - ASSERT_PRED_FORMAT2(::testing::IsSubstring, "is not a valid size", msg); - } else -#ifdef KOKKOS_ENABLE_SYCL - if (msg.find("insufficient memory") != std::string::npos) -#endif - { - ASSERT_PRED_FORMAT2(::testing::IsSubstring, "insufficient memory", msg); - } - // SYCL cannot tell the reason why a memory allocation failed -#ifdef KOKKOS_ENABLE_SYCL - else { - // Otherwise, there has to be some sort of "unknown error" error - ASSERT_PRED_FORMAT2(::testing::IsSubstring, - "because of an unknown error.", msg); - } -#endif - did_throw = true; - } - ASSERT_TRUE(did_throw); - } }; } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp index 5efbd95bc94e..042da1e98427 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp @@ -19,6 +19,7 @@ namespace Test { TEST(TEST_CATEGORY, view_api_c) { + TestViewAPI::run_test_refcount_exception(); TestViewAPI::run_test_deep_copy_empty(); TestViewAPI::run_test_view_operator_b(); } diff --git a/packages/kokkos/core/unit_test/TestViewAPI_d.hpp b/packages/kokkos/core/unit_test/TestViewAPI_d.hpp index b0d759ffccc6..075ac3329c0a 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_d.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_d.hpp @@ -26,22 +26,4 @@ TEST(TEST_CATEGORY, view_api_d) { TestViewAPI::run_test_view_operator_c(); } -TEST(TEST_CATEGORY, view_allocation_error) { -#if defined(__has_feature) -#if __has_feature(address_sanitizer) - GTEST_SKIP() << "AddressSanitzer detects allocating too much memory " - "preventing our checks to run"; -#endif -#endif -#if ((HIP_VERSION_MAJOR == 5) && (HIP_VERSION_MINOR == 3)) - GTEST_SKIP() << "ROCm 5.3 segfaults when trying to allocate too much memory"; -#endif -#if 
defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - if (std::is_same_v) { - GTEST_SKIP() << "acc_malloc() not properly returning nullptr"; - } -#endif - TestViewAPI::run_test_error(); -} - } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp b/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp new file mode 100644 index 000000000000..7cb2f91655d9 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp @@ -0,0 +1,86 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +namespace { + +template +void test_view_bad_alloc() { + bool did_throw = false; + auto too_large = std::numeric_limits::max() - 42; + std::string label = "my_label"; + try { + auto should_always_fail = + Kokkos::View(label, too_large); + } catch (std::runtime_error const &error) { + std::string msg = error.what(); + ASSERT_PRED_FORMAT2( + ::testing::IsSubstring, + std::string(MemorySpace::name()) + " memory space failed to allocate", + msg) + << "memory space name is missing"; + ASSERT_PRED_FORMAT2(::testing::IsSubstring, + std::string("(label=\"") + label + "\")", msg) + << "label is missing"; + did_throw = true; + } + ASSERT_TRUE(did_throw); +} + +TEST(TEST_CATEGORY, view_bad_alloc) { + using ExecutionSpace = TEST_EXECSPACE; + using MemorySpace = ExecutionSpace::memory_space; +#if defined(__has_feature) +#if __has_feature(address_sanitizer) + if (std::is_same_v) { + GTEST_SKIP() << "AddressSanitizer detects allocating too much memory " + 
"preventing our checks to run"; + } +#endif +#endif +#if ((HIP_VERSION_MAJOR == 5) && (HIP_VERSION_MINOR == 3)) + if (std::is_same_v) { + GTEST_SKIP() + << "ROCm 5.3 segfaults when trying to allocate too much memory"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC + if (std::is_same_v) { + GTEST_SKIP() << "acc_malloc() not properly returning nullptr"; + } +#endif + + test_view_bad_alloc(); + + constexpr bool execution_space_is_device = + std::is_same_v && + !std::is_same_v; + + if constexpr (execution_space_is_device) { + if constexpr (Kokkos::has_shared_space) { + test_view_bad_alloc(); + } + if constexpr (Kokkos::has_shared_host_pinned_space) { + test_view_bad_alloc(); + } + } +} + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestViewCopy_c.hpp b/packages/kokkos/core/unit_test/TestViewCopy_c.hpp new file mode 100644 index 000000000000..758af13c7df0 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewCopy_c.hpp @@ -0,0 +1,434 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +namespace { +// Do not rely on deep_copy(0) as we want to test it! 
+template +void reset_view(const ExecSpace& space, ViewType& a, int magic) { + auto policy = Kokkos::RangePolicy(space, 0, a.span()); + + assert(a.span_is_contiguous()); + + Kokkos::parallel_for( + "TestViewCopy::ResetView", policy, + KOKKOS_LAMBDA(int i) { a.data()[i] = magic; }); +} + +template +size_t compute_overall_sum(const ExecSpace& space, ViewType& a) { + auto policy = Kokkos::RangePolicy(space, 0, a.span()); + + assert(a.span_is_contiguous()); + + typename ViewType::value_type sum = 0; + Kokkos::parallel_reduce( + "TestViewCopy::ComputeSum", policy, + KOKKOS_LAMBDA(int i, int& lcl_sum) { lcl_sum += a.data()[i]; }, sum); + + return static_cast(sum); +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 0>* = nullptr) { + auto policy = Kokkos::RangePolicy(space, 0, 1); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank0", policy, + KOKKOS_LAMBDA(int, bool& local_check) { local_check &= (a() == magic); }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 1>* = nullptr) { + auto policy = Kokkos::RangePolicy(space, 0, a.extent(0)); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank1", policy, + KOKKOS_LAMBDA(int i, bool& local_check) { + local_check &= (a(i) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 2>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0}, {a.extent(0), a.extent(1)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + 
Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank2", policy, + KOKKOS_LAMBDA(int i0, int i1, bool& local_check) { + local_check &= (a(i0, i1) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 3>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank3", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, bool& local_check) { + local_check &= (a(i0, i1, i2) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 4>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank4", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, bool& local_check) { + local_check &= (a(i0, i1, i2, i3) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 5>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank5", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, bool& local_check) { + local_check &= (a(i0, i1, i2, 
i3, i4) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 6>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), + a.extent(5)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank6", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, + bool& local_check) { + local_check &= (a(i0, i1, i2, i3, i4, i5) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 7>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), + a.extent(5)}); + + bool all_elements_are_set = true; + + for (size_t outer = 0; outer < a.extent(6); ++outer) { + bool all_local_elements_are_set; // Uninitialized, set by parallel_reduce + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank7", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, + bool& local_check) { + local_check &= (a(i0, i1, i2, i3, i4, i5, outer) == magic); + }, + Kokkos::LAnd(all_local_elements_are_set)); + + all_elements_are_set = all_elements_are_set && all_local_elements_are_set; + } + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 8>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), + a.extent(5)}); + + bool all_elements_are_set = true; + + 
for (size_t outer = 0; outer < a.extent(7); ++outer) { + for (size_t inner = 0; inner < a.extent(6); ++inner) { + bool all_local_elements_are_set; // Uninitialized, set by parallel_reduce + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank8", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, + bool& local_check) { + local_check &= (a(i0, i1, i2, i3, i4, i5, inner, outer) == magic); + }, + Kokkos::LAnd(all_local_elements_are_set)); + + all_elements_are_set = all_elements_are_set && all_local_elements_are_set; + } + } + return all_elements_are_set; +} + +template +bool view_fill_test(const ExecSpace& space, ViewType& a, int magic) { + Kokkos::deep_copy(space, a, magic); +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + // FIXME_OPENMPTARGET Does not work with Land reducer + return true; +#else // KOKKOS_ENABLE_OPENMPTARGET + return check_magic_value(space, a, magic); +#endif // KOKKOS_ENABLE_OPENMPTARGET +} + +template +void run_test() { + int magic = 19; + + using ViewType = Kokkos::View; + // Create views with different lengths for each dimension + // We want to test if all loops are over the correct dimensions + // We use prime numbers to make sure that the strides are different + ViewType a_decreasing("a", 23, 19, 17, 13, 11, 7, 5, 3); + // We also test with increasing strides to catch more "out-of-bounds" errors + // within subviews. + ViewType a_increasing("a", 3, 5, 7, 11, 13, 17, 19, 23); + + using exec_space = typename Space::execution_space; + auto space = exec_space(); + + // Use subviews in the tests to have cases with different ranks and + // non-contiguous memory + // Tests have two parts: + // 1. Fill the subview with a magic value and check that all elements are set + // 2. 
Check if only the subview is set by summing all elements in the view and + // comparing to the subview size times the magic value + + // Rank 0 + { + auto sub_dec = Kokkos::subview(a_decreasing, 0, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), + static_cast(magic)); + + auto sub_inc = Kokkos::subview(a_increasing, 0, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), + static_cast(magic)); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + + // Rank 1 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + + // Rank 2 + { + auto sub_dec = Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, 0, 0, + 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, 0, 0, + 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 3 + { + auto sub_dec = Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ( + compute_overall_sum(space, a_decreasing), + sub_dec.extent(0) * 
sub_dec.extent(1) * sub_dec.extent(2) * magic); + + auto sub_inc = Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 4 + { + auto sub_dec = Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), + sub_dec.extent(0) * sub_dec.extent(1) * sub_dec.extent(2) * + sub_dec.extent(3) * magic); + + auto sub_inc = Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 5 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 6 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto 
sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 7 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 8 + { + auto sub_dec = Kokkos::subview( + a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, std::make_pair(0, 2)); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = Kokkos::subview( + a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, std::make_pair(0, 2)); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } +} + +TEST(TEST_CATEGORY, view_fill_tests_layout_right) { + using Space = TEST_EXECSPACE; + using Layout = Kokkos::LayoutRight; + run_test(); +} + +TEST(TEST_CATEGORY, view_fill_tests_layout_left) { + using Space = TEST_EXECSPACE; + using Layout = Kokkos::LayoutLeft; + run_test(); +} + +} // namespace diff --git 
a/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp deleted file mode 100644 index 67308212ee0f..000000000000 --- a/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp +++ /dev/null @@ -1,1756 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE -#define KOKKOS_IMPL_PUBLIC_INCLUDE -#endif - -#include - -#include - -#include -#include - -#include -#include - -namespace Test { - -namespace { - -template -struct TestViewLayoutTiled { - using Scalar = double; - - static constexpr int T0 = 2; - static constexpr int T1 = 4; - static constexpr int T2 = 4; - static constexpr int T3 = 2; - static constexpr int T4 = 2; - static constexpr int T5 = 2; - static constexpr int T6 = 2; - static constexpr int T7 = 2; - - // Rank 2 - using LayoutLL_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRL_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutLR_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRR_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - - // Rank 3 - using LayoutLL_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRL_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutLR_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRR_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - - // Rank 4 - using LayoutLL_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - using LayoutRL_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - 
using LayoutLR_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - using LayoutRR_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - -#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - static void test_view_layout_tiled_2d(const int, const int) { -#else - static void test_view_layout_tiled_2d(const int N0, const int N1) { - const int FT = T0 * T1; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - // Test create_mirror_view, deep_copy - // Create LL View - { - using ViewType = - typename Kokkos::View; - ViewType v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - hv(ti * T0 + i, tj * T1 + j) = - (ti + tj * NT0) * FT + (i + j * T0); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 LL", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti + tj * NT0) * FT + (i + j * T0) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } - - // Create RL View - { - using 
ViewType = - typename Kokkos::View; - Kokkos::View v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - hv(ti * T0 + i, tj * T1 + j) = - (ti * NT1 + tj) * FT + (i + j * T0); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 RL", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti * NT1 + tj) * FT + (i + j * T0) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create LR View - { - using ViewType = - typename Kokkos::View; - Kokkos::View v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - hv(ti * T0 + i, tj * T1 + j) = - (ti + tj * NT0) * FT + (i * T1 + j); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, 
Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti + tj * NT0) * FT + (i * T1 + j) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RR View - { - using ViewType = - typename Kokkos::View; - Kokkos::View v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - hv(ti * T0 + i, tj * T1 + j) = - (ti * NT1 + tj) * FT + (i * T1 + j); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - - 
for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti * NT1 + tj) * FT + (i * T1 + j) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope -#endif - } // end test_view_layout_tiled_2d - -#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - static void test_view_layout_tiled_3d(const int, const int, const int) { -#else - static void test_view_layout_tiled_3d(const int N0, const int N1, - const int N2) { - const int FT = T0 * T1 * T2; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - - // Create LL View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 LL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for 
(int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * T1) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RL View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 RL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + 
j * T0 + k * T0 * T1) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create LR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for 
(int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 RR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope -#endif - } // end test_view_layout_tiled_3d - -#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - static void test_view_layout_tiled_4d(const int, const int, const int, - const int){ -#else - static void test_view_layout_tiled_4d(const int N0, const int N1, - const int N2, const int N3) { - const int FT = T0 * T1 * T2 * T3; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - const int NT3 = int(std::ceil(N3 / T3)); - - // Create LL View - { - using ViewType = Kokkos::View; - 
Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 LL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RL View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename 
ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 RL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + - tl) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create LR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - 
// Initialize on host - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int ti = 0; ti < NT0; 
++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 RR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope -#endif - } // end test_view_layout_tiled_4d - - static void test_view_layout_tiled_subtile_2d(const int N0, const int N1) { - const int FT = T0 * T1; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - - // Counter to check for errors at 
the end - long counter[4] = {0}; - - // Create LL View - { - Kokkos::View v("v", N0, N1); - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i + j * T0); - } - } - } - } - - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[0]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti + tj * NT0) * FT + (i + j * T0) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; -#endif - } - } - } - } - } // end scope - - // Create RL View - { - Kokkos::View v("v", N0, N1); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i + j * T0); - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[1]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti * NT1 + tj) * FT + (i + j * T0) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; -#endif - } - } - } - 
} - } // end scope - - // Create LR View - { - Kokkos::View v("v", N0, N1); - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i * T1 + j); - } - } - } - } - - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[2]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti + tj * NT0) * FT + (i * T1 + j) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; -#endif - } - } - } - } - } // end scope - - // Create RR View - { - Kokkos::View v("v", N0, N1); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i * T1 + j); - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[3]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti * NT1 + tj) * FT + (i * T1 + j) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; - std::cout << "subview tile rank = 
" << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } // end scope - -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "subview_tile vs view errors:\n" - << " LL: " << counter[0] << " RL: " << counter[1] - << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; -#endif - - ASSERT_EQ(counter[0], long(0)); - ASSERT_EQ(counter[1], long(0)); - ASSERT_EQ(counter[2], long(0)); - ASSERT_EQ(counter[3], long(0)); - } // end test_view_layout_tiled_subtile_2d - - static void test_view_layout_tiled_subtile_3d(const int N0, const int N1, - const int N2) { - const int FT = T0 * T1 * T2; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - - // Counter to check for errors at the end - long counter[4] = {0}; - // Create LL View - { - Kokkos::View v("v", N0, - N1, N2); - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[0]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - << "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * 
T1) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; - std::cout - << "subview tile rank = " << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - - // Create RL View - { - Kokkos::View v("v", N0, - N1, N2); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[1]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - << "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + j * T0 + k * T0 * T1) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - - // Create LR View - { - Kokkos::View v("v", N0, - N1, N2); - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - for (int 
tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[2]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - << "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; - std::cout - << "subview tile rank = " << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - - // Create RR View - { - Kokkos::View v("v", N0, - N1, N2); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[3]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - 
<< "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; - std::cout - << "subview tile rank = " << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "subview_tile vs view errors:\n" - << " LL: " << counter[0] << " RL: " << counter[1] - << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; -#endif - - ASSERT_EQ(counter[0], long(0)); - ASSERT_EQ(counter[1], long(0)); - ASSERT_EQ(counter[2], long(0)); - ASSERT_EQ(counter[3], long(0)); - - } // end test_view_layout_tiled_subtile_3d - - static void test_view_layout_tiled_subtile_4d(const int N0, const int N1, - const int N2, const int N3) { - const int FT = T0 * T1 * T2 * T3; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - const int NT3 = int(std::ceil(N3 / T3)); - - // Counter to check for errors at the end - long counter[4] = {0}; - // Create LL View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; 
++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter[0]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l) - << " flat idx = " - << (ti + tj * NT0 + tk * N0 * N1 + - tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - - // Create RL View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - 
++counter[1]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l) - << " flat idx = " - << (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + - tl) * FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - - // Create LR View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter[2]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - 
std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l) - << " flat idx = " - << (ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - - // Create RR View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter[3]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + 
k, - tl * T3 + l) - << " flat idx = " - << (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "subview_tile vs view errors:\n" - << " LL: " << counter[0] << " RL: " << counter[1] - << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; -#endif - - ASSERT_EQ(counter[0], long(0)); - ASSERT_EQ(counter[1], long(0)); - ASSERT_EQ(counter[2], long(0)); - ASSERT_EQ(counter[3], long(0)); - - } // end test_view_layout_tiled_subtile_4d - -}; // end TestViewLayoutTiled struct - -} // namespace - -TEST(TEST_CATEGORY, view_layouttiled) { - // These two examples are iterating by tile, then within a tile - not by - // extents If N# is not a power of two, but want to iterate by tile then - // within a tile, need to check that mapped index is within extent - TestViewLayoutTiled::test_view_layout_tiled_2d(4, 12); - TestViewLayoutTiled::test_view_layout_tiled_3d(4, 12, 16); - TestViewLayoutTiled::test_view_layout_tiled_4d(4, 12, 16, 12); -} -TEST(TEST_CATEGORY, view_layouttiled_subtile) { - // These two examples are iterating by tile, then within a tile - not by - // extents If N# is not a power of two, but want to iterate by tile then - // within a tile, need to check that mapped index is within extent - TestViewLayoutTiled::test_view_layout_tiled_subtile_2d(4, 12); - TestViewLayoutTiled::test_view_layout_tiled_subtile_3d(4, 12, - 16); - TestViewLayoutTiled::test_view_layout_tiled_subtile_4d( - 4, 12, 16, 12); -} -} // namespace Test - -#undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/packages/kokkos/core/unit_test/TestViewOfViews.hpp b/packages/kokkos/core/unit_test/TestViewOfViews.hpp new file mode 100644 index 
000000000000..a87c829bb73c --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewOfViews.hpp @@ -0,0 +1,75 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +namespace { + +// User-defined type with a View data member +template +class S { + V v_; + + public: + template + S(std::string label, Extents... extents) : v_(std::move(label), extents...) {} + S() = default; +}; + +template +void test_view_of_views() { + using VoV = Kokkos::View; + { // assigning a default-constructed view to destruct the inner objects + VoV vov("vov", 2, 3); + V a("a"); + V b("b"); + vov(0, 0) = a; + vov(1, 0) = a; + vov(0, 1) = b; +#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND + vov(0, 0) = V(); + vov(1, 0) = V(); + vov(0, 1) = V(); +#endif + } + { // using placement new to construct the inner objects and explicitly + // calling the destructor + VoV vov(Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); + V a("a"); + V b("b"); + new (&vov(0, 0)) V(a); + new (&vov(1, 0)) V(a); + new (&vov(0, 1)) V(b); +#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND + vov(0, 0).~V(); + vov(1, 0).~V(); + vov(0, 1).~V(); +#else + // leaks memory +#endif + } +} + +TEST(TEST_CATEGORY, view_of_views) { + test_view_of_views>(); + test_view_of_views>(); + // User-defined type with View data member + test_view_of_views>>(); +} + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestViewSubview.hpp 
b/packages/kokkos/core/unit_test/TestViewSubview.hpp index 386887d923ea..c60aa2fe2690 100644 --- a/packages/kokkos/core/unit_test/TestViewSubview.hpp +++ b/packages/kokkos/core/unit_test/TestViewSubview.hpp @@ -2294,9 +2294,8 @@ template struct TestExtentsStaticTests { using test1 = typename static_expect_same< /* expected */ - Kokkos::Experimental::Extents, + Kokkos::Experimental::Extents, /* actual */ typename Kokkos::Impl::ParseViewExtents::type>::type; diff --git a/packages/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp b/packages/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp new file mode 100644 index 000000000000..b2176f3ef059 --- /dev/null +++ b/packages/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp @@ -0,0 +1,155 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include + +namespace { + +/** + * Fixture that checks Kokkos is neither initialized nor finalized before and + * after the test. + */ +class AssertEnvironmentTest : public ::testing::Test { + protected: + void SetUp() override { + ASSERT_FALSE(Kokkos::is_initialized()); + ASSERT_FALSE(Kokkos::is_finalized()); + } + + void TearDown() override { + ASSERT_FALSE(Kokkos::is_initialized()); + ASSERT_FALSE(Kokkos::is_finalized()); + } +}; + +using scope_guard_DeathTest = AssertEnvironmentTest; + +/** + * Test to create a scope guard normally. 
+ */ +TEST_F(scope_guard_DeathTest, create) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + // run it in a different process so side effects are not kept + EXPECT_EXIT( + { + { + Kokkos::ScopeGuard guard{}; + + if (!Kokkos::is_initialized()) std::exit(EXIT_FAILURE); + if (Kokkos::is_finalized()) std::exit(EXIT_FAILURE); + } + + if (Kokkos::is_initialized()) std::exit(EXIT_FAILURE); + if (!Kokkos::is_finalized()) std::exit(EXIT_FAILURE); + + std::exit(EXIT_SUCCESS); + }, + testing::ExitedWithCode(EXIT_SUCCESS), ""); +} + +/** + * Test to create a scope guard with an argument. + */ +TEST_F(scope_guard_DeathTest, create_argument) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + // run it in a different process so side effects are not kept + EXPECT_EXIT( + { + { + Kokkos::InitializationSettings settings{}; + Kokkos::ScopeGuard guard{settings}; + } + + std::exit(EXIT_SUCCESS); + }, + testing::ExitedWithCode(EXIT_SUCCESS), ""); +} + +/** + * Test to create another scope guard when one has been created. + */ +TEST_F(scope_guard_DeathTest, create_while_initialize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + Kokkos::ScopeGuard guard1{}; + + // create a second scope guard while there is one already existing + Kokkos::ScopeGuard guard2{}; + }, + "Creating a ScopeGuard while Kokkos is initialized"); +} + +/** + * Test to create a scope guard when initialization has been done manually. + */ +TEST_F(scope_guard_DeathTest, create_after_initialize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + Kokkos::initialize(); + + // create a scope guard after manual initialization + Kokkos::ScopeGuard guard{}; + }, + "Creating a ScopeGuard while Kokkos is initialized"); +} + +/** + * Test to create another scope guard when one has been destroyed. 
+ */ +TEST_F(scope_guard_DeathTest, create_after_finalize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + { Kokkos::ScopeGuard guard1{}; } + + // create a second scope guard while the first one has been destroyed + // already + Kokkos::ScopeGuard guard2{}; + }, + "Creating a ScopeGuard after Kokkos was finalized"); +} + +/** + * Test to destroy a scope guard when finalization has been done manually. + */ +TEST_F(scope_guard_DeathTest, destroy_after_finalize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + // create a scope guard and finalize it manually + Kokkos::ScopeGuard guard{}; + Kokkos::finalize(); + }, + "Destroying a ScopeGuard after Kokkos was finalized"); +} + +/** + * Static tests + */ + +// Test scope guard is not copyable. +static_assert(!std::is_copy_assignable()); +static_assert(!std::is_copy_constructible()); + +// Test scope guard is not movable. +static_assert(!std::is_move_assignable()); +static_assert(!std::is_move_constructible()); + +} // namespace diff --git a/packages/kokkos/core/unit_test/category_files/TestHPX_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestHPX_Category.hpp index d3a7cdbea530..c6a2aa9f201f 100644 --- a/packages/kokkos/core/unit_test/category_files/TestHPX_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestHPX_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 3 #define TEST_CATEGORY_DEATH hpx_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::HPX +#define TEST_CATEGORY_FIXTURE(name) hpx_##name #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp index 0c4e4b7e1195..6105eadf14fe 100644 --- a/packages/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 8 #define 
TEST_CATEGORY_DEATH openacc_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::OpenACC +#define TEST_CATEGORY_FIXTURE(name) openacc_##name #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp index 235b34ffab78..921cff789020 100644 --- a/packages/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 4 #define TEST_CATEGORY_DEATH openmptarget_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::OpenMPTarget +#define TEST_CATEGORY_FIXTURE(name) openmptarget_##name #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp index 8e1b18c9acd9..59e72c72c773 100644 --- a/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 7 #define TEST_CATEGORY_DEATH sycl_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::SYCL +#define TEST_CATEGORY_FIXTURE(name) sycl_##name #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestThreads_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestThreads_Category.hpp index 13b0b653f21e..ae8ac608339c 100644 --- a/packages/kokkos/core/unit_test/category_files/TestThreads_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestThreads_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 1 #define TEST_CATEGORY_DEATH threads_DeathTest #define TEST_EXECSPACE Kokkos::Threads +#define TEST_CATEGORY_FIXTURE(name) threads_##name #endif diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp deleted file mode 100644 index 
272036396905..000000000000 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp index d94735ceb230..40955e9c7caf 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp @@ -15,7 +15,7 @@ //@HEADER #include -#include +#include namespace { @@ -57,79 +57,6 @@ std::array get_execution_spaces( return {exec0, exec1}; } -// Test Interoperability with Cuda Streams -void test_policies(TEST_EXECSPACE exec0, Kokkos::View v0, - TEST_EXECSPACE exec, Kokkos::View v) { - using MemorySpace = typename TEST_EXECSPACE::memory_space; - - Kokkos::deep_copy(exec, v, 5); - Kokkos::deep_copy(exec0, v0, 5); - - Kokkos::deep_copy(v, v0); - - int sum; - int sum0; - - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Range_0", - Kokkos::RangePolicy(exec0, 0, 100), - Test::FunctorRange(v0)); - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Range", - Kokkos::RangePolicy(exec, 0, 100), - Test::FunctorRange(v)); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::RangeReduce_0", - Kokkos::RangePolicy>(exec0, - 0, 100), - Test::FunctorRangeReduce(v0), sum0); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::RangeReduce", - 
Kokkos::RangePolicy>(exec, 0, - 100), - Test::FunctorRangeReduce(v), sum); - ASSERT_EQ(600, sum0); - ASSERT_EQ(600, sum); - - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::MDRange_0", - Kokkos::MDRangePolicy>( - exec0, {0, 0}, {10, 10}), - Test::FunctorMDRange(v0)); - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::MDRange", - Kokkos::MDRangePolicy>( - exec, {0, 0}, {10, 10}), - Test::FunctorMDRange(v)); - Kokkos::parallel_reduce("Test::cuda::raw_cuda_stream::MDRangeReduce_0", - Kokkos::MDRangePolicy, - Kokkos::LaunchBounds<128, 2>>( - exec0, {0, 0}, {10, 10}), - Test::FunctorMDRangeReduce(v0), sum0); - Kokkos::parallel_reduce("Test::cuda::raw_cuda_stream::MDRangeReduce", - Kokkos::MDRangePolicy, - Kokkos::LaunchBounds<128, 2>>( - exec, {0, 0}, {10, 10}), - Test::FunctorMDRangeReduce(v), sum); - ASSERT_EQ(700, sum0); - ASSERT_EQ(700, sum); - - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Team_0", - Kokkos::TeamPolicy(exec0, 10, 10), - Test::FunctorTeam(v0)); - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Team", - Kokkos::TeamPolicy(exec, 10, 10), - Test::FunctorTeam(v)); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::Team_0", - Kokkos::TeamPolicy>(exec0, - 10, 10), - Test::FunctorTeamReduce(v0), sum0); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::Team", - Kokkos::TeamPolicy>(exec, 10, - 10), - Test::FunctorTeamReduce(v), sum); - ASSERT_EQ(800, sum0); - ASSERT_EQ(800, sum); -} - TEST(cuda_multi_gpu, managed_views) { StreamsAndDevices streams_and_devices; { @@ -169,93 +96,6 @@ TEST(cuda_multi_gpu, unmanaged_views) { } } -struct ScratchFunctor { - int scratch_size; - int R; - - ScratchFunctor(int scratch_size_, int R_) - : scratch_size(scratch_size_), R(R_) {} - - KOKKOS_FUNCTION - void operator()(const Kokkos::TeamPolicy::member_type &team, - int &error_accum) const { - Kokkos::View scratch_mem( - team.team_scratch(1), scratch_size); - - // Initialize scratch memory - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, 
scratch_size), - [&](int i) { scratch_mem(i) = 0; }); - team.team_barrier(); - - // Increment each entry in scratch memory R times - for (int r = 0; r < R; ++r) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), - [&](int i) { scratch_mem(i) += 1; }); - } - team.team_barrier(); - - // Check that each scratch entry has been incremented exactly R times - int team_error_accum; - auto R_loc = R; // avoid implicit capture of this - Kokkos::parallel_reduce( - Kokkos::TeamVectorRange(team, 0, scratch_size), - [&](int i, int &tsum) { - if (scratch_mem(i) != R_loc) { - tsum += 1; - } - }, - team_error_accum); - Kokkos::single(Kokkos::PerTeam(team), - [&]() { error_accum += team_error_accum; }); - } -}; - -void test_scratch(TEST_EXECSPACE exec0, TEST_EXECSPACE exec1) { - constexpr int N = 10; - constexpr int R = 1000; - constexpr int scratch_size = 100; - using ScratchType = Kokkos::View; - - // Test allocating and using scratch space - ScratchFunctor f(scratch_size, R); - - auto policy0 = - Kokkos::TeamPolicy(exec0, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); - auto policy1 = - Kokkos::TeamPolicy(exec1, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); - - int error0, error1; - - Kokkos::parallel_reduce("test_scratch_device_0", policy0, f, error0); - Kokkos::parallel_reduce("test_scratch_device_1", policy1, f, error1); - ASSERT_EQ(error0, 0); - ASSERT_EQ(error1, 0); - - // Request larger scratch size to trigger a realloc and test - const auto new_scratch_size = scratch_size + 10; - ScratchFunctor f_more_scratch(new_scratch_size, R); - - auto policy0_more_scratch = - Kokkos::TeamPolicy(exec0, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); - auto policy1_more_scratch = - Kokkos::TeamPolicy(exec1, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); - - 
Kokkos::parallel_reduce("test_realloc_scratch_device_0", policy0_more_scratch, - f_more_scratch, error0); - Kokkos::parallel_reduce("test_realloc_scratch_device_1", policy1_more_scratch, - f_more_scratch, error1); - ASSERT_EQ(error0, 0); - ASSERT_EQ(error1, 0); -} - TEST(cuda_multi_gpu, scratch_space) { StreamsAndDevices streams_and_devices; { diff --git a/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt b/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt index f792b03ed880..4c364ceee75b 100644 --- a/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt +++ b/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt @@ -10,7 +10,8 @@ file(GLOB KOKKOS_CONTAINERS_HEADERS RELATIVE ${BASE_DIR}/containers/src file(GLOB KOKKOS_ALGORITHMS_HEADERS RELATIVE ${BASE_DIR}/algorithms/src ${BASE_DIR}/algorithms/src/*.hpp) -if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4) +# erroring out when deprecated code is disabled and raising warnings that are treated as errors in the CI otherwise +if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4 OR Kokkos_ENABLE_DEPRECATION_WARNINGS) list(REMOVE_ITEM KOKKOS_CONTAINERS_HEADERS "Kokkos_Vector.hpp") endif() diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp index a213453ea182..8c72e9f29724 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp @@ -48,9 +48,6 @@ TEST(hip, memory_requirements) { // we want all user-facing memory in hip to be coarse grained. 
As of // today(07.01.22) the documentation is not reliable/correct, we test the // memory on the device and host - // FIXME_HIP - GTEST_SKIP() << "skipping the test because the CI on MI100 returns: error( " - "hipErrorInvalidValue)"; KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::HIPSpace, int, 10); KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::HIPHostPinnedSpace, int, 10); KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::HIPManagedSpace, int, 10); diff --git a/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp b/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp index d7b2a57b4421..a7fa26c7282b 100644 --- a/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp @@ -63,7 +63,9 @@ struct TestIncrExecSpace { ASSERT_GT(concurrency, 0); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() int in_parallel = ExecSpace::in_parallel(); + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() ASSERT_FALSE(in_parallel); #endif diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp deleted file mode 100644 index 22c8ab1bf8fd..000000000000 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp deleted file mode 100644 index bff64d83e276..000000000000 --- a/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp new file mode 100644 index 000000000000..d3906e409f5a --- /dev/null +++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp @@ -0,0 +1,64 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +namespace { + +std::array get_execution_spaces() { + std::vector gpu_devices = + sycl::device::get_devices(sycl::info::device_type::gpu); + + TEST_EXECSPACE exec0( + sycl::queue{gpu_devices.front(), sycl::property::queue::in_order()}); + TEST_EXECSPACE exec1( + sycl::queue{gpu_devices.back(), sycl::property::queue::in_order()}); + + return {exec0, exec1}; +} + +TEST(sycl_multi_gpu, managed_views) { + std::array execs = get_execution_spaces(); + + Kokkos::View view0(Kokkos::view_alloc("v0", execs[0]), + 100); + Kokkos::View view(Kokkos::view_alloc("v", execs[1]), + 100); + + test_policies(execs[0], view0, execs[1], view); +} + +TEST(sycl_multi_gpu, unmanaged_views) { + std::array execs = get_execution_spaces(); + + int *p0 = sycl::malloc_device(100, execs[0].sycl_queue()); + Kokkos::View view0(p0, 100); + + int *p1 = sycl::malloc_device(100, execs[1].sycl_queue()); + Kokkos::View view1(p1, 100); + + test_policies(execs[0], view0, execs[1], view1); + sycl::free(p0, execs[0].sycl_queue()); + sycl::free(p1, execs[1].sycl_queue()); +} + +TEST(sycl_multi_gpu, scratch_space) { + std::array execs = get_execution_spaces(); + + test_scratch(execs[0], execs[1]); +} +} // namespace diff --git a/packages/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp b/packages/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp index b95890614e0a..1b9b2a368197 100644 --- a/packages/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp +++ b/packages/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp @@ -23,15 +23,14 @@ namespace { // Helper to make static tests more succinct template -constexpr bool datatype_matches_extent = - std::is_same_v::type, - Extent>; +constexpr bool datatype_matches_extent = std::is_same_v< + typename Kokkos::Impl::ExtentsFromDataType::type, + Extent>; template constexpr bool extent_matches_datatype = - std::is_same_v::type>; + 
std::is_same_v::type>; // Conversion from DataType to extents // 0-rank view diff --git a/packages/kokkos/example/README b/packages/kokkos/example/README index 66860512448a..2fe872764848 100644 --- a/packages/kokkos/example/README +++ b/packages/kokkos/example/README @@ -1,7 +1,7 @@ This directory contains example application proxies that use different parts of Kokkos. If you are looking for the FENL ("finite element -nonlinear" solve) example, it has moved into the LinAlg subpackage of -Tpetra. +nonlinear" solve) example, it has moved into the TrilinosCouplings +package in Trilinos. MANIFEST: diff --git a/packages/kokkos/example/build_cmake_installed/CMakeLists.txt b/packages/kokkos/example/build_cmake_installed/CMakeLists.txt index aaf745b418de..c025f1d7d289 100644 --- a/packages/kokkos/example/build_cmake_installed/CMakeLists.txt +++ b/packages/kokkos/example/build_cmake_installed/CMakeLists.txt @@ -12,6 +12,7 @@ find_package(Kokkos REQUIRED) add_executable(example cmake_example.cpp foo.f) if(CMAKE_Fortran_COMPILER_ID STREQUAL LLVMFlang) set_target_properties(example PROPERTIES LINKER_LANGUAGE Fortran) + target_link_options(example PRIVATE -fno-fortran-main) endif() # This is the only thing required to set up compiler/linker flags diff --git a/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp b/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp index 22b8b6d63c88..3104003fb487 100644 --- a/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp +++ b/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp @@ -16,7 +16,6 @@ #include #include -#include // // "Hello world" parallel_for example: @@ -25,12 +24,12 @@ // using a functor to define the loop body // 3. Shut down Kokkos // -// If Kokkos was built with C++11 enabled, try comparing this example -// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous -// functions) to define the loop body of the parallel_for. 
That makes -// the code much more concise and readable. On the other hand, -// breaking out the loop body into an explicit functor makes it easier -// to test the loop independently of the parallel pattern. +// Try comparing this example to 01_hello_world_lambda, which uses +// C++11 lambdas (anonymous functions) to define the loop body of the +// parallel_for. That makes the code much more concise and readable. +// On the other hand, breaking out the loop body into an explicit +// functor makes it easier to test the loop independently of the +// parallel pattern. // // Functor that defines the parallel_for's loop body. @@ -72,11 +71,9 @@ int main(int argc, char* argv[]) { // start with "--kokkos-". Kokkos::initialize(argc, argv); - // Print the name of Kokkos' default execution space. We're using - // typeid here, so the name might get a bit mangled by the linker, - // but you should still be able to figure out what it is. + // Print the name of Kokkos' default execution space. printf("Hello World on Kokkos execution space %s\n", - typeid(Kokkos::DefaultExecutionSpace).name()); + Kokkos::DefaultExecutionSpace::name()); // Run the above functor on the default Kokkos execution space in // parallel, with a parallel for loop count of 15. diff --git a/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp index 909765e1fc31..ad2c258c0fe4 100644 --- a/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp +++ b/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -16,7 +16,6 @@ #include #include -#include // // "Hello world" parallel_for example: @@ -25,10 +24,9 @@ // using a C++11 lambda to define the loop body // 3. Shut down Kokkos // -// This example only builds if C++11 is enabled. 
Compare this example -// to 01_hello_world, which uses functors (explicitly defined classes) -// to define the loop body of the parallel_for. Both functors and -// lambdas have their places. +// Compare this example to 01_hello_world, which uses functors +// (explicitly defined classes) to define the loop body of the +// parallel_for. Both functors and lambdas have their places. // int main(int argc, char* argv[]) { @@ -41,11 +39,9 @@ int main(int argc, char* argv[]) { // start with "--kokkos-". Kokkos::initialize(argc, argv); - // Print the name of Kokkos' default execution space. We're using - // typeid here, so the name might get a bit mangled by the linker, - // but you should still be able to figure out what it is. + // Print the name of Kokkos' default execution space. printf("Hello World on Kokkos execution space %s\n", - typeid(Kokkos::DefaultExecutionSpace).name()); + Kokkos::DefaultExecutionSpace::name()); // Run lambda on the default Kokkos execution space in parallel, // with a parallel for loop count of 15. The lambda's argument is diff --git a/packages/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/packages/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp index 5cae6da16cf0..1ca30e07e881 100644 --- a/packages/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp +++ b/packages/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp @@ -24,9 +24,8 @@ // using a C++11 lambda to define the loop body // 3. Shut down Kokkos // -// This example only builds if C++11 is enabled. Compare this example -// to 02_simple_reduce, which uses a functor to define the loop body -// of the parallel_reduce. +// Compare this example to 02_simple_reduce, which uses a functor to +// define the loop body of the parallel_reduce. 
// int main(int argc, char* argv[]) { diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index 31be92532543..a0e83bef237d 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -37,3 +37,4 @@ tag: 4.2.00 date: 11:09:2023 master: 1a3ea28f release: abe01c88 tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4 tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e +tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c diff --git a/packages/kokkos/scripts/docker/Dockerfile.openmptarget b/packages/kokkos/scripts/docker/Dockerfile.openmptarget index 22edcda2a073..b9e8442c74e7 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.openmptarget +++ b/packages/kokkos/scripts/docker/Dockerfile.openmptarget @@ -1,4 +1,4 @@ -ARG BASE=nvidia/cuda:11.1.1-devel-ubuntu20.04 +ARG BASE=nvcr.io/nvidia/cuda:12.3.2-devel-ubuntu22.04 FROM $BASE RUN apt-get update && apt-get install -y \ @@ -55,8 +55,7 @@ RUN LLVM_URL=https://github.com/llvm/llvm-project/archive &&\ -DCMAKE_CXX_COMPILER=g++ \ -DLLVM_ENABLE_PROJECTS="clang" \ -DLLVM_ENABLE_RUNTIMES="openmp" \ - -DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_70 \ - -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=70 \ + -DLIBOMPTARGET_DEVICE_ARCHITECTURES=sm_70 \ ../llvm && \ make -j${NPROC} && \ make install && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl index 87864da1bf76..d6162975b414 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.sycl +++ b/packages/kokkos/scripts/docker/Dockerfile.sycl @@ -51,10 +51,19 @@ RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-li ./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \ rm oneapi-for-nvidia-gpus-2023.0.0-linux.sh -RUN wget https://registrationcenter-download.intel.com/akdlm/irc_nas/19133/l_oneDPL_p_2022.0.0.25335.sh &&\ - chmod +x 
./l_oneDPL_p_2022.0.0.25335.sh && \ - ./l_oneDPL_p_2022.0.0.25335.sh -a -s --eula accept && \ - rm l_oneDPL_p_2022.0.0.25335.sh +ENV ONE_DPL_DIR=/opt/onedpl +RUN . /opt/intel/oneapi/setvars.sh --include-intel-llvm && \ + ONE_DPL_VERSION=oneDPL-2022.2.0 && \ + ONE_DPL_URL=https://github.com/oneapi-src/oneDPL/archive && \ + ONE_DPL_ARCHIVE=${ONE_DPL_VERSION}-rc1.tar.gz && \ + SCRATCH_DIR=/scratch && mkdir -p ${SCRATCH_DIR} && cd ${SCRATCH_DIR} && \ + wget --quiet ${ONE_DPL_URL}/${ONE_DPL_ARCHIVE} && \ + mkdir onedpl && \ + tar -xf ${ONE_DPL_ARCHIVE} -C onedpl --strip-components=1 && cd onedpl && \ + mkdir build && cd build && \ + cmake -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-w" -DCMAKE_INSTALL_PREFIX=${ONE_DPL_DIR} -DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=TRUE -DONEDPL_BACKEND="dpcpp_only" .. && \ + make -j${NPROCS} install && \ + rm -rf ${SCRATCH_DIR} # clang++ ENV PATH=/opt/intel/oneapi/compiler/latest/linux/bin-llvm/:$PATH diff --git a/packages/kokkos/simd/src/Kokkos_SIMD.hpp b/packages/kokkos/simd/src/Kokkos_SIMD.hpp index 57d4afd88bee..5e34e51989ca 100644 --- a/packages/kokkos/simd/src/Kokkos_SIMD.hpp +++ b/packages/kokkos/simd/src/Kokkos_SIMD.hpp @@ -183,15 +183,18 @@ template class data_types {}; #if defined(KOKKOS_ARCH_AVX512XEON) -using host_abi_set = abi_set>; +using host_abi_set = abi_set, + simd_abi::avx512_fixed_size<16>>; using data_type_set = data_types; #elif defined(KOKKOS_ARCH_AVX2) -using host_abi_set = abi_set>; +using host_abi_set = abi_set, + simd_abi::avx2_fixed_size<8>>; using data_type_set = data_types; #elif defined(KOKKOS_ARCH_ARM_NEON) -using host_abi_set = abi_set>; +using host_abi_set = abi_set, + simd_abi::neon_fixed_size<4>>; using data_type_set = data_types; #else diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp b/packages/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp index 6d0956f38321..27c8af79abd6 100644 --- a/packages/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp +++ b/packages/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp @@ -228,6 +228,106 @@ 
class simd_mask> { } }; +template <> +class simd_mask> { + __m256 m_value; + + public: + class reference { + __m256& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __m256 bit_mask() const { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + return _mm256_cvtepi32_ps(_mm256_setr_epi32( +#else + return _mm256_castsi256_ps(_mm256_setr_epi32( +#endif + -std::int32_t(m_lane == 0), -std::int32_t(m_lane == 1), + -std::int32_t(m_lane == 2), -std::int32_t(m_lane == 3), + -std::int32_t(m_lane == 4), -std::int32_t(m_lane == 5), + -std::int32_t(m_lane == 6), -std::int32_t(m_lane == 7))); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(__m256& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask = _mm256_or_ps(bit_mask(), m_mask); + } else { + m_mask = _mm256_andnot_ps(bit_mask(), m_mask); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (_mm256_movemask_ps(m_mask) & (1 << m_lane)) != 0; + } + }; + using value_type = bool; + using abi_type = simd_abi::avx2_fixed_size<8>; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(_mm256_castsi256_ps(_mm256_set1_epi32(-std::int32_t(value)))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + G&& gen) noexcept + : m_value(_mm256_castsi256_ps(_mm256_setr_epi32( + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + 
-std::int32_t(gen(std::integral_constant()))))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __m256 const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast( + reference(const_cast<__m256&>(m_value), int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_mm256_or_ps(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_mm256_and_ps(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + auto const true_value = static_cast<__m256>(simd_mask(true)); + return simd_mask(_mm256_andnot_ps(m_value, true_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return _mm256_movemask_ps(m_value) == _mm256_movemask_ps(other.m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return !operator==(other); + } +}; + template <> class simd_mask> { __m128i m_value; @@ -324,6 +424,109 @@ class simd_mask> { } }; +template <> +class simd_mask> { + __m256i m_value; + + public: + class reference { + __m256i& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __m256i bit_mask() const { + return _mm256_setr_epi32( + -std::int32_t(m_lane == 0), -std::int32_t(m_lane == 1), + -std::int32_t(m_lane == 2), -std::int32_t(m_lane == 3), + -std::int32_t(m_lane == 4), -std::int32_t(m_lane == 5), + -std::int32_t(m_lane == 6), 
-std::int32_t(m_lane == 7)); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(__m256i& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask = _mm256_or_si256(bit_mask(), m_mask); + } else { + m_mask = _mm256_andnot_si256(bit_mask(), m_mask); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (_mm256_movemask_ps(_mm256_castsi256_ps(m_mask)) & + (1 << m_lane)) != 0; + } + }; + using value_type = bool; + using abi_type = simd_abi::avx2_fixed_size<8>; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(_mm256_set1_epi32(-std::int32_t(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __m256i const& value_in) + : m_value(value_in) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + G&& gen) noexcept + : m_value(_mm256_setr_epi32( + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())))) {} + template + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask const& other) { + for (std::size_t i = 0; i < size(); ++i) (*this)[i] = other[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return 
m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast( + reference(const_cast<__m256i&>(m_value), int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_mm256_or_si256(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_mm256_and_si256(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + auto const true_value = static_cast<__m256i>(simd_mask(true)); + return simd_mask(_mm256_andnot_si256(m_value, true_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return _mm256_movemask_ps(_mm256_castsi256_ps(m_value)) == + _mm256_movemask_ps(_mm256_castsi256_ps(other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return !operator==(other); + } +}; + template <> class simd_mask> { __m256i m_value; @@ -800,11 +1003,11 @@ class simd> { KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 4; } - template , - bool> = false> + template , + bool> = false> KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) : m_value(_mm_set1_ps(value_type(value))) {} - template >, @@ -1031,12 +1234,12 @@ namespace Experimental { } template <> -class simd> { - __m128i m_value; +class simd> { + __m256 m_value; public: - using value_type = std::int32_t; - using abi_type = simd_abi::avx2_fixed_size<4>; + using value_type = float; + using abi_type = simd_abi::avx2_fixed_size<8>; using mask_type = simd_mask; using reference = value_type&; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; @@ -1045,29 +1248,30 @@ class simd> { 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { - return 4; + return 8; } template , bool> = false> KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) - : m_value(_mm_set1_epi32(value_type(value))) {} + : m_value(_mm256_set1_ps(value_type(value))) {} template >, bool> = false> - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( - G&& gen) noexcept - : m_value(_mm_setr_epi32(gen(std::integral_constant()), + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) + : m_value(_mm256_setr_ps(gen(std::integral_constant()), gen(std::integral_constant()), gen(std::integral_constant()), - gen(std::integral_constant()))) { + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) { } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( - __m128i const& value_in) + __m256 const& value_in) : m_value(value_in) {} - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( - simd const& other); KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { return reinterpret_cast(&m_value)[i]; } @@ -1077,93 +1281,350 @@ class simd> { } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, element_aligned_tag) { - // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used - // here. -#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE - m_value = _mm_loadu_si128(reinterpret_cast<__m128i const*>(ptr)); -#else - m_value = _mm_maskload_epi32(ptr, static_cast<__m128i>(mask_type(true))); -#endif + m_value = _mm256_loadu_ps(ptr); } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, vector_aligned_tag) { - // FIXME_HIP ROCm 5.6 can't compile with the intrinsic used here. 
-#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE - m_value = _mm_load_si128(reinterpret_cast<__m128i const*>(ptr)); -#else - m_value = _mm_maskload_epi32(ptr, static_cast<__m128i>(mask_type(true))); -#endif + m_value = _mm256_load_ps(ptr); } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( value_type* ptr, element_aligned_tag) const { - _mm_maskstore_epi32(ptr, static_cast<__m128i>(mask_type(true)), m_value); + _mm256_storeu_ps(ptr, m_value); } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, vector_aligned_tag) const { - _mm_maskstore_epi32(ptr, static_cast<__m128i>(mask_type(true)), m_value); + _mm256_store_ps(ptr, m_value); } - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m128i() + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256() const { return m_value; } - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type - operator==(simd const& lhs, simd const& rhs) noexcept { - return mask_type( - _mm_cmpeq_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(_mm256_sub_ps(_mm256_set1_ps(0.0), m_value)); } - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type - operator>(simd const& lhs, simd const& rhs) noexcept { - return mask_type( - _mm_cmpgt_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_mul_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator/( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_div_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_add_ps(lhs.m_value, rhs.m_value)); + } + 
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_sub_ps(lhs.m_value, rhs.m_value)); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator<(simd const& lhs, simd const& rhs) noexcept { - return mask_type( - _mm_cmplt_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_LT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_GT_OS)); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator<=(simd const& lhs, simd const& rhs) noexcept { - return (lhs < rhs) || (lhs == rhs); + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_LE_OS)); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator>=(simd const& lhs, simd const& rhs) noexcept { - return (lhs > rhs) || (lhs == rhs); + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_GE_OS)); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type - operator!=(simd const& lhs, simd const& rhs) noexcept { - return !(lhs == rhs); - } - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( - simd const& lhs, simd const& rhs) noexcept { - return simd( - _mm_sub_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); - } - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( - simd const& lhs, simd const& rhs) noexcept { - return simd( - _mm_add_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); - } - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( - simd const& lhs, simd const& rhs) noexcept { - return simd( - 
_mm_mullo_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_EQ_OS)); } - - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( - simd const& lhs, int rhs) noexcept { - return simd(_mm_srai_epi32(static_cast<__m128i>(lhs), rhs)); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_NEQ_OS)); } +}; - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( - simd const& lhs, simd const& rhs) noexcept { - return simd( - _mm_srav_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); - } +} // namespace Experimental - [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( - simd const& lhs, int rhs) noexcept { - return simd(_mm_slli_epi32(static_cast<__m128i>(lhs), rhs)); - } +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +copysign( + Experimental::simd> const& + a, + Experimental::simd> const& + b) { + __m256 const sign_mask = _mm256_set1_ps(-0.0); + return Experimental::simd>( + _mm256_xor_ps(_mm256_andnot_ps(sign_mask, static_cast<__m256>(a)), + _mm256_and_ps(sign_mask, static_cast<__m256>(b)))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + __m256 const sign_mask = _mm256_set1_ps(-0.0); + return Experimental::simd>( + _mm256_andnot_ps(sign_mask, static_cast<__m256>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( 
+ _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + sqrt(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_sqrt_ps(static_cast<__m256>(a))); +} + +#ifdef __INTEL_COMPILER + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + cbrt(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cbrt_ps(static_cast<__m256>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + exp(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_exp_ps(static_cast<__m256>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + log(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_log_ps(static_cast<__m256>(a))); +} + +#endif + 
+[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +fma(Experimental::simd> const& + a, + Experimental::simd> const& + b, + Experimental::simd> const& + c) { + return Experimental::simd>( + _mm256_fmadd_ps(static_cast<__m256>(a), static_cast<__m256>(b), + static_cast<__m256>(c))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +max(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + _mm256_max_ps(static_cast<__m256>(a), static_cast<__m256>(b))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +min(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + _mm256_min_ps(static_cast<__m256>(a), static_cast<__m256>(b))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>(_mm256_blendv_ps( + static_cast<__m256>(c), static_cast<__m256>(b), static_cast<__m256>(a))); +} + +template <> +class simd> { + __m128i m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::avx2_fixed_size<4>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : 
m_value(_mm_set1_epi32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value(_mm_setr_epi32(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) { + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m128i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd const& other); + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + m_value = _mm_loadu_si128(reinterpret_cast<__m128i const*>(ptr)); +#else + m_value = _mm_maskload_epi32(ptr, static_cast<__m128i>(mask_type(true))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + // FIXME_HIP ROCm 5.6 can't compile with the intrinsic used here. 
+#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + m_value = _mm_load_si128(reinterpret_cast<__m128i const*>(ptr)); +#else + m_value = _mm_maskload_epi32(ptr, static_cast<__m128i>(mask_type(true))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm_maskstore_epi32(ptr, static_cast<__m128i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm_maskstore_epi32(ptr, static_cast<__m128i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m128i() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + _mm_cmpeq_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + _mm_cmpgt_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + _mm_cmplt_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return (lhs < rhs) || (lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return (lhs > rhs) || (lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return !(lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + 
_mm_sub_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm_add_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm_mullo_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm_srai_epi32(static_cast<__m128i>(lhs), rhs)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm_srav_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm_slli_epi32(static_cast<__m128i>(lhs), rhs)); + } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( simd const& lhs, simd const& rhs) noexcept { @@ -1229,6 +1690,207 @@ namespace Experimental { _mm_castsi128_ps(static_cast<__m128i>(a))))); } +template <> +class simd> { + __m256i m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::avx2_fixed_size<8>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template , + bool> = false> + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm256_set1_epi32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value( + _mm256_setr_epi32(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m256i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + m_value = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(ptr)); +#else + m_value = _mm256_maskload_epi32(ptr, static_cast<__m256i>(mask_type(true))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. 
+#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + m_value = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(ptr)); +#else + m_value = _mm256_maskload_epi32(ptr, static_cast<__m256i>(mask_type(true))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_maskstore_epi32(ptr, static_cast<__m256i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm256_maskstore_epi32(ptr, static_cast<__m256i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmpeq_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmpgt_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return !(lhs >= rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return (lhs < rhs) || (lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return (lhs > rhs) || (lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return !(lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm256_sub_epi32(static_cast<__m256i>(lhs), 
static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm256_add_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_mullo_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm256_srai_epi32(static_cast<__m256i>(lhs), rhs)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_srav_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm256_slli_epi32(static_cast<__m256i>(lhs), rhs)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_sllv_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + __m256i const rhs = static_cast<__m256i>(a); + return Experimental::simd>( + _mm256_abs_epi32(rhs)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + std::int32_t, 
Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>(_mm256_castps_si256( + _mm256_blendv_ps(_mm256_castsi256_ps(static_cast<__m256i>(c)), + _mm256_castsi256_ps(static_cast<__m256i>(b)), + _mm256_castsi256_ps(static_cast<__m256i>(a))))); +} + template <> class simd> { __m256i m_value; @@ -1515,6 +2177,16 @@ class simd> { static_cast<__m256i>(mask_type(true))); #endif } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_maskstore_epi64(reinterpret_cast(ptr), + static_cast<__m256i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm256_maskstore_epi64(reinterpret_cast(ptr), + static_cast<__m256i>(mask_type(true)), m_value); + } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() const { return m_value; @@ -1821,6 +2493,94 @@ class where_expression>, } }; +template <> +class const_where_expression>, + simd>> { + public: + using abi_type = simd_abi::avx2_fixed_size<8>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type 
const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, element_aligned_tag) const { + _mm256_maskstore_ps(mem, _mm256_castps_si256(static_cast<__m256>(m_mask)), + static_cast<__m256>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, vector_aligned_tag) const { + _mm256_maskstore_ps(mem, _mm256_castps_si256(static_cast<__m256>(m_mask)), + static_cast<__m256>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + float* mem, + simd> const& index) const { + for (std::size_t lane = 0; lane < value_type::size(); ++lane) { + if (m_mask[lane]) mem[index[lane]] = m_value[lane]; + } + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, element_aligned_tag) { + m_value = value_type(_mm256_maskload_ps( + mem, _mm256_castps_si256(static_cast<__m256>(m_mask)))); + } + void copy_from(float const* mem, vector_aligned_tag) { + m_value = value_type(_mm256_maskload_ps( + mem, _mm256_castps_si256(static_cast<__m256>(m_mask)))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + float const* mem, + simd> const& index) { + m_value = value_type(_mm256_mask_i32gather_ps( + static_cast<__m256>(m_value), mem, static_cast<__m256i>(index), + static_cast<__m256>(m_mask), 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + 
static_cast>>( + std::forward(x)); + m_value = simd>(_mm256_blendv_ps( + static_cast<__m256>(m_value), static_cast<__m256>(x_as_value_type), + static_cast<__m256>(m_mask))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1923,6 +2683,109 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::avx2_fixed_size<8>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + _mm256_maskstore_epi32(mem, static_cast<__m256i>(m_mask), + static_cast<__m256i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, vector_aligned_tag) const { + _mm256_maskstore_epi32(mem, static_cast<__m256i>(m_mask), + static_cast<__m256i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::int32_t* mem, + simd> const& index) const { + for (std::size_t lane = 0; lane < value_type::size(); ++lane) { + if (m_mask[lane]) mem[index[lane]] = m_value[lane]; + } + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + __m256i tmp = 
_mm256_loadu_si256(reinterpret_cast<__m256i const*>(mem)); + m_value = value_type(_mm256_and_si256(tmp, static_cast<__m256i>(m_mask))); +#else + m_value = + value_type(_mm256_maskload_epi32(mem, static_cast<__m256i>(m_mask))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, vector_aligned_tag) { +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + __m256i tmp = _mm256_load_si256(reinterpret_cast<__m256i const*>(mem)); + m_value = value_type(_mm256_and_si256(tmp, static_cast<__m256i>(m_mask))); +#else + m_value = + value_type(_mm256_maskload_epi32(mem, static_cast<__m256i>(m_mask))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::int32_t const* mem, + simd> const& index) { + m_value = value_type(_mm256_mask_i32gather_epi32( + static_cast<__m256i>(m_value), mem, static_cast<__m256i>(index), + static_cast<__m256i>(m_mask), 4)); + } + template < + class U, + std::enable_if_t>>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>( + _mm256_castps_si256(_mm256_blendv_ps( + _mm256_castsi256_ps(static_cast<__m256i>(m_value)), + _mm256_castsi256_ps(static_cast<__m256i>(x_as_value_type)), + _mm256_castsi256_ps(static_cast<__m256i>(m_mask))))); + } +}; + template <> class const_where_expression< simd_mask>, diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp b/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp index 7fa35c204ae1..84e8af3cd766 100644 --- a/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp +++ b/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp @@ -140,6 +140,122 @@ class simd_mask> { } }; +template +class simd_mask> { + __mmask16 m_value; + + public: + class reference { + __mmask16& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __mmask16 bit_mask() const { + return __mmask16(std::int32_t(1 << m_lane)); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION 
reference(__mmask16& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask |= bit_mask(); + } else { + m_mask &= ~bit_mask(); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (m_mask & bit_mask()) != 0; + } + }; + using value_type = bool; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(-std::int32_t(value)) {} + template + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask> const& other) + : m_value(static_cast<__mmask16>(other)) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(G&& gen) : m_value(false) { + reference(m_value, int(0)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(1)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(2)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(3)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(4)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(5)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(6)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(7)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(8)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(9)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(10)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(11)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(12)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(13)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(14)) = + static_cast(gen(std::integral_constant())); + 
reference(m_value, int(15)) = + static_cast(gen(std::integral_constant())); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __mmask16 const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __mmask16() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + auto const bit_mask = __mmask16(std::int32_t(1 << i)); + return (m_value & bit_mask) != 0; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_kor_mask16(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_kand_mask16(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + static const __mmask16 true_value(static_cast<__mmask16>(simd_mask(true))); + return simd_mask(_kxor_mask16(true_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return m_value == other.m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return m_value != other.m_value; + } +}; + template <> class simd> { __m512d m_value; @@ -700,6 +816,280 @@ simd> condition( static_cast<__m256>(b))); } +template <> +class simd> { + __m512 m_value; + + public: + using value_type = float; + using abi_type = simd_abi::avx512_fixed_size<16>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = 
default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_ps(value_type(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m512 const& value_in) + : m_value(value_in) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) + : m_value( + _mm512_setr_ps(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm512_loadu_ps(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm512_load_ps(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_storeu_ps(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm512_store_ps(ptr, m_value); + } + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(_mm512_sub_ps(_mm512_set1_ps(0.0), m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_mul_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator/( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_div_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_add_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_sub_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_LT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_GT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_LE_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_GE_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return 
mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_EQ_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_NEQ_OS)); + } +}; + +} // namespace Experimental + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> +copysign(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a, + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& b) { + __m512 const sign_mask = _mm512_set1_ps(-0.0); + return Experimental::simd>( + _mm512_xor_ps(_mm512_andnot_ps(sign_mask, static_cast<__m512>(a)), + _mm512_and_ps(sign_mask, static_cast<__m512>(b)))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> abs( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const sign_mask = _mm512_set1_ps(-0.0); + return Experimental::simd>( + _mm512_andnot_ps(sign_mask, static_cast<__m512>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_NEG_INF)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_POS_INF)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_NEAREST_INT)); +} + +[[nodiscard]] 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_ZERO)); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> sqrt( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_sqrt_ps(static_cast<__m512>(a))); +} + +#ifdef __INTEL_COMPILER + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> cbrt( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cbrt_ps(static_cast<__m512>(a))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> exp( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_exp_ps(static_cast<__m512>(a))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> log( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_log_ps(static_cast<__m512>(a))); +} + +#endif + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> fma( + Experimental::simd> const& a, + Experimental::simd> const& b, + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& c) { + return Experimental::simd>( + _mm512_fmadd_ps(static_cast<__m512>(a), static_cast<__m512>(b), + static_cast<__m512>(c))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> max( + Experimental::simd> const& a, + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& b) { + return Experimental::simd>( + _mm512_max_ps(static_cast<__m512>(a), static_cast<__m512>(b))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> min( + Experimental::simd> const& a, + 
Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& b) { + return Experimental::simd>( + _mm512_min_ps(static_cast<__m512>(a), static_cast<__m512>(b))); +} + +namespace Experimental { + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd> condition( + simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm512_mask_blend_ps(static_cast<__mmask16>(a), static_cast<__m512>(c), + static_cast<__m512>(b))); +} + template <> class simd> { __m256i m_value; @@ -908,12 +1298,12 @@ namespace Experimental { } template <> -class simd> { - __m256i m_value; +class simd> { + __m512i m_value; public: - using value_type = std::uint32_t; - using abi_type = simd_abi::avx512_fixed_size<8>; + using value_type = std::int32_t; + using abi_type = simd_abi::avx512_fixed_size<16>; using mask_type = simd_mask; using reference = value_type&; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; @@ -922,19 +1312,17 @@ class simd> { KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { - return 8; + return 16; } template , bool> = false> KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) - : m_value(_mm256_set1_epi32( - Kokkos::bit_cast(value_type(value)))) {} + : m_value(_mm512_set1_epi32(value_type(value))) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( - __m256i const& value_in) + __m512i const& value_in) : m_value(value_in) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( - simd> const& other) - : m_value(static_cast<__m256i>(other)) {} + simd const& other); template > { bool> = false> KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( G&& gen) noexcept - : m_value( - _mm256_setr_epi32(gen(std::integral_constant()), - gen(std::integral_constant()), - gen(std::integral_constant()), - gen(std::integral_constant()), - 
gen(std::integral_constant()), + : m_value(_mm512_setr_epi32( + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_mask_storeu_epi32(ptr, static_cast<__mmask16>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm512_mask_store_epi32(ptr, static_cast<__mmask16>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512i() + const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(_mm512_sub_epi32(_mm512_set1_epi32(0), m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd 
operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_mullo_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd>( + _mm512_add_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd>( + _mm512_sub_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmplt_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmplt_epi32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmple_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmple_epi32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmpeq_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmpneq_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + + 
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm512_srai_epi32(static_cast<__m512i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_srav_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm512_slli_epi32(static_cast<__m512i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_sllv_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> +abs(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512i const rhs = static_cast<__m512i>(a); + return Experimental::simd>( + _mm512_abs_epi32(rhs)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +floor(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +ceil(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +round(Experimental::simd< + std::int32_t, 
Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +trunc(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(a), + static_cast<__m512i>(c), + static_cast<__m512i>(b))); +} + +template <> +class simd> { + __m256i m_value; + + public: + using value_type = std::uint32_t; + using abi_type = simd_abi::avx512_fixed_size<8>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm256_set1_epi32( + Kokkos::bit_cast(value_type(value)))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m256i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd> const& other) + : m_value(static_cast<__m256i>(other)) {} + template ()); } + std::is_invocable_r_v>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value( + 
_mm256_setr_epi32(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), gen(std::integral_constant()), gen(std::integral_constant()), gen(std::integral_constant()))) {} @@ -960,6 +1566,16 @@ class simd> { operator[](std::size_t i) const { return reinterpret_cast(&m_value)[i]; } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_mask_storeu_epi32(ptr, static_cast<__mmask8>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm256_mask_store_epi32(ptr, static_cast<__mmask8>(mask_type(true)), + m_value); + } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, element_aligned_tag) { m_value = _mm256_mask_loadu_epi32( @@ -970,142 +1586,344 @@ class simd> { m_value = _mm256_mask_load_epi32( _mm256_set1_epi32(0), static_cast<__mmask8>(mask_type(true)), ptr); } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_mullo_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm256_add_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm256_sub_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return 
mask_type(_mm256_cmplt_epu32_mask(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmplt_epu32_mask(static_cast<__m256i>(rhs), + static_cast<__m256i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmple_epu32_mask(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmple_epu32_mask(static_cast<__m256i>(rhs), + static_cast<__m256i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmpeq_epu32_mask(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmpneq_epu32_mask(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm256_srli_epi32(static_cast<__m256i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_srlv_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm256_slli_epi32(static_cast<__m256i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return 
simd(_mm256_sllv_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> +abs(Experimental::simd> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + double, Experimental::simd_abi::avx512_fixed_size<8>> +floor(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_pd(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + double, Experimental::simd_abi::avx512_fixed_size<8>> +ceil(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_pd(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + double, Experimental::simd_abi::avx512_fixed_size<8>> +round(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_pd(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + double, Experimental::simd_abi::avx512_fixed_size<8>> +trunc(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_pd(static_cast<__m256i>(a))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm256_mask_blend_epi32(static_cast<__mmask8>(a), static_cast<__m256i>(c), + static_cast<__m256i>(b))); +} + +template <> +class simd> { + __m512i m_value; + + public: + using value_type = std::uint32_t; + using abi_type = 
simd_abi::avx512_fixed_size<16>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_epi32( + Kokkos::bit_cast(value_type(value)))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m512i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd> const& other) + : m_value(static_cast<__m512i>(other)) {} + template ()); } + std::is_invocable_r_v>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value(_mm512_setr_epi32( + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) 
{ + m_value = _mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( value_type* ptr, element_aligned_tag) const { - _mm256_mask_storeu_epi32(ptr, static_cast<__mmask8>(mask_type(true)), + _mm512_mask_storeu_epi32(ptr, static_cast<__mmask16>(mask_type(true)), m_value); } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, vector_aligned_tag) const { - _mm256_mask_store_epi32(ptr, static_cast<__mmask8>(mask_type(true)), + _mm512_mask_store_epi32(ptr, static_cast<__mmask16>(mask_type(true)), m_value); } - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512i() const { return m_value; } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( simd const& lhs, simd const& rhs) noexcept { - return simd(_mm256_mullo_epi32(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return simd(_mm512_mullo_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( simd const& lhs, simd const& rhs) noexcept { return simd( - _mm256_add_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); + _mm512_add_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( simd const& lhs, simd const& rhs) noexcept { return simd( - _mm256_sub_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); + _mm512_sub_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator<(simd const& lhs, simd const& 
rhs) noexcept { - return mask_type(_mm256_cmplt_epu32_mask(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return mask_type(_mm512_cmplt_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator>(simd const& lhs, simd const& rhs) noexcept { - return mask_type(_mm256_cmplt_epu32_mask(static_cast<__m256i>(rhs), - static_cast<__m256i>(lhs))); + return mask_type(_mm512_cmplt_epu32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator<=(simd const& lhs, simd const& rhs) noexcept { - return mask_type(_mm256_cmple_epu32_mask(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return mask_type(_mm512_cmple_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator>=(simd const& lhs, simd const& rhs) noexcept { - return mask_type(_mm256_cmple_epu32_mask(static_cast<__m256i>(rhs), - static_cast<__m256i>(lhs))); + return mask_type(_mm512_cmple_epu32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator==(simd const& lhs, simd const& rhs) noexcept { - return mask_type(_mm256_cmpeq_epu32_mask(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return mask_type(_mm512_cmpeq_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type operator!=(simd const& lhs, simd const& rhs) noexcept { - return mask_type(_mm256_cmpneq_epu32_mask(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return mask_type(_mm512_cmpneq_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( simd const& lhs, int rhs) noexcept { - return 
simd(_mm256_srli_epi32(static_cast<__m256i>(lhs), rhs)); + return simd(_mm512_srli_epi32(static_cast<__m512i>(lhs), rhs)); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( simd const& lhs, simd const& rhs) noexcept { - return simd(_mm256_srlv_epi32(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return simd(_mm512_srlv_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( simd const& lhs, int rhs) noexcept { - return simd(_mm256_slli_epi32(static_cast<__m256i>(lhs), rhs)); + return simd(_mm512_slli_epi32(static_cast<__m512i>(lhs), rhs)); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( simd const& lhs, simd const& rhs) noexcept { - return simd(_mm256_sllv_epi32(static_cast<__m256i>(lhs), - static_cast<__m256i>(rhs))); + return simd(_mm512_sllv_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); } }; } // namespace Experimental [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< - std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> -abs(Experimental::simd> const& a) { + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> +abs(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { return a; } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< - double, Experimental::simd_abi::avx512_fixed_size<8>> + float, Experimental::simd_abi::avx512_fixed_size<16>> floor(Experimental::simd< - std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { - return Experimental::simd>( - _mm512_cvtepu32_pd(static_cast<__m256i>(a))); + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< - double, 
Experimental::simd_abi::avx512_fixed_size<8>> + float, Experimental::simd_abi::avx512_fixed_size<16>> ceil(Experimental::simd< - std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { - return Experimental::simd>( - _mm512_cvtepu32_pd(static_cast<__m256i>(a))); + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< - double, Experimental::simd_abi::avx512_fixed_size<8>> + float, Experimental::simd_abi::avx512_fixed_size<16>> round(Experimental::simd< - std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { - return Experimental::simd>( - _mm512_cvtepu32_pd(static_cast<__m256i>(a))); + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< - double, Experimental::simd_abi::avx512_fixed_size<8>> + float, Experimental::simd_abi::avx512_fixed_size<16>> trunc(Experimental::simd< - std::uint32_t, Experimental::simd_abi::avx512_fixed_size<8>> const& a) { - return Experimental::simd>( - _mm512_cvtepu32_pd(static_cast<__m256i>(a))); + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); } namespace Experimental { [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION - simd> - condition(simd_mask> const& a, - simd> const& b, - simd> const& c) { - return simd>( - _mm256_mask_blend_epi32(static_cast<__mmask8>(a), static_cast<__m256i>(c), - static_cast<__m256i>(b))); + simd> + condition( + simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(a), + static_cast<__m512i>(c), + static_cast<__m512i>(b))); } template <> @@ -1716,6 +2534,95 @@ 
class where_expression>, } }; +template <> +class const_where_expression>, + simd>> { + public: + using abi_type = simd_abi::avx512_fixed_size<16>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, element_aligned_tag) const { + _mm512_mask_storeu_ps(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, vector_aligned_tag) const { + _mm512_mask_store_ps(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + float* mem, + simd> const& index) const { + _mm512_mask_i32scatter_ps(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), + static_cast<__m512>(m_value), 4); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_ps( + _mm512_set1_ps(0.0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, vector_aligned_tag) { + m_value = value_type(_mm512_mask_load_ps( + _mm512_set1_ps(0.0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void 
gather_from( + float const* mem, + simd> const& index) { + m_value = value_type(_mm512_mask_i32gather_ps( + static_cast<__m512>(m_value), static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), mem, 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>(_mm512_mask_blend_ps( + static_cast<__mmask16>(m_mask), static_cast<__m512>(m_value), + static_cast<__m512>(x_as_value_type))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1810,6 +2717,98 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::avx512_fixed_size<16>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + _mm512_mask_storeu_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, vector_aligned_tag) const { + _mm512_mask_store_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::int32_t* mem, + simd> const& index) const { + _mm512_mask_i32scatter_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), + static_cast<__m512i>(m_value), 4); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + 
simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, vector_aligned_tag) { + m_value = value_type(_mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::int32_t const* mem, + simd> const& index) { + m_value = value_type(_mm512_mask_i32gather_epi32( + static_cast<__m512i>(m_value), static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), mem, 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value), + static_cast<__m512i>(x_as_value_type))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1905,6 +2904,99 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::avx512_fixed_size<16>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::uint32_t* mem, element_aligned_tag) const { + _mm512_mask_storeu_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + 
void copy_to(std::uint32_t* mem, vector_aligned_tag) const { + _mm512_mask_store_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::uint32_t* mem, + simd> const& index) const { + _mm512_mask_i32scatter_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), + static_cast<__m512i>(m_value), 4); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression< + simd_mask>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::uint32_t const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::uint32_t const* mem, vector_aligned_tag) { + m_value = value_type(_mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::uint32_t const* mem, + simd> const& index) { + m_value = value_type(_mm512_mask_i32gather_epi32( + static_cast<__m512i>(m_value), static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), mem, 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value), + static_cast<__m512i>(x_as_value_type))); + } +}; + template <> class const_where_expression< 
simd_mask>, diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_NEON.hpp b/packages/kokkos/simd/src/Kokkos_SIMD_NEON.hpp index efc81135d165..8cb0cc75fc0e 100644 --- a/packages/kokkos/simd/src/Kokkos_SIMD_NEON.hpp +++ b/packages/kokkos/simd/src/Kokkos_SIMD_NEON.hpp @@ -42,11 +42,11 @@ class neon_fixed_size {}; namespace Impl { -template +template class neon_mask; template -class neon_mask { +class neon_mask { uint64x2_t m_value; public: @@ -104,12 +104,13 @@ class neon_mask { } template KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( - neon_mask const& other) { + neon_mask const& other) { operator[](0) = bool(other[0]); operator[](1) = bool(other[1]); } template - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask(neon_mask const& other) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( + neon_mask const& other) : neon_mask(static_cast(other)) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 2; @@ -158,7 +159,7 @@ class neon_mask { }; template -class neon_mask { +class neon_mask { uint32x2_t m_value; public: @@ -211,10 +212,12 @@ class neon_mask { m_value, 1); } template - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask(neon_mask const& other) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( + neon_mask const& other) : m_value(vqmovn_u64(static_cast(other))) {} template - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask(neon_mask const& other) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( + neon_mask const& other) : m_value(static_cast(other)) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 2; @@ -260,14 +263,125 @@ class neon_mask { } }; +template +class neon_mask { + uint32x4_t m_value; + + public: + class reference { + uint32x4_t& m_mask; + int m_lane; + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(uint32x4_t& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + switch 
(m_lane) { + case 0: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 0); + break; + case 1: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 1); + break; + case 2: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 2); + break; + case 3: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 3); + break; + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + switch (m_lane) { + case 0: return vgetq_lane_u32(m_mask, 0) != 0; + case 1: return vgetq_lane_u32(m_mask, 1) != 0; + case 2: return vgetq_lane_u32(m_mask, 2) != 0; + case 3: return vgetq_lane_u32(m_mask, 3) != 0; + } + return false; + } + }; + using value_type = bool; + using abi_type = simd_abi::neon_fixed_size<4>; + using implementation_type = uint32x4_t; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit neon_mask(value_type value) + : m_value(vmovq_n_u32(value ? 0xFFFFFFFFU : 0)) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit neon_mask( + G&& gen) noexcept { + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 0xFFFFFFFFU : 0), + m_value, 0); + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 0xFFFFFFFFU : 0), + m_value, 1); + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 0xFFFFFFFFU : 0), + m_value, 2); + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 
0xFFFFFFFFU : 0), + m_value, 3); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit neon_mask( + uint32x4_t const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator uint32x4_t() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast( + reference(const_cast(m_value), int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Derived + operator||(neon_mask const& other) const { + return Derived(vorrq_u32(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Derived + operator&&(neon_mask const& other) const { + return Derived(vandq_u32(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Derived operator!() const { + auto const true_value = static_cast(neon_mask(true)); + return Derived(veorq_u32(m_value, true_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + neon_mask const& other) const { + uint32x4_t const elementwise_equality = vceqq_u32(m_value, other.m_value); + uint64x2_t const overall_equality_neon = + vreinterpretq_u64_u32(elementwise_equality); + return (overall_equality_neon[0] == 0xFFFFFFFFFFFFFFFFULL) && + (overall_equality_neon[1] == 0xFFFFFFFFFFFFFFFFULL); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + neon_mask const& other) const { + return !operator==(other); + } +}; + } // namespace Impl template class simd_mask> : public Impl::neon_mask>, - sizeof(T) * 8> { + sizeof(T) * 8, 2> { using base_type = Impl::neon_mask>, - sizeof(T) * 8>; + sizeof(T) * 8, 2>; public: using implementation_type = typename base_type::implementation_type; @@ -291,6 +405,35 @@ class simd_mask> : base_type(gen) {} }; +template +class simd_mask> + : public 
Impl::neon_mask>, + sizeof(T) * 8, 4> { + using base_type = Impl::neon_mask>, + sizeof(T) * 8, 4>; + + public: + using implementation_type = typename base_type::implementation_type; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(bool value) + : base_type(value) {} + template + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask> const& other) + : base_type(other) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + implementation_type const& value) + : base_type(value) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + G&& gen) noexcept + : base_type(gen) {} +}; + template <> class simd> { float64x2_t m_value; @@ -788,6 +931,256 @@ namespace Experimental { static_cast(c))); } +template <> +class simd> { + float32x4_t m_value; + + public: + using value_type = float; + using abi_type = simd_abi::neon_fixed_size<4>; + using mask_type = simd_mask; + class reference { + float32x4_t& m_value; + int m_lane; + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(float32x4_t& value_arg, + int lane_arg) + : m_value(value_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(float value) const { + switch (m_lane) { + case 0: m_value = vsetq_lane_f32(value, m_value, 0); break; + case 1: m_value = vsetq_lane_f32(value, m_value, 1); break; + case 2: m_value = vsetq_lane_f32(value, m_value, 2); break; + case 3: m_value = vsetq_lane_f32(value, m_value, 3); break; + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator float() const { + switch (m_lane) { + case 0: return vgetq_lane_f32(m_value, 0); + case 1: return vgetq_lane_f32(m_value, 1); + case 2: return vgetq_lane_f32(m_value, 2); + case 3: return vgetq_lane_f32(m_value, 3); + } + return 0; + } + }; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd 
const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(vmovq_n_f32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) { + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 0); + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 1); + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 2); + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 3); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + float32x4_t const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reference(const_cast(this)->m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = vld1q_f32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = vld1q_f32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + vst1q_f32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + vst1q_f32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit + operator float32x4_t() const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return 
simd(vnegq_f32(m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(vmulq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator/( + simd const& lhs, simd const& rhs) noexcept { + return simd(vdivq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd(vaddq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd(vsubq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcltq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcgtq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcleq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcgeq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vceqq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return !(lhs == rhs); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + 
float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vabsq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndmq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndpq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndxq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +copysign( + Experimental::simd> const& + a, + Experimental::simd> const& + b) { + uint32x4_t const sign_mask = vreinterpretq_u32_f32(vmovq_n_f32(-0.0)); + return Experimental::simd>( + vreinterpretq_f32_u32(vorrq_u32( + vreinterpretq_u32_f32(static_cast(abs(a))), + vandq_u32(sign_mask, + vreinterpretq_u32_f32(static_cast(b)))))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + sqrt(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vsqrtq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +fma(Experimental::simd> const& + a, + Experimental::simd> const& + b, + Experimental::simd> const& + c) { + 
return Experimental::simd>( + vfmaq_f32(static_cast(c), static_cast(b), + static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +max(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + vmaxq_f32(static_cast(a), static_cast(b))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +min(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + vminq_f32(static_cast(a), static_cast(b))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + vbslq_f32(static_cast(a), static_cast(b), + static_cast(c))); +} + template <> class simd> { int32x2_t m_value; @@ -1001,7 +1394,227 @@ namespace Experimental { } template <> -class simd> { +class simd> { + int32x4_t m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::neon_fixed_size<4>; + using mask_type = simd_mask; + class reference { + int32x4_t& m_value; + int m_lane; + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(int32x4_t& value_arg, + int lane_arg) + : m_value(value_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(std::int32_t value) const { + switch (m_lane) { + case 0: m_value = vsetq_lane_s32(value, m_value, 0); break; + case 1: m_value = vsetq_lane_s32(value, m_value, 1); break; + case 2: m_value = vsetq_lane_s32(value, m_value, 2); break; + case 3: m_value = vsetq_lane_s32(value, m_value, 3); break; + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator std::int32_t() const { + switch (m_lane) { + case 0: return vgetq_lane_s32(m_value, 0); + case 1: return vgetq_lane_s32(m_value, 1); + case 2: return vgetq_lane_s32(m_value, 2); + 
case 3: return vgetq_lane_s32(m_value, 3); + } + return 0; + } + }; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(vmovq_n_s32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept { + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 0); + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 1); + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 2); + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 3); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + int32x4_t const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd const& other); + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reference(const_cast(this)->m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = vld1q_s32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = vld1q_s32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + vst1q_s32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + 
vector_aligned_tag) const { + vst1q_s32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator int32x4_t() + const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(vnegq_s32(m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vsubq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vaddq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vmulq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vceqq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcgtq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcltq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcleq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcgeq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, 
simd const& rhs) noexcept { + return !(lhs == rhs); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(vshlq_s32(static_cast(lhs), + vnegq_s32(vmovq_n_s32(std::int32_t(rhs))))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(vshlq_s32(static_cast(lhs), + vnegq_s32(static_cast(rhs)))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd( + vshlq_s32(static_cast(lhs), vmovq_n_s32(std::int32_t(rhs)))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vshlq_s32(static_cast(lhs), static_cast(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vabsq_s32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + 
condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + vbslq_s32(static_cast(a), static_cast(b), + static_cast(c))); +} + +template <> +class simd> { int64x2_t m_value; public: @@ -1593,6 +2206,106 @@ class where_expression>, } }; +template <> +class const_where_expression>, + simd>> { + public: + using abi_type = simd_abi::neon_fixed_size<4>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, element_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, vector_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + float* mem, + simd> const& index) const { + if (m_mask[0]) mem[index[0]] = m_value[0]; + if (m_mask[1]) mem[index[1]] = m_value[1]; + if (m_mask[2]) mem[index[2]] = m_value[2]; + if (m_mask[3]) mem[index[3]] = m_value[3]; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* 
mem, element_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, vector_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + float const* mem, + simd> const& index) { + if (m_mask[0]) m_value[0] = mem[index[0]]; + if (m_mask[1]) m_value[1] = mem[index[1]]; + if (m_mask[2]) m_value[2] = mem[index[2]]; + if (m_mask[3]) m_value[3] = mem[index[3]]; + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = static_cast>>( + vbslq_f32(static_cast(m_mask), + static_cast(x_as_value_type), + static_cast(m_value))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1686,6 +2399,108 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::neon_fixed_size<4>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, vector_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = 
m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::int32_t* mem, + simd> const& index) const { + if (m_mask[0]) mem[index[0]] = m_value[0]; + if (m_mask[1]) mem[index[1]] = m_value[1]; + if (m_mask[2]) mem[index[2]] = m_value[2]; + if (m_mask[3]) mem[index[3]] = m_value[3]; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, vector_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::int32_t const* mem, + simd> const& index) { + if (m_mask[0]) m_value[0] = mem[index[0]]; + if (m_mask[1]) m_value[1] = mem[index[1]]; + if (m_mask[2]) m_value[2] = mem[index[2]]; + if (m_mask[3]) m_value[3] = mem[index[3]]; + } + template < + class U, + std::enable_if_t< + std::is_convertible_v>>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = static_cast>>( + vbslq_s32(static_cast(m_mask), + static_cast(x_as_value_type), + static_cast(m_value))); + } +}; + 
template <> class const_where_expression< simd_mask>, diff --git a/packages/kokkos/simd/unit_tests/CMakeLists.txt b/packages/kokkos/simd/unit_tests/CMakeLists.txt index 75d557e8b525..109effc710d4 100644 --- a/packages/kokkos/simd/unit_tests/CMakeLists.txt +++ b/packages/kokkos/simd/unit_tests/CMakeLists.txt @@ -1,7 +1,9 @@ KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/simd/unit_tests/include) -KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_SIMD - SOURCES - UnitTestMain.cpp - TestSIMD.cpp) +IF((NOT (Kokkos_ENABLE_CUDA AND WIN32))) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SIMD + SOURCES + UnitTestMain.cpp + TestSIMD.cpp) +ENDIF() diff --git a/packages/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp b/packages/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp index c587ccf30468..74141f253162 100644 --- a/packages/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp +++ b/packages/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp @@ -81,7 +81,9 @@ class absolutes { auto on_host(T const& a) const { if constexpr (std::is_signed_v) { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() return Kokkos::Experimental::abs(a); + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() #else return Kokkos::abs(a); #endif diff --git a/packages/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp b/packages/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp index d36e1e5afc5e..9719855f0ffe 100644 --- a/packages/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp +++ b/packages/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp @@ -135,8 +135,8 @@ class load_masked { for (std::size_t i = 0; i < n; ++i) { mask[i] = true; } + result = T(0); where(mask, result).copy_from(mem, Kokkos::Experimental::simd_flag_default); - where(!mask, result) = 0; return true; } template @@ -181,4 +181,14 @@ class load_as_scalars { } }; +// Simple check to loosely test that T is a complete type. 
+// Some capabilities are only defined for specific data type and abi pairs (i.e. +// extended vector width); this is used to exclude pairs that +// are not defined from being tested. +template +constexpr bool is_type_v = false; + +template +constexpr bool is_type_v = true; + #endif diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp index f8d8cc70fa4e..bf22cf3352b0 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp @@ -22,21 +22,23 @@ template inline void host_check_condition() { - using simd_type = typename Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - - auto condition_op = [](mask_type const& mask, simd_type const& a, - simd_type const& b) { - return Kokkos::Experimental::condition(mask, a, b); - }; - - simd_type value_a(16); - simd_type value_b(20); - - auto condition_result = condition_op(mask_type(false), value_a, value_b); - EXPECT_TRUE(all_of(condition_result == value_b)); - condition_result = condition_op(mask_type(true), value_a, value_b); - EXPECT_TRUE(all_of(condition_result == value_a)); + if constexpr (is_type_v>) { + using simd_type = typename Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + + auto condition_op = [](mask_type const& mask, simd_type const& a, + simd_type const& b) { + return Kokkos::Experimental::condition(mask, a, b); + }; + + simd_type value_a(16); + simd_type value_b(20); + + auto condition_result = condition_op(mask_type(false), value_a, value_b); + EXPECT_TRUE(all_of(condition_result == value_b)); + condition_result = condition_op(mask_type(true), value_a, value_b); + EXPECT_TRUE(all_of(condition_result == value_a)); + } } template @@ -54,22 +56,24 @@ inline void host_check_condition_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_condition() { - using simd_type = typename 
Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - kokkos_checker checker; - - auto condition_op = [](mask_type const& mask, simd_type const& a, - simd_type const& b) { - return Kokkos::Experimental::condition(mask, a, b); - }; - - simd_type value_a(16); - simd_type value_b(20); - - auto condition_result = condition_op(mask_type(false), value_a, value_b); - checker.truth(all_of(condition_result == value_b)); - condition_result = condition_op(mask_type(true), value_a, value_b); - checker.truth(all_of(condition_result == value_a)); + if constexpr (is_type_v>) { + using simd_type = typename Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + kokkos_checker checker; + + auto condition_op = [](mask_type const& mask, simd_type const& a, + simd_type const& b) { + return Kokkos::Experimental::condition(mask, a, b); + }; + + simd_type value_a(16); + simd_type value_b(20); + + auto condition_result = condition_op(mask_type(false), value_a, value_b); + checker.truth(all_of(condition_result == value_b)); + condition_result = condition_op(mask_type(true), value_a, value_b); + checker.truth(all_of(condition_result == value_a)); + } } template diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp index b98871bbab80..20b0729762c4 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp @@ -22,40 +22,42 @@ template inline void host_check_conversions() { - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - EXPECT_TRUE(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - EXPECT_TRUE(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - EXPECT_TRUE(all_of(b == 
decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); + if constexpr (is_type_v>) { + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + EXPECT_TRUE(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + EXPECT_TRUE(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + EXPECT_TRUE(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } } } @@ -67,41 +69,43 @@ inline void host_check_conversions_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_conversions() { - kokkos_checker checker; - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - checker.truth(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - 
checker.truth(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - checker.truth(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); + if constexpr (is_type_v>) { + kokkos_checker checker; + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + checker.truth(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + checker.truth(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + checker.truth(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } } } diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp index 
23e3826c752a..1a61fd9cbbb6 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp @@ -22,49 +22,51 @@ template inline void host_check_gen_ctor() { - using simd_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - constexpr std::size_t lanes = simd_type::size(); - - DataType init[lanes]; - DataType expected[lanes]; - mask_type init_mask(false); - - for (std::size_t i = 0; i < lanes; ++i) { - if (i % 3 == 0) init_mask[i] = true; - init[i] = 7; - expected[i] = (init_mask[i]) ? init[i] * 9 : init[i]; - } - - simd_type rhs; - rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); - - simd_type blend; - blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + constexpr std::size_t lanes = simd_type::size(); -#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if constexpr (std::is_same_v) { - simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); - host_check_equality(basic, rhs, lanes); + DataType init[lanes]; + DataType expected[lanes]; + mask_type init_mask(false); - simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); - mask_type mask(KOKKOS_LAMBDA(std::size_t i) { return init_mask[i]; }); - simd_type result( - KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + for (std::size_t i = 0; i < lanes; ++i) { + if (i % 3 == 0) init_mask[i] = true; + init[i] = 7; + expected[i] = (init_mask[i]) ? 
init[i] * 9 : init[i]; + } - host_check_equality(blend, result, lanes); - } else { - simd_type basic([=](std::size_t i) { return init[i]; }); - host_check_equality(basic, rhs, lanes); + simd_type rhs; + rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); - simd_type lhs([=](std::size_t i) { return init[i] * 9; }); - mask_type mask([=](std::size_t i) { return init_mask[i]; }); - simd_type result( - [=](std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + simd_type blend; + blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); - host_check_equality(blend, result, lanes); - } +#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) + if constexpr (std::is_same_v) { + simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); + host_check_equality(basic, rhs, lanes); + + simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); + mask_type mask(KOKKOS_LAMBDA(std::size_t i) { return init_mask[i]; }); + simd_type result( + KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + + host_check_equality(blend, result, lanes); + } else { + simd_type basic([=](std::size_t i) { return init[i]; }); + host_check_equality(basic, rhs, lanes); + + simd_type lhs([=](std::size_t i) { return init[i] * 9; }); + mask_type mask([=](std::size_t i) { return init_mask[i]; }); + simd_type result( + [=](std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + + host_check_equality(blend, result, lanes); + } #endif + } } template @@ -82,32 +84,34 @@ inline void host_check_gen_ctors_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_gen_ctor() { - using simd_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - constexpr std::size_t lanes = simd_type::size(); - - DataType init[lanes]; - DataType expected[lanes]; - mask_type mask(false); - - for (std::size_t i = 0; i < lanes; ++i) { - if (i % 3 == 0) mask[i] = true; - init[i] = 7; - expected[i] = (mask[i]) ? 
init[i] * 9 : init[i]; - } + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + constexpr std::size_t lanes = simd_type::size(); + + DataType init[lanes]; + DataType expected[lanes]; + mask_type mask(false); + + for (std::size_t i = 0; i < lanes; ++i) { + if (i % 3 == 0) mask[i] = true; + init[i] = 7; + expected[i] = (mask[i]) ? init[i] * 9 : init[i]; + } - simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); - simd_type rhs; - rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); - device_check_equality(basic, rhs, lanes); + simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); + simd_type rhs; + rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); + device_check_equality(basic, rhs, lanes); - simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); - simd_type result( - KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); + simd_type result( + KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? 
lhs[i] : rhs[i]; }); - simd_type blend; - blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); - device_check_equality(result, blend, lanes); + simd_type blend; + blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); + device_check_equality(result, blend, lanes); + } } template diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp index a93c52e9a8d5..c3d4ac594d09 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp @@ -22,25 +22,27 @@ template inline void host_check_mask_ops() { - using mask_type = Kokkos::Experimental::simd_mask; - - EXPECT_FALSE(none_of(mask_type(true))); - EXPECT_TRUE(none_of(mask_type(false))); - EXPECT_TRUE(all_of(mask_type(true))); - EXPECT_FALSE(all_of(mask_type(false))); - EXPECT_TRUE(any_of(mask_type(true))); - EXPECT_FALSE(any_of(mask_type(false))); - - for (std::size_t i = 0; i < mask_type::size(); ++i) { - mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); - - EXPECT_TRUE(any_of(test_mask)); - EXPECT_FALSE(none_of(test_mask)); - - if constexpr (mask_type::size() > 1) { - EXPECT_FALSE(all_of(test_mask)); - } else { - EXPECT_TRUE(all_of(test_mask)); + if constexpr (is_type_v>) { + using mask_type = Kokkos::Experimental::simd_mask; + + EXPECT_FALSE(none_of(mask_type(true))); + EXPECT_TRUE(none_of(mask_type(false))); + EXPECT_TRUE(all_of(mask_type(true))); + EXPECT_FALSE(all_of(mask_type(false))); + EXPECT_TRUE(any_of(mask_type(true))); + EXPECT_FALSE(any_of(mask_type(false))); + + for (std::size_t i = 0; i < mask_type::size(); ++i) { + mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); + + EXPECT_TRUE(any_of(test_mask)); + EXPECT_FALSE(none_of(test_mask)); + + if constexpr (mask_type::size() > 1) { + EXPECT_FALSE(all_of(test_mask)); + } else { + EXPECT_TRUE(all_of(test_mask)); + } } } } @@ -60,25 +62,27 @@ 
inline void host_check_mask_ops_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_mask_ops() { - using mask_type = Kokkos::Experimental::simd_mask; - kokkos_checker checker; - checker.truth(!none_of(mask_type(true))); - checker.truth(none_of(mask_type(false))); - checker.truth(all_of(mask_type(true))); - checker.truth(!all_of(mask_type(false))); - checker.truth(any_of(mask_type(true))); - checker.truth(!any_of(mask_type(false))); - - for (std::size_t i = 0; i < mask_type::size(); ++i) { - mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); - - checker.truth(any_of(test_mask)); - checker.truth(!none_of(test_mask)); - - if constexpr (mask_type::size() > 1) { - checker.truth(!all_of(test_mask)); - } else { - checker.truth(all_of(test_mask)); + if constexpr (is_type_v>) { + using mask_type = Kokkos::Experimental::simd_mask; + kokkos_checker checker; + checker.truth(!none_of(mask_type(true))); + checker.truth(none_of(mask_type(false))); + checker.truth(all_of(mask_type(true))); + checker.truth(!all_of(mask_type(false))); + checker.truth(any_of(mask_type(true))); + checker.truth(!any_of(mask_type(false))); + + for (std::size_t i = 0; i < mask_type::size(); ++i) { + mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); + + checker.truth(any_of(test_mask)); + checker.truth(!none_of(test_mask)); + + if constexpr (mask_type::size() > 1) { + checker.truth(!all_of(test_mask)); + } else { + checker.truth(all_of(test_mask)); + } } } } diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp index 59f2f6c18fdf..4891a54f6c54 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp @@ -121,31 +121,34 @@ inline void host_check_abi_size() { template inline void host_check_math_ops() { - constexpr size_t n = 11; - constexpr size_t alignment = - Kokkos::Experimental::simd::size() * 
sizeof(DataType); - - host_check_abi_size(); - - if constexpr (!std::is_integral_v) { - alignas(alignment) DataType const first_args[n] = { - 0.1, 0.4, 0.5, 0.7, 1.0, 1.5, -2.0, 10.0, 0.0, 1.2, -2.8}; - alignas(alignment) DataType const second_args[n] = { - 1.0, 0.2, 1.1, 1.8, -0.1, -3.0, -2.4, 1.0, 13.0, -3.2, -2.1}; - host_check_all_math_ops(first_args, second_args); - } else { - if constexpr (std::is_signed_v) { - alignas(alignment) - DataType const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - alignas(alignment) DataType const second_args[n] = {1, 2, 1, 1, 1, -3, - -2, 1, 13, -3, -2}; + if constexpr (is_type_v>) { + constexpr size_t alignment = + Kokkos::Experimental::simd::size() * sizeof(DataType); + + host_check_abi_size(); + + if constexpr (!std::is_integral_v) { + alignas(alignment) DataType const first_args[] = { + 0.1, 0.4, 0.5, 0.7, 1.0, 1.5, -2.0, 10.0, + 0.0, 1.2, -2.8, 3.0, 4.0, -0.1, 5.0, -0.2}; + alignas(alignment) DataType const second_args[] = { + 1.0, 0.2, 1.1, 1.8, -0.1, -3.0, -2.4, 1.0, + 13.0, -3.2, -2.1, 3.0, -15.0, -0.5, -0.2, -0.2}; host_check_all_math_ops(first_args, second_args); } else { - alignas(alignment) - DataType const first_args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - alignas(alignment) - DataType const second_args[n] = {1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2}; - host_check_all_math_ops(first_args, second_args); + if constexpr (std::is_signed_v) { + alignas(alignment) DataType const first_args[] = { + 1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2, -3, 7, 4, -9, -15}; + alignas(alignment) DataType const second_args[] = { + 1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2, 10, -15, 7, 2, -10}; + host_check_all_math_ops(first_args, second_args); + } else { + alignas(alignment) DataType const first_args[] = { + 1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2, 11, 5, 8, 2, 14}; + alignas(alignment) DataType const second_args[] = { + 1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2, 3, 6, 20, 5, 14}; + host_check_all_math_ops(first_args, second_args); + } } } } @@ -253,25 
+256,31 @@ KOKKOS_INLINE_FUNCTION void device_check_abi_size() { template KOKKOS_INLINE_FUNCTION void device_check_math_ops() { - constexpr size_t n = 11; - - device_check_abi_size(); - - if constexpr (!std::is_integral_v) { - DataType const first_args[n] = {0.1, 0.4, 0.5, 0.7, 1.0, 1.5, - -2.0, 10.0, 0.0, 1.2, -2.8}; - DataType const second_args[n] = {1.0, 0.2, 1.1, 1.8, -0.1, -3.0, - -2.4, 1.0, 13.0, -3.2, -2.1}; - device_check_all_math_ops(first_args, second_args); - } else { - if constexpr (std::is_signed_v) { - DataType const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - DataType const second_args[n] = {1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2}; + if constexpr (is_type_v>) { + device_check_abi_size(); + + if constexpr (!std::is_integral_v) { + DataType const first_args[] = {0.1, 0.4, 0.5, 0.7, 1.0, 1.5, + -2.0, 10.0, 0.0, 1.2, -2.8, 3.0, + 4.0, -0.1, 5.0, -0.2}; + DataType const second_args[] = {1.0, 0.2, 1.1, 1.8, -0.1, -3.0, + -2.4, 1.0, 13.0, -3.2, -2.1, 3.0, + -15.0, -0.5, -0.2, -0.2}; device_check_all_math_ops(first_args, second_args); } else { - DataType const first_args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - DataType const second_args[n] = {1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2}; - device_check_all_math_ops(first_args, second_args); + if constexpr (std::is_signed_v) { + DataType const first_args[] = {1, 2, -1, 10, 0, 1, -2, 10, + 0, 1, -2, -3, 7, 4, -9, -15}; + DataType const second_args[] = {1, 2, 1, 1, 1, -3, -2, 1, + 13, -3, -2, 10, -15, 7, 2, -10}; + device_check_all_math_ops(first_args, second_args); + } else { + DataType const first_args[] = {1, 2, 1, 10, 0, 1, 2, 10, + 0, 1, 2, 11, 5, 8, 2, 14}; + DataType const second_args[] = {1, 2, 1, 1, 1, 3, 2, 1, + 13, 3, 2, 3, 6, 20, 5, 14}; + device_check_all_math_ops(first_args, second_args); + } } } } diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp index b3c7ac9a01e8..a3e796a0301e 100644 --- 
a/packages/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp @@ -65,14 +65,18 @@ inline void host_check_all_reductions(const DataType (&args)[n]) { template inline void host_check_reductions() { - constexpr size_t n = 11; - - if constexpr (std::is_signed_v) { - DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - host_check_all_reductions(args); - } else { - DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - host_check_all_reductions(args); + if constexpr (is_type_v>) { + constexpr size_t n = 16; + + if constexpr (std::is_signed_v) { + DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, + 0, 1, -2, -15, 5, 17, -22, 20}; + host_check_all_reductions(args); + } else { + DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, + 0, 1, 2, 15, 5, 17, 22, 20}; + host_check_all_reductions(args); + } } } @@ -135,14 +139,18 @@ KOKKOS_INLINE_FUNCTION void device_check_all_reductions( template KOKKOS_INLINE_FUNCTION void device_check_reductions() { - constexpr size_t n = 11; - - if constexpr (std::is_signed_v) { - DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - device_check_all_reductions(args); - } else { - DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - device_check_all_reductions(args); + if constexpr (is_type_v>) { + constexpr size_t n = 16; + + if constexpr (std::is_signed_v) { + DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, + 0, 1, -2, -15, 5, 17, -22, 20}; + device_check_all_reductions(args); + } else { + DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, + 0, 1, 2, 15, 5, 17, 22, 20}; + device_check_all_reductions(args); + } } } diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp b/packages/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp index ffdd2cba4a0e..7329f085018c 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp +++ 
b/packages/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp @@ -103,34 +103,35 @@ inline void host_check_shift_op_all_loaders(ShiftOp shift_op, template inline void host_check_shift_ops() { - if constexpr (std::is_integral_v) { - using simd_type = Kokkos::Experimental::simd; - constexpr std::size_t width = simd_type::size(); - constexpr std::size_t num_cases = 8; - constexpr size_t alignment = - Kokkos::Experimental::simd::size() * sizeof(DataType); - - DataType max = std::numeric_limits::max(); - - alignas(alignment) DataType shift_by[num_cases] = { - 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; - alignas(alignment) DataType test_vals[width]; - for (std::size_t i = 0; i < width; ++i) { - DataType inc = max / width; - test_vals[i] = i * inc + 1; - } - - host_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, - num_cases); - host_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, - num_cases); + if constexpr (is_type_v>) { + if constexpr (std::is_integral_v) { + using simd_type = Kokkos::Experimental::simd; + constexpr std::size_t width = simd_type::size(); + constexpr std::size_t num_cases = 16; + constexpr size_t alignment = + Kokkos::Experimental::simd::size() * sizeof(DataType); + + DataType max = std::numeric_limits::max(); + + alignas(alignment) DataType shift_by[num_cases] = { + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1, + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; + alignas(alignment) DataType test_vals[width]; + for (std::size_t i = 0; i < width; ++i) { + DataType inc = max / width; + test_vals[i] = i * inc + 1; + } - if constexpr (std::is_signed_v) { - for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; host_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, num_cases); host_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, num_cases); + + if constexpr (std::is_signed_v) { + for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; + 
host_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, + num_cases); + } } } } @@ -224,33 +225,34 @@ KOKKOS_INLINE_FUNCTION void device_check_shift_op_all_loaders( template KOKKOS_INLINE_FUNCTION void device_check_shift_ops() { - if constexpr (std::is_integral_v) { - using simd_type = Kokkos::Experimental::simd; - constexpr std::size_t width = simd_type::size(); - constexpr std::size_t num_cases = 8; + if constexpr (is_type_v>) { + if constexpr (std::is_integral_v) { + using simd_type = Kokkos::Experimental::simd; + constexpr std::size_t width = simd_type::size(); + constexpr std::size_t num_cases = 16; - DataType max = Kokkos::reduction_identity::max(); + DataType max = Kokkos::reduction_identity::max(); - DataType shift_by[num_cases] = { - 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; - DataType test_vals[width]; + DataType shift_by[num_cases] = { + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1, + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; + DataType test_vals[width]; - for (std::size_t i = 0; i < width; ++i) { - DataType inc = max / width; - test_vals[i] = i * inc + 1; - } + for (std::size_t i = 0; i < width; ++i) { + DataType inc = max / width; + test_vals[i] = i * inc + 1; + } - device_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, - num_cases); - device_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, - num_cases); - - if constexpr (std::is_signed_v) { - for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; device_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, num_cases); device_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, num_cases); + + if constexpr (std::is_signed_v) { + for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; + device_check_shift_op_all_loaders(shift_right(), test_vals, + shift_by, num_cases); + } } } } diff --git a/packages/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp 
b/packages/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp index 152fd9e9840d..904b2c665e56 100644 --- a/packages/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp +++ b/packages/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp @@ -22,60 +22,66 @@ template inline void host_check_where_expr_scatter_to() { - using simd_type = Kokkos::Experimental::simd; - using index_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - - std::size_t nlanes = simd_type::size(); - DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37}; - simd_type src; - src.copy_from(init, Kokkos::Experimental::simd_flag_default); - - for (std::size_t idx = 0; idx < nlanes; ++idx) { - mask_type mask(true); - mask[idx] = false; - - DataType dst[8] = {0}; - index_type index; - simd_type expected_result; - for (std::size_t i = 0; i < nlanes; ++i) { - dst[i] = (2 + (i * 2)); - index[i] = i; - expected_result[i] = (mask[i]) ? src[index[i]] : dst[i]; + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using index_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + + std::size_t nlanes = simd_type::size(); + DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; + simd_type src; + src.copy_from(init, Kokkos::Experimental::simd_flag_default); + + for (std::size_t idx = 0; idx < nlanes; ++idx) { + mask_type mask(true); + mask[idx] = false; + + DataType dst[simd_type::size()] = {0}; + index_type index; + simd_type expected_result; + for (std::size_t i = 0; i < nlanes; ++i) { + dst[i] = (2 + (i * 2)); + index[i] = i; + expected_result[i] = (mask[i]) ? 
src[index[i]] : dst[i]; + } + where(mask, src).scatter_to(dst, index); + + simd_type dst_simd; + dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); + + host_check_equality(expected_result, dst_simd, nlanes); } - where(mask, src).scatter_to(dst, index); - - simd_type dst_simd; - dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); - - host_check_equality(expected_result, dst_simd, nlanes); } } template inline void host_check_where_expr_gather_from() { - using simd_type = Kokkos::Experimental::simd; - using index_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - - std::size_t nlanes = simd_type::size(); - DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37}; - - for (std::size_t idx = 0; idx < nlanes; ++idx) { - mask_type mask(true); - mask[idx] = false; - - simd_type dst; - index_type index; - simd_type expected_result; - for (std::size_t i = 0; i < nlanes; ++i) { - dst[i] = (2 + (i * 2)); - index[i] = i; - expected_result[i] = (mask[i]) ? src[index[i]] : dst[i]; + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using index_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + + std::size_t nlanes = simd_type::size(); + DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; + + for (std::size_t idx = 0; idx < nlanes; ++idx) { + mask_type mask(true); + mask[idx] = false; + + simd_type dst; + index_type index; + simd_type expected_result; + for (std::size_t i = 0; i < nlanes; ++i) { + dst[i] = (2 + (i * 2)); + index[i] = i; + expected_result[i] = (mask[i]) ? 
src[index[i]] : dst[i]; + } + where(mask, dst).gather_from(src, index); + + host_check_equality(expected_result, dst, nlanes); } - where(mask, dst).gather_from(src, index); - - host_check_equality(expected_result, dst, nlanes); } } @@ -100,33 +106,36 @@ inline void host_check_where_expr_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_where_expr_scatter_to() { - using simd_type = Kokkos::Experimental::simd; - using index_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - - std::size_t nlanes = simd_type::size(); - DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37}; - simd_type src; - src.copy_from(init, Kokkos::Experimental::simd_flag_default); - - for (std::size_t idx = 0; idx < nlanes; ++idx) { - mask_type mask(true); - mask[idx] = false; - - DataType dst[8] = {0}; - index_type index; - simd_type expected_result; - for (std::size_t i = 0; i < nlanes; ++i) { - dst[i] = (2 + (i * 2)); - index[i] = i; - expected_result[i] = (mask[i]) ? src[index[i]] : dst[i]; + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using index_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + + std::size_t nlanes = simd_type::size(); + DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; + simd_type src; + src.copy_from(init, Kokkos::Experimental::simd_flag_default); + + for (std::size_t idx = 0; idx < nlanes; ++idx) { + mask_type mask(true); + mask[idx] = false; + + DataType dst[simd_type::size()] = {0}; + index_type index; + simd_type expected_result; + for (std::size_t i = 0; i < nlanes; ++i) { + dst[i] = (2 + (i * 2)); + index[i] = i; + expected_result[i] = (mask[i]) ? 
src[index[i]] : dst[i]; + } + where(mask, src).scatter_to(dst, index); + + simd_type dst_simd; + dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); + + device_check_equality(expected_result, dst_simd, nlanes); } - where(mask, src).scatter_to(dst, index); - - simd_type dst_simd; - dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); - - device_check_equality(expected_result, dst_simd, nlanes); } } @@ -137,7 +146,8 @@ KOKKOS_INLINE_FUNCTION void device_check_where_expr_gather_from() { using mask_type = typename simd_type::mask_type; std::size_t nlanes = simd_type::size(); - DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37}; + DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; for (std::size_t idx = 0; idx < nlanes; ++idx) { mask_type mask(true); diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp new file mode 100644 index 000000000000..0eab27fe989f --- /dev/null +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2019, Lawrence Livermore National Security, LLC +and DESUL project contributors. See the COPYRIGHT file for details. 
+Source: https://github.com/desul/desul + +SPDX-License-Identifier: (BSD-3-Clause) +*/ + +#ifndef DESUL_ATOMICS_ADAPT_HIP_HPP_ +#define DESUL_ATOMICS_ADAPT_HIP_HPP_ + +#include + +namespace desul { +namespace Impl { + +// FIXME same code as GCCMemoryOrder +template +struct HIPMemoryOrder; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_RELAXED; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_ACQUIRE; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_RELEASE; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_ACQ_REL; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_SEQ_CST; +}; + +// __HIP_MEMORY_SCOPE_SYSTEM +// __HIP_MEMORY_SCOPE_AGENT +// __HIP_MEMORY_SCOPE_WORKGROUP +// __HIP_MEMORY_SCOPE_WAVEFRONT +// __HIP_MEMORY_SCOPE_SINGLETHREAD +template +struct HIPMemoryScope; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_WORKGROUP; +}; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_AGENT; +}; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_SYSTEM; +}; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_SYSTEM; +}; + +} // namespace Impl +} // namespace desul + +#endif diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp index 3d69dcf6c50c..e7f9239e03da 100644 --- a/packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp @@ -6,533 +6,95 @@ Source: https://github.com/desul/desul SPDX-License-Identifier: (BSD-3-Clause) */ -#ifndef DESUL_ATOMIC_REF_IMPL_HPP_ -#define DESUL_ATOMIC_REF_IMPL_HPP_ +#ifndef DESUL_ATOMIC_REF_HPP_ +#define DESUL_ATOMIC_REF_HPP_ -#include #include 
#include #include -#include -#include namespace desul { -namespace Impl { -// TODO current implementation is missing the following: -// * member functions -// * wait -// * notify_one -// * notify_all - -template {}, - bool = std::is_floating_point{}> -struct basic_atomic_ref; - -// base class for non-integral, non-floating-point, non-pointer types -template -struct basic_atomic_ref { - static_assert(std::is_trivially_copyable{}, ""); - - private: - T* _ptr; - - // 1/2/4/8/16-byte types must be aligned to at least their size - static constexpr int _min_alignment = (sizeof(T) & (sizeof(T) - 1)) || sizeof(T) > 16 - ? 0 - : sizeof(T); - - public: - using value_type = T; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = _min_alignment > alignof(T) - ? _min_alignment - : alignof(T); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - basic_atomic_ref(basic_atomic_ref const&) = default; - - explicit basic_atomic_ref(T& obj) : _ptr(std::addressof(obj)) {} - - T operator=(T desired) const noexcept { - this->store(desired); - return desired; - } - - operator T() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T exchange(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return 
atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } -}; - -// base class for atomic_ref -template -struct basic_atomic_ref { - static_assert(std::is_integral{}, ""); - - private: - T* _ptr; - - public: - using value_type = T; - using difference_type = value_type; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = sizeof(T) > alignof(T) ? 
sizeof(T) - : alignof(T); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - explicit basic_atomic_ref(T& obj) : _ptr(&obj) {} - - basic_atomic_ref(basic_atomic_ref const&) = default; - - T operator=(T desired) const noexcept { - this->store(desired); - return desired; - } - - operator T() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T exchange(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - 
cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_add(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_add(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_sub(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_sub(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_and(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_and(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_or(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_or(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_xor(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_xor(_ptr, arg, order, MemoryScope()); - } - - DESUL_FUNCTION value_type operator++() const noexcept { - return atomic_add_fetch(_ptr, value_type(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator++(int) const noexcept { return fetch_add(1); } - - DESUL_FUNCTION value_type operator--() const noexcept { - return atomic_sub_fetch(_ptr, value_type(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator--(int) const noexcept { return fetch_sub(1); } - - DESUL_FUNCTION value_type operator+=(value_type arg) const noexcept { - atomic_add_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator-=(value_type arg) const noexcept { - atomic_sub_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator&=(value_type arg) const noexcept { - atomic_and_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator|=(value_type arg) const noexcept { - atomic_or_fetch(_ptr, arg, MemoryOrder(), 
MemoryScope()); - } - - DESUL_FUNCTION value_type operator^=(value_type arg) const noexcept { - atomic_xor_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } -}; - -// base class for atomic_ref template -struct basic_atomic_ref { - static_assert(std::is_floating_point{}, ""); - - private: - T* _ptr; +class AtomicRef { + T* ptr_; public: using value_type = T; - using difference_type = value_type; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = alignof(T); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - explicit basic_atomic_ref(T& obj) : _ptr(&obj) {} - - basic_atomic_ref(basic_atomic_ref const&) = default; - - T operator=(T desired) const noexcept { - this->store(desired); - return desired; - } - - operator T() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T exchange(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - 
MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_add(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_add(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_sub(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_sub(_ptr, arg, order, MemoryScope()); - } - - DESUL_FUNCTION value_type operator+=(value_type arg) const noexcept { - atomic_add_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator-=(value_type arg) const noexcept { - atomic_sub_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } -}; - -// base class for atomic_ref -template -struct basic_atomic_ref { - private: - T** _ptr; + using memory_order = MemoryOrder; + using memory_scope = MemoryScope; - public: - using value_type = T*; - using difference_type = std::ptrdiff_t; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = alignof(T*); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - explicit basic_atomic_ref(T*& arg) : _ptr(std::addressof(arg)) {} + DESUL_FUNCTION explicit AtomicRef(T& obj) : ptr_(&obj) {} - basic_atomic_ref(basic_atomic_ref const&) = default; - - T* operator=(T* desired) const noexcept { - this->store(desired); + DESUL_FUNCTION T operator=(T 
desired) const noexcept { + store(desired); return desired; } - operator T*() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T* desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T* load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T* exchange(T* desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T*& expected, - T* desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T*& expected, T* desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T*& expected, - T* desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T*& expected, T* desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } + DESUL_FUNCTION operator T() const noexcept { return load(); } - template - DESUL_FUNCTION value_type - fetch_add(difference_type d, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_add(_ptr, 
_type_size(d), order, MemoryScope()); + DESUL_FUNCTION T load() const noexcept { + return desul::atomic_load(ptr_, MemoryOrder(), MemoryScope()); } - template - DESUL_FUNCTION value_type - fetch_sub(difference_type d, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_sub(_ptr, _type_size(d), order, MemoryScope()); + DESUL_FUNCTION void store(T desired) const noexcept { + return desul::atomic_store(ptr_, desired, MemoryOrder(), MemoryScope()); } - DESUL_FUNCTION value_type operator++() const noexcept { - return atomic_add_fetch(_ptr, _type_size(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator++(int) const noexcept { return fetch_add(1); } - - DESUL_FUNCTION value_type operator--() const noexcept { - return atomic_sub_fetch(_ptr, _type_size(1), MemoryOrder(), MemoryScope()); + DESUL_FUNCTION T exchange(T desired) const noexcept { + return desul::atomic_exchange(ptr_, desired, MemoryOrder(), MemoryScope()); } - DESUL_FUNCTION value_type operator--(int) const noexcept { return fetch_sub(1); } + // TODO compare_exchange_{weak,strong} and is_lock_free - DESUL_FUNCTION value_type operator+=(difference_type d) const noexcept { - atomic_add_fetch(_ptr, _type_size(d), MemoryOrder(), MemoryScope()); +#define DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(FETCH_OP, OP_FETCH) \ + DESUL_FUNCTION T FETCH_OP(T arg) const noexcept { \ + return desul::atomic_##FETCH_OP(ptr_, arg, MemoryOrder(), MemoryScope()); \ + } \ + DESUL_FUNCTION T OP_FETCH(T arg) const noexcept { \ + return desul::atomic_##OP_FETCH(ptr_, arg, MemoryOrder(), MemoryScope()); \ } - DESUL_FUNCTION value_type operator-=(difference_type d) const noexcept { - atomic_sub_fetch(_ptr, _type_size(d), MemoryOrder(), MemoryScope()); - } +#define DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(COMPD_ASGMT, OP_FETCH) \ + DESUL_FUNCTION T operator COMPD_ASGMT(T arg) const noexcept { return OP_FETCH(arg); } - private: - static constexpr std::ptrdiff_t _type_size(std::ptrdiff_t d) 
noexcept { - static_assert(std::is_object{}, ""); - return d * sizeof(T); - } -}; + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_add, add_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(+=, add_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_sub, sub_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(-=, sub_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_min, min_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_max, max_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_mul, mul_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(*=, mul_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_div, div_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(/=, div_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_mod, mod_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(%=, mod_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_and, and_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(&=, and_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_or, or_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(|=, or_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_xor, xor_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(^=, xor_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_nand, nand_fetch) -} // namespace Impl - -template -struct scoped_atomic_ref : Impl::basic_atomic_ref { - explicit scoped_atomic_ref(T& obj) noexcept - : Impl::basic_atomic_ref(obj) {} +#undef DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP +#undef DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP - scoped_atomic_ref& operator=(scoped_atomic_ref const&) = delete; +#define DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT(OPER, NAME) \ + DESUL_FUNCTION T fetch_##NAME() const noexcept { \ + return desul::atomic_fetch_##NAME(ptr_, MemoryOrder(), MemoryScope()); \ + } \ + DESUL_FUNCTION T NAME##_fetch() const noexcept { \ + return desul::atomic_##NAME##_fetch(ptr_, MemoryOrder(), MemoryScope()); \ + } \ + DESUL_FUNCTION T operator OPER() const noexcept { return NAME##_fetch(); } \ + 
DESUL_FUNCTION T operator OPER(int) const noexcept { return fetch_##NAME(); } - scoped_atomic_ref(scoped_atomic_ref const&) = default; + DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT(++, inc) + DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT(--, dec) - using Impl::basic_atomic_ref::operator=; +#undef DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT }; } // namespace desul diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp index 8c909bacdf41..0ade34f25dfe 100644 --- a/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp @@ -9,6 +9,7 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_HIP_HPP_ #define DESUL_ATOMICS_COMPARE_EXCHANGE_HIP_HPP_ +#include #include #include #include @@ -17,130 +18,40 @@ SPDX-License-Identifier: (BSD-3-Clause) namespace desul { namespace Impl { -template -__device__ std::enable_if_t device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - unsigned int return_val = atomicCAS(reinterpret_cast(dest), - reinterpret_cast(compare), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} -template -__device__ std::enable_if_t device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - unsigned long long int return_val = - atomicCAS(reinterpret_cast(dest), - reinterpret_cast(compare), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} +template +struct atomic_exchange_available_hip { + constexpr static bool value = + ((sizeof(T) == 1 && alignof(T) == 1) || (sizeof(T) == 4 && alignof(T) 
== 4) || + (sizeof(T) == 8 && alignof(T) == 8)) && + std::is_trivially_copyable::value; +}; -template -__device__ std::enable_if_t -device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderRelease, MemoryScope) { - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return return_val; -} - -template -__device__ std::enable_if_t -device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderAcquire, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - return return_val; -} - -template -__device__ std::enable_if_t +template +__device__ std::enable_if_t::value, T> device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderAcqRel, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return return_val; + T* const dest, T compare, T value, MemoryOrder, MemoryScope) { + (void)__hip_atomic_compare_exchange_strong( + dest, + &compare, + value, + HIPMemoryOrder::value, + HIPMemoryOrder>::value, + HIPMemoryScope::value); + return compare; } -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - unsigned int return_val = atomicExch(reinterpret_cast(dest), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an 
unsigned long long is 64-bit"); - unsigned long long int return_val = - atomicExch(reinterpret_cast(dest), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T compare, T value, MemoryOrderRelease, MemoryScope) { - T return_val = device_atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T /*compare*/, T value, MemoryOrderAcquire, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = - device_atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderAcqRel, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = - device_atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderSeqCst, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = - device_atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t -device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderSeqCst, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = device_atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - 
device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); +template +__device__ std::enable_if_t::value, T> +device_atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope) { + T return_val = __hip_atomic_exchange(dest, + value, + HIPMemoryOrder::value, + HIPMemoryScope::value); return return_val; } template -__device__ std::enable_if_t<(sizeof(T) != 8) && (sizeof(T) != 4), T> +__device__ std::enable_if_t::value, T> device_atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid deadlock in a warp or wave front @@ -169,7 +80,7 @@ device_atomic_compare_exchange( } template -__device__ std::enable_if_t<(sizeof(T) != 8) && (sizeof(T) != 4), T> +__device__ std::enable_if_t::value, T> device_atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid deadlock in a warp or wave front T return_val; diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp index 69ed8bcb9fd8..68622758d8e0 100644 --- a/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp @@ -69,56 +69,56 @@ inline __device__ unsigned int device_atomic_fetch_inc_mod( unsigned int* inline __device__ unsigned int device_atomic_fetch_dec_mod( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicDec(ptr, val); } // clang-format on -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, TYPE) \ +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, TYPE) \ template \ - __device__ TYPE device_atomic_fetch_##OP( \ + __device__ TYPE device_atomic_##FETCH_OP( \ TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeDevice) { \ __threadfence(); \ TYPE return_val = \ - device_atomic_fetch_##OP(ptr, val, MemoryOrderRelaxed(), MemoryScopeDevice()); \ + device_atomic_##FETCH_OP(ptr, val, 
MemoryOrderRelaxed(), MemoryScopeDevice()); \ __threadfence(); \ return return_val; \ } \ template \ - __device__ TYPE device_atomic_fetch_##OP( \ + __device__ TYPE device_atomic_##FETCH_OP( \ TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeCore) { \ - return device_atomic_fetch_##OP(ptr, val, MemoryOrder(), MemoryScopeDevice()); \ + return device_atomic_##FETCH_OP(ptr, val, MemoryOrder(), MemoryScopeDevice()); \ } -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(OP) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, int) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, unsigned int) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, unsigned long long) +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(FETCH_OP) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, int) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, unsigned int) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, unsigned long long) #ifdef DESUL_CUDA_ARCH_IS_PRE_PASCAL -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, float) +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(FETCH_OP) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, float) #else -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, float) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, double) +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(FETCH_OP) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, float) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, double) #endif -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(min) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(max) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(and) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(or) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(xor) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_min) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_max) 
+DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_and) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_or) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_xor) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(add) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(add) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(sub) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(sub) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(fetch_add) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_add) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(fetch_sub) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_sub) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(inc) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(dec) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_inc) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_dec) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(inc_mod, unsigned int) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(dec_mod, unsigned int) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(fetch_inc_mod, unsigned int) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(fetch_dec_mod, unsigned int) #undef DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT #undef DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp index a94ff8ef1875..530195a83271 100644 --- a/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp @@ -18,38 +18,38 @@ SPDX-License-Identifier: (BSD-3-Clause) namespace desul { namespace Impl { -#define DESUL_IMPL_ATOMIC_FETCH_OP(ANNOTATION, HOST_OR_DEVICE, OP) \ - template \ - ANNOTATION T HOST_OR_DEVICE##_atomic_fetch_##OP( \ - T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ - return HOST_OR_DEVICE##_atomic_fetch_oper( \ - OP##_operator(), dest, val, 
order, scope); \ - } \ - template \ - ANNOTATION T HOST_OR_DEVICE##_atomic_##OP##_fetch( \ - T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ - return HOST_OR_DEVICE##_atomic_oper_fetch( \ - OP##_operator(), dest, val, order, scope); \ +#define DESUL_IMPL_ATOMIC_FETCH_OP(ANNOTATION, HOST_OR_DEVICE, FETCH_OP, OP_FETCH) \ + template \ + ANNOTATION T HOST_OR_DEVICE##_atomic_##FETCH_OP( \ + T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ + return HOST_OR_DEVICE##_atomic_fetch_oper( \ + OP_FETCH##_operator(), dest, val, order, scope); \ + } \ + template \ + ANNOTATION T HOST_OR_DEVICE##_atomic_##OP_FETCH( \ + T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ + return HOST_OR_DEVICE##_atomic_oper_fetch( \ + OP_FETCH##_operator(), dest, val, order, scope); \ } -#define DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(OP) \ - DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_HOST_FUNCTION, host, OP) \ - DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_DEVICE_FUNCTION, device, OP) - -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(add) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(sub) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(max) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(min) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(mul) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(div) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(mod) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(and) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(or) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(xor) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(nand) - -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(inc_mod) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(dec_mod) +#define DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(FETCH_OP, OP_FETCH) \ + DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_HOST_FUNCTION, host, FETCH_OP, OP_FETCH) \ + DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_DEVICE_FUNCTION, device, FETCH_OP, OP_FETCH) + +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_add, add_fetch) 
+DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_sub, sub_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_max, max_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_min, min_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_mul, mul_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_div, div_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_mod, mod_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_and, and_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_or, or_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_xor, xor_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_nand, nand_fetch) + +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_inc_mod, inc_mod_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_dec_mod, dec_mod_fetch) #undef DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE #undef DESUL_IMPL_ATOMIC_FETCH_OP @@ -59,13 +59,13 @@ DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(dec_mod) ANNOTATION T HOST_OR_DEVICE##_atomic_fetch_##OP( \ T* const dest, const unsigned int val, MemoryOrder order, MemoryScope scope) { \ return HOST_OR_DEVICE##_atomic_fetch_oper( \ - OP##_operator(), dest, val, order, scope); \ + OP##_fetch_operator(), dest, val, order, scope); \ } \ template \ ANNOTATION T HOST_OR_DEVICE##_atomic_##OP##_fetch( \ T* const dest, const unsigned int val, MemoryOrder order, MemoryScope scope) { \ return HOST_OR_DEVICE##_atomic_oper_fetch( \ - OP##_operator(), dest, val, order, scope); \ + OP##_fetch_operator(), dest, val, order, scope); \ } #define DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT_HOST_AND_DEVICE(OP) \ @@ -78,19 +78,21 @@ DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT_HOST_AND_DEVICE(rshift) #undef DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT_HOST_AND_DEVICE #undef DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT -#define DESUL_IMPL_ATOMIC_LOAD_AND_STORE(ANNOTATION, HOST_OR_DEVICE) \ - template \ - ANNOTATION T HOST_OR_DEVICE##_atomic_load( \ - const T* const dest, MemoryOrder order, MemoryScope scope) { \ - return 
HOST_OR_DEVICE##_atomic_fetch_oper( \ - load_operator(), const_cast(dest), T(), order, scope); \ - } \ - \ - template \ - ANNOTATION void HOST_OR_DEVICE##_atomic_store( \ - T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ - (void)HOST_OR_DEVICE##_atomic_fetch_oper( \ - store_operator(), dest, val, order, scope); \ +// NOTE: using atomic_oper_fetch in the fallback implementation of atomic_store to avoid +// reading potentially uninitialized values which would yield undefined behavior. +#define DESUL_IMPL_ATOMIC_LOAD_AND_STORE(ANNOTATION, HOST_OR_DEVICE) \ + template \ + ANNOTATION T HOST_OR_DEVICE##_atomic_load( \ + const T* const dest, MemoryOrder order, MemoryScope scope) { \ + return HOST_OR_DEVICE##_atomic_fetch_oper( \ + load_fetch_operator(), const_cast(dest), T(), order, scope); \ + } \ + \ + template \ + ANNOTATION void HOST_OR_DEVICE##_atomic_store( \ + T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ + (void)HOST_OR_DEVICE##_atomic_oper_fetch( \ + store_fetch_operator(), dest, val, order, scope); \ } DESUL_IMPL_ATOMIC_LOAD_AND_STORE(DESUL_IMPL_HOST_FUNCTION, host) diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp index e9c749809de5..8d9bd8682506 100644 --- a/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp @@ -9,99 +9,108 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_FECH_OP_HIP_HPP_ #define DESUL_ATOMICS_FECH_OP_HIP_HPP_ +#include + namespace desul { namespace Impl { -// clang-format off -inline __device__ int device_atomic_fetch_add( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_add( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ unsigned 
long long device_atomic_fetch_add(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ float device_atomic_fetch_add( float* ptr, float val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ double device_atomic_fetch_add( double* ptr, double val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } - -inline __device__ int device_atomic_fetch_sub( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_sub( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_sub(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); } -inline __device__ float device_atomic_fetch_sub( float* ptr, float val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); } -inline __device__ double device_atomic_fetch_sub( double* ptr, double val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); } - -inline __device__ int device_atomic_fetch_min( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_min( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_min(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); } - -inline __device__ int device_atomic_fetch_max( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_max( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return 
atomicMax(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_max(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); } - -inline __device__ int device_atomic_fetch_and( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_and( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_and(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); } - -inline __device__ int device_atomic_fetch_or ( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); } -inline __device__ unsigned int device_atomic_fetch_or ( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_or (unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); } +#define DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, T) \ + template \ + __device__ inline T device_atomic_fetch_##OP( \ + T* ptr, T val, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_##OP(ptr, \ + val, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } -inline __device__ int device_atomic_fetch_xor( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_xor( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_xor(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); } +#define DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(OP) \ + 
DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, int) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, long long) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, unsigned int) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, unsigned long long) + +#define DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, float) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, double) + +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(add) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(min) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(max) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(and) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(or) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(xor) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(add) +// atomic min/max gives the wrong results (tested with ROCm 6.0 on Frontier) +// DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(min) +// DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(max) + +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_OP + +#define DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(T) \ + template \ + __device__ inline T device_atomic_fetch_sub( \ + T* ptr, T val, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_add(ptr, \ + -val, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } -inline __device__ int device_atomic_fetch_inc( int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1 ); } -inline __device__ unsigned int device_atomic_fetch_inc( unsigned int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1u ); } -inline __device__ unsigned long long device_atomic_fetch_inc(unsigned long long* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1ull); } +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(int) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(long long) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(unsigned int) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(unsigned long long) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(float) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(double) + +#undef 
DESUL_IMPL_HIP_ATOMIC_FETCH_SUB + +#define DESUL_IMPL_HIP_ATOMIC_FETCH_INC(T) \ + template \ + __device__ inline T device_atomic_fetch_inc(T* ptr, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_add(ptr, \ + 1, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } \ + template \ + __device__ inline T device_atomic_fetch_dec(T* ptr, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_add(ptr, \ + -1, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } -inline __device__ int device_atomic_fetch_dec( int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, 1 ); } -inline __device__ unsigned int device_atomic_fetch_dec( unsigned int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, 1u ); } -inline __device__ unsigned long long device_atomic_fetch_dec(unsigned long long* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -1 ); } +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(int) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(long long) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(unsigned int) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(unsigned long long) -inline __device__ unsigned int device_atomic_fetch_inc_mod( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicInc(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_dec_mod( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicDec(ptr, val); } -// clang-format on +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_INC -#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, TYPE) \ +#define DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MEMORY_SCOPE, MEMORY_SCOPE_STRING_LITERAL) \ template \ - __device__ TYPE device_atomic_fetch_##OP( \ - TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeDevice) { \ - __threadfence(); \ - TYPE return_val = \ - device_atomic_fetch_##OP(ptr, val, MemoryOrderRelaxed(), MemoryScopeDevice()); \ - __threadfence(); \ - return return_val; \ + __device__ inline unsigned int device_atomic_fetch_inc_mod( \ 
+ unsigned int* ptr, unsigned int val, MemoryOrder, MEMORY_SCOPE) { \ + return __builtin_amdgcn_atomic_inc32( \ + ptr, val, HIPMemoryOrder::value, MEMORY_SCOPE_STRING_LITERAL); \ } \ template \ - __device__ TYPE device_atomic_fetch_##OP( \ - TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeCore) { \ - return device_atomic_fetch_##OP(ptr, val, MemoryOrder(), MemoryScopeDevice()); \ + __device__ inline unsigned int device_atomic_fetch_dec_mod( \ + unsigned int* ptr, unsigned int val, MemoryOrder, MEMORY_SCOPE) { \ + return __builtin_amdgcn_atomic_dec32( \ + ptr, val, HIPMemoryOrder::value, MEMORY_SCOPE_STRING_LITERAL); \ } -#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(OP) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, int) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, unsigned int) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, unsigned long long) - -#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, float) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, double) - -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(min) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(max) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(and) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(or) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(xor) - -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(add) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(add) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(sub) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(sub) - -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(inc) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(dec) - -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(inc_mod, unsigned int) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(dec_mod, unsigned int) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeCore, "workgroup") +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeDevice, "agent") +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeNode, "") 
+DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeSystem, "") -#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT -#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL -#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD } // namespace Impl } // namespace desul diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp index be90cdbbd86f..1f5159c4f8b0 100644 --- a/packages/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp @@ -18,7 +18,7 @@ namespace desul { namespace Impl { template -struct max_operator { +struct max_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return (val1 > val2 ? val1 : val2); @@ -30,7 +30,7 @@ struct max_operator { }; template -struct min_operator { +struct min_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return (val1 < val2 ? 
val1 : val2); @@ -70,55 +70,55 @@ constexpr DESUL_FUNCTION } template -struct add_operator { +struct add_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 + val2; } }; template -struct sub_operator { +struct sub_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 - val2; } }; template -struct mul_operator { +struct mul_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 * val2; } }; template -struct div_operator { +struct div_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 / val2; } }; template -struct mod_operator { +struct mod_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 % val2; } }; template -struct and_operator { +struct and_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 & val2; } }; template -struct or_operator { +struct or_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 | val2; } }; template -struct xor_operator { +struct xor_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 ^ val2; } }; template -struct nand_operator { +struct nand_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return ~(val1 & val2); @@ -126,7 +126,7 @@ struct nand_operator { }; template -struct lshift_operator { +struct lshift_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 << val2; @@ -134,7 +134,7 @@ struct lshift_operator { }; template -struct rshift_operator { +struct rshift_fetch_operator 
{ DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 >> val2; @@ -142,7 +142,7 @@ struct rshift_operator { }; template -struct inc_mod_operator { +struct inc_mod_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return ((val1 >= val2) ? Scalar1(0) : val1 + Scalar1(1)); @@ -150,7 +150,7 @@ struct inc_mod_operator { }; template -struct dec_mod_operator { +struct dec_mod_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return (((val1 == Scalar1(0)) | (val1 > val2)) ? val2 : (val1 - Scalar1(1))); @@ -158,13 +158,13 @@ struct dec_mod_operator { }; template -struct store_operator { +struct store_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1&, const Scalar2& val2) { return val2; } }; template -struct load_operator { +struct load_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2&) { return val1; } }; diff --git a/packages/kokkos/tpls/gtest/gtest/gtest.h b/packages/kokkos/tpls/gtest/gtest/gtest.h index c17c9ab3fc22..2b34f3a60686 100644 --- a/packages/kokkos/tpls/gtest/gtest/gtest.h +++ b/packages/kokkos/tpls/gtest/gtest/gtest.h @@ -4910,7 +4910,7 @@ class NeverThrown { class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \ : public parent_class { \ public: \ - GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() = default; \ + GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() { (void)test_info_; }\ ~GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() override = default; \ GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name, \ test_name)); \ diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp index 8e42a37ba7c7..24166462e7ab 100644 --- 
a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp @@ -205,7 +205,7 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #endif #ifndef _MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION -# if (!defined(__NVCC__) || (__CUDACC_VER_MAJOR__ >= 11 && __CUDACC_VER_MINOR__ >= 7)) && \ +# if (!defined(__NVCC__) || (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10 >= 1170)) && \ ((defined(__cpp_deduction_guides) && __cpp_deduction_guides >= 201703) || \ (!defined(__cpp_deduction_guides) && MDSPAN_HAS_CXX_17)) # define _MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION 1 diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp index 9a28c3ed5ca3..d58d37732dda 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp @@ -16,12 +16,15 @@ #pragma once #include "dynamic_extent.hpp" +#include "utility.hpp" #ifdef __cpp_lib_span #include #endif #include +#include +#include #include namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -30,6 +33,7 @@ namespace detail { // Function used to check compatibility of extents in converting constructor // can't be a private member function for some reason. template +MDSPAN_INLINE_FUNCTION static constexpr std::integral_constant __check_compatible_extents( std::integral_constant, std::integer_sequence, @@ -46,6 +50,7 @@ struct __compare_extent_compatible : std::integral_constant +MDSPAN_INLINE_FUNCTION static constexpr std::integral_constant< bool, _MDSPAN_FOLD_AND(__compare_extent_compatible::value)> __check_compatible_extents( @@ -59,8 +64,8 @@ template MDSPAN_INLINE_FUNCTION static constexpr bool are_valid_indices() { return - (std::is_convertible::value && ... 
&& true) && - (std::is_nothrow_constructible::value && ... && true); + _MDSPAN_FOLD_AND(std::is_convertible::value) && + _MDSPAN_FOLD_AND(std::is_nothrow_constructible::value); } // ------------------------------------------------------------------ @@ -538,14 +543,9 @@ template class extents { MDSPAN_INLINE_FUNCTION friend constexpr bool operator==(const extents &lhs, const extents &rhs) noexcept { - if constexpr (rank() != extents::rank()) { - return false; - } else { - using common_t = std::common_type_t; - for (size_type r = 0; r < m_rank; r++) - if(static_cast(rhs.extent(r)) != static_cast(lhs.extent(r))) return false; - } - return true; + return + rank() == extents::rank() && + detail::rankwise_equal(detail::with_rank{}, rhs, lhs, detail::extent); } #if !(MDSPAN_HAS_CXX_20) @@ -614,5 +614,80 @@ static #endif constexpr bool __is_extents_v = __is_extents::value; +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_lower_bound(InputIndexType user_index, + ExtentsIndexType /* current_extent */, + std::true_type /* is_signed */) +{ + (void) user_index; // prevent unused variable warning +#ifdef _MDSPAN_DEBUG + assert(static_cast(user_index) >= 0); +#endif +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_lower_bound(InputIndexType /* user_index */, + ExtentsIndexType /* current_extent */, + std::false_type /* is_signed */) +{} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_upper_bound(InputIndexType user_index, + ExtentsIndexType current_extent) +{ + (void) user_index; // prevent unused variable warnings + (void) current_extent; +#ifdef _MDSPAN_DEBUG + assert(static_cast(user_index) < current_extent); +#endif +} + +// Returning true to use AND fold instead of comma +// CPP14 mode doesn't like the use of void expressions +// with the way the _MDSPAN_FOLD_AND is set up +template +MDSPAN_INLINE_FUNCTION +constexpr bool +check_one_index(InputIndex user_index, + ExtentsIndexType current_extent) +{ + check_lower_bound(user_index, 
current_extent, + std::integral_constant::value>{}); + check_upper_bound(user_index, current_extent); + return true; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_all_indices_helper(std::index_sequence, + const extents& exts, + Indices... indices) +{ + // Suppress warning about statement has no effect + (void) _MDSPAN_FOLD_AND( + (check_one_index(indices, exts.extent(RankIndices))) + ); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_all_indices(const extents& exts, + Indices... indices) +{ + check_all_indices_helper(std::make_index_sequence(), + exts, indices...); +} + } // namespace detail } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp index 83ed9ef7fe36..222fba7aa049 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp @@ -18,8 +18,11 @@ #include "macros.hpp" #include "trait_backports.hpp" #include "extents.hpp" +#include "layout_stride.hpp" +#include "utility.hpp" +#if MDSPAN_HAS_CXX_17 #include "../__p2642_bits/layout_padded_fwd.hpp" -#include +#endif #include namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -133,11 +136,11 @@ class layout_left::mapping { : __extents(__other.extents()) { MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: - check_padded_layout_converting_constructor_mandates(); + check_padded_layout_converting_constructor_mandates< + extents_type, _Mapping>(detail::with_rank{}); MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: check_padded_layout_converting_constructor_preconditions< - extents_type>(__other); + extents_type>(detail::with_rank{}, __other); } #endif @@ -156,17 +159,7 @@ class layout_left::mapping { * TODO: check precondition * other.required_span_size() is a representable value of type index_type */ - #if !defined(_MDSPAN_HAS_CUDA) && 
!defined(_MDSPAN_HAS_HIP) && !defined(NDEBUG) - if constexpr (extents_type::rank() > 0) { - index_type stride = 1; - using common_t = std::common_type_t; - for(rank_type r=0; r<__extents.rank(); r++) { - if(static_cast(stride) != static_cast(other.stride(r))) - std::abort(); // ("Assigning layout_stride to layout_left with invalid strides."); - stride *= __extents.extent(r); - } - } - #endif + detail::validate_strides(detail::with_rank{}, layout_left{}, __extents, other); } MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; @@ -194,6 +187,9 @@ class layout_left::mapping { ) _MDSPAN_HOST_DEVICE constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! NDEBUG return __compute_offset(__rank_count<0, extents_type::rank()>(), static_cast(idxs)...); } diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp index 3d3927df7bcc..284569f65332 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp @@ -18,9 +18,11 @@ #include "macros.hpp" #include "trait_backports.hpp" #include "extents.hpp" -#include #include "layout_stride.hpp" +#include "utility.hpp" +#if MDSPAN_HAS_CXX_17 #include "../__p2642_bits/layout_padded_fwd.hpp" +#endif namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -134,11 +136,11 @@ class layout_right::mapping { : __extents(__other.extents()) { MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: - check_padded_layout_converting_constructor_mandates(); + check_padded_layout_converting_constructor_mandates< + extents_type, _Mapping>(detail::with_rank{}); MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: check_padded_layout_converting_constructor_preconditions< - extents_type>(__other); + 
extents_type>(detail::with_rank{}, __other); } #endif @@ -157,17 +159,7 @@ class layout_right::mapping { * TODO: check precondition * other.required_span_size() is a representable value of type index_type */ - #if !defined(_MDSPAN_HAS_CUDA) && !defined(_MDSPAN_HAS_HIP) && !defined(NDEBUG) - if constexpr (extents_type::rank() > 0) { - index_type stride = 1; - using common_t = std::common_type_t; - for(rank_type r=__extents.rank(); r>0; r--) { - if(static_cast(stride) != static_cast(other.stride(r-1))) - std::abort(); // ("Assigning layout_stride to layout_right with invalid strides."); - stride *= __extents.extent(r-1); - } - } - #endif + detail::validate_strides(detail::with_rank{}, layout_right{}, __extents, other); } MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; @@ -195,6 +187,9 @@ class layout_right::mapping { ) _MDSPAN_HOST_DEVICE constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! 
NDEBUG return __compute_offset(__rank_count<0, extents_type::rank()>(), static_cast(idxs)...); } diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp index 15ad577d149c..d6cdad2ab234 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp @@ -19,14 +19,16 @@ #include "extents.hpp" #include "trait_backports.hpp" #include "compressed_pair.hpp" +#include "utility.hpp" #if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) # include "no_unique_address.hpp" #endif -#include -#include #include +#include +#include + #ifdef __cpp_lib_span #include #endif @@ -38,11 +40,11 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { struct layout_left { template - class mapping; + class mapping; }; struct layout_right { template - class mapping; + class mapping; }; namespace detail { @@ -79,6 +81,7 @@ namespace detail { std::bool_constant::value; }; #endif + } // namespace detail struct layout_stride { @@ -199,6 +202,20 @@ struct layout_stride { return __strides_storage_t{static_cast(s[Idxs])...}; } + MDSPAN_TEMPLATE_REQUIRES( + class IntegralType, + // The is_convertible condition is added to make sfinae valid + // the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array + (std::is_convertible::value && (extents_type::rank() > 0)) + ) + MDSPAN_INLINE_FUNCTION + // despite the requirement some compilers still complain about zero length array during parsing + // making it length 1 now, but since the thing can't be instantiated due to requirement the actual + // instantiation of strides_storage will not fail despite mismatching length + static constexpr const __strides_storage_t fill_strides(mdspan_non_standard_tag, const IntegralType (&s)[extents_type::rank()>0?extents_type::rank():1]) { + return 
__strides_storage_t{static_cast(s[Idxs])...}; + } + #ifdef __cpp_lib_span template MDSPAN_INLINE_FUNCTION @@ -225,7 +242,11 @@ struct layout_stride { // Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC warning C4348. using __impl = __deduction_workaround>; - static constexpr __strides_storage_t strides_storage(std::true_type) { + static constexpr __strides_storage_t strides_storage(detail::with_rank<0>) { + return {}; + } + template + static constexpr __strides_storage_t strides_storage(detail::with_rank) { __strides_storage_t s{}; extents_type e; @@ -237,9 +258,6 @@ struct layout_stride { return s; } - static constexpr __strides_storage_t strides_storage(std::false_type) { - return {}; - } //---------------------------------------------------------------------------- @@ -262,7 +280,7 @@ struct layout_stride { : __base_t(__base_t{__member_pair_t( #endif extents_type(), - __strides_storage_t(strides_storage(std::integral_constant 0)>{})) + __strides_storage_t(strides_storage(detail::with_rank{})) #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) } #else @@ -309,6 +327,48 @@ struct layout_stride { */ } + MDSPAN_TEMPLATE_REQUIRES( + class IntegralTypes, + /* requires */ ( + // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type + // error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping' + _MDSPAN_TRAIT(std::is_convertible, const std::remove_const_t&, typename Extents::index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t&) && + (Extents::rank() > 0) + ) + ) + MDSPAN_INLINE_FUNCTION + constexpr + mapping( + mdspan_non_standard_tag, + extents_type const& e, + // despite the requirement some compilers still complain about zero length array during parsing + // making it length 1 now, but since the thing can't be instantiated due to requirement the actual + // instantiation 
of strides_storage will not fail despite mismatching length + IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1] + ) noexcept +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + : __members{ +#else + : __base_t(__base_t{__member_pair_t( +#endif + e, __strides_storage_t(__impl::fill_strides(mdspan_non_standard, s)) +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + } +#else + )}) +#endif + { + /* + * TODO: check preconditions + * - s[i] > 0 is true for all i in the range [0, rank_ ). + * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). + * - If rank_ is greater than 0, then there exists a permutation P of the integers in the + * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for + * all i in the range [1, rank_ ), where pi is the ith element of P. + */ + } + #ifdef __cpp_lib_span MDSPAN_TEMPLATE_REQUIRES( class IntegralTypes, @@ -434,6 +494,9 @@ struct layout_stride { ) MDSPAN_FORCE_INLINE_FUNCTION constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! 
NDEBUG return static_cast(__impl::_call_op_impl(*this, static_cast(idxs)...)); } @@ -444,32 +507,48 @@ struct layout_stride { MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 bool is_exhaustive() const noexcept { - if constexpr (extents_type::rank() == 0) - return true; - else { - index_type span_size = required_span_size(); - if (span_size == static_cast(0)) { - if constexpr (extents_type::rank() == 1) { - return stride(0) == 1; - } else { - rank_type r_largest = 0; - for (rank_type r = 1; r < extents_type::rank(); r++) { - if (stride(r) > stride(r_largest)) { - r_largest = r; - } - } - for (rank_type r = 0; r < extents_type::rank(); r++) { - if (extents().extent(r) == 0 && r != r_largest) { - return false; - } - } - return true; - } - } else { - return required_span_size() == __get_size(extents(), std::make_index_sequence()); + + private: + constexpr bool exhaustive_for_nonzero_span_size() const + { + return required_span_size() == __get_size(extents(), std::make_index_sequence()); + } + + constexpr bool is_exhaustive_impl(detail::with_rank<0>) const + { + return true; + } + constexpr bool is_exhaustive_impl(detail::with_rank<1>) const + { + if (required_span_size() != static_cast(0)) { + return exhaustive_for_nonzero_span_size(); + } + return stride(0) == 1; + } + template + constexpr bool is_exhaustive_impl(detail::with_rank) const + { + if (required_span_size() != static_cast(0)) { + return exhaustive_for_nonzero_span_size(); + } + + rank_type r_largest = 0; + for (rank_type r = 1; r < extents_type::rank(); r++) { + if (stride(r) > stride(r_largest)) { + r_largest = r; } } + for (rank_type r = 0; r < extents_type::rank(); r++) { + if (extents().extent(r) == 0 && r != r_largest) { + return false; + } + } + return true; + } + + public: + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 bool 
is_exhaustive() const noexcept { + return is_exhaustive_impl(detail::with_rank{}); } MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { return true; } @@ -498,15 +577,9 @@ struct layout_stride { #endif MDSPAN_INLINE_FUNCTION friend constexpr bool operator==(const mapping& x, const StridedLayoutMapping& y) noexcept { - bool strides_match = true; - if constexpr (extents_type::rank() > 0) { - using common_t = std::common_type_t; - for(rank_type r = 0; r < extents_type::rank(); r++) - strides_match = strides_match && (static_cast(x.stride(r)) == static_cast(y.stride(r))); - } return (x.extents() == y.extents()) && (__impl::__OFFSET(y) == static_cast(0)) && - strides_match; + detail::rankwise_equal(detail::with_rank{}, x, y, detail::stride); } // This one is not technically part of the proposal. Just here to make implementation a bit more optimal hopefully @@ -532,7 +605,7 @@ struct layout_stride { ) MDSPAN_INLINE_FUNCTION friend constexpr bool operator!=(const mapping& x, const StridedLayoutMapping& y) noexcept { - return not (x == y); + return !(x == y); } MDSPAN_TEMPLATE_REQUIRES( @@ -561,4 +634,34 @@ struct layout_stride { }; }; +namespace detail { + +template +constexpr void validate_strides(with_rank<0>, Layout, const Extents&, const Mapping&) +{} + +template +constexpr void validate_strides(with_rank, Layout, const Extents& ext, const Mapping& other) +{ + static_assert(std::is_same::value && + (std::is_same::value || + std::is_same::value) + , "This function is only intended to validate construction of " + "a layout_left or layout_right mapping from a layout_stride mapping."); + + constexpr auto is_left = std::is_same::value; + + typename Extents::index_type expected_stride = 1; + + for (std::size_t r = 0; r < N; r++) { + const std::size_t s = is_left ? 
r : N - 1 - r; + + MDSPAN_IMPL_PRECONDITION(common_integral_compare(expected_stride, other.stride(s)) + && "invalid strides for layout_{left,right}"); + + expected_stride *= ext.extent(s); + } +} + +} // namespace detail } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp index 3eeb39755c8a..b60c4261779e 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp @@ -18,7 +18,12 @@ #include "config.hpp" +#include +#include #include // std::is_void +#if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_SYCL) +#include "assert.h" +#endif #ifndef _MDSPAN_HOST_DEVICE # if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) @@ -101,6 +106,69 @@ #define MDSPAN_IMPL_STANDARD_NAMESPACE_STRING MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_STANDARD_NAMESPACE) #define MDSPAN_IMPL_PROPOSED_NAMESPACE_STRING MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_STANDARD_NAMESPACE) "::" MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_PROPOSED_NAMESPACE) +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +#if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + printf("%s:%u: precondition failure: `%s`\n", file, line, cond); + assert(0); +} +#elif defined(_MDSPAN_HAS_SYCL) +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + sycl::ext::oneapi::experimental::printf("%s:%u: precondition failure: `%s`\n", file, line, cond); + assert(0); +} +#else +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + std::fprintf(stderr, "%s:%u: precondition failure: `%s`\n", file, line, 
cond); + std::abort(); +} +#endif + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#ifndef MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER +#define MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER(cond, file, line) \ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::default_precondition_violation_handler(cond, file, line) +#endif + +#ifndef MDSPAN_IMPL_CHECK_PRECONDITION + #ifndef NDEBUG + #define MDSPAN_IMPL_CHECK_PRECONDITION 0 + #else + #define MDSPAN_IMPL_CHECK_PRECONDITION 1 + #endif +#endif + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +template +MDSPAN_FUNCTION constexpr void precondition(const char* cond, const char* file, unsigned line) +{ + if (!check) { return; } + // in case the macro doesn't use the arguments for custom macros + (void) cond; + (void) file; + (void) line; + MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER(cond, file, line); +} + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#define MDSPAN_IMPL_PRECONDITION(...) \ + do { \ + if (!(__VA_ARGS__)) { \ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::precondition(#__VA_ARGS__, __FILE__, __LINE__); \ + } \ + } while (0) + // end Preprocessor helpers }}}1 //============================================================================== @@ -574,7 +642,7 @@ __fold_left_assign_impl(Args&&... args) { template -constexpr __mdspan_enable_fold_comma __fold_comma_impl(Args&&... args) noexcept { return { }; } +constexpr __mdspan_enable_fold_comma __fold_comma_impl(Args&&...) 
noexcept { return { }; } template struct __bools; diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp index d6ec49e65bf8..23114aa55068 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp @@ -34,6 +34,8 @@ class mdspan private: static_assert(detail::__is_extents_v, MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::mdspan's Extents template parameter must be a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); + static_assert(std::is_same::value, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::mdspan's ElementType template parameter must be the same as its AccessorPolicy::element_type."); // Workaround for non-deducibility of the index sequence template parameter if it's given at the top level template @@ -321,7 +323,7 @@ class mdspan #endif // MDSPAN_USE_PAREN_OPERATOR MDSPAN_INLINE_FUNCTION constexpr size_type size() const noexcept { - return __impl::__size(*this); + return static_cast(__impl::__size(*this)); }; MDSPAN_INLINE_FUNCTION constexpr bool empty() const noexcept { diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp new file mode 100644 index 000000000000..e690cd6939bf --- /dev/null +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +// type alias used for rank-based tag dispatch +// +// this is used to enable alternatives to constexpr if when building for C++14 +// +template +using with_rank = std::integral_constant; + +template +MDSPAN_INLINE_FUNCTION +constexpr bool common_integral_compare(I1 x, I2 y) +{ + static_assert(std::is_integral::value && + 
std::is_integral::value, ""); + + using I = std::common_type_t; + return static_cast(x) == static_cast(y); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr bool rankwise_equal(with_rank<0>, const T1&, const T2&, F) +{ + return true; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr bool rankwise_equal(with_rank, const T1& x, const T2& y, F func) +{ + bool match = true; + + for (std::size_t r = 0; r < N; r++) { + match = match && common_integral_compare(func(x, r), func(y, r)); + } + + return match; +} + +constexpr struct +{ + template + MDSPAN_INLINE_FUNCTION + constexpr auto operator()(const T& x, I i) const + { + return x.extent(i); + } +} extent; + +constexpr struct +{ + template + MDSPAN_INLINE_FUNCTION + constexpr auto operator()(const T& x, I i) const + { + return x.stride(i); + } +} stride; + +} // namespace detail + +constexpr struct mdspan_non_standard_tag { +} mdspan_non_standard; + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Graph.cpp b/packages/kokkos/tpls/mdspan/include/experimental/__p2389_bits/dims.hpp similarity index 59% rename from packages/kokkos/core/unit_test/hip/TestHIP_Graph.cpp rename to packages/kokkos/tpls/mdspan/include/experimental/__p2389_bits/dims.hpp index 405cb76c643c..00045215c489 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_Graph.cpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p2389_bits/dims.hpp @@ -14,5 +14,15 @@ // //@HEADER -#include -#include +#pragma once + +// backward compatibility import into experimental +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { + +template< ::std::size_t Rank, class IndexType = std::size_t> +using dims = + :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dextents; + +} // namespace MDSPAN_IMPL_PROPOSED_NAMESPACE +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp 
b/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp index ca6948c9a9f7..e1390fdeb57c 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -17,10 +17,30 @@ #pragma once #include -#include #include +#include #include // index_sequence +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif + namespace MDSPAN_IMPL_STANDARD_NAMESPACE { //****************************************** // Return type of submdspan_mapping overloads @@ -31,18 +51,68 @@ template struct submdspan_mapping_result { }; namespace detail { +// We use const Slice& and not Slice&& because the various +// submdspan_mapping_impl overloads use their slices arguments +// multiple times. This makes perfect forwarding not useful, but we +// still don't want to pass those (possibly of size 64 x 3 bits) +// objects by value. +template +MDSPAN_INLINE_FUNCTION constexpr bool +one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { + using common_t = + std::common_type_t; + return static_cast(detail::first_of(slice)) == + static_cast(ext); +} + +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds_helper(std::index_sequence, + const extents &exts, + const Slices &... 
slices) { + return _MDSPAN_FOLD_OR( + (one_slice_out_of_bounds(exts.extent(RankIndices), slices))); +} + +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds(const extents &exts, + const Slices &... slices) { + return any_slice_out_of_bounds_helper( + std::make_index_sequence(), exts, slices...); +} + // constructs sub strides template -MDSPAN_INLINE_FUNCTION -constexpr auto -construct_sub_strides(const SrcMapping &src_mapping, - std::index_sequence, - const std::tuple &slices_stride_factor) { +MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( + const SrcMapping &src_mapping, std::index_sequence, + const std::tuple &slices_stride_factor) { using index_type = typename SrcMapping::index_type; return std::array{ (static_cast(src_mapping.stride(InvMapIdxs)) * static_cast(std::get(slices_stride_factor)))...}; } + +template +struct is_range_slice { + constexpr static bool value = + std::is_same_v || + std::is_convertible_v>; +}; + +template +constexpr bool is_range_slice_v = is_range_slice::value; + +template +struct is_index_slice { + constexpr static bool value = std::is_convertible_v; +}; + +template +constexpr bool is_index_slice_v = is_index_slice::value; + } // namespace detail //********************************** @@ -51,52 +121,90 @@ construct_sub_strides(const SrcMapping &src_mapping, namespace detail { // Figure out whether to preserve layout_left -template -struct preserve_layout_left_mapping; +template +struct deduce_layout_left_submapping; -template -struct preserve_layout_left_mapping, SubRank, - SliceSpecifiers...> { - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // Slice specifiers up to subrank need to be full_extent_t - except - // for the last one which could also be tuple but not a strided index - // range slice specifiers after subrank are integrals - ((Idx > SubRank - 1) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SubRank - 1) && - 
std::is_convertible_v>)) && - ...); +template +struct deduce_layout_left_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + using count_range = index_sequence_scan_impl< + 0u, (is_index_slice_v ? 0u : 1u)...>; + + constexpr static int gap_len = + (((Idx > 0 && count_range::get(Idx) == 1 && + is_index_slice_v) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_value() { + // Use layout_left for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_left for rank 1 result if leftmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx > 0 || is_range_slice_v)&&...); + } else { + // Preserve if leftmost SubRank-1 slices are full_extent_t and + // the slice at idx Subrank - 1 is a range and + // for idx > SubRank the slice is an index + return ((((Idx < SubRank - 1) && std::is_same_v) || + ((Idx == SubRank - 1) && is_range_slice_v) || + ((Idx > SubRank - 1) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_padded_value() { + // Technically could also keep layout_left_padded for SubRank==0 + // and SubRank==1 with leftmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // leftmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 + return ((((Idx == 0) && is_range_slice_v) || + ((Idx > 0 && Idx <= gap_len) && is_index_slice_v) || + ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v) || + ((Idx == gap_len + SubRank - 1) && is_range_slice_v) || + ((Idx > gap_len + SubRank - 1) && is_index_slice_v)) && ... 
); + } }; + +// We are reusing the same thing for layout_left and layout_left_padded +// For layout_left as source StaticStride is static_extent(0) +template +struct compute_s_static_layout_left { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx>0 && Idx<=NumGaps ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif // Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto -layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { +MDSPAN_INLINE_FUNCTION constexpr auto +layout_left::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { // compute sub extents using src_ext_t = Extents; @@ -104,51 +212,137 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... 
slices) using dst_ext_t = decltype(dst_ext); // figure out sub layout type - constexpr bool preserve_layout = detail::preserve_layout_left_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; - using dst_mapping_t = typename dst_layout_t::template mapping; - - if constexpr (std::is_same_v) { + using deduce_layout = detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_left_value()) { // layout_left case + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; return submdspan_mapping_result{ - dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide 
here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - static_cast(this->operator()(detail::first_of(slices)...))}; + using dst_mapping_t = typename layout_stride::mapping; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop + +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... 
slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // The following for some reasons leads to compiler error later, while not using a typedef works: + // Compilers: CUDA 11.2 with GCC 9.1 + // + // using dst_mapping_t = typename layout_left::template mapping; + // return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + // + // Error: submdspan_mapping.hpp:299:23: error: 'dst_mapping_t' does not name a type + // 299 | using dst_mapping_t = typename layout_left::template mapping; + // The same error is given (about dst_mapping_t not naming type) when a different name is used in 299: + // using dst_mapping_t2 = typename layout_left::template mapping; + + return submdspan_mapping_result> + {typename layout_left::template mapping{dst_ext}, offset}; + } else { // general case + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. 
+ // figure out sub layout type + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_left_value() && dst_ext_t::rank() == 1) { // getting rank-1 from leftmost + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { // can keep layout_left_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#else + std::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), #endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} //********************************** // layout_right 
submdspan_mapping @@ -156,134 +350,276 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) namespace detail { // Figure out whether to preserve layout_right -template -struct preserve_layout_right_mapping; +template +struct deduce_layout_right_submapping; -template -struct preserve_layout_right_mapping, SubRank, - SliceSpecifiers...> { - constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // The last subrank slice specifiers need to be full_extent_t - except - // for the srcrank-subrank one which could also be tuple but not a - // strided index range slice specifiers before srcrank-subrank are - // integrals - ((Idx < - SrcRank - SubRank) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SrcRank - SubRank) && - std::is_convertible_v>)) && - ...); +template +struct deduce_layout_right_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using count_range = index_sequence_scan_impl< + 0u, (std::is_convertible_v ? 0u : 1u)...>; + //__static_partial_sums...>; + constexpr static int gap_len = + (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && + std::is_convertible_v) + ? 1 + : 0) + + ... 
+ 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_value() { + // Use layout_right for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_right for rank 1 result if rightmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx < Rank - 1 || is_range_slice_v)&&...); + } else { + // Preserve if rightmost SubRank-1 slices are full_extent_t and + // the slice at idx Rank-Subrank is a range and + // for idx < Rank - SubRank the slice is an index + return ((((Idx >= Rank - SubRank) && std::is_same_v) || + ((Idx == Rank - SubRank) && is_range_slice_v) || + ((Idx < Rank - SubRank) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_padded_value() { + // Technically could also keep layout_right_padded for SubRank==0 + // and SubRank==1 with rightmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // rightmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 + return ((((Idx == Rank - 1) && is_range_slice_v) || + ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v) || + ((Idx == Rank - gap_len - SubRank) && is_range_slice_v) || + ((Idx < Rank - gap_len - SubRank) && is_index_slice_v)) && ... ); + } }; + +// We are reusing the same thing for layout_right and layout_right_padded +// For layout_right as source StaticStride is static_extent(Rank-1) +template +struct compute_s_static_layout_right { + // Neither StaticStride nor any of the provided extents can be zero. 
+ // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx >= Extents::rank() - 1 - NumGaps && Idx < Extents::rank() - 1 ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif +// Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_right::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { - // get sub extents + SliceSpecifiers... 
slices) const { + + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - // determine new layout type - constexpr bool preserve_layout = detail::preserve_layout_right_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; - using dst_mapping_t = typename dst_layout_t::template mapping; + // figure out sub layout type + using deduce_layout = detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); - if constexpr (std::is_same_v) { + if constexpr (deduce_layout::layout_right_value()) { // layout_right case + using dst_mapping_t = typename layout_right::mapping; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; return submdspan_mapping_result{ - dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; + dst_mapping_t(dst_ext, + stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), + offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, 
detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - static_cast(this->operator()(detail::first_of(slices)...))}; + using dst_mapping_t = typename layout_stride::mapping; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop + +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... 
slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // Same issue as in layout_left_padded: see comment there + // using dst_mapping_t = typename layout_right::template mapping; + // return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + return submdspan_mapping_result> + {typename layout_right::template mapping{dst_ext}, offset}; + } else { // general case + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_right_value() && dst_ext_t::rank() == 1) { // getting rank-1 from rightmost + using dst_mapping_t = typename layout_right::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { // can keep layout_right_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right::value(std::make_index_sequence()); + using dst_mapping_t = typename 
MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(Extents::rank() - 2 - deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#else + std::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), #endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} //********************************** // layout_stride submdspan_mapping //********************************* template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_stride::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { + SliceSpecifiers... 
slices) const { auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple(detail::stride_of(slices)...))), + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? 
this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue +#if defined(_MDSPAN_HAS_HIP) || \ + (defined(__NVCC__) && \ + (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) + std::tuple( + detail::stride_of(slices)...))), #else - std::tuple(detail::stride_of(slices)...))), + std::tuple(detail::stride_of(slices)...))), #endif - static_cast(this->operator()(detail::first_of(slices)...))}; + offset + }; } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp index a80148679238..e5f7bee4cadb 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp @@ -59,6 +59,10 @@ MDSPAN_INLINE_FUNCTION constexpr size_t get_actual_static_padding_value() { } else { return dynamic_extent; } + // Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return 0; +#endif } template @@ -69,7 +73,7 @@ struct static_array_type_for_padded_extent using extents_type = _Extents; using type = ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::maybe_static_array< index_type, size_t, dynamic_extent, - detail::get_actual_static_padding_value()>; }; @@ -101,6 +105,10 @@ struct padded_extent { } else { return init_padding(exts, padding_value); } + // 
Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return {}; +#endif } MDSPAN_INLINE_FUNCTION static constexpr static_array_type @@ -112,6 +120,10 @@ struct padded_extent { } else { return {}; } + // Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return {}; +#endif } template @@ -123,6 +135,10 @@ struct padded_extent { } else { return {}; } + // Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return {}; +#endif } }; } // namespace detail @@ -158,19 +174,21 @@ class layout_left_padded::mapping { typename padded_stride_type::static_array_type padded_stride = {}; extents_type exts = {}; - constexpr index_type compute_offset(std::index_sequence<>) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence<>) const { return 0; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffset index_offset) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, IndexOffset index_offset) const { return index_offset; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffsets... index_offsets) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, + IndexOffsets... 
index_offsets) const { index_type indices[] = {static_cast(index_offsets)...}; // self-recursive fold trick from // https://github.com/llvm/llvm-project/blob/96e1914aa2e6d8966acbfbe2f4d184201f1aa318/libcxx/include/mdspan/layout_left.h#L144 @@ -203,7 +221,7 @@ class layout_left_padded::mapping { #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping& operator=(const mapping&) noexcept = default; /** * Initializes the mapping with the given extents. @@ -241,62 +259,71 @@ class layout_left_padded::mapping { /** * Converting constructor from `layout_left::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true. - * If `OtherExtents::rank() > 1` then one of `padding_value`, `static_extent(0)`, or `OtherExtents::static_extent(0)` must be `dynamic_extent`; - * otherwise, `OtherExtents::static_extent(0)` must be equal to the least multiple of `padding_value` greater than or equal to `extents_type::static_extent(0)` + * This overload participates in overload resolution only if + * `is_constructible_v` is true. 
If + * `OtherExtents::rank() > 1` then one of `padding_value`, `static_extent(0)`, + * or `OtherExtents::static_extent(0)` must be `dynamic_extent`; otherwise, + * `OtherExtents::static_extent(0)` must be equal to the least multiple of + * `padding_value` greater than or equal to `extents_type::static_extent(0)` */ MDSPAN_TEMPLATE_REQUIRES( - class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v<_OtherExtents, extents_type>)) + class _OtherExtents, + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v<_OtherExtents, extents_type>)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_left::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { - static_assert((_OtherExtents::rank() > 1) || (static_padding_stride != dynamic_extent) || (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) - || (static_padding_stride == _OtherExtents::static_extent(extent_to_pad_idx))); + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert( + (_OtherExtents::rank() > 1) || + (static_padding_stride != dynamic_extent) || + (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) || + (static_padding_stride == + _OtherExtents::static_extent(extent_to_pad_idx))); } /** * Converting constructor from `layout_stride::mapping`. 
* - * This overload participates in overload resolution only if `is_constructible_v` is true + * This overload participates in overload resolution only if + * `is_constructible_v` is true */ MDSPAN_TEMPLATE_REQUIRES( - class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) + class _OtherExtents, + /* requires */ (std::is_constructible_v)) MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_stride::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { - } + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) {} /** * Converting constructor from `layout_left_padded::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true. - * Either `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or `padding_value == OtherPaddingStride`. + * This overload participates in overload resolution only if + * `is_constructible_v` is true. Either + * `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or + * `padding_value == OtherPaddingStride`. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && (padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent))) - constexpr - mapping(const _Mapping &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value + &&std::is_constructible_v< + extents_type, typename _Mapping::extents_type>)) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && + (padding_value == dynamic_extent || + _Mapping::padding_value == dynamic_extent))) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { static_assert(padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent || padding_value == _Mapping::padding_value); @@ -305,42 +332,43 @@ class layout_left_padded::mapping { /** * Converting constructor from `layout_right_padded::mapping`. * - * This overload participates in overload resolution only if `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. + * This overload participates in overload resolution only if + * `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && extents_type::rank() <= 1 - && std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v)) - constexpr - mapping(const _Mapping &other_mapping) noexcept - : padded_stride(padded_stride_type::init_padding(other_mapping.extents(), other_mapping.extents().extent(extent_to_pad_idx))), - exts(other_mapping.extents()) - {} + class _Mapping, + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value + &&extents_type::rank() <= 1 && + std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) noexcept + : padded_stride(padded_stride_type::init_padding( + other_mapping.extents(), + other_mapping.extents().extent(extent_to_pad_idx))), + exts(other_mapping.extents()) {} - constexpr const extents_type &extents() const noexcept - { + MDSPAN_INLINE_FUNCTION constexpr const extents_type & + extents() const noexcept { return exts; } - constexpr std::array - strides() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr std::array + strides() const noexcept { + if constexpr (extents_type::rank() == 0) { return {}; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr (extents_type::rank() == 1) { return {1}; } else { index_type value = 1; std::array s{}; s[extent_to_pad_idx] = value; value *= padded_stride.value(0); - for (rank_type r = extent_to_pad_idx + 1; r < extents_type::rank() - 1; ++r) - { + for (rank_type r = extent_to_pad_idx + 1; r < extents_type::rank() - 1; + ++r) { s[r] = value; value *= exts.extent(r); } @@ -349,12 +377,11 @@ class layout_left_padded::mapping { } } - constexpr index_type - required_span_size() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr index_type + 
required_span_size() const noexcept { + if constexpr (extents_type::rank() == 0) { return 1; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr (extents_type::rank() == 1) { return exts.extent(0); } else { index_type value = padded_stride.value(0); @@ -375,40 +402,51 @@ class layout_left_padded::mapping { */ MDSPAN_TEMPLATE_REQUIRES( class... _Indices, - /* requires */ ( - sizeof...(_Indices) == extents_type::rank() && - (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) - ) - ) - constexpr size_t operator()(_Indices... idxs) const noexcept - { + /* requires */ (sizeof...(_Indices) == extents_type::rank() && + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail:: + are_valid_indices()))) + MDSPAN_INLINE_FUNCTION constexpr size_t + operator()(_Indices... idxs) const noexcept { +#if !defined(NDEBUG) + ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::check_all_indices(this->extents(), + idxs...); +#endif // ! NDEBUG return compute_offset(std::index_sequence_for<_Indices...>{}, idxs...); } - static constexpr bool is_always_unique() noexcept { return true; } - static constexpr bool is_always_exhaustive() noexcept - { - return (extents_type::rank() <= rank_type(1)) - || (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent - && extents_type::static_extent(extent_to_pad_idx) == padded_stride_type::static_value()); + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return (extents_type::rank() <= rank_type(1)) || + (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent && + extents_type::static_extent(extent_to_pad_idx) == + padded_stride_type::static_value()); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { + return true; } - static constexpr bool is_always_strided() noexcept { return true; } - static constexpr bool is_unique() noexcept { return true; } - constexpr bool 
is_exhaustive() const noexcept - { - return (extents_type::rank() < 2) - || (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { + return (extents_type::rank() < 2) || + (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { + return true; } - static constexpr bool is_strided() noexcept { return true; } - constexpr index_type stride(rank_type r) const noexcept - { + MDSPAN_INLINE_FUNCTION + constexpr index_type stride(rank_type r) const noexcept { assert(r < extents_type::rank()); - if(r == 0) return index_type(1); + if (r == 0) + return index_type(1); index_type value = padded_stride.value(0); - for (rank_type k = 1; k < r; k++) value *= exts.extent(k); + for (rank_type k = 1; k < r; k++) + value *= exts.extent(k); return value; } @@ -416,26 +454,26 @@ class layout_left_padded::mapping { /** * Equality operator between `layout_left_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. * - * \note There is currently a difference from p2642r2, where this function is specified as taking - * `layout_left_padded< padding_value >::mapping< Extents>`. However, this makes `padding_value` non-deducible. + * \note There is currently a difference from p2642r2, where this function is + * specified as taking `layout_left_padded< padding_value >::mapping< + * Extents>`. However, this makes `padding_value` non-deducible. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator==(const mapping &left, const _Mapping &right) noexcept - { - // Workaround for some compilers not short-circuiting properly with compile-time checks - // i.e. we can't access stride(_padding_stride_idx) of a rank 0 mapping + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const mapping &left, const _Mapping &right) noexcept { + // Workaround for some compilers not short-circuiting properly with + // compile-time checks i.e. we can't access stride(_padding_stride_idx) of a + // rank 0 mapping bool strides_equal = true; - if constexpr (extents_type::rank() > rank_type(1)) - { - strides_equal = left.stride(padded_stride_idx) == right.stride(padded_stride_idx); + if constexpr (extents_type::rank() > rank_type(1)) { + strides_equal = + left.stride(padded_stride_idx) == right.stride(padded_stride_idx); } return (left.extents() == right.extents()) && strides_equal; } @@ -444,20 +482,31 @@ class layout_left_padded::mapping { /** * Inequality operator between `layout_left_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator!=(const mapping &left, const _Mapping &right) noexcept - { + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(const mapping &left, const _Mapping &right) noexcept { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + MDSPAN_INLINE_FUNCTION + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; template @@ -490,25 +539,27 @@ class layout_right_padded::mapping { typename padded_stride_type::static_array_type padded_stride = {}; extents_type exts = {}; - constexpr index_type compute_offset(std::index_sequence<>) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence<>) const { return 0; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffset index_offset) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, IndexOffset index_offset) const { return index_offset; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffsets... index_offsets) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, + IndexOffsets... 
index_offsets) const { // self-recursive fold trick from // https://github.com/llvm/llvm-project/blob/4d9771741d40cc9cfcccb6b033f43689d36b705a/libcxx/include/mdspan/layout_right.h#L141 index_type res = 0; ((res = static_cast(index_offsets) + (Ranks == extent_to_pad_idx ? padded_stride.value(0) - : exts.extent(Ranks)) * + : exts.extent(Ranks)) * res), ...); return res; @@ -533,7 +584,7 @@ class layout_right_padded::mapping { #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping& operator=(const mapping&) noexcept = default; /** * Initializes the mapping with the given extents. @@ -577,56 +628,62 @@ class layout_right_padded::mapping { */ MDSPAN_TEMPLATE_REQUIRES( class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v<_OtherExtents, extents_type>)) + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v<_OtherExtents, extents_type>)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_right::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { - static_assert((_OtherExtents::rank() > 1) || (padded_stride_type::static_value() != dynamic_extent) || (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) - || (padded_stride_type::static_value() == _OtherExtents::static_extent(extent_to_pad_idx))); + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert( + (_OtherExtents::rank() > 1) || + (padded_stride_type::static_value() != dynamic_extent) || + (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) || + (padded_stride_type::static_value() 
== + _OtherExtents::static_extent(extent_to_pad_idx))); } /** * Converting constructor from `layout_stride::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true + * This overload participates in overload resolution only if + * `is_constructible_v` is true */ MDSPAN_TEMPLATE_REQUIRES( class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) + /* requires */ (std::is_constructible_v)) MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_stride::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - {} + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) {} /** * Converting constructor from `layout_right_padded::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true. - * Either `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or `padding_value == OtherPaddingStride`. + * This overload participates in overload resolution only if + * `is_constructible_v` is true. Either + * `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or + * `padding_value == OtherPaddingStride`. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && std::is_constructible_v - ) - ) + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value + &&std::is_constructible_v< + extents_type, typename _Mapping::extents_type>)) MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && (padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent))) + MDSPAN_INLINE_FUNCTION constexpr mapping(const _Mapping &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { static_assert(padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent || padding_value == _Mapping::padding_value); @@ -635,41 +692,42 @@ class layout_right_padded::mapping { /** * Converting constructor from `layout_left_padded::mapping`. * - * This overload participates in overload resolution only if `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. + * This overload participates in overload resolution only if + * `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && extents_type::rank() <= 1 - && std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v)) + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value + &&extents_type::rank() <= 1 && + std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const _Mapping &other_mapping) noexcept - : padded_stride(padded_stride_type::init_padding(other_mapping.extents(), other_mapping.extents().extent(extent_to_pad_idx))), - exts(other_mapping.extents()) - {} + : padded_stride(padded_stride_type::init_padding( + other_mapping.extents(), + other_mapping.extents().extent(extent_to_pad_idx))), + exts(other_mapping.extents()) {} - constexpr const extents_type &extents() const noexcept - { + MDSPAN_INLINE_FUNCTION constexpr const extents_type & + extents() const noexcept { return exts; } - constexpr std::array - strides() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr std::array + strides() const noexcept { + if constexpr (extents_type::rank() == 0) { return {}; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr (extents_type::rank() == 1) { return {1}; } else { index_type value = 1; std::array s{}; s[extent_to_pad_idx] = value; value *= padded_stride.value(0); - for (rank_type r = extent_to_pad_idx - 1; r > 0; --r) - { + for (rank_type r = extent_to_pad_idx - 1; r > 0; --r) { s[r] = value; value *= exts.extent(r); } @@ -678,17 +736,15 @@ class layout_right_padded::mapping { } } - constexpr index_type - required_span_size() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr index_type + required_span_size() const noexcept { + if constexpr (extents_type::rank() == 0) { return 1; - } else if constexpr ( extents_type::rank() == 
1 ) { + } else if constexpr (extents_type::rank() == 1) { return exts.extent(0); } else { index_type value = 1; - for (rank_type r = 0; r < extent_to_pad_idx; ++r) - { + for (rank_type r = 0; r < extent_to_pad_idx; ++r) { value *= exts.extent(r); } return value * padded_stride.value(0); @@ -705,40 +761,47 @@ class layout_right_padded::mapping { */ MDSPAN_TEMPLATE_REQUIRES( class... _Indices, - /* requires */ ( - sizeof...(_Indices) == extents_type::rank() && - (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) - ) - ) - constexpr size_t operator()(_Indices... idxs) const noexcept - { + /* requires */ (sizeof...(_Indices) == extents_type::rank() && + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail:: + are_valid_indices()))) + MDSPAN_INLINE_FUNCTION constexpr size_t + operator()(_Indices... idxs) const noexcept { return compute_offset(std::index_sequence_for<_Indices...>{}, idxs...); } - static constexpr bool is_always_unique() noexcept { return true; } - static constexpr bool is_always_exhaustive() noexcept - { - return (extents_type::rank() <= rank_type(1)) - || (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent - && extents_type::static_extent(extent_to_pad_idx) == padded_stride_type::static_value()); + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return (extents_type::rank() <= rank_type(1)) || + (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent && + extents_type::static_extent(extent_to_pad_idx) == + padded_stride_type::static_value()); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { + return true; } - static constexpr bool is_always_strided() noexcept { return true; } - static constexpr bool is_unique() noexcept { return true; } - constexpr bool is_exhaustive() const noexcept - { - return (extents_type::rank() < 2) - || (exts.extent(extent_to_pad_idx) == 
padded_stride.value(0)); + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { + return (extents_type::rank() < 2) || + (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { + return true; } - static constexpr bool is_strided() noexcept { return true; } - constexpr index_type stride(rank_type r) const noexcept - { + MDSPAN_INLINE_FUNCTION constexpr index_type + stride(rank_type r) const noexcept { assert(r < extents_type::rank()); - if(r == extents_type::rank() - 1) return index_type(1); + if (r == extents_type::rank() - 1) + return index_type(1); index_type value = padded_stride.value(0); - for (rank_type k = extents_type::rank() - 2; k > r; k--) value *= exts.extent(k); + for (rank_type k = extents_type::rank() - 2; k > r; k--) + value *= exts.extent(k); return value; } @@ -746,26 +809,26 @@ class layout_right_padded::mapping { /** * Equality operator between `layout_right_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. * - * \note There is currently a difference from p2642r2, where this function is specified as taking - * `layout_right_padded< padding_value >::mapping< Extents>`. However, this makes `padding_value` non-deducible. + * \note There is currently a difference from p2642r2, where this function is + * specified as taking `layout_right_padded< padding_value >::mapping< + * Extents>`. However, this makes `padding_value` non-deducible. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator==(const mapping &left, const _Mapping &right) noexcept - { - // Workaround for some compilers not short-circuiting properly with compile-time checks - // i.e. we can't access stride(_padding_stride_idx) of a rank 0 mapping + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const mapping &left, const _Mapping &right) noexcept { + // Workaround for some compilers not short-circuiting properly with + // compile-time checks i.e. we can't access stride(_padding_stride_idx) of a + // rank 0 mapping bool strides_equal = true; - if constexpr (extents_type::rank() > rank_type(1)) - { - strides_equal = left.stride(padded_stride_idx) == right.stride(padded_stride_idx); + if constexpr (extents_type::rank() > rank_type(1)) { + strides_equal = + left.stride(padded_stride_idx) == right.stride(padded_stride_idx); } return (left.extents() == right.extents()) && strides_equal; } @@ -774,20 +837,31 @@ class layout_right_padded::mapping { /** * Inequality operator between `layout_right_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator!=(const mapping &left, const _Mapping &right) noexcept - { + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(const mapping &left, const _Mapping &right) noexcept { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + MDSPAN_INLINE_FUNCTION + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; } } diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp index 945f091a2dc9..18daa28cc689 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp @@ -17,6 +17,7 @@ #include #include "../__p0009_bits/dynamic_extent.hpp" +#include "../__p0009_bits/utility.hpp" namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { @@ -82,36 +83,49 @@ struct is_layout_right_padded_mapping<_Mapping, std::enable_if_t::template mapping>::value>> : std::true_type {}; + +template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<0>) {} + template -constexpr void check_padded_layout_converting_constructor_mandates() +constexpr void 
check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<1>) {} + +template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank) { - if constexpr (_LayoutExtentsType::rank() > 1) { - using extents_type = typename _PaddedLayoutMappingType::extents_type; - constexpr auto padding_value = _PaddedLayoutMappingType::padding_value; - constexpr auto idx = layout_padded_constants::extent_to_pad_idx; - if constexpr ((_LayoutExtentsType::static_extent(idx) != dynamic_extent) && - (extents_type::static_extent(idx) != dynamic_extent) && - (padding_value != dynamic_extent)) { - if constexpr (padding_value == 0) { - static_assert(_LayoutExtentsType::static_extent(idx) == 0); - } else { - static_assert( - _LayoutExtentsType::static_extent(idx) % padding_value == 0); - } - } - } + using extents_type = typename _PaddedLayoutMappingType::extents_type; + constexpr auto padding_value = _PaddedLayoutMappingType::padding_value; + constexpr auto idx = layout_padded_constants::extent_to_pad_idx; + + constexpr auto statically_determinable = + (_LayoutExtentsType::static_extent(idx) != dynamic_extent) && + (extents_type::static_extent(idx) != dynamic_extent) && + (padding_value != dynamic_extent); + + static_assert(!statically_determinable || + (padding_value == 0 + ? 
_LayoutExtentsType::static_extent(idx) == 0 + : _LayoutExtentsType::static_extent(idx) % padding_value == 0), + ""); } template -constexpr void check_padded_layout_converting_constructor_preconditions([[maybe_unused]] const _OtherMapping &other_mapping) { - if constexpr (_ExtentsType::rank() > 1) { - constexpr auto padded_stride_idx = - layout_padded_constants::padded_stride_idx; - constexpr auto extent_to_pad_idx = layout_padded_constants::extent_to_pad_idx; - assert(other_mapping.stride(padded_stride_idx) == other_mapping.extents().extent(extent_to_pad_idx)); - } +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<0>, + const _OtherMapping&) {} +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<1>, + const _OtherMapping&) {} +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank, + const _OtherMapping &other_mapping) { + constexpr auto padded_stride_idx = + layout_padded_constants::padded_stride_idx; + constexpr auto extent_to_pad_idx = layout_padded_constants::extent_to_pad_idx; + MDSPAN_IMPL_PRECONDITION(other_mapping.stride(padded_stride_idx) == other_mapping.extents().extent(extent_to_pad_idx)); } + + } } } diff --git a/packages/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp b/packages/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp index ac72a1a4e64f..4a0e354ffd02 100644 --- a/packages/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp +++ b/packages/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp @@ -38,5 +38,6 @@ #include "../experimental/__p2642_bits/layout_padded.hpp" #include "../experimental/__p2630_bits/submdspan.hpp" #endif +#include "../experimental/__p2389_bits/dims.hpp" #endif // MDSPAN_HPP_ From 4386c33f1fdc2aed18c7146714927fb575fba5fd Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 8 Aug 2024 15:38:14 -0600 Subject: 
[PATCH 15/37] Snapshot of kokkos-kernels.git from commit 630a58d5f34e8f1c3c587d45c735ce52331646bb From repository at git@github.com:kokkos/kokkos-kernels.git At commit: commit 630a58d5f34e8f1c3c587d45c735ce52331646bb Author: Nathan Ellingwood Date: Thu Aug 8 15:29:27 2024 -0600 update master_history.txt for 4.4.00 Signed-off-by: Nathan Ellingwood --- packages/kokkos-kernels/.clang-format | 1 + packages/kokkos-kernels/CHANGELOG.md | 72 + packages/kokkos-kernels/CMakeLists.txt | 19 +- packages/kokkos-kernels/README.md | 1 + .../batched/KokkosBatched_Util.hpp | 276 +- .../impl/KokkosBatched_AddRadial_Impl.hpp | 17 +- .../impl/KokkosBatched_AddRadial_Internal.hpp | 9 +- ...kosBatched_ApplyGivens_Serial_Internal.hpp | 42 +- ...osBatched_ApplyHouseholder_Serial_Impl.hpp | 30 +- ...tched_ApplyHouseholder_Serial_Internal.hpp | 20 +- ...tched_ApplyHouseholder_TeamVector_Impl.hpp | 34 +- ...d_ApplyHouseholder_TeamVector_Internal.hpp | 76 +- .../impl/KokkosBatched_ApplyPivot_Impl.hpp | 88 +- .../KokkosBatched_ApplyPivot_Internal.hpp | 86 +- .../impl/KokkosBatched_ApplyQ_Serial_Impl.hpp | 48 +- .../KokkosBatched_ApplyQ_Serial_Internal.hpp | 42 +- .../KokkosBatched_ApplyQ_TeamVector_Impl.hpp | 60 +- ...kkosBatched_ApplyQ_TeamVector_Internal.hpp | 48 +- .../dense/impl/KokkosBatched_Axpy_Impl.hpp | 280 +- .../dense/impl/KokkosBatched_Copy_Impl.hpp | 234 +- .../impl/KokkosBatched_Copy_Internal.hpp | 80 +- .../dense/impl/KokkosBatched_Dot_Internal.hpp | 370 +- ...Batched_Eigendecomposition_Serial_Impl.hpp | 36 +- ...hed_Eigendecomposition_Serial_Internal.hpp | 47 +- ...hed_Eigendecomposition_TeamVector_Impl.hpp | 41 +- ...Eigendecomposition_TeamVector_Internal.hpp | 22 +- ...kkosBatched_Eigenvalue_Serial_Internal.hpp | 38 +- .../impl/KokkosBatched_FindAmax_Internal.hpp | 11 +- .../KokkosBatched_Francis_Serial_Internal.hpp | 55 +- .../impl/KokkosBatched_Gemm_Serial_Impl.hpp | 268 +- .../KokkosBatched_Gemm_Serial_Internal.hpp | 35 +- .../KokkosBatched_Gemm_TeamVector_Impl.hpp | 64 +- 
...KokkosBatched_Gemm_TeamVector_Internal.hpp | 72 +- .../impl/KokkosBatched_Gemm_Team_Impl.hpp | 144 +- .../impl/KokkosBatched_Gemm_Team_Internal.hpp | 83 +- .../KokkosBatched_Gemv_TeamVector_Impl.hpp | 63 +- ...KokkosBatched_Gemv_TeamVector_Internal.hpp | 69 +- .../impl/KokkosBatched_Gemv_Team_Impl.hpp | 71 +- .../impl/KokkosBatched_Gemv_Team_Internal.hpp | 67 +- .../dense/impl/KokkosBatched_Gesv_Impl.hpp | 418 +- .../KokkosBatched_Givens_Serial_Internal.hpp | 11 +- .../KokkosBatched_HadamardProduct_Impl.hpp | 206 +- ...atched_HessenbergFormQ_Serial_Internal.hpp | 17 +- ...HessenbergQR_WithShift_Serial_Internal.hpp | 30 +- ...kkosBatched_Hessenberg_Serial_Internal.hpp | 16 +- ...okkosBatched_HostLevel_Gemm_Armpl_Impl.hpp | 58 +- ...kkosBatched_HostLevel_Gemm_DblBuf_Impl.hpp | 401 +- .../KokkosBatched_HostLevel_Gemm_Impl.hpp | 176 +- ...kkosBatched_HostLevel_Gemm_Serial_Impl.hpp | 43 +- .../KokkosBatched_HostLevel_Gemm_Spec.hpp | 279 +- .../KokkosBatched_Householder_Serial_Impl.hpp | 8 +- ...kosBatched_Householder_Serial_Internal.hpp | 8 +- ...kosBatched_Householder_TeamVector_Impl.hpp | 9 +- ...atched_Householder_TeamVector_Internal.hpp | 16 +- ...okkosBatched_InnerGemmFixA_Serial_Impl.hpp | 517 +- ...okkosBatched_InnerGemmFixB_Serial_Impl.hpp | 504 +- ...okkosBatched_InnerGemmFixC_Serial_Impl.hpp | 538 +- .../KokkosBatched_InnerGemmFixC_Team_Impl.hpp | 51 +- .../KokkosBatched_InnerLU_Serial_Impl.hpp | 76 +- .../KokkosBatched_InnerTrsm_Serial_Impl.hpp | 472 +- .../KokkosBatched_InverseLU_Serial_Impl.hpp | 35 +- .../impl/KokkosBatched_LU_Serial_Impl.hpp | 37 +- .../impl/KokkosBatched_LU_Serial_Internal.hpp | 31 +- .../dense/impl/KokkosBatched_LU_Team_Impl.hpp | 16 +- .../impl/KokkosBatched_LU_Team_Internal.hpp | 80 +- ...ftEigenvectorFromSchur_Serial_Internal.hpp | 34 +- .../impl/KokkosBatched_Normalize_Internal.hpp | 9 +- .../impl/KokkosBatched_Pttrf_Serial_Impl.hpp | 68 + .../KokkosBatched_Pttrf_Serial_Internal.hpp | 202 + 
...KokkosBatched_QR_FormQ_Serial_Internal.hpp | 12 +- ...osBatched_QR_FormQ_TeamVector_Internal.hpp | 17 +- .../impl/KokkosBatched_QR_Serial_Impl.hpp | 7 +- .../impl/KokkosBatched_QR_Serial_Internal.hpp | 11 +- .../impl/KokkosBatched_QR_TeamVector_Impl.hpp | 7 +- .../KokkosBatched_QR_TeamVector_Internal.hpp | 11 +- ..._QR_WithColumnPivoting_TeamVector_Impl.hpp | 16 +- ...WithColumnPivoting_TeamVector_Internal.hpp | 36 +- ...htEigenvectorFromSchur_Serial_Internal.hpp | 30 +- .../impl/KokkosBatched_SVD_Serial_Impl.hpp | 61 +- .../KokkosBatched_SVD_Serial_Internal.hpp | 143 +- ...KokkosBatched_Schur2x2_Serial_Internal.hpp | 39 +- .../KokkosBatched_Schur_Serial_Internal.hpp | 80 +- .../impl/KokkosBatched_SetIdentity_Impl.hpp | 9 +- .../KokkosBatched_SetIdentity_Internal.hpp | 20 +- .../KokkosBatched_SetTriangular_Internal.hpp | 21 +- ...kosBatched_ShiftedTrsv_Serial_Internal.hpp | 28 +- ...KokkosBatched_SolveUTV_TeamVector_Impl.hpp | 31 +- ...osBatched_SolveUTV_TeamVector_Internal.hpp | 77 +- .../impl/KokkosBatched_Tbsv_Serial_Impl.hpp | 146 + .../KokkosBatched_Tbsv_Serial_Internal.hpp | 199 + .../impl/KokkosBatched_Trmm_Serial_Impl.hpp | 144 +- .../KokkosBatched_Trmm_Serial_Internal.hpp | 218 +- .../impl/KokkosBatched_Trsm_Serial_Impl.hpp | 321 +- .../KokkosBatched_Trsm_Serial_Internal.hpp | 92 +- .../KokkosBatched_Trsm_TeamVector_Impl.hpp | 60 +- ...KokkosBatched_Trsm_TeamVector_Internal.hpp | 92 +- .../impl/KokkosBatched_Trsm_Team_Impl.hpp | 196 +- .../impl/KokkosBatched_Trsm_Team_Internal.hpp | 177 +- .../impl/KokkosBatched_Trsv_Serial_Impl.hpp | 222 +- .../KokkosBatched_Trsv_Serial_Internal.hpp | 99 +- .../KokkosBatched_Trsv_TeamVector_Impl.hpp | 52 +- ...KokkosBatched_Trsv_TeamVector_Internal.hpp | 71 +- .../impl/KokkosBatched_Trsv_Team_Impl.hpp | 103 +- .../impl/KokkosBatched_Trsv_Team_Internal.hpp | 89 +- .../impl/KokkosBatched_Trtri_Serial_Impl.hpp | 10 +- .../KokkosBatched_Trtri_Serial_Internal.hpp | 46 +- .../KokkosBatched_UTV_TeamVector_Impl.hpp | 17 +- 
.../KokkosBatched_UTV_TeamVector_Internal.hpp | 33 +- .../KokkosBatched_UpdateGivens_Internal.hpp | 5 +- .../impl/KokkosBatched_Vector_SIMD_Arith.hpp | 445 +- .../KokkosBatched_Vector_SIMD_Logical.hpp | 41 +- .../impl/KokkosBatched_Vector_SIMD_Math.hpp | 45 +- .../impl/KokkosBatched_Vector_SIMD_Misc.hpp | 76 +- .../KokkosBatched_Vector_SIMD_Relation.hpp | 40 +- .../impl/KokkosBatched_Vector_SIMD_View.hpp | 178 +- ...Batched_WilkinsonShift_Serial_Internal.hpp | 10 +- .../dense/impl/KokkosBatched_Xpay_Impl.hpp | 235 +- .../src/KokkosBatched_AddRadial_Decl.hpp | 7 +- .../KokkosBatched_ApplyHouseholder_Decl.hpp | 13 +- .../src/KokkosBatched_ApplyPivot_Decl.hpp | 7 +- .../dense/src/KokkosBatched_ApplyQ_Decl.hpp | 52 +- .../batched/dense/src/KokkosBatched_Axpy.hpp | 12 +- .../dense/src/KokkosBatched_Copy_Decl.hpp | 48 +- .../batched/dense/src/KokkosBatched_Dot.hpp | 12 +- .../KokkosBatched_Eigendecomposition_Decl.hpp | 18 +- .../dense/src/KokkosBatched_Gemm_Decl.hpp | 49 +- .../dense/src/KokkosBatched_Gemv_Decl.hpp | 110 +- .../batched/dense/src/KokkosBatched_Gesv.hpp | 17 +- .../src/KokkosBatched_HadamardProduct.hpp | 27 +- .../src/KokkosBatched_HostLevel_Gemm.hpp | 33 +- .../KokkosBatched_HostLevel_Gemm_Handle.hpp | 39 +- .../src/KokkosBatched_Householder_Decl.hpp | 7 +- .../src/KokkosBatched_InnerGemmFixA_Decl.hpp | 16 +- .../src/KokkosBatched_InnerGemmFixB_Decl.hpp | 16 +- .../src/KokkosBatched_InnerGemmFixC_Decl.hpp | 34 +- .../dense/src/KokkosBatched_InnerLU_Decl.hpp | 6 +- .../src/KokkosBatched_InnerTrsm_Decl.hpp | 36 +- .../src/KokkosBatched_InverseLU_Decl.hpp | 20 +- .../dense/src/KokkosBatched_Kernel_Handle.hpp | 14 +- .../dense/src/KokkosBatched_LU_Decl.hpp | 10 +- .../batched/dense/src/KokkosBatched_Pttrf.hpp | 51 + .../dense/src/KokkosBatched_QR_Decl.hpp | 18 +- ...kkosBatched_QR_WithColumnPivoting_Decl.hpp | 10 +- .../dense/src/KokkosBatched_SVD_Decl.hpp | 17 +- .../dense/src/KokkosBatched_Scale_Decl.hpp | 50 +- .../src/KokkosBatched_SetIdentity_Decl.hpp | 
6 +- .../dense/src/KokkosBatched_Set_Decl.hpp | 50 +- .../dense/src/KokkosBatched_SolveLU_Decl.hpp | 48 +- .../dense/src/KokkosBatched_SolveUTV_Decl.hpp | 12 +- .../batched/dense/src/KokkosBatched_Tbsv.hpp | 54 + .../dense/src/KokkosBatched_Trmm_Decl.hpp | 7 +- .../dense/src/KokkosBatched_Trsm_Decl.hpp | 38 +- .../dense/src/KokkosBatched_Trsv_Decl.hpp | 231 +- .../dense/src/KokkosBatched_UTV_Decl.hpp | 10 +- .../dense/src/KokkosBatched_Vector.hpp | 34 +- .../dense/src/KokkosBatched_Vector_SIMD.hpp | 77 +- .../batched/dense/src/KokkosBatched_Xpay.hpp | 12 +- .../unit_test/Test_Batched_BatchedGemm.hpp | 242 +- .../Test_Batched_BatchedGemm_Complex.hpp | 118 +- .../Test_Batched_BatchedGemm_Real.hpp | 182 +- .../dense/unit_test/Test_Batched_Dense.hpp | 6 + .../unit_test/Test_Batched_DenseUtils.hpp | 107 +- .../unit_test/Test_Batched_SerialAxpy.hpp | 31 +- .../Test_Batched_SerialAxpy_Complex.hpp | 3 +- .../Test_Batched_SerialAxpy_Real.hpp | 8 +- .../unit_test/Test_Batched_SerialGemm.hpp | 142 +- .../Test_Batched_SerialGemm_Complex.hpp | 48 +- .../Test_Batched_SerialGemm_Real.hpp | 112 +- .../unit_test/Test_Batched_SerialGesv.hpp | 48 +- .../Test_Batched_SerialInverseLU.hpp | 62 +- .../Test_Batched_SerialInverseLU_Complex.hpp | 6 +- .../dense/unit_test/Test_Batched_SerialLU.hpp | 14 +- .../unit_test/Test_Batched_SerialPttrf.hpp | 422 ++ .../Test_Batched_SerialPttrf_Complex.hpp | 31 + .../Test_Batched_SerialPttrf_Real.hpp | 31 + .../unit_test/Test_Batched_SerialSVD.hpp | 122 +- .../unit_test/Test_Batched_SerialSolveLU.hpp | 48 +- .../Test_Batched_SerialSolveLU_Complex.hpp | 6 +- .../unit_test/Test_Batched_SerialTbsv.hpp | 312 + .../Test_Batched_SerialTbsv_Complex.hpp | 104 + .../Test_Batched_SerialTbsv_Real.hpp | 121 + .../unit_test/Test_Batched_SerialTrmm.hpp | 144 +- .../Test_Batched_SerialTrmm_Complex.hpp | 270 +- .../Test_Batched_SerialTrmm_Real.hpp | 198 +- .../unit_test/Test_Batched_SerialTrsm.hpp | 62 +- .../Test_Batched_SerialTrsm_Complex.hpp | 112 +- 
.../Test_Batched_SerialTrsm_Real.hpp | 110 +- .../unit_test/Test_Batched_SerialTrsv.hpp | 69 +- .../Test_Batched_SerialTrsv_Complex.hpp | 48 +- .../Test_Batched_SerialTrsv_Real.hpp | 36 +- .../unit_test/Test_Batched_SerialTrtri.hpp | 89 +- .../Test_Batched_SerialTrtri_Complex.hpp | 28 +- .../Test_Batched_SerialTrtri_Real.hpp | 12 +- .../dense/unit_test/Test_Batched_TeamAxpy.hpp | 48 +- .../Test_Batched_TeamAxpy_Complex.hpp | 3 +- .../unit_test/Test_Batched_TeamAxpy_Real.hpp | 4 +- .../dense/unit_test/Test_Batched_TeamGemm.hpp | 148 +- .../Test_Batched_TeamGemm_Complex.hpp | 52 +- .../unit_test/Test_Batched_TeamGemm_Real.hpp | 120 +- .../dense/unit_test/Test_Batched_TeamGesv.hpp | 61 +- .../unit_test/Test_Batched_TeamGesv_Real.hpp | 6 +- .../unit_test/Test_Batched_TeamInverseLU.hpp | 72 +- .../Test_Batched_TeamInverseLU_Complex.hpp | 6 +- .../dense/unit_test/Test_Batched_TeamLU.hpp | 26 +- .../unit_test/Test_Batched_TeamSolveLU.hpp | 60 +- .../Test_Batched_TeamSolveLU_Complex.hpp | 6 +- .../dense/unit_test/Test_Batched_TeamTrsm.hpp | 77 +- .../Test_Batched_TeamTrsm_Complex.hpp | 150 +- .../unit_test/Test_Batched_TeamTrsm_Real.hpp | 140 +- .../dense/unit_test/Test_Batched_TeamTrsv.hpp | 60 +- .../unit_test/Test_Batched_TeamVectorAxpy.hpp | 49 +- .../Test_Batched_TeamVectorAxpy_Complex.hpp | 3 +- .../unit_test/Test_Batched_TeamVectorGemm.hpp | 166 +- .../Test_Batched_TeamVectorGemm_Complex.hpp | 50 +- .../Test_Batched_TeamVectorGemm_Real.hpp | 100 +- .../unit_test/Test_Batched_TeamVectorGesv.hpp | 65 +- .../Test_Batched_TeamVectorGesv_Real.hpp | 12 +- .../unit_test/Test_Batched_TeamVectorQR.hpp | 66 +- ...atched_TeamVectorQR_WithColumnPivoting.hpp | 90 +- .../Test_Batched_TeamVectorSolveUTV.hpp | 105 +- .../Test_Batched_TeamVectorSolveUTV2.hpp | 115 +- .../unit_test/Test_Batched_TeamVectorUTV.hpp | 133 +- .../Test_Batched_VectorArithmatic.hpp | 138 +- .../unit_test/Test_Batched_VectorLogical.hpp | 54 +- .../unit_test/Test_Batched_VectorMath.hpp | 69 +- 
.../unit_test/Test_Batched_VectorMisc.hpp | 27 +- .../unit_test/Test_Batched_VectorRelation.hpp | 38 +- .../unit_test/Test_Batched_VectorView.hpp | 270 +- .../impl/KokkosBatched_CG_TeamVector_Impl.hpp | 91 +- .../impl/KokkosBatched_CG_Team_Impl.hpp | 92 +- .../impl/KokkosBatched_GMRES_Serial_Impl.hpp | 93 +- .../KokkosBatched_GMRES_TeamVector_Impl.hpp | 355 +- .../impl/KokkosBatched_GMRES_Team_Impl.hpp | 348 +- .../impl/KokkosBatched_Spmv_Serial_Impl.hpp | 283 +- .../KokkosBatched_Spmv_TeamVector_Impl.hpp | 449 +- .../impl/KokkosBatched_Spmv_Team_Impl.hpp | 385 +- .../batched/sparse/src/KokkosBatched_CG.hpp | 18 +- .../sparse/src/KokkosBatched_CrsMatrix.hpp | 42 +- .../sparse/src/KokkosBatched_GMRES.hpp | 21 +- .../sparse/src/KokkosBatched_Identity.hpp | 17 +- .../sparse/src/KokkosBatched_JacobiPrec.hpp | 54 +- .../src/KokkosBatched_Krylov_Handle.hpp | 46 +- .../src/KokkosBatched_Krylov_Solvers.hpp | 121 +- .../batched/sparse/src/KokkosBatched_Spmv.hpp | 140 +- .../unit_test/Test_Batched_SerialGMRES.hpp | 91 +- .../Test_Batched_SerialGMRES_Real.hpp | 8 +- .../unit_test/Test_Batched_SerialSpmv.hpp | 74 +- .../unit_test/Test_Batched_SparseUtils.hpp | 19 +- .../sparse/unit_test/Test_Batched_TeamCG.hpp | 76 +- .../unit_test/Test_Batched_TeamCG_Real.hpp | 8 +- .../unit_test/Test_Batched_TeamGMRES.hpp | 102 +- .../unit_test/Test_Batched_TeamGMRES_Real.hpp | 8 +- .../unit_test/Test_Batched_TeamSpmv.hpp | 116 +- .../unit_test/Test_Batched_TeamVectorCG.hpp | 81 +- .../Test_Batched_TeamVectorCG_Real.hpp | 8 +- .../Test_Batched_TeamVectorGMRES.hpp | 104 +- .../Test_Batched_TeamVectorGMRES_Real.hpp | 8 +- .../unit_test/Test_Batched_TeamVectorSpmv.hpp | 122 +- .../blas/impl/KokkosBlas1_abs_impl.hpp | 9 +- .../blas/impl/KokkosBlas1_abs_spec.hpp | 137 +- .../blas/impl/KokkosBlas1_axpby_impl.hpp | 116 +- .../blas/impl/KokkosBlas1_axpby_mv_impl.hpp | 212 +- .../blas/impl/KokkosBlas1_axpby_spec.hpp | 434 +- ...Blas1_axpby_unification_attempt_traits.hpp | 734 +-- 
.../blas/impl/KokkosBlas1_dot_impl.hpp | 18 +- .../blas/impl/KokkosBlas1_dot_mv_impl.hpp | 48 +- .../blas/impl/KokkosBlas1_dot_spec.hpp | 542 +- .../blas/impl/KokkosBlas1_iamax_impl.hpp | 15 +- .../blas/impl/KokkosBlas1_iamax_spec.hpp | 307 +- .../blas/impl/KokkosBlas1_mult_impl.hpp | 29 +- .../blas/impl/KokkosBlas1_mult_spec.hpp | 213 +- .../blas/impl/KokkosBlas1_nrm1_impl.hpp | 47 +- .../blas/impl/KokkosBlas1_nrm1_spec.hpp | 172 +- .../blas/impl/KokkosBlas1_nrm2_impl.hpp | 69 +- .../blas/impl/KokkosBlas1_nrm2_spec.hpp | 187 +- .../blas/impl/KokkosBlas1_nrm2w_impl.hpp | 76 +- .../blas/impl/KokkosBlas1_nrm2w_spec.hpp | 179 +- .../blas/impl/KokkosBlas1_nrminf_impl.hpp | 9 +- .../blas/impl/KokkosBlas1_nrminf_spec.hpp | 168 +- .../blas/impl/KokkosBlas1_reciprocal_impl.hpp | 9 +- .../blas/impl/KokkosBlas1_reciprocal_spec.hpp | 150 +- .../blas/impl/KokkosBlas1_rot_impl.hpp | 7 +- .../blas/impl/KokkosBlas1_rot_spec.hpp | 76 +- .../blas/impl/KokkosBlas1_rotg_impl.hpp | 46 +- .../blas/impl/KokkosBlas1_rotg_spec.hpp | 78 +- .../blas/impl/KokkosBlas1_rotm_impl.hpp | 29 +- .../blas/impl/KokkosBlas1_rotm_spec.hpp | 77 +- .../blas/impl/KokkosBlas1_rotmg_impl.hpp | 15 +- .../blas/impl/KokkosBlas1_rotmg_spec.hpp | 91 +- .../blas/impl/KokkosBlas1_scal_impl.hpp | 33 +- .../blas/impl/KokkosBlas1_scal_mv_impl.hpp | 184 +- .../blas/impl/KokkosBlas1_scal_spec.hpp | 278 +- .../impl/KokkosBlas1_serial_scal_impl.hpp | 9 +- .../blas/impl/KokkosBlas1_set_impl.hpp | 71 +- .../blas/impl/KokkosBlas1_sum_impl.hpp | 49 +- .../blas/impl/KokkosBlas1_sum_spec.hpp | 148 +- .../blas/impl/KokkosBlas1_swap_impl.hpp | 3 +- .../blas/impl/KokkosBlas1_swap_spec.hpp | 70 +- .../blas/impl/KokkosBlas1_team_abs_spec.hpp | 12 +- .../blas/impl/KokkosBlas1_team_axpby_spec.hpp | 20 +- .../blas/impl/KokkosBlas1_team_dot_spec.hpp | 17 +- .../blas/impl/KokkosBlas1_team_mult_spec.hpp | 24 +- .../blas/impl/KokkosBlas1_team_nrm2_spec.hpp | 23 +- .../blas/impl/KokkosBlas1_team_scal_impl.hpp | 63 +- 
.../blas/impl/KokkosBlas1_team_scal_spec.hpp | 16 +- .../impl/KokkosBlas1_team_update_spec.hpp | 27 +- .../blas/impl/KokkosBlas1_update_impl.hpp | 144 +- .../blas/impl/KokkosBlas1_update_spec.hpp | 252 +- .../blas/impl/KokkosBlas2_gemv_impl.hpp | 438 +- .../blas/impl/KokkosBlas2_gemv_spec.hpp | 108 +- .../blas/impl/KokkosBlas2_ger_impl.hpp | 118 +- .../blas/impl/KokkosBlas2_ger_spec.hpp | 87 +- .../impl/KokkosBlas2_serial_gemv_impl.hpp | 100 +- ...osBlas2_serial_gemv_inner_multiple_dot.hpp | 134 +- .../impl/KokkosBlas2_serial_gemv_internal.hpp | 43 +- .../blas/impl/KokkosBlas2_syr2_impl.hpp | 231 +- .../blas/impl/KokkosBlas2_syr2_spec.hpp | 110 +- .../blas/impl/KokkosBlas2_syr_impl.hpp | 150 +- .../blas/impl/KokkosBlas2_syr_spec.hpp | 90 +- .../blas/impl/KokkosBlas2_team_gemv_impl.hpp | 131 +- .../blas/impl/KokkosBlas2_team_gemv_spec.hpp | 177 +- .../impl/KokkosBlas3_gemm_dotbased_impl.hpp | 56 +- .../blas/impl/KokkosBlas3_gemm_impl.hpp | 611 +- .../blas/impl/KokkosBlas3_gemm_spec.hpp | 299 +- .../blas/impl/KokkosBlas3_trmm_impl.hpp | 69 +- .../blas/impl/KokkosBlas3_trmm_spec.hpp | 94 +- .../blas/impl/KokkosBlas3_trsm_impl.hpp | 287 +- .../blas/impl/KokkosBlas3_trsm_spec.hpp | 111 +- .../blas/impl/KokkosBlas_serial_axpy.hpp | 20 +- .../blas/impl/KokkosBlas_serial_nrm2.hpp | 19 +- .../blas/impl/KokkosBlas_util.hpp | 11 +- .../blas/src/KokkosBlas1_abs.hpp | 40 +- .../blas/src/KokkosBlas1_axpby.hpp | 97 +- .../blas/src/KokkosBlas1_dot.hpp | 156 +- .../blas/src/KokkosBlas1_fill.hpp | 3 +- .../blas/src/KokkosBlas1_iamax.hpp | 78 +- .../blas/src/KokkosBlas1_mult.hpp | 69 +- .../blas/src/KokkosBlas1_nrm1.hpp | 92 +- .../blas/src/KokkosBlas1_nrm2.hpp | 119 +- .../blas/src/KokkosBlas1_nrm2_squared.hpp | 88 +- .../blas/src/KokkosBlas1_nrm2w.hpp | 75 +- .../blas/src/KokkosBlas1_nrm2w_squared.hpp | 90 +- .../blas/src/KokkosBlas1_nrminf.hpp | 88 +- .../blas/src/KokkosBlas1_reciprocal.hpp | 40 +- .../blas/src/KokkosBlas1_rot.hpp | 63 +- .../blas/src/KokkosBlas1_rotg.hpp | 40 +- 
.../blas/src/KokkosBlas1_rotm.hpp | 63 +- .../blas/src/KokkosBlas1_rotmg.hpp | 47 +- .../blas/src/KokkosBlas1_scal.hpp | 69 +- .../blas/src/KokkosBlas1_set.hpp | 21 +- .../blas/src/KokkosBlas1_sum.hpp | 64 +- .../blas/src/KokkosBlas1_swap.hpp | 42 +- .../blas/src/KokkosBlas1_team_abs.hpp | 3 +- .../blas/src/KokkosBlas1_team_axpby.hpp | 17 +- .../blas/src/KokkosBlas1_team_dot.hpp | 6 +- .../blas/src/KokkosBlas1_team_mult.hpp | 10 +- .../blas/src/KokkosBlas1_team_nrm2.hpp | 6 +- .../blas/src/KokkosBlas1_team_scal.hpp | 5 +- .../blas/src/KokkosBlas1_team_update.hpp | 12 +- .../blas/src/KokkosBlas1_update.hpp | 66 +- .../blas/src/KokkosBlas2_gemv.hpp | 149 +- .../blas/src/KokkosBlas2_ger.hpp | 83 +- .../blas/src/KokkosBlas2_serial_gemv.hpp | 17 +- .../blas/src/KokkosBlas2_syr.hpp | 69 +- .../blas/src/KokkosBlas2_syr2.hpp | 93 +- .../blas/src/KokkosBlas2_team_gemv.hpp | 61 +- .../blas/src/KokkosBlas3_gemm.hpp | 129 +- .../blas/src/KokkosBlas3_trmm.hpp | 61 +- .../blas/src/KokkosBlas3_trsm.hpp | 61 +- .../blas/src/KokkosBlas_trtri.hpp | 3 +- .../tpls/KokkosBlas1_axpby_tpl_spec_avail.hpp | 69 +- .../tpls/KokkosBlas1_axpby_tpl_spec_decl.hpp | 583 +- .../tpls/KokkosBlas1_dot_tpl_spec_avail.hpp | 74 +- .../tpls/KokkosBlas1_dot_tpl_spec_decl.hpp | 398 +- .../tpls/KokkosBlas1_iamax_tpl_spec_avail.hpp | 189 +- .../tpls/KokkosBlas1_iamax_tpl_spec_decl.hpp | 708 +-- .../tpls/KokkosBlas1_mult_tpl_spec_avail.hpp | 3 +- .../tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp | 141 +- .../tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 484 +- .../tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp | 74 +- .../tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp | 571 +- .../KokkosBlas1_nrminf_tpl_spec_avail.hpp | 33 +- .../tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp | 328 +- .../tpls/KokkosBlas1_rot_tpl_spec_avail.hpp | 62 +- .../tpls/KokkosBlas1_rot_tpl_spec_decl.hpp | 512 +- .../tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp | 195 +- .../tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp | 946 ++- 
.../tpls/KokkosBlas1_rotm_tpl_spec_avail.hpp | 105 +- .../tpls/KokkosBlas1_rotm_tpl_spec_decl.hpp | 396 +- .../tpls/KokkosBlas1_rotmg_tpl_spec_avail.hpp | 108 +- .../tpls/KokkosBlas1_rotmg_tpl_spec_decl.hpp | 443 +- .../tpls/KokkosBlas1_scal_tpl_spec_avail.hpp | 122 +- .../tpls/KokkosBlas1_scal_tpl_spec_decl.hpp | 458 +- .../tpls/KokkosBlas1_swap_tpl_spec_avail.hpp | 149 +- .../tpls/KokkosBlas1_swap_tpl_spec_decl.hpp | 733 +-- .../KokkosBlas1_update_tpl_spec_avail.hpp | 3 +- .../tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp | 180 +- .../tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp | 1171 ++-- .../tpls/KokkosBlas2_ger_tpl_spec_avail.hpp | 218 +- .../KokkosBlas2_ger_tpl_spec_decl_blas.hpp | 485 +- .../KokkosBlas2_ger_tpl_spec_decl_cublas.hpp | 513 +- .../KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp | 482 +- .../KokkosBlas2_serial_gemv_tpl_spec_decl.hpp | 76 +- .../tpls/KokkosBlas2_syr2_tpl_spec_avail.hpp | 222 +- .../KokkosBlas2_syr2_tpl_spec_decl_blas.hpp | 466 +- .../KokkosBlas2_syr2_tpl_spec_decl_cublas.hpp | 562 +- ...KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp | 504 +- .../tpls/KokkosBlas2_syr_tpl_spec_avail.hpp | 203 +- .../KokkosBlas2_syr_tpl_spec_decl_blas.hpp | 404 +- .../KokkosBlas2_syr_tpl_spec_decl_cublas.hpp | 505 +- .../KokkosBlas2_syr_tpl_spec_decl_rocblas.hpp | 452 +- .../tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp | 175 +- .../tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp | 689 +-- .../tpls/KokkosBlas3_trmm_tpl_spec_avail.hpp | 113 +- .../tpls/KokkosBlas3_trmm_tpl_spec_decl.hpp | 508 +- .../tpls/KokkosBlas3_trsm_tpl_spec_avail.hpp | 113 +- .../tpls/KokkosBlas3_trsm_tpl_spec_decl.hpp | 1348 ++-- .../blas/tpls/KokkosBlas_Cuda_tpl.hpp | 3 +- .../blas/tpls/KokkosBlas_Host_tpl.cpp | 837 +-- .../blas/tpls/KokkosBlas_Host_tpl.hpp | 61 +- .../blas/tpls/KokkosBlas_Rocm_tpl.hpp | 3 +- .../blas/tpls/KokkosBlas_tpl_spec.hpp | 53 +- .../blas/unit_test/Test_Blas1_abs.hpp | 56 +- .../blas/unit_test/Test_Blas1_asum.hpp | 29 +- .../blas/unit_test/Test_Blas1_axpby.hpp | 71 +- 
.../Test_Blas1_axpby_unification.hpp | 999 ++- .../blas/unit_test/Test_Blas1_axpy.hpp | 63 +- .../blas/unit_test/Test_Blas1_dot.hpp | 67 +- .../blas/unit_test/Test_Blas1_iamax.hpp | 64 +- .../blas/unit_test/Test_Blas1_mult.hpp | 141 +- .../blas/unit_test/Test_Blas1_nrm1.hpp | 57 +- .../blas/unit_test/Test_Blas1_nrm2.hpp | 47 +- .../unit_test/Test_Blas1_nrm2_squared.hpp | 56 +- .../blas/unit_test/Test_Blas1_nrm2w.hpp | 53 +- .../unit_test/Test_Blas1_nrm2w_squared.hpp | 50 +- .../blas/unit_test/Test_Blas1_nrminf.hpp | 47 +- .../blas/unit_test/Test_Blas1_reciprocal.hpp | 83 +- .../blas/unit_test/Test_Blas1_rot.hpp | 12 +- .../blas/unit_test/Test_Blas1_rotg.hpp | 15 +- .../blas/unit_test/Test_Blas1_rotm.hpp | 15 +- .../blas/unit_test/Test_Blas1_rotmg.hpp | 19 +- .../blas/unit_test/Test_Blas1_scal.hpp | 62 +- .../unit_test/Test_Blas1_serial_setscal.hpp | 89 +- .../blas/unit_test/Test_Blas1_sum.hpp | 41 +- .../blas/unit_test/Test_Blas1_swap.hpp | 12 +- .../blas/unit_test/Test_Blas1_team_abs.hpp | 139 +- .../blas/unit_test/Test_Blas1_team_axpby.hpp | 150 +- .../blas/unit_test/Test_Blas1_team_axpy.hpp | 147 +- .../blas/unit_test/Test_Blas1_team_dot.hpp | 179 +- .../blas/unit_test/Test_Blas1_team_mult.hpp | 244 +- .../blas/unit_test/Test_Blas1_team_nrm2.hpp | 49 +- .../blas/unit_test/Test_Blas1_team_scal.hpp | 188 +- .../unit_test/Test_Blas1_team_setscal.hpp | 103 +- .../blas/unit_test/Test_Blas1_team_update.hpp | 258 +- .../blas/unit_test/Test_Blas1_update.hpp | 152 +- .../blas/unit_test/Test_Blas2_gemv.hpp | 189 +- .../blas/unit_test/Test_Blas2_gemv_util.hpp | 129 +- .../blas/unit_test/Test_Blas2_ger.hpp | 922 +-- .../blas/unit_test/Test_Blas2_serial_gemv.hpp | 42 +- .../blas/unit_test/Test_Blas2_syr.hpp | 971 +-- .../blas/unit_test/Test_Blas2_syr2.hpp | 980 +-- .../blas/unit_test/Test_Blas2_team_gemv.hpp | 21 +- .../unit_test/Test_Blas2_teamvector_gemv.hpp | 28 +- .../blas/unit_test/Test_Blas3_gemm.hpp | 198 +- .../blas/unit_test/Test_Blas3_trmm.hpp | 299 +- 
.../blas/unit_test/Test_Blas3_trsm.hpp | 290 +- .../blas/unit_test/Test_Blas_Newton.hpp | 15 +- .../blas/unit_test/Test_Blas_rocblas.hpp | 3 +- .../blas/unit_test/Test_Blas_serial_axpy.hpp | 67 +- .../blas/unit_test/Test_Blas_serial_nrm2.hpp | 89 +- .../cmake/Modules/FindTPLROCBLAS.cmake | 58 +- .../cmake/Modules/FindTPLROCSOLVER.cmake | 55 +- .../cmake/Modules/FindTPLROCSPARSE.cmake | 54 +- .../cmake/kokkoskernels_eti_offsets.cmake | 4 +- .../cmake/kokkoskernels_tpls.cmake | 3 + .../cmake/kokkoskernels_tribits.cmake | 11 + .../common/impl/KokkosKernels_Iota.hpp | 10 +- .../common/impl/KokkosKernels_NaN.hpp | 43 + .../common/impl/KokkosKernels_SafeCompare.hpp | 3 +- .../common/impl/KokkosKernels_ViewUtils.hpp | 8 +- .../common/src/KokkosKernels_BitUtils.hpp | 3 +- .../KokkosKernels_BlockHashmapAccumulator.hpp | 117 +- .../common/src/KokkosKernels_BlockUtils.hpp | 46 +- .../common/src/KokkosKernels_Error.hpp | 37 +- .../src/KokkosKernels_ExecSpaceUtils.hpp | 121 +- .../common/src/KokkosKernels_Half.hpp | 65 - .../src/KokkosKernels_HashmapAccumulator.hpp | 173 +- .../common/src/KokkosKernels_IOUtils.hpp | 28 +- .../common/src/KokkosKernels_LowerBound.hpp | 95 +- .../common/src/KokkosKernels_Macros.hpp | 11 +- .../common/src/KokkosKernels_Predicates.hpp | 39 +- .../src/KokkosKernels_PrintConfiguration.hpp | 11 +- .../common/src/KokkosKernels_PrintUtils.hpp | 34 +- .../common/src/KokkosKernels_SimpleUtils.hpp | 187 +- .../common/src/KokkosKernels_Sorting.hpp | 398 +- .../common/src/KokkosKernels_TplsVersion.hpp | 6 +- ...Kernels_Uniform_Initialized_MemoryPool.hpp | 26 +- .../common/src/KokkosKernels_UpperBound.hpp | 13 +- .../common/src/KokkosKernels_Utils.hpp | 868 +-- .../common/src/KokkosKernels_VectorUtils.hpp | 35 +- .../common/src/KokkosKernels_helpers.hpp | 40 +- .../common/src/Kokkos_ArithTraits.hpp | 1004 +-- .../src/Kokkos_InnerProductSpaceTraits.hpp | 61 +- .../common/unit_test/Test_Common.hpp | 1 + .../unit_test/Test_Common_AlignPtrTo.hpp | 159 + 
.../unit_test/Test_Common_ArithTraits.hpp | 492 +- .../common/unit_test/Test_Common_Error.hpp | 3 +- .../common/unit_test/Test_Common_Iota.hpp | 18 +- .../unit_test/Test_Common_LowerBound.hpp | 79 +- .../Test_Common_PrintConfiguration.hpp | 4 +- .../common/unit_test/Test_Common_Sorting.hpp | 138 +- .../unit_test/Test_Common_UpperBound.hpp | 79 +- .../common/unit_test/Test_Common_Version.hpp | 3 +- .../common/unit_test/Test_Common_float128.hpp | 13 +- .../unit_test/Test_Common_set_bit_count.hpp | 58 +- .../example/batched_solve/examples_helper.hpp | 45 +- .../example/batched_solve/static_pivoting.cpp | 31 +- .../example/batched_solve/team_GMRES.cpp | 140 +- .../example/gmres/ex_real_A.cpp | 62 +- .../example/gmres/test_prec.cpp | 73 +- ...kosKernels_Example_Distance2GraphColor.cpp | 178 +- .../example/graph/PartitioningExample | Bin 21536 -> 0 bytes .../example/graph/PartitioningExample.cpp | 2 +- packages/kokkos-kernels/example/half/xpy.cpp | 21 +- ...kkosKernels_Example_HashmapAccumulator.cpp | 79 +- .../example/wiki/blas/abs/abs.cpp | 3 +- .../graph/KokkosGraph_wiki_9pt_stencil.hpp | 10 +- .../graph/KokkosGraph_wiki_coarsening.cpp | 5 +- .../wiki/graph/KokkosGraph_wiki_coloring.cpp | 17 +- .../wiki/graph/KokkosGraph_wiki_mis2.cpp | 19 +- .../wiki/graph/KokkosGraph_wiki_rcm.cpp | 21 +- .../example/wiki/sparse/CMakeLists.txt | 5 + .../sparse/KokkosSparse_wiki_bsrmatrix.cpp | 34 +- .../sparse/KokkosSparse_wiki_bsrmatrix_2.cpp | 228 + .../sparse/KokkosSparse_wiki_crsmatrix.cpp | 22 +- .../sparse/KokkosSparse_wiki_gauss_seidel.cpp | 34 +- .../wiki/sparse/KokkosSparse_wiki_spadd.cpp | 22 +- .../wiki/sparse/KokkosSparse_wiki_spgemm.cpp | 22 +- .../wiki/sparse/KokkosSparse_wiki_spmv.cpp | 24 +- .../graph/impl/KokkosGraph_BFS_impl.hpp | 80 +- .../impl/KokkosGraph_Distance1Color_impl.hpp | 1136 ++-- .../impl/KokkosGraph_Distance2Color_impl.hpp | 412 +- .../impl/KokkosGraph_Distance2MIS_impl.hpp | 365 +- .../KokkosGraph_ExplicitCoarsening_impl.hpp | 166 +- 
.../graph/impl/KokkosGraph_color_d1_spec.hpp | 89 +- .../src/KokkosGraph_CoarsenConstruct.hpp | 1050 ++-- .../src/KokkosGraph_CoarsenHeuristics.hpp | 229 +- .../graph/src/KokkosGraph_Distance1Color.hpp | 45 +- .../src/KokkosGraph_Distance1ColorHandle.hpp | 292 +- .../graph/src/KokkosGraph_Distance2Color.hpp | 80 +- .../src/KokkosGraph_Distance2ColorHandle.hpp | 167 +- .../src/KokkosGraph_ExplicitCoarsening.hpp | 68 +- .../graph/src/KokkosGraph_MIS2.hpp | 49 +- .../graph/src/KokkosGraph_Triangle.hpp | 231 +- .../graph/unit_test/Test_Graph_coarsen.hpp | 221 +- .../unit_test/Test_Graph_graph_color.hpp | 118 +- .../Test_Graph_graph_color_deterministic.hpp | 75 +- .../Test_Graph_graph_color_distance2.hpp | 291 +- .../graph/unit_test/Test_Graph_mis2.hpp | 189 +- .../graph/unit_test/Test_Graph_rcm.hpp | 205 +- .../lapack/impl/KokkosLapack_gesv_spec.hpp | 88 +- .../lapack/impl/KokkosLapack_svd_spec.hpp | 114 +- .../lapack/impl/KokkosLapack_trtri_impl.hpp | 23 +- .../lapack/impl/KokkosLapack_trtri_spec.hpp | 57 +- .../lapack/src/KokkosLapack_gesv.hpp | 71 +- .../lapack/src/KokkosLapack_svd.hpp | 114 +- .../lapack/src/KokkosLapack_trtri.hpp | 25 +- .../lapack/tpls/KokkosLapack_Cuda_tpl.hpp | 3 +- .../lapack/tpls/KokkosLapack_Host_tpl.cpp | 128 +- .../lapack/tpls/KokkosLapack_Host_tpl.hpp | 14 +- .../lapack/tpls/KokkosLapack_cusolver.hpp | 34 +- .../tpls/KokkosLapack_gesv_tpl_spec_avail.hpp | 147 +- .../tpls/KokkosLapack_gesv_tpl_spec_decl.hpp | 546 +- .../tpls/KokkosLapack_svd_tpl_spec_avail.hpp | 174 +- .../tpls/KokkosLapack_svd_tpl_spec_decl.hpp | 617 +- .../KokkosLapack_trtri_tpl_spec_avail.hpp | 91 +- .../tpls/KokkosLapack_trtri_tpl_spec_decl.hpp | 253 +- .../lapack/unit_test/Test_Lapack_gesv.hpp | 177 +- .../lapack/unit_test/Test_Lapack_svd.hpp | 105 +- .../lapack/unit_test/Test_Lapack_trtri.hpp | 107 +- packages/kokkos-kernels/master_history.txt | 1 + .../ode/impl/KokkosODE_BDF_impl.hpp | 183 +- .../ode/impl/KokkosODE_Newton_impl.hpp | 34 +- 
.../impl/KokkosODE_RungeKuttaTables_impl.hpp | 97 +- .../ode/impl/KokkosODE_RungeKutta_impl.hpp | 42 +- .../kokkos-kernels/ode/src/KokkosODE_BDF.hpp | 43 +- .../ode/src/KokkosODE_Newton.hpp | 15 +- .../ode/src/KokkosODE_RungeKutta.hpp | 14 +- .../ode/src/KokkosODE_Types.hpp | 14 +- .../ode/unit_test/Test_ODE_BDF.hpp | 272 +- .../ode/unit_test/Test_ODE_Newton.hpp | 152 +- .../ode/unit_test/Test_ODE_RK.hpp | 197 +- .../ode/unit_test/Test_ODE_RK_chem.hpp | 50 +- .../perf_test/Benchmark_Context.hpp | 42 +- .../KokkosKernels_perf_test_instantiation.hpp | 7 +- .../KokkosKernels_perf_test_utilities.hpp | 69 +- .../perf_test/PerfTestUtilities.cpp | 4 +- .../perf_test/PerfTestUtilities.hpp | 37 +- ...okkosBatched_Test_BlockJacobi_Tutorial.cpp | 92 +- .../KokkosBatched_Test_BlockTridiagDirect.cpp | 530 +- .../KokkosBatched_Test_BlockTridiagJacobi.cpp | 471 +- .../KokkosBatched_Test_Gemm_Cuda.cpp | 330 +- .../KokkosBatched_Test_Gemm_Host.hpp | 185 +- .../KokkosBatched_Test_Gemv_Host.hpp | 110 +- .../do-not-use/KokkosBatched_Test_LU_Cuda.cpp | 233 +- .../do-not-use/KokkosBatched_Test_LU_Host.hpp | 101 +- .../KokkosBatched_Test_Trsm_Cuda.cpp | 476 +- .../KokkosBatched_Test_Trsm_Host.hpp | 204 +- .../CG/Functor_TestBatchedTeamVectorCG_1.hpp | 35 +- .../CG/Functor_TestBatchedTeamVectorCG_2.hpp | 58 +- .../CG/Functor_TestBatchedTeamVectorCG_3.hpp | 57 +- .../sparse/CG/KokkosBatched_Test_CG.cpp | 172 +- .../Functor_TestBatchedTeamVectorGMRES_1.hpp | 56 +- .../Functor_TestBatchedTeamVectorGMRES_2.hpp | 99 +- .../Functor_TestBatchedTeamVectorGMRES_3.hpp | 100 +- .../sparse/GMRES/KokkosBatched_Test_GMRES.cpp | 290 +- .../KokkosBatched_Test_Sparse_Helper.hpp | 30 +- .../sparse/SPMV/KokkosBatched_SPMV_View.hpp | 212 +- .../sparse/SPMV/KokkosBatched_Test_SPMV.cpp | 247 +- .../KokkosBatched_Test_cusolverDn.cpp | 73 +- .../KokkosBatched_Test_cusolverSp.cpp | 168 +- .../perf_test/blas/KokkosBlas_blas1.cpp | 34 +- .../perf_test/blas/KokkosBlas_blas1_MV.cpp | 69 +- 
.../blas1/KokkosBlas_dot_mv_perf_test.cpp | 28 +- .../KokkosBlas_dot_mv_perf_test_benchmark.cpp | 18 +- .../blas/blas1/KokkosBlas_dot_perf_test.cpp | 22 +- .../blas/blas1/KokkosBlas_dot_perf_test.hpp | 6 +- .../KokkosBlas_dot_perf_test_benchmark.cpp | 15 +- .../KokkosBlas_dot_tracked_perf_test.cpp | 7 +- .../blas/blas1/KokkosBlas_perf_test.cpp | 54 +- .../blas1/KokkosBlas_team_dot_perf_test.cpp | 29 +- .../blas1/KokkosBlas_team_dot_perf_test.hpp | 28 +- ...okkosBlas_team_dot_perf_test_benchmark.cpp | 25 +- .../KokkosBlas_team_dot_tracked_perf_test.cpp | 15 +- .../perf_test/blas/blas1/tracked_testing.hpp | 9 +- .../perf_test/blas/blas2/CMakeLists.txt | 9 +- .../blas/blas2/KokkosBlas2_gemv_perf_test.cpp | 43 +- .../blas/blas2/KokkosBlas2_gemv_perf_test.hpp | 15 +- .../KokkosBlas2_gemv_perf_test_benchmark.cpp | 51 +- .../KokkosBlas2_gemv_tracked_perf_test.cpp | 4 +- .../KokkosBlas2_ger_perf_test_benchmark.cpp | 312 + .../perf_test/blas/blas2/tracked_testing.hpp | 6 +- .../blas/blas3/KokkosBlas3_common.hpp | 13 +- .../blas/blas3/KokkosBlas3_gemm_perf_test.hpp | 1340 ++-- .../KokkosBlas3_gemm_standalone_perf_test.cpp | 24 +- ...s3_gemm_standalone_perf_test_benchmark.cpp | 70 +- .../KokkosBlas3_gemm_tracked_perf_test.cpp | 15 +- .../KokkosBlas3_gemm_tracked_perf_test.hpp | 18 +- .../blas/blas3/KokkosBlas3_perf_test.cpp | 92 +- .../blas/blas3/KokkosBlas3_trmm_perf_test.hpp | 308 +- .../blas/blas3/KokkosBlas_trtri_perf_test.hpp | 186 +- .../perf_test/blas/blas3/tracked_testing.hpp | 9 +- .../perf_test/graph/KokkosGraph_color.cpp | 260 +- .../perf_test/graph/KokkosGraph_color_d2.cpp | 288 +- .../perf_test/graph/KokkosGraph_mis_d2.cpp | 61 +- .../perf_test/graph/KokkosGraph_triangle.cpp | 120 +- .../lapack/KokkosLapack_SVD_benchmark.cpp | 26 +- .../perf_test/ode/KokkosODE_BDF.cpp | 64 +- .../perf_test/ode/KokkosODE_RK.cpp | 113 +- .../performance/performance_example.cpp | 30 +- .../performance/performance_validate.cpp | 166 +- .../sparse/KokkosSparse_block_pcg.cpp | 194 +- 
.../perf_test/sparse/KokkosSparse_gs.cpp | 124 +- .../perf_test/sparse/KokkosSparse_kk_spmv.cpp | 38 +- .../perf_test/sparse/KokkosSparse_mdf.cpp | 69 +- .../sparse/KokkosSparse_par_ilut.cpp | 144 +- .../perf_test/sparse/KokkosSparse_pcg.cpp | 70 +- .../perf_test/sparse/KokkosSparse_pcg.hpp | 136 +- .../sparse/KokkosSparse_run_spgemm_jacobi.hpp | 258 +- .../perf_test/sparse/KokkosSparse_spadd.cpp | 255 +- .../perf_test/sparse/KokkosSparse_spgemm.cpp | 223 +- .../sparse/KokkosSparse_spgemm_jacobi.cpp | 63 +- .../perf_test/sparse/KokkosSparse_spiluk.cpp | 261 +- .../perf_test/sparse/KokkosSparse_spmv.cpp | 51 +- .../sparse/KokkosSparse_spmv_benchmark.cpp | 59 +- .../sparse/KokkosSparse_spmv_bsr.cpp | 177 +- .../KokkosSparse_spmv_bsr_benchmark.cpp | 123 +- .../sparse/KokkosSparse_spmv_merge.cpp | 72 +- .../sparse/KokkosSparse_spmv_struct.cpp | 99 +- .../KokkosSparse_spmv_struct_tuning.cpp | 280 +- .../sparse/KokkosSparse_spmv_test.cpp | 29 +- .../sparse/KokkosSparse_spmv_test.hpp | 78 +- .../perf_test/sparse/KokkosSparse_sptrsv.cpp | 275 +- .../sparse/KokkosSparse_sptrsv_aux.hpp | 266 +- .../sparse/KokkosSparse_sptrsv_cholmod.cpp | 175 +- .../sparse/KokkosSparse_sptrsv_superlu.cpp | 299 +- .../sparse/KokkosSparse_sptrsv_supernode.cpp | 114 +- .../perf_test/sparse/spmv/CuSparse_SPMV.hpp | 33 +- .../sparse/spmv/KokkosKernels_SPMV.hpp | 24 +- .../sparse/spmv/KokkosKernels_spmv_data.hpp | 6 +- .../perf_test/sparse/spmv/Kokkos_SPMV.hpp | 112 +- .../sparse/spmv/Kokkos_SPMV_Inspector.hpp | 56 +- .../perf_test/sparse/spmv/MKL_SPMV.hpp | 22 +- .../sparse/spmv/OpenMPDynamic_SPMV.hpp | 3 +- .../sparse/spmv/OpenMPSmartStatic_SPMV.hpp | 17 +- .../sparse/spmv/OpenMPStatic_SPMV.hpp | 3 +- .../perf_test/sparse/spmv/matrix_market.hpp | 61 +- .../perf_test/sparse/tracked_testing.hpp | 3 +- .../perf_test/test_crsmatrix.cpp | 177 +- packages/kokkos-kernels/perf_test/test_mv.cpp | 59 +- .../kokkos-kernels/scripts/cm_test_all_sandia | 23 +- .../scripts/docker/Dockerfile.hip | 3 +- 
.../scripts/docker/Dockerfile.sycl | 3 +- .../sparse/impl/KokkosSparse_bspgemm_impl.hpp | 92 +- .../impl/KokkosSparse_bspgemm_impl_def.hpp | 30 +- .../impl/KokkosSparse_bspgemm_impl_kkmem.hpp | 955 ++- .../impl/KokkosSparse_bspgemm_impl_seq.hpp | 68 +- .../impl/KokkosSparse_bspgemm_impl_speed.hpp | 382 +- .../KokkosSparse_bspgemm_numeric_spec.hpp | 457 +- .../impl/KokkosSparse_bsr_to_crs_impl.hpp | 24 +- ...KokkosSparse_cluster_gauss_seidel_impl.hpp | 463 +- .../sparse/impl/KokkosSparse_coo2crs_impl.hpp | 82 +- .../KokkosSparse_crs_detect_block_size.hpp | 14 +- .../impl/KokkosSparse_crs_to_bsr_impl.hpp | 50 +- .../impl/KokkosSparse_gauss_seidel_impl.hpp | 1342 ++-- .../impl/KokkosSparse_gauss_seidel_spec.hpp | 656 +- ...kkosSparse_getDiagCopyWithOffsets_impl.hpp | 61 +- .../sparse/impl/KokkosSparse_gmres_impl.hpp | 115 +- .../sparse/impl/KokkosSparse_gmres_spec.hpp | 135 +- .../sparse/impl/KokkosSparse_mdf_impl.hpp | 376 +- .../sparse/impl/KokkosSparse_merge_matrix.hpp | 64 +- .../KokkosSparse_par_ilut_numeric_impl.hpp | 500 +- .../KokkosSparse_par_ilut_numeric_spec.hpp | 258 +- .../KokkosSparse_par_ilut_symbolic_impl.hpp | 10 +- .../KokkosSparse_par_ilut_symbolic_spec.hpp | 149 +- .../impl/KokkosSparse_partitioning_impl.hpp | 104 +- .../impl/KokkosSparse_sor_sequential_impl.hpp | 48 +- .../impl/KokkosSparse_spadd_numeric_impl.hpp | 150 +- .../impl/KokkosSparse_spadd_numeric_spec.hpp | 251 +- .../impl/KokkosSparse_spadd_symbolic_impl.hpp | 385 +- .../impl/KokkosSparse_spadd_symbolic_spec.hpp | 168 +- .../impl/KokkosSparse_spgemm_imp_outer.hpp | 371 +- .../sparse/impl/KokkosSparse_spgemm_impl.hpp | 486 +- .../impl/KokkosSparse_spgemm_impl_color.hpp | 592 +- .../KokkosSparse_spgemm_impl_compression.hpp | 829 ++- .../impl/KokkosSparse_spgemm_impl_def.hpp | 142 +- .../impl/KokkosSparse_spgemm_impl_kkmem.hpp | 1055 ++-- .../KokkosSparse_spgemm_impl_memaccess.hpp | 478 +- .../impl/KokkosSparse_spgemm_impl_seq.hpp | 86 +- .../impl/KokkosSparse_spgemm_impl_speed.hpp | 357 
+- .../KokkosSparse_spgemm_impl_symbolic.hpp | 1823 +++--- .../KokkosSparse_spgemm_impl_triangle.hpp | 1499 ++--- ...se_spgemm_impl_triangle_no_compression.hpp | 811 ++- ...kkosSparse_spgemm_jacobi_denseacc_impl.hpp | 226 +- .../KokkosSparse_spgemm_jacobi_seq_impl.hpp | 58 +- ...kosSparse_spgemm_jacobi_sparseacc_impl.hpp | 1048 ++-- .../impl/KokkosSparse_spgemm_jacobi_spec.hpp | 299 +- .../impl/KokkosSparse_spgemm_noreuse_spec.hpp | 114 +- .../impl/KokkosSparse_spgemm_numeric_spec.hpp | 256 +- .../KokkosSparse_spgemm_symbolic_spec.hpp | 178 +- .../impl/KokkosSparse_spiluk_numeric_impl.hpp | 429 +- .../impl/KokkosSparse_spiluk_numeric_spec.hpp | 327 +- .../KokkosSparse_spiluk_symbolic_impl.hpp | 106 +- .../KokkosSparse_spiluk_symbolic_spec.hpp | 203 +- .../impl/KokkosSparse_spmv_bsrmatrix_impl.hpp | 956 ++- .../KokkosSparse_spmv_bsrmatrix_impl_v42.hpp | 27 +- .../impl/KokkosSparse_spmv_bsrmatrix_spec.hpp | 350 +- .../sparse/impl/KokkosSparse_spmv_impl.hpp | 796 +-- .../impl/KokkosSparse_spmv_impl_merge.hpp | 184 +- .../impl/KokkosSparse_spmv_impl_omp.hpp | 19 +- .../sparse/impl/KokkosSparse_spmv_spec.hpp | 295 +- .../impl/KokkosSparse_spmv_struct_impl.hpp | 1016 ++- .../impl/KokkosSparse_spmv_struct_spec.hpp | 257 +- .../impl/KokkosSparse_spmv_team_impl.hpp | 132 +- .../impl/KokkosSparse_spmv_team_spec.hpp | 48 +- .../KokkosSparse_sptrsv_cuSPARSE_impl.hpp | 392 +- .../impl/KokkosSparse_sptrsv_solve_impl.hpp | 5478 +++++------------ .../impl/KokkosSparse_sptrsv_solve_spec.hpp | 270 +- .../KokkosSparse_sptrsv_symbolic_impl.hpp | 210 +- .../KokkosSparse_sptrsv_symbolic_spec.hpp | 102 +- .../sparse/impl/KokkosSparse_trsv_impl.hpp | 110 +- .../sparse/impl/KokkosSparse_trsv_spec.hpp | 184 +- ...okkosSparse_twostage_gauss_seidel_impl.hpp | 369 +- .../sparse/src/KokkosKernels_Controls.hpp | 38 +- .../sparse/src/KokkosKernels_Handle.hpp | 390 +- .../sparse/src/KokkosSparse_BsrMatrix.hpp | 336 +- .../sparse/src/KokkosSparse_CcsMatrix.hpp | 67 +- 
.../sparse/src/KokkosSparse_CooMatrix.hpp | 33 +- .../sparse/src/KokkosSparse_CrsMatrix.hpp | 247 +- .../sparse/src/KokkosSparse_IOUtils.hpp | 575 +- .../sparse/src/KokkosSparse_LUPrec.hpp | 99 +- .../sparse/src/KokkosSparse_MatrixPrec.hpp | 8 +- .../sparse/src/KokkosSparse_OrdinalTraits.hpp | 16 +- .../src/KokkosSparse_Preconditioner.hpp | 8 +- .../sparse/src/KokkosSparse_SortCrs.hpp | 432 +- .../sparse/src/KokkosSparse_Utils.hpp | 1579 ++--- .../src/KokkosSparse_Utils_cusparse.hpp | 55 +- .../sparse/src/KokkosSparse_Utils_mkl.hpp | 139 +- .../src/KokkosSparse_Utils_rocsparse.hpp | 61 +- .../sparse/src/KokkosSparse_ccs2crs.hpp | 61 +- .../sparse/src/KokkosSparse_coo2crs.hpp | 36 +- .../sparse/src/KokkosSparse_crs2ccs.hpp | 63 +- .../sparse/src/KokkosSparse_crs2coo.hpp | 76 +- .../sparse/src/KokkosSparse_findRelOffset.hpp | 12 +- .../sparse/src/KokkosSparse_gauss_seidel.hpp | 756 +-- .../src/KokkosSparse_gauss_seidel_handle.hpp | 401 +- .../sparse/src/KokkosSparse_getDiagCopy.hpp | 35 +- .../sparse/src/KokkosSparse_gmres.hpp | 122 +- .../sparse/src/KokkosSparse_gmres_handle.hpp | 26 +- .../sparse/src/KokkosSparse_mdf.hpp | 126 +- .../sparse/src/KokkosSparse_mdf_handle.hpp | 18 +- .../sparse/src/KokkosSparse_par_ilut.hpp | 443 +- .../src/KokkosSparse_par_ilut_handle.hpp | 72 +- .../sparse/src/KokkosSparse_spadd.hpp | 275 +- .../sparse/src/KokkosSparse_spadd_handle.hpp | 18 +- .../sparse/src/KokkosSparse_spgemm.hpp | 120 +- .../sparse/src/KokkosSparse_spgemm_handle.hpp | 264 +- .../sparse/src/KokkosSparse_spgemm_jacobi.hpp | 141 +- .../src/KokkosSparse_spgemm_numeric.hpp | 157 +- .../src/KokkosSparse_spgemm_symbolic.hpp | 94 +- .../sparse/src/KokkosSparse_spiluk.hpp | 1005 ++- .../sparse/src/KokkosSparse_spiluk_handle.hpp | 69 +- .../sparse/src/KokkosSparse_spmv.hpp | 713 +-- .../src/KokkosSparse_spmv_deprecated.hpp | 243 +- .../sparse/src/KokkosSparse_spmv_handle.hpp | 116 +- .../sparse/src/KokkosSparse_spmv_team.hpp | 122 +- .../sparse/src/KokkosSparse_sptrsv.hpp | 
534 +- .../src/KokkosSparse_sptrsv_cholmod.hpp | 84 +- .../sparse/src/KokkosSparse_sptrsv_handle.hpp | 349 +- .../src/KokkosSparse_sptrsv_superlu.hpp | 94 +- .../src/KokkosSparse_sptrsv_supernode.hpp | 547 +- .../sparse/src/KokkosSparse_trsv.hpp | 70 +- .../tpls/KokkosKernels_tpl_handles_def.hpp | 4 +- ...kkosSparse_gauss_seidel_tpl_spec_avail.hpp | 7 +- .../KokkosSparse_gmres_tpl_spec_avail.hpp | 3 +- ...Sparse_par_ilut_numeric_tpl_spec_avail.hpp | 6 +- ...parse_par_ilut_symbolic_tpl_spec_avail.hpp | 3 +- ...kkosSparse_spadd_numeric_tpl_spec_decl.hpp | 420 +- ...kosSparse_spadd_symbolic_tpl_spec_decl.hpp | 345 +- .../KokkosSparse_spadd_tpl_spec_avail.hpp | 168 +- ...kosSparse_spgemm_jacobi_tpl_spec_avail.hpp | 7 +- ...osSparse_spgemm_noreuse_tpl_spec_avail.hpp | 45 +- ...kosSparse_spgemm_noreuse_tpl_spec_decl.hpp | 199 +- ...osSparse_spgemm_numeric_tpl_spec_avail.hpp | 180 +- ...kosSparse_spgemm_numeric_tpl_spec_decl.hpp | 660 +- ...sSparse_spgemm_symbolic_tpl_spec_avail.hpp | 118 +- ...osSparse_spgemm_symbolic_tpl_spec_decl.hpp | 606 +- ...osSparse_spiluk_numeric_tpl_spec_avail.hpp | 7 +- ...sSparse_spiluk_symbolic_tpl_spec_avail.hpp | 5 +- ...osSparse_spmv_bsrmatrix_tpl_spec_avail.hpp | 334 +- ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 1093 ++-- .../KokkosSparse_spmv_mv_tpl_spec_avail.hpp | 158 +- .../KokkosSparse_spmv_mv_tpl_spec_decl.hpp | 441 +- .../tpls/KokkosSparse_spmv_tpl_spec_avail.hpp | 283 +- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 839 +-- ...kkosSparse_sptrsv_solve_tpl_spec_avail.hpp | 4 +- .../tpls/KokkosSparse_trsv_tpl_spec_avail.hpp | 3 +- .../sparse/unit_test/Test_Sparse.hpp | 1 + .../unit_test/Test_Sparse_BsrMatrix.hpp | 86 +- .../sparse/unit_test/Test_Sparse_Controls.hpp | 3 +- .../unit_test/Test_Sparse_CrsMatrix.hpp | 92 +- .../sparse/unit_test/Test_Sparse_IOUtils.hpp | 179 + .../unit_test/Test_Sparse_MergeMatrix.hpp | 45 +- .../sparse/unit_test/Test_Sparse_SortCrs.hpp | 278 +- .../Test_Sparse_TestUtils_RandCsMat.hpp | 15 +- 
.../unit_test/Test_Sparse_Transpose.hpp | 193 +- .../sparse/unit_test/Test_Sparse_Utils.hpp | 47 +- .../Test_Sparse_block_gauss_seidel.hpp | 190 +- .../sparse/unit_test/Test_Sparse_bspgemm.hpp | 116 +- .../sparse/unit_test/Test_Sparse_ccs2crs.hpp | 51 +- .../sparse/unit_test/Test_Sparse_coo2crs.hpp | 125 +- .../sparse/unit_test/Test_Sparse_crs2ccs.hpp | 51 +- .../sparse/unit_test/Test_Sparse_crs2coo.hpp | 42 +- .../sparse/unit_test/Test_Sparse_csc2csr.hpp | 37 +- .../Test_Sparse_extractCrsDiagonalBlocks.hpp | 39 +- .../unit_test/Test_Sparse_findRelOffset.hpp | 98 +- .../unit_test/Test_Sparse_gauss_seidel.hpp | 576 +- .../sparse/unit_test/Test_Sparse_gmres.hpp | 41 +- .../sparse/unit_test/Test_Sparse_mdf.hpp | 133 +- .../sparse/unit_test/Test_Sparse_par_ilut.hpp | 125 +- .../Test_Sparse_removeCrsMatrixZeros.hpp | 123 +- .../unit_test/Test_Sparse_replaceSumInto.hpp | 66 +- .../Test_Sparse_replaceSumIntoLonger.hpp | 135 +- .../unit_test/Test_Sparse_rocsparse.hpp | 7 +- .../sparse/unit_test/Test_Sparse_spadd.hpp | 147 +- .../sparse/unit_test/Test_Sparse_spgemm.hpp | 265 +- .../unit_test/Test_Sparse_spgemm_jacobi.hpp | 108 +- .../sparse/unit_test/Test_Sparse_spiluk.hpp | 292 +- .../sparse/unit_test/Test_Sparse_spmv.hpp | 664 +- .../sparse/unit_test/Test_Sparse_spmv_bsr.hpp | 268 +- .../sparse/unit_test/Test_Sparse_sptrsv.hpp | 649 +- .../sparse/unit_test/Test_Sparse_trsv.hpp | 81 +- .../sparse/unit_test/Test_vector_fixtures.hpp | 40 +- .../sparse/unit_test/matrixIssue402.hpp | 4476 ++++++-------- .../KokkosKernels_MatrixConverter.cpp | 48 +- .../test_common/KokkosKernels_MyCRSMatrix.hpp | 80 +- .../test_common/KokkosKernels_TestUtils.hpp | 318 +- .../KokkosKernels_Test_Structured_Matrix.hpp | 423 +- ...KokkosKernels_WriteBinaryFromBinSrcDst.cpp | 41 +- .../test_common/Kokkos_Performance.hpp | 138 +- .../Test_Common_Test_All_Type_Combos.hpp | 51 +- .../kokkos-kernels/test_common/Test_Cuda.hpp | 6 +- .../kokkos-kernels/test_common/Test_HIP.hpp | 8 +- 893 files changed, 
56611 insertions(+), 90592 deletions(-) create mode 100644 packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Impl.hpp create mode 100644 packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Internal.hpp create mode 100644 packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Impl.hpp create mode 100644 packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Internal.hpp create mode 100644 packages/kokkos-kernels/batched/dense/src/KokkosBatched_Pttrf.hpp create mode 100644 packages/kokkos-kernels/batched/dense/src/KokkosBatched_Tbsv.hpp create mode 100644 packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf.hpp create mode 100644 packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Complex.hpp create mode 100644 packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Real.hpp create mode 100644 packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv.hpp create mode 100644 packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Complex.hpp create mode 100644 packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Real.hpp create mode 100644 packages/kokkos-kernels/common/impl/KokkosKernels_NaN.hpp delete mode 100644 packages/kokkos-kernels/common/src/KokkosKernels_Half.hpp create mode 100644 packages/kokkos-kernels/common/unit_test/Test_Common_AlignPtrTo.hpp delete mode 100755 packages/kokkos-kernels/example/graph/PartitioningExample create mode 100644 packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix_2.cpp create mode 100644 packages/kokkos-kernels/perf_test/blas/blas2/KokkosBlas2_ger_perf_test_benchmark.cpp create mode 100644 packages/kokkos-kernels/sparse/unit_test/Test_Sparse_IOUtils.hpp diff --git a/packages/kokkos-kernels/.clang-format b/packages/kokkos-kernels/.clang-format index db5f94fa2ebb..ca42ad54e790 100644 --- a/packages/kokkos-kernels/.clang-format +++ 
b/packages/kokkos-kernels/.clang-format @@ -4,3 +4,4 @@ SortIncludes: false AlignConsecutiveAssignments: true AllowShortCaseLabelsOnASingleLine: true AllowShortIfStatementsOnASingleLine: true +ColumnLimit: 120 diff --git a/packages/kokkos-kernels/CHANGELOG.md b/packages/kokkos-kernels/CHANGELOG.md index 9cb40b5e74ae..cefc116c83a0 100644 --- a/packages/kokkos-kernels/CHANGELOG.md +++ b/packages/kokkos-kernels/CHANGELOG.md @@ -1,5 +1,77 @@ # Change Log +## [4.4.00](https://github.com/kokkos/kokkos-kernels/tree/4.4.00) +[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.3.01...4.4.00) + +### New Features + +#### Sparse +- Add support for BSRs to sptrsv [\#2281](https://github.com/kokkos/kokkos-kernels/pull/2281) + +#### Batched updates +- Add batched serial tbsv [\#2202](https://github.com/kokkos/kokkos-kernels/pull/2202) +- Implement batched serial pttrf [\#2256](https://github.com/kokkos/kokkos-kernels/pull/2256) + +### Enhancements: +- clang-format version update, increase column limit to 120. 
[\#2255](https://github.com/kokkos/kokkos-kernels/pull/2255) +- Add big reformat commits to ignore revs for blame [\#2286](https://github.com/kokkos/kokkos-kernels/pull/2286) + +#### Sparse +- RCM fixes, improvements [\#2254](https://github.com/kokkos/kokkos-kernels/pull/2254) +- Support non-squared matrice in spmv_team [\#2273](https://github.com/kokkos/kokkos-kernels/pull/2273) + +#### Common utilities +- c++17: add `[[fallthrough]]` attribute [\#1493](https://github.com/kokkos/kokkos-kernels/pull/1493) + +#### Misc updates + +#### TPL support +- Performance improvement: disable cuBLAS dot wrapper [\#2206](https://github.com/kokkos/kokkos-kernels/pull/2206) +- SPMV TPLs: improve profile region labels [\#2219](https://github.com/kokkos/kokkos-kernels/pull/2219) +- cusparse spgemm: provide non-null row-ptr [\#2213](https://github.com/kokkos/kokkos-kernels/pull/2213) +- spmv_mv wrappers for rocsparse [\#2233](https://github.com/kokkos/kokkos-kernels/pull/2233) +- Update rocsparse algo defaults [\#2245](https://github.com/kokkos/kokkos-kernels/pull/2245) + +### Build System: +- cmake: add CMake language support for CUDA/HIP [\#2173](https://github.com/kokkos/kokkos-kernels/pull/2173) +- FindTPLROC*: updates to fix export of import targets [\#2250](https://github.com/kokkos/kokkos-kernels/pull/2250) + +### Documentation and Testing: +- Enable 3 at2 builds [\#2210](https://github.com/kokkos/kokkos-kernels/pull/2210) +- At2 ROCM+TPL fixes, remove volta70 too [\#2182](https://github.com/kokkos/kokkos-kernels/pull/2182) +- Add AutoTester2 CI Configs (Sans Power9 & ROCM w/ TPLS) [\#2174](https://github.com/kokkos/kokkos-kernels/pull/2174) +- Kokkos Kernels: initial security policy [\#2220](https://github.com/kokkos/kokkos-kernels/pull/2220) +- Sparse - BsrMatrix: adding new wiki example for documentation [\#2228](https://github.com/kokkos/kokkos-kernels/pull/2228) +- Add testing for transpose corner cases [\#2234](https://github.com/kokkos/kokkos-kernels/pull/2234) +- 
spgemm unit test: change matrix value distribution [\#2241](https://github.com/kokkos/kokkos-kernels/pull/2241) + +- docs.yml: change kokkos version to latest release [\#2199](https://github.com/kokkos/kokkos-kernels/pull/2199) + +### Cleanup: +- Bigger sptrsv cleanup [\#2280](https://github.com/kokkos/kokkos-kernels/pull/2280) +- Sparse - SpGEMM: labeling spgemm_symbolic in TPL layer [\#2193](https://github.com/kokkos/kokkos-kernels/pull/2193) +- A little sptrsv cleanup before the main block effort [\#2247](https://github.com/kokkos/kokkos-kernels/pull/2247) +- sparse: replace macros with constexpr bools [\#2260](https://github.com/kokkos/kokkos-kernels/pull/2260) +- spgemm: add profiling regions to native implementations [\#2253](https://github.com/kokkos/kokkos-kernels/pull/2253) + +### Bug Fixes: +- Sparse - SpMV: removing calls to unsuported oneapi - MKL functions [\#2274](https://github.com/kokkos/kokkos-kernels/pull/2274) +- Sycl gemv beta [\#2276](https://github.com/kokkos/kokkos-kernels/pull/2276) +- Unify alignPtrTo implementation [\#2275](https://github.com/kokkos/kokkos-kernels/pull/2275) +- SpMV: Test NaN, fix NaN handling when beta=0 [\#2188](https://github.com/kokkos/kokkos-kernels/pull/2188) +- KokkosLapack_svd_tpl_spec_decl: defer to MKL spec when LAPACK also enabled [\#2171](https://github.com/kokkos/kokkos-kernels/pull/2171) +- Fix spmv regressions [\#2204](https://github.com/kokkos/kokkos-kernels/pull/2204) +- Sparse - CrsToBsr: fix type mismatch [\#2242](https://github.com/kokkos/kokkos-kernels/pull/2242) +- Fix logic around merge path with TPLs [\#2240](https://github.com/kokkos/kokkos-kernels/pull/2240) +- In deprecated spmv, fix Controls algorithm mapping [\#2246](https://github.com/kokkos/kokkos-kernels/pull/2246) +- kokkoskernels_tpls.cmake: remove duplicates arguments when creating a… [\#2244](https://github.com/kokkos/kokkos-kernels/pull/2244) +- sparse: spadd_symbolic fences before device values used on host 
[\#2259](https://github.com/kokkos/kokkos-kernels/pull/2259) +- Fix warning about memcpy [\#2252](https://github.com/kokkos/kokkos-kernels/pull/2252) +- sycl: use alternative `alignPtrTo` when SYCL is enabled (SpGEMM) [\#2262](https://github.com/kokkos/kokkos-kernels/pull/2262) +- Rename `Impl::alignPtr` to `Impl::alignPtrTo`, allow it to infer argument type [\#2261](https://github.com/kokkos/kokkos-kernels/pull/2261) +- Workarounds for removed cusparse functions [\#2270](https://github.com/kokkos/kokkos-kernels/pull/2270) +- handle_t* -> std::unique_ptr in Bsr SpMV unit tests [\#2269](https://github.com/kokkos/kokkos-kernels/pull/2269) + ## [4.3.01](https://github.com/kokkos/kokkos-kernels/tree/4.3.01) [Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.3.00...4.3.01) diff --git a/packages/kokkos-kernels/CMakeLists.txt b/packages/kokkos-kernels/CMakeLists.txt index 45e91a90f0d3..48608e756911 100644 --- a/packages/kokkos-kernels/CMakeLists.txt +++ b/packages/kokkos-kernels/CMakeLists.txt @@ -10,8 +10,8 @@ SET(KOKKOSKERNELS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) SET(KOKKOSKERNELS_TOP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(KokkosKernels_VERSION_MAJOR 4) -SET(KokkosKernels_VERSION_MINOR 3) -SET(KokkosKernels_VERSION_PATCH 1) +SET(KokkosKernels_VERSION_MINOR 4) +SET(KokkosKernels_VERSION_PATCH 0) SET(KokkosKernels_VERSION "${KokkosKernels_VERSION_MAJOR}.${KokkosKernels_VERSION_MINOR}.${KokkosKernels_VERSION_PATCH}") #Set variables for config file @@ -127,13 +127,22 @@ ELSE() IF (NOT KOKKOSKERNELS_HAS_TRILINOS AND NOT KOKKOSKERNELS_HAS_PARENT) # This is a standalone build FIND_PACKAGE(Kokkos REQUIRED) - IF((${Kokkos_VERSION} VERSION_GREATER_EQUAL "4.1.0") AND (${Kokkos_VERSION} VERSION_LESS_EQUAL "4.3.1")) + IF(Kokkos_COMPILE_LANGUAGE) + ENABLE_LANGUAGE(${Kokkos_COMPILE_LANGUAGE}) + IF(Kokkos_COMPILE_LANGUAGE STREQUAL CUDA) + SET(CMAKE_CUDA_ARCHITECTURES ${Kokkos_CUDA_ARCHITECTURES}) + ENDIF() + IF(Kokkos_COMPILE_LANGUAGE STREQUAL HIP) + 
SET(CMAKE_HIP_ARCHITECTURES ${Kokkos_HIP_ARCHITECTURES}) + ENDIF() + ENDIF() + IF(${Kokkos_VERSION} VERSION_GREATER_EQUAL "4.3.01") MESSAGE(STATUS "Found Kokkos version ${Kokkos_VERSION} at ${Kokkos_DIR}") - IF((${Kokkos_VERSION} VERSION_GREATER "4.3.99")) + IF((${Kokkos_VERSION} VERSION_GREATER "4.4.99")) MESSAGE(WARNING "Configuring with Kokkos ${Kokkos_VERSION} which is newer than the expected develop branch - version check may need update") ENDIF() ELSE() - MESSAGE(FATAL_ERROR "Kokkos Kernels ${KokkosKernels_VERSION} requires Kokkos_VERSION 4.1.0, 4.2.0, 4.2.1, 4.3.0, or 4.3.1") + MESSAGE(FATAL_ERROR "Kokkos Kernels ${KokkosKernels_VERSION} requires Kokkos 4.3.01 or greater (found ${Kokkos_VERSION})") ENDIF() ENDIF() diff --git a/packages/kokkos-kernels/README.md b/packages/kokkos-kernels/README.md index bdad1442ce45..173e3e33338d 100644 --- a/packages/kokkos-kernels/README.md +++ b/packages/kokkos-kernels/README.md @@ -1,3 +1,4 @@ +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/kokkos/kokkos-kernels/badge)](https://securityscorecards.dev/viewer/?uri=github.com/kokkos/kokkos-kernels) [![Generic badge](https://readthedocs.org/projects/kokkos-kernels/badge/?version=latest)](https://kokkos-kernels.readthedocs.io/en/latest/) ![KokkosKernels](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) diff --git a/packages/kokkos-kernels/batched/KokkosBatched_Util.hpp b/packages/kokkos-kernels/batched/KokkosBatched_Util.hpp index fc14bd5a19be..8a1cb0e01b73 100644 --- a/packages/kokkos-kernels/batched/KokkosBatched_Util.hpp +++ b/packages/kokkos-kernels/batched/KokkosBatched_Util.hpp @@ -48,7 +48,7 @@ #define __KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__ 1 #define __KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__ 1 #include "mkl.h" -//#include "mkl_types.h" +// #include "mkl_types.h" #endif #endif @@ -71,12 +71,11 @@ struct is_vector : public std::false_type {}; template struct is_same_mag_type { - static const bool is_specialized = 
(Kokkos::ArithTraits::is_specialized && - Kokkos::ArithTraits::is_specialized); + static const bool is_specialized = + (Kokkos::ArithTraits::is_specialized && Kokkos::ArithTraits::is_specialized); static const bool is_mag_type_same = - std::is_same::mag_type, - typename Kokkos::ArithTraits::mag_type>::value; + std::is_same::mag_type, typename Kokkos::ArithTraits::mag_type>::value; static const bool value = is_specialized && is_mag_type_same; }; @@ -87,42 +86,36 @@ using std::min; // view manipulation template -using MemoryTraits = Kokkos::MemoryTraits; template -using UnmanagedViewType = Kokkos::View< - typename ViewType::data_type, typename ViewType::array_layout, - typename ViewType::device_type, - MemoryTraits >; +using UnmanagedViewType = + Kokkos::View >; template -using ConstViewType = Kokkos::View< - typename ViewType::const_data_type, typename ViewType::array_layout, - typename ViewType::device_type, typename ViewType::memory_traits>; +using ConstViewType = Kokkos::View; template using ConstUnmanagedViewType = ConstViewType >; template -using ScratchViewType = Kokkos::View< - typename ViewType::data_type, typename ViewType::array_layout, - typename ViewType::execution_space::scratch_memory_space, - MemoryTraits >; +using ScratchViewType = Kokkos::View >; // helper for vector type template -KOKKOS_INLINE_FUNCTION - typename std::enable_if::value, size_t>::type - adjustDimension(const size_t &m) { +KOKKOS_INLINE_FUNCTION typename std::enable_if::value, size_t>::type adjustDimension( + const size_t &m) { return m; } template -KOKKOS_INLINE_FUNCTION - typename std::enable_if::value, size_t>::type - adjustDimension(const size_t &m) { +KOKKOS_INLINE_FUNCTION typename std::enable_if::value, size_t>::type adjustDimension( + const size_t &m) { return (m / T::vector_length + (m % T::vector_length > 0)); } @@ -132,9 +125,7 @@ struct Flush { // flush a large host buffer Kokkos::View _buf; - Flush() : _buf("Flush::buf", BufSize / sizeof(double)) { - 
Kokkos::deep_copy(_buf, 1); - } + Flush() : _buf("Flush::buf", BufSize / sizeof(double)) { Kokkos::deep_copy(_buf, 1); } KOKKOS_INLINE_FUNCTION void init(value_type &update) { update = 0; } @@ -147,9 +138,7 @@ struct Flush { void run() { double sum = 0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, BufSize / sizeof(double)), *this, - sum); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, BufSize / sizeof(double)), *this, sum); SpaceType().fence(); FILE *fp = fopen("/dev/null", "w"); fprintf(fp, "%f\n", sum); @@ -161,9 +150,7 @@ template struct Random; template -struct Random::value || - std::is_same::value, - T>::type> { +struct Random::value || std::is_same::value, T>::type> { Random(const unsigned int seed = 0) { srand(seed); } T value() { const auto val = (rand() / ((T)RAND_MAX) - 0.5) * 2.0; @@ -172,18 +159,16 @@ struct Random::value || }; template -struct Random >::value || - std::is_same >::value || - std::is_same >::value || - std::is_same >::value, - T>::type> { +struct Random< + T, typename std::enable_if< + std::is_same >::value || std::is_same >::value || + std::is_same >::value || std::is_same >::value, + T>::type> { Random(const unsigned int seed = 0) { srand(seed); } T value() { const auto rval = (rand() / ((double)RAND_MAX) - 0.5) * 2.0; const auto ival = (rand() / ((double)RAND_MAX) - 0.5) * 2.0; - return T(rval > 0 ? rval + 1.0e-3 : rval - 1.0e-3, - ival > 0 ? ival + 1.0e-3 : ival - 1.0e-3); + return T(rval > 0 ? rval + 1.0e-3 : rval - 1.0e-3, ival > 0 ? 
ival + 1.0e-3 : ival - 1.0e-3); } }; @@ -199,23 +184,18 @@ struct Timer { const double t = _clock.seconds(); std::string label = _label; label.resize(24); - std::cout << "KokkosKernels::Timer:: " << std::setw(26) << label - << std::setw(15) << std::scientific << t << " [sec] " - << std::endl; + std::cout << "KokkosKernels::Timer:: " << std::setw(26) << label << std::setw(15) << std::scientific << t + << " [sec] " << std::endl; } }; // Implicit vectorization template struct SIMD { - static_assert(std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same >::value || - std::is_same >::value || - std::is_same >::value || - std::is_same >::value || + static_assert(std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same >::value || std::is_same >::value || + std::is_same >::value || std::is_same >::value || std::is_same::value || std::is_same::value, "KokkosKernels:: Invalid SIMD<> type."); @@ -225,10 +205,8 @@ struct SIMD { // Intel AVX instruction device (explicit vectorization) template struct AVX { - static_assert(std::is_same::value || - std::is_same::value || - std::is_same >::value || - std::is_same >::value, + static_assert(std::is_same::value || std::is_same::value || + std::is_same >::value || std::is_same >::value, "KokkosKernels:: Invalid AVX<> type."); using value_type = T; }; @@ -304,17 +282,15 @@ using KokkosBlas::Mode; struct Util { template - KOKKOS_INLINE_FUNCTION static void packColMajor( - ValueType *KOKKOS_RESTRICT A, const int m, const int n, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { + KOKKOS_INLINE_FUNCTION static void packColMajor(ValueType *KOKKOS_RESTRICT A, const int m, const int n, + const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { for (int j = 0; j < n; ++j) for (int i = 0; i < m; ++i) A[i + j * m] = B[i * bs0 + j * bs1]; } template - 
KOKKOS_INLINE_FUNCTION static void packRowMajor( - ValueType *KOKKOS_RESTRICT A, const int m, const int n, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { + KOKKOS_INLINE_FUNCTION static void packRowMajor(ValueType *KOKKOS_RESTRICT A, const int m, const int n, + const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { for (int i = 0; i < m; ++i) for (int j = 0; j < n; ++j) A[i * n + j] = B[i * bs0 + j * bs1]; } @@ -366,8 +342,7 @@ struct Partition1x3 { ValueType *A0, *A1, *A2; KOKKOS_INLINE_FUNCTION - Partition1x3(const int arg_as1) - : as1(arg_as1), A0(NULL), A1(NULL), A2(NULL) {} + Partition1x3(const int arg_as1) : as1(arg_as1), A0(NULL), A1(NULL), A2(NULL) {} KOKKOS_INLINE_FUNCTION void partWithAL(const Partition1x2 &part, const int mA1) { @@ -403,9 +378,7 @@ struct Partition2x1 { } KOKKOS_INLINE_FUNCTION - void partWithAB(ValueType *A, const int mA, const int mAB) { - partWithAT(A, mA, mA - mAB); - } + void partWithAB(ValueType *A, const int mA, const int mAB) { partWithAT(A, mA, mA - mAB); } // A0 // A1 is merged into AT @@ -430,8 +403,7 @@ struct Partition3x1 { /* */ *A2; KOKKOS_INLINE_FUNCTION - Partition3x1(const int arg_as0) - : as0(arg_as0), A0(NULL), A1(NULL), A2(NULL) {} + Partition3x1(const int arg_as0) : as0(arg_as0), A0(NULL), A1(NULL), A2(NULL) {} KOKKOS_INLINE_FUNCTION void partWithAB(const Partition2x1 &part, const int mA1) { @@ -460,16 +432,10 @@ struct Partition2x2 { KOKKOS_INLINE_FUNCTION Partition2x2(const int arg_as0, const int arg_as1) - : as0(arg_as0), - as1(arg_as1), - ATL(NULL), - ATR(NULL), - ABL(NULL), - ABR(NULL) {} + : as0(arg_as0), as1(arg_as1), ATL(NULL), ATR(NULL), ABL(NULL), ABR(NULL) {} KOKKOS_INLINE_FUNCTION - void partWithATL(ValueType *A, const int /* mA */, const int /* nA */, - const int mATL, const int nATL) { + void partWithATL(ValueType *A, const int /* mA */, const int /* nA */, const int mATL, const int nATL) { ATL = A; ATR = ATL + nATL * as1; ABL = ATL + mATL * as0; @@ -477,8 +443,7 @@ 
struct Partition2x2 { } KOKKOS_INLINE_FUNCTION - void partWithABR(ValueType *A, const int mA, const int nA, const int mABR, - const int nABR) { + void partWithABR(ValueType *A, const int mA, const int nA, const int mABR, const int nABR) { partWithATL(A, mA, nA, mA - mABR, nA - nABR); } @@ -523,8 +488,7 @@ struct Partition3x3 { A22(NULL) {} KOKKOS_INLINE_FUNCTION - void partWithABR(const Partition2x2 &part, const int mA11, - const int nA11) { + void partWithABR(const Partition2x2 &part, const int mA11, const int nA11) { A00 = part.ATL; A01 = part.ATR; A02 = part.ATR + nA11 * as1; @@ -537,8 +501,7 @@ struct Partition3x3 { } KOKKOS_INLINE_FUNCTION - void partWithATL(const Partition2x2 &part, const int mA11, - const int nA11) { + void partWithATL(const Partition2x2 &part, const int mA11, const int nA11) { A00 = part.ATL; A01 = part.ATR - nA11 * as1; A02 = part.ATR; @@ -552,94 +515,74 @@ struct Partition3x3 { }; template -KOKKOS_INLINE_FUNCTION - typename std::enable_if::value, - void>::type - getIndices(const OrdinalType iTemp, const OrdinalType /*numRows*/, - const OrdinalType numMatrices, OrdinalType &iRow, - OrdinalType &iMatrix) { +KOKKOS_INLINE_FUNCTION typename std::enable_if::value, void>::type getIndices( + const OrdinalType iTemp, const OrdinalType /*numRows*/, const OrdinalType numMatrices, OrdinalType &iRow, + OrdinalType &iMatrix) { iRow = iTemp / numMatrices; iMatrix = iTemp % numMatrices; } template -KOKKOS_INLINE_FUNCTION - typename std::enable_if::value, - void>::type - getIndices(const OrdinalType iTemp, const OrdinalType numRows, - const OrdinalType /*numMatrices*/, OrdinalType &iRow, - OrdinalType &iMatrix) { +KOKKOS_INLINE_FUNCTION typename std::enable_if::value, void>::type getIndices( + const OrdinalType iTemp, const OrdinalType numRows, const OrdinalType /*numMatrices*/, OrdinalType &iRow, + OrdinalType &iMatrix) { iRow = iTemp % numRows; iMatrix = iTemp / numRows; } template -KOKKOS_INLINE_FUNCTION - typename std::enable_if::value, - void>::type 
- getIndices(const OrdinalType iTemp, const OrdinalType /*numRows*/, - const OrdinalType numMatrices, OrdinalType &iRow, - OrdinalType &iMatrix) { +KOKKOS_INLINE_FUNCTION typename std::enable_if::value, void>::type +getIndices(const OrdinalType iTemp, const OrdinalType /*numRows*/, const OrdinalType numMatrices, OrdinalType &iRow, + OrdinalType &iMatrix) { iRow = iTemp / numMatrices; iMatrix = iTemp % numMatrices; } template KOKKOS_INLINE_FUNCTION auto transpose_2d_view(ViewType v, const int *order) { - constexpr int rank = 2; - const int dim[] = {v.extent_int(1), v.extent_int(0)}; - using view_value_type = typename ViewType::value_type; - using execution_space_type = typename ViewType::execution_space; - using view_type = Kokkos::View; - Kokkos::LayoutStride stride = - Kokkos::LayoutStride::order_dimensions(rank, order, dim); + constexpr int rank = 2; + const int dim[] = {v.extent_int(1), v.extent_int(0)}; + using view_value_type = typename ViewType::value_type; + using execution_space_type = typename ViewType::execution_space; + using view_type = Kokkos::View; + Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions(rank, order, dim); return view_type(v.data(), stride); } template -KOKKOS_INLINE_FUNCTION auto transpose_2d_view(ViewType v, - const BatchLayout::Left &) { +KOKKOS_INLINE_FUNCTION auto transpose_2d_view(ViewType v, const BatchLayout::Left &) { const int order[] = {0, 1}; // v is LayoutRight return transpose_2d_view(v, order); } template -KOKKOS_INLINE_FUNCTION auto transpose_2d_view(ViewType v, - const BatchLayout::Right &) { +KOKKOS_INLINE_FUNCTION auto transpose_2d_view(ViewType v, const BatchLayout::Right &) { const int order[] = {1, 0}; // v is LayoutLeft return transpose_2d_view(v, order); } ///// subview_wrapper overloads for handling 3-rank BatchLayout::Left views template -KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, - IdxType2 i2, IdxType3 i3, +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, 
IdxType1 i1, IdxType2 i2, IdxType3 i3, const BatchLayout::Left &) { return Kokkos::subview(v, i1, i2, i3); } template -KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, - IdxType2 i2, IdxType3 i3, - const BatchLayout::Left &layout_tag, - const Trans::NoTranspose) { +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, + const BatchLayout::Left &layout_tag, const Trans::NoTranspose) { return subview_wrapper(v, i1, i2, i3, layout_tag); } template -KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, - Kokkos::ALL_t i2, Kokkos::ALL_t i3, - const BatchLayout::Left &layout_tag, - const Trans::Transpose) { +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, Kokkos::ALL_t i2, Kokkos::ALL_t i3, + const BatchLayout::Left &layout_tag, const Trans::Transpose) { auto sv_nt = subview_wrapper(v, i1, i3, i2, layout_tag); return transpose_2d_view(sv_nt, layout_tag); } template -KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, - IdxType2 i2, IdxType3 i3, - const BatchLayout::Left &layout_tag, - const Trans::Transpose) { +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, + const BatchLayout::Left &layout_tag, const Trans::Transpose) { auto sv_nt = subview_wrapper(v, i1, i3, i2, layout_tag); return sv_nt; @@ -647,29 +590,25 @@ KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, //// subview_wrapper overloads for handling 3-rank BatchLayout::Right views template -KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, - IdxType2 i2, IdxType3 i3, +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, const BatchLayout::Right &) { return Kokkos::subview(v, i2, i3, i1); } template -KOKKOS_INLINE_FUNCTION auto subview_wrapper( - ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, - const BatchLayout::Right &layout_tag, const Trans::NoTranspose &) { 
+KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, + const BatchLayout::Right &layout_tag, const Trans::NoTranspose &) { return subview_wrapper(v, i1, i2, i3, layout_tag); } template -KOKKOS_INLINE_FUNCTION auto subview_wrapper( - ViewType v, IdxType1 i1, Kokkos::ALL_t i2, Kokkos::ALL_t i3, - const BatchLayout::Right &layout_tag, const Trans::Transpose &) { +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, Kokkos::ALL_t i2, Kokkos::ALL_t i3, + const BatchLayout::Right &layout_tag, const Trans::Transpose &) { auto sv_nt = subview_wrapper(v, i1, i3, i2, layout_tag); return transpose_2d_view(sv_nt, layout_tag); } template -KOKKOS_INLINE_FUNCTION auto subview_wrapper( - ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, - const BatchLayout::Right &layout_tag, const Trans::Transpose &) { +KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1, IdxType2 i2, IdxType3 i3, + const BatchLayout::Right &layout_tag, const Trans::Transpose &) { auto sv_nt = subview_wrapper(v, i1, i3, i2, layout_tag); return sv_nt; @@ -686,71 +625,48 @@ KOKKOS_INLINE_FUNCTION auto subview_wrapper( * otherwise, the last element of v. 
*/ template -KOKKOS_INLINE_FUNCTION ViewValueType -access_view_bounds_check(ViewType v, int m, int n, const BoundsCheck::Yes &) { - return v(KOKKOSKERNELS_MACRO_MIN(m, v.extent_int(0) - 1), - KOKKOSKERNELS_MACRO_MIN(n, v.extent_int(1) - 1)); +KOKKOS_INLINE_FUNCTION ViewValueType access_view_bounds_check(ViewType v, int m, int n, const BoundsCheck::Yes &) { + return v(KOKKOSKERNELS_MACRO_MIN(m, v.extent_int(0) - 1), KOKKOSKERNELS_MACRO_MIN(n, v.extent_int(1) - 1)); } template -KOKKOS_INLINE_FUNCTION ViewValueType -access_view_bounds_check(ViewType v, int m, int n, const BoundsCheck::No &) { +KOKKOS_INLINE_FUNCTION ViewValueType access_view_bounds_check(ViewType v, int m, int n, const BoundsCheck::No &) { return v(m, n); } template -KOKKOS_INLINE_FUNCTION ViewValueType fma_alpha(ViewValueType reg_c, - ScalarType alpha, - const AlphaTag::Yes &) { +KOKKOS_INLINE_FUNCTION ViewValueType fma_alpha(ViewValueType reg_c, ScalarType alpha, const AlphaTag::Yes &) { return reg_c * alpha; } template -KOKKOS_INLINE_FUNCTION ViewValueType fma_alpha(ViewValueType reg_c, - ScalarType /*alpha*/, - const AlphaTag::No &) { +KOKKOS_INLINE_FUNCTION ViewValueType fma_alpha(ViewValueType reg_c, ScalarType /*alpha*/, const AlphaTag::No &) { return reg_c; } -template -KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, - ViewValueType reg_c, - ScalarType alpha, ScalarType beta, - const ArgAlphaFmaTag &alpha_tag, +template +KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, ViewValueType reg_c, ScalarType alpha, + ScalarType beta, const ArgAlphaFmaTag &alpha_tag, const BoundsCheck::Yes &) { - if (m < v.extent_int(0) && n < v.extent_int(1)) - v(m, n) = fma_alpha(reg_c, alpha, alpha_tag) + v(m, n) * beta; + if (m < v.extent_int(0) && n < v.extent_int(1)) v(m, n) = fma_alpha(reg_c, alpha, alpha_tag) + v(m, n) * beta; } -template -KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, - ViewValueType reg_c, - 
ScalarType alpha, ScalarType beta, - const ArgAlphaFmaTag &alpha_tag, +template +KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, ViewValueType reg_c, ScalarType alpha, + ScalarType beta, const ArgAlphaFmaTag &alpha_tag, const BoundsCheck::No &) { v(m, n) = fma_alpha(reg_c, alpha, alpha_tag) + v(m, n) * beta; } -template -KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, - ViewValueType reg_c, - ScalarType alpha, - const ArgAlphaFmaTag &alpha_tag, - const BoundsCheck::Yes &) { - if (m < v.extent_int(0) && n < v.extent_int(1)) - v(m, n) = fma_alpha(reg_c, alpha, alpha_tag); +template +KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, ViewValueType reg_c, ScalarType alpha, + const ArgAlphaFmaTag &alpha_tag, const BoundsCheck::Yes &) { + if (m < v.extent_int(0) && n < v.extent_int(1)) v(m, n) = fma_alpha(reg_c, alpha, alpha_tag); } -template -KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, - ViewValueType reg_c, - ScalarType alpha, - const ArgAlphaFmaTag &alpha_tag, - const BoundsCheck::No &) { +template +KOKKOS_INLINE_FUNCTION void fma_bounds_check(ViewType v, SizeType m, SizeType n, ViewValueType reg_c, ScalarType alpha, + const ArgAlphaFmaTag &alpha_tag, const BoundsCheck::No &) { v(m, n) = fma_alpha(reg_c, alpha, alpha_tag); } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Impl.hpp index 252c78d5c520..d89a82ae2cb8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Impl.hpp @@ -28,11 +28,9 @@ namespace KokkosBatched { /// =========== template -KOKKOS_INLINE_FUNCTION int SerialAddRadial::invoke(const ScalarType tiny, - const AViewType &A) { - return SerialAddRadialInternal::invoke( - (A.extent(0) < A.extent(1) ? 
A.extent(0) : A.extent(1)), tiny, A.data(), - (A.stride_0() + A.stride_1())); +KOKKOS_INLINE_FUNCTION int SerialAddRadial::invoke(const ScalarType tiny, const AViewType &A) { + return SerialAddRadialInternal::invoke((A.extent(0) < A.extent(1) ? A.extent(0) : A.extent(1)), tiny, A.data(), + (A.stride_0() + A.stride_1())); } /// @@ -41,11 +39,10 @@ KOKKOS_INLINE_FUNCTION int SerialAddRadial::invoke(const ScalarType tiny, template template -KOKKOS_INLINE_FUNCTION int TeamAddRadial::invoke( - const MemberType &member, const ScalarType tiny, const AViewType &A) { - return TeamAddRadialInternal::invoke( - member, (A.extent(0) < A.extent(1) ? A.extent(0) : A.extent(1)), tiny, - A.data(), (A.stride_0() + A.stride_1())); +KOKKOS_INLINE_FUNCTION int TeamAddRadial::invoke(const MemberType &member, const ScalarType tiny, + const AViewType &A) { + return TeamAddRadialInternal::invoke(member, (A.extent(0) < A.extent(1) ? A.extent(0) : A.extent(1)), tiny, A.data(), + (A.stride_0() + A.stride_1())); } } // end namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Internal.hpp index 24ecafe0a0fb..634879530e5f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_AddRadial_Internal.hpp @@ -28,8 +28,7 @@ namespace KokkosBatched { struct SerialAddRadialInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType tiny, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as) { + /* */ ValueType *KOKKOS_RESTRICT A, const int as) { const auto abs_tiny = tiny > 0 ? 
tiny : -tiny; const auto minus_abs_tiny = -abs_tiny; @@ -52,10 +51,8 @@ struct SerialAddRadialInternal { /// ================== struct TeamAddRadialInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const ScalarType tiny, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const ScalarType tiny, + /* */ ValueType *KOKKOS_RESTRICT A, const int as) { const auto abs_tiny = tiny > 0 ? tiny : -tiny; const auto minus_abs_tiny = -abs_tiny; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyGivens_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyGivens_Serial_Internal.hpp index cf8a946e994d..2d3d2af915f1 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyGivens_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyGivens_Serial_Internal.hpp @@ -30,10 +30,9 @@ namespace KokkosBatched { /// struct SerialApplyLeftGivensInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const Kokkos::pair G, const int n, - /* */ ValueType *a1t, const int a1ts, - /* */ ValueType *a2t, const int a2ts) { + KOKKOS_INLINE_FUNCTION static int invoke(const Kokkos::pair G, const int n, + /* */ ValueType *a1t, const int a1ts, + /* */ ValueType *a2t, const int a2ts) { typedef ValueType value_type; if (n == 0) return 0; // quick return if (G.first == value_type(1) && G.second == value_type(0)) return 0; @@ -59,10 +58,9 @@ struct SerialApplyLeftGivensInternal { struct SerialApplyRightGivensInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const Kokkos::pair G, const int m, - /* */ ValueType *a1, const int a1s, - /* */ ValueType *a2, const int a2s) { + KOKKOS_INLINE_FUNCTION static int invoke(const Kokkos::pair G, const int m, + /* */ ValueType *a1, const int a1s, + /* */ ValueType *a2, const int a2s) { typedef ValueType 
value_type; if (m == 0) return 0; // quick return if (G.first == value_type(1) && G.second == value_type(0)) return 0; @@ -88,12 +86,11 @@ struct SerialApplyRightGivensInternal { struct SerialApplyLeftRightGivensInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const Kokkos::pair &G12, const int &m, const int &n, - /* */ ValueType *KOKKOS_RESTRICT a1t, - /* */ ValueType *KOKKOS_RESTRICT a2t, - /* */ ValueType *KOKKOS_RESTRICT a1, - /* */ ValueType *KOKKOS_RESTRICT a2, const int &as0, const int &as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const Kokkos::pair &G12, const int &m, const int &n, + /* */ ValueType *KOKKOS_RESTRICT a1t, + /* */ ValueType *KOKKOS_RESTRICT a2t, + /* */ ValueType *KOKKOS_RESTRICT a1, + /* */ ValueType *KOKKOS_RESTRICT a2, const int &as0, const int &as1) { typedef ValueType value_type; if (G12.first == value_type(1) && G12.second == value_type(0)) return 0; if (m == 0 && n == 0) return 0; // quick return @@ -124,15 +121,14 @@ struct SerialApplyLeftRightGivensInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const Kokkos::pair &G12, - const Kokkos::pair &G13, const int &m, const int &n, - /* */ ValueType *KOKKOS_RESTRICT a1t, - /* */ ValueType *KOKKOS_RESTRICT a2t, - /* */ ValueType *KOKKOS_RESTRICT a3t, - /* */ ValueType *KOKKOS_RESTRICT a1, - /* */ ValueType *KOKKOS_RESTRICT a2, - /* */ ValueType *KOKKOS_RESTRICT a3, const int &as0, const int &as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const Kokkos::pair &G12, + const Kokkos::pair &G13, const int &m, const int &n, + /* */ ValueType *KOKKOS_RESTRICT a1t, + /* */ ValueType *KOKKOS_RESTRICT a2t, + /* */ ValueType *KOKKOS_RESTRICT a3t, + /* */ ValueType *KOKKOS_RESTRICT a1, + /* */ ValueType *KOKKOS_RESTRICT a2, + /* */ ValueType *KOKKOS_RESTRICT a3, const int &as0, const int &as1) { typedef ValueType value_type; if (m == 0 && n == 0) return 0; // quick return diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Impl.hpp index be720bef2e71..db85d96680a8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Impl.hpp @@ -28,27 +28,21 @@ namespace KokkosBatched { /// =========== template <> -template -KOKKOS_INLINE_FUNCTION int SerialApplyHouseholder::invoke( - const uViewType &u2, const tauViewType &tau, const AViewType &A, - const wViewType &w) { - return SerialApplyLeftHouseholderInternal::invoke( - A.extent(0) - 1, A.extent(1), tau.data(), u2.data(), u2.stride(0), - A.data(), A.stride(1), A.data() + A.stride(0), A.stride(0), A.stride(1), - w.data()); +template +KOKKOS_INLINE_FUNCTION int SerialApplyHouseholder::invoke(const uViewType &u2, const tauViewType &tau, + const AViewType &A, const wViewType &w) { + return SerialApplyLeftHouseholderInternal::invoke(A.extent(0) - 1, A.extent(1), tau.data(), u2.data(), u2.stride(0), + A.data(), A.stride(1), A.data() + A.stride(0), A.stride(0), + A.stride(1), w.data()); } template <> -template -KOKKOS_INLINE_FUNCTION int SerialApplyHouseholder::invoke( - const uViewType &u2, const tauViewType &tau, const AViewType &A, - const wViewType &w) { - return SerialApplyRightHouseholderInternal::invoke( - A.extent(0), A.extent(1) - 1, tau.data(), u2.data(), u2.stride(0), - A.data(), A.stride(0), A.data() + A.stride(1), A.stride(0), A.stride(1), - w.data()); +template +KOKKOS_INLINE_FUNCTION int SerialApplyHouseholder::invoke(const uViewType &u2, const tauViewType &tau, + const AViewType &A, const wViewType &w) { + return SerialApplyRightHouseholderInternal::invoke(A.extent(0), A.extent(1) - 1, tau.data(), u2.data(), u2.stride(0), + A.data(), A.stride(0), A.data() + A.stride(1), A.stride(0), + A.stride(1), w.data()); } } // namespace 
KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp index 611e9440b50b..e129fef5a572 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp @@ -30,12 +30,10 @@ namespace KokkosBatched { /// struct SerialApplyLeftHouseholderInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const ValueType* tau, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ValueType* tau, /* */ ValueType* u2, const int u2s, /* */ ValueType* a1t, const int a1ts, - /* */ ValueType* A2, const int as0, - const int as1, + /* */ ValueType* A2, const int as0, const int as1, /* */ ValueType* w1t) { typedef ValueType value_type; @@ -55,9 +53,7 @@ struct SerialApplyLeftHouseholderInternal { // w1t /= tau for (int j = 0; j < n; ++j) { value_type tmp = a1t[j * a1ts]; - for (int i = 0; i < m; ++i) - tmp += Kokkos::ArithTraits::conj(u2[i * u2s]) * - A2[i * as0 + j * as1]; + for (int i = 0; i < m; ++i) tmp += Kokkos::ArithTraits::conj(u2[i * u2s]) * A2[i * as0 + j * as1]; w1t[j] = tmp * inv_tau; // /= (*tau); } @@ -74,12 +70,10 @@ struct SerialApplyLeftHouseholderInternal { struct SerialApplyRightHouseholderInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const ValueType* tau, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ValueType* tau, /* */ ValueType* u2, const int u2s, /* */ ValueType* a1, const int a1s, - /* */ ValueType* A2, const int as0, - const int as1, + /* */ ValueType* A2, const int as0, const int as1, /* */ ValueType* w1) { typedef ValueType value_type; /// u2 n x 1 @@ -107,9 +101,7 @@ struct SerialApplyRightHouseholderInternal { // A2 -= w1 * u2' (ger 
with conjugate) for (int j = 0; j < n; ++j) - for (int i = 0; i < m; ++i) - A2[i * as0 + j * as1] -= - w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); + for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Impl.hpp index d1dcc58d1840..b322574ad00f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Impl.hpp @@ -29,33 +29,23 @@ namespace KokkosBatched { template struct TeamVectorApplyHouseholder { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const uViewType &u2, - const tauViewType &tau, - const AViewType &A, - const wViewType &w) { - return TeamVectorApplyLeftHouseholderInternal::invoke( - member, A.extent(0) - 1, A.extent(1), tau.data(), u2.data(), - u2.stride(0), A.data(), A.stride(1), A.data() + A.stride(0), - A.stride(0), A.stride(1), w.data()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const uViewType &u2, const tauViewType &tau, + const AViewType &A, const wViewType &w) { + return TeamVectorApplyLeftHouseholderInternal::invoke(member, A.extent(0) - 1, A.extent(1), tau.data(), u2.data(), + u2.stride(0), A.data(), A.stride(1), A.data() + A.stride(0), + A.stride(0), A.stride(1), w.data()); } }; template struct TeamVectorApplyHouseholder { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const uViewType &u2, - const tauViewType &tau, - const AViewType &A, - const wViewType &w) { - return TeamVectorApplyRightHouseholderInternal::invoke( - member, A.extent(0), A.extent(1) - 1, tau.data(), u2.data(), - u2.stride(0), A.data(), A.stride(0), A.data() + A.stride(1), - 
A.stride(0), A.stride(1), w.data()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const uViewType &u2, const tauViewType &tau, + const AViewType &A, const wViewType &w) { + return TeamVectorApplyRightHouseholderInternal::invoke(member, A.extent(0), A.extent(1) - 1, tau.data(), u2.data(), + u2.stride(0), A.data(), A.stride(0), A.data() + A.stride(1), + A.stride(0), A.stride(1), w.data()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Internal.hpp index 2754818fbfd6..2474a10fe3ee 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_TeamVector_Internal.hpp @@ -30,13 +30,10 @@ namespace KokkosBatched { /// struct TeamVectorApplyLeftHouseholderInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const ValueType *tau, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ValueType *tau, /* */ ValueType *u2, const int u2s, /* */ ValueType *a1t, const int a1ts, - /* */ ValueType *A2, const int as0, - const int as1, + /* */ ValueType *A2, const int as0, const int as1, /* */ ValueType *w1t) { typedef ValueType value_type; @@ -59,8 +56,7 @@ struct TeamVectorApplyLeftHouseholderInternal { Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(member, m), [&](const int &i, value_type &val) { - val += Kokkos::ArithTraits::conj(u2[i * u2s]) * - A2[i * as0 + j * as1]; + val += Kokkos::ArithTraits::conj(u2[i * u2s]) * A2[i * as0 + j * as1]; }, tmp); Kokkos::single(Kokkos::PerThread(member), [&]() { @@ -70,26 +66,19 @@ struct TeamVectorApplyLeftHouseholderInternal { member.team_barrier(); // a1t -= w1t (axpy) - 
Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), - [&](const int &j) { a1t[j * a1ts] -= w1t[j]; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { a1t[j * a1ts] -= w1t[j]; }); // A2 -= u2 w1t (ger) if (as0 <= as1) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int &j) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, m), [&](const int &i) { - A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j]; - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), + [&](const int &i) { A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j]; }); + }); } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, n), [&](const int &j) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j]; - }); - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), + [&](const int &i) { A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j]; }); + }); } return 0; @@ -98,13 +87,10 @@ struct TeamVectorApplyLeftHouseholderInternal { struct TeamVectorApplyRightHouseholderInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const ValueType *tau, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ValueType *tau, /* */ ValueType *u2, const int u2s, /* */ ValueType *a1, const int a1s, - /* */ ValueType *A2, const int as0, - const int as1, + /* */ ValueType *A2, const int as0, const int as1, /* */ ValueType *w1) { typedef ValueType value_type; /// u2 n x 1 @@ -125,10 +111,7 @@ struct TeamVectorApplyRightHouseholderInternal { value_type tmp(0); Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(member, n), - [&](const int &j, value_type &val) { - val += A2[i * as0 + j * 
as1] * u2[j * u2s]; - }, - tmp); + [&](const int &j, value_type &val) { val += A2[i * as0 + j * as1] * u2[j * u2s]; }, tmp); Kokkos::single(Kokkos::PerThread(member), [&]() { w1[i] = (tmp + a1[i * a1s]) * inv_tau; // \= (*tau); }); @@ -136,28 +119,21 @@ struct TeamVectorApplyRightHouseholderInternal { member.team_barrier(); // a1 -= w1 (axpy) - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { a1[i * a1s] -= w1[i]; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { a1[i * a1s] -= w1[i]; }); // A2 -= w1 * u2' (ger with conjugate) if (as0 <= as1) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int &j) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, m), [&](const int &i) { - A2[i * as0 + j * as1] -= - w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), [&](const int &i) { + A2[i * as0 + j * as1] -= w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); + }); + }); } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, n), [&](const int &j) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - A2[i * as0 + j * as1] -= - w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); - }); - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { + A2[i * as0 + j * as1] -= w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); + }); + }); } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Impl.hpp index afc518f43c5b..10455f65b600 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Impl.hpp +++ 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Impl.hpp @@ -35,34 +35,26 @@ namespace KokkosBatched { template struct TeamVectorApplyPivot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, const AViewType &A) { if (AViewType::rank == 1) { const int as0 = A.stride(0); - TeamVectorApplyPivotVectorForwardInternal::invoke(member, piv, A.data(), - as0); + TeamVectorApplyPivotVectorForwardInternal::invoke(member, piv, A.data(), as0); } else if (AViewType::rank == 2) { const int n = A.extent(1), as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixForwardInternal::invoke(member, n, piv, - A.data(), as0, as1); + TeamVectorApplyPivotMatrixForwardInternal::invoke(member, n, piv, A.data(), as0, as1); } return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const PivViewType piv, - const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const PivViewType piv, const AViewType &A) { if (AViewType::rank == 1) { const int plen = piv.extent(0), ps0 = piv.stride(0), as0 = A.stride(0); - TeamVectorApplyPivotVectorForwardInternal::invoke( - member, plen, piv.data(), ps0, A.data(), as0); + TeamVectorApplyPivotVectorForwardInternal::invoke(member, plen, piv.data(), ps0, A.data(), as0); } else if (AViewType::rank == 2) { // row permutation - const int plen = piv.extent(0), ps0 = piv.stride(0), n = A.extent(1), - as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixForwardInternal::invoke( - member, n, plen, piv.data(), ps0, A.data(), as0, as1); + const int plen = piv.extent(0), ps0 = piv.stride(0), n = A.extent(1), as0 = A.stride(0), as1 = A.stride(1); + TeamVectorApplyPivotMatrixForwardInternal::invoke(member, n, plen, piv.data(), ps0, A.data(), as0, as1); } return 0; } @@ -72,34 +64,26 @@ struct TeamVectorApplyPivot 
{ template struct TeamVectorApplyPivot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, const AViewType &A) { if (AViewType::rank == 1) { const int as0 = A.stride(0); - TeamVectorApplyPivotVectorForwardInternal::invoke(member, piv, A.data(), - as0); + TeamVectorApplyPivotVectorForwardInternal::invoke(member, piv, A.data(), as0); } else if (AViewType::rank == 2) { const int m = A.extent(0), as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixForwardInternal::invoke(member, m, piv, - A.data(), as1, as0); + TeamVectorApplyPivotMatrixForwardInternal::invoke(member, m, piv, A.data(), as1, as0); } return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const PivViewType &piv, - const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const PivViewType &piv, const AViewType &A) { if (AViewType::rank == 1) { const int plen = piv.extent(0), as0 = A.stride(0); - TeamVectorApplyPivotVectorForwardInternal ::invoke( - member, plen, piv.data(), A.data(), as0); + TeamVectorApplyPivotVectorForwardInternal ::invoke(member, plen, piv.data(), A.data(), as0); } else if (AViewType::rank == 2) { // column permutation - const int plen = piv.extent(0), ps = piv.stride(0), m = A.extent(0), - as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixForwardInternal ::invoke( - member, m, plen, piv.data(), ps, A.data(), as1, as0); + const int plen = piv.extent(0), ps = piv.stride(0), m = A.extent(0), as0 = A.stride(0), as1 = A.stride(1); + TeamVectorApplyPivotMatrixForwardInternal ::invoke(member, m, plen, piv.data(), ps, A.data(), as1, as0); } return 0; } @@ -113,34 +97,26 @@ struct TeamVectorApplyPivot { template struct TeamVectorApplyPivot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, const 
AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, const AViewType &A) { if (AViewType::rank == 1) { const int as0 = A.stride(0); - TeamVectorApplyPivotVectorBackwardInternal::invoke(member, piv, A.data(), - as0); + TeamVectorApplyPivotVectorBackwardInternal::invoke(member, piv, A.data(), as0); } else if (AViewType::rank == 2) { const int n = A.extent(1), as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixBackwardInternal::invoke(member, n, piv, - A.data(), as0, as1); + TeamVectorApplyPivotMatrixBackwardInternal::invoke(member, n, piv, A.data(), as0, as1); } return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const PivViewType piv, - const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const PivViewType piv, const AViewType &A) { if (AViewType::rank == 1) { const int plen = piv.extent(0), ps0 = piv.stride(0), as0 = A.stride(0); - TeamVectorApplyPivotVectorBackwardInternal::invoke( - member, plen, piv.data(), ps0, A.data(), as0); + TeamVectorApplyPivotVectorBackwardInternal::invoke(member, plen, piv.data(), ps0, A.data(), as0); } else if (AViewType::rank == 2) { // row permutation - const int plen = piv.extent(0), ps0 = piv.stride(0), n = A.extent(1), - as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixBackwardInternal::invoke( - member, n, plen, piv.data(), ps0, A.data(), as0, as1); + const int plen = piv.extent(0), ps0 = piv.stride(0), n = A.extent(1), as0 = A.stride(0), as1 = A.stride(1); + TeamVectorApplyPivotMatrixBackwardInternal::invoke(member, n, plen, piv.data(), ps0, A.data(), as0, as1); } return 0; } @@ -150,34 +126,26 @@ struct TeamVectorApplyPivot { template struct TeamVectorApplyPivot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, const 
AViewType &A) { if (AViewType::rank == 1) { const int as0 = A.stride(0); - TeamVectorApplyPivotVectorBackwardInternal::invoke(member, piv, A.data(), - as0); + TeamVectorApplyPivotVectorBackwardInternal::invoke(member, piv, A.data(), as0); } else if (AViewType::rank == 2) { const int m = A.extent(0), as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixBackwardInternal::invoke(member, m, piv, - A.data(), as1, as0); + TeamVectorApplyPivotMatrixBackwardInternal::invoke(member, m, piv, A.data(), as1, as0); } return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const PivViewType &piv, - const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const PivViewType &piv, const AViewType &A) { if (AViewType::rank == 1) { const int plen = piv.extent(0), as0 = A.stride(0); - TeamVectorApplyPivotVectorBackwardInternal ::invoke( - member, plen, piv.data(), A.data(), as0); + TeamVectorApplyPivotVectorBackwardInternal ::invoke(member, plen, piv.data(), A.data(), as0); } else if (AViewType::rank == 2) { // column permutation - const int plen = piv.extent(0), ps = piv.stride(0), m = A.extent(0), - as0 = A.stride(0), as1 = A.stride(1); - TeamVectorApplyPivotMatrixBackwardInternal ::invoke( - member, m, plen, piv.data(), ps, A.data(), as1, as0); + const int plen = piv.extent(0), ps = piv.stride(0), m = A.extent(0), as0 = A.stride(0), as1 = A.stride(1); + TeamVectorApplyPivotMatrixBackwardInternal ::invoke(member, m, plen, piv.data(), ps, A.data(), as1, as0); } return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Internal.hpp index 59548c3d26be..a30138210841 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyPivot_Internal.hpp @@ -31,10 +31,8 @@ namespace KokkosBatched { 
/// struct TeamVectorApplyPivotVectorForwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { if (piv != 0) { Kokkos::single(Kokkos::PerTeam(member), [&]() { const int idx_p = piv * as0; @@ -47,12 +45,9 @@ struct TeamVectorApplyPivotVectorForwardInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int plen, - const IntType *KOKKOS_RESTRICT p, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int plen, const IntType *KOKKOS_RESTRICT p, const int ps0, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { Kokkos::single(Kokkos::PerTeam(member), [&]() { for (int i = 0; i < plen; ++i) { const int piv = p[i * ps0]; @@ -71,30 +66,24 @@ struct TeamVectorApplyPivotVectorForwardInternal { /// Pivot a row struct TeamVectorApplyPivotMatrixForwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int n, const int piv, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int n, const int piv, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (piv != 0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), - [&](const int &j) { - ValueType *KOKKOS_RESTRICT A_at_j = A + j * as1; - const int idx_p = piv * as0; - const ValueType tmp = A_at_j[0]; - A_at_j[0] = A_at_j[idx_p]; - A_at_j[idx_p] = tmp; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { + ValueType *KOKKOS_RESTRICT A_at_j = A + j * as1; + const int idx_p = piv * as0; + const ValueType tmp = A_at_j[0]; + A_at_j[0] = A_at_j[idx_p]; + A_at_j[idx_p] = 
tmp; + }); } return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int n, const int plen, - const IntType *KOKKOS_RESTRICT p, - const int ps0, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int n, const int plen, + const IntType *KOKKOS_RESTRICT p, const int ps0, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { ValueType *KOKKOS_RESTRICT A_at_j = A + j * as1; for (int i = 0; i < plen; ++i) { @@ -116,10 +105,8 @@ struct TeamVectorApplyPivotMatrixForwardInternal { /// struct TeamVectorApplyPivotVectorBackwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { if (piv != 0) { Kokkos::single(Kokkos::PerTeam(member), [&]() { const int idx_p = piv * as0; @@ -132,12 +119,9 @@ struct TeamVectorApplyPivotVectorBackwardInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int plen, - const IntType *KOKKOS_RESTRICT p, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int plen, const IntType *KOKKOS_RESTRICT p, const int ps0, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { Kokkos::single(Kokkos::PerTeam(member), [&]() { for (int i = (plen - 1); i >= 0; --i) { const int piv = p[i * ps0]; @@ -156,30 +140,24 @@ struct TeamVectorApplyPivotVectorBackwardInternal { /// Pivot a row struct TeamVectorApplyPivotMatrixBackwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int n, const int piv, - /* */ ValueType 
*KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int n, const int piv, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (piv != 0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), - [&](const int &j) { - ValueType *KOKKOS_RESTRICT A_at_j = A + j * as1; - const int idx_p = piv * as0; - const ValueType tmp = A_at_j[0]; - A_at_j[0] = A_at_j[idx_p]; - A_at_j[idx_p] = tmp; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { + ValueType *KOKKOS_RESTRICT A_at_j = A + j * as1; + const int idx_p = piv * as0; + const ValueType tmp = A_at_j[0]; + A_at_j[0] = A_at_j[idx_p]; + A_at_j[idx_p] = tmp; + }); } return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int n, const int plen, - const IntType *KOKKOS_RESTRICT p, - const int ps0, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int n, const int plen, + const IntType *KOKKOS_RESTRICT p, const int ps0, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { ValueType *KOKKOS_RESTRICT A_at_j = A + j * as1; for (int i = (plen - 1); i >= 0; --i) { diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp index 2a7519f2dc7e..ba9d85350ffe 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp @@ -28,42 +28,30 @@ namespace KokkosBatched { /// =========== template <> -template -KOKKOS_INLINE_FUNCTION int -SerialApplyQ::invoke( - const AViewType &A, const tViewType &t, const BViewType &B, - const wViewType &w) { - return 
SerialApplyQ_LeftForwardInternal::invoke( - B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), - A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), - B.stride_1(), w.data()); +template +KOKKOS_INLINE_FUNCTION int SerialApplyQ::invoke( + const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) { + return SerialApplyQ_LeftForwardInternal::invoke(B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), + A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), + B.stride_1(), w.data()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialApplyQ::invoke( - const AViewType &A, const tViewType &t, const BViewType &B, - const wViewType &w) { - return SerialApplyQ_LeftBackwardInternal::invoke( - B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), - A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), - B.stride_1(), w.data()); +template +KOKKOS_INLINE_FUNCTION int SerialApplyQ::invoke( + const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) { + return SerialApplyQ_LeftBackwardInternal::invoke(B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), + A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), + B.stride_1(), w.data()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialApplyQ::invoke( - const AViewType &A, const tViewType &t, const BViewType &B, - const wViewType &w) { - return SerialApplyQ_RightForwardInternal::invoke( - B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), - A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), - B.stride_1(), w.data()); +template +KOKKOS_INLINE_FUNCTION int SerialApplyQ::invoke( + const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) { + return SerialApplyQ_RightForwardInternal::invoke(B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), + A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), + B.stride_1(), w.data()); } } // namespace 
KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Internal.hpp index e8d69059648c..dbb11df74706 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Internal.hpp @@ -32,13 +32,10 @@ namespace KokkosBatched { struct SerialApplyQ_LeftForwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const int k, - /* */ ValueType *A, const int as0, - const int as1, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, - /* */ ValueType *B, const int bs0, - const int bs1, + /* */ ValueType *B, const int bs0, const int bs1, /* */ ValueType *w) { typedef ValueType value_type; @@ -75,9 +72,8 @@ struct SerialApplyQ_LeftForwardInternal { const int m_A2 = m - m_A0 - 1; /// ----------------------------------------------------- // left apply householder to partitioned B1 and B2 - SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, - as0, B_part3x1.A1, bs1, - B_part3x1.A2, bs0, bs1, w); + SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, B_part3x1.A2, bs0, + bs1, w); /// ----------------------------------------------------- A_part2x2.mergeToABR(A_part3x3); @@ -90,13 +86,10 @@ struct SerialApplyQ_LeftForwardInternal { struct SerialApplyQ_LeftBackwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const int k, - /* */ ValueType *A, const int as0, - const int as1, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, - /* */ ValueType *B, const int bs0, - const int 
bs1, + /* */ ValueType *B, const int bs0, const int bs1, /* */ ValueType *w) { typedef ValueType value_type; @@ -133,9 +126,8 @@ struct SerialApplyQ_LeftBackwardInternal { const int m_A2 = m - m_A0 - 1; /// ----------------------------------------------------- // left apply householder to partitioned B1 and B2 - SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, - as0, B_part3x1.A1, bs1, - B_part3x1.A2, bs0, bs1, w); + SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, B_part3x1.A2, bs0, + bs1, w); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); @@ -148,13 +140,10 @@ struct SerialApplyQ_LeftBackwardInternal { struct SerialApplyQ_RightForwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const int k, - /* */ ValueType *A, const int as0, - const int as1, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, - /* */ ValueType *B, const int bs0, - const int bs1, + /* */ ValueType *B, const int bs0, const int bs1, /* */ ValueType *w) { typedef ValueType value_type; @@ -191,9 +180,8 @@ struct SerialApplyQ_RightForwardInternal { const int n_B2 = n - n_A0 - 1; /// ----------------------------------------------------- // right apply householder to partitioned B1 and B2 - SerialApplyRightHouseholderInternal::invoke(m, n_B2, tau, A_part3x3.A21, - as0, B_part1x3.A1, bs0, - B_part1x3.A2, bs0, bs1, w); + SerialApplyRightHouseholderInternal::invoke(m, n_B2, tau, A_part3x3.A21, as0, B_part1x3.A1, bs0, B_part1x3.A2, + bs0, bs1, w); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); t_part2x1.mergeToAT(t_part3x1); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Impl.hpp 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Impl.hpp index 7f3a695d7517..d6abd61a784e 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Impl.hpp @@ -28,53 +28,35 @@ namespace KokkosBatched { /// =============== template -struct TeamVectorApplyQ { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const BViewType &B, - const wViewType &w) { - return TeamVectorApplyQ_LeftForwardInternal::invoke( - member, B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), - A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), - B.stride_1(), w.data()); +struct TeamVectorApplyQ { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const BViewType &B, const wViewType &w) { + return TeamVectorApplyQ_LeftForwardInternal::invoke(member, B.extent(0), B.extent(1), A.extent(1), A.data(), + A.stride_0(), A.stride_1(), t.data(), t.stride_0(), B.data(), + B.stride_0(), B.stride_1(), w.data()); } }; template -struct TeamVectorApplyQ { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const BViewType &B, - const wViewType &w) { - return TeamVectorApplyQ_LeftBackwardInternal::invoke( - member, B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), - A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), - B.stride_1(), w.data()); +struct TeamVectorApplyQ { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const BViewType &B, const wViewType &w) { + return TeamVectorApplyQ_LeftBackwardInternal::invoke(member, B.extent(0), B.extent(1), A.extent(1), A.data(), + A.stride_0(), A.stride_1(), t.data(), t.stride_0(), 
B.data(), + B.stride_0(), B.stride_1(), w.data()); } }; template -struct TeamVectorApplyQ { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const BViewType &B, - const wViewType &w) { - return TeamVectorApplyQ_RightForwardInternal::invoke( - member, B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(), - A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(), - B.stride_1(), w.data()); +struct TeamVectorApplyQ { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const BViewType &B, const wViewType &w) { + return TeamVectorApplyQ_RightForwardInternal::invoke(member, B.extent(0), B.extent(1), A.extent(1), A.data(), + A.stride_0(), A.stride_1(), t.data(), t.stride_0(), B.data(), + B.stride_0(), B.stride_1(), w.data()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Internal.hpp index 233daa8978db..8fc6c8a78af6 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_TeamVector_Internal.hpp @@ -32,12 +32,11 @@ namespace KokkosBatched { struct TeamVectorApplyQ_LeftForwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, const int k, - /* */ ValueType *A, const int as0, const int as1, - /* */ ValueType *t, const int ts, - /* */ ValueType *B, const int bs0, const int bs1, - /* */ ValueType *w) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, + /* */ ValueType *t, const int ts, + /* */ ValueType *B, const int bs0, const int bs1, + /* */ ValueType *w) { typedef ValueType value_type; /// 
Given a matrix A that includes a series of householder vectors, @@ -73,9 +72,8 @@ struct TeamVectorApplyQ_LeftForwardInternal { const int m_A2 = m - m_A0 - 1; /// ----------------------------------------------------- // left apply householder to partitioned B1 and B2 - TeamVectorApplyLeftHouseholderInternal::invoke( - member, m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, - B_part3x1.A2, bs0, bs1, w); + TeamVectorApplyLeftHouseholderInternal::invoke(member, m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, + B_part3x1.A2, bs0, bs1, w); member.team_barrier(); /// ----------------------------------------------------- A_part2x2.mergeToABR(A_part3x3); @@ -88,12 +86,11 @@ struct TeamVectorApplyQ_LeftForwardInternal { struct TeamVectorApplyQ_LeftBackwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, const int k, - /* */ ValueType *A, const int as0, const int as1, - /* */ ValueType *t, const int ts, - /* */ ValueType *B, const int bs0, const int bs1, - /* */ ValueType *w) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, + /* */ ValueType *t, const int ts, + /* */ ValueType *B, const int bs0, const int bs1, + /* */ ValueType *w) { typedef ValueType value_type; /// Given a matrix A that includes a series of householder vectors, @@ -129,9 +126,8 @@ struct TeamVectorApplyQ_LeftBackwardInternal { const int m_A2 = m - m_A0 - 1; /// ----------------------------------------------------- // left apply householder to partitioned B1 and B2 - TeamVectorApplyLeftHouseholderInternal::invoke( - member, m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, - B_part3x1.A2, bs0, bs1, w); + TeamVectorApplyLeftHouseholderInternal::invoke(member, m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, + B_part3x1.A2, bs0, bs1, w); member.team_barrier(); /// 
----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); @@ -144,12 +140,11 @@ struct TeamVectorApplyQ_LeftBackwardInternal { struct TeamVectorApplyQ_RightForwardInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, const int k, - /* */ ValueType *A, const int as0, const int as1, - /* */ ValueType *t, const int ts, - /* */ ValueType *B, const int bs0, const int bs1, - /* */ ValueType *w) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, + /* */ ValueType *t, const int ts, + /* */ ValueType *B, const int bs0, const int bs1, + /* */ ValueType *w) { typedef ValueType value_type; /// Given a matrix A that includes a series of householder vectors, @@ -185,9 +180,8 @@ struct TeamVectorApplyQ_RightForwardInternal { const int n_B2 = n - n_A0 - 1; /// ----------------------------------------------------- // right apply householder to partitioned B1 and B2 - TeamVectorApplyRightHouseholderInternal::invoke( - member, m, n_B2, tau, A_part3x3.A21, as0, B_part1x3.A1, bs0, - B_part1x3.A2, bs0, bs1, w); + TeamVectorApplyRightHouseholderInternal::invoke(member, m, n_B2, tau, A_part3x3.A21, as0, B_part1x3.A1, bs0, + B_part1x3.A2, bs0, bs1, w); member.team_barrier(); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp index 400c46544db6..6d65ebc294e4 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp @@ -28,11 +28,9 @@ namespace KokkosBatched { /// ==================== struct SerialAxpyInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType 
alpha, - const ValueType* KOKKOS_RESTRICT X, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType alpha, const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, - const int ys0) { + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -42,10 +40,9 @@ struct SerialAxpyInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -55,17 +52,14 @@ struct SerialAxpyInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const int n, const ScalarType* KOKKOS_RESTRICT alpha, - const int alphas0, const ValueType* KOKKOS_RESTRICT X, const int xs0, - const int xs1, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ScalarType* KOKKOS_RESTRICT alpha, + const int alphas0, const ValueType* KOKKOS_RESTRICT X, const int xs0, + const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { if (xs0 > xs1) - for (int i = 0; i < m; ++i) - invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); + for (int i = 0; i < m; ++i) invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); else - for (int j = 0; j < n; ++j) - invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); + for (int j = 0; j < n; ++j) invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); return 0; } @@ -76,50 +70,38 @@ struct SerialAxpyInternal { /// 
==================== struct TeamAxpyInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const int m, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT X, - const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, - const int ys0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { - Y[i * ys0] += alpha * X[i * xs0]; - }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { Y[i * ys0] += alpha * X[i * xs0]; }); // member.team_barrier(); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { - Y[i * ys0] += alpha[i * alphas0] * X[i * xs0]; - }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), + [&](const int& i) { Y[i * ys0] += alpha[i * alphas0] * X[i * xs0]; }); // member.team_barrier(); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, const int n, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const int n, + const 
ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { if (m > n) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int& i) { - SerialAxpyInternal::invoke(n, alpha[i * alphas0], X + i * xs0, xs1, - Y + i * ys0, ys1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { + SerialAxpyInternal::invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int& j) { - SerialAxpyInternal::invoke(m, alpha, alphas0, X + j * xs1, xs0, - Y + j * ys1, ys0); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int& j) { + SerialAxpyInternal::invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); + }); } // member.team_barrier(); return 0; @@ -131,45 +113,35 @@ struct TeamAxpyInternal { /// ======================== struct TeamVectorAxpyInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const int m, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT X, - const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, - const int ys0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { - Y[i * ys0] += alpha * X[i * xs0]; - }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { Y[i * ys0] += alpha * X[i * xs0]; }); // member.team_barrier(); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ 
ValueType* KOKKOS_RESTRICT Y, const int ys0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { - Y[i * ys0] += alpha[i * alphas0] * X[i * xs0]; - }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), + [&](const int& i) { Y[i * ys0] += alpha[i * alphas0] * X[i * xs0]; }); // member.team_barrier(); return 0; } - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, const int n, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, m * n), [&](const int& iTemp) { - int i, j; - getIndices(iTemp, n, m, j, i); - Y[i * ys0 + j * ys1] += alpha[i * alphas0] * X[i * xs0 + j * xs1]; - }); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const int n, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, m * n), [&](const int& iTemp) { + int i, j; + getIndices(iTemp, n, m, j, i); + Y[i * ys0 + j * ys1] += alpha[i * alphas0] * X[i * xs0 + j * xs1]; + }); // member.team_barrier(); return 0; } @@ -180,50 +152,28 @@ struct TeamVectorAxpyInternal { /// =========== template -KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, - const XViewType& X, - const YViewType& Y) { +KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, const XViewType& X, const YViewType& Y) { #if 
(KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::axpy: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::axpy: YViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::axpy: alphaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::axpy: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::axpy: YViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::axpy: alphaViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::axpy: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::axpy: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::axpy: First dimension of X and alpha do not match: X: " - "%d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::axpy: First dimension of X and alpha do not match: X: " "%d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } #endif @@ -231,11 +181,10 @@ KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, // No need to check if X.extent(0)==1 in the serial case as we don't // parallelize the kernel anyway. 
- return SerialAxpyInternal::template invoke< - typename alphaViewType::non_const_value_type, - typename XViewType::non_const_value_type>( - X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), - X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1()); + return SerialAxpyInternal::template invoke( + X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), + Y.stride_0(), Y.stride_1()); } /// @@ -244,67 +193,43 @@ KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, template template -KOKKOS_INLINE_FUNCTION int TeamAxpy::invoke( - const MemberType& member, const alphaViewType& alpha, const XViewType& X, - const YViewType& Y) { +KOKKOS_INLINE_FUNCTION int TeamAxpy::invoke(const MemberType& member, const alphaViewType& alpha, + const XViewType& X, const YViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::axpy: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::axpy: YViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::axpy: alphaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::axpy: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::axpy: YViewType must have rank 2."); + 
static_assert(alphaViewType::rank == 1, "KokkosBatched::axpy: alphaViewType must have rank 1."); // Check compatibility of dimensions at run time. if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::axpy: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::axpy: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::axpy: First dimension of X and alpha do not match: X: " - "%d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::axpy: First dimension of X and alpha do not match: X: " "%d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } #endif if (X.extent(0) == 1) { - KokkosBlas::Experimental::axpy( - member, alpha.data()[0], Kokkos::subview(X, 0, Kokkos::ALL), - Kokkos::subview(Y, 0, Kokkos::ALL)); + KokkosBlas::Experimental::axpy(member, alpha.data()[0], Kokkos::subview(X, 0, Kokkos::ALL), + Kokkos::subview(Y, 0, Kokkos::ALL)); return 0; } - return TeamAxpyInternal::template invoke< - MemberType, typename alphaViewType::non_const_value_type, - typename XViewType::non_const_value_type>( - member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), - X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), - Y.stride_1()); + return TeamAxpyInternal::template invoke( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), + Y.stride_0(), Y.stride_1()); } /// @@ -313,68 +238,43 @@ KOKKOS_INLINE_FUNCTION int TeamAxpy::invoke( template template 
-KOKKOS_INLINE_FUNCTION int TeamVectorAxpy::invoke( - const MemberType& member, const alphaViewType& alpha, const XViewType& X, - const YViewType& Y) { +KOKKOS_INLINE_FUNCTION int TeamVectorAxpy::invoke(const MemberType& member, const alphaViewType& alpha, + const XViewType& X, const YViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::axpy: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::axpy: YViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::axpy: alphaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::axpy: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::axpy: YViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::axpy: alphaViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::axpy: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::axpy: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::axpy: First dimension of X and alpha do not match: X: " - "%d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::axpy: First dimension of X and alpha do not match: X: " "%d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } #endif if (X.extent(0) == 1) { - KokkosBlas::Experimental::axpy( - member, alpha.data()[0], Kokkos::subview(X, 0, Kokkos::ALL), - Kokkos::subview(Y, 0, Kokkos::ALL)); + KokkosBlas::Experimental::axpy(member, alpha.data()[0], Kokkos::subview(X, 0, Kokkos::ALL), + Kokkos::subview(Y, 0, Kokkos::ALL)); return 0; } - return TeamVectorAxpyInternal::invoke< - MemberType, typename alphaViewType::non_const_value_type, - typename XViewType::non_const_value_type, - typename XViewType::array_layout>(member, X.extent(0), X.extent(1), - alpha.data(), alpha.stride_0(), - X.data(), X.stride_0(), X.stride_1(), - Y.data(), Y.stride_0(), Y.stride_1()); + return TeamVectorAxpyInternal::invoke( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), + Y.stride_0(), Y.stride_1()); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Impl.hpp index 
5b693bb87ad7..e11106cc24df 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Impl.hpp @@ -29,87 +29,57 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( - const AViewType &A, const BViewType &B) { - return SerialCopyInternal::invoke(A.extent(0), A.data(), A.stride_0(), - B.data(), B.stride_0()); +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke(const AViewType &A, const BViewType &B) { + return SerialCopyInternal::invoke(A.extent(0), A.data(), A.stride_0(), B.data(), B.stride_0()); } template <> template -KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( - const AViewType &A, const BViewType &B) { - return SerialCopyInternal::invoke(A.extent(0), A.data(), A.stride_0(), - B.data(), B.stride_0()); +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke(const AViewType &A, const BViewType &B) { + return SerialCopyInternal::invoke(A.extent(0), A.data(), A.stride_0(), B.data(), B.stride_0()); } template <> template -KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( - const AViewType &A, const BViewType &B) { +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke(const AViewType &A, const BViewType &B) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: AViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: BViewType is not a Kokkos::View."); - static_assert(AViewType::rank == 2, - "KokkosBatched::copy: AViewType must have rank 2."); - static_assert(BViewType::rank == 2, - "KokkosBatched::copy: BViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: BViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::copy: AViewType must have rank 2."); + static_assert(BViewType::rank == 2, 
"KokkosBatched::copy: BViewType must have rank 2."); // Check compatibility of dimensions at run time. if (A.extent(0) != B.extent(0) || A.extent(1) != B.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x %d, " - "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); -#else Kokkos::printf( "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x %d, " "B: %d x %d\n", (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); -#endif return 1; } #endif - return SerialCopyInternal::invoke(A.extent(0), A.extent(1), A.data(), - A.stride_0(), A.stride_1(), B.data(), + return SerialCopyInternal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } template <> template -KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( - const AViewType &A, const BViewType &B) { +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke(const AViewType &A, const BViewType &B) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: AViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: BViewType is not a Kokkos::View."); - static_assert(AViewType::rank == 2, - "KokkosBatched::copy: AViewType must have rank 2."); - static_assert(BViewType::rank == 2, - "KokkosBatched::copy: BViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: BViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::copy: AViewType must have rank 2."); + static_assert(BViewType::rank == 2, "KokkosBatched::copy: BViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (A.extent(0) != B.extent(0) || A.extent(1) != B.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x %d, " - "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); -#else Kokkos::printf( "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x %d, " "B: %d x %d\n", (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); -#endif return 1; } #endif - return SerialCopyInternal::invoke(A.extent(1), A.extent(0), A.data(), - A.stride_1(), A.stride_0(), B.data(), + return SerialCopyInternal::invoke(A.extent(1), A.extent(0), A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } @@ -120,68 +90,44 @@ KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( template struct TeamCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { - return TeamCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), - B.data(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { + return TeamCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), B.data(), B.stride_0()); } }; template struct TeamCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { - return TeamCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), - B.data(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { + return TeamCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), B.data(), B.stride_0()); } }; template struct TeamCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, 
const AViewType &A, const BViewType &B) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: AViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: BViewType is not a Kokkos::View."); - static_assert(AViewType::rank == 2, - "KokkosBatched::copy: AViewType must have rank 2."); - static_assert(BViewType::rank == 2, - "KokkosBatched::copy: BViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: BViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::copy: AViewType must have rank 2."); + static_assert(BViewType::rank == 2, "KokkosBatched::copy: BViewType must have rank 2."); // Check compatibility of dimensions at run time. if (A.extent(0) != B.extent(0) || A.extent(1) != B.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " - "%d, " - "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#else Kokkos::printf( "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " "%d, " "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); return 1; } #endif if (A.extent(0) == 1) { - return TeamCopy::invoke( - member, Kokkos::subview(A, 0, Kokkos::ALL), - Kokkos::subview(B, 0, Kokkos::ALL)); + return TeamCopy::invoke(member, Kokkos::subview(A, 0, Kokkos::ALL), + Kokkos::subview(B, 0, Kokkos::ALL)); } - return TeamCopyInternal::invoke(member, A.extent(0), A.extent(1), A.data(), - A.stride_0(), A.stride_1(), B.data(), + return TeamCopyInternal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; 
@@ -189,46 +135,28 @@ struct TeamCopy { template struct TeamCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: AViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: BViewType is not a Kokkos::View."); - static_assert(AViewType::rank == 2, - "KokkosBatched::copy: AViewType must have rank 2."); - static_assert(BViewType::rank == 2, - "KokkosBatched::copy: BViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: BViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::copy: AViewType must have rank 2."); + static_assert(BViewType::rank == 2, "KokkosBatched::copy: BViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (A.extent(0) != B.extent(0) || A.extent(1) != B.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " - "%d, " - "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#else Kokkos::printf( "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " "%d, " "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); return 1; } #endif if (A.extent(1) == 1) { - return TeamCopy::invoke( - member, Kokkos::subview(A, Kokkos::ALL, 0), - Kokkos::subview(B, Kokkos::ALL, 0)); + return TeamCopy::invoke(member, Kokkos::subview(A, Kokkos::ALL, 0), + Kokkos::subview(B, Kokkos::ALL, 0)); } - return TeamCopyInternal::invoke(member, A.extent(1), A.extent(0), A.data(), - A.stride_1(), A.stride_0(), B.data(), + return TeamCopyInternal::invoke(member, A.extent(1), A.extent(0), A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; @@ -240,68 +168,44 @@ struct TeamCopy { template struct TeamVectorCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { - return TeamVectorCopyInternal::invoke(member, A.extent(0), A.data(), - A.stride_0(), B.data(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { + return TeamVectorCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), B.data(), B.stride_0()); } }; template struct TeamVectorCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { - return TeamVectorCopyInternal::invoke(member, A.extent(0), A.data(), - A.stride_0(), B.data(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, 
const BViewType &B) { + return TeamVectorCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), B.data(), B.stride_0()); } }; template struct TeamVectorCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: AViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: BViewType is not a Kokkos::View."); - static_assert(AViewType::rank == 2, - "KokkosBatched::copy: AViewType must have rank 2."); - static_assert(BViewType::rank == 2, - "KokkosBatched::copy: BViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: BViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::copy: AViewType must have rank 2."); + static_assert(BViewType::rank == 2, "KokkosBatched::copy: BViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (A.extent(0) != B.extent(0) || A.extent(1) != B.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " - "%d, " - "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#else Kokkos::printf( "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " "%d, " "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); return 1; } #endif if (A.extent(0) == 1) { - return TeamVectorCopy::invoke( - member, Kokkos::subview(A, 0, Kokkos::ALL), - Kokkos::subview(B, 0, Kokkos::ALL)); + return TeamVectorCopy::invoke(member, Kokkos::subview(A, 0, Kokkos::ALL), + Kokkos::subview(B, 0, Kokkos::ALL)); } - return TeamVectorCopyInternal::invoke(member, A.extent(0), A.extent(1), - A.data(), A.stride_0(), A.stride_1(), + return TeamVectorCopyInternal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; @@ -309,46 +213,28 @@ struct TeamVectorCopy { template struct TeamVectorCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: AViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::copy: BViewType is not a Kokkos::View."); - static_assert(AViewType::rank == 2, - "KokkosBatched::copy: AViewType must have rank 2."); - static_assert(BViewType::rank == 2, - "KokkosBatched::copy: BViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::copy: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, 
"KokkosBatched::copy: BViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::copy: AViewType must have rank 2."); + static_assert(BViewType::rank == 2, "KokkosBatched::copy: BViewType must have rank 2."); // Check compatibility of dimensions at run time. if (A.extent(0) != B.extent(0) || A.extent(1) != B.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " - "%d, " - "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#else Kokkos::printf( "KokkosBatched::copy: Dimensions of A and B do not match: A: %d x " "%d, " "B: %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), - (int)B.extent(1)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)B.extent(0), (int)B.extent(1)); return 1; } #endif if (A.extent(1) == 1) { - return TeamVectorCopy::invoke( - member, Kokkos::subview(A, Kokkos::ALL, 0), - Kokkos::subview(B, Kokkos::ALL, 0)); + return TeamVectorCopy::invoke(member, Kokkos::subview(A, Kokkos::ALL, 0), + Kokkos::subview(B, Kokkos::ALL, 0)); } - return TeamVectorCopyInternal::invoke(member, A.extent(1), A.extent(0), - A.data(), A.stride_1(), A.stride_0(), + return TeamVectorCopyInternal::invoke(member, A.extent(1), A.extent(0), A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Internal.hpp index ca59e4f79c6d..004c62646a41 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Copy_Internal.hpp @@ -28,9 +28,8 @@ namespace KokkosBatched { struct SerialCopyInternal { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const int m, const ValueType *KOKKOS_RESTRICT A, const int as0, - /* */ ValueType 
*KOKKOS_RESTRICT B, const int bs0) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int m, const ValueType *KOKKOS_RESTRICT A, const int as0, + /* */ ValueType *KOKKOS_RESTRICT B, const int bs0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -39,10 +38,9 @@ struct SerialCopyInternal { return 0; } template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const int m, const int n, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /* */ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int m, const int n, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /* */ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { if (as1 < as0) for (int i = 0; i < m; ++i) invoke(n, A + i * as0, as1, B + i * bs0, bs1); else @@ -56,30 +54,23 @@ struct SerialCopyInternal { /// ================== struct TeamCopyInternal { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const ValueType *KOKKOS_RESTRICT A, - const int as0, - /* */ ValueType *KOKKOS_RESTRICT B, const int bs0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), - [&](const int &i) { B[i * bs0] = A[i * as0]; }); + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, + const ValueType *KOKKOS_RESTRICT A, const int as0, + /* */ ValueType *KOKKOS_RESTRICT B, const int bs0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { B[i * bs0] = A[i * as0]; }); // member.team_barrier(); return 0; } template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /* */ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + const ValueType *KOKKOS_RESTRICT A, 
const int as0, const int as1, + /* */ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { if (m >= n) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - SerialCopyInternal::invoke(n, A + i * as0, as1, B + i * bs0, bs1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), + [&](const int &i) { SerialCopyInternal::invoke(n, A + i * as0, as1, B + i * bs0, bs1); }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int &j) { - SerialCopyInternal::invoke(m, A + j * as1, as0, B + j * bs1, bs0); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), + [&](const int &j) { SerialCopyInternal::invoke(m, A + j * as1, as0, B + j * bs1, bs0); }); } // member.team_barrier(); return 0; @@ -91,36 +82,27 @@ struct TeamCopyInternal { /// ======================== struct TeamVectorCopyInternal { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const ValueType *KOKKOS_RESTRICT A, - const int as0, - /* */ ValueType *KOKKOS_RESTRICT B, const int bs0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { B[i * bs0] = A[i * as0]; }); + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, + const ValueType *KOKKOS_RESTRICT A, const int as0, + /* */ ValueType *KOKKOS_RESTRICT B, const int bs0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { B[i * bs0] = A[i * as0]; }); // member.team_barrier(); return 0; } template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /* */ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /* */ ValueType *KOKKOS_RESTRICT B, 
const int bs0, const int bs1) { if (as0 > as1) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { - B[i * bs0 + j * bs1] = A[i * as0 + j * as1]; - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), + [&](const int &j) { B[i * bs0 + j * bs1] = A[i * as0 + j * as1]; }); + }); } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, m), [&](const int &i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), - [&](const int &j) { - B[i * bs0 + j * bs1] = A[i * as0 + j * as1]; - }); - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), [&](const int &i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), + [&](const int &j) { B[i * bs0 + j * bs1] = A[i * as0 + j * as1]; }); + }); } // member.team_barrier(); return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Dot_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Dot_Internal.hpp index 854069289ef6..48d1b1f1acd8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Dot_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Dot_Internal.hpp @@ -31,10 +31,9 @@ struct SerialDotInternal { // i \in [0,m) // C = conj(A(:))*B(:) template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const int m, const ValueType *KOKKOS_RESTRICT A, const int as0, - const ValueType *KOKKOS_RESTRICT B, const int bs0, - /* */ MagnitudeType *KOKKOS_RESTRICT C) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int m, const ValueType *KOKKOS_RESTRICT A, const int as0, + const ValueType *KOKKOS_RESTRICT B, const int bs0, + /* */ MagnitudeType *KOKKOS_RESTRICT C) { using ats = Kokkos::ArithTraits; C[0] = ValueType(0); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -50,13 +49,11 @@ struct SerialDotInternal { // 
j \in [0,n), i \in [0,m) // C(j) = conj(A(:,j))*B(:,j) template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const int n, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, const ValueType *KOKKOS_RESTRICT B, - const int bs0, const int bs1, - /* */ MagnitudeType *KOKKOS_RESTRICT C, const int cs) { - for (int j = 0; j < n; ++j) - invoke(m, A + j * as1, as0, B + j * bs1, bs0, C + j * cs); + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, + const int bs1, + /* */ MagnitudeType *KOKKOS_RESTRICT C, const int cs) { + for (int j = 0; j < n; ++j) invoke(m, A + j * as1, as0, B + j * bs1, bs0, C + j * cs); return 0; } }; @@ -69,10 +66,10 @@ struct SerialDotInternal { // C = conj(A(:))*B(:) struct TeamDotInternal { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const ValueType *KOKKOS_RESTRICT A, - const int as0, const ValueType *KOKKOS_RESTRICT B, const int bs0, - /* */ MagnitudeType *KOKKOS_RESTRICT C) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, + const ValueType *KOKKOS_RESTRICT A, const int as0, + const ValueType *KOKKOS_RESTRICT B, const int bs0, + /* */ MagnitudeType *KOKKOS_RESTRICT C) { using ats = Kokkos::ArithTraits; ValueType t(0); Kokkos::parallel_reduce( @@ -89,11 +86,10 @@ struct TeamDotInternal { // j \in [0,n), i \in [0,m) // C(j) = conj(A(:,j))*B(:,j) template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, - /* */ MagnitudeType *KOKKOS_RESTRICT C, const int cs) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + const ValueType *KOKKOS_RESTRICT A, const int 
as0, const int as1, + const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, + /* */ MagnitudeType *KOKKOS_RESTRICT C, const int cs) { using ats = Kokkos::ArithTraits; Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { ValueType t(0); @@ -117,10 +113,10 @@ struct TeamDotInternal { // C = conj(A(:))*B(:) struct TeamVectorDotInternal { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const ValueType *KOKKOS_RESTRICT A, - const int as0, const ValueType *KOKKOS_RESTRICT B, const int bs0, - /* */ MagnitudeType *KOKKOS_RESTRICT C) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, + const ValueType *KOKKOS_RESTRICT A, const int as0, + const ValueType *KOKKOS_RESTRICT B, const int bs0, + /* */ MagnitudeType *KOKKOS_RESTRICT C) { using ats = Kokkos::ArithTraits; ValueType t(0); Kokkos::parallel_reduce( @@ -137,11 +133,10 @@ struct TeamVectorDotInternal { // j \in [0,n), i \in [0,m) // C(j) = conj(A(:,j))*B(:,j) template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, - /* */ MagnitudeType *KOKKOS_RESTRICT C, const int cs) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, + /* */ MagnitudeType *KOKKOS_RESTRICT C, const int cs) { using ats = Kokkos::ArithTraits; Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { ValueType t(0); @@ -167,122 +162,71 @@ struct TeamVectorDotInternal { template <> struct SerialDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, - const YViewType &Y, - const NormViewType &dot) { + 
KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, const YViewType &Y, const NormViewType &dot) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: NormViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::dot: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::dot: YViewType must have rank 2."); - static_assert(NormViewType::rank == 1, - "KokkosBatched::dot: NormViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::rank == 1, "KokkosBatched::dot: NormViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(1) != dot.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Second dimension of X and alpha do not match: " - "X: " - "%d x %d, dot: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#else Kokkos::printf( "KokkosBatched::dot: Second dimension of X and alpha do not match: " "X: " "%d x %d, dot: %d\n", (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#endif return 1; } #endif - return SerialDotInternal::template invoke< - typename XViewType::non_const_value_type, - typename NormViewType::non_const_value_type>( - X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), - Y.data(), Y.stride_0(), Y.stride_1(), dot.data(), dot.stride_0()); + return SerialDotInternal::template invoke( + X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1(), + dot.data(), dot.stride_0()); } }; template <> struct SerialDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, - const YViewType &Y, - const NormViewType &dot) { + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, const YViewType &Y, const NormViewType &dot) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: YViewType is not a Kokkos::View."); - 
static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: NormViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::dot: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::dot: YViewType must have rank 2."); - static_assert(NormViewType::rank == 1, - "KokkosBatched::dot: NormViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::rank == 1, "KokkosBatched::dot: NormViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != dot.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: First dimension of X and alpha do not match: X: " - "%d x %d, dot: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#else Kokkos::printf( "KokkosBatched::dot: First dimension of X and alpha do not match: X: " "%d x %d, dot: %d\n", (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#endif return 1; } #endif - return SerialDotInternal::template invoke< - typename XViewType::non_const_value_type, - typename NormViewType::non_const_value_type>( - X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), - Y.data(), Y.stride_1(), Y.stride_0(), dot.data(), dot.stride_0()); + return SerialDotInternal::template invoke( + X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), Y.data(), Y.stride_1(), Y.stride_0(), + dot.data(), dot.stride_0()); } }; @@ -293,140 +237,87 @@ struct SerialDot { template struct TeamDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const NormViewType &dot) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - 
"KokkosBatched::dot: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: NormViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::dot: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::dot: YViewType must have rank 2."); - static_assert(NormViewType::rank == 1, - "KokkosBatched::dot: NormViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::rank == 1, "KokkosBatched::dot: NormViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(1) != dot.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Second dimension of X and alpha do not match: " - "X: " - "%d x %d, dot: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#else Kokkos::printf( "KokkosBatched::dot: Second dimension of X and alpha do not match: " "X: " "%d x %d, dot: %d\n", (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#endif return 1; } #endif if (X.extent(1) == 1) { - dot(0) = KokkosBlas::Experimental::dot( - member, Kokkos::subview(X, Kokkos::ALL, 0), - Kokkos::subview(Y, Kokkos::ALL, 0)); + dot(0) = + KokkosBlas::Experimental::dot(member, Kokkos::subview(X, Kokkos::ALL, 0), Kokkos::subview(Y, Kokkos::ALL, 0)); return 0; } - return TeamDotInternal::template invoke< - MemberType, typename XViewType::non_const_value_type, - typename NormViewType::non_const_value_type>( - member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), - Y.data(), Y.stride_0(), Y.stride_1(), dot.data(), dot.stride_0()); + return TeamDotInternal::template invoke( + member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1(), + dot.data(), dot.stride_0()); } }; template struct TeamDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int 
invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const NormViewType &dot) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: NormViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::dot: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::dot: YViewType must have rank 2."); - static_assert(NormViewType::rank == 1, - "KokkosBatched::dot: NormViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::rank == 1, "KokkosBatched::dot: NormViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != dot.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: First dimension of X and alpha do not match: X: " - "%d x %d, dot: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#else Kokkos::printf( "KokkosBatched::dot: First dimension of X and alpha do not match: X: " "%d x %d, dot: %d\n", (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#endif return 1; } #endif if (X.extent(0) == 1) { - dot(0) = KokkosBlas::Experimental::dot( - member, Kokkos::subview(X, 0, Kokkos::ALL), - Kokkos::subview(Y, 0, Kokkos::ALL)); + dot(0) = + KokkosBlas::Experimental::dot(member, Kokkos::subview(X, 0, Kokkos::ALL), Kokkos::subview(Y, 0, Kokkos::ALL)); return 0; } - return TeamDotInternal::template invoke< - MemberType, typename XViewType::non_const_value_type, - typename NormViewType::non_const_value_type>( - member, X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), - Y.data(), Y.stride_1(), Y.stride_0(), dot.data(), dot.stride_0()); + return TeamDotInternal::template invoke( + member, X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), Y.data(), Y.stride_1(), Y.stride_0(), + dot.data(), dot.stride_0()); } }; @@ -437,140 +328,87 @@ struct TeamDot { template struct TeamVectorDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + 
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const NormViewType &dot) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: NormViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::dot: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::dot: YViewType must have rank 2."); - static_assert(NormViewType::rank == 1, - "KokkosBatched::dot: NormViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::rank == 1, "KokkosBatched::dot: NormViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(1) != dot.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Second dimension of X and alpha do not match: " - "X: " - "%d x %d, dot: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#else Kokkos::printf( "KokkosBatched::dot: Second dimension of X and alpha do not match: " "X: " "%d x %d, dot: %d\n", (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#endif return 1; } #endif if (X.extent(1) == 1) { - dot(0) = KokkosBlas::Experimental::dot( - member, Kokkos::subview(X, Kokkos::ALL, 0), - Kokkos::subview(Y, Kokkos::ALL, 0)); + dot(0) = + KokkosBlas::Experimental::dot(member, Kokkos::subview(X, Kokkos::ALL, 0), Kokkos::subview(Y, Kokkos::ALL, 0)); return 0; } - return TeamVectorDotInternal::template invoke< - MemberType, typename XViewType::non_const_value_type, - typename NormViewType::non_const_value_type>( - member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), - Y.data(), Y.stride_0(), Y.stride_1(), dot.data(), dot.stride_0()); + return TeamVectorDotInternal::template invoke( + member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1(), + dot.data(), dot.stride_0()); } }; template struct TeamVectorDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION 
static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const NormViewType &dot) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: XViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: YViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::dot: NormViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::dot: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::dot: YViewType must have rank 2."); - static_assert(NormViewType::rank == 1, - "KokkosBatched::dot: NormViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::rank == 1, "KokkosBatched::dot: NormViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != dot.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::dot: First dimension of X and alpha do not match: X: " - "%d x %d, dot: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#else Kokkos::printf( "KokkosBatched::dot: First dimension of X and alpha do not match: X: " "%d x %d, dot: %d\n", (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); -#endif return 1; } #endif if (X.extent(0) == 1) { - dot(0) = KokkosBlas::Experimental::dot( - member, Kokkos::subview(X, 0, Kokkos::ALL), - Kokkos::subview(Y, 0, Kokkos::ALL)); + dot(0) = + KokkosBlas::Experimental::dot(member, Kokkos::subview(X, 0, Kokkos::ALL), Kokkos::subview(Y, 0, Kokkos::ALL)); return 0; } - return TeamVectorDotInternal::template invoke< - MemberType, typename XViewType::non_const_value_type, - typename NormViewType::non_const_value_type>( - member, X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), - Y.data(), Y.stride_1(), Y.stride_0(), dot.data(), dot.stride_0()); + return TeamVectorDotInternal::template invoke( + member, X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), Y.data(), Y.stride_1(), Y.stride_0(), + dot.data(), dot.stride_0()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Impl.hpp 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Impl.hpp index 49a7184e3948..8ca3b09e5977 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Impl.hpp @@ -26,38 +26,28 @@ namespace KokkosBatched { /// /// Serial Impl /// =========== -template -KOKKOS_INLINE_FUNCTION int SerialEigendecomposition::invoke( - const AViewType &A, const EViewType &er, const EViewType &ei, - const UViewType &UL, const UViewType &UR, const WViewType &W) { +template +KOKKOS_INLINE_FUNCTION int SerialEigendecomposition::invoke(const AViewType &A, const EViewType &er, + const EViewType &ei, const UViewType &UL, + const UViewType &UR, const WViewType &W) { /// view checking const int m = A.extent(0); assert(m == int(A.extent(1)) && "Eigendecomposition: A is not square"); - assert(m == int(er.extent(0)) && - "Eigendecomposition: Length of er does not match to A's dimension"); - assert(m == int(ei.extent(0)) && - "Eigendecomposition: Length of ei does not match to A's dimension"); - assert(m == int(UL.extent(0)) && - "Eigendecomposition: Length of UL does not match to A's dimension"); - assert(m == int(UL.extent(1)) && - "Eigendecomposition: Width of UL does not match to A's dimension"); - assert(m == int(UR.extent(0)) && - "Eigendecomposition: Length of UR does not match to A's dimension"); - assert(m == int(UR.extent(1)) && - "Eigendecomposition: Width of UR does not match to A's dimension"); + assert(m == int(er.extent(0)) && "Eigendecomposition: Length of er does not match to A's dimension"); + assert(m == int(ei.extent(0)) && "Eigendecomposition: Length of ei does not match to A's dimension"); + assert(m == int(UL.extent(0)) && "Eigendecomposition: Length of UL does not match to A's dimension"); + assert(m == int(UL.extent(1)) && "Eigendecomposition: Width of UL does not match to A's dimension"); + assert(m == 
int(UR.extent(0)) && "Eigendecomposition: Length of UR does not match to A's dimension"); + assert(m == int(UR.extent(1)) && "Eigendecomposition: Width of UR does not match to A's dimension"); // assert(int(W.extent(0)) >= int(2*m*m+5*m) && "Eigendecomposition: workspace // size is too small"); - assert(int(W.stride(0)) == int(1) && - "Eigendecomposition: Provided workspace is not contiguous"); + assert(int(W.stride(0)) == int(1) && "Eigendecomposition: Provided workspace is not contiguous"); /// static assert A,er,ei,UL,UR,W has the same value_type /// static assert all views have the same memory space return m ? SerialEigendecompositionInternal ::invoke( - A.extent(0), A.data(), A.stride(0), A.stride(1), er.data(), - er.stride(0), ei.data(), ei.stride(0), UL.data(), UL.stride(0), - UL.stride(1), UR.data(), UR.stride(0), UR.stride(1), W.data(), - W.extent(0)) + A.extent(0), A.data(), A.stride(0), A.stride(1), er.data(), er.stride(0), ei.data(), ei.stride(0), + UL.data(), UL.stride(0), UL.stride(1), UR.data(), UR.stride(0), UR.stride(1), W.data(), W.extent(0)) : 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp index c857de19c25d..b1cfb6ef253d 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp @@ -61,11 +61,10 @@ struct SerialEigendecompositionInternal { /// [out]w, [in]wlen /// Workspace template - KOKKOS_INLINE_FUNCTION static int device_invoke( - const int m, RealType* A, const int as0, const int as1, RealType* er, - const int ers, RealType* ei, const int eis, RealType* UL, const int uls0, - const int uls1, RealType* UR, const int urs0, const int urs1, RealType* w, - const int wlen) { + KOKKOS_INLINE_FUNCTION static int device_invoke(const int m, 
RealType* A, const int as0, const int as1, RealType* er, + const int ers, RealType* ei, const int eis, RealType* UL, + const int uls0, const int uls1, RealType* UR, const int urs0, + const int urs1, RealType* w, const int wlen) { /// until debugging is done, comment out the code /// testing happens only for TPLs on host. static_assert(false, @@ -336,14 +335,10 @@ struct SerialEigendecompositionInternal { } template - inline static int host_invoke(const int m, RealType* A, const int as0, - const int as1, RealType* er, const int ers, - RealType* ei, const int eis, RealType* UL, - const int uls0, const int uls1, RealType* UR, - const int urs0, const int urs1, RealType* w, - const int wlen) { -#if defined(__KOKKOSBATCHED_ENABLE_LAPACKE__) || \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) + inline static int host_invoke(const int m, RealType* A, const int as0, const int as1, RealType* er, const int ers, + RealType* ei, const int eis, RealType* UL, const int uls0, const int uls1, RealType* UR, + const int urs0, const int urs1, RealType* w, const int wlen) { +#if defined(__KOKKOSBATCHED_ENABLE_LAPACKE__) || defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) int matrix_layout(0), lda(0), uls(0), urs(0); if (as0 == 1) { assert(uls0 == 1 && "UL is not column major"); @@ -365,33 +360,29 @@ struct SerialEigendecompositionInternal { } assert(matrix_layout != 0 && "Either stride of A is not unit"); if (std::is_same::value) { - LAPACKE_sgeev(matrix_layout, 'V', 'V', m, (float*)A, lda, (float*)er, - (float*)ei, (float*)UL, uls, (float*)UR, urs); + LAPACKE_sgeev(matrix_layout, 'V', 'V', m, (float*)A, lda, (float*)er, (float*)ei, (float*)UL, uls, (float*)UR, + urs); } else if (std::is_same::value) { - LAPACKE_dgeev(matrix_layout, 'V', 'V', m, (double*)A, lda, (double*)er, - (double*)ei, (double*)UL, uls, (double*)UR, urs); + LAPACKE_dgeev(matrix_layout, 'V', 'V', m, (double*)A, lda, (double*)er, (double*)ei, (double*)UL, uls, + (double*)UR, urs); } else { // no complex is needed for this 
moment assert(false && "complex type is not supported"); } #else - device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, uls1, UR, urs0, - urs1, w, wlen); + device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, uls1, UR, urs0, urs1, w, wlen); #endif return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, RealType* A, const int as0, const int as1, RealType* er, - const int ers, RealType* ei, const int eis, RealType* UL, const int uls0, - const int uls1, RealType* UR, const int urs0, const int urs1, RealType* w, - const int wlen) { - KOKKOS_IF_ON_HOST((host_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, - uls1, UR, urs0, urs1, w, wlen);)) - KOKKOS_IF_ON_DEVICE((device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, - uls0, uls1, UR, urs0, urs1, w, wlen);)) + KOKKOS_INLINE_FUNCTION static int invoke(const int m, RealType* A, const int as0, const int as1, RealType* er, + const int ers, RealType* ei, const int eis, RealType* UL, const int uls0, + const int uls1, RealType* UR, const int urs0, const int urs1, RealType* w, + const int wlen) { + KOKKOS_IF_ON_HOST((host_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, uls1, UR, urs0, urs1, w, wlen);)) + KOKKOS_IF_ON_DEVICE((device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, uls1, UR, urs0, urs1, w, wlen);)) return 0; } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Impl.hpp index a05ee11965be..97f68d63de0c 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Impl.hpp @@ -28,37 +28,28 @@ namespace KokkosBatched { /// ========= template -template -KOKKOS_INLINE_FUNCTION int TeamVectorEigendecomposition::invoke( - const MemberType &member, const AViewType &A, const EViewType &er, - const 
EViewType &ei, const UViewType &UL, const UViewType &UR, - const WViewType &W) { +template +KOKKOS_INLINE_FUNCTION int TeamVectorEigendecomposition::invoke(const MemberType &member, + const AViewType &A, const EViewType &er, + const EViewType &ei, const UViewType &UL, + const UViewType &UR, const WViewType &W) { /// view checking const int m = A.extent(0); assert(m == A.extent(1) && "Eigendecomposition: A is not square"); - assert(m == er.extent(0) && - "Eigendecomposition: Length of er does not match to A's dimension"); - assert(m == ei.extent(0) && - "Eigendecomposition: Length of ei does not match to A's dimension"); - assert(m == UL.extent(0) && - "Eigendecomposition: Length of UL does not match to A's dimension"); - assert(m == UL.extent(1) && - "Eigendecomposition: Width of UL does not match to A's dimension"); - assert(m == UR.extent(0) && - "Eigendecomposition: Length of UR does not match to A's dimension"); - assert(m == UR.extent(1) && - "Eigendecomposition: Width of UR does not match to A's dimension"); + assert(m == er.extent(0) && "Eigendecomposition: Length of er does not match to A's dimension"); + assert(m == ei.extent(0) && "Eigendecomposition: Length of ei does not match to A's dimension"); + assert(m == UL.extent(0) && "Eigendecomposition: Length of UL does not match to A's dimension"); + assert(m == UL.extent(1) && "Eigendecomposition: Width of UL does not match to A's dimension"); + assert(m == UR.extent(0) && "Eigendecomposition: Length of UR does not match to A's dimension"); + assert(m == UR.extent(1) && "Eigendecomposition: Width of UR does not match to A's dimension"); // assert(W.extent(0) >= (2*m*m+5*m) && "Eigendecomposition: workspace size is // too small"); - assert(W.stride(0) == 1 && - "Eigendecomposition: Provided workspace is not contiguous"); + assert(W.stride(0) == 1 && "Eigendecomposition: Provided workspace is not contiguous"); - return m ? 
TeamVectorEigendecompositionInternal ::invoke( - member, A.extent(0), A.data(), A.stride(0), A.stride(1), - er.data(), er.stride(0), ei.data(), ei.stride(0), UL.data(), - UL.stride(0), UL.stride(1), UR.data(), UR.stride(0), - UR.stride(1), W.data(), W.extent(0)) + return m ? TeamVectorEigendecompositionInternal ::invoke(member, A.extent(0), A.data(), A.stride(0), A.stride(1), + er.data(), er.stride(0), ei.data(), ei.stride(0), UL.data(), + UL.stride(0), UL.stride(1), UR.data(), UR.stride(0), + UR.stride(1), W.data(), W.extent(0)) : 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp index 50324338ee40..567bbd3ad569 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp @@ -40,11 +40,11 @@ namespace KokkosBatched { struct TeamVectorEigendecompositionInternal { template - KOKKOS_INLINE_FUNCTION static int device_invoke( - const MemberType &member, const int m, RealType *A, const int as0, - const int as1, RealType *er, const int ers, RealType *ei, const int eis, - RealType *UL, const int uls0, const int uls1, RealType *UR, - const int urs0, const int urs1, RealType *w, const int wlen) { + KOKKOS_INLINE_FUNCTION static int device_invoke(const MemberType &member, const int m, RealType *A, const int as0, + const int as1, RealType *er, const int ers, RealType *ei, + const int eis, RealType *UL, const int uls0, const int uls1, + RealType *UR, const int urs0, const int urs1, RealType *w, + const int wlen) { /// not yet implemented return 0; } @@ -74,13 +74,11 @@ struct TeamVectorEigendecompositionInternal { /// [out]w, [in]wlen /// Workspace template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, RealType *A, 
const int as0, - const int as1, RealType *er, const int ers, RealType *ei, const int eis, - RealType *UL, const int uls0, const int uls1, RealType *UR, - const int urs0, const int urs1, RealType *w, const int wlen) { - static_assert(false, - "TeamVector eigendecomposition is not implemented yet."); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, RealType *A, const int as0, + const int as1, RealType *er, const int ers, RealType *ei, const int eis, + RealType *UL, const int uls0, const int uls1, RealType *UR, const int urs0, + const int urs1, RealType *w, const int wlen) { + static_assert(false, "TeamVector eigendecomposition is not implemented yet."); /* // DO NOT USE // diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigenvalue_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigenvalue_Serial_Internal.hpp index ae4cf10634af..0ac8ed3859a8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigenvalue_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Eigenvalue_Serial_Internal.hpp @@ -61,11 +61,9 @@ struct SerialEigenvalueInternal { /// returns -1. 
template KOKKOS_INLINE_FUNCTION static int invoke(const int m, - /* */ RealType *H, const int hs0, - const int hs1, + /* */ RealType *H, const int hs0, const int hs1, /* */ RealType *er, const int ers, - /* */ RealType *ei, const int eis, - const bool restart = false, + /* */ RealType *ei, const int eis, const bool restart = false, const int user_max_iteration = -1) { typedef RealType real_type; typedef Kokkos::ArithTraits ats; @@ -94,8 +92,7 @@ struct SerialEigenvalueInternal { /// compute eigenvalues from the characteristic determinant equation bool is_complex; Kokkos::complex lambda1, lambda2; - SerialWilkinsonShiftInternal::invoke(H[0], H[hs1], H[hs0], H[hs], - &lambda1, &lambda2, &is_complex); + SerialWilkinsonShiftInternal::invoke(H[0], H[hs1], H[hs0], H[hs], &lambda1, &lambda2, &is_complex); er[0] = lambda1.real(); ei[0] = lambda1.imag(); er[1] = lambda2.real(); @@ -150,9 +147,8 @@ struct SerialEigenvalueInternal { bool is_complex; real_type *sub2x2 = H + (mend - 2) * hs; if (2 == mdiff) { - SerialWilkinsonShiftInternal::invoke( - sub2x2[0], sub2x2[hs1], sub2x2[hs0], sub2x2[hs], &lambda1, - &lambda2, &is_complex); + SerialWilkinsonShiftInternal::invoke(sub2x2[0], sub2x2[hs1], sub2x2[hs0], sub2x2[hs], &lambda1, + &lambda2, &is_complex); sub2x2[hs0] = zero; /// eigenvalues are from wilkinson shift @@ -161,13 +157,10 @@ struct SerialEigenvalueInternal { er[(mbeg + 1) * ers] = lambda2.real(); ei[(mbeg + 1) * eis] = lambda2.imag(); } else { - SerialWilkinsonShiftInternal::invoke( - sub2x2[0], sub2x2[hs1], sub2x2[hs0], sub2x2[hs], &lambda1, - &lambda2, &is_complex); + SerialWilkinsonShiftInternal::invoke(sub2x2[0], sub2x2[hs1], sub2x2[hs0], sub2x2[hs], &lambda1, + &lambda2, &is_complex); - SerialFrancisInternal::invoke(0, mdiff, mdiff, H + hs * mbeg, - hs0, hs1, lambda1, lambda2, - is_complex); + SerialFrancisInternal::invoke(0, mdiff, mdiff, H + hs * mbeg, hs0, hs1, lambda1, lambda2, is_complex); /* */ auto &val1 = *(sub2x2 + hs0); /* */ auto &val2 = *(sub2x2 - 
hs1); const auto abs_val1 = ats::abs(val1); @@ -217,18 +210,15 @@ struct SerialEigenvalueInternal { /// complex interface template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, - /* */ RealType *H, const int hs0, const int hs1, - /* */ Kokkos::complex *e, const int es, - const int max_iteration = 300, - const RealType user_tolerence = RealType(-1), - const bool restart = false) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, + /* */ RealType *H, const int hs0, const int hs1, + /* */ Kokkos::complex *e, const int es, + const int max_iteration = 300, const RealType user_tolerence = RealType(-1), + const bool restart = false) { RealType *er = (RealType *)e; RealType *ei = er + 1; const int two_es = 2 * es; - return invoke(m, H, hs0, hs1, er, two_es, ei, two_es, user_tolerence, - restart, max_iteration); + return invoke(m, H, hs0, hs1, er, two_es, ei, two_es, user_tolerence, restart, max_iteration); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp index ffe911d688fb..42dc9480144d 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp @@ -27,9 +27,7 @@ namespace KokkosBatched { /// ===================== struct SerialFindAmaxInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, - const ValueType *KOKKOS_RESTRICT A, - const int as0, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ValueType *KOKKOS_RESTRICT A, const int as0, /**/ IntType *KOKKOS_RESTRICT idx) { ValueType max_val(A[0]); IntType val_loc(0); @@ -50,14 +48,11 @@ struct SerialFindAmaxInternal { /// ======================== struct TeamVectorFindAmaxInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, - const ValueType *KOKKOS_RESTRICT A, + 
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const ValueType *KOKKOS_RESTRICT A, const int as0, /**/ IntType *KOKKOS_RESTRICT idx) { if (m > 0) { - using reducer_value_type = - typename Kokkos::MaxLoc::value_type; + using reducer_value_type = typename Kokkos::MaxLoc::value_type; reducer_value_type value{}; Kokkos::MaxLoc reducer_value(value); Kokkos::parallel_reduce( diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Francis_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Francis_Serial_Internal.hpp index 21587f4481d6..e303cafd1fe1 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Francis_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Francis_Serial_Internal.hpp @@ -32,12 +32,11 @@ namespace KokkosBatched { /// struct SerialFrancisInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const int mbeg, const int mend, const int morg, - /* */ ValueType *HH, const int hs0, const int hs1, - const Kokkos::complex lambda1, - const Kokkos::complex lambda2, const bool is_complex, - /* */ Kokkos::pair *GG, const bool request_schur) { + KOKKOS_INLINE_FUNCTION static int invoke(const int mbeg, const int mend, const int morg, + /* */ ValueType *HH, const int hs0, const int hs1, + const Kokkos::complex lambda1, + const Kokkos::complex lambda2, const bool is_complex, + /* */ Kokkos::pair *GG, const bool request_schur) { typedef ValueType value_type; const int hs = hs0 + hs1; @@ -73,25 +72,21 @@ struct SerialFrancisInternal { // this needs m>=3 // v = M e_1 = (H*H - 2 Re(lambda) H + |lambda|^2 I)e_1 value_type s, t; - const value_type h00 = H[0 * hs0 + 0 * hs1], h01 = H[0 * hs0 + 1 * hs1], - h10 = H[1 * hs0 + 0 * hs1], h11 = H[1 * hs0 + 1 * hs1], + const value_type h00 = H[0 * hs0 + 0 * hs1], h01 = H[0 * hs0 + 1 * hs1], h10 = H[1 * hs0 + 0 * hs1], + h11 = H[1 * hs0 + 1 * hs1], /* */ h21 = H[2 * hs0 + 1 * hs1]; if (is_complex) { s = 
2 * lambda1.real(); t = lambda1.real() * lambda1.real() + lambda1.imag() * lambda1.imag(); } else { - const value_type val = H[(m - 1) * hs]; - const auto dist_lambda1 = - Kokkos::ArithTraits::abs(lambda1.real() - val); - const auto dist_lambda2 = - Kokkos::ArithTraits::abs(lambda2.real() - val); - const value_type lambda = - dist_lambda1 < dist_lambda2 ? lambda1.real() : lambda2.real(); - s = 2 * lambda; - t = lambda * lambda; + const value_type val = H[(m - 1) * hs]; + const auto dist_lambda1 = Kokkos::ArithTraits::abs(lambda1.real() - val); + const auto dist_lambda2 = Kokkos::ArithTraits::abs(lambda2.real() - val); + const value_type lambda = dist_lambda1 < dist_lambda2 ? lambda1.real() : lambda2.real(); + s = 2 * lambda; + t = lambda * lambda; } - v[0] = - h00 * h00 + h01 * h10 /* H^2 e_1 */ - s * h00 /* 2 Re(lambda) */ + t; + v[0] = h00 * h00 + h01 * h10 /* H^2 e_1 */ - s * h00 /* 2 Re(lambda) */ + t; v[1] = h10 * h00 + h11 * h10 /* */ - s * h10; v[2] = h21 * h10; } @@ -112,9 +107,8 @@ struct SerialFrancisInternal { const int mm = m < 4 ? m : 4, nn = m; value_type *Hs = H - mbeg_mult_hs0; - SerialApplyLeftRightGivensInternal ::invoke( - G[0], G[1], mm + mbeg, nn + mrst, H, H + hs0, H + 2 * hs0, Hs, - Hs + hs1, Hs + 2 * hs1, hs0, hs1); + SerialApplyLeftRightGivensInternal ::invoke(G[0], G[1], mm + mbeg, nn + mrst, H, H + hs0, H + 2 * hs0, Hs, + Hs + hs1, Hs + 2 * hs1, hs0, hs1); } /// 1. 
chase the bulge @@ -155,9 +149,8 @@ struct SerialFrancisInternal { value_type *a2 = a1 + hs1; value_type *a3 = a2 + hs1; - SerialApplyLeftRightGivensInternal ::invoke(G[0], G[1], mm + mbeg, - nn + mrst, a1t, a2t, a3t, a1, - a2, a3, hs0, hs1); + SerialApplyLeftRightGivensInternal ::invoke(G[0], G[1], mm + mbeg, nn + mrst, a1t, a2t, a3t, a1, a2, a3, hs0, + hs1); /// ----------------------------------------------------- H_part2x2.mergeToATL(H_part3x3); } @@ -181,8 +174,7 @@ struct SerialFrancisInternal { value_type *a2t = a1t + hs0; value_type *a1 = H_part3x3.A01 - mbeg_mult_hs0; value_type *a2 = a1 + hs1; - SerialApplyLeftRightGivensInternal ::invoke(G[0], mm + mbeg, nn + mrst, - a1t, a2t, a1, a2, hs0, hs1); + SerialApplyLeftRightGivensInternal ::invoke(G[0], mm + mbeg, nn + mrst, a1t, a2t, a1, a2, hs0, hs1); /// ----------------------------------------------------- H_part2x2.mergeToATL(H_part3x3); @@ -192,11 +184,10 @@ struct SerialFrancisInternal { } template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const int mbeg, const int mend, const int morg, - /* */ ValueType *HH, const int hs0, const int hs1, - const Kokkos::complex lambda1, - const Kokkos::complex lambda2, const bool is_complex) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int mbeg, const int mend, const int morg, + /* */ ValueType *HH, const int hs0, const int hs1, + const Kokkos::complex lambda1, + const Kokkos::complex lambda2, const bool is_complex) { return invoke(mbeg, mend, morg, HH, hs0, hs1, lambda1, lambda2, is_complex, (Kokkos::pair *)NULL, false); } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Impl.hpp index 6b3cec25daba..82d6b1641b30 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Impl.hpp @@ -36,44 +36,31 @@ namespace KokkosBatched { /// NT/NT 
/// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B, - const ScalarType beta, - const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { typedef typename CViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = C.extent(0), n = C.extent(1), k = A.extent(1); static_assert(is_vector::value, "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? 
MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1 && C.stride_0() == 1) { - mkl_dgemm_compact(MKL_COL_MAJOR, MKL_NOTRANS, MKL_NOTRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_1(), - (const double *)B.data(), B.stride_1(), beta, - (double *)C.data(), C.stride_1(), format, + mkl_dgemm_compact(MKL_COL_MAJOR, MKL_NOTRANS, MKL_NOTRANS, m, n, k, alpha, (const double *)A.data(), A.stride_1(), + (const double *)B.data(), B.stride_1(), beta, (double *)C.data(), C.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1 && C.stride_1() == 1) { - mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_NOTRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_0(), - (const double *)B.data(), B.stride_0(), beta, - (double *)C.data(), C.stride_0(), format, + mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_NOTRANS, m, n, k, alpha, (const double *)A.data(), A.stride_0(), + (const double *)B.data(), B.stride_0(), beta, (double *)C.data(), C.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -83,80 +70,56 @@ SerialGemm -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B, - const ScalarType beta, - const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), B.data(), B.stride_0(), B.stride_1(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } 
template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), B.data(), B.stride_0(), B.stride_1(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } /// /// T/NT /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B, - const ScalarType beta, - const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { typedef typename CViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = C.extent(0), n = C.extent(1), k = A.extent(0); static_assert(is_vector::value, "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? 
MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1 && C.stride_0() == 1) { - mkl_dgemm_compact(MKL_COL_MAJOR, MKL_TRANS, MKL_NOTRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_1(), - (const double *)B.data(), B.stride_1(), beta, - (double *)C.data(), C.stride_1(), format, + mkl_dgemm_compact(MKL_COL_MAJOR, MKL_TRANS, MKL_NOTRANS, m, n, k, alpha, (const double *)A.data(), A.stride_1(), + (const double *)B.data(), B.stride_1(), beta, (double *)C.data(), C.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1 && C.stride_1() == 1) { - mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_TRANS, MKL_NOTRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_0(), - (const double *)B.data(), B.stride_0(), beta, - (double *)C.data(), C.stride_0(), format, + mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_TRANS, MKL_NOTRANS, m, n, k, alpha, (const double *)A.data(), A.stride_0(), + (const double *)B.data(), B.stride_0(), beta, (double *)C.data(), C.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -166,77 +129,56 @@ SerialGemm -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), B.data(), B.stride_0(), B.stride_1(), beta, C.data(), - C.stride_0(), 
C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), B.data(), B.stride_0(), B.stride_1(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } /// /// NT/T /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B, - const ScalarType beta, - const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { typedef typename CViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = C.extent(0), n = C.extent(1), k = A.extent(1); static_assert(is_vector::value, "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || 
vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1 && C.stride_0() == 1) { - mkl_dgemm_compact(MKL_COL_MAJOR, MKL_NOTRANS, MKL_TRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_1(), - (const double *)B.data(), B.stride_1(), beta, - (double *)C.data(), C.stride_1(), format, + mkl_dgemm_compact(MKL_COL_MAJOR, MKL_NOTRANS, MKL_TRANS, m, n, k, alpha, (const double *)A.data(), A.stride_1(), + (const double *)B.data(), B.stride_1(), beta, (double *)C.data(), C.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1 && C.stride_1() == 1) { - mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_TRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_0(), - (const double *)B.data(), B.stride_0(), beta, - (double *)C.data(), C.stride_0(), format, + mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_TRANS, m, n, k, alpha, (const double *)A.data(), A.stride_0(), + (const double *)B.data(), B.stride_0(), beta, (double *)C.data(), C.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -246,74 +188,56 @@ SerialGemm -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), 
C.extent(1), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), B.data(), B.stride_1(), B.stride_0(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), B.data(), B.stride_1(), B.stride_0(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } /// /// T/T /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { typedef typename CViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = C.extent(0), n = 
C.extent(1), k = A.extent(0); static_assert(is_vector::value, "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1 && C.stride_0() == 1) { - mkl_dgemm_compact(MKL_COL_MAJOR, MKL_TRANS, MKL_TRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_1(), - (const double *)B.data(), B.stride_1(), beta, - (double *)C.data(), C.stride_1(), format, + mkl_dgemm_compact(MKL_COL_MAJOR, MKL_TRANS, MKL_TRANS, m, n, k, alpha, (const double *)A.data(), A.stride_1(), + (const double *)B.data(), B.stride_1(), beta, (double *)C.data(), C.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1 && C.stride_1() == 1) { - mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_TRANS, MKL_TRANS, m, n, k, alpha, - (const double *)A.data(), A.stride_0(), - (const double *)B.data(), B.stride_0(), beta, - (double *)C.data(), C.stride_0(), format, + mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_TRANS, MKL_TRANS, m, n, k, alpha, (const double *)A.data(), A.stride_0(), + (const double *)B.data(), B.stride_0(), beta, (double *)C.data(), C.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -323,33 +247,25 @@ SerialGemm::invoke( #endif template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const 
BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), B.data(), B.stride_1(), B.stride_0(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemm::invoke( - const ScalarType alpha, const AViewType &A, const BViewType &B, - const ScalarType beta, const CViewType &C) { +template +KOKKOS_INLINE_FUNCTION int SerialGemm::invoke( + const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return SerialGemmInternal::invoke( - C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), B.data(), B.stride_1(), B.stride_0(), beta, C.data(), - C.stride_0(), C.stride_1()); + return SerialGemmInternal::invoke(C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Internal.hpp index 43197f1da3bf..eaa5b67ffa15 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Serial_Internal.hpp @@ -34,21 +34,18 @@ namespace KokkosBatched { template struct SerialGemmInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const int n, const int k, const ScalarType alpha, - 
const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, - const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1); + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const int k, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, + const ScalarType beta, + /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1); }; template <> template KOKKOS_INLINE_FUNCTION int SerialGemmInternal::invoke( - const int m, const int n, const int k, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, - const ScalarType beta, + const int m, const int n, const int k, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) @@ -65,8 +62,7 @@ KOKKOS_INLINE_FUNCTION int SerialGemmInternal::invoke( ValueType *KOKKOS_RESTRICT pC = C; for (int p = 0; p < k; ++p) { - const ValueType *KOKKOS_RESTRICT pA = A + p * as1, - *KOKKOS_RESTRICT pB = B + p * bs0; + const ValueType *KOKKOS_RESTRICT pA = A + p * as1, *KOKKOS_RESTRICT pB = B + p * bs0; for (int i = 0; i < m; ++i) { const ValueType tA(alpha * pA[i * as0]); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -82,10 +78,8 @@ KOKKOS_INLINE_FUNCTION int SerialGemmInternal::invoke( template <> template KOKKOS_INLINE_FUNCTION int SerialGemmInternal::invoke( - const int m, const int n, const int k, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, - 
const ScalarType beta, + const int m, const int n, const int k, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) @@ -105,17 +99,14 @@ KOKKOS_INLINE_FUNCTION int SerialGemmInternal::invoke( const ValueType alpha_value(alpha); InnerGemmFixC inner(as0, as1, bs0, bs1, cs0, cs1); - auto gemm = [&](const int ib, const int jb, const int pb, - const ValueType *KOKKOS_RESTRICT AA, + auto gemm = [&](const int ib, const int jb, const int pb, const ValueType *KOKKOS_RESTRICT AA, const ValueType *KOKKOS_RESTRICT BB, /**/ ValueType *KOKKOS_RESTRICT CC) { const int mb = mbAlgo, nb = nbAlgo; for (int i = 0; i < ib; i += mb) for (int j = 0; j < jb; j += nb) - inner.serial_invoke(alpha_value, AA + i * as0, BB + j * bs1, - (i + mb) > ib ? (ib - i) : mb, - (j + nb) > jb ? (jb - j) : nb, pb, - CC + i * cs0 + j * cs1); + inner.serial_invoke(alpha_value, AA + i * as0, BB + j * bs1, (i + mb) > ib ? (ib - i) : mb, + (j + nb) > jb ? 
(jb - j) : nb, pb, CC + i * cs0 + j * cs1); }; const bool is_small = true; //(m*n*k <= 64*64*64); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Impl.hpp index aedfb9f662f7..64e65d62d828 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Impl.hpp @@ -40,19 +40,15 @@ namespace KokkosBatched { /// template -struct TeamVectorGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamVectorGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) return TeamVectorGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1(), beta, - C.data(), C.stride_0(), C.stride_1()); + member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), + B.stride_0(), B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } }; @@ -61,19 +57,15 @@ struct TeamVectorGemm -struct TeamVectorGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamVectorGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) return 
TeamVectorGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1(), beta, - C.data(), C.stride_0(), C.stride_1()); + member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), + B.stride_0(), B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } }; @@ -82,19 +74,15 @@ struct TeamVectorGemm -struct TeamVectorGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamVectorGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) return TeamVectorGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_1(), B.stride_0(), beta, - C.data(), C.stride_0(), C.stride_1()); + member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), + B.stride_1(), B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } }; @@ -103,19 +91,15 @@ struct TeamVectorGemm -struct TeamVectorGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamVectorGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) return TeamVectorGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), 
A.stride_0(), B.data(), B.stride_1(), B.stride_0(), beta, - C.data(), C.stride_0(), C.stride_1()); + member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), + B.stride_1(), B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Internal.hpp index 7e40ec4415f8..8ad7d570df09 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_TeamVector_Internal.hpp @@ -31,21 +31,18 @@ namespace KokkosBatched { template struct TeamVectorGemmInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, const int k, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, - const int bs1, const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int k, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, + const int bs1, const ScalarType beta, + /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -TeamVectorGemmInternal::invoke( - const MemberType &member, const int m, const int n, const int k, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, +KOKKOS_INLINE_FUNCTION int TeamVectorGemmInternal::invoke( + const MemberType &member, const int m, const int n, const int k, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT 
A, const int as0, const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1) { // C = beta C + alpha A B @@ -54,11 +51,9 @@ TeamVectorGemmInternal::invoke( const ScalarType one(1.0), zero(0.0); if (beta == zero) - KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, C, cs0, - cs1); + KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, C, cs0, cs1); else if (beta != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, beta, C, - cs0, cs1); + KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, beta, C, cs0, cs1); if (alpha != ScalarType(0.0)) { if (m <= 0 || n <= 0 || k <= 0) return 0; @@ -67,15 +62,13 @@ TeamVectorGemmInternal::invoke( Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { const ValueType *KOKKOS_RESTRICT pA = A + i * as0; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { - const ValueType *KOKKOS_RESTRICT pB = B + j * bs1; - - ValueType c = ValueType(0); - for (int p = 0; p < k; ++p) - c += pA[p * as1] * pB[p * bs0]; - C[i * cs0 + j * cs1] += alpha * c; - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { + const ValueType *KOKKOS_RESTRICT pB = B + j * bs1; + + ValueType c = ValueType(0); + for (int p = 0; p < k; ++p) c += pA[p * as1] * pB[p * bs0]; + C[i * cs0 + j * cs1] += alpha * c; + }); }); } return 0; @@ -83,11 +76,9 @@ TeamVectorGemmInternal::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -TeamVectorGemmInternal::invoke( - const MemberType &member, const int m, const int n, const int k, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, +KOKKOS_INLINE_FUNCTION int TeamVectorGemmInternal::invoke( + const MemberType &member, const int m, const int n, const int k, const ScalarType alpha, 
+ const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1) { // C = beta C + alpha A B @@ -96,11 +87,9 @@ TeamVectorGemmInternal::invoke( const ScalarType one(1.0), zero(0.0); if (beta == zero) - KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, C, cs0, - cs1); + KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, C, cs0, cs1); else if (beta != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, beta, C, - cs0, cs1); + KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, beta, C, cs0, cs1); if (alpha != ScalarType(0.0)) { if (m <= 0 || n <= 0 || k <= 0) return 0; @@ -109,16 +98,13 @@ TeamVectorGemmInternal::invoke( Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { const ValueType *KOKKOS_RESTRICT pA = A + i * as0; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, n), [&](const int &j) { - const ValueType *KOKKOS_RESTRICT pB = B + j * bs1; - - ValueType c = ValueType(0); - for (int p = 0; p < k; ++p) - c += Kokkos::ArithTraits::conj(pA[p * as1]) * - pB[p * bs0]; - C[i * cs0 + j * cs1] += alpha * c; - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { + const ValueType *KOKKOS_RESTRICT pB = B + j * bs1; + + ValueType c = ValueType(0); + for (int p = 0; p < k; ++p) c += Kokkos::ArithTraits::conj(pA[p * as1]) * pB[p * bs0]; + C[i * cs0 + j * cs1] += alpha * c; + }); }); } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Impl.hpp index 647ffbdb266d..0a9fb87b9e6d 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Impl.hpp @@ -40,36 +40,28 @@ namespace 
KokkosBatched { /// template -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(1), alpha, + A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } }; template -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } }; @@ -78,36 +70,28 @@ struct TeamGemm -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType 
&member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(0), alpha, + A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } }; template -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), + B.stride_1(), beta, C.data(), C.stride_0(), C.stride_1()); } }; @@ -116,36 +100,28 @@ struct TeamGemm -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { 
+struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_1(), B.stride_0(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(1), alpha, + A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } }; template -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_1(), B.stride_0(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } }; @@ -154,36 +130,28 @@ struct TeamGemm -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType 
alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_1(), B.stride_0(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(0), alpha, + A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } }; template -struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { +struct TeamGemm { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // C = beta C + alpha A B // C (m x n), A(m x k), B(k x n) - return TeamGemmInternal::invoke( - member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_1(), B.stride_0(), beta, - C.data(), C.stride_0(), C.stride_1()); + return TeamGemmInternal::invoke(member, C.extent(0), C.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_1(), + B.stride_0(), beta, C.data(), C.stride_0(), C.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Internal.hpp index 988a4e5da271..1b77a2599109 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemm_Team_Internal.hpp @@ -34,20 +34,18 @@ namespace KokkosBatched { template struct TeamGemmInternal { template - 
KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, const int k, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, - const int bs1, const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int k, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, + const int bs1, const ScalarType beta, + /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1); }; template <> template KOKKOS_INLINE_FUNCTION int TeamGemmInternal::invoke( - const MemberType &member, const int m, const int n, const int k, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, + const MemberType &member, const int m, const int n, const int k, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1) { // C = beta C + alpha A B @@ -58,25 +56,22 @@ KOKKOS_INLINE_FUNCTION int TeamGemmInternal::invoke( if (beta == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, n, zero, C, cs0, cs1); else if (beta != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, beta, C, cs0, - cs1); + KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, beta, C, cs0, cs1); if (alpha != ScalarType(0.0)) { if (m <= 0 || n <= 0 || k <= 0) return 0; if (beta != one) member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, m * n), [&](const int &ij) { - // assume layout right for batched computation - const int i = ij / n, j = ij % n; - 
const ValueType *KOKKOS_RESTRICT pA = A + i * as0, - *KOKKOS_RESTRICT pB = B + j * bs1; - - ValueType c = ValueType(0); - for (int p = 0; p < k; ++p) c += pA[p * as1] * pB[p * bs0]; - C[i * cs0 + j * cs1] += alpha * c; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m * n), [&](const int &ij) { + // assume layout right for batched computation + const int i = ij / n, j = ij % n; + const ValueType *KOKKOS_RESTRICT pA = A + i * as0, *KOKKOS_RESTRICT pB = B + j * bs1; + + ValueType c = ValueType(0); + for (int p = 0; p < k; ++p) c += pA[p * as1] * pB[p * bs0]; + C[i * cs0 + j * cs1] += alpha * c; + }); } return 0; } @@ -84,9 +79,8 @@ KOKKOS_INLINE_FUNCTION int TeamGemmInternal::invoke( template <> template KOKKOS_INLINE_FUNCTION int TeamGemmInternal::invoke( - const MemberType &member, const int m, const int n, const int k, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, + const MemberType &member, const int m, const int n, const int k, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT C, const int cs0, const int cs1) { // C = beta C + alpha A B @@ -100,8 +94,7 @@ KOKKOS_INLINE_FUNCTION int TeamGemmInternal::invoke( if (beta == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, n, zero, C, cs0, cs1); else if (beta != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, beta, C, cs0, - cs1); + KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, beta, C, cs0, cs1); if (alpha != ScalarType(0.0)) { if (m <= 0 || n <= 0 || k <= 0) return 0; @@ -111,31 +104,27 @@ KOKKOS_INLINE_FUNCTION int TeamGemmInternal::invoke( /// /// GPU case: team size is large and blocksize (mb,nb) is small InnerGemmFixC inner(as0, as1, bs0, bs1, cs0, cs1); - auto gemm = [&](const int ib, const int jb, 
const int pb, - const ValueType *KOKKOS_RESTRICT AA, + auto gemm = [&](const int ib, const int jb, const int pb, const ValueType *KOKKOS_RESTRICT AA, const ValueType *KOKKOS_RESTRICT BB, /**/ ValueType *KOKKOS_RESTRICT CC) { // Made this non-const in order to WORKAROUND issue #349 - int mb = mbAlgo, mp = (ib % mb), mq = (ib / mb) + (mp > 0), nb = nbAlgo, - np = (jb % nb), nq = (jb / nb) + (np > 0); + int mb = mbAlgo, mp = (ib % mb), mq = (ib / mb) + (mp > 0), nb = nbAlgo, np = (jb % nb), + nq = (jb / nb) + (np > 0); // square tiling - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, mq * nq), [&](const int &ij) { - int i, j; - // note: the condition is constexpr - if (KokkosKernels::Impl::kk_is_gpu_exec_space< - typename MemberType::execution_space>()) { - i = ij % mq * mb; - j = ij / mq * nb; - } else { - i = ij / nq * mb; - j = ij % nq * nb; - } - inner.serial_invoke( - alpha, AA + i * as0, BB + j * bs1, (i + mb) > ib ? mp : mb, - (j + nb) > jb ? np : nb, pb, CC + i * cs0 + j * cs1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, mq * nq), [&](const int &ij) { + int i, j; + // note: the condition is constexpr + if (KokkosKernels::Impl::kk_is_gpu_exec_space()) { + i = ij % mq * mb; + j = ij / mq * nb; + } else { + i = ij / nq * mb; + j = ij % nq * nb; + } + inner.serial_invoke(alpha, AA + i * as0, BB + j * bs1, (i + mb) > ib ? mp : mb, (j + nb) > jb ? 
np : nb, pb, + CC + i * cs0 + j * cs1); + }); }; const bool is_small = true; //(m*n*k <= 64*64*64); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp index a0b948bb132d..4f54bf7f31c0 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp @@ -41,43 +41,30 @@ namespace KokkosBatched { template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const xViewType &x, const ScalarType beta, const yViewType &y) { static_assert(AViewType::rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); if (A.extent(0) == 1) { - KokkosBlas::TeamVectorGemv< - MemberType, Trans::NoTranspose, - Algo::Gemv::Unblocked>::invoke(member, alpha, - Kokkos::subview(A, 0, Kokkos::ALL, - Kokkos::ALL), - Kokkos::subview(x, 0, Kokkos::ALL), - beta, - Kokkos::subview(y, 0, Kokkos::ALL)); + KokkosBlas::TeamVectorGemv::invoke( + member, alpha, Kokkos::subview(A, 0, Kokkos::ALL, Kokkos::ALL), Kokkos::subview(x, 0, Kokkos::ALL), beta, + Kokkos::subview(y, 0, Kokkos::ALL)); return 0; } return TeamVectorGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), - A.stride_0(), A.stride_1(), A.stride_2(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + MemberType, ScalarType, typename AViewType::array_layout, typename 
AViewType::non_const_value_type>( + member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), A.stride_0(), A.stride_1(), A.stride_2(), + x.data(), x.stride_0(), x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const xViewType & /*x*/, const ScalarType /*beta*/, const yViewType & /*y*/) { static_assert(AViewType::rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " @@ -94,32 +81,24 @@ struct TeamVectorGemv { template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const xViewType &x, const ScalarType beta, const yViewType &y) { static_assert(AViewType::rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); return TeamVectorGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_2(), A.stride_1(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + MemberType, ScalarType, typename AViewType::array_layout, typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_2(), A.stride_1(), + x.data(), x.stride_0(), x.stride_1(), beta, 
y.data(), y.stride_0(), y.stride_1()); } }; template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const xViewType & /*x*/, const ScalarType /*beta*/, const yViewType & /*y*/) { static_assert(AViewType::rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp index 0ffc60ec9000..8d9676b22360 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp @@ -30,30 +30,24 @@ namespace KokkosBatched { /// ==================== template struct TeamVectorGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType & /*member*/, const int /*N*/, const int /*m*/, - const int /*n*/, const ScalarType /*alpha*/, - const ValueType *KOKKOS_RESTRICT /*A*/, const int /*as0*/, - const int /*as1*/, const int /*as2*/, - const ValueType *KOKKOS_RESTRICT /*x*/, const int /*xs0*/, - const int /*xs1*/, const ScalarType /*beta*/, - /**/ ValueType *KOKKOS_RESTRICT /*y*/, const int /*ys0*/, - const int /*ys1*/) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const int /*N*/, const int /*m*/, + const int /*n*/, const ScalarType /*alpha*/, + const ValueType *KOKKOS_RESTRICT /*A*/, const int /*as0*/, const int /*as1*/, + const int /*as2*/, const ValueType *KOKKOS_RESTRICT /*x*/, const int /*xs0*/, + const int /*xs1*/, const ScalarType /*beta*/, + /**/ ValueType *KOKKOS_RESTRICT /*y*/, const int 
/*ys0*/, + const int /*ys1*/) { assert(false && "Error: encounter dummy impl"); return 0; } }; template <> -template -KOKKOS_INLINE_FUNCTION int -TeamVectorGemvInternal::invoke( - const MemberType &member, const int N, const int m, const int n, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const int as2, const ValueType *KOKKOS_RESTRICT X, +template +KOKKOS_INLINE_FUNCTION int TeamVectorGemvInternal::invoke( + const MemberType &member, const int N, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const int as2, const ValueType *KOKKOS_RESTRICT X, const int xs0, const int xs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT Y, const int ys0, const int ys1) { const ScalarType one(1.0), zero(0.0); @@ -64,37 +58,32 @@ TeamVectorGemvInternal::invoke( if (beta == zero) // TODO: KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, zero, y, // ys0); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), - [&](const int &iTemp) { - int iRow, iMatrix; - getIndices(iTemp, m, N, iRow, iMatrix); - Y[ys0 * iMatrix + ys1 * iRow] = zero; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), [&](const int &iTemp) { + int iRow, iMatrix; + getIndices(iTemp, m, N, iRow, iMatrix); + Y[ys0 * iMatrix + ys1 * iRow] = zero; + }); else if (beta != one) // TODO: KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, beta, // y, ys0); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), - [&](const int &iTemp) { - int iRow, iMatrix; - getIndices(iTemp, m, N, iRow, iMatrix); - Y[ys0 * iMatrix + ys1 * iRow] *= beta; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), [&](const int &iTemp) { + int iRow, iMatrix; + getIndices(iTemp, m, N, iRow, iMatrix); + Y[ys0 * iMatrix + ys1 * iRow] *= beta; + }); if (alpha != zero) { if (m <= 0 || n <= 0) return 0; if (beta != one) member.team_barrier(); - 
Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), - [&](const int &iTemp) { - int iRow, iMatrix; - ValueType t(0); - getIndices(iTemp, m, N, iRow, iMatrix); - for (int i = 0; i < n; ++i) - t += A[as0 * iMatrix + as1 * iRow + as2 * i] * - X[xs0 * iMatrix + xs1 * i]; - Y[ys0 * iMatrix + ys1 * iRow] += alpha * t; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), [&](const int &iTemp) { + int iRow, iMatrix; + ValueType t(0); + getIndices(iTemp, m, N, iRow, iMatrix); + for (int i = 0; i < n; ++i) t += A[as0 * iMatrix + as1 * iRow + as2 * i] * X[xs0 * iMatrix + xs1 * i]; + Y[ys0 * iMatrix + ys1 * iRow] += alpha * t; + }); } return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp index 48627aaf308d..16f12529d49d 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp @@ -42,11 +42,9 @@ namespace KokkosBatched { template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const xViewType &x, const ScalarType beta, const yViewType &y) { if constexpr (Kokkos::is_dyn_rank_view::value) { assert(A.rank_dynamic() == 3 && "Batched TeamGemv requires rank-3 A matrix (use " @@ -58,34 +56,23 @@ struct TeamGemv { } if (A.extent(0) == 1) { - KokkosBlas::TeamGemv< - MemberType, Trans::NoTranspose, - Algo::Gemv::Unblocked>::invoke(member, alpha, - Kokkos::subview(A, 0, Kokkos::ALL, - Kokkos::ALL), - Kokkos::subview(x, 0, Kokkos::ALL), - beta, - Kokkos::subview(y, 0, Kokkos::ALL)); + KokkosBlas::TeamGemv::invoke( + member, alpha, 
Kokkos::subview(A, 0, Kokkos::ALL, Kokkos::ALL), Kokkos::subview(x, 0, Kokkos::ALL), beta, + Kokkos::subview(y, 0, Kokkos::ALL)); return 0; } return TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), - A.stride_0(), A.stride_1(), A.stride_2(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + MemberType, ScalarType, typename AViewType::array_layout, typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), A.stride_0(), A.stride_1(), A.stride_2(), + x.data(), x.stride_0(), x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const xViewType & /*x*/, const ScalarType /*beta*/, const yViewType & /*y*/) { /* if constexpr (Kokkos::is_dyn_rank_view::value) { assert(A.rank_dynamic() == 3 && @@ -108,11 +95,9 @@ struct TeamGemv { template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const xViewType &x, const ScalarType beta, const yViewType &y) { if constexpr (Kokkos::is_dyn_rank_view::value) { assert(A.rank_dynamic() == 3 && "Batched TeamGemv requires rank-3 A matrix (use " @@ -123,31 +108,23 @@ struct TeamGemv { "KokkosBlas::TeamGemv for regular rank-2 matrix)"); } if 
(A.extent(0) == 1) { - KokkosBlas:: - TeamGemv::invoke( - member, alpha, Kokkos::subview(A, 0, Kokkos::ALL, Kokkos::ALL), - Kokkos::subview(x, 0, Kokkos::ALL), beta, - Kokkos::subview(y, 0, Kokkos::ALL)); + KokkosBlas::TeamGemv::invoke( + member, alpha, Kokkos::subview(A, 0, Kokkos::ALL, Kokkos::ALL), Kokkos::subview(x, 0, Kokkos::ALL), beta, + Kokkos::subview(y, 0, Kokkos::ALL)); return 0; } return TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_2(), A.stride_1(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + MemberType, ScalarType, typename AViewType::array_layout, typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_2(), A.stride_1(), + x.data(), x.stride_0(), x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const xViewType & /*x*/, const ScalarType /*beta*/, const yViewType & /*y*/) { /* if constexpr (Kokkos::is_dyn_rank_view::value) { assert(A.rank_dynamic() == 3 && diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp index 77629c678f6b..8f63e24b27ec 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp @@ -20,9 +20,9 @@ #include 
"KokkosBatched_Util.hpp" -//#include "KokkosBlas1_set_impl.hpp" -//#include "KokkosBlas1_team_scal_impl.hpp" -//#include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" +// #include "KokkosBlas1_set_impl.hpp" +// #include "KokkosBlas1_team_scal_impl.hpp" +// #include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" namespace KokkosBatched { @@ -31,23 +31,19 @@ namespace KokkosBatched { /// ==================== template struct TeamGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int N, const int m, const int n, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const int as2, const ValueType *KOKKOS_RESTRICT x, - const int xs0, const int xs1, const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT y, const int ys0, const int ys1); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int N, const int m, const int n, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const int as2, const ValueType *KOKKOS_RESTRICT x, + const int xs0, const int xs1, const ScalarType beta, + /**/ ValueType *KOKKOS_RESTRICT y, const int ys0, const int ys1); }; template <> -template +template KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, const int N, const int m, const int n, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const int as2, const ValueType *KOKKOS_RESTRICT X, + const MemberType &member, const int N, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const int as2, const ValueType *KOKKOS_RESTRICT X, const int xs0, const int xs1, const ScalarType beta, /**/ ValueType *KOKKOS_RESTRICT Y, const int ys0, const int ys1) { const ScalarType one(1.0), zero(0.0); @@ -56,35 +52,30 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( // y_l (m), A_l(m x n), B_l(n) 
if (beta == zero) - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, N * m), - [&](const int &iTemp) { - int iRow, iMatrix; - getIndices(iTemp, m, N, iRow, iMatrix); - Y[ys0 * iMatrix + ys1 * iRow] = zero; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, N * m), [&](const int &iTemp) { + int iRow, iMatrix; + getIndices(iTemp, m, N, iRow, iMatrix); + Y[ys0 * iMatrix + ys1 * iRow] = zero; + }); else if (beta != one) - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, N * m), - [&](const int &iTemp) { - int iRow, iMatrix; - getIndices(iTemp, m, N, iRow, iMatrix); - Y[ys0 * iMatrix + ys1 * iRow] *= beta; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, N * m), [&](const int &iTemp) { + int iRow, iMatrix; + getIndices(iTemp, m, N, iRow, iMatrix); + Y[ys0 * iMatrix + ys1 * iRow] *= beta; + }); if (alpha != zero) { if (m <= 0 || n <= 0) return 0; if (beta != one) member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, N * m), - [&](const int &iTemp) { - int iRow, iMatrix; - ValueType t(0); - getIndices(iTemp, m, N, iRow, iMatrix); - for (int i = 0; i < n; ++i) - t += A[as0 * iMatrix + as1 * iRow + as2 * i] * - X[xs0 * iMatrix + xs1 * i]; - Y[ys0 * iMatrix + ys1 * iRow] += alpha * t; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, N * m), [&](const int &iTemp) { + int iRow, iMatrix; + ValueType t(0); + getIndices(iTemp, m, N, iRow, iMatrix); + for (int i = 0; i < n; ++i) t += A[as0 * iMatrix + as1 * iRow + as2 * i] * X[xs0 * iMatrix + xs1 * i]; + Y[ys0 * iMatrix + ys1 * iRow] += alpha * t; + }); } return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gesv_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gesv_Impl.hpp index 86d0d0873efa..ba18cbafd7f8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gesv_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Gesv_Impl.hpp @@ -26,40 +26,33 @@ namespace 
KokkosBatched { struct SerialStaticPivoting { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MatrixType1 A, const MatrixType2 PDAD, const VectorType1 Y, - const VectorType2 PDY, const VectorType2 D2, const VectorType2 tmp_v_1, - const VectorType2 tmp_v_2); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType1 A, const MatrixType2 PDAD, const VectorType1 Y, + const VectorType2 PDY, const VectorType2 D2, const VectorType2 tmp_v_1, + const VectorType2 tmp_v_2); }; template struct TeamStaticPivoting { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const MatrixType1 A, const MatrixType2 PDAD, - const VectorType1 Y, const VectorType2 PDY, const VectorType2 D2, - const VectorType2 tmp_v_1, const VectorType2 tmp_v_2); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType1 A, const MatrixType2 PDAD, + const VectorType1 Y, const VectorType2 PDY, const VectorType2 D2, + const VectorType2 tmp_v_1, const VectorType2 tmp_v_2); }; template struct TeamVectorStaticPivoting { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const MatrixType1 A, const MatrixType2 PDAD, - const VectorType1 Y, const VectorType2 PDY, const VectorType2 D2, - const VectorType2 tmp_v_1, const VectorType2 tmp_v_2); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType1 A, const MatrixType2 PDAD, + const VectorType1 Y, const VectorType2 PDY, const VectorType2 D2, + const VectorType2 tmp_v_1, const VectorType2 tmp_v_2); }; -template -KOKKOS_INLINE_FUNCTION int SerialStaticPivoting::invoke( - const MatrixType1 A, const MatrixType2 PDAD, const VectorType1 Y, - const VectorType2 PDY, const VectorType2 D2, const VectorType2 tmp_v_1, - const VectorType2 tmp_v_2) { +template +KOKKOS_INLINE_FUNCTION int SerialStaticPivoting::invoke(const MatrixType1 A, const MatrixType2 PDAD, + const VectorType1 Y, const VectorType2 PDY, + const 
VectorType2 D2, const VectorType2 tmp_v_1, + const VectorType2 tmp_v_2) { using value_type = typename MatrixType1::non_const_value_type; const size_t n = A.extent(0); @@ -139,15 +132,14 @@ KOKKOS_INLINE_FUNCTION int SerialStaticPivoting::invoke( } template -template -KOKKOS_INLINE_FUNCTION int TeamStaticPivoting::invoke( - const MemberType &member, const MatrixType1 A, const MatrixType2 PDAD, - const VectorType1 Y, const VectorType2 PDY, const VectorType2 D2, - const VectorType2 tmp_v_1, const VectorType2 tmp_v_2) { - using value_type = typename MatrixType1::non_const_value_type; - using reducer_value_type = - typename Kokkos::MaxLoc::value_type; +template +KOKKOS_INLINE_FUNCTION int TeamStaticPivoting::invoke(const MemberType &member, const MatrixType1 A, + const MatrixType2 PDAD, const VectorType1 Y, + const VectorType2 PDY, const VectorType2 D2, + const VectorType2 tmp_v_1, + const VectorType2 tmp_v_2) { + using value_type = typename MatrixType1::non_const_value_type; + using reducer_value_type = typename Kokkos::MaxLoc::value_type; // This implementation follows the strategy of SerialStaticPivoting but uses // an extra level of parallelism. 
@@ -222,15 +214,14 @@ KOKKOS_INLINE_FUNCTION int TeamStaticPivoting::invoke( } template -template -KOKKOS_INLINE_FUNCTION int TeamVectorStaticPivoting::invoke( - const MemberType &member, const MatrixType1 A, const MatrixType2 PDAD, - const VectorType1 Y, const VectorType2 PDY, const VectorType2 D2, - const VectorType2 tmp_v_1, const VectorType2 tmp_v_2) { - using value_type = typename MatrixType1::non_const_value_type; - using reducer_value_type = - typename Kokkos::MaxLoc::value_type; +template +KOKKOS_INLINE_FUNCTION int TeamVectorStaticPivoting::invoke(const MemberType &member, const MatrixType1 A, + const MatrixType2 PDAD, const VectorType1 Y, + const VectorType2 PDY, const VectorType2 D2, + const VectorType2 tmp_v_1, + const VectorType2 tmp_v_2) { + using value_type = typename MatrixType1::non_const_value_type; + using reducer_value_type = typename Kokkos::MaxLoc::value_type; // This implementation follows the strategy of SerialStaticPivoting but uses // two extra levels of parallelism. @@ -265,8 +256,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorStaticPivoting::invoke( }); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &i) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { A(i, j) *= D2(j); }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { A(i, j) *= D2(j); }); }); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &i) { @@ -283,8 +273,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorStaticPivoting::invoke( }, reducer_value); D1_i = 1. 
/ value.val; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { A(i, j) *= D1_i; }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { A(i, j) *= D1_i; }); Y(i) *= D1_i; }); @@ -318,18 +307,15 @@ KOKKOS_INLINE_FUNCTION int TeamVectorStaticPivoting::invoke( tmp_v_1(row_index) = Kokkos::ArithTraits::zero(); tmp_v_2(col_index) = Kokkos::ArithTraits::zero(); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { - PDAD(col_index, j) = A(row_index, j); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), + [&](const int &j) { PDAD(col_index, j) = A(row_index, j); }); PDY(col_index) = Y(row_index); } return 0; } template -KOKKOS_INLINE_FUNCTION void SerialHadamard1D(const VectorType1 X, - const VectorType2 D, - const VectorType3 DX) { +KOKKOS_INLINE_FUNCTION void SerialHadamard1D(const VectorType1 X, const VectorType2 D, const VectorType3 DX) { const size_t n = X.extent(0); for (size_t i = 0; i < n; ++i) { @@ -337,28 +323,20 @@ KOKKOS_INLINE_FUNCTION void SerialHadamard1D(const VectorType1 X, } } -template -KOKKOS_INLINE_FUNCTION void TeamHadamard1D(const MemberType &member, - const VectorType1 X, - const VectorType2 D, +template +KOKKOS_INLINE_FUNCTION void TeamHadamard1D(const MemberType &member, const VectorType1 X, const VectorType2 D, const VectorType3 DX) { const size_t n = X.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), - [&](const size_t &i) { DX(i) = D(i) * X(i); }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const size_t &i) { DX(i) = D(i) * X(i); }); } -template -KOKKOS_INLINE_FUNCTION void TeamVectorHadamard1D(const MemberType &member, - const VectorType1 X, - const VectorType2 D, +template +KOKKOS_INLINE_FUNCTION void TeamVectorHadamard1D(const MemberType &member, const VectorType1 X, const VectorType2 D, const VectorType3 DX) { const size_t n = X.extent(0); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), - 
[&](const size_t &i) { DX(i) = D(i) * X(i); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const size_t &i) { DX(i) = D(i) * X(i); }); } /// @@ -367,60 +345,32 @@ KOKKOS_INLINE_FUNCTION void TeamVectorHadamard1D(const MemberType &member, template <> struct SerialGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, - const XVectorType X, - const YVectorType Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, const XVectorType X, const YVectorType Y, const MatrixType tmp) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: XVectorType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: YVectorType is not a Kokkos::View."); - static_assert(MatrixType::rank == 2, - "KokkosBatched::gesv: MatrixType must have rank 2."); - static_assert(XVectorType::rank == 1, - "KokkosBatched::gesv: XVectorType must have rank 1."); - static_assert(YVectorType::rank == 1, - "KokkosBatched::gesv: YVectorType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: XVectorType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: YVectorType is not a Kokkos::View."); + static_assert(MatrixType::rank == 2, "KokkosBatched::gesv: MatrixType must have rank 2."); + static_assert(XVectorType::rank == 1, "KokkosBatched::gesv: XVectorType must have rank 1."); + static_assert(YVectorType::rank == 1, "KokkosBatched::gesv: YVectorType must have rank 1."); // Check compatibility of dimensions at run time. 
if (A.extent(0) != tmp.extent(0) || A.extent(1) + 4 != tmp.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and tmp do not match: A: " - "%d x %d, tmp (note: its second dimension should be the second " - "dimension of A + 4): %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)tmp.extent(0), - (int)tmp.extent(1)); -#else Kokkos::printf( "KokkosBatched::gesv: dimensions of A and tmp do not match: A: " "%d x %d, tmp (note: its second dimension should be the second " "dimension of A + 4): %d x %d\n", - (int)A.extent(0), (int)A.extent(1), (int)tmp.extent(0), - (int)tmp.extent(1)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)tmp.extent(0), (int)tmp.extent(1)); return 1; } - if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || - A.extent(0) != Y.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " - "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#else + if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || A.extent(0) != Y.extent(0)) { Kokkos::printf( "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), (int)Y.extent(0)); return 1; } #endif @@ -433,31 +383,22 @@ struct SerialGesv { auto tmp_v_1 = Kokkos::subview(tmp, Kokkos::ALL, n + 2); auto tmp_v_2 = Kokkos::subview(tmp, Kokkos::ALL, n + 3); - if (SerialStaticPivoting::invoke(A, PDAD, Y, PDY, D2, tmp_v_1, tmp_v_2) == - 1) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: the currently implemented static pivoting " - "failed.\n"); -#else + if (SerialStaticPivoting::invoke(A, PDAD, Y, PDY, D2, tmp_v_1, tmp_v_2) == 1) { Kokkos::printf( "KokkosBatched::gesv: the 
currently implemented static pivoting " "failed.\n"); -#endif return 1; } int r_val = SerialLU::invoke(PDAD); if (r_val == 0) - r_val = - SerialTrsm::invoke(1.0, PDAD, PDY); + r_val = SerialTrsm::invoke( + 1.0, PDAD, PDY); if (r_val == 0) - r_val = - SerialTrsm::invoke(1.0, PDAD, PDY); + r_val = SerialTrsm::invoke( + 1.0, PDAD, PDY); if (r_val == 0) SerialHadamard1D(PDY, D2, X); return r_val; @@ -467,41 +408,23 @@ struct SerialGesv { template <> struct SerialGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, - const XVectorType X, - const YVectorType Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, const XVectorType X, const YVectorType Y, const MatrixType /*tmp*/) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: XVectorType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: YVectorType is not a Kokkos::View."); - static_assert(MatrixType::rank == 2, - "KokkosBatched::gesv: MatrixType must have rank 2."); - static_assert(XVectorType::rank == 1, - "KokkosBatched::gesv: XVectorType must have rank 1."); - static_assert(YVectorType::rank == 1, - "KokkosBatched::gesv: YVectorType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: XVectorType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: YVectorType is not a Kokkos::View."); + static_assert(MatrixType::rank == 2, "KokkosBatched::gesv: MatrixType must have rank 2."); + static_assert(XVectorType::rank == 1, "KokkosBatched::gesv: XVectorType must have rank 1."); + static_assert(YVectorType::rank == 1, "KokkosBatched::gesv: YVectorType must have rank 1."); // Check compatibility of dimensions at run time. 
- if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || - A.extent(0) != Y.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " - "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#else + if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || A.extent(0) != Y.extent(0)) { Kokkos::printf( "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), (int)Y.extent(0)); return 1; } #endif @@ -511,14 +434,12 @@ struct SerialGesv { if (r_val == 0) r_val = SerialCopy::invoke(Y, X); if (r_val == 0) - r_val = - SerialTrsm::invoke(1.0, A, X); + r_val = SerialTrsm::invoke( + 1.0, A, X); if (r_val == 0) - r_val = - SerialTrsm::invoke(1.0, A, X); + r_val = SerialTrsm::invoke( + 1.0, A, X); return r_val; } @@ -531,42 +452,25 @@ struct SerialGesv { template struct TeamGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const MatrixType A, - const VectorType X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType A, const VectorType X, const VectorType Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: VectorType is not a Kokkos::View."); - static_assert(MatrixType::rank == 2, - "KokkosBatched::gesv: MatrixType must have rank 2."); - static_assert(VectorType::rank == 1, - "KokkosBatched::gesv: VectorType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: VectorType is not a Kokkos::View."); + 
static_assert(MatrixType::rank == 2, "KokkosBatched::gesv: MatrixType must have rank 2."); + static_assert(VectorType::rank == 1, "KokkosBatched::gesv: VectorType must have rank 1."); // Check compatibility of dimensions at run time. - if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || - A.extent(0) != Y.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " - "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#else + if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || A.extent(0) != Y.extent(0)) { Kokkos::printf( "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), (int)Y.extent(0)); return 1; } #endif - using ScratchPadMatrixViewType = Kokkos::View< - typename MatrixType::non_const_value_type **, - typename MatrixType::execution_space::scratch_memory_space>; + using ScratchPadMatrixViewType = Kokkos::View; const int n = A.extent(0); @@ -577,37 +481,26 @@ struct TeamGesv { auto tmp_v_1 = Kokkos::subview(tmp, Kokkos::ALL, n + 2); auto tmp_v_2 = Kokkos::subview(tmp, Kokkos::ALL, n + 3); - if (TeamStaticPivoting::invoke(member, A, PDAD, Y, PDY, D2, - tmp_v_1, tmp_v_2) == 1) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: the currently implemented static pivoting " - "failed.\n"); -#else + if (TeamStaticPivoting::invoke(member, A, PDAD, Y, PDY, D2, tmp_v_1, tmp_v_2) == 1) { Kokkos::printf( "KokkosBatched::gesv: the currently implemented static pivoting " "failed.\n"); -#endif return 1; } member.team_barrier(); - int r_val = - TeamLU::invoke(member, PDAD); + int r_val = TeamLU::invoke(member, PDAD); member.team_barrier(); if (r_val == 0) { - r_val = TeamTrsm::invoke(member, 1.0, - 
PDAD, PDY); + r_val = TeamTrsm::invoke(member, 1.0, PDAD, PDY); member.team_barrier(); } if (r_val == 0) { - r_val = - TeamTrsm::invoke(member, 1.0, - PDAD, PDY); + r_val = TeamTrsm::invoke(member, 1.0, PDAD, PDY); member.team_barrier(); } @@ -623,36 +516,20 @@ struct TeamGesv { template struct TeamGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const MatrixType A, - const VectorType X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType A, const VectorType X, const VectorType Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: VectorType is not a Kokkos::View."); - static_assert(MatrixType::rank == 2, - "KokkosBatched::gesv: MatrixType must have rank 2."); - static_assert(VectorType::rank == 1, - "KokkosBatched::gesv: VectorType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: VectorType is not a Kokkos::View."); + static_assert(MatrixType::rank == 2, "KokkosBatched::gesv: MatrixType must have rank 2."); + static_assert(VectorType::rank == 1, "KokkosBatched::gesv: VectorType must have rank 1."); // Check compatibility of dimensions at run time. 
- if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || - A.extent(0) != Y.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " - "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#else + if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || A.extent(0) != Y.extent(0)) { Kokkos::printf( "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), (int)Y.extent(0)); return 1; } #endif @@ -666,15 +543,14 @@ struct TeamGesv { } if (r_val == 0) { - TeamTrsm::invoke(member, 1.0, A, X); + TeamTrsm::invoke( + member, 1.0, A, X); member.team_barrier(); } if (r_val == 0) { - TeamTrsm::invoke(member, 1.0, A, - X); + TeamTrsm::invoke( + member, 1.0, A, X); member.team_barrier(); } @@ -689,42 +565,25 @@ struct TeamGesv { template struct TeamVectorGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const MatrixType A, - const VectorType X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType A, const VectorType X, const VectorType Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: VectorType is not a Kokkos::View."); - static_assert(MatrixType::rank == 2, - "KokkosBatched::gesv: MatrixType must have rank 2."); - static_assert(VectorType::rank == 1, - "KokkosBatched::gesv: VectorType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: VectorType is not a Kokkos::View."); + 
static_assert(MatrixType::rank == 2, "KokkosBatched::gesv: MatrixType must have rank 2."); + static_assert(VectorType::rank == 1, "KokkosBatched::gesv: VectorType must have rank 1."); // Check compatibility of dimensions at run time. - if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || - A.extent(0) != Y.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " - "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#else + if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || A.extent(0) != Y.extent(0)) { Kokkos::printf( "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), (int)Y.extent(0)); return 1; } #endif - using ScratchPadMatrixViewType = Kokkos::View< - typename MatrixType::non_const_value_type **, - typename MatrixType::execution_space::scratch_memory_space>; + using ScratchPadMatrixViewType = Kokkos::View; const int n = A.extent(0); @@ -735,38 +594,27 @@ struct TeamVectorGesv { auto tmp_v_1 = Kokkos::subview(tmp, Kokkos::ALL, n + 2); auto tmp_v_2 = Kokkos::subview(tmp, Kokkos::ALL, n + 3); - if (TeamVectorStaticPivoting::invoke( - member, A, PDAD, Y, PDY, D2, tmp_v_1, tmp_v_2) == 1) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: the currently implemented static pivoting " - "failed.\n"); -#else + if (TeamVectorStaticPivoting::invoke(member, A, PDAD, Y, PDY, D2, tmp_v_1, tmp_v_2) == 1) { Kokkos::printf( "KokkosBatched::gesv: the currently implemented static pivoting " "failed.\n"); -#endif return 1; } member.team_barrier(); - int r_val = - TeamLU::invoke(member, PDAD); + int r_val = TeamLU::invoke(member, PDAD); member.team_barrier(); if (r_val == 0) { - 
TeamVectorTrsm::invoke(member, 1.0, - PDAD, PDY); + TeamVectorTrsm::invoke(member, 1.0, PDAD, PDY); member.team_barrier(); } if (r_val == 0) { - TeamVectorTrsm::invoke(member, - 1.0, PDAD, - PDY); + TeamVectorTrsm::invoke(member, 1.0, PDAD, PDY); member.team_barrier(); } @@ -782,36 +630,20 @@ struct TeamVectorGesv { template struct TeamVectorGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const MatrixType A, - const VectorType X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType A, const VectorType X, const VectorType Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::gesv: VectorType is not a Kokkos::View."); - static_assert(MatrixType::rank == 2, - "KokkosBatched::gesv: MatrixType must have rank 2."); - static_assert(VectorType::rank == 1, - "KokkosBatched::gesv: VectorType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: MatrixType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::gesv: VectorType is not a Kokkos::View."); + static_assert(MatrixType::rank == 2, "KokkosBatched::gesv: MatrixType must have rank 2."); + static_assert(VectorType::rank == 1, "KokkosBatched::gesv: VectorType must have rank 1."); // Check compatibility of dimensions at run time. 
- if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || - A.extent(0) != Y.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " - "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#else + if (A.extent(0) != X.extent(0) || A.extent(1) != X.extent(0) || A.extent(0) != Y.extent(0)) { Kokkos::printf( "KokkosBatched::gesv: dimensions of A and X and Y do not match: A: " "%d x %d, X: %d, Y: %d\n", - (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), - (int)Y.extent(0)); -#endif + (int)A.extent(0), (int)A.extent(1), (int)X.extent(0), (int)Y.extent(0)); return 1; } #endif @@ -825,16 +657,14 @@ struct TeamVectorGesv { } if (r_val == 0) { - TeamVectorTrsm::invoke(member, 1.0, - A, X); + TeamVectorTrsm::invoke(member, 1.0, A, X); member.team_barrier(); } if (r_val == 0) { - TeamVectorTrsm::invoke(member, - 1.0, A, X); + TeamVectorTrsm::invoke(member, 1.0, A, X); member.team_barrier(); } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Givens_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Givens_Serial_Internal.hpp index 4d80c6a25007..963862661b16 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Givens_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Givens_Serial_Internal.hpp @@ -30,10 +30,9 @@ namespace KokkosBatched { /// struct SerialGivensInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const ValueType chi1, const ValueType chi2, - /* */ Kokkos::pair* G, - /* */ ValueType* chi1_new) { + KOKKOS_INLINE_FUNCTION static int invoke(const ValueType chi1, const ValueType chi2, + /* */ Kokkos::pair* G, + /* */ ValueType* chi1_new) { typedef ValueType value_type; const value_type zero(0), one(1); /// compute G = [ gamma -sigma; @@ -58,9 +57,7 @@ struct SerialGivensInternal { cs = chi1 / r; sn = chi2 / 
r; - if (Kokkos::ArithTraits::abs(chi1) > - Kokkos::ArithTraits::abs(chi2) && - cs < zero) { + if (Kokkos::ArithTraits::abs(chi1) > Kokkos::ArithTraits::abs(chi2) && cs < zero) { cs = -cs; sn = -sn; r = -r; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HadamardProduct_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HadamardProduct_Impl.hpp index 0570bc4ccc54..658acd6b605f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HadamardProduct_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HadamardProduct_Impl.hpp @@ -27,16 +27,12 @@ namespace KokkosBatched { /// ==================== struct SerialHadamardProductInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const ValueType* KOKKOS_RESTRICT X, - const int xs0, const int xs1, - const ValueType* KOKKOS_RESTRICT Y, - const int ys0, const int ys1, - /* */ ValueType* KOKKOS_RESTRICT V, - const int vs0, const int vs1) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ValueType* KOKKOS_RESTRICT X, const int xs0, + const int xs1, const ValueType* KOKKOS_RESTRICT Y, const int ys0, + const int ys1, + /* */ ValueType* KOKKOS_RESTRICT V, const int vs0, const int vs1) { for (int i = 0; i < m; ++i) - for (int j = 0; j < n; ++j) - V[i * vs0 + j * vs1] = X[i * xs0 + j * xs1] * Y[i * ys0 + j * ys1]; + for (int j = 0; j < n; ++j) V[i * vs0 + j * vs1] = X[i * xs0 + j * xs1] * Y[i * ys0 + j * ys1]; return 0; } @@ -47,17 +43,15 @@ struct SerialHadamardProductInternal { /// ==================== struct TeamHadamardProductInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, const int n, - const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, - const ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1, - /* */ ValueType* KOKKOS_RESTRICT V, const int vs0, const int vs1) { - Kokkos::parallel_for( - 
Kokkos::TeamThreadRange(member, 0, m * n), [&](const int& iTemp) { - int i, j; - getIndices(iTemp, n, m, j, i); - V[i * vs0 + j * vs1] = X[i * xs0 + j * xs1] * Y[i * ys0 + j * ys1]; - }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const int n, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + const ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1, + /* */ ValueType* KOKKOS_RESTRICT V, const int vs0, const int vs1) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m * n), [&](const int& iTemp) { + int i, j; + getIndices(iTemp, n, m, j, i); + V[i * vs0 + j * vs1] = X[i * xs0 + j * xs1] * Y[i * ys0 + j * ys1]; + }); // member.team_barrier(); return 0; } @@ -68,17 +62,15 @@ struct TeamHadamardProductInternal { /// ======================== struct TeamVectorHadamardProductInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, const int n, - const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, - const ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1, - /* */ ValueType* KOKKOS_RESTRICT V, const int vs0, const int vs1) { - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, m * n), [&](const int& iTemp) { - int i, j; - getIndices(iTemp, n, m, j, i); - V[i * vs0 + j * vs1] = X[i * xs0 + j * xs1] * Y[i * ys0 + j * ys1]; - }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const int n, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + const ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1, + /* */ ValueType* KOKKOS_RESTRICT V, const int vs0, const int vs1) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, m * n), [&](const int& iTemp) { + int i, j; + getIndices(iTemp, n, m, j, i); + V[i * vs0 + j * vs1] = X[i * xs0 + j * xs1] * Y[i * ys0 + j * ys1]; + }); // member.team_barrier(); return 0; } @@ -88,65 +80,37 @@ struct 
TeamVectorHadamardProductInternal { /// Serial Impl /// =========== template -KOKKOS_INLINE_FUNCTION int SerialHadamardProduct::invoke(const XViewType& X, - const YViewType& Y, - const VViewType& V) { +KOKKOS_INLINE_FUNCTION int SerialHadamardProduct::invoke(const XViewType& X, const YViewType& Y, const VViewType& V) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: XViewType is not a Kokkos::View."); - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: YViewType is not a Kokkos::View."); - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: VViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::HadamardProduct: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::HadamardProduct: YViewType must have rank 2."); - static_assert(VViewType::rank == 2, - "KokkosBatched::HadamardProduct: VViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: VViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::HadamardProduct: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::HadamardProduct: YViewType must have rank 2."); + static_assert(VViewType::rank == 2, "KokkosBatched::HadamardProduct: VViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::HadamardProduct: Dimensions of X and Y do not match: " - "X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::HadamardProduct: Dimensions of X and Y do not match: " "X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != V.extent(0) || X.extent(1) != V.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::HadamardProduct: Dimensions of X and V do not match: " - "X: %d x %d, " - "V: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)V.extent(0), (int)V.extent(1)); -#else Kokkos::printf( "KokkosBatched::HadamardProduct: Dimensions of X and V do not match: " "X: %d x %d, " "V: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)V.extent(0), (int)V.extent(1)); -#endif return 1; } #endif - return SerialHadamardProductInternal::template invoke< - typename XViewType::non_const_value_type>( - X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), - Y.stride_0(), Y.stride_1(), V.data(), V.stride_0(), V.stride_1()); + return SerialHadamardProductInternal::template invoke( + X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1(), V.data(), + V.stride_0(), V.stride_1()); } /// @@ -155,67 +119,39 @@ KOKKOS_INLINE_FUNCTION int SerialHadamardProduct::invoke(const XViewType& X, template template -KOKKOS_INLINE_FUNCTION int TeamHadamardProduct::invoke( - const MemberType& member, const XViewType& X, const YViewType& Y, - const VViewType& V) { +KOKKOS_INLINE_FUNCTION int TeamHadamardProduct::invoke(const MemberType& member, const XViewType& X, + const YViewType& Y, const VViewType& V) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert( - 
Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: XViewType is not a Kokkos::View."); - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: YViewType is not a Kokkos::View."); - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: VViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::HadamardProduct: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::HadamardProduct: YViewType must have rank 2."); - static_assert(VViewType::rank == 2, - "KokkosBatched::HadamardProduct: VViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: VViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::HadamardProduct: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::HadamardProduct: YViewType must have rank 2."); + static_assert(VViewType::rank == 2, "KokkosBatched::HadamardProduct: VViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::HadamardProduct: Dimensions of X and Y do not match: " - "X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::HadamardProduct: Dimensions of X and Y do not match: " "X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != V.extent(0) || X.extent(1) != V.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::HadamardProduct: Dimensions of X and V do not match: " - "X: %d x %d, " - "V: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)V.extent(0), (int)V.extent(1)); -#else Kokkos::printf( "KokkosBatched::HadamardProduct: Dimensions of X and V do not match: " "X: %d x %d, " "V: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)V.extent(0), (int)V.extent(1)); -#endif return 1; } #endif - return TeamHadamardProductInternal::template invoke< - MemberType, typename XViewType::non_const_value_type, - typename XViewType::array_layout>(member, X.extent(0), X.extent(1), - X.data(), X.stride_0(), X.stride_1(), - Y.data(), Y.stride_0(), Y.stride_1(), - V.data(), V.stride_0(), V.stride_1()); + return TeamHadamardProductInternal::template invoke( + member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1(), + V.data(), V.stride_0(), V.stride_1()); } /// @@ -224,67 +160,39 @@ KOKKOS_INLINE_FUNCTION int TeamHadamardProduct::invoke( template template -KOKKOS_INLINE_FUNCTION int TeamVectorHadamardProduct::invoke( - const MemberType& member, const XViewType& X, const YViewType& Y, - const VViewType& V) { +KOKKOS_INLINE_FUNCTION int TeamVectorHadamardProduct::invoke(const MemberType& member, const XViewType& X, + const YViewType& Y, const VViewType& V) { #if 
(KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: XViewType is not a Kokkos::View."); - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: YViewType is not a Kokkos::View."); - static_assert( - Kokkos::is_view::value, - "KokkosBatched::HadamardProduct: VViewType is not a Kokkos::View."); - static_assert(XViewType::rank == 2, - "KokkosBatched::HadamardProduct: XViewType must have rank 2."); - static_assert(YViewType::rank == 2, - "KokkosBatched::HadamardProduct: YViewType must have rank 2."); - static_assert(VViewType::rank == 2, - "KokkosBatched::HadamardProduct: VViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: XViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: YViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::HadamardProduct: VViewType is not a Kokkos::View."); + static_assert(XViewType::rank == 2, "KokkosBatched::HadamardProduct: XViewType must have rank 2."); + static_assert(YViewType::rank == 2, "KokkosBatched::HadamardProduct: YViewType must have rank 2."); + static_assert(VViewType::rank == 2, "KokkosBatched::HadamardProduct: VViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::HadamardProduct: Dimensions of X and Y do not match: " - "X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::HadamardProduct: Dimensions of X and Y do not match: " "X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != V.extent(0) || X.extent(1) != V.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::HadamardProduct: Dimensions of X and V do not match: " - "X: %d x %d, " - "V: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)V.extent(0), (int)V.extent(1)); -#else Kokkos::printf( "KokkosBatched::HadamardProduct: Dimensions of X and V do not match: " "X: %d x %d, " "V: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)V.extent(0), (int)V.extent(1)); -#endif return 1; } #endif - return TeamVectorHadamardProductInternal::invoke< - MemberType, typename XViewType::non_const_value_type, - typename XViewType::array_layout>(member, X.extent(0), X.extent(1), - X.data(), X.stride_0(), X.stride_1(), - Y.data(), Y.stride_0(), Y.stride_1(), - V.data(), V.stride_0(), V.stride_1()); + return TeamVectorHadamardProductInternal::invoke( + member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1(), + V.data(), V.stride_0(), V.stride_1()); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergFormQ_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergFormQ_Serial_Internal.hpp index 023257c8ed5d..8db5d40a9801 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergFormQ_Serial_Internal.hpp +++ 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergFormQ_Serial_Internal.hpp @@ -34,13 +34,10 @@ namespace KokkosBatched { struct SerialHessenbergFormQInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int k, - /* */ ValueType* A, const int as0, - const int as1, + /* */ ValueType* A, const int as0, const int as1, /* */ ValueType* t, const int ts, - /* */ ValueType* Q, const int qs0, - const int qs1, - /* */ ValueType* w, - const bool is_Q_zero = false) { + /* */ ValueType* Q, const int qs0, const int qs1, + /* */ ValueType* w, const bool is_Q_zero = false) { typedef ValueType value_type; /// Given a matrix A that includes Hessenberg factorization @@ -52,14 +49,12 @@ struct SerialHessenbergFormQInternal { /// B is m x m // set identity if (is_Q_zero) - KokkosBlas::Impl::SerialSetInternal::invoke(m, value_type(1), Q, - qs0 + qs1); + KokkosBlas::Impl::SerialSetInternal::invoke(m, value_type(1), Q, qs0 + qs1); else SerialSetIdentityInternal::invoke(m, Q, qs0, qs1); - return SerialApplyQ_LeftNoTransForwardInternal ::invoke( - m - 1, m - 1, k - 1, A + as0, as0, as1, t, ts, Q + qs0 + qs1, qs1, qs0, - w); + return SerialApplyQ_LeftNoTransForwardInternal ::invoke(m - 1, m - 1, k - 1, A + as0, as0, as1, t, ts, + Q + qs0 + qs1, qs1, qs0, w); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergQR_WithShift_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergQR_WithShift_Serial_Internal.hpp index 3d2b75e64dc3..3815a9e18e8f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergQR_WithShift_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HessenbergQR_WithShift_Serial_Internal.hpp @@ -32,10 +32,9 @@ namespace KokkosBatched { /// struct SerialHessenbergQR_WithShiftInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const int mbeg, const int mend, const int morg, - /* */ ValueType *HH, const 
int hs0, const int hs1, const ValueType shift, - /* */ Kokkos::pair *GG, const bool request_schur) { + KOKKOS_INLINE_FUNCTION static int invoke(const int mbeg, const int mend, const int morg, + /* */ ValueType *HH, const int hs0, const int hs1, const ValueType shift, + /* */ Kokkos::pair *GG, const bool request_schur) { typedef ValueType value_type; // typedef Kokkos::ArithTraits ats; @@ -79,13 +78,11 @@ struct SerialHessenbergQR_WithShiftInternal { // apply G' from left G.second = -G.second; // transpose G const int nn = m; - SerialApplyLeftGivensInternal::invoke(G, nn + (morg - mend), h11, hs1, - h21, hs1); + SerialApplyLeftGivensInternal::invoke(G, nn + (morg - mend), h11, hs1, h21, hs1); // apply (G')' from right const int mm = m < 3 ? m : 3; - SerialApplyRightGivensInternal::invoke(G, mm + mbeg, h11 - mbeg_mult_hs0, - hs0, h12 - mbeg_mult_hs0, hs0); + SerialApplyRightGivensInternal::invoke(G, mm + mbeg, h11 - mbeg_mult_hs0, hs0, h12 - mbeg_mult_hs0, hs0); } /// 1. chase the bulge @@ -112,13 +109,11 @@ struct SerialHessenbergQR_WithShiftInternal { G.second = -G.second; // transpose G const int nn = m - m_htl; - SerialApplyLeftGivensInternal::invoke( - G, nn + (morg - mend), H_part3x3.A11, hs1, H_part3x3.A21, hs1); + SerialApplyLeftGivensInternal::invoke(G, nn + (morg - mend), H_part3x3.A11, hs1, H_part3x3.A21, hs1); const int mtmp = m_htl + 3, mm = mtmp < m ? 
mtmp : m; - SerialApplyRightGivensInternal::invoke( - G, mm + mbeg, H_part3x3.A01 - mbeg_mult_hs0, hs0, - H_part3x3.A02 - mbeg_mult_hs0, hs0); + SerialApplyRightGivensInternal::invoke(G, mm + mbeg, H_part3x3.A01 - mbeg_mult_hs0, hs0, + H_part3x3.A02 - mbeg_mult_hs0, hs0); /// ----------------------------------------------------- H_part2x2.mergeToATL(H_part3x3); } @@ -126,13 +121,10 @@ struct SerialHessenbergQR_WithShiftInternal { } template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int mbeg, const int mend, - const int morg, - /* */ ValueType *HH, - const int hs0, const int hs1, + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int mbeg, const int mend, const int morg, + /* */ ValueType *HH, const int hs0, const int hs1, const ValueType shift) { - return invoke(mbeg, mend, morg, HH, hs0, hs1, shift, - (Kokkos::pair *)NULL, false); + return invoke(mbeg, mend, morg, HH, hs0, hs1, shift, (Kokkos::pair *)NULL, false); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Hessenberg_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Hessenberg_Serial_Internal.hpp index f12115e4de30..44c5b44373c7 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Hessenberg_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Hessenberg_Serial_Internal.hpp @@ -34,8 +34,7 @@ struct SerialHessenbergInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, // m = NumRows(A) const int n, // n = NumCols(A) - /* */ ValueType *A, const int as0, - const int as1, + /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, /* */ ValueType *w) { typedef ValueType value_type; @@ -76,25 +75,22 @@ struct SerialHessenbergInternal { // perform householder transformation const int m_A22_b = m_A22 - 1; - SerialLeftHouseholderInternal::invoke(m_A22_b, A21_part2x1.AT, - A21_part2x1.AB, as0, tau); + SerialLeftHouseholderInternal::invoke(m_A22_b, 
A21_part2x1.AT, A21_part2x1.AB, as0, tau); // partition A22 into 2x1 A22_part2x1.partWithAT(A_part3x3.A22, m_A22, 1); // left apply householder to partitioned A22 - SerialApplyLeftHouseholderInternal::invoke( - m_A22_b, n_A22, tau, A21_part2x1.AB, as0, A22_part2x1.AT, as1, - A22_part2x1.AB, as0, as1, w); + SerialApplyLeftHouseholderInternal::invoke(m_A22_b, n_A22, tau, A21_part2x1.AB, as0, A22_part2x1.AT, as1, + A22_part2x1.AB, as0, as1, w); // partition A*2 column into 1x2 A2_part1x2.partWithAL(A_part3x3.A02, n_A22, 1); // right apply householder to A*2 colums const int n_A22_r = n_A22 - 1; - SerialApplyRightHouseholderInternal::invoke( - m, n_A22_r, tau, A21_part2x1.AB, as0, A2_part1x2.AL, as0, - A2_part1x2.AR, as0, as1, w); + SerialApplyRightHouseholderInternal::invoke(m, n_A22_r, tau, A21_part2x1.AB, as0, A2_part1x2.AL, as0, + A2_part1x2.AR, as0, as1, w); } /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Armpl_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Armpl_Impl.hpp index 971fb36081e0..7e814646a206 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Armpl_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Armpl_Impl.hpp @@ -67,9 +67,8 @@ namespace Impl { /// ScalarType, AViewType, BViewType, CViewType> /// (handle, alpha, A, B, beta, C).invoke(); // clang-format on -template +template class BatchedArmplGemm { private: HandleType *const __handle; @@ -107,26 +106,21 @@ class BatchedArmplGemm { for (int ib = 0; ib < __nbatch; ++ib) { for (int i = 0; i < __ninter; ++i) { auto svA = - subview_wrapper(__A, ib * __ninter + i, Kokkos::ALL(), - Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); + subview_wrapper(__A, ib * __ninter + i, Kokkos::ALL(), Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); auto svB = - subview_wrapper(__B, 
ib * __ninter + i, Kokkos::ALL(), - Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); + subview_wrapper(__B, ib * __ninter + i, Kokkos::ALL(), Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); auto svC = - subview_wrapper(__C, ib * __ninter + i, Kokkos::ALL(), - Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); + subview_wrapper(__C, ib * __ninter + i, Kokkos::ALL(), Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); - auto info = armpl_dge_interleave( - __ninter, i, __Am, __An, svA.data(), svA.stride(0), svA.stride(1), - &__Adp[__Abstrd * ib], __Aistrd, __Ajstrd); + auto info = armpl_dge_interleave(__ninter, i, __Am, __An, svA.data(), svA.stride(0), svA.stride(1), + &__Adp[__Abstrd * ib], __Aistrd, __Ajstrd); if (info != ARMPL_STATUS_SUCCESS) { std::ostringstream os; os << "armpl_dge_interleave(A) returned:" << info << std::endl; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - info = armpl_dge_interleave(__ninter, i, __Bm, __Bn, svB.data(), - svB.stride(0), svB.stride(1), + info = armpl_dge_interleave(__ninter, i, __Bm, __Bn, svB.data(), svB.stride(0), svB.stride(1), &__Bdp[__Bbstrd * ib], __Bistrd, __Bjstrd); if (info != ARMPL_STATUS_SUCCESS) { std::ostringstream os; @@ -134,8 +128,7 @@ class BatchedArmplGemm { KokkosKernels::Impl::throw_runtime_exception(os.str()); } - info = armpl_dge_interleave(__ninter, i, __Cm, __Cn, svC.data(), - svC.stride(0), svC.stride(1), + info = armpl_dge_interleave(__ninter, i, __Cm, __Cn, svC.data(), svC.stride(0), svC.stride(1), &__Cdp[__Cbstrd * ib], __Cistrd, __Cjstrd); if (info != ARMPL_STATUS_SUCCESS) { std::ostringstream os; @@ -152,12 +145,10 @@ class BatchedArmplGemm { for (int ib = 0; ib < __nbatch; ++ib) { for (int i = 0; i < __ninter; ++i) { auto svC = - subview_wrapper(__C, ib * __ninter + i, Kokkos::ALL(), - Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); + subview_wrapper(__C, ib * __ninter + i, Kokkos::ALL(), Kokkos::ALL(), __batch_layout_tag, __no_trans_tag); - auto info = 
armpl_dge_deinterleave( - __ninter, i, __Cm, __Cn, svC.data(), svC.stride(0), svC.stride(1), - &__Cdp[__Cbstrd * ib], __Cistrd, __Cjstrd); + auto info = armpl_dge_deinterleave(__ninter, i, __Cm, __Cn, svC.data(), svC.stride(0), svC.stride(1), + &__Cdp[__Cbstrd * ib], __Cistrd, __Cjstrd); if (info != ARMPL_STATUS_SUCCESS) { std::ostringstream os; os << "armpl_dge_deinterleave returned:" << info << std::endl; @@ -170,11 +161,10 @@ class BatchedArmplGemm { template std::enable_if_t::value, void> __run(T &) { - auto info = armpl_dgemm_interleave_batch( - __ninter, __nbatch, __transa, __transb, __Cm, __Cn, - std::is_same::value ? __An : __Am, - __alpha, __Adp, __Abstrd, __Aistrd, __Ajstrd, __Bdp, __Bbstrd, __Bistrd, - __Bjstrd, __beta, __Cdp, __Cbstrd, __Cistrd, __Cjstrd); + auto info = armpl_dgemm_interleave_batch(__ninter, __nbatch, __transa, __transb, __Cm, __Cn, + std::is_same::value ? __An : __Am, __alpha, + __Adp, __Abstrd, __Aistrd, __Ajstrd, __Bdp, __Bbstrd, __Bistrd, __Bjstrd, + __beta, __Cdp, __Cbstrd, __Cistrd, __Cjstrd); if (info != ARMPL_STATUS_SUCCESS) { std::ostringstream os; os << "armpl_dgemm_interleave_batch returned :" << info << std::endl; @@ -193,8 +183,7 @@ class BatchedArmplGemm { std::enable_if_t::value, void> __run(T &) {} public: - BatchedArmplGemm(HandleType *const handle, ScalarType alpha, AViewType A, - BViewType B, ScalarType beta, CViewType C) + BatchedArmplGemm(HandleType *const handle, ScalarType alpha, AViewType A, BViewType B, ScalarType beta, CViewType C) : __handle(handle), __A(A), __B(B), __C(C), __alpha(alpha), __beta(beta) { __ninter = __handle->get_tpl_params()[0]; @@ -234,15 +223,11 @@ class BatchedArmplGemm { int invoke() { if (__handle->enableDebug) { - std::cerr << "__nbatch:" << std::to_string(__nbatch) - << ", __ninter:" << std::to_string(__ninter) - << ", __Am:" << std::to_string(__Am) - << ", __An:" << std::to_string(__An) << std::endl; + std::cerr << "__nbatch:" << std::to_string(__nbatch) << ", __ninter:" << 
std::to_string(__ninter) + << ", __Am:" << std::to_string(__Am) << ", __An:" << std::to_string(__An) << std::endl; } - if (!std::is_same::value || - !std::is_same::value || - !std::is_same::value || + if (!std::is_same::value || !std::is_same::value || !std::is_same::value || !std::is_same::value) { std::ostringstream os; os << "KokkosBatched::Impl::BatchedArmplGemm only supports 'double' " @@ -254,8 +239,7 @@ class BatchedArmplGemm { if (__nbatch != 0) { if (__ninter == 0 || __nbatch % __ninter) { std::ostringstream os; - os << "batch size must be evenly divisible by ninter. __nbatch: " - << std::to_string(__nbatch) + os << "batch size must be evenly divisible by ninter. __nbatch: " << std::to_string(__nbatch) << ", __ninter: " << std::to_string(__ninter) << std::endl; KokkosKernels::Impl::throw_runtime_exception(os.str()); } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_DblBuf_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_DblBuf_Impl.hpp index 50d662b281f8..6888de725d79 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_DblBuf_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_DblBuf_Impl.hpp @@ -126,15 +126,13 @@ using TagFromLayout = typename TagFromLayoutHelper::tag; /// ScalarType, AViewType, BViewType, CViewType /// ArgBoundsCheck, tile_m, tile_n, tile_k>(alpha, A, B, beta, C).invoke(); // clang-format on -template +template class BatchedDblBufGemm { private: using AlphaMulTag = - std::conditional_t::value, - AlphaTag::No, AlphaTag::Yes>; + std::conditional_t::value, AlphaTag::No, AlphaTag::Yes>; HandleType *const __handle; AViewType __A; @@ -153,20 +151,12 @@ class BatchedDblBufGemm { using layout_type = typename CViewType::array_layout; using device_type = typename CViewType::device_type; using execution_space_type = typename device_type::execution_space; - using scratch_space_type = - typename 
execution_space_type::scratch_memory_space; - using view_type_2d_scratch = - Kokkos::View; + using scratch_space_type = typename execution_space_type::scratch_memory_space; + using view_type_2d_scratch = Kokkos::View; public: - BatchedDblBufGemm(HandleType *const handle, ScalarType alpha, AViewType A, - BViewType B, ScalarType beta, CViewType C) - : __handle(handle), - __A(A), - __B(B), - __C(C), - __alpha(alpha), - __beta(beta) {} + BatchedDblBufGemm(HandleType *const handle, ScalarType alpha, AViewType A, BViewType B, ScalarType beta, CViewType C) + : __handle(handle), __A(A), __B(B), __C(C), __alpha(alpha), __beta(beta) {} int invoke() { __run(); @@ -175,8 +165,7 @@ class BatchedDblBufGemm { private: void __run() { - using policy_type = - Kokkos::TeamPolicy, execution_space_type>; + using policy_type = Kokkos::TeamPolicy, execution_space_type>; using member_type = typename policy_type::member_type; // Compile-time expressions required for functor-level register allocations: @@ -190,7 +179,7 @@ class BatchedDblBufGemm { constexpr int reg_n = TILE_N / TILE_K + 2 * !!(TILE_N % TILE_K); constexpr int stride_m = TILE_K; constexpr int stride_n = TILE_N / reg_n; - using functor_type = Functor; + using functor_type = Functor; functor_type functor(*this, __A, __B, __C); @@ -211,43 +200,35 @@ class BatchedDblBufGemm { int vector_len = stride_n; const int max_team_size = - policy_type(league_size, Kokkos::AUTO, vector_len) - .team_size_max(functor, Kokkos::ParallelForTag()); + policy_type(league_size, Kokkos::AUTO, vector_len).team_size_max(functor, Kokkos::ParallelForTag()); if (team_size > max_team_size) { std::ostringstream os; - os << "KokkosBatched::BatchedGemm with kernelAlgoType = " - << std::to_string(__handle->get_kernel_algo_type()) - << " does not support team_size > " << std::to_string(max_team_size) - << "." 
<< std::endl + os << "KokkosBatched::BatchedGemm with kernelAlgoType = " << std::to_string(__handle->get_kernel_algo_type()) + << " does not support team_size > " << std::to_string(max_team_size) << "." << std::endl << " The tile dimensions must be adjusted." << std::endl; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - const int max_vector_len = - policy_type(league_size, team_size, Kokkos::AUTO).vector_length_max(); + const int max_vector_len = policy_type(league_size, team_size, Kokkos::AUTO).vector_length_max(); if (vector_len > max_vector_len) { std::ostringstream os; - os << "KokkosBatched::BatchedGemm with kernelAlgoType = " - << std::to_string(__handle->get_kernel_algo_type()) - << " does not support vector_len > " << std::to_string(max_vector_len) - << "." << std::endl + os << "KokkosBatched::BatchedGemm with kernelAlgoType = " << std::to_string(__handle->get_kernel_algo_type()) + << " does not support vector_len > " << std::to_string(max_vector_len) << "." << std::endl << " The tile dimensions must be adjusted." 
<< std::endl; KokkosKernels::Impl::throw_runtime_exception(os.str()); } if (__handle->enableDebug) { - std::cout << "max_team_size:" << max_team_size - << " team_size:" << team_size << std::endl - << "max_vector_len:" << max_vector_len - << " vector_len:" << vector_len << std::endl + std::cout << "max_team_size:" << max_team_size << " team_size:" << team_size << std::endl + << "max_vector_len:" << max_vector_len << " vector_len:" << vector_len << std::endl << "TILE_M:" << TILE_M << std::endl << "TILE_N:" << TILE_N << std::endl << "TILE_K:" << TILE_K << std::endl; } // TODO: Use statically allocated shmem - int shmem_size = view_type_2d_scratch::shmem_size(TILE_M, TILE_K) + - view_type_2d_scratch::shmem_size(TILE_K, TILE_N); + int shmem_size = + view_type_2d_scratch::shmem_size(TILE_M, TILE_K) + view_type_2d_scratch::shmem_size(TILE_K, TILE_N); // Each member solves a portion of TILE_K in parallel with other members policy_type team_policy(league_size, team_size, vector_len); @@ -278,8 +259,7 @@ class BatchedDblBufGemm { // below. If those are used, we get an invalid memory error from cuda. I // suspect this is due the values not being copied to device and then // runtime resolution of the host address &__ei. 
- Functor(BatchedDblBufGemm &ei, AViewType A, BViewType B, CViewType C) - : __ei(ei), __A(A), __B(B), __C(C) { + Functor(BatchedDblBufGemm &ei, AViewType A, BViewType B, CViewType C) : __ei(ei), __A(A), __B(B), __C(C) { if (std::is_same::value) { ei.__c_batch_size = ei.__C.extent_int(0); ei.__c_m = ei.__C.extent_int(1); @@ -310,24 +290,17 @@ class BatchedDblBufGemm { } KOKKOS_INLINE_FUNCTION - void __mul(view_value_type a, view_value_type b, view_value_type &c, - const AlphaTag::No &) const { - c += a * b; - } + void __mul(view_value_type a, view_value_type b, view_value_type &c, const AlphaTag::No &) const { c += a * b; } KOKKOS_INLINE_FUNCTION - void __mul(view_value_type a, view_value_type b, view_value_type &c, - const AlphaTag::Yes &) const { + void __mul(view_value_type a, view_value_type b, view_value_type &c, const AlphaTag::Yes &) const { c += a * b * __alpha; } KOKKOS_INLINE_FUNCTION - void __rshmem_and_mul(const int &thread_id, const int &vlane_id, - const unsigned &nk, view_value_type reg_a[REG_M], - view_value_type reg_b[REG_N], - view_value_type reg_c[REG_M][REG_N], - view_type_2d_scratch &svA_scr, - view_type_2d_scratch &svB_scr) const { + void __rshmem_and_mul(const int &thread_id, const int &vlane_id, const unsigned &nk, view_value_type reg_a[REG_M], + view_value_type reg_b[REG_N], view_value_type reg_c[REG_M][REG_N], + view_type_2d_scratch &svA_scr, view_type_2d_scratch &svB_scr) const { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL @@ -335,14 +308,12 @@ class BatchedDblBufGemm { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int m = 0; m < REG_M; ++m) - reg_a[m] = svA_scr(thread_id + m * STRIDE_M, k); + for (int m = 0; m < REG_M; ++m) reg_a[m] = svA_scr(thread_id + m * STRIDE_M, k); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) - reg_b[n] = svB_scr(k, vlane_id + n * 
STRIDE_N); + for (int n = 0; n < REG_N; ++n) reg_b[n] = svB_scr(k, vlane_id + n * STRIDE_N); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -351,18 +322,15 @@ class BatchedDblBufGemm { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) - __mul(reg_a[m], reg_b[n], reg_c[m][n], __ei.__alpha_mul_tag); + for (int n = 0; n < REG_N; ++n) __mul(reg_a[m], reg_b[n], reg_c[m][n], __ei.__alpha_mul_tag); } } } KOKKOS_INLINE_FUNCTION - void __rshmem_and_mul_ll(const int &thread_id, const int &vlane_id, - const unsigned &nk, view_value_type reg_a[REG_M], - view_value_type reg_b[REG_N], - view_value_type reg_c[REG_M][REG_N], - view_type_2d_scratch &svA_scr, + void __rshmem_and_mul_ll(const int &thread_id, const int &vlane_id, const unsigned &nk, + view_value_type reg_a[REG_M], view_value_type reg_b[REG_N], + view_value_type reg_c[REG_M][REG_N], view_type_2d_scratch &svA_scr, view_type_2d_scratch &svB_scr) const { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -371,14 +339,12 @@ class BatchedDblBufGemm { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int m = 0; m < REG_M; ++m) - reg_a[m] = svA_scr(k, vlane_id + m * STRIDE_M); + for (int m = 0; m < REG_M; ++m) reg_a[m] = svA_scr(k, vlane_id + m * STRIDE_M); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) - reg_b[n] = svB_scr(thread_id + n * STRIDE_N, k); + for (int n = 0; n < REG_N; ++n) reg_b[n] = svB_scr(thread_id + n * STRIDE_N, k); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -387,8 +353,7 @@ class BatchedDblBufGemm { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) - __mul(reg_a[m], reg_b[n], reg_c[m][n], __ei.__alpha_mul_tag); + for (int n = 0; n < REG_N; ++n) __mul(reg_a[m], reg_b[n], reg_c[m][n], 
__ei.__alpha_mul_tag); } } } @@ -401,8 +366,7 @@ class BatchedDblBufGemm { view_value_type prefetch_reg_a[REG_M] = {0}, prefetch_reg_b[REG_N] = {0}; // Allocate registers used for FMAs - view_value_type reg_a[REG_M] = {0}, reg_b[REG_N] = {0}, - reg_c[REG_M][REG_N] = {{0}}; + view_value_type reg_a[REG_M] = {0}, reg_b[REG_N] = {0}, reg_c[REG_M][REG_N] = {{0}}; // TODO: look at local loads and stores via nvprof // TODO: look at GPU trace in nvprof to find out how many registers are // used. @@ -417,147 +381,124 @@ class BatchedDblBufGemm { int kk; // Fetch entire 2-rank sub-matrix - auto svA = subview_wrapper(__A, batch_idx, Kokkos::ALL(), Kokkos::ALL(), - __ei.__batch_layout_tag, __ei.__transA_tag); - auto svB = subview_wrapper(__B, batch_idx, Kokkos::ALL(), Kokkos::ALL(), - __ei.__batch_layout_tag, __ei.__transB_tag); - auto svC = subview_wrapper(__C, batch_idx, Kokkos::ALL(), Kokkos::ALL(), - __ei.__batch_layout_tag); + auto svA = + subview_wrapper(__A, batch_idx, Kokkos::ALL(), Kokkos::ALL(), __ei.__batch_layout_tag, __ei.__transA_tag); + auto svB = + subview_wrapper(__B, batch_idx, Kokkos::ALL(), Kokkos::ALL(), __ei.__batch_layout_tag, __ei.__transB_tag); + auto svC = subview_wrapper(__C, batch_idx, Kokkos::ALL(), Kokkos::ALL(), __ei.__batch_layout_tag); // Allocate scratch memory buffers used for prefetching view_type_2d_scratch svA_scr(member.team_scratch(0), TILE_M, TILE_K); view_type_2d_scratch svB_scr(member.team_scratch(0), TILE_K, TILE_N); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, STRIDE_M), - [&](const int &thread_id) { - int m_offset = thread_id + start_m; + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, STRIDE_M), [&](const int &thread_id) { + int m_offset = thread_id + start_m; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, 0, STRIDE_N), - [&](const int &vlane_id) { - int n_offset = vlane_id + start_n; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, 0, STRIDE_N), [&](const int &vlane_id) { + int 
n_offset = vlane_id + start_n; // Here we populate scratch memory with one or more "k" tiles for // every thread of the team! #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_N * STRIDE_N; i += STRIDE_N) - svB_scr(thread_id, vlane_id + i) = - access_view_bounds_check( - svB, thread_id, n_offset + i, - __ei.__bounds_check_tag); + for (int i = 0; i < REG_N * STRIDE_N; i += STRIDE_N) + svB_scr(thread_id, vlane_id + i) = + access_view_bounds_check(svB, thread_id, n_offset + i, __ei.__bounds_check_tag); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_M * STRIDE_M; i += STRIDE_M) - svA_scr(thread_id + i, vlane_id) = - access_view_bounds_check( - svA, m_offset + i, vlane_id, - __ei.__bounds_check_tag); + for (int i = 0; i < REG_M * STRIDE_M; i += STRIDE_M) + svA_scr(thread_id + i, vlane_id) = + access_view_bounds_check(svA, m_offset + i, vlane_id, __ei.__bounds_check_tag); - // Wait for A, B to reside in scratch memory - member.team_barrier(); + // Wait for A, B to reside in scratch memory + member.team_barrier(); - // Each thread calculates a single dot product in chunks of - // size TILE_K - for (kk = 0; kk < __k - TILE_K; kk += TILE_K) { - int k_tile_offset = kk + TILE_K; + // Each thread calculates a single dot product in chunks of + // size TILE_K + for (kk = 0; kk < __k - TILE_K; kk += TILE_K) { + int k_tile_offset = kk + TILE_K; // Get this threads next TILE_K entries from global memory // Each thread has its own copy of prefetch_reg_b. 
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_N; ++i) - prefetch_reg_b[i] = - access_view_bounds_check( - svB, thread_id + k_tile_offset, - n_offset + i * STRIDE_N, __ei.__bounds_check_tag); + for (int i = 0; i < REG_N; ++i) + prefetch_reg_b[i] = access_view_bounds_check( + svB, thread_id + k_tile_offset, n_offset + i * STRIDE_N, __ei.__bounds_check_tag); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_M; ++i) - prefetch_reg_a[i] = - access_view_bounds_check( - svA, m_offset + i * STRIDE_M, - vlane_id + k_tile_offset, - __ei.__bounds_check_tag); + for (int i = 0; i < REG_M; ++i) + prefetch_reg_a[i] = access_view_bounds_check( + svA, m_offset + i * STRIDE_M, vlane_id + k_tile_offset, __ei.__bounds_check_tag); - __rshmem_and_mul(thread_id, vlane_id, TILE_K, reg_a, reg_b, - reg_c, svA_scr, svB_scr); + __rshmem_and_mul(thread_id, vlane_id, TILE_K, reg_a, reg_b, reg_c, svA_scr, svB_scr); - // Wait for: - // 1. prefetch_regs to be populated - // 2. for shmem to no longer be read from - member.team_barrier(); + // Wait for: + // 1. prefetch_regs to be populated + // 2. for shmem to no longer be read from + member.team_barrier(); // populate shmem from prefetch registers. Each thread has its own // copy of prefetch_reg_b. #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_N; ++i) - svB_scr(thread_id, vlane_id + i * STRIDE_N) = - prefetch_reg_b[i]; + for (int i = 0; i < REG_N; ++i) svB_scr(thread_id, vlane_id + i * STRIDE_N) = prefetch_reg_b[i]; // populate shmem from prefetch registers. Each thread has its own // copy of prefetch_reg_a. 
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_M; ++i) - svA_scr(thread_id + i * STRIDE_M, vlane_id) = - prefetch_reg_a[i]; + for (int i = 0; i < REG_M; ++i) svA_scr(thread_id + i * STRIDE_M, vlane_id) = prefetch_reg_a[i]; - // Wait for shmem stores to land before performing next - // TILE_K multiply - member.team_barrier(); - } // end n_tile_k_tiles loop + // Wait for shmem stores to land before performing next + // TILE_K multiply + member.team_barrier(); + } // end n_tile_k_tiles loop - // Multiply last tile, may be a partial tile - __rshmem_and_mul(thread_id, vlane_id, __k - kk, reg_a, reg_b, - reg_c, svA_scr, svB_scr); + // Multiply last tile, may be a partial tile + __rshmem_and_mul(thread_id, vlane_id, __k - kk, reg_a, reg_b, reg_c, svA_scr, svB_scr); - // store results back to global memory - if (__beta == 0.0F) { + // store results back to global memory + if (__beta == 0.0F) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int m = 0; m < REG_M; ++m) { - int cm = m_offset + m * STRIDE_M; + for (int m = 0; m < REG_M; ++m) { + int cm = m_offset + m * STRIDE_M; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) { - int cn = n_offset + n * STRIDE_N; - fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, - __ei.__alpha_fma_tag, - __ei.__bounds_check_tag); - } - } - } else { + for (int n = 0; n < REG_N; ++n) { + int cn = n_offset + n * STRIDE_N; + fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, __ei.__alpha_fma_tag, __ei.__bounds_check_tag); + } + } + } else { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int m = 0; m < REG_M; ++m) { - int cm = m_offset + m * STRIDE_M; + for (int m = 0; m < REG_M; ++m) { + int cm = m_offset + m * STRIDE_M; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // 
KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) { - int cn = n_offset + n * STRIDE_N; - fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, - __beta, __ei.__alpha_fma_tag, - __ei.__bounds_check_tag); - } - } - } - }); - }); + for (int n = 0; n < REG_N; ++n) { + int cn = n_offset + n * STRIDE_N; + fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, __beta, __ei.__alpha_fma_tag, + __ei.__bounds_check_tag); + } + } + } + }); + }); } KOKKOS_INLINE_FUNCTION @@ -568,8 +509,7 @@ class BatchedDblBufGemm { view_value_type prefetch_reg_a[REG_M] = {0}, prefetch_reg_b[REG_N] = {0}; // Allocate registers used for FMAs - view_value_type reg_a[REG_M] = {0}, reg_b[REG_N] = {0}, - reg_c[REG_M][REG_N] = {{0}}; + view_value_type reg_a[REG_M] = {0}, reg_b[REG_N] = {0}, reg_c[REG_M][REG_N] = {{0}}; // TODO: look at local loads and stores via nvprof // TODO: look at GPU trace in nvprof to find out how many registers are // used. @@ -584,149 +524,126 @@ class BatchedDblBufGemm { int kk; // Fetch entire 2-rank sub-matrix - auto svA = subview_wrapper(__A, batch_idx, Kokkos::ALL(), Kokkos::ALL(), - __ei.__batch_layout_tag, __ei.__transA_tag); - auto svB = subview_wrapper(__B, batch_idx, Kokkos::ALL(), Kokkos::ALL(), - __ei.__batch_layout_tag, __ei.__transB_tag); - auto svC = subview_wrapper(__C, batch_idx, Kokkos::ALL(), Kokkos::ALL(), - __ei.__batch_layout_tag); + auto svA = + subview_wrapper(__A, batch_idx, Kokkos::ALL(), Kokkos::ALL(), __ei.__batch_layout_tag, __ei.__transA_tag); + auto svB = + subview_wrapper(__B, batch_idx, Kokkos::ALL(), Kokkos::ALL(), __ei.__batch_layout_tag, __ei.__transB_tag); + auto svC = subview_wrapper(__C, batch_idx, Kokkos::ALL(), Kokkos::ALL(), __ei.__batch_layout_tag); // Allocate scratch memory buffers used for prefetching view_type_2d_scratch svA_scr(member.team_scratch(0), TILE_K, TILE_M); view_type_2d_scratch svB_scr(member.team_scratch(0), TILE_N, TILE_K); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, STRIDE_N), - [&](const 
int &thread_id) { - int n_offset = thread_id + start_n; + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, STRIDE_N), [&](const int &thread_id) { + int n_offset = thread_id + start_n; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, 0, STRIDE_M), - [&](const int &vlane_id) { - int m_offset = vlane_id + start_m; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, 0, STRIDE_M), [&](const int &vlane_id) { + int m_offset = vlane_id + start_m; // Here we populate scratch memory with one or more "k" tiles for // every thread of the team! #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_N * STRIDE_N; i += STRIDE_N) - svB_scr(thread_id + i, vlane_id) = - access_view_bounds_check( - svB, vlane_id, n_offset + i, - __ei.__bounds_check_tag); + for (int i = 0; i < REG_N * STRIDE_N; i += STRIDE_N) + svB_scr(thread_id + i, vlane_id) = + access_view_bounds_check(svB, vlane_id, n_offset + i, __ei.__bounds_check_tag); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_M * STRIDE_M; i += STRIDE_M) - svA_scr(thread_id, vlane_id + i) = - access_view_bounds_check( - svA, m_offset + i, thread_id, - __ei.__bounds_check_tag); + for (int i = 0; i < REG_M * STRIDE_M; i += STRIDE_M) + svA_scr(thread_id, vlane_id + i) = + access_view_bounds_check(svA, m_offset + i, thread_id, __ei.__bounds_check_tag); - // Wait for A, B to reside in scratch memory - member.team_barrier(); + // Wait for A, B to reside in scratch memory + member.team_barrier(); - // Each thread calculates a single dot product in chunks of - // size TILE_K - for (kk = 0; kk < __k - TILE_K; kk += TILE_K) { - int k_tile_offset = kk + TILE_K; + // Each thread calculates a single dot product in chunks of + // size TILE_K + for (kk = 0; kk < __k - TILE_K; kk += TILE_K) { + int k_tile_offset = kk + TILE_K; // Get this threads next TILE_K entries from global memory // Each 
thread has its own copy of prefetch_reg_b. #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_N; ++i) - prefetch_reg_b[i] = - access_view_bounds_check( - svB, vlane_id + k_tile_offset, - n_offset + i * STRIDE_N, __ei.__bounds_check_tag); + for (int i = 0; i < REG_N; ++i) + prefetch_reg_b[i] = access_view_bounds_check( + svB, vlane_id + k_tile_offset, n_offset + i * STRIDE_N, __ei.__bounds_check_tag); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_M; ++i) - prefetch_reg_a[i] = - access_view_bounds_check( - svA, m_offset + i * STRIDE_M, - thread_id + k_tile_offset, - __ei.__bounds_check_tag); + for (int i = 0; i < REG_M; ++i) + prefetch_reg_a[i] = access_view_bounds_check( + svA, m_offset + i * STRIDE_M, thread_id + k_tile_offset, __ei.__bounds_check_tag); - __rshmem_and_mul_ll(thread_id, vlane_id, TILE_K, reg_a, - reg_b, reg_c, svA_scr, svB_scr); + __rshmem_and_mul_ll(thread_id, vlane_id, TILE_K, reg_a, reg_b, reg_c, svA_scr, svB_scr); - // Wait for: - // 1. prefetch_regs to be populated - // 2. for shmem to no longer be read from - member.team_barrier(); + // Wait for: + // 1. prefetch_regs to be populated + // 2. for shmem to no longer be read from + member.team_barrier(); // populate shmem from prefetch registers. Each thread has its own // copy of prefetch_reg_b. #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_N; ++i) - svB_scr(thread_id + i * STRIDE_N, vlane_id) = - prefetch_reg_b[i]; + for (int i = 0; i < REG_N; ++i) svB_scr(thread_id + i * STRIDE_N, vlane_id) = prefetch_reg_b[i]; // populate shmem from prefetch registers. Each thread has its own // copy of prefetch_reg_a. 
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int i = 0; i < REG_M; ++i) - svA_scr(thread_id, vlane_id + i * STRIDE_M) = - prefetch_reg_a[i]; + for (int i = 0; i < REG_M; ++i) svA_scr(thread_id, vlane_id + i * STRIDE_M) = prefetch_reg_a[i]; - // Wait for shmem stores to land before performing next - // TILE_K multiply - member.team_barrier(); - } // end n_tile_k_tiles loop + // Wait for shmem stores to land before performing next + // TILE_K multiply + member.team_barrier(); + } // end n_tile_k_tiles loop - // Multiply last tile, may be a partial tile - __rshmem_and_mul_ll(thread_id, vlane_id, __k - kk, reg_a, - reg_b, reg_c, svA_scr, svB_scr); + // Multiply last tile, may be a partial tile + __rshmem_and_mul_ll(thread_id, vlane_id, __k - kk, reg_a, reg_b, reg_c, svA_scr, svB_scr); - // store results back to global memory - if (__beta == 0.0F) { + // store results back to global memory + if (__beta == 0.0F) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) { - int cn = n_offset + n * STRIDE_N; + for (int n = 0; n < REG_N; ++n) { + int cn = n_offset + n * STRIDE_N; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int m = 0; m < REG_M; ++m) { - int cm = m_offset + m * STRIDE_M; - fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, - __ei.__alpha_fma_tag, - __ei.__bounds_check_tag); - } - } - } else { + for (int m = 0; m < REG_M; ++m) { + int cm = m_offset + m * STRIDE_M; + fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, __ei.__alpha_fma_tag, __ei.__bounds_check_tag); + } + } + } else { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int n = 0; n < REG_N; ++n) { - int cn = n_offset + n * STRIDE_N; + for (int n = 0; n < REG_N; ++n) { + int cn = n_offset + n * STRIDE_N; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll 
#endif // KOKKOS_ENABLE_PRAGMA_UNROLL - for (int m = 0; m < REG_M; ++m) { - int cm = m_offset + m * STRIDE_M; - fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, - __beta, __ei.__alpha_fma_tag, - __ei.__bounds_check_tag); - } - } - } - }); - }); + for (int m = 0; m < REG_M; ++m) { + int cm = m_offset + m * STRIDE_M; + fma_bounds_check(svC, cm, cn, reg_c[m][n], __alpha, __beta, __ei.__alpha_fma_tag, + __ei.__bounds_check_tag); + } + } + } + }); + }); } }; }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Impl.hpp index 464ea6d04a8a..6216aeb099c8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Impl.hpp @@ -62,30 +62,21 @@ constexpr KOKKOS_INLINE_FUNCTION size_t kk_gemm_dbl_buf_alpha_in_fma_thresh() { #endif // __CUDAACC_RDC__ } -template -int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, - const AViewType &A, const BViewType &B, +template +int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, const AViewType &A, const BViewType &B, const ScalarType beta, const CViewType &C) { int ret = 0; size_t c_m, c_n; using ViewValueType = typename CViewType::value_type; // Check for valid input views - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "BViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "CViewType must be a Kokkos::View."); - static_assert( - std::is_same::value || - std::is_same::value, - "ArgTransA must be either Trans::Transpose or Trans::NoTranspose."); - static_assert( - std::is_same::value || - std::is_same::value, - "ArgTransB must be either Trans::Transpose or Trans::NoTranspose."); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); 
+ static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "CViewType must be a Kokkos::View."); + static_assert(std::is_same::value || std::is_same::value, + "ArgTransA must be either Trans::Transpose or Trans::NoTranspose."); + static_assert(std::is_same::value || std::is_same::value, + "ArgTransB must be either Trans::Transpose or Trans::NoTranspose."); if constexpr (is_vector::value) { // Check ranks of view with underlying SIMD value types // For SIMD views, we can have either 3-rank or 4-ranks inputs. @@ -100,31 +91,27 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, default: std::ostringstream os; os << "KokkosBatched::BatchedGemm does not support kernelAlgoType = " - << std::to_string(handle->get_kernel_algo_type()) - << " with SIMD views." << std::endl; + << std::to_string(handle->get_kernel_algo_type()) << " with SIMD views." << std::endl; KokkosKernels::Impl::throw_runtime_exception(os.str()); break; } } else { // Check ranks of views with underlying scalar value types - static_assert(static_cast(AViewType::rank) == 3, - "AViewType must have rank 3."); - static_assert(static_cast(BViewType::rank) == 3, - "BViewType must have rank 3."); - static_assert(static_cast(CViewType::rank) == 3, - "CViewType must have rank 3."); + static_assert(static_cast(AViewType::rank) == 3, "AViewType must have rank 3."); + static_assert(static_cast(BViewType::rank) == 3, "BViewType must have rank 3."); + static_assert(static_cast(CViewType::rank) == 3, "CViewType must have rank 3."); } // Check for valid data access patterns // Skip checking a_layout == b_layout == c_layout // Skip checking for LayoutStride using c_layout = typename CViewType::array_layout; - static_assert(!(std::is_same::value && - !std::is_same::value), - "LayoutLeft views require BatchLayout::Right"); - static_assert(!(std::is_same::value && - !std::is_same::value), - "LayoutRight views require 
BatchLayout::Left"); + static_assert( + !(std::is_same::value && !std::is_same::value), + "LayoutLeft views require BatchLayout::Right"); + static_assert( + !(std::is_same::value && !std::is_same::value), + "LayoutRight views require BatchLayout::Left"); if constexpr (std::is_same::value) { // c_b = C.extent(0); @@ -141,17 +128,13 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, using layout_type = typename CViewType::array_layout; using exec_space = typename CViewType::execution_space; constexpr bool is_vector = KokkosBatched::is_vector::value; - constexpr bool on_gpu = - KokkosKernels::Impl::kk_is_gpu_exec_space(); - constexpr bool on_x86_64 = KokkosKernels::Impl::kk_is_x86_64_mem_space< - typename exec_space::memory_space>(); - constexpr bool on_a64fx = KokkosKernels::Impl::kk_is_a64fx_mem_space< - typename exec_space::memory_space>(); - bool out_of_range = false; + constexpr bool on_gpu = KokkosKernels::Impl::kk_is_gpu_exec_space(); + constexpr bool on_x86_64 = KokkosKernels::Impl::kk_is_x86_64_mem_space(); + constexpr bool on_a64fx = KokkosKernels::Impl::kk_is_a64fx_mem_space(); + bool out_of_range = false; if (handle->enableDebug) { - std::cout << "view_scalar_type:" << typeid(view_scalar_type).name() - << std::endl + std::cout << "view_scalar_type:" << typeid(view_scalar_type).name() << std::endl << "execution_space:" << typeid(exec_space).name() << std::endl << std::endl << "is_vector:" << is_vector << std::endl @@ -166,79 +149,58 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, if (c_m != c_n) { std::ostringstream os; os << "KokkosBatched::BatchedGemm does not support kernelAlgoType = " - << std::to_string(handle->get_kernel_algo_type()) << " when c_m(" - << std::to_string(c_m) << ") != c_n(" << std::to_string(c_n) << ")" - << std::endl; + << std::to_string(handle->get_kernel_algo_type()) << " when c_m(" << std::to_string(c_m) << ") != c_n(" + << std::to_string(c_n) << ")" << std::endl; 
KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Select optimal resultsPerThread param for BatchedSerialGemm using bsgResultsPerThread = - std::conditional_t; + std::conditional_t; // Select optimal mode param for SerialGemm. using bsgModeType = typename std::conditional< - is_vector, - typename std::conditional::type, + is_vector, typename std::conditional::type, typename std::conditional< on_gpu, Algo::Gemm::Unblocked, - typename std::conditional::type>::type>:: - type; + typename std::conditional::type>::type>::type; if (handle->enableDebug) { - std::cout << "bsgResultsPerThread: " - << typeid(bsgResultsPerThread).name() << std::endl + std::cout << "bsgResultsPerThread: " << typeid(bsgResultsPerThread).name() << std::endl << "bsgModeType: " << typeid(bsgModeType).name() << std::endl; } if constexpr (on_gpu) { - if (((std::is_same::value) - ? (c_m >= 16) - : (c_m >= 24 && c_m <= 32) || c_m >= 40)) { - handle->teamSz = handle->vecLen = 8; - constexpr int tile_m = Impl::kk_gemm_dbl_buf_tile_m(); - constexpr int tile_n = Impl::kk_gemm_dbl_buf_tile_n(); - constexpr int tile_k = Impl::kk_gemm_dbl_buf_tile_k(); - constexpr size_t alpha_in_fma_thresh = - Impl::kk_gemm_dbl_buf_alpha_in_fma_thresh(); + if (((std::is_same::value) ? 
(c_m >= 16) + : (c_m >= 24 && c_m <= 32) || c_m >= 40)) { + handle->teamSz = handle->vecLen = 8; + constexpr int tile_m = Impl::kk_gemm_dbl_buf_tile_m(); + constexpr int tile_n = Impl::kk_gemm_dbl_buf_tile_n(); + constexpr int tile_k = Impl::kk_gemm_dbl_buf_tile_k(); + constexpr size_t alpha_in_fma_thresh = Impl::kk_gemm_dbl_buf_alpha_in_fma_thresh(); if (c_m % 32 == 0) { // No bounds checking if (c_m >= alpha_in_fma_thresh) { // apply alpha in fma - ret = Impl::BatchedDblBufGemm( - handle, alpha, A, B, beta, C) + ret = Impl::BatchedDblBufGemm(handle, alpha, A, B, beta, C) .invoke(); } else { // apply alpha in mul - ret = Impl::BatchedDblBufGemm( - handle, alpha, A, B, beta, C) + ret = Impl::BatchedDblBufGemm(handle, alpha, A, B, beta, C) .invoke(); } } else { // bounds checking if (c_m >= alpha_in_fma_thresh) { // apply alpha in fma - ret = Impl::BatchedDblBufGemm( - handle, alpha, A, B, beta, C) + ret = Impl::BatchedDblBufGemm(handle, alpha, A, B, beta, C) .invoke(); } else { // apply alpha in mul - ret = Impl::BatchedDblBufGemm( - handle, alpha, A, B, beta, C) + ret = Impl::BatchedDblBufGemm(handle, alpha, A, B, beta, C) .invoke(); } } @@ -247,10 +209,8 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, } } if (!on_gpu || out_of_range) { - ret = Impl::BatchedSerialGemm(alpha, A, B, beta, C) + ret = Impl::BatchedSerialGemm(alpha, A, B, beta, C) .invoke(); } break; @@ -261,10 +221,8 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, ////////////// TPL ALGOS ////////////// #if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) && ARMPL_BUILD >= 1058 case BaseTplAlgos::ARMPL: - ret = Impl::BatchedArmplGemm(handle, alpha, A, B, - beta, C) + ret = Impl::BatchedArmplGemm(handle, alpha, A, B, beta, C) .invoke(); break; #endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL @@ -276,23 +234,17 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, ////////////// KokkosBatched ALGOS ////////////// case 
BaseKokkosBatchedAlgos::KK_SERIAL: - ret = - Impl::BatchedSerialGemm( - alpha, A, B, beta, C) - .invoke(); + ret = Impl::BatchedSerialGemm(alpha, A, B, beta, C) + .invoke(); break; // case GemmKokkosBatchedAlgos::KK_SERIALSIMD: case GemmKokkosBatchedAlgos::KK_SERIAL_RANK0: - ret = - Impl::BatchedSerialGemm( - alpha, A, B, beta, C) - .invoke(); + ret = Impl::BatchedSerialGemm(alpha, A, B, beta, C) + .invoke(); break; // case GemmKokkosBatchedAlgos::KK_SERIAL_SHMEM: @@ -308,11 +260,9 @@ int BatchedGemmImpl(BatchedGemmHandleType *const handle, const ScalarType alpha, // performance. // TODO: Add auto-selection of tile size based on inputs and device type - ret = Impl::BatchedDblBufGemm( - handle, alpha, A, B, beta, C) + ret = Impl::BatchedDblBufGemm(handle, alpha, A, B, + beta, C) .invoke(); break; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp index 5ff581bb64b7..8da3c7acd1ae 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp @@ -76,9 +76,8 @@ namespace Impl { /// ArgResultsPerThread, ScalarType, AViewType, /// BViewType, CViewType>(alpha, A, B, beta, C).invoke(); // clang-format on -template +template class BatchedSerialGemm { private: AViewType A; @@ -92,10 +91,8 @@ class BatchedSerialGemm { void run() { using execution_space = typename CViewType::device_type::execution_space; - using policy_type = - Kokkos::RangePolicy; - Kokkos::parallel_for("BatchedSerialGemm", policy_type(0, batch_size), - *this); + using policy_type = Kokkos::RangePolicy; + Kokkos::parallel_for("BatchedSerialGemm", policy_type(0, batch_size), *this); } public: @@ -117,8 +114,7 @@ class BatchedSerialGemm { batch_size *= divisor; run(); - } else if (std::is_same::value) { + } else if (std::is_same::value) { if 
(std::is_same::value) batch_size = C.extent(0); else @@ -132,8 +128,7 @@ class BatchedSerialGemm { return 0; } - BatchedSerialGemm(ScalarType _alpha, AViewType _A, BViewType _B, - ScalarType _beta, CViewType _C) + BatchedSerialGemm(ScalarType _alpha, AViewType _A, BViewType _B, ScalarType _beta, CViewType _C) : A(_A), B(_B), C(_C), alpha(_alpha), beta(_beta) {} KOKKOS_INLINE_FUNCTION @@ -149,34 +144,26 @@ class BatchedSerialGemm { // Due to taking 1-rank subviews out, we must handle transpose here. // Use overloads of subview_wrapper to handle transpose at compile time. - auto svA_row = subview_wrapper(A, batch_idx, row_idx, Kokkos::ALL(), - batch_layout_tag, transA_tag); - auto svB_col = subview_wrapper(B, batch_idx, Kokkos::ALL(), col_idx, - batch_layout_tag, transB_tag); - auto svC_ele = - subview_wrapper(C, batch_idx, row_idx, col_idx, batch_layout_tag); + auto svA_row = subview_wrapper(A, batch_idx, row_idx, Kokkos::ALL(), batch_layout_tag, transA_tag); + auto svB_col = subview_wrapper(B, batch_idx, Kokkos::ALL(), col_idx, batch_layout_tag, transB_tag); + auto svC_ele = subview_wrapper(C, batch_idx, row_idx, col_idx, batch_layout_tag); // Kokkos::subview(scalar, ALL) or Kokkos::subview(ALL, scalar) always // returns a column vector. Since the subviews above handle the // matrix transpositions, here we must perform the GEMM on: // row_vec x col_vec, which is svA_row' x svB_col to compute the element // of C. 
- KokkosBatched::SerialGemm::invoke(alpha, svA_row, svB_col, beta, - svC_ele); + KokkosBatched::SerialGemm::invoke(alpha, svA_row, svB_col, beta, + svC_ele); } KOKKOS_INLINE_FUNCTION void operator()(const ResultsPerThread::Rank2 &, const int &i) const { - auto svA = - subview_wrapper(A, i, Kokkos::ALL(), Kokkos::ALL(), batch_layout_tag); - auto svB = - subview_wrapper(B, i, Kokkos::ALL(), Kokkos::ALL(), batch_layout_tag); - auto svC = - subview_wrapper(C, i, Kokkos::ALL(), Kokkos::ALL(), batch_layout_tag); + auto svA = subview_wrapper(A, i, Kokkos::ALL(), Kokkos::ALL(), batch_layout_tag); + auto svB = subview_wrapper(B, i, Kokkos::ALL(), Kokkos::ALL(), batch_layout_tag); + auto svC = subview_wrapper(C, i, Kokkos::ALL(), Kokkos::ALL(), batch_layout_tag); - KokkosBatched::SerialGemm::invoke( - alpha, svA, svB, beta, svC); + KokkosBatched::SerialGemm::invoke(alpha, svA, svB, beta, svC); } }; } // namespace Impl diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Spec.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Spec.hpp index 6ec792172bf6..6f06694f0971 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Spec.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Spec.hpp @@ -29,17 +29,15 @@ namespace KokkosBatched { namespace Impl { // Specialization struct which defines whether a specialization exists // This struct is currently never specialized. 
-template +template struct batched_gemm_tpl_spec_avail { enum : bool { value = false }; }; // Specialization struct which defines whether a specialization exists -template +template struct batched_gemm_eti_spec_avail { enum : bool { value = false }; }; @@ -47,71 +45,55 @@ struct batched_gemm_eti_spec_avail { } // namespace KokkosBatched // ETI specalization macros, consumed by generated *_eti_spec_avail.hpp files -#define KOKKOSBATCHED_GEMM_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, \ - ARG_BATCH_LAYOUT, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct batched_gemm_eti_spec_avail< \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, BatchedGemmHandle, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBATCHED_GEMM_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct batched_gemm_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) -#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_ETI_SPEC_AVAIL_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutRight, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutRight, \ + EXEC_SPACE, MEM_SPACE) #else -#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define 
KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) #endif #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) -#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_ETI_SPEC_AVAIL_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutLeft, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutLeft, \ + EXEC_SPACE, MEM_SPACE) #else -#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) #endif ///////////////// BatchLayout::Left Permutations ///////////////// -#define KOKKOSBATCHED_GEMM_NT_NT_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER( \ - Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Left, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_NT_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER(Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Left, SCALAR, \ + LAYOUT, EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_NT_T_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER( \ - Trans::NoTranspose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_T_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + 
KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER(Trans::NoTranspose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_NT_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER( \ - Trans::Transpose, Trans::NoTranspose, BatchLayout::Left, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_NT_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER(Trans::Transpose, Trans::NoTranspose, BatchLayout::Left, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_T_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER( \ - Trans::Transpose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_T_BLL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_AVAIL_INNER(Trans::Transpose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) // Include the BLL ETI specalizations #include @@ -120,29 +102,21 @@ struct batched_gemm_eti_spec_avail { #include ///////////////// BatchLayout::Right Permutations ///////////////// -#define KOKKOSBATCHED_GEMM_NT_NT_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER( \ - Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_NT_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER(Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ + LAYOUT, EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_NT_T_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER( \ - Trans::NoTranspose, Trans::Transpose, BatchLayout::Right, 
SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_T_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER(Trans::NoTranspose, Trans::Transpose, BatchLayout::Right, SCALAR, \ + LAYOUT, EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_NT_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER( \ - Trans::Transpose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_NT_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER(Trans::Transpose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ + LAYOUT, EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_T_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER( \ - Trans::Transpose, Trans::Transpose, BatchLayout::Right, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_T_BLR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_AVAIL_INNER(Trans::Transpose, Trans::Transpose, BatchLayout::Right, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) // Include the BLR ETI specalizations #include @@ -152,19 +126,15 @@ struct batched_gemm_eti_spec_avail { namespace KokkosBatched { namespace Impl { -template ::value, - bool eti_spec_avail = batched_gemm_eti_spec_avail< - ArgTransA, ArgTransB, ArgBatchSzDim, BatchedGemmHandleType, - ScalarType, AViewType, BViewType, CViewType>::value> +template ::value, + bool eti_spec_avail = batched_gemm_eti_spec_avail::value> struct BatchedGemmSpec { - static int run(BatchedGemmHandleType *const handle, const ScalarType alpha, - const AViewType &A, const BViewType &B, const ScalarType beta, - const CViewType &C) + static int run(BatchedGemmHandleType *const handle, const ScalarType alpha, const AViewType &A, const BViewType &B, + const ScalarType 
beta, const CViewType &C) #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION @@ -172,23 +142,20 @@ struct BatchedGemmSpec { printf( "KokkosBatched::BatchedGemm<> ETI specialization for < %s, %s, %s, " "%s, %s, %s, %s, %s >\n", - typeid(ArgTransA).name(), typeid(ArgTransB).name(), - typeid(ArgBatchSzDim).name(), typeid(BatchedGemmHandleType).name(), - typeid(ScalarType).name(), typeid(AViewType).name(), + typeid(ArgTransA).name(), typeid(ArgTransB).name(), typeid(ArgBatchSzDim).name(), + typeid(BatchedGemmHandleType).name(), typeid(ScalarType).name(), typeid(AViewType).name(), typeid(BViewType).name(), typeid(CViewType).name()); #else printf( "KokkosBatched::BatchedGemm<> non-ETI specialization for < %s, %s, " "%s, %s, %s, %s, %s, %s >\n", - typeid(ArgTransA).name(), typeid(ArgTransB).name(), - typeid(ArgBatchSzDim).name(), typeid(BatchedGemmHandleType).name(), - typeid(ScalarType).name(), typeid(AViewType).name(), + typeid(ArgTransA).name(), typeid(ArgTransB).name(), typeid(ArgBatchSzDim).name(), + typeid(BatchedGemmHandleType).name(), typeid(ScalarType).name(), typeid(AViewType).name(), typeid(BViewType).name(), typeid(CViewType).name()); #endif // KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #endif // KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - return KokkosBatched::Impl::BatchedGemmImpl< - ArgTransA, ArgTransB, ArgBatchSzDim, BatchedGemmHandleType, ScalarType, - AViewType, BViewType, CViewType>(handle, alpha, A, B, beta, C); + return KokkosBatched::Impl::BatchedGemmImpl(handle, alpha, A, B, beta, C); } #else ; @@ -199,92 +166,68 @@ struct BatchedGemmSpec { } // namespace KokkosBatched // ETI instantiation macros, consumed by *.cpp.in files -#define KOKKOSBATCHED_GEMM_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, \ - ARG_BATCH_LAYOUT, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct BatchedGemmSpec< \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, BatchedGemmHandle, SCALAR, \ - 
Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - false, true>; +#define KOKKOSBATCHED_GEMM_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + template struct BatchedGemmSpec, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) -#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_ETI_SPEC_INST_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutRight, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutRight, \ + EXEC_SPACE, MEM_SPACE) #else -#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) #endif #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) -#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_ETI_SPEC_INST_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutLeft, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, Kokkos::LayoutLeft, \ + EXEC_SPACE, MEM_SPACE) #else -#define 
KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER( \ - ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER(ARG_TRANS_A, ARG_TRANS_B, ARG_BATCH_LAYOUT, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) #endif ///////////////// BatchLayout::Left Permutations ///////////////// -#define KOKKOSBATCHED_GEMM_NT_NT_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER( \ - Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Left, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_NT_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER(Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Left, SCALAR, \ + LAYOUT, EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_NT_T_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER( \ - Trans::NoTranspose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_T_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER(Trans::NoTranspose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_NT_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER( \ - Trans::Transpose, Trans::NoTranspose, BatchLayout::Left, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_NT_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER(Trans::Transpose, Trans::NoTranspose, BatchLayout::Left, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_T_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER( \ - Trans::Transpose, Trans::Transpose, 
BatchLayout::Left, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_T_BLL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLL_ETI_SPEC_INST_INNER(Trans::Transpose, Trans::Transpose, BatchLayout::Left, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) ///////////////// BatchLayout::Right Permutations ///////////////// -#define KOKKOSBATCHED_GEMM_NT_NT_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER( \ - Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_NT_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER(Trans::NoTranspose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ + LAYOUT, EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_NT_T_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER( \ - Trans::NoTranspose, Trans::Transpose, BatchLayout::Right, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_NT_T_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER(Trans::NoTranspose, Trans::Transpose, BatchLayout::Right, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_NT_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER( \ - Trans::Transpose, Trans::NoTranspose, BatchLayout::Right, SCALAR, \ - LAYOUT, EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_NT_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER(Trans::Transpose, Trans::NoTranspose, BatchLayout::Right, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) -#define KOKKOSBATCHED_GEMM_T_T_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER( \ - Trans::Transpose, 
Trans::Transpose, BatchLayout::Right, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBATCHED_GEMM_T_T_BLR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBATCHED_GEMM_BLR_ETI_SPEC_INST_INNER(Trans::Transpose, Trans::Transpose, BatchLayout::Right, SCALAR, LAYOUT, \ + EXEC_SPACE, MEM_SPACE) #endif // __KOKKOSBATCHED_HOSTLEVEL_GEMM_SPEC_HPP__ diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Impl.hpp index 4a3e26685b75..c8f5c7a20eb0 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Impl.hpp @@ -29,11 +29,9 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int SerialHouseholder::invoke( - const aViewType &a, const tauViewType &tau) { - return SerialLeftHouseholderInternal::invoke(a.extent(0) - 1, a.data(), - a.data() + a.stride(0), - a.stride(0), tau.data()); +KOKKOS_INLINE_FUNCTION int SerialHouseholder::invoke(const aViewType &a, const tauViewType &tau) { + return SerialLeftHouseholderInternal::invoke(a.extent(0) - 1, a.data(), a.data() + a.stride(0), a.stride(0), + tau.data()); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Internal.hpp index 05654a2f377d..0257ff4d9b97 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Internal.hpp @@ -61,8 +61,7 @@ struct SerialLeftHouseholderInternal { const mag_type norm_chi1 = Kokkos::ArithTraits::abs(*chi1); /// compute 2 norm of x using norm_chi1 and norm_x2 - const mag_type norm_x = Kokkos::ArithTraits::sqrt( - norm_x2_square + 
norm_chi1 * norm_chi1); + const mag_type norm_x = Kokkos::ArithTraits::sqrt(norm_x2_square + norm_chi1 * norm_chi1); /// compute alpha const mag_type alpha = (*chi1 < 0 ? one : minus_one) * norm_x; @@ -76,9 +75,8 @@ struct SerialLeftHouseholderInternal { // SerialScaleInternal::invoke(m_x2, inv_chi1_minus_alpha, x2, x2s); /// compute tau - const mag_type chi1_minus_alpha_square = - chi1_minus_alpha * chi1_minus_alpha; - *tau = half + half * (norm_x2_square / chi1_minus_alpha_square); + const mag_type chi1_minus_alpha_square = chi1_minus_alpha * chi1_minus_alpha; + *tau = half + half * (norm_x2_square / chi1_minus_alpha_square); /// overwrite chi1 with alpha *chi1 = alpha; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Impl.hpp index 955e1a72b818..bc55a646bc25 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Impl.hpp @@ -29,11 +29,10 @@ namespace KokkosBatched { template template -KOKKOS_INLINE_FUNCTION int TeamVectorHouseholder::invoke( - const MemberType &member, const aViewType &a, const tauViewType &tau) { - return TeamVectorLeftHouseholderInternal::invoke( - member, a.extent(0) - 1, a.data(), a.data() + a.stride(0), a.stride(0), - tau.data()); +KOKKOS_INLINE_FUNCTION int TeamVectorHouseholder::invoke(const MemberType &member, const aViewType &a, + const tauViewType &tau) { + return TeamVectorLeftHouseholderInternal::invoke(member, a.extent(0) - 1, a.data(), a.data() + a.stride(0), + a.stride(0), tau.data()); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Internal.hpp index 64fe24fa3160..1074dc4280c2 100644 --- 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Internal.hpp @@ -30,8 +30,7 @@ namespace KokkosBatched { /// struct TeamVectorLeftHouseholderInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m_x2, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m_x2, /* */ ValueType *chi1, /* */ ValueType *x2, const int x2s, /* */ ValueType *tau) { @@ -67,8 +66,7 @@ struct TeamVectorLeftHouseholderInternal { const mag_type norm_chi1 = Kokkos::ArithTraits::abs(*chi1); /// compute 2 norm of x using norm_chi1 and norm_x2 - const mag_type norm_x = Kokkos::ArithTraits::sqrt( - norm_x2_square + norm_chi1 * norm_chi1); + const mag_type norm_x = Kokkos::ArithTraits::sqrt(norm_x2_square + norm_chi1 * norm_chi1); /// compute alpha const mag_type alpha = (*chi1 < 0 ? one : minus_one) * norm_x; @@ -76,9 +74,8 @@ struct TeamVectorLeftHouseholderInternal { /// overwrite x2 with u2 const value_type chi1_minus_alpha = *chi1 - alpha; const value_type inv_chi1_minus_alpha = one / chi1_minus_alpha; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, m_x2), - [&](const int &i) { x2[i * x2s] *= inv_chi1_minus_alpha; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m_x2), + [&](const int &i) { x2[i * x2s] *= inv_chi1_minus_alpha; }); member.team_barrier(); // later consider to use the following @@ -86,9 +83,8 @@ struct TeamVectorLeftHouseholderInternal { /// compute tau Kokkos::single(Kokkos::PerTeam(member), [&]() { - const mag_type chi1_minus_alpha_square = - chi1_minus_alpha * chi1_minus_alpha; - *tau = half + half * (norm_x2_square / chi1_minus_alpha_square); + const mag_type chi1_minus_alpha_square = chi1_minus_alpha * chi1_minus_alpha; + *tau = half + half * (norm_x2_square / chi1_minus_alpha_square); /// overwrite chi1 with alpha *chi1 = alpha; diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixA_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixA_Serial_Impl.hpp index d59f9e0c0bfc..eb576f1dff8c 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixA_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixA_Serial_Impl.hpp @@ -29,31 +29,26 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], - a_24 = A[2 * _as0 + 4 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], - a_33 = A[3 * _as0 + 3 * _as1], a_34 = A[3 * _as0 + 4 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], - a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1], + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 
* _as1], + a_14 = A[1 * _as0 + 4 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], a_24 = A[2 * _as0 + 4 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_33 = A[3 * _as0 + 3 * _as1], a_34 = A[3 * _as0 + 4 * _as1], a_40 = A[4 * _as0 + 0 * _as1], + a_41 = A[4 * _as0 + 1 * _as1], a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1], a_44 = A[4 * _as0 + 4 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, b_3p, c_3p, b_4p, c_4p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ib4 = 4 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, - ic3 = 3 * _cs0, ic4 = 4 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ib4 = 4 * _bs0, ic0 = 0 * _cs0, + ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0, ic4 = 4 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -100,29 +95,25 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], - a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 
+ 1 * _as1], - a_32 = A[3 * _as0 + 2 * _as1], a_33 = A[3 * _as0 + 3 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], + a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], a_20 = A[2 * _as0 + 0 * _as1], + a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_33 = A[3 * _as0 + 3 * _as1], a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, b_3p, c_3p, /**/ c_4p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0, - ic4 = 4 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, + ic2 = 2 * _cs0, ic3 = 3 * _cs0, ic4 = 4 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -163,27 +154,24 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], 
- a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], - a_42 = A[4 * _as0 + 2 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], + a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], a_42 = A[4 * _as0 + 2 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, /**/ c_3p, /**/ c_4p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, - ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0, ic4 = 4 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, + ic3 = 3 * _cs0, ic4 = 4 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -217,25 +205,24 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_30 = A[3 * _as0 + 0 * _as1], a_31 = 
A[3 * _as0 + 1 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_40 = A[4 * _as0 + 0 * _as1], + a_41 = A[4 * _as0 + 1 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, /**/ c_2p, /**/ c_3p, /**/ c_4p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, - ic2 = 2 * _cs0, ic3 = 3 * _cs0, ic4 = 4 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0, + ic4 = 4 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -263,15 +250,14 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 2>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_40 = A[4 * _as0 + 0 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_40 = A[4 * _as0 + 0 * _as1]; ValueType b_0p, c_0p, /**/ c_1p, @@ -279,8 +265,7 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 1>::serial_invoke( /**/ c_3p, /**/ c_4p; - const int ib0 = 0 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, - ic3 = 3 * _cs0, ic4 = 4 * _cs0; + const int ib0 = 0 * _bs0, ic0 = 0 * 
_cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0, ic4 = 4 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -303,28 +288,24 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], - a_24 = A[2 * _as0 + 4 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], + a_14 = A[1 * _as0 + 4 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], a_24 = A[2 * _as0 + 4 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], a_33 = A[3 * _as0 + 3 * _as1], a_34 = A[3 * _as0 + 4 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, b_3p, c_3p, b_4p; - const 
int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ib4 = 4 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, - ic3 = 3 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ib4 = 4 * _bs0, ic0 = 0 * _cs0, + ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -365,25 +346,22 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], - a_24 = A[2 * _as0 + 4 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], + a_14 = A[1 * _as0 + 4 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], a_24 = A[2 * _as0 + 4 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, b_3p, b_4p; - const int ib0 = 0 * _bs0, 
ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ib4 = 4 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ib4 = 4 * _bs0, ic0 = 0 * _cs0, + ic1 = 1 * _cs0, ic2 = 2 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -417,22 +395,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], + a_14 = A[1 * _as0 + 4 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, b_3p, b_4p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ib4 = 4 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ib4 = 4 * _bs0, ic0 = 0 * _cs0, + ic1 = 1 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -460,20 +437,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 5>::serial_invoke( } template 
<> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_04 = A[0 * _as0 + 4 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1]; ValueType b_0p, c_0p, b_1p, b_2p, b_3p, b_4p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ib4 = 4 * _bs0, ic0 = 0 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ib4 = 4 * _bs0, ic0 = 0 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -496,10 +471,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; switch (m * 10 + k) { @@ -548,12 +524,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke( InnerGemmFixA<2, 2> inner(_as0, _as1, _bs0, _bs1, _cs0, _cs1); for (int i = 0; i < m; i += 2) for (int p = 0; p < k; p += 
2) - inner.serial_invoke(alpha, A + i * _as0 + p * _as1, B + p * _bs0, - (i + 2 > m ? 1 : 2), n, (p + 2 > k ? 1 : 2), - C + i * _cs0); + inner.serial_invoke(alpha, A + i * _as0 + p * _as1, B + p * _bs0, (i + 2 > m ? 1 : 2), n, + (p + 2 > k ? 1 : 2), C + i * _cs0); } else { - Kokkos::abort( - "InnerGemmFixA<5,5>::serial_invoke, assert failure (m<5 && n<5)"); + Kokkos::abort("InnerGemmFixA<5,5>::serial_invoke, assert failure (m<5 && n<5)"); } break; } @@ -568,25 +542,23 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<5, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], - a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], - a_32 = A[3 * _as0 + 2 * _as1], a_33 = A[3 * _as0 + 3 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], + a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], a_20 = A[2 * _as0 + 0 * _as1], + a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * 
_as1], a_32 = A[3 * _as0 + 2 * _as1], + a_33 = A[3 * _as0 + 3 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, b_3p, c_3p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, + ic2 = 2 * _cs0, ic3 = 3 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -622,24 +594,22 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], + a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, /**/ c_3p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, - ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 
* _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, + ic3 = 3 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -670,23 +640,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, /**/ c_2p, /**/ c_3p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, - ic2 = 2 * _cs0, ic3 = 3 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -712,22 +680,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT 
A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_30 = A[3 * _as0 + 0 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], + a_30 = A[3 * _as0 + 0 * _as1]; ValueType b_0p, c_0p, /**/ c_1p, /**/ c_2p, /**/ c_3p; - const int ib0 = 0 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, - ic3 = 3 * _cs0; + const int ib0 = 0 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0, ic3 = 3 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -748,23 +715,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], + a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], a_20 = A[2 * _as0 + 0 * _as1], + a_21 = A[2 * _as0 + 1 * _as1], a_22 = 
A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p, b_3p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, + ic2 = 2 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -795,21 +760,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, b_3p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ic0 = 0 * _cs0, ic1 = 1 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -835,19 +798,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT 
A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1]; ValueType b_0p, c_0p, b_1p, b_2p, b_3p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, - ic0 = 0 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ib3 = 3 * _bs0, ic0 = 0 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -868,10 +830,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; switch (m * 10 + k) { @@ -915,12 +878,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke( InnerGemmFixA<2, 2> inner(_as0, _as1, _bs0, _bs1, _cs0, _cs1); for (int i = 0; i < m; i += 2) for (int p = 0; p < k; p += 2) - inner.serial_invoke(alpha, A + i * _as0 + p * _as1, B + p * _bs0, - (i + 2 > m ? 1 : 2), n, (p + 2 > k ? 1 : 2), - C + i * _cs0); + inner.serial_invoke(alpha, A + i * _as0 + p * _as1, B + p * _bs0, (i + 2 > m ? 
1 : 2), n, + (p + 2 > k ? 1 : 2), C + i * _cs0); } else { - Kokkos::abort( - "InnerGemmFixA<4,4>::serial_invoke, assert failure (m<4 && n<4)"); + Kokkos::abort("InnerGemmFixA<4,4>::serial_invoke, assert failure (m<4 && n<4)"); } break; } @@ -935,22 +896,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<4, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], + a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p, c_2p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, - ic1 = 1 * _cs0, ic2 = 2 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -977,21 +935,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const 
int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, /**/ c_2p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, - ic2 = 2 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0, ic2 = 2 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -1014,14 +970,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_20 = A[2 * _as0 + 0 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1]; ValueType b_0p, c_0p, /**/ c_1p, @@ -1046,20 +1001,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 3>::serial_invoke( - const ScalarType alpha, 
const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p, b_2p; - const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, - ic1 = 1 * _cs0; + const int ib0 = 0 * _bs0, ib1 = 1 * _bs0, ib2 = 2 * _bs0, ic0 = 0 * _cs0, ic1 = 1 * _cs0; for (int p = 0; p < n; ++p) { b_0p = B[ib0 + p * _bs1]; @@ -1081,14 +1034,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1]; ValueType b_0p, c_0p, b_1p, b_2p; @@ -1111,10 +1063,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 3>::serial_invoke( template <> template 
-KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; switch (m * 10 + k) { @@ -1148,12 +1101,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke( InnerGemmFixA<2, 2> inner(_as0, _as1, _bs0, _bs1, _cs0, _cs1); for (int i = 0; i < m; i += 2) for (int p = 0; p < k; p += 2) - inner.serial_invoke(alpha, A + i * _as0 + p * _as1, B + p * _bs0, - (i + 2 > m ? 1 : 2), n, (p + 2 > k ? 1 : 2), - C + i * _cs0); + inner.serial_invoke(alpha, A + i * _as0 + p * _as1, B + p * _bs0, (i + 2 > m ? 1 : 2), n, + (p + 2 > k ? 1 : 2), C + i * _cs0); } else { - Kokkos::abort( - "InnerGemmFixA<3,3>::serial_invoke, assert failure (m<3 && n<3)"); + Kokkos::abort("InnerGemmFixA<3,3>::serial_invoke, assert failure (m<3 && n<3)"); } break; } @@ -1168,14 +1119,14 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<3, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; - const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1]; + const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], 
a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1]; ValueType b_0p, c_0p, b_1p, c_1p; @@ -1199,10 +1150,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_10 = A[1 * _as0 + 0 * _as1]; @@ -1227,10 +1178,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; const ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1]; @@ -1254,10 +1205,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 
0) return 0; switch (m * 10 + k) { @@ -1282,8 +1234,7 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke( break; } default: { - Kokkos::abort( - "InnerGemmFixA<2,2>::serial_invoke, assert failure (m<2 && n<2)"); + Kokkos::abort("InnerGemmFixA<2,2>::serial_invoke, assert failure (m<2 && n<2)"); break; } } @@ -1297,10 +1248,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixA<2, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int n, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixA<1, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, + /**/ ValueType *KOKKOS_RESTRICT C) { if (n <= 0) return 0; const ValueType a_00 = A[0 * _as0 + 0 * _as1]; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixB_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixB_Serial_Impl.hpp index a725bf5b45e4..6912c285a60f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixB_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixB_Serial_Impl.hpp @@ -29,31 +29,26 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * 
_bs1], - b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], - b_13 = B[1 * _bs0 + 3 * _bs1], b_14 = B[1 * _bs0 + 4 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], - b_24 = B[2 * _bs0 + 4 * _bs1], b_30 = B[3 * _bs0 + 0 * _bs1], - b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], - b_33 = B[3 * _bs0 + 3 * _bs1], b_34 = B[3 * _bs0 + 4 * _bs1], - b_40 = B[4 * _bs0 + 0 * _bs1], b_41 = B[4 * _bs0 + 1 * _bs1], - b_42 = B[4 * _bs0 + 2 * _bs1], b_43 = B[4 * _bs0 + 3 * _bs1], + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], + b_14 = B[1 * _bs0 + 4 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], + b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], b_24 = B[2 * _bs0 + 4 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], + b_33 = B[3 * _bs0 + 3 * _bs1], b_34 = B[3 * _bs0 + 4 * _bs1], b_40 = B[4 * _bs0 + 0 * _bs1], + b_41 = B[4 * _bs0 + 1 * _bs1], b_42 = B[4 * _bs0 + 2 * _bs1], b_43 = B[4 * _bs0 + 3 * _bs1], b_44 = B[4 * _bs0 + 4 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, a_p4, c_p0, c_p1, c_p2, c_p3, c_p4; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - ja4 = 4 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, - jc3 = 3 * _cs1, jc4 = 4 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, ja4 = 4 * _as1, jc0 = 0 * _cs1, + jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1, jc4 = 4 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -100,28 +95,24 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 
5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], - b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], - b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], - b_32 = B[3 * _bs0 + 2 * _bs1], b_33 = B[3 * _bs0 + 3 * _bs1], - b_40 = B[4 * _bs0 + 0 * _bs1], b_41 = B[4 * _bs0 + 1 * _bs1], + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], + b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], + b_21 = B[2 * _bs0 + 1 * _bs1], b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], + b_33 = B[3 * _bs0 + 3 * _bs1], b_40 = B[4 * _bs0 + 0 * _bs1], b_41 = B[4 * _bs0 + 1 * _bs1], b_42 = B[4 * _bs0 + 2 * _bs1], b_43 = B[4 * _bs0 + 3 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, a_p4, c_p0, c_p1, c_p2, c_p3; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - ja4 = 4 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, - jc3 = 3 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * 
_as1, ja2 = 2 * _as1, ja3 = 3 * _as1, ja4 = 4 * _as1, jc0 = 0 * _cs1, + jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -162,25 +153,22 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_30 = B[3 * _bs0 + 0 * _bs1], - b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], - b_40 = B[4 * _bs0 + 0 * _bs1], b_41 = B[4 * _bs0 + 1 * _bs1], - b_42 = B[4 * _bs0 + 2 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], + b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], b_22 = B[2 * _bs0 + 2 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], + b_40 = B[4 * _bs0 + 0 * _bs1], b_41 = B[4 * _bs0 + 1 * _bs1], b_42 = B[4 * _bs0 + 2 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, a_p4, c_p0, c_p1, c_p2; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - ja4 = 4 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, 
ja4 = 4 * _as1, jc0 = 0 * _cs1, + jc1 = 1 * _cs1, jc2 = 2 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -214,22 +202,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], - b_40 = B[4 * _bs0 + 0 * _bs1], b_41 = B[4 * _bs0 + 1 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_40 = B[4 * _bs0 + 0 * _bs1], + b_41 = B[4 * _bs0 + 1 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, a_p4, c_p0, c_p1; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - ja4 = 4 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, ja4 = 4 * _as1, jc0 = 0 * _cs1, + jc1 = 1 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -257,20 +244,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 2>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - 
/**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_30 = B[3 * _bs0 + 0 * _bs1], - b_40 = B[4 * _bs0 + 0 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_40 = B[4 * _bs0 + 0 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, a_p4, c_p0; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - ja4 = 4 * _as1, jc0 = 0 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, ja4 = 4 * _as1, jc0 = 0 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -293,28 +278,24 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], - b_13 = B[1 * _bs0 + 3 * _bs1], b_14 = B[1 * _bs0 + 4 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], - b_24 = B[2 * _bs0 + 4 * 
_bs1], b_30 = B[3 * _bs0 + 0 * _bs1], - b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], + b_14 = B[1 * _bs0 + 4 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], + b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], b_24 = B[2 * _bs0 + 4 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], b_33 = B[3 * _bs0 + 3 * _bs1], b_34 = B[3 * _bs0 + 4 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, c_p0, c_p1, c_p2, c_p3, c_p4; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1, - jc4 = 4 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, + jc2 = 2 * _cs1, jc3 = 3 * _cs1, jc4 = 4 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -355,25 +336,22 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 
* _bs0 + 2 * _bs1], - b_13 = B[1 * _bs0 + 3 * _bs1], b_14 = B[1 * _bs0 + 4 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], - b_24 = B[2 * _bs0 + 4 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], + b_14 = B[1 * _bs0 + 4 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], + b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], b_24 = B[2 * _bs0 + 4 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0, c_p1, c_p2, c_p3, c_p4; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, - jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1, jc4 = 4 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, + jc3 = 3 * _cs1, jc4 = 4 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -407,22 +385,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], - b_13 = B[1 * _bs0 + 3 * _bs1], 
b_14 = B[1 * _bs0 + 4 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_04 = B[0 * _bs0 + 4 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], + b_14 = B[1 * _bs0 + 4 * _bs1]; ValueType a_p0, a_p1, c_p0, c_p1, c_p2, c_p3, c_p4; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, - jc2 = 2 * _cs1, jc3 = 3 * _cs1, jc4 = 4 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1, + jc4 = 4 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -450,20 +427,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_04 = B[0 * _bs0 + 4 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_04 = B[0 * _bs0 + 4 * _bs1]; ValueType a_p0, c_p0, c_p1, c_p2, c_p3, c_p4; - const int ja0 = 0 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, - jc3 = 3 * _cs1, jc4 = 4 * _cs1; + const int ja0 = 0 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1, jc4 = 4 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -486,10 +461,11 @@ 
KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; switch (k * 10 + n) { @@ -544,25 +520,23 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<5, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], - b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], - b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], - b_32 = B[3 * _bs0 + 2 * _bs1], b_33 = B[3 * _bs0 + 3 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], + b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], b_20 = 
B[2 * _bs0 + 0 * _bs1], + b_21 = B[2 * _bs0 + 1 * _bs1], b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1], + b_33 = B[3 * _bs0 + 3 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, c_p0, c_p1, c_p2, c_p3; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, + jc2 = 2 * _cs1, jc3 = 3 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -598,23 +572,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_30 = B[3 * _bs0 + 0 * _bs1], - b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], + b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], b_22 = B[2 * _bs0 + 2 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1], b_32 = B[3 * _bs0 + 2 * _bs1]; 
ValueType a_p0, a_p1, a_p2, a_p3, c_p0, c_p1, c_p2; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, + jc2 = 2 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -645,21 +617,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], b_30 = B[3 * _bs0 + 0 * _bs1], b_31 = B[3 * _bs0 + 1 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, c_p0, c_p1; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - jc0 = 0 * _cs1, jc1 = 1 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -685,19 +655,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - 
/**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_30 = B[3 * _bs0 + 0 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], + b_30 = B[3 * _bs0 + 0 * _bs1]; ValueType a_p0, a_p1, a_p2, a_p3, c_p0; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, - jc0 = 0 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, ja3 = 3 * _as1, jc0 = 0 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -718,23 +687,21 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], - b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 
* _bs1], + b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], + b_21 = B[2 * _bs0 + 1 * _bs1], b_22 = B[2 * _bs0 + 2 * _bs1], b_23 = B[2 * _bs0 + 3 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0, c_p1, c_p2, c_p3; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, - jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, + jc3 = 3 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -765,21 +732,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], b_13 = B[1 * _bs0 + 3 * _bs1]; ValueType a_p0, a_p1, c_p0, c_p1, c_p2, c_p3; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, - jc2 = 2 * _cs1, jc3 = 3 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -805,19 +770,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 4>::serial_invoke( 
template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_03 = B[0 * _bs0 + 3 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_03 = B[0 * _bs0 + 3 * _bs1]; ValueType a_p0, c_p0, c_p1, c_p2, c_p3; - const int ja0 = 0 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, - jc3 = 3 * _cs1; + const int ja0 = 0 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1, jc3 = 3 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -838,10 +802,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; switch (k * 10 + n) { @@ -886,22 +851,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<4, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType 
*KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], - b_22 = B[2 * _bs0 + 2 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1], + b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1], b_22 = B[2 * _bs0 + 2 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0, c_p1, c_p2; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, - jc1 = 1 * _cs1, jc2 = 2 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1, jc2 = 2 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -928,20 +890,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * 
_bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1], b_21 = B[2 * _bs0 + 1 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0, c_p1; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, - jc1 = 1 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -964,14 +924,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_20 = B[2 * _bs0 + 0 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], b_20 = B[2 * _bs0 + 0 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0; @@ -994,20 +953,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_02 = B[0 * _bs0 + 2 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], - b_11 = B[1 * _bs0 + 1 * _bs1], 
b_12 = B[1 * _bs0 + 2 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_02 = B[0 * _bs0 + 2 * _bs1], + b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1], b_12 = B[1 * _bs0 + 2 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0, c_p1; - const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, - jc1 = 1 * _cs1; + const int ja0 = 0 * _as1, ja1 = 1 * _as1, ja2 = 2 * _as1, jc0 = 0 * _cs1, jc1 = 1 * _cs1; for (int p = 0; p < m; ++p) { a_p0 = A[p * _bs0 + ja0]; @@ -1029,14 +986,14 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1]; ValueType a_p0, a_p1, a_p2, c_p0; @@ -1059,10 +1016,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if 
(m <= 0 || n <= 0 || k <= 0) return 0; switch (k * 10 + n) { @@ -1097,14 +1055,14 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<3, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; - const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], - b_10 = B[1 * _bs0 + 0 * _bs1], b_11 = B[1 * _bs0 + 1 * _bs1]; + const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1], + b_11 = B[1 * _bs0 + 1 * _bs1]; ValueType a_p0, a_p1, c_p0, c_p1; @@ -1128,10 +1086,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_10 = B[1 * _bs0 + 0 * _bs1]; @@ -1155,10 +1113,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 2>::serial_invoke(const ScalarType alpha, + 
const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; const ValueType b_00 = B[0 * _bs0 + 0 * _bs1], b_01 = B[0 * _bs0 + 1 * _bs1]; @@ -1182,10 +1140,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; switch (k * 10 + n) { @@ -1210,10 +1169,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<2, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0) return 0; const ValueType b_00 = B[0 * _bs0 + 0 * _bs1]; @@ -1239,10 +1198,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<1, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixB<0, 0>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixB<0, 0>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType 
*KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; if (k == n) { @@ -1276,10 +1236,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixB<0, 0>::serial_invoke( } else { for (int i = 0; i < m; ++i) { const ValueType *KOKKOS_RESTRICT iA = A + i * _as0; - /**/ ValueType *KOKKOS_RESTRICT iC = C + i * _cs0; + /**/ ValueType *KOKKOS_RESTRICT iC = C + i * _cs0; for (int j = 0; j < n; ++j) { const ValueType *KOKKOS_RESTRICT jB = B + j * _bs1; - /**/ ValueType tC = 0; + /**/ ValueType tC = 0; for (int p = 0; p < k; ++p) tC += iA[p * _as1] * jB[p * _bs0]; pC[i * _cs0] += alpha * tC; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Serial_Impl.hpp index 8bdf4fee4fbf..9ad08549cbf7 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Serial_Impl.hpp @@ -29,22 +29,19 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, - c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, b_p1, c_10 = 0, - c_11 = 0, c_12 = 0, c_13 = 0, c_14 = 0, a_2p, b_p2, c_20 = 0, c_21 = 0, - c_22 = 0, c_23 = 0, c_24 = 0, a_3p, b_p3, c_30 = 0, c_31 = 0, c_32 = 0, - c_33 = 0, c_34 = 0, a_4p, b_p4, c_40 = 0, c_41 = 0, c_42 = 0, c_43 = 0, - c_44 = 0; + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, 
c_04 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, c_12 = 0, + c_13 = 0, c_14 = 0, a_2p, b_p2, c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, c_24 = 0, a_3p, b_p3, + c_30 = 0, c_31 = 0, c_32 = 0, c_33 = 0, c_34 = 0, a_4p, b_p4, c_40 = 0, c_41 = 0, c_42 = 0, + c_43 = 0, c_44 = 0; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, - j3 = 3 * _bs1, j4 = 4 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, + j2 = 2 * _bs1, j3 = 3 * _bs1, j4 = 4 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -119,21 +116,18 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, a_1p, b_p1, - c_10 = 0, c_11 = 0, c_12 = 0, c_13 = 0, a_2p, b_p2, - c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, a_3p, b_p3, - c_30 = 0, c_31 = 0, c_32 = 0, c_33 = 0, a_4p, c_40 = 0, - c_41 = 0, c_42 = 0, c_43 = 0; + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, c_12 = 0, c_13 = 0, + a_2p, b_p2, c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, a_3p, b_p3, c_30 = 0, c_31 = 0, c_32 = 0, + c_33 = 0, a_4p, c_40 = 0, c_41 = 0, c_42 = 0, c_43 = 0; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, - j3 = 3 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, i4 = 4 * _as0, j0 = 0 * 
_bs1, j1 = 1 * _bs1, + j2 = 2 * _bs1, j3 = 3 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -197,19 +191,17 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, a_1p, b_p1, c_10 = 0, - c_11 = 0, c_12 = 0, a_2p, b_p2, c_20 = 0, c_21 = 0, - c_22 = 0, a_3p, c_30 = 0, c_31 = 0, c_32 = 0, a_4p, - c_40 = 0, c_41 = 0, c_42 = 0; + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, c_12 = 0, a_2p, b_p2, c_20 = 0, + c_21 = 0, c_22 = 0, a_3p, c_30 = 0, c_31 = 0, c_32 = 0, a_4p, c_40 = 0, c_41 = 0, c_42 = 0; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, + j2 = 2 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -262,18 +254,16 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, 
b_p0, c_00 = 0, c_01 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, - a_2p, c_20 = 0, c_21 = 0, a_3p, c_30 = 0, c_31 = 0, - a_4p, c_40 = 0, c_41 = 0; + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, a_2p, c_20 = 0, c_21 = 0, a_3p, c_30 = 0, + c_31 = 0, a_4p, c_40 = 0, c_41 = 0; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, i4 = 4 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -315,17 +305,15 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = 0, a_1p, c_10 = 0, a_2p, c_20 = 0, a_3p, - c_30 = 0, a_4p, c_40 = 0; + ValueType a_0p, b_p0, c_00 = 0, a_1p, c_10 = 0, a_2p, c_20 = 0, a_3p, c_30 = 0, a_4p, c_40 = 0; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - i4 = 4 * _as0, j0 = 0 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, i4 = 4 * _as0, j0 = 0 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -356,35 +344,32 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 5>::serial_invoke(const 
ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, - b_p1, c_10 = 0, c_11 = 0, c_12 = 0, c_13 = 0, c_14 = 0, - a_2p, b_p2, c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, - c_24 = 0, a_3p, b_p3, c_30 = 0, c_31 = 0, c_32 = 0, - c_33 = 0, c_34 = 0, + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, c_12 = 0, + c_13 = 0, c_14 = 0, a_2p, b_p2, c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, c_24 = 0, a_3p, b_p3, + c_30 = 0, c_31 = 0, c_32 = 0, c_33 = 0, c_34 = 0, /**/ b_p4; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1, - j4 = 4 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, + j3 = 3 * _bs1, j4 = 4 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; - a_1p = A[i1 + p * _as1]; - b_p1 = B[p * _bs0 + j1]; - a_2p = A[i2 + p * _as1]; - b_p2 = B[p * _bs0 + j2]; - a_3p = A[i3 + p * _as1]; - b_p3 = B[p * _bs0 + j3]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; + a_1p = A[i1 + p * _as1]; + b_p1 = B[p * _bs0 + j1]; + a_2p = A[i2 + p * _as1]; + b_p2 = B[p * _bs0 + j2]; + a_3p = A[i3 + p * _as1]; + b_p3 = B[p * _bs0 + j3]; /**/ b_p4 = B[p * _bs0 + j4]; c_00 += a_0p * b_p0; @@ -435,32 +420,30 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 5>::serial_invoke(const ScalarType alpha, + 
const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, - b_p1, c_10 = 0, c_11 = 0, c_12 = 0, c_13 = 0, c_14 = 0, - a_2p, b_p2, c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, - c_24 = 0, + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, c_12 = 0, + c_13 = 0, c_14 = 0, a_2p, b_p2, c_20 = 0, c_21 = 0, c_22 = 0, c_23 = 0, c_24 = 0, /**/ b_p3, /**/ b_p4; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, - j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1, j4 = 4 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1, + j4 = 4 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; - a_1p = A[i1 + p * _as1]; - b_p1 = B[p * _bs0 + j1]; - a_2p = A[i2 + p * _as1]; - b_p2 = B[p * _bs0 + j2]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; + a_1p = A[i1 + p * _as1]; + b_p1 = B[p * _bs0 + j1]; + a_2p = A[i2 + p * _as1]; + b_p2 = B[p * _bs0 + j2]; /**/ b_p3 = B[p * _bs0 + j3]; /**/ b_p4 = B[p * _bs0 + j4]; @@ -502,29 +485,28 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, - b_p1, c_10 = 0, c_11 = 0, c_12 = 0, 
c_13 = 0, c_14 = 0, + ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, a_1p, b_p1, c_10 = 0, c_11 = 0, c_12 = 0, + c_13 = 0, c_14 = 0, /**/ b_p2, /**/ b_p3, /**/ b_p4; - const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, - j2 = 2 * _bs1, j3 = 3 * _bs1, j4 = 4 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1, j4 = 4 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; - a_1p = A[i1 + p * _as1]; - b_p1 = B[p * _bs0 + j1]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; + a_1p = A[i1 + p * _as1]; + b_p1 = B[p * _bs0 + j1]; /**/ b_p2 = B[p * _bs0 + j2]; /**/ b_p3 = B[p * _bs0 + j3]; /**/ b_p4 = B[p * _bs0 + j4]; @@ -557,10 +539,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; ValueType a_0p, b_p0, c_00 = 0, c_01 = 0, c_02 = 0, c_03 = 0, c_04 = 0, @@ -569,15 +551,14 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 5>::serial_invoke( /**/ b_p3, /**/ b_p4; - const int i0 = 0 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, - j3 = 3 * _bs1, j4 = 4 * _bs1; + const int i0 = 0 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1, j4 = 4 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; /**/ b_p1 = B[p * _bs0 + 
j1]; /**/ b_p2 = B[p * _bs0 + j2]; /**/ b_p3 = B[p * _bs0 + j3]; @@ -604,22 +585,19 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, - c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), - c_03 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), c_11 = ValueType(0), - c_12 = ValueType(0), c_13 = ValueType(0), a_2p, b_p2, c_20 = ValueType(0), - c_21 = ValueType(0), c_22 = ValueType(0), c_23 = ValueType(0), a_3p, b_p3, - c_30 = ValueType(0), c_31 = ValueType(0), c_32 = ValueType(0), - c_33 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), c_03 = ValueType(0), a_1p, b_p1, + c_10 = ValueType(0), c_11 = ValueType(0), c_12 = ValueType(0), c_13 = ValueType(0), a_2p, b_p2, + c_20 = ValueType(0), c_21 = ValueType(0), c_22 = ValueType(0), c_23 = ValueType(0), a_3p, b_p3, + c_30 = ValueType(0), c_31 = ValueType(0), c_32 = ValueType(0), c_33 = ValueType(0); - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, + j3 = 3 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -674,20 +652,17 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const 
ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, - c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), a_1p, b_p1, - c_10 = ValueType(0), c_11 = ValueType(0), c_12 = ValueType(0), a_2p, b_p2, - c_20 = ValueType(0), c_21 = ValueType(0), c_22 = ValueType(0), a_3p, - c_30 = ValueType(0), c_31 = ValueType(0), c_32 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), + c_11 = ValueType(0), c_12 = ValueType(0), a_2p, b_p2, c_20 = ValueType(0), c_21 = ValueType(0), + c_22 = ValueType(0), a_3p, c_30 = ValueType(0), c_31 = ValueType(0), c_32 = ValueType(0); - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -733,19 +708,16 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), a_1p, b_p1, - c_10 = ValueType(0), c_11 = ValueType(0), a_2p, - c_20 = ValueType(0), c_21 = ValueType(0), a_3p, - c_30 = ValueType(0), c_31 
= ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), c_11 = ValueType(0), + a_2p, c_20 = ValueType(0), c_21 = ValueType(0), a_3p, c_30 = ValueType(0), c_31 = ValueType(0); - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - j0 = 0 * _bs1, j1 = 1 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -782,17 +754,16 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), a_1p, c_10 = ValueType(0), a_2p, - c_20 = ValueType(0), a_3p, c_30 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), a_1p, c_10 = ValueType(0), a_2p, c_20 = ValueType(0), a_3p, + c_30 = ValueType(0); - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - j0 = 0 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, j0 = 0 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -820,32 +791,29 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const 
ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, - c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), - c_03 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), c_11 = ValueType(0), - c_12 = ValueType(0), c_13 = ValueType(0), a_2p, b_p2, c_20 = ValueType(0), - c_21 = ValueType(0), c_22 = ValueType(0), c_23 = ValueType(0), - /**/ b_p3; + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), c_03 = ValueType(0), a_1p, b_p1, + c_10 = ValueType(0), c_11 = ValueType(0), c_12 = ValueType(0), c_13 = ValueType(0), a_2p, b_p2, + c_20 = ValueType(0), c_21 = ValueType(0), c_22 = ValueType(0), c_23 = ValueType(0), + /**/ b_p3; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, - j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; - a_1p = A[i1 + p * _as1]; - b_p1 = B[p * _bs0 + j1]; - a_2p = A[i2 + p * _as1]; - b_p2 = B[p * _bs0 + j2]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; + a_1p = A[i1 + p * _as1]; + b_p1 = B[p * _bs0 + j1]; + a_2p = A[i2 + p * _as1]; + b_p2 = B[p * _bs0 + j2]; /**/ b_p3 = B[p * _bs0 + j3]; c_00 += a_0p * b_p0; @@ -880,30 +848,27 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) 
return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), - c_02 = ValueType(0), c_03 = ValueType(0), a_1p, b_p1, - c_10 = ValueType(0), c_11 = ValueType(0), - c_12 = ValueType(0), c_13 = ValueType(0), + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), c_03 = ValueType(0), a_1p, b_p1, + c_10 = ValueType(0), c_11 = ValueType(0), c_12 = ValueType(0), c_13 = ValueType(0), /**/ b_p2, /**/ b_p3; - const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, - j2 = 2 * _bs1, j3 = 3 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 3 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; - a_1p = A[i1 + p * _as1]; - b_p1 = B[p * _bs0 + j1]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; + a_1p = A[i1 + p * _as1]; + b_p1 = B[p * _bs0 + j1]; /**/ b_p2 = B[p * _bs0 + j2]; /**/ b_p3 = B[p * _bs0 + j3]; @@ -931,27 +896,25 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), - c_02 = ValueType(0), c_03 = ValueType(0), + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), c_03 = ValueType(0), /**/ b_p1, /**/ b_p2, /**/ b_p3; - const int i0 = 0 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, - j3 = 3 * _bs1; + const int i0 = 0 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1, j3 = 
3 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; /**/ b_p1 = B[p * _bs0 + j1]; /**/ b_p2 = B[p * _bs0 + j2]; /**/ b_p3 = B[p * _bs0 + j3]; @@ -976,19 +939,17 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, - c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), a_1p, b_p1, - c_10 = ValueType(0), c_11 = ValueType(0), c_12 = ValueType(0), a_2p, b_p2, - c_20 = ValueType(0), c_21 = ValueType(0), c_22 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), + c_11 = ValueType(0), c_12 = ValueType(0), a_2p, b_p2, c_20 = ValueType(0), c_21 = ValueType(0), + c_22 = ValueType(0); - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, - j1 = 1 * _bs1, j2 = 2 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -1027,18 +988,16 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 2>::serial_invoke(const 
ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), a_1p, b_p1, - c_10 = ValueType(0), c_11 = ValueType(0), a_2p, - c_20 = ValueType(0), c_21 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), c_11 = ValueType(0), + a_2p, c_20 = ValueType(0), c_21 = ValueType(0); - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, - j1 = 1 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -1070,14 +1029,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), a_1p, c_10 = ValueType(0), a_2p, - c_20 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), a_1p, c_10 = ValueType(0), a_2p, c_20 = ValueType(0); const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, j0 = 0 * _bs1; @@ -1104,28 +1062,26 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 3>::serial_invoke(const ScalarType alpha, + const 
ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), - c_02 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), c_11 = ValueType(0), c_12 = ValueType(0), /**/ b_p2; - const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, - j2 = 2 * _bs1; + const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1, j2 = 2 * _bs1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; - a_1p = A[i1 + p * _as1]; - b_p1 = B[p * _bs0 + j1]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; + a_1p = A[i1 + p * _as1]; + b_p1 = B[p * _bs0 + j1]; /**/ b_p2 = B[p * _bs0 + j2]; c_00 += a_0p * b_p0; @@ -1147,14 +1103,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), - c_02 = ValueType(0), + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), c_02 = ValueType(0), /**/ b_p1, /**/ b_p2; @@ -1164,8 +1119,8 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 3>::serial_invoke( #pragma unroll #endif for (int p = 0; p < k; ++p) { - a_0p = A[i0 + p * _as1]; - b_p0 = B[p * _bs0 + j0]; + a_0p = A[i0 + p * _as1]; + b_p0 = B[p * _bs0 + j0]; /**/ b_p1 = B[p * 
_bs0 + j1]; /**/ b_p2 = B[p * _bs0 + j2]; @@ -1187,14 +1142,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; - ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), a_1p, b_p1, - c_10 = ValueType(0), c_11 = ValueType(0); + ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), a_1p, b_p1, c_10 = ValueType(0), c_11 = ValueType(0); const int i0 = 0 * _as0, i1 = 1 * _as0, j0 = 0 * _bs1, j1 = 1 * _bs1; @@ -1223,10 +1177,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; ValueType a_0p, b_p0, c_00 = ValueType(0), a_1p, c_10 = ValueType(0); @@ -1253,10 +1207,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + 
const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; ValueType a_0p, b_p0, c_00 = ValueType(0), c_01 = ValueType(0), @@ -1286,10 +1240,10 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (k <= 0) return 0; ValueType a_0p, b_p0, c_00 = ValueType(0); @@ -1311,10 +1265,11 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<0, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<0, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || k <= 0) return 0; switch (m) { @@ -1353,14 +1308,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<0, 1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 5>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 5>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n 
<= 0 || k <= 0) return 0; - if (!(m <= 5 && n <= 5)) - Kokkos::abort( - "InnerGemmFixC<5,5>::serial_invoke, assert failure (m<=5 && n<=5)"); + if (!(m <= 5 && n <= 5)) Kokkos::abort("InnerGemmFixC<5,5>::serial_invoke, assert failure (m<=5 && n<=5)"); switch (m * 10 + n) { case 55: { @@ -1419,14 +1373,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<5, 5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 4>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 4>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; - if (!(m <= 4 && n <= 4)) - Kokkos::abort( - "InnerGemmFixC<4,4>::serial_invoke, assert failure (m<=4 && n<=4)"); + if (!(m <= 4 && n <= 4)) Kokkos::abort("InnerGemmFixC<4,4>::serial_invoke, assert failure (m<=4 && n<=4)"); switch (m * 10 + n) { case 44: { @@ -1475,14 +1428,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<4, 4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 3>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 3>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; - if (!(m <= 3 && n <= 3)) - Kokkos::abort( - "InnerGemmFixC<3,3>::serial_invoke, assert failure (m<=3 && n<=3)"); + if (!(m <= 3 && n <= 3)) 
Kokkos::abort("InnerGemmFixC<3,3>::serial_invoke, assert failure (m<=3 && n<=3)"); switch (m * 10 + n) { case 33: { @@ -1521,14 +1473,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<3, 3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 2>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 2>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; - if (!(m <= 2 && n <= 2)) - Kokkos::abort( - "InnerGemmFixC<2,2>::serial_invoke, assert failure (m<=2 && n<=2)"); + if (!(m <= 2 && n <= 2)) Kokkos::abort("InnerGemmFixC<2,2>::serial_invoke, assert failure (m<=2 && n<=2)"); switch (m * 10 + n) { case 22: { @@ -1557,14 +1508,13 @@ KOKKOS_INLINE_FUNCTION int InnerGemmFixC<2, 2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 1>::serial_invoke( - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { +KOKKOS_INLINE_FUNCTION int InnerGemmFixC<1, 1>::serial_invoke(const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { if (m <= 0 || n <= 0 || k <= 0) return 0; - if (!(m <= 1 && n <= 1)) - Kokkos::abort( - "InnerGemmFixC<1,1>::serial_invoke, assert failure (m<=1 && n<=1)"); + if (!(m <= 1 && n <= 1)) Kokkos::abort("InnerGemmFixC<1,1>::serial_invoke, assert failure (m<=1 && n<=1)"); return serial_invoke(alpha, A, B, k, C); ; diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Team_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Team_Impl.hpp index 116545f6538c..a3d6dece581f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerGemmFixC_Team_Impl.hpp @@ -25,43 +25,38 @@ namespace KokkosBatched { template template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC::team_invoke( - const MemberType &member, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const ValueType *KOKKOS_RESTRICT B, - const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, mb * nb), [&](const int &ij) { - const int i = ij / nb, j = ij % nb; +KOKKOS_INLINE_FUNCTION int InnerGemmFixC::team_invoke(const MemberType &member, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, mb * nb), [&](const int &ij) { + const int i = ij / nb, j = ij % nb; - const ValueType *KOKKOS_RESTRICT pA = A + i * _as0, - *KOKKOS_RESTRICT pB = B + j * _bs1; + const ValueType *KOKKOS_RESTRICT pA = A + i * _as0, *KOKKOS_RESTRICT pB = B + j * _bs1; - ValueType c = 0; - for (int p = 0; p < k; ++p) c += pA[p * _as1] * pB[p * _bs0]; - C[i * _cs0 + j * _cs1] += alpha * c; - }); + ValueType c = 0; + for (int p = 0; p < k; ++p) c += pA[p * _as1] * pB[p * _bs0]; + C[i * _cs0 + j * _cs1] += alpha * c; + }); return 0; } template template -KOKKOS_INLINE_FUNCTION int InnerGemmFixC::team_invoke( - const MemberType &member, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const ValueType *KOKKOS_RESTRICT B, - const int m, const int n, const int k, - /**/ ValueType *KOKKOS_RESTRICT C) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, m * n), [&](const int 
&ij) { - const int i = ij / n, j = ij % n; +KOKKOS_INLINE_FUNCTION int InnerGemmFixC::team_invoke(const MemberType &member, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, + const int n, const int k, + /**/ ValueType *KOKKOS_RESTRICT C) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m * n), [&](const int &ij) { + const int i = ij / n, j = ij % n; - const ValueType *KOKKOS_RESTRICT pA = A + i * _as0, - *KOKKOS_RESTRICT pB = B + j * _bs1; + const ValueType *KOKKOS_RESTRICT pA = A + i * _as0, *KOKKOS_RESTRICT pB = B + j * _bs1; - ValueType c = 0; - for (int p = 0; p < k; ++p) c += pA[p * _as1] * pB[p * _bs0]; - C[i * _cs0 + j * _cs1] += alpha * c; - }); + ValueType c = 0; + for (int p = 0; p < k; ++p) c += pA[p * _as1] * pB[p * _bs0]; + C[i * _cs0 + j * _cs1] += alpha * c; + }); return 0; } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerLU_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerLU_Serial_Impl.hpp index 3089d068bb40..0d74598b2449 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerLU_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerLU_Serial_Impl.hpp @@ -29,21 +29,16 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<5>::serial_invoke( - ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<5>::serial_invoke(ValueType *KOKKOS_RESTRICT A) { // load - ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * 
_as1], - a_24 = A[2 * _as0 + 4 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], - a_33 = A[3 * _as0 + 3 * _as1], a_34 = A[3 * _as0 + 4 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], - a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1], + ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], + a_14 = A[1 * _as0 + 4 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], a_24 = A[2 * _as0 + 4 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_33 = A[3 * _as0 + 3 * _as1], a_34 = A[3 * _as0 + 4 * _as1], a_40 = A[4 * _as0 + 0 * _as1], + a_41 = A[4 * _as0 + 1 * _as1], a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1], a_44 = A[4 * _as0 + 4 * _as1]; // 0 iteration @@ -121,17 +116,14 @@ KOKKOS_INLINE_FUNCTION int InnerLU<5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<4>::serial_invoke( - ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<4>::serial_invoke(ValueType *KOKKOS_RESTRICT A) { // load - ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], - a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], - a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], - a_32 = A[3 * _as0 + 2 * _as1], a_33 = A[3 * _as0 + 3 * _as1]; + ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 
* _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_03 = A[0 * _as0 + 3 * _as1], a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], + a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], a_20 = A[2 * _as0 + 0 * _as1], + a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1], a_23 = A[2 * _as0 + 3 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_33 = A[3 * _as0 + 3 * _as1]; // 0 iteration a_10 /= a_00; @@ -178,14 +170,11 @@ KOKKOS_INLINE_FUNCTION int InnerLU<4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<3>::serial_invoke( - ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<3>::serial_invoke(ValueType *KOKKOS_RESTRICT A) { // load - ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_02 = A[0 * _as0 + 2 * _as1], a_10 = A[1 * _as0 + 0 * _as1], - a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], - a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], - a_22 = A[2 * _as0 + 2 * _as1]; + ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], + a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1], a_12 = A[1 * _as0 + 2 * _as1], + a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], a_22 = A[2 * _as0 + 2 * _as1]; // 0 iteration a_10 /= a_00; @@ -212,11 +201,10 @@ KOKKOS_INLINE_FUNCTION int InnerLU<3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<2>::serial_invoke( - ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<2>::serial_invoke(ValueType *KOKKOS_RESTRICT A) { // load - ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], - a_10 = A[1 * _as0 + 0 * _as1], a_11 = A[1 * _as0 + 1 * _as1]; + ValueType a_00 = A[0 * _as0 + 0 * _as1], a_01 = A[0 * _as0 + 1 * _as1], a_10 = A[1 * _as0 + 0 * _as1], + a_11 = A[1 * _as0 + 1 * _as1]; // 0 iteration a_10 /= a_00; @@ -231,15 +219,13 
@@ KOKKOS_INLINE_FUNCTION int InnerLU<2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<1>::serial_invoke( - ValueType *KOKKOS_RESTRICT /* A */) { +KOKKOS_INLINE_FUNCTION int InnerLU<1>::serial_invoke(ValueType *KOKKOS_RESTRICT /* A */) { return 0; } template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<5>::serial_invoke( - const int m, ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<5>::serial_invoke(const int m, ValueType *KOKKOS_RESTRICT A) { if (m > 5) Kokkos::abort("InnerLU<5>::serial_invoke, assert failure (m<=5)"); if (m <= 0) return 0; @@ -275,8 +261,7 @@ KOKKOS_INLINE_FUNCTION int InnerLU<5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<4>::serial_invoke( - const int m, ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<4>::serial_invoke(const int m, ValueType *KOKKOS_RESTRICT A) { if (m > 4) Kokkos::abort("InnerLU<4>::serial_invoke, assert failure (m<=4)"); if (m <= 0) return 0; @@ -307,8 +292,7 @@ KOKKOS_INLINE_FUNCTION int InnerLU<4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<3>::serial_invoke( - const int m, ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<3>::serial_invoke(const int m, ValueType *KOKKOS_RESTRICT A) { if (m > 3) Kokkos::abort("InnerLU<3>::serial_invoke, assert failure (m<=3)"); if (m <= 0) return 0; @@ -334,8 +318,7 @@ KOKKOS_INLINE_FUNCTION int InnerLU<3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<2>::serial_invoke( - const int m, ValueType *KOKKOS_RESTRICT A) { +KOKKOS_INLINE_FUNCTION int InnerLU<2>::serial_invoke(const int m, ValueType *KOKKOS_RESTRICT A) { if (m > 2) Kokkos::abort("InnerLU<2>::serial_invoke, assert failure (m<=2)"); if (m <= 0) return 0; @@ -356,8 +339,7 @@ KOKKOS_INLINE_FUNCTION int InnerLU<2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerLU<1>::serial_invoke( - const int m, ValueType *KOKKOS_RESTRICT A) { 
+KOKKOS_INLINE_FUNCTION int InnerLU<1>::serial_invoke(const int m, ValueType *KOKKOS_RESTRICT A) { if (m > 1) Kokkos::abort("InnerLU<1>::serial_invoke, assert failure (m<=1)"); if (m <= 0) return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerTrsm_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerTrsm_Serial_Impl.hpp index 539980a70538..04825ac61c03 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerTrsm_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InnerTrsm_Serial_Impl.hpp @@ -30,19 +30,16 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], - a_21 = A[2 * _as0 + 1 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], - a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1]; + const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], a_42 = A[4 * _as0 + 2 * _as1], + a_43 = A[4 * _as0 + 3 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -87,17 
+84,14 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], - a_21 = A[2 * _as0 + 1 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1]; + const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -134,16 +128,13 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], - a_21 = A[2 * _as0 + 1 * _as1]; + const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p) { // 
load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -173,9 +164,8 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; const ValueType a_10 = A[1 * _as0 + 0 * _as1]; @@ -205,9 +195,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT /* A */, const int /* n */, - /**/ ValueType *KOKKOS_RESTRICT /* B */) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT /* A */, + const int /* n */, + /**/ ValueType *KOKKOS_RESTRICT /* B */) { return 0; } @@ -218,12 +208,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 5) - Kokkos::abort( - "InnerTrsmLeftLowerUnitDiag<5>::serial_invoke, assert failure (m<=5)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 5) Kokkos::abort("InnerTrsmLeftLowerUnitDiag<5>::serial_invoke, assert failure (m<=5)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 5: { @@ -256,12 +244,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<4>::serial_invoke( - const 
ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 4) - Kokkos::abort( - "InnerTrsmLeftLowerUnitDiag<4>::serial_invoke, assert failure (m<=4)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 4) Kokkos::abort("InnerTrsmLeftLowerUnitDiag<4>::serial_invoke, assert failure (m<=4)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 4: { @@ -289,12 +275,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<4>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 3) - Kokkos::abort( - "InnerTrsmLeftLowerUnitDiag<3>::serial_invoke, assert failure (m<=3)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 3) Kokkos::abort("InnerTrsmLeftLowerUnitDiag<3>::serial_invoke, assert failure (m<=3)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 3: { @@ -317,12 +301,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 2) - Kokkos::abort( - "InnerTrsmLeftLowerUnitDiag<2>::serial_invoke, assert failure (m<=2)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 2) Kokkos::abort("InnerTrsmLeftLowerUnitDiag<2>::serial_invoke, assert failure (m<=2)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 2: { @@ -340,12 
+322,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<2>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 1) - Kokkos::abort( - "InnerTrsmLeftLowerUnitDiag<1>::serial_invoke, assert failure (m<=1)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 1) Kokkos::abort("InnerTrsmLeftLowerUnitDiag<1>::serial_invoke, assert failure (m<=1)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 1: { @@ -364,16 +344,15 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], - a_21 = A[2 * _as0 + 1 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], - a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], - a_42 = A[4 * _as0 + 2 * _as1], a_43 = A[4 * _as0 + 3 * _as1]; + const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1], + a_40 = A[4 * _as0 + 0 * _as1], a_41 = A[4 * _as0 + 1 * _as1], a_42 = A[4 * _as0 + 2 * _as1], + a_43 = A[4 * _as0 + 3 * _as1]; // const ValueType // a_00 = A[0*_as0+0*_as1], @@ -382,19 +361,13 @@ KOKKOS_INLINE_FUNCTION int 
InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke( // a_33 = A[3*_as0+3*_as1], // a_44 = A[4*_as0+4*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1], - inv_a_22 = - static_cast(1.0) / A[2 * _as0 + 2 * _as1], - inv_a_33 = - static_cast(1.0) / A[3 * _as0 + 3 * _as1], - inv_a_44 = - static_cast(1.0) / A[4 * _as0 + 4 * _as1]; - - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1], + inv_a_22 = static_cast(1.0) / A[2 * _as0 + 2 * _as1], + inv_a_33 = static_cast(1.0) / A[3 * _as0 + 3 * _as1], + inv_a_44 = static_cast(1.0) / A[4 * _as0 + 4 * _as1]; + + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -448,14 +421,13 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], - a_21 = A[2 * _as0 + 1 * _as1], a_30 = A[3 * _as0 + 0 * _as1], - a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1]; + const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1], + a_30 = A[3 * _as0 + 0 * _as1], a_31 = A[3 * _as0 + 1 * _as1], a_32 = A[3 * _as0 + 2 * _as1]; // const ValueType // a_00 = A[0*_as0+0*_as1], @@ -463,17 +435,12 @@ KOKKOS_INLINE_FUNCTION 
int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke( // a_22 = A[2*_as0+2*_as1], // a_33 = A[3*_as0+3*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1], - inv_a_22 = - static_cast(1.0) / A[2 * _as0 + 2 * _as1], - inv_a_33 = - static_cast(1.0) / A[3 * _as0 + 3 * _as1]; - - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p) { + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1], + inv_a_22 = static_cast(1.0) / A[2 * _as0 + 2 * _as1], + inv_a_33 = static_cast(1.0) / A[3 * _as0 + 3 * _as1]; + + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -518,28 +485,23 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], - a_21 = A[2 * _as0 + 1 * _as1]; + const ValueType a_10 = A[1 * _as0 + 0 * _as1], a_20 = A[2 * _as0 + 0 * _as1], a_21 = A[2 * _as0 + 1 * _as1]; // const ValueType // a_00 = A[0*_as0+0*_as1], // a_11 = A[1*_as0+1*_as1], // a_22 = A[2*_as0+2*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1], - inv_a_22 = - static_cast(1.0) / A[2 * _as0 + 2 * _as1]; + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1], + 
inv_a_22 = static_cast(1.0) / A[2 * _as0 + 2 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -576,9 +538,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; const ValueType a_10 = A[1 * _as0 + 0 * _as1]; @@ -587,10 +549,8 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke( // a_00 = A[0*_as0+0*_as1], // a_11 = A[1*_as0+1*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1]; + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1]; auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p) { // load @@ -622,16 +582,15 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; // const ValueType // a_00 = A[0*_as0+0*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1]; + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1]; auto trsv = [&](const int p, 
ValueType & /* b_0p */) { B[0 * _bs0 + p * _bs1] *= inv_a_00; /* b_0p /= a_00;*/ @@ -655,9 +614,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 5) Kokkos::abort( "InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke, assert failure " @@ -694,9 +653,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 4) Kokkos::abort( "InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke, assert failure " @@ -728,9 +687,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<4>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 3) Kokkos::abort( "InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke, assert failure " @@ -757,9 +716,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke( - const 
ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 2) Kokkos::abort( "InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke, assert failure " @@ -781,9 +740,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<2>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 1) Kokkos::abort( "InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke, assert failure " @@ -806,21 +765,17 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftLowerNonUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], - a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], - /**/ a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], - /**/ a_23 = A[2 * _as0 + 3 * _as1], - a_24 = A[2 * _as0 + 4 * _as1], + const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], + a_04 = A[0 * _as0 + 4 * _as1], + /**/ a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], + /**/ a_23 = A[2 * _as0 + 3 * 
_as1], a_24 = A[2 * _as0 + 4 * _as1], /**/ a_34 = A[3 * _as0 + 4 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -866,19 +821,15 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], - a_03 = A[0 * _as0 + 3 * _as1], - /**/ a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], + const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], + /**/ a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], /**/ a_23 = A[2 * _as0 + 3 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -916,16 +867,14 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; const ValueType 
a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], /**/ a_12 = A[1 * _as0 + 2 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -956,9 +905,8 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; const ValueType a_01 = A[0 * _as0 + 1 * _as1]; @@ -988,9 +936,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT /* A */, const int /* n */, - /**/ ValueType *KOKKOS_RESTRICT /* B */) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT /* A */, + const int /* n */, + /**/ ValueType *KOKKOS_RESTRICT /* B */) { return 0; } @@ -1001,12 +949,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 5) - Kokkos::abort( - "InnerTrsmLeftUpperUnitDiag<5>::serial_invoke, assert failure (m<=5)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 5) Kokkos::abort("InnerTrsmLeftUpperUnitDiag<5>::serial_invoke, assert failure 
(m<=5)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 5: { @@ -1039,12 +985,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 4) - Kokkos::abort( - "InnerTrsmLeftUpperUnitDiag<4>::serial_invoke, assert failure (m<=4)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 4) Kokkos::abort("InnerTrsmLeftUpperUnitDiag<4>::serial_invoke, assert failure (m<=4)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 4: { @@ -1072,12 +1016,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<4>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 3) - Kokkos::abort( - "InnerTrsmLeftUpperUnitDiag<3>::serial_invoke, assert failure (m<=3)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 3) Kokkos::abort("InnerTrsmLeftUpperUnitDiag<3>::serial_invoke, assert failure (m<=3)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 3: { @@ -1100,12 +1042,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<3>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 2) - Kokkos::abort( - "InnerTrsmLeftUpperUnitDiag<2>::serial_invoke, assert failure (m<=2)"); +KOKKOS_INLINE_FUNCTION int 
InnerTrsmLeftUpperUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 2) Kokkos::abort("InnerTrsmLeftUpperUnitDiag<2>::serial_invoke, assert failure (m<=2)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 2: { @@ -1123,12 +1063,10 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<2>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { - if (m > 1) - Kokkos::abort( - "InnerTrsmLeftUpperUnitDiag<1>::serial_invoke, assert failure (m<=1)"); +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { + if (m > 1) Kokkos::abort("InnerTrsmLeftUpperUnitDiag<1>::serial_invoke, assert failure (m<=1)"); if (m <= 0 || n <= 0) return 0; switch (m) { case 1: { @@ -1147,17 +1085,15 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], - a_03 = A[0 * _as0 + 3 * _as1], a_04 = A[0 * _as0 + 4 * _as1], - /**/ a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], - /**/ a_23 = A[2 * _as0 + 3 * _as1], - a_24 = A[2 * _as0 + 4 * _as1], + const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], a_03 = A[0 * _as0 + 3 * _as1], + a_04 = A[0 * _as0 + 4 * _as1], + /**/ a_12 = A[1 * _as0 + 2 * 
_as1], a_13 = A[1 * _as0 + 3 * _as1], a_14 = A[1 * _as0 + 4 * _as1], + /**/ a_23 = A[2 * _as0 + 3 * _as1], a_24 = A[2 * _as0 + 4 * _as1], /**/ a_34 = A[3 * _as0 + 4 * _as1]; // const ValueType @@ -1167,19 +1103,13 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke( // a_33 = A[3*_as0+3*_as1], // a_44 = A[4*_as0+4*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1], - inv_a_22 = - static_cast(1.0) / A[2 * _as0 + 2 * _as1], - inv_a_33 = - static_cast(1.0) / A[3 * _as0 + 3 * _as1], - inv_a_44 = - static_cast(1.0) / A[4 * _as0 + 4 * _as1]; - - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1], + inv_a_22 = static_cast(1.0) / A[2 * _as0 + 2 * _as1], + inv_a_33 = static_cast(1.0) / A[3 * _as0 + 3 * _as1], + inv_a_44 = static_cast(1.0) / A[4 * _as0 + 4 * _as1]; + + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p, ValueType &b_4p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -1233,15 +1163,13 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; - const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], - a_03 = A[0 * _as0 + 3 * _as1], - /**/ a_12 = A[1 * _as0 + 2 * _as1], - a_13 = A[1 * _as0 + 3 * _as1], + const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * 
_as1], a_03 = A[0 * _as0 + 3 * _as1], + /**/ a_12 = A[1 * _as0 + 2 * _as1], a_13 = A[1 * _as0 + 3 * _as1], /**/ a_23 = A[2 * _as0 + 3 * _as1]; // const ValueType @@ -1250,17 +1178,12 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke( // a_22 = A[2*_as0+2*_as1], // a_33 = A[3*_as0+3*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1], - inv_a_22 = - static_cast(1.0) / A[2 * _as0 + 2 * _as1], - inv_a_33 = - static_cast(1.0) / A[3 * _as0 + 3 * _as1]; - - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p, ValueType &b_3p) { + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1], + inv_a_22 = static_cast(1.0) / A[2 * _as0 + 2 * _as1], + inv_a_33 = static_cast(1.0) / A[3 * _as0 + 3 * _as1]; + + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p, ValueType &b_3p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -1305,9 +1228,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; const ValueType a_01 = A[0 * _as0 + 1 * _as1], a_02 = A[0 * _as0 + 2 * _as1], @@ -1318,15 +1241,11 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke( // a_11 = A[1*_as0+1*_as1], // a_22 = A[2*_as0+2*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1], - inv_a_22 = - static_cast(1.0) / A[2 * _as0 + 2 * _as1]; + const ValueType 
inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1], + inv_a_22 = static_cast(1.0) / A[2 * _as0 + 2 * _as1]; - auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, - ValueType &b_2p) { + auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p, ValueType &b_2p) { // load b_0p = B[0 * _bs0 + p * _bs1]; b_1p = B[1 * _bs0 + p * _bs1]; @@ -1363,9 +1282,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; const ValueType a_01 = A[0 * _as0 + 1 * _as1]; @@ -1374,10 +1293,8 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke( // a_00 = A[0*_as0+0*_as1], // a_11 = A[1*_as0+1*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 + 0 * _as1], - inv_a_11 = - static_cast(1.0) / A[1 * _as0 + 1 * _as1]; + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1], + inv_a_11 = static_cast(1.0) / A[1 * _as0 + 1 * _as1]; auto trsv = [&](const int p, ValueType &b_0p, ValueType &b_1p) { // load @@ -1409,16 +1326,15 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (n <= 0) return 0; // const ValueType // a_00 = A[0*_as0+0*_as1]; - const ValueType inv_a_00 = - static_cast(1.0) / A[0 * _as0 
+ 0 * _as1]; + const ValueType inv_a_00 = static_cast(1.0) / A[0 * _as0 + 0 * _as1]; auto trsv = [&](const int p, ValueType & /* b_0p */) { // 0 iteration @@ -1443,9 +1359,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<1>::serial_invoke( template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 5) Kokkos::abort( "InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke, assert failure " @@ -1482,9 +1398,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<5>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 4) Kokkos::abort( "InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke, assert failure " @@ -1516,9 +1432,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<4>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 3) Kokkos::abort( "InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke, assert failure " @@ -1545,9 +1461,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<3>::serial_invoke( } template <> template 
-KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 2) Kokkos::abort( "InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke, assert failure " @@ -1569,9 +1485,9 @@ KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<2>::serial_invoke( } template <> template -KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<1>::serial_invoke( - const ValueType *KOKKOS_RESTRICT A, const int m, const int n, - /**/ ValueType *KOKKOS_RESTRICT B) { +KOKKOS_INLINE_FUNCTION int InnerTrsmLeftUpperNonUnitDiag<1>::serial_invoke(const ValueType *KOKKOS_RESTRICT A, + const int m, const int n, + /**/ ValueType *KOKKOS_RESTRICT B) { if (m > 1) Kokkos::abort( "InnerTrsmLeftUpperNonUnitDiag<1>::serial_invoke, assert failure " diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InverseLU_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InverseLU_Serial_Impl.hpp index 070a620531ca..215c62e9f258 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InverseLU_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_InverseLU_Serial_Impl.hpp @@ -32,49 +32,42 @@ namespace KokkosBatched { /// InverseLU no piv /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template <> template -KOKKOS_INLINE_FUNCTION int SerialInverseLU::invoke( - const AViewType &A, const WViewType &W) { +KOKKOS_INLINE_FUNCTION int SerialInverseLU::invoke(const AViewType &A, + const WViewType &W) { typedef 
typename AViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = A.extent(0), n = A.extent(1); static_assert(is_vector::value, "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); static_assert(AViewType::rank == 2, "A should have two dimensions"); static_assert(WViewType::rank == 1, "W should have one dimension"); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "A and W should be on the same memory space"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "W should be an contiguous 1D array"); assert(A.extent(0) * A.extent(1) * sizeof(typename AViewType::value_type) <= W.span() * sizeof(typename WViewType::value_type)); assert(m == n); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? 
MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; int r_val = 0; if (A.stride(0) == 1) { - mkl_dgetrinp_compact( - MKL_COL_MAJOR, n, (double *)A.data(), A.stride(1), (double *)W.data(), - (MKL_INT)(n * n * vector_type::vector_length), (MKL_INT *)&r_val, - format, (MKL_INT)vector_type::vector_length); + mkl_dgetrinp_compact(MKL_COL_MAJOR, n, (double *)A.data(), A.stride(1), (double *)W.data(), + (MKL_INT)(n * n * vector_type::vector_length), (MKL_INT *)&r_val, format, + (MKL_INT)vector_type::vector_length); } else if (A.stride(1) == 1) { - mkl_dgetrinp_compact( - MKL_ROW_MAJOR, n, (double *)A.data(), A.stride(0), (double *)W.data(), - (MKL_INT)(n * n * vector_type::vector_length), (MKL_INT *)&r_val, - format, (MKL_INT)vector_type::vector_length); + mkl_dgetrinp_compact(MKL_ROW_MAJOR, n, (double *)A.data(), A.stride(0), (double *)W.data(), + (MKL_INT)(n * n * vector_type::vector_length), (MKL_INT *)&r_val, format, + (MKL_INT)vector_type::vector_length); } else { r_val = -1; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Impl.hpp index 2fa372aa7ce2..e2acd012cb35 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Impl.hpp @@ -31,35 +31,28 @@ namespace KokkosBatched { /// SerialLU no piv /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template <> template KOKKOS_INLINE_FUNCTION int SerialLU::invoke( - const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny) { + const AViewType &A, const typename MagnitudeScalarType::type tiny) { typedef typename AViewType::value_type 
vector_type; // typedef typename vector_type::value_type value_type; const int m = A.extent(0), n = A.extent(1); static_assert(is_vector::value, "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; int r_val = 0; if (A.stride_0() == 1) { - mkl_dgetrfnp_compact(MKL_COL_MAJOR, m, n, (double *)A.data(), A.stride_1(), - (MKL_INT *)&r_val, format, + mkl_dgetrfnp_compact(MKL_COL_MAJOR, m, n, (double *)A.data(), A.stride_1(), (MKL_INT *)&r_val, format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1) { - mkl_dgetrfnp_compact(MKL_ROW_MAJOR, m, n, (double *)A.data(), A.stride_0(), - (MKL_INT *)&r_val, format, + mkl_dgetrfnp_compact(MKL_ROW_MAJOR, m, n, (double *)A.data(), A.stride_0(), (MKL_INT *)&r_val, format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -71,21 +64,17 @@ KOKKOS_INLINE_FUNCTION int SerialLU::invoke( template <> template KOKKOS_INLINE_FUNCTION int SerialLU::invoke( - const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny) { - return SerialLU_Internal::invoke( - A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), tiny); + const AViewType &A, const typename MagnitudeScalarType::type tiny) { + return SerialLU_Internal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), + tiny); } template <> template KOKKOS_INLINE_FUNCTION int SerialLU::invoke( - const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny) { - return SerialLU_Internal::invoke( - 
A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), tiny); + const AViewType &A, const typename MagnitudeScalarType::type tiny) { + return SerialLU_Internal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), + tiny); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Internal.hpp index e6b34d8f1b1c..6555a16d93cc 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Serial_Internal.hpp @@ -33,16 +33,15 @@ namespace KokkosBatched { template struct SerialLU_Internal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const typename MagnitudeScalarType::type tiny); + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, const typename MagnitudeScalarType::type tiny); }; template <> template KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( - const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const typename MagnitudeScalarType::type tiny) { + const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + const typename MagnitudeScalarType::type tiny) { const int k = (m < n ? 
m : n); if (k <= 0) return 0; @@ -55,14 +54,12 @@ KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( const ValueType *KOKKOS_RESTRICT a12t = A + (p)*as0 + (p + 1) * as1; - ValueType *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p)*as1, - *KOKKOS_RESTRICT A22 = - A + (p + 1) * as0 + (p + 1) * as1; + ValueType *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p)*as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; if (tiny != 0) { ValueType &alpha11_reference = A[p * as0 + p * as1]; - const auto alpha11_real = - Kokkos::ArithTraits::real(alpha11_reference); + const auto alpha11_real = Kokkos::ArithTraits::real(alpha11_reference); alpha11_reference += minus_abs_tiny * ValueType(alpha11_real < 0); alpha11_reference += abs_tiny * ValueType(alpha11_real >= 0); } @@ -76,8 +73,7 @@ KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int j = 0; j < jend; ++j) - A22[i * as0 + j * as1] -= a21[i * as0] * a12t[j * as1]; + for (int j = 0; j < jend; ++j) A22[i * as0 + j * as1] -= a21[i * as0] * a12t[j * as1]; } } return 0; @@ -86,8 +82,7 @@ KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( template <> template KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( - const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, + const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const typename MagnitudeScalarType::type /*tiny*/) { constexpr int mbAlgo = Algo::LU::Blocked::mb(); const typename MagnitudeScalarType::type one(1.0), minus_one(-1.0); @@ -100,8 +95,7 @@ KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( InnerTrsmLeftLowerUnitDiag trsm_llu(as0, as1, as0, as1); InnerTrsmLeftLowerNonUnitDiag trsm_run(as1, as0, as1, as0); - auto lu_factorize = [&](const int ib, const int jb, - ValueType *KOKKOS_RESTRICT AA) { + auto lu_factorize = [&](const int ib, const int jb, ValueType *KOKKOS_RESTRICT AA) { const int mb = mbAlgo; const int kb 
= ib < jb ? ib : jb; for (int p = 0; p < kb; p += mb) { @@ -121,9 +115,8 @@ KOKKOS_INLINE_FUNCTION int SerialLU_Internal::invoke( trsm_run.serial_invoke(Ap, pb, m_abr, Ap + mb * as0); // gemm update - SerialGemmInternal::invoke( - m_abr, n_abr, pb, minus_one, Ap + mb * as0, as0, as1, Ap + mb * as1, - as0, as1, one, Ap + mb * as0 + mb * as1, as0, as1); + SerialGemmInternal::invoke(m_abr, n_abr, pb, minus_one, Ap + mb * as0, as0, as1, + Ap + mb * as1, as0, as1, one, Ap + mb * as0 + mb * as1, as0, as1); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Impl.hpp index 3f28c063b83b..9ed5e244d2d5 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Impl.hpp @@ -36,11 +36,9 @@ struct TeamLU { template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny = 0) { - return TeamLU_Internal::invoke( - member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), - tiny); + const typename MagnitudeScalarType::type tiny = 0) { + return TeamLU_Internal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), + A.stride_1(), tiny); } }; @@ -49,11 +47,9 @@ struct TeamLU { template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny = 0) { - return TeamLU_Internal::invoke( - member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), - tiny); + const typename MagnitudeScalarType::type tiny = 0) { + return TeamLU_Internal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), + A.stride_1(), tiny); } }; diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Internal.hpp index cbc811de5e12..dacfb02ed480 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LU_Team_Internal.hpp @@ -35,17 +35,15 @@ namespace KokkosBatched { template struct TeamLU_Internal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const typename MagnitudeScalarType::type tiny); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + const typename MagnitudeScalarType::type tiny); }; template <> template KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( - const MemberType &member, const int m, const int n, - ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + const MemberType &member, const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const typename MagnitudeScalarType::type tiny) { const int k = (m < n ? 
m : n); if (k <= 0) return 0; @@ -60,15 +58,13 @@ KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( const ValueType *KOKKOS_RESTRICT a12t = A + (p)*as0 + (p + 1) * as1; - ValueType *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p)*as1, - *KOKKOS_RESTRICT A22 = - A + (p + 1) * as0 + (p + 1) * as1; + ValueType *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p)*as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; if (tiny != 0) { if (member.team_rank() == 0) { ValueType &alpha11_reference = A[p * as0 + p * as1]; - const auto alpha11_real = - Kokkos::ArithTraits::real(alpha11_reference); + const auto alpha11_real = Kokkos::ArithTraits::real(alpha11_reference); alpha11_reference += minus_abs_tiny * ValueType(alpha11_real < 0); alpha11_reference += abs_tiny * ValueType(alpha11_real >= 0); } @@ -76,19 +72,17 @@ KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( member.team_barrier(); const ValueType alpha11 = A[p * as0 + p * as1]; - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend), - [&](const int &i) { - // a21[i*as0] *= inv_alpha11; - a21[i * as0] /= alpha11; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend), [&](const int &i) { + // a21[i*as0] *= inv_alpha11; + a21[i * as0] /= alpha11; + }); member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, iend * jend), [&](const int &ij) { - // assume layout right for batched computation - const int i = ij / jend, j = ij % jend; - A22[i * as0 + j * as1] -= a21[i * as0] * a12t[j * as1]; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend * jend), [&](const int &ij) { + // assume layout right for batched computation + const int i = ij / jend, j = ij % jend; + A22[i * as0 + j * as1] -= a21[i * as0] * a12t[j * as1]; + }); } return 0; } @@ -96,8 +90,7 @@ KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( template <> template KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( - const MemberType &member, const int m, const int n, - ValueType 
*KOKKOS_RESTRICT A, const int as0, const int as1, + const MemberType &member, const int m, const int n, ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, const typename MagnitudeScalarType::type /*tiny*/) { constexpr int mbAlgo = Algo::LU::Blocked::mb(); @@ -110,15 +103,11 @@ KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( InnerTrsmLeftLowerUnitDiag trsm_llu(as0, as1, as0, as1); InnerTrsmLeftLowerNonUnitDiag trsm_run(as1, as0, as1, as0); - auto lu_factorize = [&](const int ib, const int jb, - ValueType *KOKKOS_RESTRICT AA) { + auto lu_factorize = [&](const int ib, const int jb, ValueType *KOKKOS_RESTRICT AA) { const int tsize = member.team_size(); // Made this non-const in order to WORKAROUND issue #349 int mb = mbAlgo; - int nb = ((jb - mb) + (ib - mb)) > 0 - ? ((jb - mb) + (ib - mb)) / tsize + - (((jb - mb) + (ib - mb)) % tsize > 0) - : 1; + int nb = ((jb - mb) + (ib - mb)) > 0 ? ((jb - mb) + (ib - mb)) / tsize + (((jb - mb) + (ib - mb)) % tsize > 0) : 1; const int kb = ib < jb ? ib : jb; for (int p = 0; p < kb; p += mb) { @@ -133,29 +122,24 @@ KOKKOS_INLINE_FUNCTION int TeamLU_Internal::invoke( member.team_barrier(); // Made this non-const in order to WORKAROUND issue #349 - int m_abr = ib - p - mb, n_abr = jb - p - mb, mp_abr = m_abr % nb, - np_abr = n_abr % nb, mq_abr = (m_abr / nb) + (mp_abr > 0), - nq_abr = (n_abr / nb) + (np_abr > 0); + int m_abr = ib - p - mb, n_abr = jb - p - mb, mp_abr = m_abr % nb, np_abr = n_abr % nb, + mq_abr = (m_abr / nb) + (mp_abr > 0), nq_abr = (n_abr / nb) + (np_abr > 0); // trsm update - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, mq_abr + nq_abr), - [&](const int &ij) { - if (ij < nq_abr) { - const int j = (ij)*nb, qb = (j + nb) > n_abr ? np_abr : nb; - trsm_llu.serial_invoke(Ap, pb, qb, Ap + (j + mb) * as1); - } else { - const int i = (ij - nq_abr) * nb, - qb = (i + nb) > m_abr ? 
mp_abr : nb; - trsm_run.serial_invoke(Ap, pb, qb, Ap + (i + mb) * as0); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, mq_abr + nq_abr), [&](const int &ij) { + if (ij < nq_abr) { + const int j = (ij)*nb, qb = (j + nb) > n_abr ? np_abr : nb; + trsm_llu.serial_invoke(Ap, pb, qb, Ap + (j + mb) * as1); + } else { + const int i = (ij - nq_abr) * nb, qb = (i + nb) > m_abr ? mp_abr : nb; + trsm_run.serial_invoke(Ap, pb, qb, Ap + (i + mb) * as0); + } + }); member.team_barrier(); // gemm update - TeamGemmInternal::invoke( - member, m_abr, n_abr, pb, minus_one, Ap + mb * as0, as0, as1, - Ap + mb * as1, as0, as1, one, Ap + mb * as0 + mb * as1, as0, as1); + TeamGemmInternal::invoke(member, m_abr, n_abr, pb, minus_one, Ap + mb * as0, as0, as1, + Ap + mb * as1, as0, as1, one, Ap + mb * as0 + mb * as1, as0, as1); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LeftEigenvectorFromSchur_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LeftEigenvectorFromSchur_Serial_Internal.hpp index ea87217a3734..c266d65c54e3 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LeftEigenvectorFromSchur_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_LeftEigenvectorFromSchur_Serial_Internal.hpp @@ -45,12 +45,9 @@ struct SerialLeftEigenvectorFromSchurInternal { /// contiguous workspace that can hold complex array (m) template KOKKOS_INLINE_FUNCTION static int invoke(const int m, - /* */ ValueType *S, const int ss0, - const int ss1, - /* */ ValueType *V, const int vs0, - const int vs1, - /* */ ValueType *w, - const int *blks) { + /* */ ValueType *S, const int ss0, const int ss1, + /* */ ValueType *V, const int vs0, const int vs1, + /* */ ValueType *w, const int *blks) { typedef ValueType value_type; typedef Kokkos::ArithTraits ats; // typedef typename ats::mag_type mag_type; @@ -77,8 +74,7 @@ struct SerialLeftEigenvectorFromSchurInternal { for (; m_stl < (m - 1);) { /// 
part 2x2 into 3x3 const int mA11 = blks[m_stl]; - assert(((mA11 == 1) || (mA11 == 2)) && - "LeftEigenvectorFromSchur: blk is not 1x1 nor 2x2"); + assert(((mA11 == 1) || (mA11 == 2)) && "LeftEigenvectorFromSchur: blk is not 1x1 nor 2x2"); S_part3x3.partWithABR(S_part2x2, mA11, mA11); V_part3x1.partWithAB(V_part2x1, mA11); @@ -90,23 +86,19 @@ struct SerialLeftEigenvectorFromSchurInternal { /// initialize a left hand side b[m_stl] = one; - for (int j = 0; j < (m - m_stl_plus_mA11); ++j) - b[j + m_stl_plus_mA11] = -S_part3x3.A12[j * ss1]; + for (int j = 0; j < (m - m_stl_plus_mA11); ++j) b[j + m_stl_plus_mA11] = -S_part3x3.A12[j * ss1]; /// perform shifted trsv (transposed) - SerialShiftedTrsvInternalLower::invoke( - m - m_stl_plus_mA11, lambda, S_part3x3.A22, ss1, ss0, - b + m_stl_plus_mA11, 1, blks + m_stl_plus_mA11); + SerialShiftedTrsvInternalLower::invoke(m - m_stl_plus_mA11, lambda, S_part3x3.A22, ss1, ss0, + b + m_stl_plus_mA11, 1, blks + m_stl_plus_mA11); /// copy back to V (row wise copy) for (int j = 0; j < m_stl; ++j) V_part3x1.A1[j * vs1] = zero; for (int j = m_stl; j < m; ++j) V_part3x1.A1[j * vs1] = b[j]; } else { /// complex eigen pair - const value_type alpha11 = S_part3x3.A11[0], - alpha12 = S_part3x3.A11[ss1], - alpha21 = S_part3x3.A11[ss0], - beta = ats::sqrt(-alpha12 * alpha21); + const value_type alpha11 = S_part3x3.A11[0], alpha12 = S_part3x3.A11[ss1], alpha21 = S_part3x3.A11[ss0], + beta = ats::sqrt(-alpha12 * alpha21); const complex_type lambda(alpha11, beta); complex_type *bc = (complex_type *)(b); @@ -118,13 +110,11 @@ struct SerialLeftEigenvectorFromSchurInternal { const value_type *S_A12_a = S_part3x3.A12; const value_type *S_A12_b = S_part3x3.A12 + ss0; for (int j = 0; j < (m - m_stl_plus_mA11); ++j) - bc[j + m_stl_plus_mA11] = complex_type(-S_A12_a[j * ss1] * beta, - S_A12_b[j * ss1] * alpha12); + bc[j + m_stl_plus_mA11] = complex_type(-S_A12_a[j * ss1] * beta, S_A12_b[j * ss1] * alpha12); /// perform shifted trsv - 
SerialShiftedTrsvInternalLower::invoke( - m - m_stl_plus_mA11, lambda, S_part3x3.A22, ss1, ss0, - bc + m_stl_plus_mA11, 1, blks + m_stl_plus_mA11); + SerialShiftedTrsvInternalLower::invoke(m - m_stl_plus_mA11, lambda, S_part3x3.A22, ss1, ss0, + bc + m_stl_plus_mA11, 1, blks + m_stl_plus_mA11); /// copy back to V value_type *V_A1_r = V_part3x1.A1; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Normalize_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Normalize_Internal.hpp index 42adf8eeba14..af6832940b5e 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Normalize_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Normalize_Internal.hpp @@ -28,8 +28,7 @@ namespace KokkosBatched { struct SerialNormalizeInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, - /* */ ValueType *KOKKOS_RESTRICT v, - const int vs) { + /* */ ValueType *KOKKOS_RESTRICT v, const int vs) { typedef ValueType value_type; typedef Kokkos::ArithTraits ats; typedef typename ats::mag_type mag_type; @@ -53,10 +52,8 @@ struct SerialNormalizeInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, - /* */ RealType *KOKKOS_RESTRICT vr, - const int vrs, - /* */ RealType *KOKKOS_RESTRICT vi, - const int vis) { + /* */ RealType *KOKKOS_RESTRICT vr, const int vrs, + /* */ RealType *KOKKOS_RESTRICT vi, const int vis) { typedef RealType real_type; typedef Kokkos::ArithTraits ats; typedef typename ats::mag_type mag_type; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Impl.hpp new file mode 100644 index 000000000000..b96c47e64242 --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Impl.hpp @@ -0,0 +1,68 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSBATCHED_PTTRF_SERIAL_IMPL_HPP_ +#define KOKKOSBATCHED_PTTRF_SERIAL_IMPL_HPP_ + +#include +#include "KokkosBatched_Pttrf_Serial_Internal.hpp" + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +namespace KokkosBatched { + +template +KOKKOS_INLINE_FUNCTION static int checkPttrfInput([[maybe_unused]] const DViewType &d, + [[maybe_unused]] const EViewType &e) { + static_assert(Kokkos::is_view::value, "KokkosBatched::pttrf: DViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::pttrf: EViewType is not a Kokkos::View."); + + static_assert(DViewType::rank == 1, "KokkosBatched::pttrf: DViewType must have rank 1."); + static_assert(EViewType::rank == 1, "KokkosBatched::pttrf: EViewType must have rank 1."); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + const int nd = d.extent(0); + const int ne = e.extent(0); + + if (ne + 1 != nd) { + Kokkos::printf( + "KokkosBatched::pttrf: Dimensions of d and e do not match: d: %d, e: " + "%d \n" + "e.extent(0) must be equal to d.extent(0) - 1\n", + nd, ne); + return 1; + } +#endif + return 0; +} + +template <> +struct SerialPttrf { + template + KOKKOS_INLINE_FUNCTION static int invoke(const DViewType &d, const EViewType &e) { + // Quick return if possible + if (d.extent(0) == 0) return 0; + if (d.extent(0) == 1) return (d(0) < 0 ? 
1 : 0); + + auto info = checkPttrfInput(d, e); + if (info) return info; + + return SerialPttrfInternal::invoke(d.extent(0), d.data(), d.stride(0), e.data(), + e.stride(0)); + } +}; +} // namespace KokkosBatched + +#endif // KOKKOSBATCHED_PTTRF_SERIAL_IMPL_HPP_ diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Internal.hpp new file mode 100644 index 000000000000..438ec43320fe --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Pttrf_Serial_Internal.hpp @@ -0,0 +1,202 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSBATCHED_PTTRF_SERIAL_INTERNAL_HPP_ +#define KOKKOSBATCHED_PTTRF_SERIAL_INTERNAL_HPP_ + +#include + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +namespace KokkosBatched { + +template +struct SerialPttrfInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke(const int n, ValueType *KOKKOS_RESTRICT d, const int ds0, + ValueType *KOKKOS_RESTRICT e, const int es0); + + template + KOKKOS_INLINE_FUNCTION static int invoke(const int n, ValueType *KOKKOS_RESTRICT d, const int ds0, + Kokkos::complex *KOKKOS_RESTRICT e, const int es0); +}; + +/// +/// Real matrix +/// + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialPttrfInternal::invoke( + const int n, ValueType *KOKKOS_RESTRICT d, const int ds0, ValueType *KOKKOS_RESTRICT e, const int es0) { + int info = 0; + + auto update = [&](const int i) { + auto ei_tmp = e[i * es0]; + e[i * es0] = ei_tmp / d[i * ds0]; + d[(i + 1) * ds0] -= e[i * es0] * ei_tmp; + }; + + auto check_positive_definitiveness = [&](const int i) { return (d[i * ds0] <= 0.0) ? (i + 1) : 0; }; // stride ds0 + + // Compute the L*D*L' (or U'*D*U) factorization of A. 
+ const int i4 = (n - 1) % 4; + for (int i = 0; i < i4; i++) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i); + if (info) { + return info; + } +#endif + + update(i); + } // for (int i = 0; i < i4; i++) + + for (int i = i4; i < n - 4; i += 4) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i); + if (info) { + return info; + } +#endif + + update(i); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i + 1); + if (info) { + return info; + } +#endif + + update(i + 1); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i + 2); + if (info) { + return info; + } +#endif + + update(i + 2); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i + 3); + if (info) { + return info; + } +#endif + + update(i + 3); + + } // for (int i = i4; i < n-4; 4) + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(n - 1); + if (info) { + return info; + } +#endif + + return 0; +} + +/// +/// Complex matrix +/// + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialPttrfInternal::invoke( + const int n, ValueType *KOKKOS_RESTRICT d, const int ds0, Kokkos::complex *KOKKOS_RESTRICT e, + const int es0) { + int info = 0; + + auto update = [&](const int i) { + auto eir_tmp = e[i * es0].real(); + auto eii_tmp = e[i * es0].imag(); + auto f_tmp = eir_tmp / d[i * ds0]; + auto g_tmp = eii_tmp / d[i * ds0]; + e[i * es0] = Kokkos::complex(f_tmp, g_tmp); + d[(i + 1) * ds0] = d[(i + 1) * ds0] - f_tmp * eir_tmp - g_tmp * eii_tmp; + }; + + auto check_positive_definitiveness = [&](const int i) { return (d[i * ds0] <= 0.0) ? (i + 1) : 0; }; // stride ds0 + + // Compute the L*D*L' (or U'*D*U) factorization of A. 
+ const int i4 = (n - 1) % 4; + for (int i = 0; i < i4; i++) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i); + if (info) { + return info; + } +#endif + + update(i); + } // for (int i = 0; i < i4; i++) + + for (int i = i4; i < n - 4; i += 4) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i); + if (info) { + return info; + } +#endif + + update(i); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i + 1); + if (info) { + return info; + } +#endif + + update(i + 1); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i + 2); + if (info) { + return info; + } +#endif + + update(i + 2); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(i + 3); + if (info) { + return info; + } +#endif + + update(i + 3); + + } // for (int i = i4; i < n-4; 4) + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + info = check_positive_definitiveness(n - 1); + if (info) { + return info; + } +#endif + + return 0; +} + +} // namespace KokkosBatched + +#endif // KOKKOSBATCHED_PTTRF_SERIAL_INTERNAL_HPP_ diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_Serial_Internal.hpp index ac97a3f7725c..7c717c2eed11 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_Serial_Internal.hpp @@ -34,13 +34,10 @@ namespace KokkosBatched { struct SerialQR_FormQ_Internal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int k, - /* */ ValueType* A, const int as0, - const int as1, + /* */ ValueType* A, const int as0, const int as1, /* */ ValueType* t, const int ts, - /* */ ValueType* Q, const int qs0, - const int qs1, - /* */ ValueType* w, - const bool is_Q_zero = false) { + /* */ ValueType* Q, const int qs0, const int qs1, + /* */ 
ValueType* w, const bool is_Q_zero = false) { typedef ValueType value_type; /// Given a matrix A that includes QR factorization @@ -57,8 +54,7 @@ struct SerialQR_FormQ_Internal { else SerialSetIdentityInternal::invoke(m, Q, qs0, qs1); - return SerialApplyQ_LeftNoTransForwardInternal ::invoke( - m, m, k, A, as0, as1, t, ts, Q, qs0, qs1, w); + return SerialApplyQ_LeftNoTransForwardInternal ::invoke(m, m, k, A, as0, as1, t, ts, Q, qs0, qs1, w); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_TeamVector_Internal.hpp index 66b63f23f6ba..af7f458898b6 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_FormQ_TeamVector_Internal.hpp @@ -33,12 +33,11 @@ namespace KokkosBatched { /// struct TeamVectorQR_FormQ_Internal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, const int k, - /* */ ValueType *A, const int as0, const int as1, - /* */ ValueType *t, const int ts, - /* */ ValueType *Q, const int qs0, const int qs1, - /* */ ValueType *w, const bool is_Q_zero = false) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int k, + /* */ ValueType *A, const int as0, const int as1, + /* */ ValueType *t, const int ts, + /* */ ValueType *Q, const int qs0, const int qs1, + /* */ ValueType *w, const bool is_Q_zero = false) { typedef ValueType value_type; /// Given a matrix A that includes QR factorization @@ -51,14 +50,12 @@ struct TeamVectorQR_FormQ_Internal { // set identity if (is_Q_zero) - KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, value_type(1), - Q, qs0 + qs1); + KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, value_type(1), Q, qs0 + qs1); else TeamVectorSetIdentityInternal::invoke(member, m, n, Q, qs0, 
qs1); member.team_barrier(); - return TeamVectorApplyQ_LeftForwardInternal ::invoke( - member, m, n, k, A, as0, as1, t, ts, Q, qs0, qs1, w); + return TeamVectorApplyQ_LeftForwardInternal ::invoke(member, m, n, k, A, as0, as1, t, ts, Q, qs0, qs1, w); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Impl.hpp index 5eac699f569d..1083e6af2ab8 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Impl.hpp @@ -29,10 +29,9 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int SerialQR::invoke( - const AViewType &A, const tViewType &t, const wViewType &w) { - return SerialQR_Internal::invoke(A.extent(0), A.extent(1), A.data(), - A.stride_0(), A.stride_1(), t.data(), +KOKKOS_INLINE_FUNCTION int SerialQR::invoke(const AViewType &A, const tViewType &t, + const wViewType &w) { + return SerialQR_Internal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), t.data(), t.stride_0(), w.data()); } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp index 729604f6c32a..95ca1c4340d1 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp @@ -34,8 +34,7 @@ struct SerialQR_Internal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, // m = NumRows(A) const int n, // n = NumCols(A) - /* */ ValueType *A, const int as0, - const int as1, + /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, /* */ ValueType *w) { typedef ValueType value_type; @@ -66,13 +65,11 @@ struct SerialQR_Internal { /// ----------------------------------------------------- 
// perform householder transformation - SerialLeftHouseholderInternal::invoke(m_A22, A_part3x3.A11, A_part3x3.A21, - as0, tau); + SerialLeftHouseholderInternal::invoke(m_A22, A_part3x3.A11, A_part3x3.A21, as0, tau); // left apply householder to A22 - SerialApplyLeftHouseholderInternal::invoke( - m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, - A_part3x3.A22, as0, as1, w); + SerialApplyLeftHouseholderInternal::invoke(m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, + A_part3x3.A22, as0, as1, w); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); t_part2x1.mergeToAT(t_part3x1); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Impl.hpp index 78d6e226a8f5..2497e5adf5d4 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Impl.hpp @@ -30,12 +30,9 @@ namespace KokkosBatched { template struct TeamVectorQR { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, const wViewType &w) { - return TeamVectorQR_Internal::invoke(member, A.extent(0), A.extent(1), - A.data(), A.stride_0(), A.stride_1(), + return TeamVectorQR_Internal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), t.data(), t.stride_0(), w.data()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Internal.hpp index 312feba9977f..e3dde679865a 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Internal.hpp +++ 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_TeamVector_Internal.hpp @@ -35,8 +35,7 @@ struct TeamVectorQR_Internal { KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, // m = NumRows(A) const int n, // n = NumCols(A) - /* */ ValueType *A, const int as0, - const int as1, + /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, /* */ ValueType *w) { typedef ValueType value_type; @@ -67,14 +66,12 @@ struct TeamVectorQR_Internal { /// ----------------------------------------------------- // perform householder transformation - TeamVectorLeftHouseholderInternal::invoke(member, m_A22, A_part3x3.A11, - A_part3x3.A21, as0, tau); + TeamVectorLeftHouseholderInternal::invoke(member, m_A22, A_part3x3.A11, A_part3x3.A21, as0, tau); member.team_barrier(); // left apply householder to A22 - TeamVectorApplyLeftHouseholderInternal::invoke( - member, m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, - A_part3x3.A22, as0, as1, w); + TeamVectorApplyLeftHouseholderInternal::invoke(member, m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, + A_part3x3.A22, as0, as1, w); member.team_barrier(); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Impl.hpp index 4f293f12cf82..ed9ccd8cce27 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Impl.hpp @@ -29,17 +29,13 @@ namespace KokkosBatched { template struct TeamVectorQR_WithColumnPivoting { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const pViewType &p, - const wViewType &w, 
+ template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const pViewType &p, const wViewType &w, /* */ int &matrix_rank) { - return TeamVectorQR_WithColumnPivotingInternal::invoke( - member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), - t.data(), t.stride_0(), p.data(), p.stride_0(), w.data(), matrix_rank); + return TeamVectorQR_WithColumnPivotingInternal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), + A.stride_1(), t.data(), t.stride_0(), p.data(), p.stride_0(), + w.data(), matrix_rank); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp index 26efb70c7775..280bfa434b49 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp @@ -37,10 +37,9 @@ namespace KokkosBatched { /// struct TeamVectorUpdateColumnNormsInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int n, const ValueType *KOKKOS_RESTRICT a, - const int as0, - /* */ ValueType *KOKKOS_RESTRICT norm, const int ns0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int n, const ValueType *KOKKOS_RESTRICT a, + const int as0, + /* */ ValueType *KOKKOS_RESTRICT norm, const int ns0) { using ats = Kokkos::ArithTraits; Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), [&](const int &j) { const int idx_a = j * as0, idx_n = j * ns0; @@ -55,8 +54,7 @@ struct TeamVectorQR_WithColumnPivotingInternal { KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, // m = NumRows(A) const int n, // n = NumCols(A) - /* */ ValueType *A, const int as0, - const int as1, + /* */ ValueType 
*A, const int as0, const int as1, /* */ ValueType *t, const int ts0, /* */ IntType *p, const int ps0, /* */ ValueType *w, @@ -98,8 +96,7 @@ struct TeamVectorQR_WithColumnPivotingInternal { norm_part1x2.partWithAL(norm, n, 0); // compute initial column norms (replaced by dot product) - TeamVectorDotInternal::invoke(member, m, n, A, as0, as1, A, as0, as1, norm, - 1); + TeamVectorDotInternal::invoke(member, m, n, A, as0, as1, A, as0, as1, norm, 1); member.team_barrier(); const bool finish_when_rank_found = (matrix_rank == -1); @@ -124,33 +121,27 @@ struct TeamVectorQR_WithColumnPivotingInternal { /// ----------------------------------------------------- // find max location - TeamVectorFindAmaxInternal::invoke(member, n_AR, norm_part1x2.AR, 1, - pividx); + TeamVectorFindAmaxInternal::invoke(member, n_AR, norm_part1x2.AR, 1, pividx); member.team_barrier(); // apply pivot - TeamVectorApplyPivotVectorForwardInternal::invoke(member, *pividx, - norm_part1x2.AR, 1); - TeamVectorApplyPivotMatrixForwardInternal::invoke( - member, m, *pividx, A_part2x2.ATR, as1, as0); + TeamVectorApplyPivotVectorForwardInternal::invoke(member, *pividx, norm_part1x2.AR, 1); + TeamVectorApplyPivotMatrixForwardInternal::invoke(member, m, *pividx, A_part2x2.ATR, as1, as0); member.team_barrier(); // perform householder transformation - TeamVectorLeftHouseholderInternal::invoke(member, m_A22, A_part3x3.A11, - A_part3x3.A21, as0, tau); + TeamVectorLeftHouseholderInternal::invoke(member, m_A22, A_part3x3.A11, A_part3x3.A21, as0, tau); member.team_barrier(); // left apply householder to A22 - TeamVectorApplyLeftHouseholderInternal::invoke( - member, m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, - A_part3x3.A22, as0, as1, w); + TeamVectorApplyLeftHouseholderInternal::invoke(member, m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, + A_part3x3.A22, as0, as1, w); member.team_barrier(); // break condition if (matrix_rank == min_mn) { if (m_atl == 0) max_diag = ats::abs(A[0]); - const 
value_type val_diag = ats::abs(A_part3x3.A11[0]), - threshold(10 * max_diag * ats::epsilon()); + const value_type val_diag = ats::abs(A_part3x3.A11[0]), threshold(10 * max_diag * ats::epsilon()); if (val_diag < threshold) { matrix_rank = m_atl; if (finish_when_rank_found) break; @@ -158,8 +149,7 @@ struct TeamVectorQR_WithColumnPivotingInternal { } // norm update - TeamVectorUpdateColumnNormsInternal::invoke(member, n_A22, A_part3x3.A12, - as1, norm_part1x3.A2, 1); + TeamVectorUpdateColumnNormsInternal::invoke(member, n_A22, A_part3x3.A12, as1, norm_part1x3.A2, 1); member.team_barrier(); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_RightEigenvectorFromSchur_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_RightEigenvectorFromSchur_Serial_Internal.hpp index 47165060649b..029875f81034 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_RightEigenvectorFromSchur_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_RightEigenvectorFromSchur_Serial_Internal.hpp @@ -45,12 +45,9 @@ struct SerialRightEigenvectorFromSchurInternal { /// contiguous workspace that can hold complex array (m) template KOKKOS_INLINE_FUNCTION static int invoke(const int m, - /* */ ValueType *S, const int ss0, - const int ss1, - /* */ ValueType *V, const int vs0, - const int vs1, - /* */ ValueType *w, - const int *blks) { + /* */ ValueType *S, const int ss0, const int ss1, + /* */ ValueType *V, const int vs0, const int vs1, + /* */ ValueType *w, const int *blks) { typedef ValueType value_type; typedef Kokkos::ArithTraits ats; // typedef typename ats::mag_type mag_type; @@ -78,8 +75,7 @@ struct SerialRightEigenvectorFromSchurInternal { for (; m_stl > 0;) { /// part 2x2 into 3x3 const int mA11 = blks[m_stl - 1]; - assert(((mA11 == 1) || (mA11 == 2)) && - "RightEigenvectorFromSchur: blk is not 1x1 nor 2x2"); + 
assert(((mA11 == 1) || (mA11 == 2)) && "RightEigenvectorFromSchur: blk is not 1x1 nor 2x2"); S_part3x3.partWithATL(S_part2x2, mA11, mA11); V_part1x3.partWithAL(V_part1x2, mA11); @@ -90,23 +86,19 @@ struct SerialRightEigenvectorFromSchurInternal { const value_type lambda = *S_part3x3.A11; /// initialize a right eigen vector - for (int i = 0; i < m_stl_minus_mA11; ++i) - b[i] = -S_part3x3.A01[i * ss0]; + for (int i = 0; i < m_stl_minus_mA11; ++i) b[i] = -S_part3x3.A01[i * ss0]; b[m_stl - 1] = one; /// perform shifted trsv - SerialShiftedTrsvInternalUpper::invoke( - m_stl_minus_mA11, lambda, S_part3x3.A00, ss0, ss1, w, 1, blks); + SerialShiftedTrsvInternalUpper::invoke(m_stl_minus_mA11, lambda, S_part3x3.A00, ss0, ss1, w, 1, blks); /// copy back to V for (int i = 0; i < m_stl; ++i) V_part1x3.A1[i * vs0] = w[i]; for (int i = m_stl; i < m; ++i) V_part1x3.A1[i * vs0] = zero; } else { /// complex eigen pair - const value_type alpha11 = S_part3x3.A11[0], - alpha12 = S_part3x3.A11[ss1], - alpha21 = S_part3x3.A11[ss0], - beta = ats::sqrt(-alpha12 * alpha21); + const value_type alpha11 = S_part3x3.A11[0], alpha12 = S_part3x3.A11[ss1], alpha21 = S_part3x3.A11[ss0], + beta = ats::sqrt(-alpha12 * alpha21); const complex_type lambda(alpha11, beta); complex_type *bc = (complex_type *)(b); @@ -115,14 +107,12 @@ struct SerialRightEigenvectorFromSchurInternal { const value_type *S_A01_a = S_part3x3.A01; const value_type *S_A01_b = S_part3x3.A01 + ss1; for (int i = 0; i < m_stl_minus_mA11; ++i) - bc[i] = complex_type(-S_A01_a[i * ss0] * beta, - S_A01_b[i * ss0] * alpha21); + bc[i] = complex_type(-S_A01_a[i * ss0] * beta, S_A01_b[i * ss0] * alpha21); bc[m_stl - 2] = complex_type(beta, zero); bc[m_stl - 1] = complex_type(zero, -alpha21); /// perform shifted trsv - SerialShiftedTrsvInternalUpper::invoke( - m_stl_minus_mA11, lambda, S_part3x3.A00, ss0, ss1, bc, 1, blks); + SerialShiftedTrsvInternalUpper::invoke(m_stl_minus_mA11, lambda, S_part3x3.A00, ss0, ss1, bc, 1, blks); /// copy back 
to V value_type *V_A1_r = V_part1x3.A1; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp index 20dab77092cd..e0c25c2ce739 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp @@ -22,51 +22,36 @@ namespace KokkosBatched { // Version which computes the full factorization -template -KOKKOS_INLINE_FUNCTION int SerialSVD::invoke(SVD_USV_Tag, const AViewType &A, - const UViewType &U, - const SViewType &sigma, - const VViewType &Vt, - const WViewType &work) { - static_assert(Kokkos::is_view_v && AViewType::rank == 2, - "SVD: A must be a rank-2 view"); - static_assert(Kokkos::is_view_v && UViewType::rank == 2, - "SVD: U must be a rank-2 view"); - static_assert(Kokkos::is_view_v && SViewType::rank == 1, - "SVD: s must be a rank-1 view"); - static_assert(Kokkos::is_view_v && VViewType::rank == 2, - "SVD: V must be a rank-2 view"); - static_assert(Kokkos::is_view_v && WViewType::rank == 1, - "SVD: W must be a rank-1 view"); - static_assert( - !std::is_same_v, - "SVD: W must be contiguous (not LayoutStride)"); +template +KOKKOS_INLINE_FUNCTION int SerialSVD::invoke(SVD_USV_Tag, const AViewType &A, const UViewType &U, + const SViewType &sigma, const VViewType &Vt, const WViewType &work, + typename AViewType::const_value_type tol) { + static_assert(Kokkos::is_view_v && AViewType::rank == 2, "SVD: A must be a rank-2 view"); + static_assert(Kokkos::is_view_v && UViewType::rank == 2, "SVD: U must be a rank-2 view"); + static_assert(Kokkos::is_view_v && SViewType::rank == 1, "SVD: s must be a rank-1 view"); + static_assert(Kokkos::is_view_v && VViewType::rank == 2, "SVD: V must be a rank-2 view"); + static_assert(Kokkos::is_view_v && WViewType::rank == 1, "SVD: W must be a rank-1 view"); + static_assert(!std::is_same_v, + "SVD: W must be 
contiguous (not LayoutStride)"); using value_type = typename AViewType::non_const_value_type; return KokkosBatched::SerialSVDInternal::invoke( - A.extent(0), A.extent(1), A.data(), A.stride(0), A.stride(1), U.data(), - U.stride(0), U.stride(1), Vt.data(), Vt.stride(0), Vt.stride(1), - sigma.data(), sigma.stride(0), work.data()); + A.extent(0), A.extent(1), A.data(), A.stride(0), A.stride(1), U.data(), U.stride(0), U.stride(1), Vt.data(), + Vt.stride(0), Vt.stride(1), sigma.data(), sigma.stride(0), work.data(), tol); } // Version which computes only singular values template -KOKKOS_INLINE_FUNCTION int SerialSVD::invoke(SVD_S_Tag, const AViewType &A, - const SViewType &sigma, - const WViewType &work) { - static_assert(Kokkos::is_view_v && AViewType::rank == 2, - "SVD: A must be a rank-2 view"); - static_assert(Kokkos::is_view_v && SViewType::rank == 1, - "SVD: s must be a rank-1 view"); - static_assert(Kokkos::is_view_v && WViewType::rank == 1, - "SVD: W must be a rank-1 view"); - static_assert( - !std::is_same_v, - "SVD: W must be contiguous (not LayoutStride)"); +KOKKOS_INLINE_FUNCTION int SerialSVD::invoke(SVD_S_Tag, const AViewType &A, const SViewType &sigma, + const WViewType &work, typename AViewType::const_value_type tol) { + static_assert(Kokkos::is_view_v && AViewType::rank == 2, "SVD: A must be a rank-2 view"); + static_assert(Kokkos::is_view_v && SViewType::rank == 1, "SVD: s must be a rank-1 view"); + static_assert(Kokkos::is_view_v && WViewType::rank == 1, "SVD: W must be a rank-1 view"); + static_assert(!std::is_same_v, + "SVD: W must be contiguous (not LayoutStride)"); using value_type = typename AViewType::non_const_value_type; - return KokkosBatched::SerialSVDInternal::invoke( - A.extent(0), A.extent(1), A.data(), A.stride(0), A.stride(1), nullptr, 0, - 0, nullptr, 0, 0, sigma.data(), sigma.stride(0), work.data()); + return KokkosBatched::SerialSVDInternal::invoke(A.extent(0), A.extent(1), A.data(), A.stride(0), + A.stride(1), nullptr, 0, 0, nullptr, 
0, 0, sigma.data(), + sigma.stride(0), work.data(), tol); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp index 34c92c2d244f..0b85b1e28ed7 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp @@ -49,8 +49,7 @@ struct SerialSVDInternal { // however this is simpler because it exploits the symmetric structure, and // the realness of the eigenvalues. template - KOKKOS_INLINE_FUNCTION static void symEigen2x2(value_type a11, value_type a21, - value_type a22, value_type& e1, + KOKKOS_INLINE_FUNCTION static void symEigen2x2(value_type a11, value_type a21, value_type a22, value_type& e1, value_type& e2) { value_type a = Kokkos::ArithTraits::one(); value_type b = -a11 - a22; @@ -67,10 +66,8 @@ struct SerialSVDInternal { // // B22 is nsub * nsub, Usub is m * nsub, and Vtsub is nsub * n template - KOKKOS_INLINE_FUNCTION static void svdStep(value_type* B, value_type* U, - value_type* Vt, int um, int vn, - int n, int Bs0, int Bs1, int Us0, - int Us1, int Vts0, int Vts1) { + KOKKOS_INLINE_FUNCTION static void svdStep(value_type* B, value_type* U, value_type* Vt, int um, int vn, int n, + int Bs0, int Bs1, int Us0, int Us1, int Vts0, int Vts1) { using KAT = Kokkos::ArithTraits; // Compute the eigenvalues of trailing 2x2 value_type dn = SVDIND(B, n - 1, n - 1); @@ -91,34 +88,30 @@ struct SerialSVDInternal { // Use Givens to zero out z in [y; z] Kokkos::pair G; value_type discard; // Don't actually write [alpha; 0] anywhere - KokkosBatched::SerialGivensInternal::invoke(y, z, &G, - &discard); + KokkosBatched::SerialGivensInternal::invoke(y, z, &G, &discard); // apply the Givens transformation to B on the right, to columns k,k+1 // B := BG(k, k+1, theta) int minrow = KOKKOSKERNELS_MACRO_MAX(0, k - 1); 
int maxrow = KOKKOSKERNELS_MACRO_MIN(n, k + 2); - KokkosBatched::SerialApplyRightGivensInternal::invoke( - G, maxrow - minrow, &SVDIND(B, minrow, k + 1), Bs0, - &SVDIND(B, minrow, k), Bs0); + KokkosBatched::SerialApplyRightGivensInternal::invoke(G, maxrow - minrow, &SVDIND(B, minrow, k + 1), + Bs0, &SVDIND(B, minrow, k), Bs0); if (Vt) { - KokkosBatched::SerialApplyLeftGivensInternal::invoke( - G, vn, &SVDIND(Vt, k + 1, 0), Vts1, &SVDIND(Vt, k, 0), Vts1); + KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, vn, &SVDIND(Vt, k + 1, 0), Vts1, + &SVDIND(Vt, k, 0), Vts1); } y = SVDIND(B, k, k); z = SVDIND(B, k + 1, k); - KokkosBatched::SerialGivensInternal::invoke(y, z, &G, - &SVDIND(B, k, k)); + KokkosBatched::SerialGivensInternal::invoke(y, z, &G, &SVDIND(B, k, k)); SVDIND(B, k + 1, k) = KAT::zero(); int mincol = k + 1; int maxcol = KOKKOSKERNELS_MACRO_MIN(n, k + 3); // apply Givens transformation to B on the left, to rows k, k + 1 // B := G(k, k+1, theta)^T * B - KokkosBatched::SerialApplyLeftGivensInternal::invoke( - G, maxcol - mincol, &SVDIND(B, k + 1, mincol), Bs1, - &SVDIND(B, k, mincol), Bs1); + KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, maxcol - mincol, &SVDIND(B, k + 1, mincol), + Bs1, &SVDIND(B, k, mincol), Bs1); if (U) { - KokkosBatched::SerialApplyRightGivensInternal::invoke( - G, um, &SVDIND(U, 0, k + 1), Us0, &SVDIND(U, 0, k), Us0); + KokkosBatched::SerialApplyRightGivensInternal::invoke(G, um, &SVDIND(U, 0, k + 1), Us0, + &SVDIND(U, 0, k), Us0); } if (k < n - 2) { y = SVDIND(B, k, k + 1); @@ -131,71 +124,65 @@ struct SerialSVDInternal { // Assumes i is not the last row. 
// U is m*m, B is n*n template - KOKKOS_INLINE_FUNCTION static void svdZeroRow(int i, value_type* B, int n, - int Bs0, int Bs1, value_type* U, - int m, int Us0, int Us1) { + KOKKOS_INLINE_FUNCTION static void svdZeroRow(int i, value_type* B, int n, int Bs0, int Bs1, value_type* U, int m, + int Us0, int Us1) { Kokkos::pair G; for (int j = i + 1; j < n; j++) { // Zero out B(i, j) against diagonal j, introducing nonzero in B(i, j + 1) - KokkosBatched::SerialGivensInternal::invoke( - SVDIND(B, j, j), SVDIND(B, i, j), &G, &SVDIND(B, j, j)); + KokkosBatched::SerialGivensInternal::invoke(SVDIND(B, j, j), SVDIND(B, i, j), &G, &SVDIND(B, j, j)); SVDIND(B, i, j) = Kokkos::ArithTraits::zero(); // Now, only need to apply givens to a single column (if not already at // the end), introducing the next nonzero if (j < n - 1) { - KokkosBatched::SerialApplyLeftGivensInternal::invoke( - G, 1, &SVDIND(B, i, j + 1), Bs1, &SVDIND(B, j, j + 1), Bs1); + KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, 1, &SVDIND(B, i, j + 1), Bs1, + &SVDIND(B, j, j + 1), Bs1); } if (U) { - KokkosBatched::SerialApplyRightGivensInternal::invoke( - G, m, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, j), Us0); + KokkosBatched::SerialApplyRightGivensInternal::invoke(G, m, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, j), + Us0); } } } template - KOKKOS_INLINE_FUNCTION static void svdZeroLastColumn(value_type* B, int n, - int Bs0, int Bs1, - value_type* Vt, int Vts0, + KOKKOS_INLINE_FUNCTION static void svdZeroLastColumn(value_type* B, int n, int Bs0, int Bs1, value_type* Vt, int Vts0, int Vts1) { // Deal with B(n-1, n-1) = 0, by chasing the superdiagonal nonzero up the // last column. 
Kokkos::pair G; for (int j = n - 2; j >= 0; j--) { - KokkosBatched::SerialGivensInternal::invoke( - SVDIND(B, j, j), SVDIND(B, j, n - 1), &G, &SVDIND(B, j, j)); + KokkosBatched::SerialGivensInternal::invoke(SVDIND(B, j, j), SVDIND(B, j, n - 1), &G, + &SVDIND(B, j, j)); SVDIND(B, j, n - 1) = Kokkos::ArithTraits::zero(); if (j != 0) { - KokkosBatched::SerialApplyRightGivensInternal::invoke( - G, 1, &SVDIND(B, j - 1, n - 1), Bs0, &SVDIND(B, j - 1, j), Bs0); + KokkosBatched::SerialApplyRightGivensInternal::invoke(G, 1, &SVDIND(B, j - 1, n - 1), Bs0, + &SVDIND(B, j - 1, j), Bs0); } if (Vt) { - KokkosBatched::SerialApplyLeftGivensInternal::invoke( - G, n, &SVDIND(Vt, n - 1, 0), Vts1, &SVDIND(Vt, j, 0), Vts1); + KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, n, &SVDIND(Vt, n - 1, 0), Vts1, + &SVDIND(Vt, j, 0), Vts1); } } } template - KOKKOS_INLINE_FUNCTION static void bidiagonalize( - int m, int n, value_type* A, int As0, int As1, value_type* U, int Us0, - int Us1, value_type* Vt, int Vts0, int Vts1, value_type* work) { + KOKKOS_INLINE_FUNCTION static void bidiagonalize(int m, int n, value_type* A, int As0, int As1, value_type* U, + int Us0, int Us1, value_type* Vt, int Vts0, int Vts1, + value_type* work) { using KAT = Kokkos::ArithTraits; value_type tau; for (int i = 0; i < n; i++) { // Eliminating column i of A below the diagonal - KokkosBatched::SerialLeftHouseholderInternal::invoke( - m - i - 1, &SVDIND(A, i, i), &SVDIND(A, i + 1, i), As0, &tau); + KokkosBatched::SerialLeftHouseholderInternal::invoke(m - i - 1, &SVDIND(A, i, i), + &SVDIND(A, i + 1, i), As0, &tau); if (n - i > 1) { KokkosBatched::SerialApplyLeftHouseholderInternal::invoke( - m - i - 1, n - i - 1, &tau, &SVDIND(A, i + 1, i), As0, - &SVDIND(A, i, i + 1), As1, &SVDIND(A, i + 1, i + 1), As0, As1, - work); + m - i - 1, n - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(A, i, i + 1), As1, &SVDIND(A, i + 1, i + 1), + As0, As1, work); } if (U) { 
KokkosBatched::SerialApplyRightHouseholderInternal::invoke( - m, m - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(U, 0, i), - Us0, &SVDIND(U, 0, i + 1), Us0, Us1, work); + m, m - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, i + 1), Us0, Us1, work); } // Zero out A subdiag explicitly (NOTE: may not be necessary...) for (int j = i + 1; j < m; j++) { @@ -203,19 +190,17 @@ struct SerialSVDInternal { } if (i < n - 2) { // Eliminating row i of A to the right of the 1st superdiagonal - KokkosBatched::SerialLeftHouseholderInternal::invoke( - n - i - 2, &SVDIND(A, i, i + 1), &SVDIND(A, i, i + 2), As1, &tau); + KokkosBatched::SerialLeftHouseholderInternal::invoke(n - i - 2, &SVDIND(A, i, i + 1), + &SVDIND(A, i, i + 2), As1, &tau); if (m - i > 1) { - KokkosBatched::SerialApplyRightHouseholderInternal::invoke< - value_type>(m - i - 1, n - i - 2, &tau, &SVDIND(A, i, i + 2), As1, - &SVDIND(A, i + 1, i + 1), As0, - &SVDIND(A, i + 1, i + 2), As0, As1, work); + KokkosBatched::SerialApplyRightHouseholderInternal::invoke( + m - i - 1, n - i - 2, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(A, i + 1, i + 1), As0, + &SVDIND(A, i + 1, i + 2), As0, As1, work); } if (Vt) { KokkosBatched::SerialApplyLeftHouseholderInternal::invoke( - n - i - 2, n, &tau, &SVDIND(A, i, i + 2), As1, - &SVDIND(Vt, i + 1, 0), Vts1, &SVDIND(Vt, i + 2, 0), Vts0, Vts1, - work); + n - i - 2, n, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(Vt, i + 1, 0), Vts1, &SVDIND(Vt, i + 2, 0), Vts0, + Vts1, work); } // Zero out A superdiag row explicitly for (int j = i + 2; j < n; j++) { @@ -229,11 +214,9 @@ struct SerialSVDInternal { // U and Vt to maintain the product U*B*Vt. At the end, the singular values // are copied to sigma. 
template - KOKKOS_INLINE_FUNCTION static void bidiSVD(int m, int n, value_type* B, - int Bs0, int Bs1, value_type* U, - int Us0, int Us1, value_type* Vt, - int Vts0, int Vts1, - value_type* sigma, int ss) { + KOKKOS_INLINE_FUNCTION static void bidiSVD(int m, int n, value_type* B, int Bs0, int Bs1, value_type* U, int Us0, + int Us1, value_type* Vt, int Vts0, int Vts1, value_type* sigma, int ss, + const value_type& tol) { using KAT = Kokkos::ArithTraits; const value_type eps = Kokkos::ArithTraits::epsilon(); int p = 0; @@ -241,8 +224,8 @@ struct SerialSVDInternal { while (true) { // Zero out tiny superdiagonal entries for (int i = 0; i < n - 1; i++) { - if (fabs(SVDIND(B, i, i + 1)) < - eps * (fabs(SVDIND(B, i, i)) + fabs(SVDIND(B, i + 1, i + 1)))) { + if (fabs(SVDIND(B, i, i + 1)) < eps * (fabs(SVDIND(B, i, i)) + fabs(SVDIND(B, i + 1, i + 1))) || + fabs(SVDIND(B, i, i + 1)) < tol) { SVDIND(B, i, i + 1) = KAT::zero(); } } @@ -281,8 +264,7 @@ struct SerialSVDInternal { } int nsub = q - p; // B22 is nsub * nsub, Usub is m * nsub, and Vtsub is nsub * n - svdStep(&SVDIND(B, p, p), &SVDIND(U, 0, p), &SVDIND(Vt, p, 0), m, n, nsub, - Bs0, Bs1, Us0, Us1, Vts0, Vts1); + svdStep(&SVDIND(B, p, p), &SVDIND(U, 0, p), &SVDIND(Vt, p, 0), m, n, nsub, Bs0, Bs1, Us0, Us1, Vts0, Vts1); } for (int i = 0; i < n; i++) { sigma[i * ss] = SVDIND(B, i, i); @@ -292,11 +274,8 @@ struct SerialSVDInternal { // Convert SVD into conventional form: singular values positive and in // descending order template - KOKKOS_INLINE_FUNCTION static void postprocessSVD(int m, int n, value_type* U, - int Us0, int Us1, - value_type* Vt, int Vts0, - int Vts1, value_type* sigma, - int ss) { + KOKKOS_INLINE_FUNCTION static void postprocessSVD(int m, int n, value_type* U, int Us0, int Us1, value_type* Vt, + int Vts0, int Vts1, value_type* sigma, int ss) { // First step: flip signs on negative singular values for (int i = 0; i < n; i++) { if (sigma[i * ss] < 0) { @@ -325,23 +304,19 @@ struct SerialSVDInternal { if 
(i != maxloc) { SVDSWAP(sigma[i * ss], sigma[maxloc * ss]); if (U) { - for (int j = 0; j < m; j++) - SVDSWAP(SVDIND(U, j, i), SVDIND(U, j, maxloc)) + for (int j = 0; j < m; j++) SVDSWAP(SVDIND(U, j, i), SVDIND(U, j, maxloc)) } if (Vt) { - for (int j = 0; j < n; j++) - SVDSWAP(SVDIND(Vt, i, j), SVDIND(Vt, maxloc, j)) + for (int j = 0; j < n; j++) SVDSWAP(SVDIND(Vt, i, j), SVDIND(Vt, maxloc, j)) } } } } template - KOKKOS_INLINE_FUNCTION static int invoke(int m, int n, value_type* A, int As0, - int As1, value_type* U, int Us0, - int Us1, value_type* Vt, int Vts0, - int Vts1, value_type* sigma, int ss, - value_type* work) { + KOKKOS_INLINE_FUNCTION static int invoke(int m, int n, value_type* A, int As0, int As1, value_type* U, int Us0, + int Us1, value_type* Vt, int Vts0, int Vts1, value_type* sigma, int ss, + value_type* work, value_type tol = Kokkos::ArithTraits::zero()) { // First, if m < n, need to instead compute (V, s, U^T) = A^T. // This just means swapping U & Vt, and implicitly transposing A, U and Vt. 
if (m < n) { @@ -354,19 +329,17 @@ struct SerialSVDInternal { SVDSWAP(Us1, Vts0); } if (U) { - KokkosBatched::SerialSetIdentityInternal::invoke(m, m, U, Us0, - Us1); + KokkosBatched::SerialSetIdentityInternal::invoke(m, m, U, Us0, Us1); } if (Vt) { - KokkosBatched::SerialSetIdentityInternal::invoke(n, n, Vt, - Vts0, Vts1); + KokkosBatched::SerialSetIdentityInternal::invoke(n, n, Vt, Vts0, Vts1); } if (m == 0 || n == 0) { // sigma is length 0, so there's nothing left to compute return 0; } bidiagonalize(m, n, A, As0, As1, U, Us0, Us1, Vt, Vts0, Vts1, work); - bidiSVD(m, n, A, As0, As1, U, Us0, Us1, Vt, Vts0, Vts1, sigma, ss); + bidiSVD(m, n, A, As0, As1, U, Us0, Us1, Vt, Vts0, Vts1, sigma, ss, tol); postprocessSVD(m, n, U, Us0, Us1, Vt, Vts0, Vts1, sigma, ss); return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur2x2_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur2x2_Serial_Internal.hpp index 22a599ed589c..41e525d2ba4e 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur2x2_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur2x2_Serial_Internal.hpp @@ -30,12 +30,9 @@ namespace KokkosBatched { /// struct SerialSchur2x2Internal { template - KOKKOS_INLINE_FUNCTION static int invoke(RealType* alpha00, RealType* alpha01, - RealType* alpha10, RealType* alpha11, - Kokkos::pair* G, - Kokkos::complex* lambda1, - Kokkos::complex* lambda2, - bool* is_complex) { + KOKKOS_INLINE_FUNCTION static int invoke(RealType* alpha00, RealType* alpha01, RealType* alpha10, RealType* alpha11, + Kokkos::pair* G, Kokkos::complex* lambda1, + Kokkos::complex* lambda2, bool* is_complex) { typedef RealType real_type; typedef Kokkos::ArithTraits ats; const real_type zero(0), one(1), half(0.5), minus_one(-1); @@ -70,8 +67,7 @@ struct SerialSchur2x2Internal { *lambda1 = Kokkos::complex(*alpha00, zero); *lambda2 = Kokkos::complex(*alpha11, zero); *is_complex = false; - } 
else if (ats::abs(*alpha00 - *alpha11) < tol && - (*alpha01) * (*alpha10) > zero) { + } else if (ats::abs(*alpha00 - *alpha11) < tol && (*alpha01) * (*alpha10) > zero) { // no rotation (already the standard schur form) *G = Kokkos::pair(one, zero); /// two real eigen values @@ -84,9 +80,8 @@ struct SerialSchur2x2Internal { const real_type b = (*alpha01) + (*alpha10); const real_type l = ats::sqrt(a * a + b * b); const real_type c = ats::sqrt(half * (one + ats::abs(b) / l)); - const real_type s = - -((half * a) / (l * c)) * (b > zero ? one : minus_one); - *G = Kokkos::pair(c, s); + const real_type s = -((half * a) / (l * c)) * (b > zero ? one : minus_one); + *G = Kokkos::pair(c, s); /// [ gamma sigma ][ alpha00 alpha01 [ gamma -sigma --> [ alpha11 /// -alpha10 /// -sigma gamma ] alpha10 alpha11 ] sigma gamma ] 0 alpha00] @@ -105,19 +100,17 @@ struct SerialSchur2x2Internal { const real_type mult_alpha_offdiags = (*alpha10) * (*alpha01); if (mult_alpha_offdiags > zero) { /// transforms the matrix into a upper triangular - const real_type sqrt_mult_alpha_offdiags = - ats::sqrt(mult_alpha_offdiags); + const real_type sqrt_mult_alpha_offdiags = ats::sqrt(mult_alpha_offdiags); /// redefine the rotation matrix // const real_type sqrt_abs_alpha01 = ats::sqrt(ats::abs(*alpha01)); // const real_type sqrt_abs_alpha10 = ats::sqrt(ats::abs(*alpha10)); const real_type abs_sum_offidags = ats::abs((*alpha01) + (*alpha10)); - const real_type c1 = ats::sqrt(ats::abs(*alpha01) / abs_sum_offidags); - const real_type s1 = ats::sqrt(ats::abs(*alpha10) / abs_sum_offidags); - const real_type sign_alpha10 = *alpha10 > zero ? one : minus_one; + const real_type c1 = ats::sqrt(ats::abs(*alpha01) / abs_sum_offidags); + const real_type s1 = ats::sqrt(ats::abs(*alpha10) / abs_sum_offidags); + const real_type sign_alpha10 = *alpha10 > zero ? 
one : minus_one; - *G = Kokkos::pair(c * c1 - s * s1, - c * s1 + s * c1); + *G = Kokkos::pair(c * c1 - s * s1, c * s1 + s * c1); /// apply rotation to 2x2 matrix so that alpha10 becomes zero *alpha00 = tmp + sign_alpha10 * sqrt_mult_alpha_offdiags; @@ -131,12 +124,10 @@ struct SerialSchur2x2Internal { *is_complex = false; } else { /// two complex eigen values - const real_type sqrt_mult_alpha_offdiags = - ats::sqrt(-mult_alpha_offdiags); - *lambda1 = Kokkos::complex(tmp, sqrt_mult_alpha_offdiags); - *lambda2 = - Kokkos::complex(lambda1->real(), -lambda1->imag()); - *is_complex = true; + const real_type sqrt_mult_alpha_offdiags = ats::sqrt(-mult_alpha_offdiags); + *lambda1 = Kokkos::complex(tmp, sqrt_mult_alpha_offdiags); + *lambda2 = Kokkos::complex(lambda1->real(), -lambda1->imag()); + *is_complex = true; } } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur_Serial_Internal.hpp index c7f35d5c4f87..c6d55b301bf0 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Schur_Serial_Internal.hpp @@ -68,33 +68,27 @@ struct SerialSchurInternal { /// returns -1. template KOKKOS_INLINE_FUNCTION static int invoke(const int m, - /* */ RealType *H, const int hs0, - const int hs1, - /* */ RealType *Z, const int zs0, - const int zs1, - /* */ RealType *w, const int wlen, - const bool restart = false, + /* */ RealType *H, const int hs0, const int hs1, + /* */ RealType *Z, const int zs0, const int zs1, + /* */ RealType *w, const int wlen, const bool restart = false, const int user_max_iteration = -1) { typedef RealType real_type; typedef Kokkos::ArithTraits ats; const real_type /* one(1), */ zero(0), tol = 1e2 * ats::epsilon(); const int max_iteration = user_max_iteration < 0 ? 
300 : user_max_iteration; - if (wlen < m * 5) - Kokkos::abort("Error: provided workspace is smaller than 3*m"); + if (wlen < m * 5) Kokkos::abort("Error: provided workspace is smaller than 3*m"); int r_val = 0; if (restart) { - if (m <= 2) - Kokkos::abort("Error: restart option cannot be used for m=1 or m=2"); + if (m <= 2) Kokkos::abort("Error: restart option cannot be used for m=1 or m=2"); } else { /// do not touch input /// SerialSetIdentityInternal::invoke(m, Z, zs0, zs1); } // workspaces - real_type *subdiags = w; - Kokkos::pair *Gs = - (Kokkos::pair *)(w + m); + real_type *subdiags = w; + Kokkos::pair *Gs = (Kokkos::pair *)(w + m); if (!restart) { /// initialize workspace and Gs for (int i = 0; i < m; ++i) subdiags[i] = zero; @@ -111,8 +105,7 @@ struct SerialSchurInternal { bool is_complex; Kokkos::complex lambda1, lambda2; Kokkos::pair G; - SerialSchur2x2Internal::invoke(H, H + hs1, H + hs0, H + hs, &G, - &lambda1, &lambda2, &is_complex); + SerialSchur2x2Internal::invoke(H, H + hs1, H + hs0, H + hs, &G, &lambda1, &lambda2, &is_complex); G.second = -G.second; // transpose SerialApplyRightGivensInternal::invoke(G, 2, Z, zs0, Z + zs1, zs0); @@ -171,49 +164,37 @@ struct SerialSchurInternal { real_type *sub2x2 = H + (mend - 2) * hs; if (2 == mdiff) { Kokkos::pair G; - SerialSchur2x2Internal::invoke(sub2x2, sub2x2 + hs1, - sub2x2 + hs0, sub2x2 + hs, &G, - &lambda1, &lambda2, &is_complex); + SerialSchur2x2Internal::invoke(sub2x2, sub2x2 + hs1, sub2x2 + hs0, sub2x2 + hs, &G, &lambda1, &lambda2, + &is_complex); subdiags[mend - 1] = sub2x2[hs0]; /// apply G' from left G.second = -G.second; - SerialApplyLeftGivensInternal::invoke( - G, m - mend, sub2x2 + 2 * hs1, hs1, sub2x2 + hs0 + 2 * hs1, - hs1); + SerialApplyLeftGivensInternal::invoke(G, m - mend, sub2x2 + 2 * hs1, hs1, sub2x2 + hs0 + 2 * hs1, hs1); /// apply (G')' from right - SerialApplyRightGivensInternal::invoke( - G, mend - 2, sub2x2 - mend_minus_two_mult_hs0, hs0, - sub2x2 + hs1 - mend_minus_two_mult_hs0, 
hs0); + SerialApplyRightGivensInternal::invoke(G, mend - 2, sub2x2 - mend_minus_two_mult_hs0, hs0, + sub2x2 + hs1 - mend_minus_two_mult_hs0, hs0); sub2x2[hs0] = zero; /// apply (G')' from right to compute Z - SerialApplyRightGivensInternal::invoke( - G, m, Z + (mend - 2) * zs1, zs0, Z + (mend - 1) * zs1, zs0); + SerialApplyRightGivensInternal::invoke(G, m, Z + (mend - 2) * zs1, zs0, Z + (mend - 1) * zs1, zs0); } else { - SerialWilkinsonShiftInternal::invoke( - sub2x2[0], sub2x2[hs1], sub2x2[hs0], sub2x2[hs], &lambda1, - &lambda2, &is_complex); + SerialWilkinsonShiftInternal::invoke(sub2x2[0], sub2x2[hs1], sub2x2[hs0], sub2x2[hs], &lambda1, + &lambda2, &is_complex); - SerialFrancisInternal::invoke(mbeg, mend, m, H, hs0, hs1, - lambda1, lambda2, is_complex, Gs, - true); + SerialFrancisInternal::invoke(mbeg, mend, m, H, hs0, hs1, lambda1, lambda2, is_complex, Gs, true); /* */ auto &val1 = *(sub2x2 + hs0); /* */ auto &val2 = *(sub2x2 - hs1); const auto abs_val1 = ats::abs(val1); const auto abs_val2 = ats::abs(val2); for (int i = mbeg; i < (mend - 1); ++i) { - const Kokkos::pair G0( - Gs[2 * i].first, -Gs[2 * i].second); - const Kokkos::pair G1( - Gs[2 * i + 1].first, -Gs[2 * i + 1].second); - SerialApplyRightGivensInternal::invoke( - G0, m, Z + i * zs1, zs0, Z + i * zs1 + 1 * zs1, zs0); - SerialApplyRightGivensInternal::invoke( - G1, m, Z + i * zs1, zs0, Z + i * zs1 + 2 * zs1, zs0); + const Kokkos::pair G0(Gs[2 * i].first, -Gs[2 * i].second); + const Kokkos::pair G1(Gs[2 * i + 1].first, -Gs[2 * i + 1].second); + SerialApplyRightGivensInternal::invoke(G0, m, Z + i * zs1, zs0, Z + i * zs1 + 1 * zs1, zs0); + SerialApplyRightGivensInternal::invoke(G1, m, Z + i * zs1, zs0, Z + i * zs1 + 2 * zs1, zs0); } /// convergence check @@ -222,28 +203,23 @@ struct SerialSchurInternal { } else if (abs_val2 < tol) { /// preserve the standard schur form Kokkos::pair G; - SerialSchur2x2Internal::invoke( - sub2x2, sub2x2 + hs1, sub2x2 + hs0, sub2x2 + hs, &G, - &lambda1, &lambda2, 
&is_complex); + SerialSchur2x2Internal::invoke(sub2x2, sub2x2 + hs1, sub2x2 + hs0, sub2x2 + hs, &G, &lambda1, + &lambda2, &is_complex); subdiags[mend - 1] = val1; /// apply G' from left G.second = -G.second; - SerialApplyLeftGivensInternal::invoke( - G, m - mend, sub2x2 + 2 * hs1, hs1, - sub2x2 + hs0 + 2 * hs1, hs1); + SerialApplyLeftGivensInternal::invoke(G, m - mend, sub2x2 + 2 * hs1, hs1, sub2x2 + hs0 + 2 * hs1, + hs1); // apply (G')' from right - SerialApplyRightGivensInternal::invoke( - G, mend - 2, sub2x2 - mend_minus_two_mult_hs0, hs0, - sub2x2 + hs1 - mend_minus_two_mult_hs0, hs0); + SerialApplyRightGivensInternal::invoke(G, mend - 2, sub2x2 - mend_minus_two_mult_hs0, hs0, + sub2x2 + hs1 - mend_minus_two_mult_hs0, hs0); val1 = zero; val2 = zero; // apply (G')' from right - SerialApplyRightGivensInternal::invoke( - G, m, Z + (mend - 2) * zs1, zs0, Z + (mend - 1) * zs1, - zs0); + SerialApplyRightGivensInternal::invoke(G, m, Z + (mend - 2) * zs1, zs0, Z + (mend - 1) * zs1, zs0); } } } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Impl.hpp index e826c4cbb707..9219f3a9ecf2 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Impl.hpp @@ -29,8 +29,7 @@ namespace KokkosBatched { template KOKKOS_INLINE_FUNCTION int SerialSetIdentity::invoke(const AViewType &A) { - return SerialSetIdentityInternal::invoke(A.extent(0), A.extent(1), A.data(), - A.stride_0(), A.stride_1()); + return SerialSetIdentityInternal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1()); } /// @@ -39,10 +38,8 @@ KOKKOS_INLINE_FUNCTION int SerialSetIdentity::invoke(const AViewType &A) { template template -KOKKOS_INLINE_FUNCTION int TeamSetIdentity::invoke( - const MemberType &member, const AViewType &A) { - return TeamSetIdentityInternal::invoke(member, 
A.extent(0), A.extent(1), - A.data(), A.stride_0(), A.stride_1()); +KOKKOS_INLINE_FUNCTION int TeamSetIdentity::invoke(const MemberType &member, const AViewType &A) { + return TeamSetIdentityInternal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1()); } } // end namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Internal.hpp index 7a8976752618..f5afb5c79cb2 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetIdentity_Internal.hpp @@ -28,8 +28,7 @@ namespace KokkosBatched { struct SerialSetIdentityInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { const ValueType one(1), zero(0); for (int j = 0; j < n; ++j) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -49,10 +48,8 @@ struct SerialSetIdentityInternal { /// ================== struct TeamSetIdentityInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { const ValueType one(1), zero(0); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -70,15 +67,12 @@ struct TeamSetIdentityInternal { /// ======================== struct TeamVectorSetIdentityInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, 
const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { const ValueType one(1), zero(0); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { A[i * as0 + j * as1] = i == j ? one : zero; }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), + [&](const int &j) { A[i * as0 + j * as1] = i == j ? one : zero; }); }); return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetTriangular_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetTriangular_Internal.hpp index 844c3f72c511..09e94ab5f3f4 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetTriangular_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SetTriangular_Internal.hpp @@ -27,11 +27,8 @@ namespace KokkosBatched { /// ==================== struct SerialSetLowerTriangularInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const int dist, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const int dist, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { for (int j = 0; j < n; ++j) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -47,18 +44,14 @@ struct SerialSetLowerTriangularInternal { struct TeamVectorSetLowerTriangularInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const int dist, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const int dist, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + /* */ 
ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { const int jdist = j + dist; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), - [=](const int &i) { - if (i >= jdist) A[i * as0 + j * as1] = alpha; - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), [=](const int &i) { + if (i >= jdist) A[i * as0 + j * as1] = alpha; + }); }); return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ShiftedTrsv_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ShiftedTrsv_Serial_Internal.hpp index 2e356f818e43..c0963447c40f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ShiftedTrsv_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ShiftedTrsv_Serial_Internal.hpp @@ -36,19 +36,16 @@ namespace KokkosBatched { struct SerialShiftedTrsvInternalLower { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType lambda, - const ValueTypeA *KOKKOS_RESTRICT A, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType lambda, const ValueTypeA *KOKKOS_RESTRICT A, const int as0, const int as1, - /* */ ValueTypeB *KOKKOS_RESTRICT b, - const int bs0, + /* */ ValueTypeB *KOKKOS_RESTRICT b, const int bs0, const int *KOKKOS_RESTRICT blks) { const int as = as0 + as1; int p = 0; for (; p < m;) { const int blk = blks[p], iend = m - p - blk; - assert(((blk == 1) || (blk == 2)) && - "ShiftedTrsvLower: blocks are not 1x1 or 2x2"); + assert(((blk == 1) || (blk == 2)) && "ShiftedTrsvLower: blocks are not 1x1 or 2x2"); if (blk == 1) { const auto alpha11 = A[p * as] - lambda; ValueTypeB *KOKKOS_RESTRICT beta1 = b + p * bs0; @@ -84,9 +81,7 @@ struct SerialShiftedTrsvInternalLower { const ValueTypeA *KOKKOS_RESTRICT A21 = A + p * as + 2 * as0; ValueTypeB *KOKKOS_RESTRICT b2 = beta1 + 2 * bs0; - for (int i = 0; i < iend; ++i) - b2[i * bs0] -= - (A21[i * 
as0] * (*beta1) + A21[i * as0 + as1] * (*beta2)); + for (int i = 0; i < iend; ++i) b2[i * bs0] -= (A21[i * as0] * (*beta1) + A21[i * as0 + as1] * (*beta2)); } } p += blk; @@ -101,11 +96,9 @@ struct SerialShiftedTrsvInternalLower { struct SerialShiftedTrsvInternalUpper { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType lambda, - const ValueTypeA *KOKKOS_RESTRICT A, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType lambda, const ValueTypeA *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueTypeB *KOKKOS_RESTRICT b, - const int bs0, + /**/ ValueTypeB *KOKKOS_RESTRICT b, const int bs0, const int *KOKKOS_RESTRICT blks) { const int as = as0 + as1; @@ -114,10 +107,9 @@ struct SerialShiftedTrsvInternalUpper { int p = m - 1; for (; p >= 0;) { const int blk = blks[p], iend = p + 1 - blk; - assert(((blk == 1) || (blk == 2)) && - "ShiftedTrsvUpper: blocks are not 1x1 or 2x2"); + assert(((blk == 1) || (blk == 2)) && "ShiftedTrsvUpper: blocks are not 1x1 or 2x2"); if (blk == 1) { - const auto alpha11 = A[p * as] - lambda; + const auto alpha11 = A[p * as] - lambda; /**/ ValueTypeB *KOKKOS_RESTRICT beta1 = b + p * bs0; // with KOKKOS_RESTRICT a compiler assumes that the pointer is not @@ -148,9 +140,7 @@ struct SerialShiftedTrsvInternalUpper { if (iend) { const ValueTypeA *KOKKOS_RESTRICT A01 = A + p_minus_one * as1; - for (int i = 0; i < iend; ++i) - b0[i * bs0] -= - (A01[i * as0] * (*beta1) + A01[i * as0 + as1] * (*beta2)); + for (int i = 0; i < iend; ++i) b0[i * bs0] -= (A01[i * as0] * (*beta1) + A01[i * as0 + as1] * (*beta2)); } } p -= blk; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Impl.hpp index 4f6f81216da4..3b85a26294b2 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Impl.hpp +++ 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Impl.hpp @@ -28,26 +28,21 @@ namespace KokkosBatched { /// =============== template struct TeamVectorSolveUTV { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int matrix_rank, const UViewType &U, - const TViewType &T, const VViewType &V, const pViewType &p, - const XViewType &X, const BViewType &B, const wViewType &w) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int matrix_rank, const UViewType &U, + const TViewType &T, const VViewType &V, const pViewType &p, + const XViewType &X, const BViewType &B, const wViewType &w) { if (BViewType::rank == 1) - TeamVectorSolveUTV_Internal::invoke( - member, matrix_rank, T.extent(0), V.extent(0), U.data(), U.stride(0), - U.stride(1), T.data(), T.stride(0), T.stride(1), V.data(), - V.stride(0), V.stride(1), p.data(), p.stride(0), X.data(), - X.stride(0), B.data(), B.stride(0), w.data()); + TeamVectorSolveUTV_Internal::invoke(member, matrix_rank, T.extent(0), V.extent(0), U.data(), U.stride(0), + U.stride(1), T.data(), T.stride(0), T.stride(1), V.data(), V.stride(0), + V.stride(1), p.data(), p.stride(0), X.data(), X.stride(0), B.data(), + B.stride(0), w.data()); else - TeamVectorSolveUTV_Internal::invoke( - member, matrix_rank, T.extent(0), V.extent(0), B.extent(1), U.data(), - U.stride(0), U.stride(1), T.data(), T.stride(0), T.stride(1), - V.data(), V.stride(0), V.stride(1), p.data(), p.stride(0), X.data(), - X.stride(0), X.stride(1), B.data(), B.stride(0), B.stride(1), - w.data()); + TeamVectorSolveUTV_Internal::invoke(member, matrix_rank, T.extent(0), V.extent(0), B.extent(1), U.data(), + U.stride(0), U.stride(1), T.data(), T.stride(0), T.stride(1), V.data(), + V.stride(0), V.stride(1), p.data(), p.stride(0), X.data(), X.stride(0), + X.stride(1), B.data(), B.stride(0), B.stride(1), w.data()); return 0; } }; diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp index 71050504aa5d..18440745eb90 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp @@ -33,14 +33,13 @@ namespace KokkosBatched { /// =================== struct TeamVectorSolveUTV_Internal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int matrix_rank, const int m, - const int /*n*/, const ValueType *U, const int us0, const int us1, - const ValueType *T, const int ts0, const int ts1, const ValueType *V, - const int vs0, const int vs1, const IntType *p, const int ps0, - /* */ ValueType *x, const int xs0, - /* */ ValueType *b, const int bs0, - /* */ ValueType *w) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int matrix_rank, const int m, + const int /*n*/, const ValueType *U, const int us0, const int us1, + const ValueType *T, const int ts0, const int ts1, const ValueType *V, + const int vs0, const int vs1, const IntType *p, const int ps0, + /* */ ValueType *x, const int xs0, + /* */ ValueType *b, const int bs0, + /* */ ValueType *w) { typedef ValueType value_type; // typedef IntType int_type; @@ -49,40 +48,36 @@ struct TeamVectorSolveUTV_Internal { if (matrix_rank < m) { /// w = U^T b - KokkosBlas::Impl::TeamVectorGemvInternal::invoke( - member, matrix_rank, m, one, U, us1, us0, b, bs0, zero, w, ws0); + KokkosBlas::Impl::TeamVectorGemvInternal::invoke(member, matrix_rank, m, one, U, us1, us0, + b, bs0, zero, w, ws0); /// w = T^{-1} w - TeamVectorTrsvInternalLower::invoke( - member, false, matrix_rank, one, T, ts0, ts1, w, ws0); + TeamVectorTrsvInternalLower::invoke(member, false, matrix_rank, one, T, ts0, ts1, w, ws0); /// x = V^T w - 
KokkosBlas::Impl::TeamVectorGemvInternal::invoke( - member, m, matrix_rank, one, V, vs1, vs0, w, ws0, zero, x, xs0); + KokkosBlas::Impl::TeamVectorGemvInternal::invoke(member, m, matrix_rank, one, V, vs1, vs0, + w, ws0, zero, x, xs0); } else { - KokkosBlas::Impl::TeamVectorGemvInternal::invoke( - member, matrix_rank, m, one, U, us1, us0, b, bs0, zero, x, xs0); + KokkosBlas::Impl::TeamVectorGemvInternal::invoke(member, matrix_rank, m, one, U, us1, us0, + b, bs0, zero, x, xs0); - TeamVectorTrsvInternalUpper::invoke( - member, false, matrix_rank, one, T, ts0, ts1, x, xs0); + TeamVectorTrsvInternalUpper::invoke(member, false, matrix_rank, one, T, ts0, ts1, x, xs0); } /// x = P^T x - TeamVectorApplyPivotVectorBackwardInternal ::invoke(member, m, p, ps0, x, - xs0); + TeamVectorApplyPivotVectorBackwardInternal ::invoke(member, m, p, ps0, x, xs0); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int matrix_rank, const int m, const int n, - const int nrhs, const ValueType *U, const int us0, const int us1, - const ValueType *T, const int ts0, const int ts1, const ValueType *V, - const int vs0, const int vs1, const IntType *p, const int ps0, - /* */ ValueType *X, const int xs0, const int xs1, - /* */ ValueType *B, const int bs0, const int bs1, - /* */ ValueType *w) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int matrix_rank, const int m, const int n, + const int nrhs, const ValueType *U, const int us0, const int us1, + const ValueType *T, const int ts0, const int ts1, const ValueType *V, + const int vs0, const int vs1, const IntType *p, const int ps0, + /* */ ValueType *X, const int xs0, const int xs1, + /* */ ValueType *B, const int bs0, const int bs1, + /* */ ValueType *w) { typedef ValueType value_type; // typedef IntType int_type; @@ -96,37 +91,33 @@ struct TeamVectorSolveUTV_Internal { /// T is matrix_rank x matrix_rank /// V is matrix_rank x n /// W = U^T B - 
TeamVectorGemmInternal::invoke( - member, matrix_rank, nrhs, m, one, U, us1, us0, B, bs0, bs1, zero, W, - ws0, ws1); + TeamVectorGemmInternal::invoke(member, matrix_rank, nrhs, m, one, U, us1, us0, B, bs0, bs1, + zero, W, ws0, ws1); member.team_barrier(); /// W = T^{-1} W - TeamVectorTrsmInternalLeftLower::invoke( - member, false, matrix_rank, nrhs, one, T, ts0, ts1, W, ws0, ws1); + TeamVectorTrsmInternalLeftLower::invoke(member, false, matrix_rank, nrhs, one, T, ts0, ts1, + W, ws0, ws1); member.team_barrier(); /// X = V^T W - TeamVectorGemmInternal::invoke( - member, n, nrhs, matrix_rank, one, V, vs1, vs0, W, ws0, ws1, zero, X, - xs0, xs1); + TeamVectorGemmInternal::invoke(member, n, nrhs, matrix_rank, one, V, vs1, vs0, W, ws0, ws1, + zero, X, xs0, xs1); member.team_barrier(); } else { /// W = U^T B - TeamVectorGemmInternal::invoke( - member, matrix_rank, nrhs, m, one, U, us1, us0, B, bs0, bs1, zero, X, - xs0, xs1); + TeamVectorGemmInternal::invoke(member, matrix_rank, nrhs, m, one, U, us1, us0, B, bs0, bs1, + zero, X, xs0, xs1); member.team_barrier(); /// X = T^{-1} X - TeamVectorTrsmInternalLeftUpper::invoke( - member, false, matrix_rank, nrhs, one, T, ts0, ts1, X, xs0, xs1); + TeamVectorTrsmInternalLeftUpper::invoke(member, false, matrix_rank, nrhs, one, T, ts0, ts1, + X, xs0, xs1); member.team_barrier(); } /// X = P^T X - TeamVectorApplyPivotMatrixBackwardInternal ::invoke(member, nrhs, n, p, ps0, - X, xs0, xs1); + TeamVectorApplyPivotMatrixBackwardInternal ::invoke(member, nrhs, n, p, ps0, X, xs0, xs1); return 0; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Impl.hpp new file mode 100644 index 000000000000..853e453b893b --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Impl.hpp @@ -0,0 +1,146 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSBATCHED_TBSV_SERIAL_IMPL_HPP_ +#define KOKKOSBATCHED_TBSV_SERIAL_IMPL_HPP_ + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Tbsv_Serial_Internal.hpp" + +namespace KokkosBatched { + +template +KOKKOS_INLINE_FUNCTION static int checkTbsvInput([[maybe_unused]] const AViewType &A, + [[maybe_unused]] const XViewType &x, [[maybe_unused]] const int k) { + static_assert(Kokkos::is_view::value, "KokkosBatched::tbsv: AViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::tbsv: XViewType is not a Kokkos::View."); + static_assert(AViewType::rank == 2, "KokkosBatched::tbsv: AViewType must have rank 2."); + static_assert(XViewType::rank == 1, "KokkosBatched::tbsv: XViewType must have rank 1."); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + if (k < 0) { + Kokkos::printf( + "KokkosBatched::tbsv: input parameter k must not be less than 0: k = " + "%d\n", + k); + return 1; + } + + const int lda = A.extent(0), n = A.extent(1); + if (lda < (k + 1)) { + Kokkos::printf( + "KokkosBatched::tbsv: leading dimension of A must be smaller than k+1: " + "lda = %d, k = %d\n", + lda, k); + return 1; + } + + const int nx = x.extent(0); + if (nx != n) { + Kokkos::printf( + "KokkosBatched::tbsv: Dimensions of x and A do not match: X: %d, A: %d " + "x %d\n" + "x.extent(0) must be equal to A.extent(1)\n", + nx, lda, n); + return 1; + } +#endif + return 0; +} + +//// Lower non-transpose //// +template +struct SerialTbsv { + template + KOKKOS_INLINE_FUNCTION static 
int invoke(const AViewType &A, const XViewType &x, const int k) { + auto info = checkTbsvInput(A, x, k); + if (info) return info; + + return SerialTbsvInternalLower::invoke( + ArgDiag::use_unit_diag, A.extent(1), A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), k); + } +}; + +//// Lower transpose //// +template +struct SerialTbsv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &x, const int k) { + auto info = checkTbsvInput(A, x, k); + if (info) return info; + + return SerialTbsvInternalLowerTranspose::invoke( + ArgDiag::use_unit_diag, false, A.extent(1), A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), k); + } +}; + +//// Lower conjugate-transpose //// +template +struct SerialTbsv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &x, const int k) { + auto info = checkTbsvInput(A, x, k); + if (info) return info; + + return SerialTbsvInternalLowerTranspose::invoke( + ArgDiag::use_unit_diag, true, A.extent(1), A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), k); + } +}; + +//// Upper non-transpose //// +template +struct SerialTbsv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &x, const int k) { + auto info = checkTbsvInput(A, x, k); + if (info) return info; + + return SerialTbsvInternalUpper::invoke( + ArgDiag::use_unit_diag, A.extent(1), A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), k); + } +}; + +//// Upper transpose //// +template +struct SerialTbsv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &x, const int k) { + auto info = checkTbsvInput(A, x, k); + if (info) return info; + + return SerialTbsvInternalUpperTranspose::invoke( + ArgDiag::use_unit_diag, false, A.extent(1), A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), k); + } +}; + +//// Upper conjugate-transpose //// +template +struct SerialTbsv { + template + 
KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &x, const int k) { + auto info = checkTbsvInput(A, x, k); + if (info) return info; + + return SerialTbsvInternalUpperTranspose::invoke( + ArgDiag::use_unit_diag, true, A.extent(1), A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), k); + } +}; + +} // namespace KokkosBatched + +#endif // KOKKOSBATCHED_TBSV_SERIAL_IMPL_HPP_ diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Internal.hpp new file mode 100644 index 000000000000..64221008ccc4 --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Tbsv_Serial_Internal.hpp @@ -0,0 +1,199 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSBATCHED_TBSV_SERIAL_INTERNAL_HPP_ +#define KOKKOSBATCHED_TBSV_SERIAL_INTERNAL_HPP_ + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +#include "KokkosBatched_Util.hpp" + +namespace KokkosBatched { + +/// +/// Serial Internal Impl +/// ==================== + +/// +/// Lower, Non-Transpose +/// + +template +struct SerialTbsvInternalLower { + template + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int an, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k); +}; + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialTbsvInternalLower::invoke( + const bool use_unit_diag, const int an, const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int j = 0; j < an; ++j) { + if (x[j * xs0] != static_cast(0)) { + if (!use_unit_diag) x[j * xs0] = x[j * xs0] / A[0 + j * as1]; + + auto temp = x[j * xs0]; +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = j + 1; i < Kokkos::min(an, j + k + 1); ++i) { + x[i * xs0] = x[i * xs0] - temp * A[(i - j) * as0 + j * as1]; + } + } + } + + return 0; +} + +/// +/// Lower, Transpose +/// + +template +struct SerialTbsvInternalLowerTranspose { + template + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const bool do_conj, const int an, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k); +}; + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialTbsvInternalLowerTranspose::invoke( + const bool use_unit_diag, const bool do_conj, const int an, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, 
const int k) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int j = an - 1; j >= 0; --j) { + auto temp = x[j * xs0]; + + if (do_conj) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = Kokkos::min(an - 1, j + k); i > j; --i) { + temp -= Kokkos::ArithTraits::conj(A[(i - j) * as0 + j * as1]) * x[i * xs0]; + } + if (!use_unit_diag) temp = temp / Kokkos::ArithTraits::conj(A[0 + j * as1]); + } else { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = Kokkos::min(an - 1, j + k); i > j; --i) { + temp -= A[(i - j) * as0 + j * as1] * x[i * xs0]; + } + if (!use_unit_diag) temp = temp / A[0 + j * as1]; + } + x[j * xs0] = temp; + } + + return 0; +} + +/// +/// Upper, Non-Transpose +/// + +template +struct SerialTbsvInternalUpper { + template + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int an, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k); +}; + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialTbsvInternalUpper::invoke( + const bool use_unit_diag, const int an, const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int j = an - 1; j >= 0; --j) { + if (x[j * xs0] != 0) { + if (!use_unit_diag) x[j * xs0] = x[j * xs0] / A[k * as0 + j * as1]; + + auto temp = x[j * xs0]; +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = j - 1; i >= Kokkos::max(0, j - k); --i) { + x[i * xs0] = x[i * xs0] - temp * A[(k - j + i) * as0 + j * as1]; + } + } + } + + return 0; +} + +/// +/// Upper, Transpose +/// + +template +struct SerialTbsvInternalUpperTranspose { + template + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const bool do_conj, const int an, + const ValueType *KOKKOS_RESTRICT A, 
const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k); +}; + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialTbsvInternalUpperTranspose::invoke( + const bool use_unit_diag, const bool do_conj, const int an, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, + /**/ ValueType *KOKKOS_RESTRICT x, const int xs0, const int k) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int j = 0; j < an; j++) { + auto temp = x[j * xs0]; + if (do_conj) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = Kokkos::max(0, j - k); i < j; ++i) { + temp -= Kokkos::ArithTraits::conj(A[(i + k - j) * as0 + j * as1]) * x[i * xs0]; + } + if (!use_unit_diag) temp = temp / Kokkos::ArithTraits::conj(A[k * as0 + j * as1]); + } else { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = Kokkos::max(0, j - k); i < j; ++i) { + temp -= A[(i + k - j) * as0 + j * as1] * x[i * xs0]; + } + if (!use_unit_diag) temp = temp / A[k * as0 + j * as1]; + } + x[j * xs0] = temp; + } + + return 0; +} + +} // namespace KokkosBatched + +#endif // KOKKOSBATCHED_TBSV_SERIAL_INTERNAL_HPP_ diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Impl.hpp index 044af0814c05..6313d817c6a6 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Impl.hpp @@ -23,164 +23,116 @@ namespace KokkosBatched { //// Lower non-transpose //// template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalLeftLower::invoke( - 
ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalRightLower::invoke( - ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; //// Lower transpose ///// template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalLeftUpper::invoke( - ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const 
BViewType &B) { return SerialTrmmInternalRightUpper::invoke( - ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; //// Lower conjugate-transpose //// template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalLeftUpper::invoke( - ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalRightUpper::invoke( - ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; //// Upper non-transpose //// template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + 
KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalLeftUpper::invoke( - ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalRightUpper::invoke( - ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; //// Upper transpose ///// template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType 
alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalRightLower::invoke( - ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, false, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; //// Upper conjugate-transpose //// template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrmm { +struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { return SerialTrmmInternalRightLower::invoke( - ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), - B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), - B.stride_0(), B.stride_1()); + ArgDiag::use_unit_diag, true, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; } // namespace KokkosBatched diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Internal.hpp index 3e4024974b06..c36d04213d69 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trmm_Serial_Internal.hpp @@ -27,41 +27,37 @@ namespace KokkosBatched { template struct SerialTrmmInternalLeftLower { template - KOKKOS_INLINE_FUNCTION static int invoke( - const bool use_unit_diag, const bool do_conj, const int am, const int an, - const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const bool do_conj, const int am, const int an, + const int bm, const int bn, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template struct SerialTrmmInternalLeftUpper { template - KOKKOS_INLINE_FUNCTION static int invoke( - const bool use_unit_diag, const bool do_conj, const int am, const int an, - const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const bool do_conj, const int am, const int an, + const int bm, const int bn, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template struct SerialTrmmInternalRightLower { template - KOKKOS_INLINE_FUNCTION static int invoke( - const bool use_unit_diag, const bool do_conj, const int am, const int an, - const int bm, const int bn, const 
ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const bool do_conj, const int am, const int an, + const int bm, const int bn, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template struct SerialTrmmInternalRightUpper { template - KOKKOS_INLINE_FUNCTION static int invoke( - const bool use_unit_diag, const bool do_conj, const int am, const int an, - const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const bool do_conj, const int am, const int an, + const int bm, const int bn, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; // ech-note: use_unit_diag intentionally ignored for now. Compiler can optimize @@ -70,11 +66,9 @@ struct SerialTrmmInternalRightUpper { // if use_unit_diag. 
template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrmmInternalLeftLower::invoke( - const bool /*use_unit_diag*/, const bool do_conj, const int am, - const int an, const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrmmInternalLeftLower::invoke( + const bool /*use_unit_diag*/, const bool do_conj, const int am, const int an, const int bm, const int bn, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); typedef Kokkos::ArithTraits AT; @@ -87,27 +81,23 @@ SerialTrmmInternalLeftLower::invoke( //} // printf("SerialTrmmInternalLeftLower\n"); - auto dotLowerLeftConj = - [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, - const int __as1, const int __left_row, ValueType *KOKKOS_RESTRICT __B, - const int __bs0, const int __bs1, const int __right_col) { - auto B_elems = __left_row; - ScalarType sum = 0; + auto dotLowerLeftConj = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, + const int __left_row, ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, + const int __right_col) { + auto B_elems = __left_row; + ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int i = 0; i <= B_elems; i++) { - // sum += A[left_row, i] * B[i, right_col] - sum += AT::conj(__A[__left_row * __as0 + i * __as1]) * - __B[i * __bs0 + __bs1 * __right_col]; - } - return sum; - }; + for (int i = 0; i <= B_elems; i++) { + // sum += A[left_row, i] * B[i, right_col] + sum += AT::conj(__A[__left_row * __as0 + i * __as1]) * __B[i * __bs0 + __bs1 * __right_col]; + } + return sum; + }; - auto dotLowerLeft = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, - const int __as1, const int __left_row, - ValueType *KOKKOS_RESTRICT __B, const int __bs0, - const int __bs1, 
const int __right_col) { + auto dotLowerLeft = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, const int __left_row, + ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, const int __right_col) { auto B_elems = __left_row; ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -115,8 +105,7 @@ SerialTrmmInternalLeftLower::invoke( #endif for (int i = 0; i <= B_elems; i++) { // sum += A[left_row, i] * B[i, right_col] - sum += __A[__left_row * __as0 + i * __as1] * - __B[i * __bs0 + __bs1 * __right_col]; + sum += __A[__left_row * __as0 + i * __as1] * __B[i * __bs0 + __bs1 * __right_col]; } return sum; }; @@ -126,8 +115,7 @@ SerialTrmmInternalLeftLower::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(bm, bn, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -138,8 +126,7 @@ SerialTrmmInternalLeftLower::invoke( #endif for (int n = 0; n < right_n; n++) { if (do_conj) { - B[m * bs0 + n * bs1] = - dotLowerLeftConj(A, as0, as1, m, B, bs0, bs1, n); + B[m * bs0 + n * bs1] = dotLowerLeftConj(A, as0, as1, m, B, bs0, bs1, n); } else { B[m * bs0 + n * bs1] = dotLowerLeft(A, as0, as1, m, B, bs0, bs1, n); } @@ -155,11 +142,9 @@ SerialTrmmInternalLeftLower::invoke( // if use_unit_diag. 
template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrmmInternalRightLower::invoke( - const bool /*use_unit_diag*/, const bool do_conj, const int am, - const int an, const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrmmInternalRightLower::invoke( + const bool /*use_unit_diag*/, const bool do_conj, const int am, const int an, const int bm, const int bn, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); typedef Kokkos::ArithTraits AT; @@ -174,11 +159,9 @@ SerialTrmmInternalRightLower::invoke( // Lower triangular matrix is on RHS with the base facing down. // Everytime we compute a new output row of B, we must shift over to the // right by one in A's column to ensure we skip the 0's. - auto dotLowerRightConj = [&](const ValueType *KOKKOS_RESTRICT __A, - const int __as0, const int __as1, const int __am, - const int __left_row, - ValueType *KOKKOS_RESTRICT __B, const int __bs0, - const int __bs1, const int __right_col) { + auto dotLowerRightConj = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, const int __am, + const int __left_row, ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, + const int __right_col) { auto B_elems = __am - 1; ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -186,16 +169,13 @@ SerialTrmmInternalRightLower::invoke( #endif for (int i = __right_col; i <= B_elems; i++) { // sum += B[left_row, i] * A[i, right_col] - sum += __B[__bs0 * __left_row + i * __bs1] * - AT::conj(__A[i * __as0 + __right_col * __as1]); + sum += __B[__bs0 * __left_row + i * __bs1] * AT::conj(__A[i * __as0 + __right_col * __as1]); } return sum; }; - auto dotLowerRight = [&](const ValueType *KOKKOS_RESTRICT __A, - const int __as0, const int __as1, const int __am, - const 
int __left_row, ValueType *KOKKOS_RESTRICT __B, - const int __bs0, const int __bs1, + auto dotLowerRight = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, const int __am, + const int __left_row, ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, const int __right_col) { auto B_elems = __am - 1; ScalarType sum = 0; @@ -204,8 +184,7 @@ SerialTrmmInternalRightLower::invoke( #endif for (int i = __right_col; i <= B_elems; i++) { // sum += B[left_row, i] * A[i, right_col] - sum += __B[__bs0 * __left_row + i * __bs1] * - __A[i * __as0 + __right_col * __as1]; + sum += __B[__bs0 * __left_row + i * __bs1] * __A[i * __as0 + __right_col * __as1]; } return sum; }; @@ -215,8 +194,7 @@ SerialTrmmInternalRightLower::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(bm, bn, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -227,11 +205,9 @@ SerialTrmmInternalRightLower::invoke( #endif for (int n = 0; n < right_n; n++) { if (do_conj) { - B[m * bs0 + n * bs1] = - dotLowerRightConj(A, as0, as1, am, m, B, bs0, bs1, n); + B[m * bs0 + n * bs1] = dotLowerRightConj(A, as0, as1, am, m, B, bs0, bs1, n); } else { - B[m * bs0 + n * bs1] = - dotLowerRight(A, as0, as1, am, m, B, bs0, bs1, n); + B[m * bs0 + n * bs1] = dotLowerRight(A, as0, as1, am, m, B, bs0, bs1, n); } } } @@ -241,11 +217,9 @@ SerialTrmmInternalRightLower::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrmmInternalLeftUpper::invoke( - const bool /*use_unit_diag*/, const bool do_conj, const int am, - const int an, const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrmmInternalLeftUpper::invoke( + const bool /*use_unit_diag*/, 
const bool do_conj, const int am, const int an, const int bm, const int bn, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); typedef Kokkos::ArithTraits AT; @@ -257,11 +231,9 @@ SerialTrmmInternalLeftUpper::invoke( // conjOp = AT::conj; //} - auto dotUpperLeftConj = [&](const ValueType *KOKKOS_RESTRICT __A, - const int __as0, const int __as1, const int __an, - const int __left_row, - ValueType *KOKKOS_RESTRICT __B, const int __bs0, - const int __bs1, const int __right_col) { + auto dotUpperLeftConj = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, const int __an, + const int __left_row, ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, + const int __right_col) { auto B_elems = __an - __left_row - 1; ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -275,10 +247,9 @@ SerialTrmmInternalLeftUpper::invoke( return sum; }; - auto dotUpperLeft = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, - const int __as1, const int __an, const int __left_row, - ValueType *KOKKOS_RESTRICT __B, const int __bs0, - const int __bs1, const int __right_col) { + auto dotUpperLeft = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, const int __an, + const int __left_row, ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, + const int __right_col) { auto B_elems = __an - __left_row - 1; ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -286,8 +257,7 @@ SerialTrmmInternalLeftUpper::invoke( #endif for (int i = 0; i <= B_elems; i++) { // sum += A[left_row, i+left_row] * B[i+left_row, right_col] - sum += __A[__left_row * __as0 + (i + __left_row) * __as1] * - __B[(i + __left_row) * __bs0 + __bs1 * __right_col]; + sum += __A[__left_row * __as0 + (i + __left_row) * __as1] * __B[(i + __left_row) * __bs0 + __bs1 * __right_col]; } 
return sum; }; @@ -297,8 +267,7 @@ SerialTrmmInternalLeftUpper::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(bm, bn, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -309,11 +278,9 @@ SerialTrmmInternalLeftUpper::invoke( #endif for (int n = 0; n < right_n; ++n) { if (do_conj) { - B[m * bs0 + n * bs1] = - dotUpperLeftConj(A, as0, as1, an, m, B, bs0, bs1, n); + B[m * bs0 + n * bs1] = dotUpperLeftConj(A, as0, as1, an, m, B, bs0, bs1, n); } else { - B[m * bs0 + n * bs1] = - dotUpperLeft(A, as0, as1, an, m, B, bs0, bs1, n); + B[m * bs0 + n * bs1] = dotUpperLeft(A, as0, as1, an, m, B, bs0, bs1, n); } } } @@ -323,11 +290,9 @@ SerialTrmmInternalLeftUpper::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrmmInternalRightUpper::invoke( - const bool /*use_unit_diag*/, const bool do_conj, const int am, - const int an, const int bm, const int bn, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrmmInternalRightUpper::invoke( + const bool /*use_unit_diag*/, const bool do_conj, const int am, const int an, const int bm, const int bn, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); typedef Kokkos::ArithTraits AT; @@ -339,47 +304,41 @@ SerialTrmmInternalRightUpper::invoke( // conjOp = AT::conj; //} - auto dotUpperRightConj = - [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, - const int __as1, const int __left_row, ValueType *KOKKOS_RESTRICT __B, - const int __bs0, const int __bs1, const int __right_col) { - auto B_elems = __right_col; - ScalarType sum = 0; + auto dotUpperRightConj = 
[&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, + const int __left_row, ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, + const int __right_col) { + auto B_elems = __right_col; + ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int i = 0; i <= B_elems; i++) { - // sum += B[left_row, i] * A[i, right_col] - sum += __B[__left_row * __bs0 + i * __bs1] * - AT::conj(__A[i * __as0 + __right_col * __as1]); - } - return sum; - }; - - auto dotUpperRight = - [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, - const int __as1, const int __left_row, ValueType *KOKKOS_RESTRICT __B, - const int __bs0, const int __bs1, const int __right_col) { - auto B_elems = __right_col; - ScalarType sum = 0; + for (int i = 0; i <= B_elems; i++) { + // sum += B[left_row, i] * A[i, right_col] + sum += __B[__left_row * __bs0 + i * __bs1] * AT::conj(__A[i * __as0 + __right_col * __as1]); + } + return sum; + }; + + auto dotUpperRight = [&](const ValueType *KOKKOS_RESTRICT __A, const int __as0, const int __as1, const int __left_row, + ValueType *KOKKOS_RESTRICT __B, const int __bs0, const int __bs1, const int __right_col) { + auto B_elems = __right_col; + ScalarType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int i = 0; i <= B_elems; i++) { - // sum += B[left_row, i] * A[i, right_col] - sum += __B[__left_row * __bs0 + i * __bs1] * - __A[i * __as0 + __right_col * __as1]; - } - return sum; - }; + for (int i = 0; i <= B_elems; i++) { + // sum += B[left_row, i] * A[i, right_col] + sum += __B[__left_row * __bs0 + i * __bs1] * __A[i * __as0 + __right_col * __as1]; + } + return sum; + }; if (bm <= 0 || bn <= 0 || am <= 0 || an <= 0) return 0; if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(bm, bn, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); + if (alpha != one) 
KokkosBlas::Impl::SerialScaleInternal::invoke(bm, bn, alpha, B, bs0, bs1); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -390,8 +349,7 @@ SerialTrmmInternalRightUpper::invoke( #endif for (int n = right_n - 1; n >= 0; --n) { if (do_conj) { - B[m * bs0 + n * bs1] = - dotUpperRightConj(A, as0, as1, m, B, bs0, bs1, n); + B[m * bs0 + n * bs1] = dotUpperRightConj(A, as0, as1, m, B, bs0, bs1, n); } else { B[m * bs0 + n * bs1] = dotUpperRight(A, as0, as1, m, B, bs0, bs1, n); } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Impl.hpp index 4d094c24d254..694ac36fa0d2 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Impl.hpp @@ -29,43 +29,32 @@ namespace KokkosBatched { /// B := inv(tril(A)) (alpha*B) /// A(m x m), B(m x n) -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { typedef typename BViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = B.extent(0), n = B.extent(1); - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? 
MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1) { mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_LOWER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_1(), - (double *)B.data(), B.stride_1(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_1(), (double *)B.data(), B.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1) { mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_LOWER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)B.data(), B.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)B.data(), B.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; } @@ -75,28 +64,22 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_0(), A.stride_1(), + B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_0(), A.stride_1(), + B.data(), B.stride_0(), B.stride_1()); } }; @@ -105,43 +88,32 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { typedef typename BViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = B.extent(0), n = B.extent(1); - 
static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1) { mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_RIGHT, MKL_UPPER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_1(), - (double *)B.data(), B.stride_1(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_1(), (double *)B.data(), B.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1) { mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_RIGHT, MKL_UPPER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)B.data(), B.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)B.data(), B.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; } @@ -151,54 +123,42 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_1(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(1), B.extent(0), + alpha, A.data(), A.stride_1(), A.stride_0(), + B.data(), B.stride_1(), B.stride_0()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_1(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(1), B.extent(0), + alpha, A.data(), A.stride_1(), A.stride_0(), + B.data(), B.stride_1(), B.stride_0()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_1(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return 
SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(1), B.extent(0), + alpha, A.data(), A.stride_0(), A.stride_1(), + B.data(), B.stride_1(), B.stride_0()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_1(), B.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(1), B.extent(0), + alpha, A.data(), A.stride_0(), A.stride_1(), + B.data(), B.stride_1(), B.stride_0()); } }; @@ -207,43 +167,32 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { typedef typename BViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = B.extent(0), n = B.extent(1); - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? 
MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1) { mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_UPPER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_1(), - (double *)B.data(), B.stride_1(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_1(), (double *)B.data(), B.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1) { mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_UPPER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)B.data(), B.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)B.data(), B.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; } @@ -253,28 +202,22 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftUpper::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftUpper::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_0(), A.stride_1(), + B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftUpper::invoke( - 
ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftUpper::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_0(), A.stride_1(), + B.data(), B.stride_0(), B.stride_1()); } }; @@ -284,42 +227,31 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { typedef typename BViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = B.extent(0), n = B.extent(1); - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1) { - mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_1(), - (double *)B.data(), B.stride_1(), format, + mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_1(), (double *)B.data(), B.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1) { - mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)B.data(), B.stride_0(), format, + mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_0(), (double *)B.data(), B.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -330,28 +262,22 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftUpper::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftUpper::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_1(), A.stride_0(), + B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftUpper::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftUpper::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_1(), A.stride_0(), + 
B.data(), B.stride_0(), B.stride_1()); } }; /// @@ -359,42 +285,31 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { typedef typename BViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = B.extent(0), n = B.extent(1); - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1 && B.stride_0() == 1) { - mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_1(), - (double *)B.data(), B.stride_1(), format, + mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_1(), (double *)B.data(), B.stride_1(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1 && B.stride_1() == 1) { - mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, - ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)B.data(), B.stride_0(), format, + mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_0(), (double *)B.data(), B.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -405,28 +320,22 @@ struct SerialTrsm -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_1(), A.stride_0(), + B.data(), B.stride_0(), B.stride_1()); } }; template -struct SerialTrsm { +struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B) { - return SerialTrsmInternalLeftLower::invoke( - ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), B.data(), B.stride_0(), B.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B) { + return SerialTrsmInternalLeftLower::invoke(ArgDiag::use_unit_diag, B.extent(0), B.extent(1), + alpha, A.data(), A.stride_1(), A.stride_0(), + B.data(), B.stride_0(), B.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Internal.hpp index a44943e5d6bd..0e65d269f0c4 100644 --- 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Serial_Internal.hpp @@ -34,40 +34,31 @@ namespace KokkosBatched { template struct SerialTrsmInternalLeftLower { template - KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, - const int m, const int n, - const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, - const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrsmInternalLeftLower::invoke( - const bool use_unit_diag, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrsmInternalLeftLower::invoke( + const bool use_unit_diag, const int m, const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; for (int p = 0; p < m; ++p) { const int iend = m - p - 1, jend = n; - const ValueType *KOKKOS_RESTRICT a21 = - iend ? A + (p + 1) * as0 + p * as1 : NULL; + const ValueType *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, - *KOKKOS_RESTRICT B2 = - iend ? 
B + (p + 1) * bs0 : NULL; + ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, *KOKKOS_RESTRICT B2 = iend ? B + (p + 1) * bs0 : NULL; if (!use_unit_diag) { const ValueType alpha11 = A[p * as0 + p * as1]; @@ -83,8 +74,7 @@ SerialTrsmInternalLeftLower::invoke( #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int j = 0; j < jend; ++j) - B2[i * bs0 + j * bs1] -= a21[i * as0] * b1t[j * bs1]; + for (int j = 0; j < jend; ++j) B2[i * bs0 + j * bs1] -= a21[i * as0] * b1t[j * bs1]; } } return 0; @@ -92,10 +82,9 @@ SerialTrsmInternalLeftLower::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrsmInternalLeftLower::invoke( - const bool use_unit_diag, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrsmInternalLeftLower::invoke( + const bool use_unit_diag, const int m, const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { constexpr int mbAlgo = Algo::Trsm::Blocked::mb(); @@ -104,16 +93,14 @@ SerialTrsmInternalLeftLower::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; InnerTrsmLeftLowerUnitDiag trsm_u(as0, as1, bs0, bs1); InnerTrsmLeftLowerNonUnitDiag trsm_n(as0, as1, bs0, bs1); InnerGemmFixA gemm(as0, as1, bs0, bs1, bs0, bs1); - auto trsm = [&](const int ib, const int jb, - const ValueType *KOKKOS_RESTRICT AA, + auto trsm = [&](const int ib, const int jb, const ValueType *KOKKOS_RESTRICT AA, /**/ ValueType *KOKKOS_RESTRICT BB) { const int mb = mbAlgo; for (int p = 0; p < ib; p += mb) { @@ -121,7 +108,7 @@ SerialTrsmInternalLeftLower::invoke( // trsm update 
const ValueType *KOKKOS_RESTRICT Ap = AA + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; if (use_unit_diag) trsm_u.serial_invoke(Ap, pb, jb, Bp); @@ -131,8 +118,7 @@ SerialTrsmInternalLeftLower::invoke( // gemm update for (int i = p + mb; i < ib; i += mb) { const int mm = (i + mb) > ib ? (ib - i) : mb; - gemm.serial_invoke(minus_one, AA + i * as0 + p * as1, BB + p * bs0, - mm, jb, pb, BB + i * bs0); + gemm.serial_invoke(minus_one, AA + i * as0 + p * as1, BB + p * bs0, mm, jb, pb, BB + i * bs0); } } }; @@ -151,29 +137,23 @@ SerialTrsmInternalLeftLower::invoke( template struct SerialTrsmInternalLeftUpper { template - KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, - const int m, const int n, - const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, - const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrsmInternalLeftUpper::invoke( - const bool use_unit_diag, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrsmInternalLeftUpper::invoke( + const bool use_unit_diag, const int m, const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); + if (alpha != one) 
KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; ValueType *KOKKOS_RESTRICT B0 = B; @@ -199,8 +179,7 @@ SerialTrsmInternalLeftUpper::invoke( #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int j = 0; j < jend; ++j) - B0[i * bs0 + j * bs1] -= a01[i * as0] * b1t[j * bs1]; + for (int j = 0; j < jend; ++j) B0[i * bs0 + j * bs1] -= a01[i * as0] * b1t[j * bs1]; } } } @@ -209,10 +188,9 @@ SerialTrsmInternalLeftUpper::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrsmInternalLeftUpper::invoke( - const bool use_unit_diag, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int SerialTrsmInternalLeftUpper::invoke( + const bool use_unit_diag, const int m, const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0), minus_one(-1.0); @@ -221,8 +199,7 @@ SerialTrsmInternalLeftUpper::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; InnerTrsmLeftUpperUnitDiag trsm_u(as0, as1, bs0, bs1); @@ -230,17 +207,15 @@ SerialTrsmInternalLeftUpper::invoke( InnerGemmFixA gemm(as0, as1, bs0, bs1, bs0, bs1); - auto trsm = [&](const int ib, const int jb, - const ValueType *KOKKOS_RESTRICT AA, + auto trsm = [&](const int ib, const int jb, const ValueType *KOKKOS_RESTRICT AA, /**/ ValueType *KOKKOS_RESTRICT BB) { const int mb = mbAlgo; for (int pp = 0; pp < ib; pp += mb) { - const int ptmp = ib - pp - mb, p = ptmp < 0 ? 
0 : ptmp, - pb = mb + (ptmp < 0) * ptmp; + const int ptmp = ib - pp - mb, p = ptmp < 0 ? 0 : ptmp, pb = mb + (ptmp < 0) * ptmp; // trsm update const ValueType *KOKKOS_RESTRICT Ap = AA + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; if (use_unit_diag) trsm_u.serial_invoke(Ap, pb, jb, Bp); @@ -249,8 +224,7 @@ SerialTrsmInternalLeftUpper::invoke( // gemm update for (int i = 0; i < p; i += mb) { - gemm.serial_invoke(minus_one, AA + i * as0 + p * as1, Bp, - (i + mb) > p ? (p - i) : mb, jb, pb, BB + i * bs0); + gemm.serial_invoke(minus_one, AA + i * as0 + p * as1, Bp, (i + mb) > p ? (p - i) : mb, jb, pb, BB + i * bs0); } } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Impl.hpp index dbaba7fc6c74..145f8e0c2df5 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Impl.hpp @@ -34,17 +34,13 @@ namespace KokkosBatched { /// A(m x m), B(m x n) template -struct TeamVectorTrsm { +struct TeamVectorTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { return TeamVectorTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), - B.stride_1()); + member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), + B.stride_0(), B.stride_1()); } }; @@ -55,17 +51,13 @@ struct TeamVectorTrsm -struct TeamVectorTrsm { +struct TeamVectorTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType 
&member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { return TeamVectorTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_1(), - B.stride_0()); + member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), + B.stride_1(), B.stride_0()); } }; @@ -76,17 +68,13 @@ struct TeamVectorTrsm -struct TeamVectorTrsm { +struct TeamVectorTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { return TeamVectorTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), - B.stride_1()); + member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), B.data(), + B.stride_0(), B.stride_1()); } }; @@ -97,17 +85,13 @@ struct TeamVectorTrsm -struct TeamVectorTrsm { +struct TeamVectorTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { return TeamVectorTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), - B.stride_1()); + member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), + B.stride_0(), B.stride_1()); } }; @@ -118,17 +102,13 @@ struct TeamVectorTrsm -struct 
TeamVectorTrsm { +struct TeamVectorTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { return TeamVectorTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), - B.stride_1()); + member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride_1(), A.stride_0(), B.data(), + B.stride_0(), B.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Internal.hpp index 3ee13f0b809c..c1781a001cc9 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_TeamVector_Internal.hpp @@ -32,30 +32,24 @@ namespace KokkosBatched { template struct TeamVectorTrsmInternalLeftLower { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const bool use_unit_diag, const int m, const int n, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -TeamVectorTrsmInternalLeftLower::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int 
as1, +KOKKOS_INLINE_FUNCTION int TeamVectorTrsmInternalLeftLower::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) - KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, B, bs0, - bs1); + KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, alpha, B, - bs0, bs1); + if (alpha != one) KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; for (int p = 0; p < m; ++p) { @@ -63,29 +57,23 @@ TeamVectorTrsmInternalLeftLower::invoke( int iend = m - p - 1; int jend = n; - const ValueType *KOKKOS_RESTRICT a21 = - iend ? A + (p + 1) * as0 + p * as1 : NULL; + const ValueType *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, - *KOKKOS_RESTRICT B2 = - iend ? B + (p + 1) * bs0 : NULL; + ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, *KOKKOS_RESTRICT B2 = iend ? 
B + (p + 1) * bs0 : NULL; member.team_barrier(); if (!use_unit_diag) { const ValueType alpha11 = A[p * as0 + p * as1]; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, jend), - [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, jend), + [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); member.team_barrier(); } - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, iend), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, jend), [&](const int &j) { - // assume layout right for batched computation - B2[i * bs0 + j * bs1] -= a21[i * as0] * b1t[j * bs1]; - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, iend), [&](const int &i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, jend), [&](const int &j) { + // assume layout right for batched computation + B2[i * bs0 + j * bs1] -= a21[i * as0] * b1t[j * bs1]; + }); + }); } } return 0; @@ -94,31 +82,25 @@ TeamVectorTrsmInternalLeftLower::invoke( template struct TeamVectorTrsmInternalLeftUpper { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const bool use_unit_diag, const int m, const int n, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -TeamVectorTrsmInternalLeftUpper::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int 
TeamVectorTrsmInternalLeftUpper::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); // note that parallel range is different ( m*n vs m-1*n); if (alpha == zero) - KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, B, bs0, - bs1); + KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, alpha, B, - bs0, bs1); + if (alpha != one) KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; ValueType *KOKKOS_RESTRICT B0 = B; @@ -128,24 +110,20 @@ TeamVectorTrsmInternalLeftUpper::invoke( int jend = n; const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; - /**/ ValueType *KOKKOS_RESTRICT b1t = B + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT b1t = B + p * bs0; member.team_barrier(); if (!use_unit_diag) { const ValueType alpha11 = A[p * as0 + p * as1]; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, jend), - [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, jend), + [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); member.team_barrier(); } - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, iend), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, jend), [&](const int &j) { - B0[i * bs0 + j * bs1] -= a01[i * as0] * b1t[j * bs1]; - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, iend), [&](const int &i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, jend), + [&](const int &j) { B0[i * bs0 + j * bs1] -= a01[i * as0] * b1t[j * bs1]; }); + }); } } return 0; diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Impl.hpp index 9f5f857e444d..371dbb483c6a 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Impl.hpp @@ -34,32 +34,24 @@ namespace KokkosBatched { /// A(m x m), B(m x n) template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftLower::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftLower::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; @@ -70,32 +62,24 @@ struct TeamTrsm -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - 
const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_1(), - B.stride_0()); + return TeamTrsmInternalLeftLower::invoke(member, ArgDiag::use_unit_diag, B.extent(1), + B.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_1(), B.stride_0()); } }; template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_1(), - B.stride_0()); + return TeamTrsmInternalLeftLower::invoke(member, ArgDiag::use_unit_diag, B.extent(1), + B.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_1(), B.stride_0()); } }; @@ -106,32 +90,24 @@ struct TeamTrsm -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_1(), - B.stride_0()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(1), + B.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_1(), B.stride_0()); } }; template -struct TeamTrsm { +struct 
TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_1(), - B.stride_0()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(1), + B.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_1(), B.stride_0()); } }; @@ -142,32 +118,24 @@ struct TeamTrsm -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_1(), - B.stride_0()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(1), + B.extent(0), alpha, A.data(), A.stride_0(), + A.stride_1(), B.data(), B.stride_1(), B.stride_0()); } }; template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_1(), - B.stride_0()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(1), + B.extent(0), alpha, 
A.data(), A.stride_0(), + A.stride_1(), B.data(), B.stride_1(), B.stride_0()); } }; @@ -178,32 +146,24 @@ struct TeamTrsm -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), B.data(), B.stride_0(), B.stride_1()); } }; @@ -214,32 +174,24 @@ struct TeamTrsm -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), 
B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftUpper::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftUpper::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; @@ -250,32 +202,24 @@ struct TeamTrsm -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftLower::invoke( - member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftLower::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; template -struct TeamTrsm { +struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { - return TeamTrsmInternalLeftLower::invoke( - 
member, ArgDiag::use_unit_diag, B.extent(0), B.extent(1), alpha, - A.data(), A.stride_1(), A.stride_0(), B.data(), B.stride_0(), - B.stride_1()); + return TeamTrsmInternalLeftLower::invoke(member, ArgDiag::use_unit_diag, B.extent(0), + B.extent(1), alpha, A.data(), A.stride_1(), + A.stride_0(), B.data(), B.stride_0(), B.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Internal.hpp index a880186ae950..a1a70628094c 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsm_Team_Internal.hpp @@ -35,29 +35,24 @@ namespace KokkosBatched { template struct TeamTrsmInternalLeftLower { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const bool use_unit_diag, const int m, const int n, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -TeamTrsmInternalLeftLower::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int TeamTrsmInternalLeftLower::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); if 
(alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, - bs1); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; for (int p = 0; p < m; ++p) { @@ -65,27 +60,22 @@ TeamTrsmInternalLeftLower::invoke( int iend = m - p - 1; int jend = n; - const ValueType *KOKKOS_RESTRICT a21 = - iend ? A + (p + 1) * as0 + p * as1 : NULL; + const ValueType *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, - *KOKKOS_RESTRICT B2 = - iend ? B + (p + 1) * bs0 : NULL; + ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, *KOKKOS_RESTRICT B2 = iend ? B + (p + 1) * bs0 : NULL; member.team_barrier(); if (!use_unit_diag) { const ValueType alpha11 = A[p * as0 + p * as1]; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, jend), - [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, jend), + [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); member.team_barrier(); } - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, iend * jend), [&](const int &ij) { - // assume layout right for batched computation - const int i = ij / jend, j = ij % jend; - B2[i * bs0 + j * bs1] -= a21[i * as0] * b1t[j * bs1]; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend * jend), [&](const int &ij) { + // assume layout right for batched computation + const int i = ij / jend, j = ij % jend; + B2[i * bs0 + j * bs1] -= a21[i * as0] * b1t[j * bs1]; + }); } } return 0; @@ -93,11 +83,9 @@ TeamTrsmInternalLeftLower::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -TeamTrsmInternalLeftLower::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType 
*KOKKOS_RESTRICT A, - const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int TeamTrsmInternalLeftLower::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { constexpr int mbAlgo = Algo::Trsm::Blocked::mb(); @@ -107,9 +95,7 @@ TeamTrsmInternalLeftLower::invoke( if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, - bs1); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; /// @@ -120,8 +106,7 @@ TeamTrsmInternalLeftLower::invoke( InnerTrsmLeftLowerUnitDiag trsm_u(as0, as1, bs0, bs1); InnerTrsmLeftLowerNonUnitDiag trsm_n(as0, as1, bs0, bs1); - auto trsm = [&](const int ib, const int jb, - const ValueType *KOKKOS_RESTRICT AA, + auto trsm = [&](const int ib, const int jb, const ValueType *KOKKOS_RESTRICT AA, /**/ ValueType *KOKKOS_RESTRICT BB) { const int mb = mbAlgo; const int tsize = member.team_size(); @@ -134,25 +119,22 @@ TeamTrsmInternalLeftLower::invoke( // trsm update const ValueType *KOKKOS_RESTRICT Ap = AA + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, (jb / nb) + (np > 0)), - [&](const int jj) { - // Made this non-const in order to WORKAROUND issue #349 - int j = jj * nb, qb = (j + nb) > jb ? 
np : nb; - if (use_unit_diag) - trsm_u.serial_invoke(Ap, pb, qb, Bp + j * bs1); - else - trsm_n.serial_invoke(Ap, pb, qb, Bp + j * bs1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, (jb / nb) + (np > 0)), [&](const int jj) { + // Made this non-const in order to WORKAROUND issue #349 + int j = jj * nb, qb = (j + nb) > jb ? np : nb; + if (use_unit_diag) + trsm_u.serial_invoke(Ap, pb, qb, Bp + j * bs1); + else + trsm_n.serial_invoke(Ap, pb, qb, Bp + j * bs1); + }); member.team_barrier(); // gemm update - TeamGemmInternal::invoke( - member, ib - p - pb, jb, pb, minus_one, Ap + pb * as0, as0, as1, Bp, - bs0, bs1, one, Bp + pb * bs0, bs0, bs1); + TeamGemmInternal::invoke(member, ib - p - pb, jb, pb, minus_one, Ap + pb * as0, as0, as1, + Bp, bs0, bs1, one, Bp + pb * bs0, bs0, bs1); } }; @@ -170,20 +152,17 @@ TeamTrsmInternalLeftLower::invoke( template struct TeamTrsmInternalLeftUpper { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const bool use_unit_diag, const int m, const int n, + const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, + const int as1, + /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1); }; template <> template -KOKKOS_INLINE_FUNCTION int -TeamTrsmInternalLeftUpper::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int TeamTrsmInternalLeftUpper::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ 
ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const ScalarType one(1.0), zero(0.0); @@ -191,9 +170,7 @@ TeamTrsmInternalLeftUpper::invoke( if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, - bs1); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; ValueType *KOKKOS_RESTRICT B0 = B; @@ -203,30 +180,27 @@ TeamTrsmInternalLeftUpper::invoke( int jend = n; const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; - /**/ ValueType *KOKKOS_RESTRICT b1t = B + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT b1t = B + p * bs0; member.team_barrier(); if (!use_unit_diag) { const ValueType alpha11 = A[p * as0 + p * as1]; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, jend), - [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, jend), + [&](const int &j) { b1t[j * bs1] = b1t[j * bs1] / alpha11; }); member.team_barrier(); } - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, iend * jend), [&](const int &ij) { - int i, j; - if (KokkosKernels::Impl::kk_is_gpu_exec_space< - typename MemberType::execution_space>()) { - i = ij % iend; - j = ij / iend; - } else { - i = ij / jend; - j = ij % jend; - } - B0[i * bs0 + j * bs1] -= a01[i * as0] * b1t[j * bs1]; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend * jend), [&](const int &ij) { + int i, j; + if (KokkosKernels::Impl::kk_is_gpu_exec_space()) { + i = ij % iend; + j = ij / iend; + } else { + i = ij / jend; + j = ij % jend; + } + B0[i * bs0 + j * bs1] -= a01[i * as0] * b1t[j * bs1]; + }); } } return 0; @@ -234,11 +208,9 @@ TeamTrsmInternalLeftUpper::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -TeamTrsmInternalLeftUpper::invoke( - const MemberType &member, const bool use_unit_diag, 
const int m, - const int n, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, +KOKKOS_INLINE_FUNCTION int TeamTrsmInternalLeftUpper::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT B, const int bs0, const int bs1) { constexpr int mbAlgo = Algo::Trsm::Blocked::mb(); @@ -248,16 +220,13 @@ TeamTrsmInternalLeftUpper::invoke( if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, - bs1); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; InnerTrsmLeftUpperUnitDiag trsm_u(as0, as1, bs0, bs1); InnerTrsmLeftUpperNonUnitDiag trsm_n(as0, as1, bs0, bs1); - auto trsm = [&](const int ib, const int jb, - const ValueType *KOKKOS_RESTRICT AA, + auto trsm = [&](const int ib, const int jb, const ValueType *KOKKOS_RESTRICT AA, /**/ ValueType *KOKKOS_RESTRICT BB) { const int mb = mbAlgo; //(ib <=5 ? ib : mbAlgo); const int tsize = member.team_size(); @@ -265,29 +234,25 @@ TeamTrsmInternalLeftUpper::invoke( int nb = (jb / tsize + jb % tsize > 0); int np = jb % nb; for (int pp = 0; pp < ib; pp += mb) { - const int ptmp = (ib - pp - mb), p = (ptmp < 0 ? 0 : ptmp), - pb = (mb + (ptmp < 0) * ptmp); + const int ptmp = (ib - pp - mb), p = (ptmp < 0 ? 0 : ptmp), pb = (mb + (ptmp < 0) * ptmp); // trsm update const ValueType *KOKKOS_RESTRICT Ap = AA + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT Bp = BB + p * bs0; member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, (jb / nb) + (np > 0)), - [&](const int &jj) { - const int j = jj * nb, qb = (j + nb) > jb ? 
np : nb; - if (use_unit_diag) - trsm_u.serial_invoke(Ap, pb, qb, Bp + j * bs1); - else - trsm_n.serial_invoke(Ap, pb, qb, Bp + j * bs1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, (jb / nb) + (np > 0)), [&](const int &jj) { + const int j = jj * nb, qb = (j + nb) > jb ? np : nb; + if (use_unit_diag) + trsm_u.serial_invoke(Ap, pb, qb, Bp + j * bs1); + else + trsm_n.serial_invoke(Ap, pb, qb, Bp + j * bs1); + }); member.team_barrier(); // gemm update - TeamGemmInternal::invoke( - member, p, jb, pb, minus_one, Ap - p * as0, as0, as1, Bp, bs0, bs1, - one, BB, bs0, bs1); + TeamGemmInternal::invoke(member, p, jb, pb, minus_one, Ap - p * as0, as0, as1, Bp, bs0, + bs1, one, BB, bs0, bs1); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Impl.hpp index 0fc375a7b23b..073970caa682 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Impl.hpp @@ -38,43 +38,32 @@ namespace KokkosBatched { /// L/NT /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { typedef typename bViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = b.extent(0), n = 1; - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || 
vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1) { mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_LOWER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1) { mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_LOWER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; } @@ -84,28 +73,20 @@ struct SerialTrsv -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalLower::invoke( - ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), A.stride_0(), - A.stride_1(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalLower::invoke(ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), + A.stride_0(), A.stride_1(), b.data(), b.stride_0()); } }; template -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalLower::invoke( - ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), A.stride_0(), - A.stride_1(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalLower::invoke(ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), + A.stride_0(), A.stride_1(), b.data(), b.stride_0()); } }; @@ -113,42 +94,31 @@ struct SerialTrsv -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { typedef typename bViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = b.extent(0), n = 1; - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 
8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1) { - mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, + mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1) { - mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, + mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_LOWER, MKL_TRANS, ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -159,27 +129,20 @@ struct SerialTrsv -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalUpper::invoke( - ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), A.stride_1(), - A.stride_0(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalUpper::invoke(ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), b.data(), b.stride_0()); } }; template struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalUpper::invoke( - ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), A.stride_1(), - A.stride_0(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalUpper::invoke(ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), b.data(), b.stride_0()); } }; @@ -187,43 +150,32 @@ struct SerialTrsv { /// U/NT /// -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ +#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) template -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { typedef 
typename bViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = b.extent(0), n = 1; - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length == 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1) { mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_UPPER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1) { mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_UPPER, MKL_NOTRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, - (MKL_INT)vector_type::vector_length); + ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, m, n, alpha, (const double *)A.data(), + A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; } @@ -233,28 +185,20 @@ struct SerialTrsv -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalUpper::invoke( - ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), A.stride_0(), - A.stride_1(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalUpper::invoke(ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), + A.stride_0(), A.stride_1(), b.data(), b.stride_0()); } }; template -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalUpper::invoke( - ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), A.stride_0(), - A.stride_1(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalUpper::invoke(ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), + A.stride_0(), A.stride_1(), b.data(), b.stride_0()); } }; @@ -262,42 +206,31 @@ struct SerialTrsv -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { typedef typename bViewType::value_type vector_type; // typedef typename vector_type::value_type value_type; const int m = b.extent(0), n = 1; - static_assert(is_vector::value, - "value type is not vector type"); - static_assert( - vector_type::vector_length == 4 || vector_type::vector_length 
== 8, - "AVX, AVX2 and AVX512 is supported"); - const MKL_COMPACT_PACK format = - vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; + static_assert(is_vector::value, "value type is not vector type"); + static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8, + "AVX, AVX2 and AVX512 is supported"); + const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX; // no error check int r_val = 0; if (A.stride_0() == 1) { - mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, + mkl_dtrsm_compact(MKL_COL_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else if (A.stride_1() == 1) { - mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, - ArgDiag::use_unit_diag ? MKL_UNIT : MKL_NONUNIT, m, n, - alpha, (const double *)A.data(), A.stride_0(), - (double *)b.data(), b.stride_0(), format, + mkl_dtrsm_compact(MKL_ROW_MAJOR, MKL_LEFT, MKL_UPPER, MKL_TRANS, ArgDiag::use_unit_diag ? 
MKL_UNIT : MKL_NONUNIT, + m, n, alpha, (const double *)A.data(), A.stride_0(), (double *)b.data(), b.stride_0(), format, (MKL_INT)vector_type::vector_length); } else { r_val = -1; @@ -308,27 +241,20 @@ struct SerialTrsv -struct SerialTrsv { +struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalLower::invoke( - ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), A.stride_1(), - A.stride_0(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalLower::invoke(ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), b.data(), b.stride_0()); } }; template struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const bViewType &b) { - return SerialTrsvInternalLower::invoke( - ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), A.stride_1(), - A.stride_0(), b.data(), b.stride_0()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const bViewType &b) { + return SerialTrsvInternalLower::invoke(ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), b.data(), b.stride_0()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Internal.hpp index 3ae206cc0961..43d95377d438 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Serial_Internal.hpp @@ -38,39 +38,33 @@ namespace KokkosBatched { template struct SerialTrsvInternalLower { template - KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, - const int m, const ScalarType alpha, - const ValueType 
*KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT b, - const int bs0); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT b, const int bs0); }; template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrsvInternalLower::invoke( - const bool use_unit_diag, const int m, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { +KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke(const bool use_unit_diag, const int m, + const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT b, + const int bs0) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); if (m <= 0) return 0; for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - const ValueType *KOKKOS_RESTRICT a21 = - iend ? A + (p + 1) * as0 + p * as1 : NULL; + const ValueType *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0, - *KOKKOS_RESTRICT b2 = - iend ? beta1 + bs0 : NULL; + ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0, *KOKKOS_RESTRICT b2 = iend ? 
beta1 + bs0 : NULL; // with KOKKOS_RESTRICT a compiler assumes that the pointer is not // accessed by others op(/=) uses this pointer and changes the associated @@ -85,10 +79,12 @@ SerialTrsvInternalLower::invoke( template <> template -KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke( - const bool use_unit_diag, const int m, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { +KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke(const bool use_unit_diag, const int m, + const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT b, + const int bs0) { const ScalarType one(1.0), zero(0.0), minus_one(-1.0); constexpr int mbAlgo = Algo::Trsv::Blocked::mb(); @@ -96,8 +92,7 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); if (m <= 0) return 0; /// case GPU: team size is large and blocksize (mb,nb) is small @@ -110,7 +105,7 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke( // trsm update const ValueType *KOKKOS_RESTRICT Ap = A + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; if (use_unit_diag) trsm_u.serial_invoke(Ap, pb, 1, bp); @@ -118,9 +113,8 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke( trsm_n.serial_invoke(Ap, pb, 1, bp); // gemv update - KokkosBlas::Impl::SerialGemvInternal::invoke( - m - p - pb, pb, minus_one, Ap + pb * as0, as0, as1, bp, bs0, one, - bp + pb * bs0, bs0); + KokkosBlas::Impl::SerialGemvInternal::invoke(m - p - pb, pb, minus_one, Ap + pb * as0, as0, + as1, bp, bs0, one, bp + pb * bs0, bs0); } } return 0; 
@@ -133,36 +127,33 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalLower::invoke( template struct SerialTrsvInternalUpper { template - KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, - const int m, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT b, - const int bs0); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT b, const int bs0); }; template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrsvInternalUpper::invoke( - const bool use_unit_diag, const int m, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { +KOKKOS_INLINE_FUNCTION int SerialTrsvInternalUpper::invoke(const bool use_unit_diag, const int m, + const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT b, + const int bs0) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); if (m <= 0) return 0; ValueType *KOKKOS_RESTRICT b0 = b; for (int p = (m - 1); p >= 0; --p) { const int iend = p; - const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; - /**/ ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0; + const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; + /**/ ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0; // with KOKKOS_RESTRICT a compiler assumes that the pointer is not // accessed by others op(/=) uses this pointer and changes the associated @@ -177,10 +168,12 @@ SerialTrsvInternalUpper::invoke( template <> template -KOKKOS_INLINE_FUNCTION int 
SerialTrsvInternalUpper::invoke( - const bool use_unit_diag, const int m, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { +KOKKOS_INLINE_FUNCTION int SerialTrsvInternalUpper::invoke(const bool use_unit_diag, const int m, + const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1, + /**/ ValueType *KOKKOS_RESTRICT b, + const int bs0) { const ScalarType one(1.0), zero(0.0), minus_one(-1.0); constexpr int mbAlgo = Algo::Trsm::Blocked::mb(); @@ -189,8 +182,7 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalUpper::invoke( if (alpha == zero) KokkosBlas::Impl::SerialSetInternal::invoke(m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, alpha, b, bs0); if (m <= 0) return 0; InnerTrsmLeftUpperUnitDiag trsm_u(as0, as1, bs0, 0); @@ -198,12 +190,11 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalUpper::invoke( const int mb = mbAlgo; for (int pp = 0; pp < m; pp += mb) { - const int ptmp = (m - pp - mb), p = (ptmp < 0 ? 0 : ptmp), - pb = (mb + (ptmp < 0) * ptmp); + const int ptmp = (m - pp - mb), p = (ptmp < 0 ? 
0 : ptmp), pb = (mb + (ptmp < 0) * ptmp); // trsm update const ValueType *KOKKOS_RESTRICT Ap = A + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; if (use_unit_diag) trsm_u.serial_invoke(Ap, pb, 1, bp); @@ -211,8 +202,8 @@ KOKKOS_INLINE_FUNCTION int SerialTrsvInternalUpper::invoke( trsm_n.serial_invoke(Ap, pb, 1, bp); // gemv update - KokkosBlas::Impl::SerialGemvInternal::invoke( - p, pb, minus_one, Ap - p * as0, as0, as1, bp, bs0, one, b, bs0); + KokkosBlas::Impl::SerialGemvInternal::invoke(p, pb, minus_one, Ap - p * as0, as0, as1, bp, + bs0, one, b, bs0); } } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Impl.hpp index 8e14b5ef378d..42c242414c51 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Impl.hpp @@ -38,16 +38,13 @@ namespace KokkosBatched { /// template -struct TeamVectorTrsv { +struct TeamVectorTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamVectorTrsvInternalLower::invoke( - member, ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), b.data(), b.stride_0()); + return TeamVectorTrsvInternalLower::invoke(member, ArgDiag::use_unit_diag, A.extent(0), + alpha, A.data(), A.stride_0(), A.stride_1(), + b.data(), b.stride_0()); } }; @@ -56,16 +53,13 @@ struct TeamVectorTrsv -struct TeamVectorTrsv { +struct TeamVectorTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION 
static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamVectorTrsvInternalUpper::invoke( - member, ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), b.data(), b.stride_0()); + return TeamVectorTrsvInternalUpper::invoke(member, ArgDiag::use_unit_diag, A.extent(1), + alpha, A.data(), A.stride_1(), A.stride_0(), + b.data(), b.stride_0()); } }; @@ -74,16 +68,13 @@ struct TeamVectorTrsv -struct TeamVectorTrsv { +struct TeamVectorTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamVectorTrsvInternalUpper::invoke( - member, ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), b.data(), b.stride_0()); + return TeamVectorTrsvInternalUpper::invoke(member, ArgDiag::use_unit_diag, A.extent(0), + alpha, A.data(), A.stride_0(), A.stride_1(), + b.data(), b.stride_0()); } }; @@ -92,16 +83,13 @@ struct TeamVectorTrsv -struct TeamVectorTrsv { +struct TeamVectorTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamVectorTrsvInternalLower::invoke( - member, ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), b.data(), b.stride_0()); + return TeamVectorTrsvInternalLower::invoke(member, ArgDiag::use_unit_diag, A.extent(1), + alpha, A.data(), A.stride_1(), A.stride_0(), + b.data(), b.stride_0()); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Internal.hpp 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Internal.hpp index 40bca5a64a39..894e684ef2ee 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_TeamVector_Internal.hpp @@ -36,12 +36,10 @@ namespace KokkosBatched { template struct TeamVectorTrsvInternalLower { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType & /*member*/, const bool /*use_unit_diag*/, - const int /*m*/, const ScalarType /*alpha*/, - const ValueType *KOKKOS_RESTRICT /*A*/, const int /*as0*/, - const int /*as1*/, - /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const bool /*use_unit_diag*/, const int /*m*/, + const ScalarType /*alpha*/, const ValueType *KOKKOS_RESTRICT /*A*/, + const int /*as0*/, const int /*as1*/, + /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { assert(false && "Error: encounter dummy impl"); return 0; } @@ -49,31 +47,24 @@ struct TeamVectorTrsvInternalLower { template <> template -KOKKOS_INLINE_FUNCTION int -TeamVectorTrsvInternalLower::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, +KOKKOS_INLINE_FUNCTION int TeamVectorTrsvInternalLower::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, alpha, b, - bs0); + if (alpha != one) KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, alpha, b, bs0); if (m <= 0) return 
0; for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - const ValueType *KOKKOS_RESTRICT a21 = - iend ? A + (p + 1) * as0 + p * as1 : NULL; + const ValueType *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0, - *KOKKOS_RESTRICT b2 = - iend ? beta1 + bs0 : NULL; + ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0, *KOKKOS_RESTRICT b2 = iend ? beta1 + bs0 : NULL; member.team_barrier(); ValueType local_beta1 = *beta1; @@ -82,12 +73,10 @@ TeamVectorTrsvInternalLower::invoke( local_beta1 = local_beta1 / alpha11; member.team_barrier(); - Kokkos::single(Kokkos::PerTeam(member), - [&]() { *beta1 = local_beta1; }); + Kokkos::single(Kokkos::PerTeam(member), [&]() { *beta1 = local_beta1; }); } - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, iend), - [&](const int &i) { b2[i * bs0] -= a21[i * as0] * local_beta1; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, iend), + [&](const int &i) { b2[i * bs0] -= a21[i * as0] * local_beta1; }); } } return 0; @@ -100,12 +89,10 @@ TeamVectorTrsvInternalLower::invoke( template struct TeamVectorTrsvInternalUpper { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType & /*member*/, const bool /*use_unit_diag*/, - const int /*m*/, const ScalarType /*alpha*/, - const ValueType *KOKKOS_RESTRICT /*A*/, const int /*as0*/, - const int /*as1*/, - /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const bool /*use_unit_diag*/, const int /*m*/, + const ScalarType /*alpha*/, const ValueType *KOKKOS_RESTRICT /*A*/, + const int /*as0*/, const int /*as1*/, + /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { assert(false && "Error: encounter dummy impl"); return 0; } @@ -113,28 +100,24 @@ struct TeamVectorTrsvInternalUpper { template <> template -KOKKOS_INLINE_FUNCTION int -TeamVectorTrsvInternalUpper::invoke( - const MemberType &member, const bool 
use_unit_diag, const int m, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, +KOKKOS_INLINE_FUNCTION int TeamVectorTrsvInternalUpper::invoke( + const MemberType &member, const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, alpha, b, - bs0); + if (alpha != one) KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, alpha, b, bs0); if (m <= 0) return 0; ValueType *KOKKOS_RESTRICT b0 = b; for (int p = (m - 1); p >= 0; --p) { const int iend = p; - const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; - /**/ ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0; + const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; + /**/ ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0; member.team_barrier(); ValueType local_beta1 = *beta1; @@ -143,12 +126,10 @@ TeamVectorTrsvInternalUpper::invoke( local_beta1 = local_beta1 / alpha11; member.team_barrier(); - Kokkos::single(Kokkos::PerTeam(member), - [&]() { *beta1 = local_beta1; }); + Kokkos::single(Kokkos::PerTeam(member), [&]() { *beta1 = local_beta1; }); } - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, iend), - [&](const int &i) { b0[i * bs0] -= a01[i * as0] * local_beta1; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, iend), + [&](const int &i) { b0[i * bs0] -= a01[i * as0] * local_beta1; }); } } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Impl.hpp index 7f370c1f01d2..c658080dc214 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Impl.hpp +++ 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Impl.hpp @@ -38,30 +38,24 @@ namespace KokkosBatched { /// template -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalLower::invoke( - member, ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), b.data(), b.stride_0()); + return TeamTrsvInternalLower::invoke(member, ArgDiag::use_unit_diag, A.extent(0), alpha, + A.data(), A.stride_0(), A.stride_1(), b.data(), + b.stride_0()); } }; template -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalLower::invoke( - member, ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), b.data(), b.stride_0()); + return TeamTrsvInternalLower::invoke(member, ArgDiag::use_unit_diag, A.extent(0), alpha, + A.data(), A.stride_0(), A.stride_1(), b.data(), + b.stride_0()); } }; @@ -70,30 +64,23 @@ struct TeamTrsv -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalUpper::invoke( - member, ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), b.data(), b.stride_0()); + return TeamTrsvInternalUpper::invoke(member, ArgDiag::use_unit_diag, A.extent(1), alpha, + A.data(), A.stride_1(), 
A.stride_0(), b.data(), + b.stride_0()); } }; template -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalUpper::invoke( - ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), A.stride_1(), - A.stride_0(), b.data(), b.stride_0()); + return TeamTrsvInternalUpper::invoke(ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), + A.stride_1(), A.stride_0(), b.data(), b.stride_0()); } }; @@ -102,30 +89,24 @@ struct TeamTrsv -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalUpper::invoke( - member, ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), b.data(), b.stride_0()); + return TeamTrsvInternalUpper::invoke(member, ArgDiag::use_unit_diag, A.extent(0), alpha, + A.data(), A.stride_0(), A.stride_1(), b.data(), + b.stride_0()); } }; template -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalUpper::invoke( - member, ArgDiag::use_unit_diag, A.extent(0), alpha, A.data(), - A.stride_0(), A.stride_1(), b.data(), b.stride_0()); + return TeamTrsvInternalUpper::invoke(member, ArgDiag::use_unit_diag, A.extent(0), alpha, + A.data(), A.stride_0(), A.stride_1(), b.data(), + b.stride_0()); } }; @@ -134,30 +115,24 @@ struct TeamTrsv 
-struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalLower::invoke( - member, ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), b.data(), b.stride_0()); + return TeamTrsvInternalLower::invoke(member, ArgDiag::use_unit_diag, A.extent(1), alpha, + A.data(), A.stride_1(), A.stride_0(), b.data(), + b.stride_0()); } }; template -struct TeamTrsv { +struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { - return TeamTrsvInternalLower::invoke( - member, ArgDiag::use_unit_diag, A.extent(1), alpha, A.data(), - A.stride_1(), A.stride_0(), b.data(), b.stride_0()); + return TeamTrsvInternalLower::invoke(member, ArgDiag::use_unit_diag, A.extent(1), alpha, + A.data(), A.stride_1(), A.stride_0(), b.data(), + b.stride_0()); } }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp index 600a0c6e81df..ba3b2ff7b545 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp @@ -38,12 +38,10 @@ namespace KokkosBatched { template struct TeamTrsvInternalLower { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType & /*member*/, const bool /*use_unit_diag*/, - const int /*m*/, const ScalarType /*alpha*/, - const ValueType *KOKKOS_RESTRICT /*A*/, const int /*as0*/, - const int /*as1*/, - 
/**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const bool /*use_unit_diag*/, const int /*m*/, + const ScalarType /*alpha*/, const ValueType *KOKKOS_RESTRICT /*A*/, + const int /*as0*/, const int /*as1*/, + /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { assert(false && "Error: encounter dummy impl"); return 0; } @@ -52,28 +50,23 @@ struct TeamTrsvInternalLower { template <> template KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, + const MemberType &member, const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); if (m <= 0) return 0; for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - const ValueType *KOKKOS_RESTRICT a21 = - iend ? A + (p + 1) * as0 + p * as1 : NULL; + const ValueType *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0, - *KOKKOS_RESTRICT b2 = - iend ? beta1 + bs0 : NULL; + ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0, *KOKKOS_RESTRICT b2 = iend ? 
beta1 + bs0 : NULL; member.team_barrier(); ValueType local_beta1 = *beta1; @@ -85,9 +78,8 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( if (member.team_rank() == 0) *beta1 = local_beta1; } /// member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, iend), - [&](const int &i) { b2[i * bs0] -= a21[i * as0] * local_beta1; }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend), + [&](const int &i) { b2[i * bs0] -= a21[i * as0] * local_beta1; }); } } return 0; @@ -96,9 +88,8 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( template <> template KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, + const MemberType &member, const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { const ScalarType one(1.0), zero(0.0), minus_one(-1.0); @@ -107,8 +98,7 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); if (m <= 0) return 0; /// case GPU: team size is large and blocksize (mb,nb) is small @@ -122,7 +112,7 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( // trsm update const ValueType *KOKKOS_RESTRICT Ap = A + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; member.team_barrier(); if (member.team_rank() == 0) { @@ -134,9 +124,8 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( // gemv update member.team_barrier(); - 
KokkosBlas::Impl::TeamGemvInternal::invoke( - member, m - p - pb, pb, minus_one, Ap + pb * as0, as0, as1, bp, 1, - one, bp + pb * bs0, bs0); + KokkosBlas::Impl::TeamGemvInternal::invoke(member, m - p - pb, pb, minus_one, Ap + pb * as0, + as0, as1, bp, 1, one, bp + pb * bs0, bs0); } } return 0; @@ -149,12 +138,10 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( template struct TeamTrsvInternalUpper { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType & /*member*/, const bool /*use_unit_diag*/, - const int /*m*/, const ScalarType /*alpha*/, - const ValueType *KOKKOS_RESTRICT /*A*/, const int /*as0*/, - const int /*as1*/, - /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const bool /*use_unit_diag*/, const int /*m*/, + const ScalarType /*alpha*/, const ValueType *KOKKOS_RESTRICT /*A*/, + const int /*as0*/, const int /*as1*/, + /**/ ValueType *KOKKOS_RESTRICT /*b*/, const int /*bs0*/) { assert(false && "Error: encounter dummy impl"); return 0; } @@ -163,25 +150,23 @@ struct TeamTrsvInternalUpper { template <> template KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, + const MemberType &member, const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { const ScalarType one(1.0), zero(0.0); if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); if (m <= 0) return 0; ValueType *KOKKOS_RESTRICT b0 = b; for (int p = (m - 1); p >= 0; --p) { const int 
iend = p; - const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; - /**/ ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0; + const ValueType *KOKKOS_RESTRICT a01 = A + p * as1; + /**/ ValueType *KOKKOS_RESTRICT beta1 = b + p * bs0; member.team_barrier(); ValueType local_beta1 = *beta1; @@ -193,9 +178,8 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( if (member.team_rank() == 0) *beta1 = local_beta1; } // member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, iend), - [&](const int &i) { b0[i * bs0] -= a01[i * as0] * local_beta1; }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, iend), + [&](const int &i) { b0[i * bs0] -= a01[i * as0] * local_beta1; }); } } return 0; @@ -204,9 +188,8 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( template <> template KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( - const MemberType &member, const bool use_unit_diag, const int m, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, + const MemberType &member, const bool use_unit_diag, const int m, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, /**/ ValueType *KOKKOS_RESTRICT b, const int bs0) { const ScalarType one(1.0), zero(0.0), minus_one(-1.0); @@ -216,8 +199,7 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( if (alpha == zero) KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, b, bs0); else { - if (alpha != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); + if (alpha != one) KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, alpha, b, bs0); if (m <= 0) return 0; InnerTrsmLeftUpperUnitDiag trsm_u(as0, as1, bs0, 0); @@ -225,12 +207,11 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( const int mb = mbAlgo; for (int pp = 0; pp < m; pp += mb) { - const int ptmp = (m - pp - mb), p = (ptmp < 0 ? 
0 : ptmp), - pb = (mb + (ptmp < 0) * ptmp); + const int ptmp = (m - pp - mb), p = (ptmp < 0 ? 0 : ptmp), pb = (mb + (ptmp < 0) * ptmp); // trsm update const ValueType *KOKKOS_RESTRICT Ap = A + p * as0 + p * as1; - /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; + /**/ ValueType *KOKKOS_RESTRICT bp = b + p * bs0; member.team_barrier(); if (member.team_rank() == 0) { @@ -242,8 +223,8 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( // gemv update member.team_barrier(); - KokkosBlas::Impl::TeamGemvInternal::invoke( - member, p, pb, minus_one, Ap - p * as0, as0, as1, bp, 1, one, b, bs0); + KokkosBlas::Impl::TeamGemvInternal::invoke(member, p, pb, minus_one, Ap - p * as0, as0, + as1, bp, 1, one, b, bs0); } } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Impl.hpp index 66c8f91ac9b6..1068bf9e544e 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Impl.hpp @@ -25,18 +25,16 @@ template struct SerialTrtri { template KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A) { - return SerialTrtriInternalLower::invoke( - ArgDiag::use_unit_diag, A.extent(0), A.extent(1), A.data(), - A.stride_0(), A.stride_1()); + return SerialTrtriInternalLower::invoke(ArgDiag::use_unit_diag, A.extent(0), A.extent(1), + A.data(), A.stride_0(), A.stride_1()); } }; template struct SerialTrtri { template KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A) { - return SerialTrtriInternalUpper::invoke( - ArgDiag::use_unit_diag, A.extent(0), A.extent(1), A.data(), A.stride(0), - A.stride(1)); + return SerialTrtriInternalUpper::invoke(ArgDiag::use_unit_diag, A.extent(0), A.extent(1), + A.data(), A.stride(0), A.stride(1)); } }; } // namespace KokkosBatched diff --git 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Internal.hpp index 2941b03ccfc2..f6b0b4bf6d8b 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Trtri_Serial_Internal.hpp @@ -25,27 +25,23 @@ namespace KokkosBatched { template struct SerialTrtriInternalLower { template - KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, - const int am, const int an, - ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int am, const int an, + ValueType *KOKKOS_RESTRICT A, const int as0, const int as1); }; template struct SerialTrtriInternalUpper { template - KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, - const int am, const int an, - ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1); + KOKKOS_INLINE_FUNCTION static int invoke(const bool use_unit_diag, const int am, const int an, + ValueType *KOKKOS_RESTRICT A, const int as0, const int as1); }; template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrtriInternalLower::invoke( - const bool use_unit_diag, const int am, const int /*an*/, - ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { +KOKKOS_INLINE_FUNCTION int SerialTrtriInternalLower::invoke(const bool use_unit_diag, + const int am, const int /*an*/, + ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1) { ValueType one(1.0), zero(0.0), A_ii; if (!use_unit_diag) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -74,14 +70,13 @@ SerialTrtriInternalLower::invoke( int A_col_vec_m = am - i - 1, A_col_vec_n = 1; // TRMV/TRMM −− x=Ax // A((j+1):n,j) = A((j+1):n,(j+1):n) ∗ A((j+1):n,j) ; - SerialTrmmInternalLeftLower::invoke( - use_unit_diag, false, A_subblock_m, A_subblock_n, A_col_vec_m, - A_col_vec_n, one, A_subblock, 
as0, as1, A_col_vec, as0, as1); + SerialTrmmInternalLeftLower::invoke(use_unit_diag, false, A_subblock_m, A_subblock_n, + A_col_vec_m, A_col_vec_n, one, A_subblock, as0, as1, + A_col_vec, as0, as1); // SCAL -- x=ax // A((j+1):n,j) = A_ii * A((j+1):n,j) - KokkosBlas::Impl::SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, - A_ii, A_col_vec, as0, as1); + KokkosBlas::Impl::SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, A_ii, A_col_vec, as0, as1); } } return 0; @@ -89,10 +84,10 @@ SerialTrtriInternalLower::invoke( template <> template -KOKKOS_INLINE_FUNCTION int -SerialTrtriInternalUpper::invoke( - const bool use_unit_diag, const int am, const int /*an*/, - ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { +KOKKOS_INLINE_FUNCTION int SerialTrtriInternalUpper::invoke(const bool use_unit_diag, + const int am, const int /*an*/, + ValueType *KOKKOS_RESTRICT A, + const int as0, const int as1) { ValueType one(1.0), zero(0.0), A_ii; if (!use_unit_diag) { @@ -123,14 +118,13 @@ SerialTrtriInternalUpper::invoke( // TRMV/TRMM −− x=Ax // A(1:(j-1),j) = A(1:(j-1),1:(j-1)) ∗ A(1:(j-1),j) ; // SerialTrmm - SerialTrmmInternalLeftUpper::invoke( - use_unit_diag, false, A_subblock_m, A_subblock_n, A_col_vec_m, - A_col_vec_n, one, A_subblock, as0, as1, A_col_vec, as0, as1); + SerialTrmmInternalLeftUpper::invoke(use_unit_diag, false, A_subblock_m, A_subblock_n, + A_col_vec_m, A_col_vec_n, one, A_subblock, as0, as1, + A_col_vec, as0, as1); // SCAL -- x=ax // A((j+1):n,j) = A_ii * A((j+1):n,j) - KokkosBlas::Impl::SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, - A_ii, A_col_vec, as0, as1); + KokkosBlas::Impl::SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, A_ii, A_col_vec, as0, as1); } } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Impl.hpp index b57a145ccb7b..de5ecebf94da 100644 --- 
a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Impl.hpp @@ -29,16 +29,13 @@ namespace KokkosBatched { template struct TeamVectorUTV { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const AViewType &A, const pViewType &p, - const UViewType &U, const VViewType &V, const wViewType &w, - int &matrix_rank) { - return TeamVectorUTV_Internal::invoke( - member, A.extent(0), A.extent(1), A.data(), A.stride(0), A.stride(1), - p.data(), p.stride(0), U.data(), U.stride(0), U.stride(1), V.data(), - V.stride(0), V.stride(1), w.data(), matrix_rank); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const pViewType &p, + const UViewType &U, const VViewType &V, const wViewType &w, + int &matrix_rank) { + return TeamVectorUTV_Internal::invoke(member, A.extent(0), A.extent(1), A.data(), A.stride(0), A.stride(1), + p.data(), p.stride(0), U.data(), U.stride(0), U.stride(1), V.data(), + V.stride(0), V.stride(1), w.data(), matrix_rank); } }; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Internal.hpp index 106646741453..e39dba9a40dc 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UTV_TeamVector_Internal.hpp @@ -32,15 +32,14 @@ namespace KokkosBatched { /// =================== struct TeamVectorUTV_Internal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, - const int n, // m = NumRows(A), n = NumCols(A) - /* */ ValueType *A, const int as0, const int as1, - /* */ IntType *p, const int ps0, - /* */ ValueType *U, const int us0, const int us1, - /* */ ValueType *V, const int vs0, const int vs1, - /* */ ValueType *w, 
// 3*m, tau, norm, householder workspace - /* */ int &matrix_rank) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, + const int n, // m = NumRows(A), n = NumCols(A) + /* */ ValueType *A, const int as0, const int as1, + /* */ IntType *p, const int ps0, + /* */ ValueType *U, const int us0, const int us1, + /* */ ValueType *V, const int vs0, const int vs1, + /* */ ValueType *w, // 3*m, tau, norm, householder workspace + /* */ int &matrix_rank) { typedef ValueType value_type; // typedef IntType int_type; @@ -51,25 +50,19 @@ struct TeamVectorUTV_Internal { value_type *work = w; matrix_rank = -1; - TeamVectorQR_WithColumnPivotingInternal ::invoke( - member, m, n, A, as0, as1, t, ts0, p, ps0, work, matrix_rank); + TeamVectorQR_WithColumnPivotingInternal ::invoke(member, m, n, A, as0, as1, t, ts0, p, ps0, work, matrix_rank); - TeamVectorQR_FormQ_Internal ::invoke(member, m, matrix_rank, matrix_rank, A, - as0, as1, t, ts0, U, us0, us1, work); + TeamVectorQR_FormQ_Internal ::invoke(member, m, matrix_rank, matrix_rank, A, as0, as1, t, ts0, U, us0, us1, work); member.team_barrier(); /// for rank deficient matrix if (matrix_rank < n) { const value_type zero(0); - TeamVectorSetLowerTriangularInternal ::invoke( - member, matrix_rank, matrix_rank, 1, zero, A, as0, as1); + TeamVectorSetLowerTriangularInternal ::invoke(member, matrix_rank, matrix_rank, 1, zero, A, as0, as1); - TeamVectorQR_Internal ::invoke(member, n, matrix_rank, A, as1, as0, t, - ts0, work); + TeamVectorQR_Internal ::invoke(member, n, matrix_rank, A, as1, as0, t, ts0, work); - TeamVectorQR_FormQ_Internal ::invoke(member, n, matrix_rank, matrix_rank, - A, as1, as0, t, ts0, V, vs1, vs0, - work); + TeamVectorQR_FormQ_Internal ::invoke(member, n, matrix_rank, matrix_rank, A, as1, as0, t, ts0, V, vs1, vs0, work); } return 0; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UpdateGivens_Internal.hpp 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UpdateGivens_Internal.hpp index 54e2791dbb6d..3f56e71422b4 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UpdateGivens_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_UpdateGivens_Internal.hpp @@ -30,9 +30,8 @@ namespace KokkosBatched { /// struct SerialUpdateGivensInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const Kokkos::pair &S, - /* */ Kokkos::pair &G) { + KOKKOS_INLINE_FUNCTION static int invoke(const Kokkos::pair &S, + /* */ Kokkos::pair &G) { const ValueType tmp = S.first * G.first - S.second * G.second; G.second = S.first * G.second + S.second * G.first; G.first = tmp; diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Arith.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Arith.hpp index f87492ea5af3..08628729bc3d 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Arith.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Arith.hpp @@ -24,23 +24,21 @@ namespace KokkosBatched { #define KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) Vector, l> -#define KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) \ - Vector, l> & +#define KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) Vector, l> & /// simd, simd #if defined(__KOKKOSBATCHED_ENABLE_AVX__) #if defined(__AVX512F__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator+( - const Vector, 8> &a, const Vector, 8> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator+(const Vector, 8> &a, + const Vector, 8> &b) { return _mm512_add_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) -operator+(const Vector >, 4> &a, - const Vector >, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator+( + const 
Vector >, 4> &a, const Vector >, 4> &b) { return _mm512_add_pd(a, b); } #endif @@ -48,16 +46,15 @@ operator+(const Vector >, 4> &a, #endif #if defined(__AVX__) || defined(__AVX2__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator+( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator+(const Vector, 4> &a, + const Vector, 4> &b) { return _mm256_add_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) -operator+(const Vector >, 2> &a, - const Vector >, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator+( + const Vector >, 2> &a, const Vector >, 2> &b) { return _mm256_add_pd(a, b); } #endif @@ -66,8 +63,8 @@ operator+(const Vector >, 2> &a, #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator+(const Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator+(const Vector, l> &a, + const Vector, l> &b) { Vector, l> r_val; if (std::is_fundamental::value) { KOKKOSKERNELS_FORCE_SIMD @@ -80,24 +77,24 @@ operator+(const Vector, l> &a, const Vector, l> &b) { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator+( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator+(const Vector, 2> &a, + const Vector, 2> &b) { float2 r_val; r_val.x = a.float2().x + b.float2().x; r_val.y = a.float2().y + b.float2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator+( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator+(const Vector, 2> &a, + const Vector, 2> &b) { double2 r_val; r_val.x 
= a.double2().x + b.double2().x; r_val.y = a.double2().y + b.double2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator+( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator+(const Vector, 4> &a, + const Vector, 4> &b) { float4 r_val; r_val.x = a.float4().x + b.float4().x; r_val.y = a.float4().y + b.float4().y; @@ -106,8 +103,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator+( return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator+( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator+(const Vector, 4> &a, + const Vector, 4> &b) { double4 r_val; r_val.x = a.double4().x + b.double4().x; r_val.y = a.double4().y + b.double4().y; @@ -119,9 +116,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator+( #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator+=(Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator+=( + Vector, l> &a, const Vector, l> &b) { a = a + b; return a; } @@ -129,37 +125,34 @@ operator+=(Vector, l> &a, const Vector, l> &b) { /// simd, real template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator+(const Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator+(const Vector, l> &a, + const T b) { return a + Vector, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator+(const T a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator+(const T a, + const Vector, l> &b) { return Vector, l>(a) + b; } template -KOKKOS_FORCEINLINE_FUNCTION static 
KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator+=(Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator+=( + Vector, l> &a, const T b) { a = a + b; return a; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator++(Vector, l> &a, int) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator++(Vector, l> &a, int) { Vector, l> a0 = a; a = a + typename Kokkos::ArithTraits::mag_type(1); return a0; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator++(Vector, l> &a) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator++( + Vector, l> &a) { a = a + typename Kokkos::ArithTraits::mag_type(1); return a; } @@ -167,23 +160,20 @@ operator++(Vector, l> &a) { /// simd complex, real template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator+(const Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator+( + const Vector >, l> &a, const T b) { return a + Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator+(const T a, const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator+( + const T a, const Vector >, l> &b) { return Vector >, l>(a) + b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator+=(Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator+=( + Vector >, l> &a, const T b) { a = a + b; return a; } @@ -191,26 +181,20 @@ operator+=(Vector >, l> &a, const T b) { /// simd 
complex, complex template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator+(const Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator+( + const Vector >, l> &a, const Kokkos::complex b) { return a + Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator+(const Kokkos::complex a, - const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator+( + const Kokkos::complex a, const Vector >, l> &b) { return Vector >, l>(a) + b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator+=(Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator+=( + Vector >, l> &a, const Kokkos::complex b) { a = a + b; return a; } @@ -222,16 +206,15 @@ operator+=(Vector >, l> &a, #if defined(__KOKKOSBATCHED_ENABLE_AVX__) #if defined(__AVX512F__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator-( - const Vector, 8> &a, const Vector, 8> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator-(const Vector, 8> &a, + const Vector, 8> &b) { return _mm512_sub_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) -operator-(const Vector >, 4> &a, - const Vector >, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator-( + const Vector >, 4> &a, const Vector >, 4> &b) { return _mm512_sub_pd(a, b); } #endif @@ -239,16 +222,15 @@ operator-(const Vector >, 4> &a, #endif #if defined(__AVX__) || defined(__AVX2__) KOKKOS_FORCEINLINE_FUNCTION -static 
KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator-( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator-(const Vector, 4> &a, + const Vector, 4> &b) { return _mm256_sub_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) -operator-(const Vector >, 2> &a, - const Vector >, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator-( + const Vector >, 2> &a, const Vector >, 2> &b) { return _mm256_sub_pd(a, b); } #endif @@ -257,8 +239,8 @@ operator-(const Vector >, 2> &a, #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator-(const Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator-(const Vector, l> &a, + const Vector, l> &b) { Vector, l> r_val; if (std::is_fundamental::value) { KOKKOSKERNELS_FORCE_SIMD @@ -271,24 +253,24 @@ operator-(const Vector, l> &a, const Vector, l> &b) { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator-( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator-(const Vector, 2> &a, + const Vector, 2> &b) { float2 r_val; r_val.x = a.float2().x - b.float2().x; r_val.y = a.float2().y - b.float2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator-( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator-(const Vector, 2> &a, + const Vector, 2> &b) { double2 r_val; r_val.x = a.double2().x - b.double2().x; r_val.y = a.double2().y - b.double2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator-( - const Vector, 4> &a, const 
Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator-(const Vector, 4> &a, + const Vector, 4> &b) { float4 r_val; r_val.x = a.float4().x - b.float4().x; r_val.y = a.float4().y - b.float4().y; @@ -297,8 +279,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator-( return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator-( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator-(const Vector, 4> &a, + const Vector, 4> &b) { double4 r_val; r_val.x = a.double4().x - b.double4().x; r_val.y = a.double4().y - b.double4().y; @@ -309,8 +291,7 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator-( #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator-(const Vector, l> &a) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator-(const Vector, l> &a) { Vector, l> r_val; if (std::is_fundamental::value) { KOKKOSKERNELS_FORCE_SIMD @@ -322,9 +303,8 @@ operator-(const Vector, l> &a) { } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator-=(Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator-=( + Vector, l> &a, const Vector, l> &b) { a = a - b; return a; } @@ -332,37 +312,34 @@ operator-=(Vector, l> &a, const Vector, l> &b) { /// simd, real template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator-(const Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator-(const Vector, l> &a, + const T b) { return a - Vector, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator-(const T a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static 
KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator-(const T a, + const Vector, l> &b) { return Vector, l>(a) - b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator-=(Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator-=( + Vector, l> &a, const T b) { a = a - b; return a; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator--(Vector, l> &a, int) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator--(Vector, l> &a, int) { Vector, l> a0 = a; a = a - typename Kokkos::ArithTraits::mag_type(1); return a0; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator--(Vector, l> &a) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator--( + Vector, l> &a) { a = a - typename Kokkos::ArithTraits::mag_type(1); return a; } @@ -370,23 +347,20 @@ operator--(Vector, l> &a) { /// simd complex, real template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator-(const Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator-( + const Vector >, l> &a, const T b) { return a - Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator-(const T a, const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator-( + const T a, const Vector >, l> &b) { return Vector >, l>(a) - b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator-=(Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static 
KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator-=( + Vector >, l> &a, const T b) { a = a - b; return a; } @@ -394,26 +368,20 @@ operator-=(Vector >, l> &a, const T b) { /// simd complex, complex template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator-(const Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator-( + const Vector >, l> &a, const Kokkos::complex b) { return a - Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator-(const Kokkos::complex a, - const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator-( + const Kokkos::complex a, const Vector >, l> &b) { return Vector >, l>(a) - b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator-=(Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator-=( + Vector >, l> &a, const Kokkos::complex b) { a = a - b; return a; } @@ -425,30 +393,25 @@ operator-=(Vector >, l> &a, #if defined(__KOKKOSBATCHED_ENABLE_AVX__) #if defined(__AVX512F__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator*( - const Vector, 8> &a, const Vector, 8> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator*(const Vector, 8> &a, + const Vector, 8> &b) { return _mm512_mul_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator - *(const Vector >, 4> &a, - const Vector >, 4> &b) { - const __m512d as = _mm512_permute_pd(a, 0x55), - br = _mm512_permute_pd(b, 0x00), - bi = _mm512_permute_pd(b, 0xff); 
+static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator*( + const Vector >, 4> &a, const Vector >, 4> &b) { + const __m512d as = _mm512_permute_pd(a, 0x55), br = _mm512_permute_pd(b, 0x00), bi = _mm512_permute_pd(b, 0xff); #if defined(__FMA__) // latency 7, throughput 0.5 return _mm512_fmaddsub_pd(a, br, _mm512_mul_pd(as, bi)); #else - return _mm512_add_pd( - _mm512_mul_pd(a, br), - _mm512_castsi512_pd(_mm512_xor_si512( - _mm512_castpd_si512(_mm512_mul_pd(as, bi)), - _mm512_castpd_si512(_mm512_mask_broadcast_f64x4( - _mm512_setzero_pd(), 0x55, _mm256_set1_pd(-0.0)))))); + return _mm512_add_pd(_mm512_mul_pd(a, br), + _mm512_castsi512_pd(_mm512_xor_si512(_mm512_castpd_si512(_mm512_mul_pd(as, bi)), + _mm512_castpd_si512(_mm512_mask_broadcast_f64x4( + _mm512_setzero_pd(), 0x55, _mm256_set1_pd(-0.0)))))); // const __mm512d cc = _mm512_mul_pd(as, bi); // return _mm512_mask_sub_pd(_mm512_mask_add_pd(_mm512_mul_pd(a, br), 0x55, // cc), 0xaa, cc); @@ -459,25 +422,21 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator #endif #if defined(__AVX__) || defined(__AVX2__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator*( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator*(const Vector, 4> &a, + const Vector, 4> &b) { return _mm256_mul_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static Vector >, 2> operator*( - const Vector >, 2> &a, - const Vector >, 2> &b) { - const __m256d as = _mm256_permute_pd(a, 0x5), br = _mm256_permute_pd(b, 0x0), - bi = _mm256_permute_pd(b, 0xf); +static Vector >, 2> operator*(const Vector >, 2> &a, + const Vector >, 2> &b) { + const __m256d as = _mm256_permute_pd(a, 0x5), br = _mm256_permute_pd(b, 0x0), bi = _mm256_permute_pd(b, 0xf); #if defined(__FMA__) return _mm256_fmaddsub_pd(a, br, _mm256_mul_pd(as, bi)); #else - return _mm256_add_pd(_mm256_mul_pd(a, br), - 
_mm256_xor_pd(_mm256_mul_pd(as, bi), - _mm256_set_pd(0.0, -0.0, 0.0, -0.0))); + return _mm256_add_pd(_mm256_mul_pd(a, br), _mm256_xor_pd(_mm256_mul_pd(as, bi), _mm256_set_pd(0.0, -0.0, 0.0, -0.0))); #endif } #endif @@ -486,8 +445,8 @@ static Vector >, 2> operator*( #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator*(const Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator*(const Vector, l> &a, + const Vector, l> &b) { Vector, l> r_val; if (std::is_fundamental::value) { KOKKOSKERNELS_FORCE_SIMD @@ -500,24 +459,24 @@ operator*(const Vector, l> &a, const Vector, l> &b) { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator*( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator*(const Vector, 2> &a, + const Vector, 2> &b) { float2 r_val; r_val.x = a.float2().x * b.float2().x; r_val.y = a.float2().y * b.float2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator*( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator*(const Vector, 2> &a, + const Vector, 2> &b) { double2 r_val; r_val.x = a.double2().x * b.double2().x; r_val.y = a.double2().y * b.double2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator*( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator*(const Vector, 4> &a, + const Vector, 4> &b) { float4 r_val; r_val.x = a.float4().x * b.float4().x; r_val.y = a.float4().y * b.float4().y; @@ -526,8 +485,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator*( return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static 
KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator*( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator*(const Vector, 4> &a, + const Vector, 4> &b) { double4 r_val; r_val.x = a.double4().x * b.double4().x; r_val.y = a.double4().y * b.double4().y; @@ -538,9 +497,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator*( #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator*=(Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator*=( + Vector, l> &a, const Vector, l> &b) { a = a * b; return a; } @@ -548,21 +506,20 @@ operator*=(Vector, l> &a, const Vector, l> &b) { /// simd, real template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator*(const Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator*(const Vector, l> &a, + const T b) { return a * Vector, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator*(const T a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator*(const T a, + const Vector, l> &b) { return Vector, l>(a) * b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator*=(Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator*=( + Vector, l> &a, const T b) { a = a * b; return a; } @@ -585,8 +542,8 @@ operator*(const Vector >, 4> &a, const double b) { #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator - *(const Vector >, 2> &a, const double b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) 
operator*( + const Vector >, 2> &a, const double b) { return _mm256_mul_pd(a, _mm256_set1_pd(b)); } #endif @@ -595,9 +552,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator*(const Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator*( + const Vector >, l> &a, const T b) { return a * Vector >, l>(b); } @@ -617,8 +573,8 @@ operator*(const double a, const Vector >, 4> &b) { #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator - *(const double a, const Vector >, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator*( + const double a, const Vector >, 2> &b) { return _mm256_mul_pd(_mm256_set1_pd(a), b); } #endif @@ -627,16 +583,14 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator*(const T a, const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator*( + const T a, const Vector >, l> &b) { return Vector >, l>(a) * b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator*=(Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator*=( + Vector >, l> &a, const T b) { a = a * b; return a; } @@ -644,26 +598,20 @@ operator*=(Vector >, l> &a, const T b) { /// simd complex, complex template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator*(const Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION 
static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator*( + const Vector >, l> &a, const Kokkos::complex b) { return a * Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator*(const Kokkos::complex a, - const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator*( + const Kokkos::complex a, const Vector >, l> &b) { return Vector >, l>(a) * b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator*=(Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator*=( + Vector >, l> &a, const Kokkos::complex b) { a = a * b; return a; } @@ -675,36 +623,30 @@ operator*=(Vector >, l> &a, #if defined(__KOKKOSBATCHED_ENABLE_AVX__) #if defined(__AVX512F__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator/( - const Vector, 8> &a, const Vector, 8> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 8) operator/(const Vector, 8> &a, + const Vector, 8> &b) { return _mm512_div_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) -operator/(const Vector >, 4> &a, - const Vector >, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator/( + const Vector >, 4> &a, const Vector >, 4> &b) { const __m512d as = _mm512_permute_pd(a, 0x55), cb = _mm512_castsi512_pd(_mm512_xor_si512( _mm512_castpd_si512(b), - _mm512_castpd_si512(_mm512_mask_broadcast_f64x4( - _mm512_setzero_pd(), 0xAA, _mm256_set1_pd(-0.0))))), - br = _mm512_permute_pd(cb, 0x00), - bi = _mm512_permute_pd(cb, 0xff); + _mm512_castpd_si512(_mm512_mask_broadcast_f64x4(_mm512_setzero_pd(), 0xAA, _mm256_set1_pd(-0.0))))), + br = 
_mm512_permute_pd(cb, 0x00), bi = _mm512_permute_pd(cb, 0xff); #if defined(__FMA__) return _mm512_div_pd(_mm512_fmaddsub_pd(a, br, _mm512_mul_pd(as, bi)), _mm512_fmadd_pd(br, br, _mm512_mul_pd(bi, bi))); #else - return _mm512_div_pd( - _mm512_add_pd( - _mm512_mul_pd(a, br), - _mm512_castsi512_pd(_mm512_xor_si512( - _mm512_castpd_si512(_mm512_mul_pd(as, bi)), - _mm512_castpd_si512(_mm512_mask_broadcast_f64x4( - _mm512_setzero_pd(), 0xAA, _mm256_set1_pd(-0.0)))))), - _mm512_add_pd(_mm512_mul_pd(br, br), _mm512_mul_pd(bi, bi))); + return _mm512_div_pd(_mm512_add_pd(_mm512_mul_pd(a, br), _mm512_castsi512_pd(_mm512_xor_si512( + _mm512_castpd_si512(_mm512_mul_pd(as, bi)), + _mm512_castpd_si512(_mm512_mask_broadcast_f64x4( + _mm512_setzero_pd(), 0xAA, _mm256_set1_pd(-0.0)))))), + _mm512_add_pd(_mm512_mul_pd(br, br), _mm512_mul_pd(bi, bi))); // const __mm512d cc = _mm512_mul_pd(as, bi); // return _mm512_div_pd(_mm512_mask_sub_pd(_mm512_mask_add_pd(_mm512_mul_pd(a, // br), 0x55, cc), 0xaa, cc), @@ -718,30 +660,24 @@ operator/(const Vector >, 4> &a, #if defined(__AVX__) || defined(__AVX2__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator/( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator/(const Vector, 4> &a, + const Vector, 4> &b) { return _mm256_div_pd(a, b); } #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) -operator/(Vector >, 2> const &a, - Vector >, 2> const &b) { - const __m256d as = _mm256_permute_pd(a, 0x5), - cb = _mm256_xor_pd(b, _mm256_set_pd(-0.0, 0.0, -0.0, 0.0)), - br = _mm256_permute_pd(cb, 0x0), - bi = _mm256_permute_pd(cb, 0xf); +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 2) operator/( + Vector >, 2> const &a, Vector >, 2> const &b) { + const __m256d as = _mm256_permute_pd(a, 0x5), cb = _mm256_xor_pd(b, _mm256_set_pd(-0.0, 0.0, -0.0, 0.0)), + br = 
_mm256_permute_pd(cb, 0x0), bi = _mm256_permute_pd(cb, 0xf); #if defined(__FMA__) - return _mm256_div_pd( - _mm256_fmaddsub_pd(a, br, _mm256_mul_pd(as, bi)), - _mm256_add_pd(_mm256_mul_pd(br, br), _mm256_mul_pd(bi, bi))); + return _mm256_div_pd(_mm256_fmaddsub_pd(a, br, _mm256_mul_pd(as, bi)), + _mm256_add_pd(_mm256_mul_pd(br, br), _mm256_mul_pd(bi, bi))); #else return _mm256_div_pd( - _mm256_add_pd(_mm256_mul_pd(a, br), - _mm256_xor_pd(_mm256_mul_pd(as, bi), - _mm256_set_pd(0.0, -0.0, 0.0, -0.0))), + _mm256_add_pd(_mm256_mul_pd(a, br), _mm256_xor_pd(_mm256_mul_pd(as, bi), _mm256_set_pd(0.0, -0.0, 0.0, -0.0))), _mm256_add_pd(_mm256_mul_pd(br, br), _mm256_mul_pd(bi, bi))); #endif } @@ -751,8 +687,8 @@ operator/(Vector >, 2> const &a, #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator/(const Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator/(const Vector, l> &a, + const Vector, l> &b) { Vector, l> r_val; if (std::is_fundamental::value) { KOKKOSKERNELS_FORCE_SIMD @@ -765,24 +701,24 @@ operator/(const Vector, l> &a, const Vector, l> &b) { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator/( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 2) operator/(const Vector, 2> &a, + const Vector, 2> &b) { float2 r_val; r_val.x = a.float2().x / b.float2().x; r_val.y = a.float2().y / b.float2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator/( - const Vector, 2> &a, const Vector, 2> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 2) operator/(const Vector, 2> &a, + const Vector, 2> &b) { double2 r_val; r_val.x = a.double2().x / b.double2().x; r_val.y = a.double2().y / b.double2().y; return r_val; } KOKKOS_FORCEINLINE_FUNCTION 
-static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator/( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator/(const Vector, 4> &a, + const Vector, 4> &b) { float4 r_val; r_val.x = a.float4().x / b.float4().x; r_val.y = a.float4().y / b.float4().y; @@ -791,8 +727,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(float, 4) operator/( return r_val; } KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator/( - const Vector, 4> &a, const Vector, 4> &b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator/(const Vector, 4> &a, + const Vector, 4> &b) { double4 r_val; r_val.x = a.double4().x / b.double4().x; r_val.y = a.double4().y / b.double4().y; @@ -803,9 +739,8 @@ static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(double, 4) operator/( #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator/=(Vector, l> &a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator/=( + Vector, l> &a, const Vector, l> &b) { a = a / b; return a; } @@ -816,8 +751,8 @@ operator/=(Vector, l> &a, const Vector, l> &b) { #if !defined(KOKKOS_COMPILER_GNU) KOKKOS_FORCEINLINE_FUNCTION -static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) -operator/(const Vector >, 4> &a, const double b) { +static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, 4) operator/( + const Vector >, 4> &a, const double b) { return _mm512_div_pd(a, _mm512_set1_pd(b)); } #endif @@ -826,21 +761,20 @@ operator/(const Vector >, 4> &a, const double b) { #endif template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator/(const Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator/(const Vector, l> &a, + const T b) { return a / Vector, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION 
static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) -operator/(const T a, const Vector, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(T, l) operator/(const T a, + const Vector, l> &b) { return Vector, l>(a) / b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - T, l) -operator/=(Vector, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(T, l) operator/=( + Vector, l> &a, const T b) { a = a / b; return a; } @@ -848,23 +782,20 @@ operator/=(Vector, l> &a, const T b) { /// simd complex, real template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator/(const Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator/( + const Vector >, l> &a, const T b) { return a / Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator/(const T a, const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator/( + const T a, const Vector >, l> &b) { return Vector >, l>(a) / b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator/=(Vector >, l> &a, const T b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator/=( + Vector >, l> &a, const T b) { a = a / b; return a; } @@ -872,26 +803,20 @@ operator/=(Vector >, l> &a, const T b) { /// simd complex, complex template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator/(const Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator/( + const Vector >, l> &a, const 
Kokkos::complex b) { return a / Vector >, l>(b); } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE( - Kokkos::complex, l) -operator/(const Kokkos::complex a, - const Vector >, l> &b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_TYPE(Kokkos::complex, l) operator/( + const Kokkos::complex a, const Vector >, l> &b) { return Vector >, l>(a) / b; } template -KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE( - Kokkos::complex, l) -operator/=(Vector >, l> &a, - const Kokkos::complex b) { +KOKKOS_FORCEINLINE_FUNCTION static KOKKOSKERNELS_SIMD_ARITH_RETURN_REFERENCE_TYPE(Kokkos::complex, l) operator/=( + Vector >, l> &a, const Kokkos::complex b) { a = a / b; return a; } diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Logical.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Logical.hpp index c8c07e97c4cb..f289d5be095a 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Logical.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Logical.hpp @@ -22,16 +22,13 @@ namespace KokkosBatched { -#define KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) \ - typename std::enable_if::value && \ - std::is_integral::value, \ +#define KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) \ + typename std::enable_if::value && std::is_integral::value, \ const Vector, l> >::type template -KOKKOS_INLINE_FUNCTION static - typename std::enable_if::value, - const Vector, l> >::type - operator!(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static typename std::enable_if::value, const Vector, l> >::type +operator!(const Vector, l> &a) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep @@ -44,9 +41,8 @@ KOKKOS_INLINE_FUNCTION static } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, - T1, l) -operator||(const Vector, l> &a, 
const Vector, l> &b) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) operator||( + const Vector, l> &a, const Vector, l> &b) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep @@ -59,9 +55,8 @@ operator||(const Vector, l> &a, const Vector, l> &b) { } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, - T1, l) -operator&&(const Vector, l> &a, const Vector, l> &b) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) operator&&( + const Vector, l> &a, const Vector, l> &b) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep @@ -74,9 +69,8 @@ operator&&(const Vector, l> &a, const Vector, l> &b) { } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, - T1, l) -operator||(const Vector, l> &a, const T1 &b) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) operator||( + const Vector, l> &a, const T1 &b) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep @@ -89,9 +83,8 @@ operator||(const Vector, l> &a, const T1 &b) { } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, - T1, l) -operator&&(const Vector, l> &a, const T1 &b) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) operator&&( + const Vector, l> &a, const T1 &b) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep @@ -104,9 +97,8 @@ operator&&(const Vector, l> &a, const T1 &b) { } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, - T1, l) -operator||(const T0 &a, const Vector, l> &b) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) operator||( + const T0 &a, const Vector, l> &b) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep @@ -119,9 +111,8 @@ operator||(const T0 &a, 
const Vector, l> &b) { } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, - T1, l) -operator&&(const T0 &a, const Vector, l> &b) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_LOGICAL_RETURN_BOOL_TYPE(T0, T1, l) operator&&( + const T0 &a, const Vector, l> &b) { Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) #pragma ivdep diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Math.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Math.hpp index 69bbb53c6b79..eefaf4ce0d7b 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Math.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Math.hpp @@ -24,14 +24,12 @@ namespace KokkosBatched { #define KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) Vector, l> #define KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) \ - typename std::enable_if::value, \ - Vector, l> >::type + typename std::enable_if::value, Vector, l> >::type /// simd template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) - sqrt(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) sqrt(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -46,8 +44,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) - cbrt(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) cbrt(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -62,8 +59,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) - log(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static 
KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) log(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -78,8 +74,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) - log10(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) log10(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -94,8 +89,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) - exp(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T, l) exp(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -138,8 +132,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_TYPE(T0, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - sin(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) sin(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -154,8 +147,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - cos(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) cos(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -170,8 +162,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - tan(const Vector, l> &a) { 
+KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) tan(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -186,8 +177,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - sinh(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) sinh(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -202,8 +192,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - cosh(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) cosh(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -218,8 +207,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - tanh(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) tanh(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -234,8 +222,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - asin(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) asin(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -250,8 +237,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION 
static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - acos(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) acos(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) @@ -266,8 +252,7 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) - atan(const Vector, l> &a) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MATH_RETURN_FLOAT_TYPE(T, l) atan(const Vector, l> &a) { typedef Kokkos::ArithTraits ats; Vector, l> r_val; #if defined(KOKKOS_ENABLE_PRAGMA_IVDEP) diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Misc.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Misc.hpp index a95a75277994..02f717d4583f 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Misc.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Misc.hpp @@ -30,17 +30,13 @@ namespace KokkosBatched { // scalar, scalar template -KOKKOS_INLINE_FUNCTION static T conditional_assign(const bool cond, - const T &if_true_val, - const T &if_false_val) { +KOKKOS_INLINE_FUNCTION static T conditional_assign(const bool cond, const T &if_true_val, const T &if_false_val) { return cond ? if_true_val : if_false_val; } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE( - T0, T1, T2, l) - conditional_assign(/* */ T0 &r_val, const bool cond, const T1 &if_true_val, - const T2 &if_false_val) { +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE(T0, T1, T2, l) + conditional_assign(/* */ T0 &r_val, const bool cond, const T1 &if_true_val, const T2 &if_false_val) { r_val = cond ? 
if_true_val : if_false_val; } @@ -48,23 +44,18 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TY template KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_RETURN_TYPE(T, l) - conditional_assign(const Vector, l> &cond, - const Vector, l> &if_true_val, + conditional_assign(const Vector, l> &cond, const Vector, l> &if_true_val, const T &if_false_val) { Vector, l> r_val; - for (int i = 0; i < l; ++i) - r_val[i] = cond[i] ? if_true_val[i] : if_false_val; + for (int i = 0; i < l; ++i) r_val[i] = cond[i] ? if_true_val[i] : if_false_val; return r_val; } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE( - T0, T1, T2, l) conditional_assign(/* */ Vector, l> &r_val, - const Vector, l> &cond, - const Vector, l> &if_true_val, - const T2 &if_false_val) { - for (int i = 0; i < l; ++i) - r_val[i] = cond[i] ? if_true_val[i] : if_false_val; +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE(T0, T1, T2, l) + conditional_assign(/* */ Vector, l> &r_val, const Vector, l> &cond, + const Vector, l> &if_true_val, const T2 &if_false_val) { + for (int i = 0; i < l; ++i) r_val[i] = cond[i] ? if_true_val[i] : if_false_val; } // scalar, vector @@ -74,74 +65,57 @@ KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_RETURN_TYPE(T, l) conditional_assign(const Vector, l> &cond, const T &if_true_val, const Vector, l> &if_false_val) { Vector, l> r_val; - for (int i = 0; i < l; ++i) - r_val[i] = cond[i] ? if_true_val : if_false_val[i]; + for (int i = 0; i < l; ++i) r_val[i] = cond[i] ? 
if_true_val : if_false_val[i]; return r_val; } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE( - T0, T1, T2, l) - conditional_assign(/* */ Vector, l> &r_val, - const Vector, l> &cond, const T1 &if_true_val, +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE(T0, T1, T2, l) + conditional_assign(/* */ Vector, l> &r_val, const Vector, l> &cond, const T1 &if_true_val, const Vector, l> &if_false_val) { - for (int i = 0; i < l; ++i) - r_val[i] = cond[i] ? if_true_val : if_false_val[i]; + for (int i = 0; i < l; ++i) r_val[i] = cond[i] ? if_true_val : if_false_val[i]; } // vector, vector template KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_RETURN_TYPE(T, l) - conditional_assign(const Vector, l> &cond, - const Vector, l> &if_true_val, + conditional_assign(const Vector, l> &cond, const Vector, l> &if_true_val, const Vector, l> &if_false_val) { Vector, l> r_val; - for (int i = 0; i < l; ++i) - r_val[i] = cond[i] ? if_true_val[i] : if_false_val[i]; + for (int i = 0; i < l; ++i) r_val[i] = cond[i] ? if_true_val[i] : if_false_val[i]; return r_val; } template -KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE( - T0, T1, T2, l) conditional_assign(/* */ Vector, l> &r_val, - const Vector, l> &cond, - const Vector, l> &if_true_val, - const Vector, l> &if_false_val) { - for (int i = 0; i < l; ++i) - r_val[i] = cond[i] ? if_true_val[i] : if_false_val[i]; +KOKKOS_INLINE_FUNCTION static KOKKOSKERNELS_SIMD_MISC_CONVERTIBLE_RETURN_VOID_TYPE(T0, T1, T2, l) + conditional_assign(/* */ Vector, l> &r_val, const Vector, l> &cond, + const Vector, l> &if_true_val, const Vector, l> &if_false_val) { + for (int i = 0; i < l; ++i) r_val[i] = cond[i] ? 
if_true_val[i] : if_false_val[i]; } template -KOKKOS_INLINE_FUNCTION static T reduce(const Vector, l> &val, - const BinaryOp &func) { +KOKKOS_INLINE_FUNCTION static T reduce(const Vector, l> &val, const BinaryOp &func) { T r_val = val[0]; for (int i = 1; i < l; ++i) r_val = func(r_val, val[i]); return r_val; } template -KOKKOS_INLINE_FUNCTION static T reduce(const Vector, l> &val, - const BinaryOp &func, const T init) { +KOKKOS_INLINE_FUNCTION static T reduce(const Vector, l> &val, const BinaryOp &func, const T init) { T r_val = init; for (int i = 0; i < l; ++i) r_val = func(r_val, val[i]); return r_val; } template -KOKKOS_INLINE_FUNCTION static bool is_all_true( - const Vector, l> &cond) { - return reduce(cond, [](const bool left, const bool right) -> bool { - return (left && right); - }); +KOKKOS_INLINE_FUNCTION static bool is_all_true(const Vector, l> &cond) { + return reduce(cond, [](const bool left, const bool right) -> bool { return (left && right); }); } template -KOKKOS_INLINE_FUNCTION static bool is_any_true( - const Vector, l> &cond) { - return reduce(cond, [](const bool left, const bool right) -> bool { - return left || right; - }); +KOKKOS_INLINE_FUNCTION static bool is_any_true(const Vector, l> &cond) { + return reduce(cond, [](const bool left, const bool right) -> bool { return left || right; }); } template diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Relation.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Relation.hpp index d49c6f35f992..c95678019250 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Relation.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_Relation.hpp @@ -25,13 +25,13 @@ namespace KokkosBatched { // vector, vector #undef KOKKOSBATCHED_RELATION_OPERATOR -#define KOKKOSBATCHED_RELATION_OPERATOR(op) \ - template \ - KOKKOS_INLINE_FUNCTION const Vector, l> operator op( \ - const Vector, l> &a, const Vector, l> 
&b) { \ - Vector, l> r_val; \ - for (int i = 0; i < l; ++i) r_val[i] = a[i] op b[i]; \ - return r_val; \ +#define KOKKOSBATCHED_RELATION_OPERATOR(op) \ + template \ + KOKKOS_INLINE_FUNCTION const Vector, l> operator op(const Vector, l> &a, \ + const Vector, l> &b) { \ + Vector, l> r_val; \ + for (int i = 0; i < l; ++i) r_val[i] = a[i] op b[i]; \ + return r_val; \ } KOKKOSBATCHED_RELATION_OPERATOR(<) @@ -43,13 +43,12 @@ KOKKOSBATCHED_RELATION_OPERATOR(!=) // vector, scalar #undef KOKKOSBATCHED_RELATION_OPERATOR -#define KOKKOSBATCHED_RELATION_OPERATOR(op) \ - template \ - KOKKOS_INLINE_FUNCTION const Vector, l> operator op( \ - const Vector, l> &a, const T2 &b) { \ - Vector, l> r_val; \ - for (int i = 0; i < l; ++i) r_val[i] = a[i] op b; \ - return r_val; \ +#define KOKKOSBATCHED_RELATION_OPERATOR(op) \ + template \ + KOKKOS_INLINE_FUNCTION const Vector, l> operator op(const Vector, l> &a, const T2 &b) { \ + Vector, l> r_val; \ + for (int i = 0; i < l; ++i) r_val[i] = a[i] op b; \ + return r_val; \ } KOKKOSBATCHED_RELATION_OPERATOR(<) @@ -61,13 +60,12 @@ KOKKOSBATCHED_RELATION_OPERATOR(!=) // scalar, vector #undef KOKKOSBATCHED_RELATION_OPERATOR -#define KOKKOSBATCHED_RELATION_OPERATOR(op) \ - template \ - KOKKOS_INLINE_FUNCTION const Vector, l> operator op( \ - const T1 &a, const Vector, l> &b) { \ - Vector, l> r_val; \ - for (int i = 0; i < l; ++i) r_val[i] = a op b[i]; \ - return r_val; \ +#define KOKKOSBATCHED_RELATION_OPERATOR(op) \ + template \ + KOKKOS_INLINE_FUNCTION const Vector, l> operator op(const T1 &a, const Vector, l> &b) { \ + Vector, l> r_val; \ + for (int i = 0; i < l; ++i) r_val[i] = a op b[i]; \ + return r_val; \ } KOKKOSBATCHED_RELATION_OPERATOR(<) diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_View.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_View.hpp index 3fb7ac872bf6..60e5e43e5796 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_View.hpp +++ 
b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Vector_SIMD_View.hpp @@ -63,52 +63,38 @@ struct SimdViewAccess { } template - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if::value, size_t>::type - extent(const iType &r) const { + KOKKOS_INLINE_FUNCTION constexpr typename std::enable_if::value, size_t>::type extent( + const iType &r) const { return _a.extent(r) * (r == PackDim::value ? vector_length : 1); } template - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if::value, int>::type - extent_int(const iType &r) const { - return static_cast(_a.extent(r) * - (r == PackDim::value ? vector_length : 1)); + KOKKOS_INLINE_FUNCTION constexpr typename std::enable_if::value, int>::type extent_int( + const iType &r) const { + return static_cast(_a.extent(r) * (r == PackDim::value ? vector_length : 1)); } - KOKKOS_INLINE_FUNCTION constexpr size_t size() const { - return (_a.size() * vector_length); - } + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return (_a.size() * vector_length); } - KOKKOS_INLINE_FUNCTION constexpr size_t span() const { - return _a.span() * vector_length; - } - KOKKOS_INLINE_FUNCTION constexpr bool span_span_is_contiguous() const { - return _a.span_span_is_contiguous(); - } - KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { - return _a.data(); - } + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return _a.span() * vector_length; } + KOKKOS_INLINE_FUNCTION constexpr bool span_span_is_contiguous() const { return _a.span_span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return _a.data(); } /// rank 0 /// this does not make sense as this is flat view to simd view /// rank 1 template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - KokkosKernels::Impl::are_integral_v && 1 == ViewType::rank, - reference_type> - operator()(const I0 &i0, Args... 
/*args*/) const { + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t && 1 == ViewType::rank, reference_type> + operator()(const I0 &i0, Args... /*args*/) const { return _a(i0 / vector_length)[i0 % vector_length]; } /// rank 2 template KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t && - 2 == ViewType::rank, - reference_type> + std::enable_if_t && 2 == ViewType::rank, reference_type> operator()(const I0 &i0, const I1 &i1, Args... /*args*/) const { switch (PackDim::value) { case 0: return _a(i0 / vector_length, i1)[i0 % vector_length]; @@ -120,11 +106,9 @@ struct SimdViewAccess { /// rank 3 template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - KokkosKernels::Impl::are_integral_v && - 3 == ViewType::rank, - reference_type> - operator()(const I0 &i0, const I1 &i1, const I2 &i2, Args... /*args*/) const { + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t && 3 == ViewType::rank, reference_type> + operator()(const I0 &i0, const I1 &i1, const I2 &i2, Args... /*args*/) const { switch (PackDim::value) { case 0: return _a(i0 / vector_length, i1, i2)[i0 % vector_length]; case 1: return _a(i0, i1 / vector_length, i2)[i1 % vector_length]; @@ -137,11 +121,8 @@ struct SimdViewAccess { /// rank 4 template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - KokkosKernels::Impl::are_integral_v && - 4 == ViewType::rank, - reference_type> - operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - Args... /*args*/) const { + KokkosKernels::Impl::are_integral_v && 4 == ViewType::rank, reference_type> + operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, Args... 
/*args*/) const { switch (PackDim::value) { case 0: return _a(i0 / vector_length, i1, i2, i3)[i0 % vector_length]; case 1: return _a(i0, i1 / vector_length, i2, i3)[i1 % vector_length]; @@ -153,14 +134,10 @@ struct SimdViewAccess { } /// rank 5 - template + template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - KokkosKernels::Impl::are_integral_v && - 5 == ViewType::rank, - reference_type> - operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, Args... /*args*/) const { + KokkosKernels::Impl::are_integral_v && 5 == ViewType::rank, reference_type> + operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, const I4 &i4, Args... /*args*/) const { switch (PackDim::value) { case 0: return _a(i0 / vector_length, i1, i2, i3, i4)[i0 % vector_length]; case 1: return _a(i0, i1 / vector_length, i2, i3, i4)[i1 % vector_length]; @@ -173,25 +150,17 @@ struct SimdViewAccess { } /// rank 6 - template + template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - KokkosKernels::Impl::are_integral_v && - 6 == ViewType::rank, - reference_type> - operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, Args... /*args*/) const { + KokkosKernels::Impl::are_integral_v && 6 == ViewType::rank, reference_type> + operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, const I4 &i4, const I5 &i5, + Args... 
/*args*/) const { switch (PackDim::value) { - case 0: - return _a(i0 / vector_length, i1, i2, i3, i4, i5)[i0 % vector_length]; - case 1: - return _a(i0, i1 / vector_length, i2, i3, i4, i5)[i1 % vector_length]; - case 2: - return _a(i0, i1, i2 / vector_length, i3, i4, i5)[i2 % vector_length]; - case 3: - return _a(i0, i1, i2, i3 / vector_length, i4, i5)[i3 % vector_length]; - case 4: - return _a(i0, i1, i2, i3, i4 / vector_length, i5)[i4 % vector_length]; + case 0: return _a(i0 / vector_length, i1, i2, i3, i4, i5)[i0 % vector_length]; + case 1: return _a(i0, i1 / vector_length, i2, i3, i4, i5)[i1 % vector_length]; + case 2: return _a(i0, i1, i2 / vector_length, i3, i4, i5)[i2 % vector_length]; + case 3: return _a(i0, i1, i2, i3 / vector_length, i4, i5)[i3 % vector_length]; + case 4: return _a(i0, i1, i2, i3, i4 / vector_length, i5)[i4 % vector_length]; case 5: break; default: break; } @@ -199,35 +168,18 @@ struct SimdViewAccess { } /// rank 7 - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t && - 7 == ViewType::rank, - reference_type> - operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, const I6 &i6, - Args... /*args*/) const { + template + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + KokkosKernels::Impl::are_integral_v && 7 == ViewType::rank, reference_type> + operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, const I4 &i4, const I5 &i5, const I6 &i6, + Args... 
/*args*/) const { switch (PackDim::value) { - case 0: - return _a(i0 / vector_length, i1, i2, i3, i4, i5, - i6)[i0 % vector_length]; - case 1: - return _a(i0, i1 / vector_length, i2, i3, i4, i5, - i6)[i1 % vector_length]; - case 2: - return _a(i0, i1, i2 / vector_length, i3, i4, i5, - i6)[i2 % vector_length]; - case 3: - return _a(i0, i1, i2, i3 / vector_length, i4, i5, - i6)[i3 % vector_length]; - case 4: - return _a(i0, i1, i2, i3, i4 / vector_length, i5, - i6)[i4 % vector_length]; - case 5: - return _a(i0, i1, i2, i3, i4, i5 / vector_length, - i6)[i5 % vector_length]; + case 0: return _a(i0 / vector_length, i1, i2, i3, i4, i5, i6)[i0 % vector_length]; + case 1: return _a(i0, i1 / vector_length, i2, i3, i4, i5, i6)[i1 % vector_length]; + case 2: return _a(i0, i1, i2 / vector_length, i3, i4, i5, i6)[i2 % vector_length]; + case 3: return _a(i0, i1, i2, i3 / vector_length, i4, i5, i6)[i3 % vector_length]; + case 4: return _a(i0, i1, i2, i3, i4 / vector_length, i5, i6)[i4 % vector_length]; + case 5: return _a(i0, i1, i2, i3, i4, i5 / vector_length, i6)[i5 % vector_length]; case 6: break; default: break; } @@ -235,43 +187,25 @@ struct SimdViewAccess { } /// rank 8 - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t && - 8 == ViewType::rank, - reference_type> - operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, const I6 &i6, const I7 &i7, - Args... /*args*/) const { + template + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + KokkosKernels::Impl::are_integral_v && 8 == ViewType::rank, + reference_type> + operator()(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, const I4 &i4, const I5 &i5, const I6 &i6, + const I7 &i7, Args... 
/*args*/) const { switch (PackDim::value) { - case 0: - return _a(i0 / vector_length, i1, i2, i3, i4, i5, i6, - i7)[i0 % vector_length]; - case 1: - return _a(i0, i1 / vector_length, i2, i3, i4, i5, i6, - i7)[i1 % vector_length]; - case 2: - return _a(i0, i1, i2 / vector_length, i3, i4, i5, i6, - i7)[i2 % vector_length]; - case 3: - return _a(i0, i1, i2, i3 / vector_length, i4, i5, i6, - i7)[i3 % vector_length]; - case 4: - return _a(i0, i1, i2, i3, i4 / vector_length, i5, i6, - i7)[i4 % vector_length]; - case 5: - return _a(i0, i1, i2, i3, i4, i5 / vector_length, i6, - i7)[i5 % vector_length]; - case 6: - return _a(i0, i1, i2, i3, i4, i5, i6 / vector_length, - i7)[i6 % vector_length]; + case 0: return _a(i0 / vector_length, i1, i2, i3, i4, i5, i6, i7)[i0 % vector_length]; + case 1: return _a(i0, i1 / vector_length, i2, i3, i4, i5, i6, i7)[i1 % vector_length]; + case 2: return _a(i0, i1, i2 / vector_length, i3, i4, i5, i6, i7)[i2 % vector_length]; + case 3: return _a(i0, i1, i2, i3 / vector_length, i4, i5, i6, i7)[i3 % vector_length]; + case 4: return _a(i0, i1, i2, i3, i4 / vector_length, i5, i6, i7)[i4 % vector_length]; + case 5: return _a(i0, i1, i2, i3, i4, i5 / vector_length, i6, i7)[i5 % vector_length]; + case 6: return _a(i0, i1, i2, i3, i4, i5, i6 / vector_length, i7)[i6 % vector_length]; case 7: break; default: break; } - return _a(i0, i1, i2, i3, i4, i5, i6, - i7 / vector_length)[i7 % vector_length]; + return _a(i0, i1, i2, i3, i4, i5, i6, i7 / vector_length)[i7 % vector_length]; } }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_WilkinsonShift_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_WilkinsonShift_Serial_Internal.hpp index 0d3a9b3df997..a23a9ea4d03d 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_WilkinsonShift_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_WilkinsonShift_Serial_Internal.hpp @@ -29,12 
+29,10 @@ namespace KokkosBatched { /// struct SerialWilkinsonShiftInternal { template - KOKKOS_INLINE_FUNCTION static int invoke( - const ValueType a, const ValueType b, const ValueType c, - const ValueType d, - /* */ Kokkos::complex* lambda1, - /* */ Kokkos::complex* lambda2, - /* */ bool* is_complex) { + KOKKOS_INLINE_FUNCTION static int invoke(const ValueType a, const ValueType b, const ValueType c, const ValueType d, + /* */ Kokkos::complex* lambda1, + /* */ Kokkos::complex* lambda2, + /* */ bool* is_complex) { /// compute eigenvalues of 2x2 system [a b; /// c d] /// when the system has a real complex values, diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp index 5e5b7e13ccfc..988bd30c931b 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp @@ -27,11 +27,9 @@ namespace KokkosBatched { /// ==================== struct SerialXpayInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT X, + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType alpha, const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, - const int ys0) { + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -44,10 +42,9 @@ struct SerialXpayInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, 
const int ys0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -60,17 +57,14 @@ struct SerialXpayInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const int m, const int n, const ScalarType* KOKKOS_RESTRICT alpha, - const int alphas0, const ValueType* KOKKOS_RESTRICT X, const int xs0, - const int xs1, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ScalarType* KOKKOS_RESTRICT alpha, + const int alphas0, const ValueType* KOKKOS_RESTRICT X, const int xs0, + const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { if (xs0 > xs1) - for (int i = 0; i < m; ++i) - invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); + for (int i = 0; i < m; ++i) invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); else - for (int j = 0; j < n; ++j) - invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); + for (int j = 0; j < n; ++j) invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); return 0; } @@ -81,12 +75,9 @@ struct SerialXpayInternal { /// ==================== struct TeamXpayInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const int m, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT X, - const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, - const int ys0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { Y[i * ys0] *= alpha; Y[i * ys0] += X[i * xs0]; @@ -96,11 +87,10 @@ struct TeamXpayInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT 
X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { Y[i * ys0] *= alpha[i * alphas0]; Y[i * ys0] += X[i * xs0]; @@ -110,23 +100,18 @@ struct TeamXpayInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, const int n, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const int n, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { if (m > n) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int& i) { - SerialXpayInternal::invoke(n, alpha[i * alphas0], X + i * xs0, xs1, - Y + i * ys0, ys1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { + SerialXpayInternal::invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int& j) { - SerialXpayInternal::invoke(m, alpha, alphas0, X + j * xs1, xs0, - Y + j * ys1, ys0); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int& j) { + SerialXpayInternal::invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); + }); } // member.team_barrier(); return 0; @@ -138,12 +123,9 @@ struct TeamXpayInternal { /// ======================== struct TeamVectorXpayInternal { template - 
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const int m, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT X, - const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, - const int ys0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { Y[i * ys0] *= alpha; Y[i * ys0] += X[i * xs0]; @@ -153,11 +135,10 @@ struct TeamVectorXpayInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { Y[i * ys0] *= alpha[i * alphas0]; Y[i * ys0] += X[i * xs0]; @@ -166,20 +147,17 @@ struct TeamVectorXpayInternal { return 0; } - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const int m, const int n, - const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, - const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, - /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, m * n), - [&](const int& iTemp) { - int i, j; - getIndices(iTemp, n, m, j, i); - Y[i * ys0 + j * ys1] *= alpha[i * alphas0]; - Y[i * ys0 + j * ys1] += X[i * xs0 + j * xs1]; - }); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const int m, const int n, + const 
ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, m * n), [&](const int& iTemp) { + int i, j; + getIndices(iTemp, n, m, j, i); + Y[i * ys0 + j * ys1] *= alpha[i * alphas0]; + Y[i * ys0 + j * ys1] += X[i * xs0 + j * xs1]; + }); // member.team_barrier(); return 0; } @@ -189,55 +167,34 @@ struct TeamVectorXpayInternal { /// Serial Impl /// =========== template -KOKKOS_INLINE_FUNCTION int SerialXpay::invoke(const alphaViewType& alpha, - const ViewType& X, - const ViewType& Y) { +KOKKOS_INLINE_FUNCTION int SerialXpay::invoke(const alphaViewType& alpha, const ViewType& X, const ViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::xpay: ViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); - static_assert(ViewType::rank == 2, - "KokkosBatched::xpay: ViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::xpay: alphaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::xpay: ViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); + static_assert(ViewType::rank == 2, "KokkosBatched::xpay: ViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::xpay: alphaViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " - "%d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " "%d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } #endif - return SerialXpayInternal::template invoke< - typename alphaViewType::non_const_value_type, - typename ViewType::non_const_value_type>( - X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), - X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1()); + return SerialXpayInternal::template invoke( + X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), + Y.stride_0(), Y.stride_1()); } /// @@ -246,56 +203,35 @@ KOKKOS_INLINE_FUNCTION int SerialXpay::invoke(const alphaViewType& alpha, template template -KOKKOS_INLINE_FUNCTION int TeamXpay::invoke( - const MemberType& member, const alphaViewType& alpha, const ViewType& X, - const ViewType& Y) { +KOKKOS_INLINE_FUNCTION int TeamXpay::invoke(const MemberType& member, const alphaViewType& alpha, + const ViewType& X, const ViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::xpay: ViewType is not a Kokkos::View."); - 
static_assert(Kokkos::is_view::value, - "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); - static_assert(ViewType::rank == 2, - "KokkosBatched::xpay: ViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::xpay: alphaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::xpay: ViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); + static_assert(ViewType::rank == 2, "KokkosBatched::xpay: ViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::xpay: alphaViewType must have rank 1."); // Check compatibility of dimensions at run time. if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " - "%d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " "%d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } #endif - return TeamXpayInternal::template invoke< - MemberType, typename alphaViewType::non_const_value_type, - typename ViewType::non_const_value_type>( - member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), - X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), - Y.stride_1()); + return 
TeamXpayInternal::template invoke( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), + Y.stride_0(), Y.stride_1()); } /// @@ -304,56 +240,35 @@ KOKKOS_INLINE_FUNCTION int TeamXpay::invoke( template template -KOKKOS_INLINE_FUNCTION int TeamVectorXpay::invoke( - const MemberType& member, const alphaViewType& alpha, const ViewType& X, - const ViewType& Y) { +KOKKOS_INLINE_FUNCTION int TeamVectorXpay::invoke(const MemberType& member, const alphaViewType& alpha, + const ViewType& X, const ViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::xpay: ViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); - static_assert(ViewType::rank == 2, - "KokkosBatched::xpay: ViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::xpay: alphaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::xpay: ViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); + static_assert(ViewType::rank == 2, "KokkosBatched::xpay: ViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::xpay: alphaViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " - "Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " "Y: %d x %d\n", (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); -#endif return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " - "%d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " "%d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } #endif - return TeamVectorXpayInternal::invoke< - MemberType, typename alphaViewType::non_const_value_type, - typename ViewType::non_const_value_type, typename ViewType::array_layout>( - member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), - X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), - Y.stride_1()); + return TeamVectorXpayInternal::invoke( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), + Y.stride_0(), Y.stride_1()); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_AddRadial_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_AddRadial_Decl.hpp index 6b75a11dc7c1..7eadc4326941 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_AddRadial_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_AddRadial_Decl.hpp @@ -34,8 +34,7 @@ namespace KokkosBatched { struct SerialAddRadial { template - KOKKOS_INLINE_FUNCTION static int 
invoke(const ScalarType tiny, - const AViewType &A); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType tiny, const AViewType &A); }; /// @@ -45,9 +44,7 @@ struct SerialAddRadial { template struct TeamAddRadial { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType tiny, - const AViewType &A); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType tiny, const AViewType &A); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyHouseholder_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyHouseholder_Decl.hpp index 3fe51f313867..bee7d3a645d8 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyHouseholder_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyHouseholder_Decl.hpp @@ -29,21 +29,16 @@ namespace KokkosBatched { // level 1 operation template struct SerialApplyHouseholder { - template - KOKKOS_INLINE_FUNCTION static int invoke(const uViewType &u2, - const tauViewType &tau, + template + KOKKOS_INLINE_FUNCTION static int invoke(const uViewType &u2, const tauViewType &tau, const AViewType const wViewType &w); }; // level 1 operation template struct TeamVectorApplyHouseholder { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const uViewType &u2, - const tauViewType &tau, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const uViewType &u2, const tauViewType &tau, const AViewType const wViewType &w); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyPivot_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyPivot_Decl.hpp index fb9bef60ae99..2aa00bf8c2ca 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyPivot_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyPivot_Decl.hpp @@ -28,13 +28,10 @@ namespace 
KokkosBatched { template struct TeamVectorApplyPivot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int piv, const AViewType &A); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int piv, const AViewType &A); template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const PivViewType piv, - const AViewType &A); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const PivViewType piv, const AViewType &A); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp index 177c338a98eb..7f78e317004a 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp @@ -28,11 +28,8 @@ namespace KokkosBatched { template struct SerialApplyQ { - template - KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, - const tViewType &t, - const BViewType &B, + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w); }; @@ -40,56 +37,39 @@ struct SerialApplyQ { /// Team ApplyQ /// -template +template struct TeamApplyQ { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const BViewType &B, - const wViewType &w); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const BViewType &B, const wViewType &w); }; /// /// TeamVector ApplyQ /// -template +template struct TeamVectorApplyQ { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const BViewType &B, - const wViewType &w); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, 
const AViewType &A, const tViewType &t, + const BViewType &B, const wViewType &w); }; /// /// Selective Interface /// -template +template struct ApplyQ { - template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const BViewType &B, - const wViewType &w) { + template + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const BViewType &B, const wViewType &w) { int r_val = 0; if (std::is_same::value) { r_val = SerialApplyQ::invoke(A, t, B, w); } else if (std::is_same::value) { - r_val = TeamApplyQ::invoke( - member, A, t, B, w); + r_val = TeamApplyQ::invoke(member, A, t, B, w); } else if (std::is_same::value) { - r_val = TeamVectorApplyQ::invoke( - member, A, t, B, w); + r_val = TeamVectorApplyQ::invoke(member, A, t, B, w); } return r_val; } diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Axpy.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Axpy.hpp index b76772f3b23d..5b89c0862ec3 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Axpy.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Axpy.hpp @@ -44,9 +44,7 @@ namespace KokkosBatched { struct SerialAxpy { template - KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, - const XViewType &X, - const YViewType &Y); + KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, const XViewType &X, const YViewType &Y); }; /// \brief Team Batched AXPY: @@ -72,9 +70,7 @@ struct SerialAxpy { template struct TeamAxpy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const alphaViewType &alpha, - const XViewType &X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, const XViewType &X, const YViewType &Y); }; @@ -102,9 +98,7 @@ struct TeamAxpy { template struct TeamVectorAxpy { template - KOKKOS_INLINE_FUNCTION static int 
invoke(const MemberType &member, - const alphaViewType &alpha, - const XViewType &X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, const XViewType &X, const YViewType &Y); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Copy_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Copy_Decl.hpp index 07e6ea42da9f..0e2b24e91dda 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Copy_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Copy_Decl.hpp @@ -29,46 +29,36 @@ namespace KokkosBatched { template struct SerialCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, - const BViewType &B); + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const BViewType &B); }; /// /// Team Copy /// -template +template struct TeamCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B); }; /// /// TeamVector Copy /// -template +template struct TeamVectorCopy { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B); }; /// /// Selective Interface /// -template +template struct Copy { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { int r_val = 0; if (std::is_same::value) { r_val = SerialCopy::invoke(A, B); @@ -85,29 +75,23 @@ struct Copy { #include "KokkosBatched_Copy_Impl.hpp" -#define KOKKOSBATCHED_SERIAL_COPY_MATRIX_NO_TRANSPOSE_INTERNAL_INVOKE( \ - M, N, A, AS0, AS1, B, BS0, 
BS1) \ +#define KOKKOSBATCHED_SERIAL_COPY_MATRIX_NO_TRANSPOSE_INTERNAL_INVOKE(M, N, A, AS0, AS1, B, BS0, BS1) \ KokkosBatched::SerialCopyInternal ::invoke(M, N, A, AS0, AS1, B, BS0, BS1) -#define KOKKOSBATCHED_TEAM_COPY_MATRIX_NO_TRANSPOSE_INTERNAL_INVOKE( \ - MEMBER, M, N, A, AS0, AS1, B, BS0, BS1) \ - KokkosBatched::TeamCopyInternal ::invoke(MEMBER, M, N, A, AS0, AS1, B, BS0, \ - BS1) +#define KOKKOSBATCHED_TEAM_COPY_MATRIX_NO_TRANSPOSE_INTERNAL_INVOKE(MEMBER, M, N, A, AS0, AS1, B, BS0, BS1) \ + KokkosBatched::TeamCopyInternal ::invoke(MEMBER, M, N, A, AS0, AS1, B, BS0, BS1) #define KOKKOSBATCHED_SERIAL_COPY_VECTOR_INTERNAL_INVOKE(M, A, AS, B, BS) \ KokkosBatched::SerialCopyInternal ::invoke(M, A, AS, B, BS) -#define KOKKOSBATCHED_TEAM_COPY_VECTOR_NO_TRANSPOSE_INTERNAL_INVOKE( \ - MEMBER, M, A, AS, B, BS) \ +#define KOKKOSBATCHED_TEAM_COPY_VECTOR_NO_TRANSPOSE_INTERNAL_INVOKE(MEMBER, M, A, AS, B, BS) \ KokkosBatched::TeamCopyInternal ::invoke(MEMBER, M, A, AS, B, BS) -#define KOKKOSBATCHED_COPY_VECTOR_NO_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, MEMBER, M, A, AS, B, BS) \ - if (std::is_same::value) { \ - KOKKOSBATCHED_SERIAL_COPY_VECTOR_INTERNAL_INVOKE(M, A, AS, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_COPY_VECTOR_NO_TRANSPOSE_INTERNAL_INVOKE(MEMBER, M, A, \ - AS, B, BS); \ +#define KOKKOSBATCHED_COPY_VECTOR_NO_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, MEMBER, M, A, AS, B, BS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_COPY_VECTOR_INTERNAL_INVOKE(M, A, AS, B, BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAM_COPY_VECTOR_NO_TRANSPOSE_INTERNAL_INVOKE(MEMBER, M, A, AS, B, BS); \ } #endif diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Dot.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Dot.hpp index c04914e22009..545a4954ce83 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Dot.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Dot.hpp @@ -52,9 
+52,7 @@ namespace KokkosBatched { template struct SerialDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, - const YViewType &Y, - const NormViewType &dot); + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, const YViewType &Y, const NormViewType &dot); }; /// \brief Team Batched DOT: @@ -86,9 +84,7 @@ struct SerialDot { template struct TeamDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const NormViewType &dot); }; @@ -122,9 +118,7 @@ struct TeamDot { template struct TeamVectorDot { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const NormViewType &dot); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Eigendecomposition_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Eigendecomposition_Decl.hpp index 4ba24d519bb3..39ead9e26cae 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Eigendecomposition_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Eigendecomposition_Decl.hpp @@ -49,21 +49,17 @@ namespace KokkosBatched { /// dimension of matrix A. 
struct SerialEigendecomposition { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const AViewType &A, const EViewType &er, const EViewType &ei, - const UViewType &UL, const UViewType &UR, const WViewType &W); + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const EViewType &er, const EViewType &ei, + const UViewType &UL, const UViewType &UR, const WViewType &W); }; template struct TeamVectorEigendecomposition { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const AViewType &A, const EViewType &er, - const EViewType &ei, const UViewType &UL, const UViewType &UR, - const WViewType &W); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const EViewType &er, + const EViewType &ei, const UViewType &UL, const UViewType &UR, + const WViewType &W); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemm_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemm_Decl.hpp index 1febcaa77171..9f4b7455612e 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemm_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemm_Decl.hpp @@ -25,61 +25,46 @@ namespace KokkosBatched { template struct SerialGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B, - const ScalarType beta, - const CViewType &C); + template + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B, + const ScalarType beta, const CViewType &C); }; /// /// Team Gemm /// -template +template struct TeamGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const 
ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C); }; /// /// TeamVector Gemm /// -template +template struct TeamVectorGemm { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C); }; /// /// Selective Interface /// -template +template struct Gemm { - template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, const CViewType &C) { + template + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { int r_val = 0; if (std::is_same::value) { - r_val = SerialGemm::invoke(alpha, A, B, - beta, C); + r_val = SerialGemm::invoke(alpha, A, B, beta, C); } else if (std::is_same::value) { - r_val = TeamGemm::invoke( - member, alpha, A, B, beta, C); + r_val = TeamGemm::invoke(member, alpha, A, B, beta, C); } return r_val; } diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemv_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemv_Decl.hpp index 825efa9dc5ca..9ab86d9e0727 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemv_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gemv_Decl.hpp @@ -29,13 +29,9 @@ namespace KokkosBatched { template struct SerialGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, - const yViewType & /*y*/) { + template + KOKKOS_INLINE_FUNCTION 
static int invoke(const ScalarType /*alpha*/, const AViewType & /*A*/, const xViewType & /*x*/, + const ScalarType /*beta*/, const yViewType & /*y*/) { Kokkos::abort( "Error: KokkosBatched::SerialGemv has been deprecated - use " "KokkosBlas::SerialGemv instead"); @@ -49,13 +45,9 @@ struct SerialGemv { template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const xViewType & /*x*/, const ScalarType /*beta*/, const yViewType & /*y*/) { assert(false && "Error: encounter dummy impl"); return 0; @@ -68,13 +60,9 @@ struct TeamGemv { template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const xViewType & /*x*/, const ScalarType /*beta*/, const yViewType & /*y*/) { assert(false && "Error: encounter dummy impl"); return 0; @@ -84,23 +72,18 @@ struct TeamVectorGemv { /// /// Selective Interface /// -template +template struct Gemv { - template - KOKKOS_FORCEINLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + template + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, + const xViewType &x, const ScalarType beta, const yViewType &y) { int r_val = 0; if (std::is_same::value) { r_val = SerialGemv::invoke(alpha, A, x, beta, y); } else if (std::is_same::value) { - r_val = 
TeamGemv::invoke(member, alpha, A, - x, beta, y); + r_val = TeamGemv::invoke(member, alpha, A, x, beta, y); } else if (std::is_same::value) { - r_val = TeamVectorGemv::invoke( - member, alpha, A, x, beta, y); + r_val = TeamVectorGemv::invoke(member, alpha, A, x, beta, y); } return r_val; } @@ -112,44 +95,35 @@ struct Gemv { #include "KokkosBatched_Gemv_TeamVector_Impl.hpp" #include "KokkosBlas2_serial_gemv_internal.hpp" -#define KOKKOSBATCHED_SERIAL_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ - KokkosBlas::Impl::SerialGemvInternal::invoke( \ - M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) - -#define KOKKOSBATCHED_SERIAL_GEMV_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ - KokkosBlas::Impl::SerialGemvInternal::invoke( \ - N, M, ALPHA, A, AS1, AS0, X, XS, BETA, Y, YS) - -#define KOKKOSBATCHED_TEAM_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ - KokkosBlas::Impl::TeamGemvInternal::invoke( \ - MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) - -#define KOKKOSBATCHED_TEAM_GEMV_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ - KokkosBlas::Impl::TeamGemvInternal::invoke( \ - MEMBER, N, M, ALPHA, A, AS1, AS0, X, XS, BETA, Y, YS) - -#define KOKKOSBATCHED_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ - if (std::is_same::value) { \ - KOKKOSBATCHED_SERIAL_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS); \ +#define KOKKOSBATCHED_SERIAL_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ + KokkosBlas::Impl::SerialGemvInternal::invoke(M, N, ALPHA, A, AS0, AS1, 
X, XS, BETA, Y, YS) + +#define KOKKOSBATCHED_SERIAL_GEMV_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ + KokkosBlas::Impl::SerialGemvInternal::invoke(N, M, ALPHA, A, AS1, AS0, X, XS, BETA, Y, YS) + +#define KOKKOSBATCHED_TEAM_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, \ + Y, YS) \ + KokkosBlas::Impl::TeamGemvInternal::invoke(MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) + +#define KOKKOSBATCHED_TEAM_GEMV_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, \ + YS) \ + KokkosBlas::Impl::TeamGemvInternal::invoke(MEMBER, N, M, ALPHA, A, AS1, AS0, X, XS, BETA, Y, YS) + +#define KOKKOSBATCHED_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, \ + BETA, Y, YS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAM_GEMV_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, \ + YS); \ } -#define KOKKOSBATCHED_GEMV_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS) \ - if (std::is_same::value) { \ - KOKKOSBATCHED_SERIAL_GEMV_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_GEMV_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS); \ +#define KOKKOSBATCHED_GEMV_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, ALGOTYPE, MEMBER, M, N, ALPHA, A, AS0, AS1, X, XS, \ + BETA, Y, YS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_GEMV_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, M, N, ALPHA, A, AS0, AS1, X, XS, BETA, Y, YS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAM_GEMV_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, M, N, ALPHA, A, 
AS0, AS1, X, XS, BETA, Y, YS); \ } #endif diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gesv.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gesv.hpp index c4821db4597f..77922e4da02c 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gesv.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Gesv.hpp @@ -64,15 +64,12 @@ struct Gesv { template struct SerialGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, - const XVectorType X, - const YVectorType Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, const XVectorType X, const YVectorType Y, const MatrixType tmp); template - [[deprecated]] KOKKOS_INLINE_FUNCTION static int invoke( - const MatrixType A, const VectorType X, const VectorType Y, - const MatrixType tmp) { + [[deprecated]] KOKKOS_INLINE_FUNCTION static int invoke(const MatrixType A, const VectorType X, const VectorType Y, + const MatrixType tmp) { return invoke(A, X, Y, tmp); } }; @@ -109,9 +106,7 @@ struct SerialGesv { template struct TeamGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const MatrixType A, - const VectorType X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType A, const VectorType X, const VectorType Y); }; @@ -148,9 +143,7 @@ struct TeamGesv { template struct TeamVectorGesv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const MatrixType A, - const VectorType X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const MatrixType A, const VectorType X, const VectorType Y); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HadamardProduct.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HadamardProduct.hpp index fadd4b5774d4..f21aa8bae257 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HadamardProduct.hpp +++ 
b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HadamardProduct.hpp @@ -42,9 +42,7 @@ namespace KokkosBatched { struct SerialHadamardProduct { template - KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, - const YViewType &Y, - const VViewType &V); + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, const YViewType &Y, const VViewType &V); }; /// \brief Team Batched Hadamard Product: @@ -68,9 +66,7 @@ struct SerialHadamardProduct { template struct TeamHadamardProduct { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const VViewType &V); }; @@ -96,31 +92,22 @@ struct TeamHadamardProduct { template struct TeamVectorHadamardProduct { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const VViewType &V); }; template struct HadamardProduct { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const XViewType &X, - const YViewType &Y, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const XViewType &X, const YViewType &Y, const VViewType &V) { int r_val = 0; if (std::is_same::value) { - r_val = SerialHadamardProduct::template invoke(X, Y, V); + r_val = SerialHadamardProduct::template invoke(X, Y, V); } else if (std::is_same::value) { - r_val = - TeamHadamardProduct::template invoke(member, X, - Y, V); + r_val = TeamHadamardProduct::template invoke(member, X, Y, V); } else if (std::is_same::value) { - r_val = TeamVectorHadamardProduct::template invoke< - XViewType, YViewType, VViewType>(member, X, Y, V); + r_val = TeamVectorHadamardProduct::template invoke(member, X, Y, V); } return r_val; } diff --git 
a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm.hpp index 4725e0220d2b..0741b5b41e8a 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm.hpp @@ -82,32 +82,23 @@ namespace KokkosBatched { /// BatchedGemm(handle, alpha, A, B, beta, C); // clang-format on -template -inline int BatchedGemm(BatchedGemmHandleType *const handle, - const ScalarType alpha, const AViewType &A, - const BViewType &B, const ScalarType beta, - const CViewType &C) { +template +inline int BatchedGemm(BatchedGemmHandleType *const handle, const ScalarType alpha, const AViewType &A, + const BViewType &B, const ScalarType beta, const CViewType &C) { // Minimize the number of ImplBatchedGemmWrapper instantiations, by // standardizing on particular View specializations for its template // parameters. - using UnifiedAVT = Kokkos::View< - typename AViewType::value_type ***, typename AViewType::array_layout, - typename AViewType::device_type, Kokkos::MemoryTraits>; - using UnifiedBVT = Kokkos::View< - typename BViewType::value_type ***, typename BViewType::array_layout, - typename BViewType::device_type, Kokkos::MemoryTraits>; - using UnifiedCVT = Kokkos::View>; + using UnifiedAVT = Kokkos::View>; + using UnifiedBVT = Kokkos::View>; + using UnifiedCVT = Kokkos::View>; // Go through specialization layer in case ETI'd symbols are available. 
- return Impl::BatchedGemmSpec::run(handle, alpha, A, B, - beta, C); + return Impl::BatchedGemmSpec::run(handle, alpha, A, B, beta, C); } } // namespace KokkosBatched #endif // __KOKKOSBATCHED_HOSTLEVEL_GEMM_DECL_HPP__ diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm_Handle.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm_Handle.hpp index 95e8f36bc2f9..2aa6f47cb0f5 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm_Handle.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_HostLevel_Gemm_Handle.hpp @@ -40,15 +40,11 @@ enum GEMM_KOKKOS_BATCHED_ALGOS : int { }; } -#define GEMM_ALGO_STRS \ - "GemmTplAlgos::CUBLAS", "GemmTplAlgos::MAGMA", \ - "GemmKokkosBatchedAlgos::KK_TEAM", \ - "GemmKokkosBatchedAlgos::KK_TEAMVECTOR", \ - "GemmKokkosBatchedAlgos::KK_SERIALSIMD", \ - "GemmKokkosBatchedAlgos::KK_TEAMSIMD", \ - "GemmKokkosBatchedAlgos::KK_SERIAL_RANK0", \ - "GemmKokkosBatchedAlgos::KK_SERIAL_SHMEM", \ - "GemmKokkosBatchedAlgos::KK_DBLBUF" +#define GEMM_ALGO_STRS \ + "GemmTplAlgos::CUBLAS", "GemmTplAlgos::MAGMA", "GemmKokkosBatchedAlgos::KK_TEAM", \ + "GemmKokkosBatchedAlgos::KK_TEAMVECTOR", "GemmKokkosBatchedAlgos::KK_SERIALSIMD", \ + "GemmKokkosBatchedAlgos::KK_TEAMSIMD", "GemmKokkosBatchedAlgos::KK_SERIAL_RANK0", \ + "GemmKokkosBatchedAlgos::KK_SERIAL_SHMEM", "GemmKokkosBatchedAlgos::KK_DBLBUF" // clang-format off /// \brief Handle for selecting runtime behavior of the BatchedGemm interface. 
/// @@ -96,8 +92,7 @@ enum GEMM_KOKKOS_BATCHED_ALGOS : int { // clang-format on class BatchedGemmHandle : public BatchedKernelHandle { public: - BatchedGemmHandle(int kernelAlgoType = BaseHeuristicAlgos::SQUARE, - int teamSize = 0, int vecLength = 0) + BatchedGemmHandle(int kernelAlgoType = BaseHeuristicAlgos::SQUARE, int teamSize = 0, int vecLength = 0) : BatchedKernelHandle(kernelAlgoType, teamSize, vecLength) { #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) if (!_tplParamsSet && kernelAlgoType == GemmTplAlgos::CUBLAS) { @@ -116,26 +111,23 @@ class BatchedGemmHandle : public BatchedKernelHandle { #endif // MAGMA }; - BatchedGemmHandle(bool tplParamsSet, - int kernelAlgoType = BaseHeuristicAlgos::SQUARE, - int teamSize = 0, int vecLength = 0) + BatchedGemmHandle(bool tplParamsSet, int kernelAlgoType = BaseHeuristicAlgos::SQUARE, int teamSize = 0, + int vecLength = 0) : BatchedKernelHandle(kernelAlgoType, teamSize, vecLength) { _tplParamsSet = tplParamsSet; }; #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) - BatchedGemmHandle(cublasHandle_t &cublas_handle, - int kernelAlgoType = BaseHeuristicAlgos::SQUARE, - int teamSize = 0, int vecLength = 0) + BatchedGemmHandle(cublasHandle_t &cublas_handle, int kernelAlgoType = BaseHeuristicAlgos::SQUARE, int teamSize = 0, + int vecLength = 0) : BatchedGemmHandle(true, kernelAlgoType, teamSize, vecLength) { _tplParamsSingleton.cublas_handle = &cublas_handle; }; #endif // CUBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) - BatchedGemmHandle(magma_queue_t &magma_queue, - int kernelAlgoType = BaseHeuristicAlgos::SQUARE, - int teamSize = 0, int vecLength = 0) + BatchedGemmHandle(magma_queue_t &magma_queue, int kernelAlgoType = BaseHeuristicAlgos::SQUARE, int teamSize = 0, + int vecLength = 0) : BatchedGemmHandle(true, kernelAlgoType, teamSize, vecLength) { _tplParamsSingleton.magma_queue = &magma_queue; }; @@ -151,13 +143,10 @@ class BatchedGemmHandle : public BatchedKernelHandle { #endif } - std::string get_kernel_algo_type_str() 
const { - return gemm_algo_type_strs[_kernelAlgoType]; - } + std::string get_kernel_algo_type_str() const { return gemm_algo_type_strs[_kernelAlgoType]; } private: - const char *gemm_algo_type_strs[GemmKokkosBatchedAlgos::N] = {BASE_ALGO_STRS, - GEMM_ALGO_STRS}; + const char *gemm_algo_type_strs[GemmKokkosBatchedAlgos::N] = {BASE_ALGO_STRS, GEMM_ALGO_STRS}; }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Householder_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Householder_Decl.hpp index 6d749bd73afb..0a4845755192 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Householder_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Householder_Decl.hpp @@ -30,8 +30,7 @@ namespace KokkosBatched { template struct SerialHouseholder { template - KOKKOS_INLINE_FUNCTION static int invoke(const aViewType &a, - const tauViewType &tau); + KOKKOS_INLINE_FUNCTION static int invoke(const aViewType &a, const tauViewType &tau); }; /// @@ -42,9 +41,7 @@ struct SerialHouseholder { template struct TeamVectorHouseholder { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const aViewType &a, - const tauViewType &tau); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const aViewType &a, const tauViewType &tau); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixA_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixA_Decl.hpp index 90f2cdb643f9..757a92ca211b 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixA_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixA_Decl.hpp @@ -25,25 +25,19 @@ struct InnerGemmFixA { const int _as0, _as1, _bs0, _bs1, _cs0, _cs1; KOKKOS_INLINE_FUNCTION - InnerGemmFixA(const int as0, const int as1, const int bs0, const int bs1, - const int cs0, const int 
cs1) + InnerGemmFixA(const int as0, const int as1, const int bs0, const int bs1, const int cs0, const int cs1) : _as0(as0), _as1(as1), _bs0(bs0), _bs1(bs1), _cs0(cs0), _cs1(cs1) {} // serial rank update template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, - const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, /**/ ValueType *KOKKOS_RESTRICT C); // serial rank update for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, - const int m, const int n, - const int k, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, /**/ ValueType *KOKKOS_RESTRICT C); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixB_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixB_Decl.hpp index 67d968a356d6..b2f885970fb8 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixB_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixB_Decl.hpp @@ -25,25 +25,19 @@ struct InnerGemmFixB { const int _as0, _as1, _bs0, _bs1, _cs0, _cs1; KOKKOS_INLINE_FUNCTION - InnerGemmFixA(const int as0, const int as1, const int bs0, const int bs1, - const int cs0, const int cs1) + InnerGemmFixA(const int as0, const int as1, const int bs0, const int bs1, const int cs0, const int cs1) : _as0(as0), _as1(as1), _bs0(bs0), _bs1(bs1), _cs0(cs0), _cs1(cs1) {} // serial rank update template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, - const 
int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int n, /**/ ValueType *KOKKOS_RESTRICT C); // serial rank update for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, - const int m, const int n, - const int k, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, /**/ ValueType *KOKKOS_RESTRICT C); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixC_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixC_Decl.hpp index 64d00845eef3..c61d966f7701 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixC_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerGemmFixC_Decl.hpp @@ -25,49 +25,37 @@ struct InnerGemmFixC { const int _as0, _as1, _bs0, _bs1, _cs0, _cs1; KOKKOS_INLINE_FUNCTION - InnerGemmFixC(const int as0, const int as1, const int bs0, const int bs1, - const int cs0, const int cs1) + InnerGemmFixC(const int as0, const int as1, const int bs0, const int bs1, const int cs0, const int cs1) : _as0(as0), _as1(as1), _bs0(bs0), _bs1(bs1), _cs0(cs0), _cs1(cs1) {} // serial rank update template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, - const int k, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int k, /**/ ValueType *KOKKOS_RESTRICT C); // serial rank update for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const 
ValueType *KOKKOS_RESTRICT B, - const int m, const int k, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, const int k, /**/ ValueType *KOKKOS_RESTRICT C); // serial rank update for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, - const int m, const int n, - const int k, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, + const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, /**/ ValueType *KOKKOS_RESTRICT C); template - KOKKOS_INLINE_FUNCTION int team_invoke(const MemberType &member, - const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, + KOKKOS_INLINE_FUNCTION int team_invoke(const MemberType &member, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const ValueType *KOKKOS_RESTRICT B, const int k, /**/ ValueType *KOKKOS_RESTRICT C); // team rank update for remainder template - KOKKOS_INLINE_FUNCTION int team_invoke(const MemberType &member, - const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, - const ValueType *KOKKOS_RESTRICT B, + KOKKOS_INLINE_FUNCTION int team_invoke(const MemberType &member, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT A, const ValueType *KOKKOS_RESTRICT B, const int m, const int n, const int k, /**/ ValueType *KOKKOS_RESTRICT C); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerLU_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerLU_Decl.hpp index d0d50a146c54..c355185b742e 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerLU_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerLU_Decl.hpp @@ -33,13 +33,11 @@ struct InnerLU { // for remainder square template - 
KOKKOS_INLINE_FUNCTION int serial_invoke(const int m, - ValueType *KOKKOS_RESTRICT A); + KOKKOS_INLINE_FUNCTION int serial_invoke(const int m, ValueType *KOKKOS_RESTRICT A); // for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const int m, const int n, - ValueType *KOKKOS_RESTRICT A); + KOKKOS_INLINE_FUNCTION int serial_invoke(const int m, const int n, ValueType *KOKKOS_RESTRICT A); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerTrsm_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerTrsm_Decl.hpp index 22395c92017a..5b5b9bb147ea 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerTrsm_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InnerTrsm_Decl.hpp @@ -27,20 +27,17 @@ struct InnerTrsmLeftLowerUnitDiag { const int _as0, _as1, _bs0, _bs1; KOKKOS_INLINE_FUNCTION - InnerTrsmLeftLowerUnitDiag(const int as0, const int as1, const int bs0, - const int bs1) + InnerTrsmLeftLowerUnitDiag(const int as0, const int as1, const int bs0, const int bs1) : _as0(as0), _as1(as1), _bs0(bs0), _bs1(bs1) {} // trisolve template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, /**/ ValueType *KOKKOS_RESTRICT B); // for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int m, const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, const int n, /**/ ValueType *KOKKOS_RESTRICT B); }; @@ -51,20 +48,17 @@ struct InnerTrsmLeftLowerNonUnitDiag { const int _as0, _as1, _bs0, _bs1; KOKKOS_INLINE_FUNCTION - InnerTrsmLeftLowerNonUnitDiag(const int as0, const int as1, const int bs0, - const int bs1) + InnerTrsmLeftLowerNonUnitDiag(const int as0, const int as1, const int bs0, const int bs1) : _as0(as0), _as1(as1), _bs0(bs0), 
_bs1(bs1) {} // trisolve template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, /**/ ValueType *KOKKOS_RESTRICT B); // for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int m, const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, const int n, /**/ ValueType *KOKKOS_RESTRICT B); }; @@ -75,20 +69,17 @@ struct InnerTrsmLeftUpperUnitDiag { const int _as0, _as1, _bs0, _bs1; KOKKOS_INLINE_FUNCTION - InnerTrsmLeftUpperUnitDiag(const int as0, const int as1, const int bs0, - const int bs1) + InnerTrsmLeftUpperUnitDiag(const int as0, const int as1, const int bs0, const int bs1) : _as0(as0), _as1(as1), _bs0(bs0), _bs1(bs1) {} // trisolve template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, /**/ ValueType *KOKKOS_RESTRICT B); // for remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int m, const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, const int n, /**/ ValueType *KOKKOS_RESTRICT B); }; @@ -99,20 +90,17 @@ struct InnerTrsmLeftUpperNonUnitDiag { const int _as0, _as1, _bs0, _bs1; KOKKOS_INLINE_FUNCTION - InnerTrsmLeftUpperNonUnitDiag(const int as0, const int as1, const int bs0, - const int bs1) + InnerTrsmLeftUpperNonUnitDiag(const int as0, const int as1, const int bs0, const int bs1) : _as0(as0), _as1(as1), _bs0(bs0), _bs1(bs1) {} // trisolve template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int n, /**/ ValueType *KOKKOS_RESTRICT B); // for 
remainder template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, - const int m, const int n, + KOKKOS_INLINE_FUNCTION int serial_invoke(const ValueType *KOKKOS_RESTRICT A, const int m, const int n, /**/ ValueType *KOKKOS_RESTRICT B); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InverseLU_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InverseLU_Decl.hpp index e28a0151ed4f..930bc790b033 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InverseLU_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_InverseLU_Decl.hpp @@ -30,12 +30,10 @@ namespace KokkosBatched { template struct SerialInverseLU { template - KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, - const wViewType &w) { + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const wViewType &w) { typedef typename wViewType::value_type value_type; // workspace w is always 1D view; reinterpret it - Kokkos::View W( - w.data(), A.extent(0), A.extent(1)); + Kokkos::View W(w.data(), A.extent(0), A.extent(1)); int r_val[3] = {}; r_val[0] = SerialCopy::invoke(A, W); @@ -48,19 +46,15 @@ struct SerialInverseLU { template struct TeamInverseLU { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const wViewType &w) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const wViewType &w) { typedef typename wViewType::value_type value_type; // workspace w is always 1D view; reinterpret it - Kokkos::View W( - w.data(), A.extent(0), A.extent(1)); + Kokkos::View W(w.data(), A.extent(0), A.extent(1)); int r_val[3] = {}; - r_val[0] = TeamCopy::invoke(member, A, W); - r_val[1] = TeamSetIdentity::invoke(member, A); - r_val[2] = TeamSolveLU::invoke( - member, W, A); + r_val[0] = TeamCopy::invoke(member, A, W); + r_val[1] = TeamSetIdentity::invoke(member, A); + r_val[2] = TeamSolveLU::invoke(member, W, A); 
return r_val[0] + r_val[1] + r_val[2]; } }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Kernel_Handle.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Kernel_Handle.hpp index 051f78979ded..bd73b4e267e8 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Kernel_Handle.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Kernel_Handle.hpp @@ -56,10 +56,9 @@ enum BASE_KOKKOS_BATCHED_ALGOS : int { KK_SERIAL = BaseTplAlgos::N, N }; } #define N_BASE_ALGOS BaseKokkosBatchedAlgos::N -#define BASE_ALGO_STRS \ - "BaseHeuristicAlgos::SQUARE", "BaseHeuristicAlgos::TALL", \ - "BaseHeuristicAlgos::WIDE", "BaseTplAlgos::ARMPL", "BaseTplAlgosMKL", \ - "BaseKokkosBatchedAlgos::KK_SERIAL" +#define BASE_ALGO_STRS \ + "BaseHeuristicAlgos::SQUARE", "BaseHeuristicAlgos::TALL", "BaseHeuristicAlgos::WIDE", "BaseTplAlgos::ARMPL", \ + "BaseTplAlgosMKL", "BaseKokkosBatchedAlgos::KK_SERIAL" /// \brief TplParams abstracts underlying handle or execution queue type. 
struct TplParams { @@ -145,8 +144,7 @@ class BatchedKernelHandle { int vecLen = 0; bool enableDebug = false; - BatchedKernelHandle(int kernelAlgoType = BaseHeuristicAlgos::SQUARE, - int teamSize = 0, int vecLength = 0) + BatchedKernelHandle(int kernelAlgoType = BaseHeuristicAlgos::SQUARE, int teamSize = 0, int vecLength = 0) : teamSz(teamSize), vecLen(vecLength), _kernelAlgoType(kernelAlgoType) { #if !defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) || ARMPL_BUILD < 1058 if (_kernelAlgoType == BaseTplAlgos::ARMPL) { @@ -161,9 +159,7 @@ class BatchedKernelHandle { int get_kernel_algo_type() const { return _kernelAlgoType; } - std::string get_kernel_algo_type_str() const { - return algo_type_strs[_kernelAlgoType]; - } + std::string get_kernel_algo_type_str() const { return algo_type_strs[_kernelAlgoType]; } decltype(auto) get_tpl_params() const { #if _kernelAlgoType == ARMPL && defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_LU_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_LU_Decl.hpp index fcba6e20f8aa..363193c14784 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_LU_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_LU_Decl.hpp @@ -28,9 +28,7 @@ struct SerialLU { // no piv version template KOKKOS_INLINE_FUNCTION static int invoke( - const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny = 0); + const AViewType &A, const typename MagnitudeScalarType::type tiny = 0); }; template @@ -39,8 +37,7 @@ struct TeamLU { template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const AViewType &A, - const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny = 0); + const typename MagnitudeScalarType::type tiny = 0); }; /// @@ -52,8 +49,7 @@ struct LU { template KOKKOS_FORCEINLINE_FUNCTION static int invoke( const MemberType &member, const AViewType &A, - 
const typename MagnitudeScalarType< - typename AViewType::non_const_value_type>::type tiny = 0) { + const typename MagnitudeScalarType::type tiny = 0) { int r_val = 0; if (std::is_same::value) { r_val = SerialLU::invoke(A, tiny); diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Pttrf.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Pttrf.hpp new file mode 100644 index 000000000000..787e5aeee343 --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Pttrf.hpp @@ -0,0 +1,51 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSBATCHED_PTTRF_HPP_ +#define KOKKOSBATCHED_PTTRF_HPP_ + +#include + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +namespace KokkosBatched { + +/// \brief Serial Batched Pttrf: +/// Compute the Cholesky factorization L*D*L**T (or L*D*L**H) of a real +/// symmetric (or complex Hermitian) positive definite tridiagonal matrix A_l +/// for all l = 0, ..., N +/// +/// \tparam DViewType: Input type for the a diagonal matrix, needs to be a 1D +/// view +/// \tparam EViewType: Input type for the a upper/lower diagonal matrix, +/// needs to be a 1D view +/// +/// \param d [inout]: n diagonal elements of the diagonal matrix D +/// \param e [inout]: n-1 upper/lower diagonal elements of the diagonal matrix E +/// +/// No nested parallel_for is used inside of the function. 
+/// + +template +struct SerialPttrf { + template + KOKKOS_INLINE_FUNCTION static int invoke(const DViewType &d, const EViewType &e); +}; + +} // namespace KokkosBatched + +#include "KokkosBatched_Pttrf_Serial_Impl.hpp" + +#endif // KOKKOSBATCHED_PTTRF_HPP_ diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_Decl.hpp index 993e9345fb19..78bdcd4d4b12 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_Decl.hpp @@ -29,9 +29,7 @@ namespace KokkosBatched { template struct SerialQR { template - KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, - const tViewType &t, - const wViewType &w); + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const tViewType &t, const wViewType &w); }; /// @@ -41,10 +39,8 @@ struct SerialQR { template struct TeamQR { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const AViewType & /*A*/, - const tViewType & /*t*/, - const wViewType & /*w*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const AViewType & /*A*/, + const tViewType & /*t*/, const wViewType & /*w*/) { /// not implemented return -1; } @@ -57,9 +53,7 @@ struct TeamQR { template struct TeamVectorQR { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, const wViewType &w); }; @@ -69,9 +63,7 @@ struct TeamVectorQR { template struct QR { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, const wViewType &w) { int r_val = 0; if (std::is_same::value) { 
diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_WithColumnPivoting_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_WithColumnPivoting_Decl.hpp index 134a97ed7370..b08e5277a0a5 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_WithColumnPivoting_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_QR_WithColumnPivoting_Decl.hpp @@ -28,13 +28,9 @@ namespace KokkosBatched { template struct TeamVectorQR_WithColumnPivoting { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const tViewType &t, - const pViewType &p, - const wViewType &w, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, + const pViewType &p, const wViewType &w, /* */ int &matrix_rank); }; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SVD_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SVD_Decl.hpp index c5dc5805d937..efade8029bb7 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SVD_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SVD_Decl.hpp @@ -56,19 +56,16 @@ struct SVD_S_Tag {}; struct SerialSVD { // Version to compute full factorization: A == U * diag(s) * Vt - template - KOKKOS_INLINE_FUNCTION static int invoke(SVD_USV_Tag, const AViewType &A, - const UViewType &U, - const SViewType &s, - const VtViewType &Vt, - const WViewType &W); + template + KOKKOS_INLINE_FUNCTION static int invoke( + SVD_USV_Tag, const AViewType &A, const UViewType &U, const SViewType &s, const VtViewType &Vt, const WViewType &W, + typename AViewType::const_value_type tol = Kokkos::ArithTraits::zero()); // Version which computes only singular values template - KOKKOS_INLINE_FUNCTION static int invoke(SVD_S_Tag, const AViewType &A, - const SViewType &s, - const WViewType &W); + KOKKOS_INLINE_FUNCTION static int invoke( + SVD_S_Tag, const 
AViewType &A, const SViewType &s, const WViewType &W, + typename AViewType::const_value_type tol = Kokkos::ArithTraits::zero()); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Scale_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Scale_Decl.hpp index dbb9a43ffbeb..94453a5ede49 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Scale_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Scale_Decl.hpp @@ -26,49 +26,45 @@ namespace KokkosBatched { /// Serial Scale /// -struct [[deprecated]] SerialScale{ - template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A){Kokkos::abort( +struct [[deprecated]] SerialScale { + template + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A) { + Kokkos::abort( "KokkosBatched::SerialScale is deprecated: use KokkosBlas::SerialScale " "instead"); -return 0; -} // namespace KokkosBatched -} -; + return 0; + } // namespace KokkosBatched +}; /// /// Team Scale /// template -struct [[deprecated]] TeamScale{ - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A){Kokkos::abort( +struct [[deprecated]] TeamScale { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A) { + Kokkos::abort( "KokkosBatched::TeamScale is deprecated: use KokkosBlas::TeamScale " "instead"); -return 0; -} -} -; + return 0; + } +}; /// /// TeamVector Scale /// template -struct [[deprecated]] TeamVectorScale{ - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A){ - Kokkos::abort("KokkosBatched::TeamVectorScale is deprecated: use " - "KokkosBlas::TeamVectorScale instead"); -return 0; -} -} -; +struct [[deprecated]] TeamVectorScale { + template + KOKKOS_INLINE_FUNCTION static int 
invoke(const MemberType &member, const ScalarType alpha, const AViewType &A) { + Kokkos::abort( + "KokkosBatched::TeamVectorScale is deprecated: use " + "KokkosBlas::TeamVectorScale instead"); + return 0; + } +}; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SetIdentity_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SetIdentity_Decl.hpp index b78d3e7b05ee..27c2b22ed7fc 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SetIdentity_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SetIdentity_Decl.hpp @@ -39,8 +39,7 @@ struct SerialSetIdentity { template struct TeamSetIdentity { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A); }; /// @@ -49,8 +48,7 @@ struct TeamSetIdentity { template struct SetIdentity { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A) { int r_val = 0; if (std::is_same::value) { r_val = SerialSetIdentity::invoke(A); diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Set_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Set_Decl.hpp index ebddb72a4a1a..d33d186275c0 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Set_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Set_Decl.hpp @@ -25,49 +25,45 @@ namespace KokkosBatched { /// Serial Set /// -struct [[deprecated]] SerialSet{ - template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A){Kokkos::abort( +struct [[deprecated]] SerialSet { + template + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A) { + Kokkos::abort( "KokkosBatched::SerialSet is deprecated: 
use KokkosBlas::SerialSet " "instead"); -return 0; -} // namespace KokkosBatched -} -; + return 0; + } // namespace KokkosBatched +}; /// /// Team Set /// template -struct [[deprecated]] TeamSet{ - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A){Kokkos::abort( +struct [[deprecated]] TeamSet { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A) { + Kokkos::abort( "KokkosBatched::TeamSet is deprecated: use KokkosBlas::TeamSet " "instead"); -return 0; -} -} -; + return 0; + } +}; /// /// TeamVector Set /// template -struct [[deprecated]] TeamVectorSet{ - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A){ - Kokkos::abort("KokkosBatched::TeamVectorSet is deprecated: use " - "KokkosBlas::TeamVectorSet instead"); -return 0; -} -} -; +struct [[deprecated]] TeamVectorSet { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A) { + Kokkos::abort( + "KokkosBatched::TeamVectorSet is deprecated: use " + "KokkosBlas::TeamVectorSet instead"); + return 0; + } +}; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveLU_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveLU_Decl.hpp index 8e731e266642..119f5c691648 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveLU_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveLU_Decl.hpp @@ -30,25 +30,19 @@ template struct SerialSolveLU { // no piv version template - KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const BViewType &B) { int r_val[2] = {}; const typename AViewType::non_const_value_type one(1.0); if (std::is_same::value) { 
// First, compute Y (= U*X) by solving the system L*Y = B for Y - r_val[0] = SerialTrsm::invoke(one, A, B); + r_val[0] = SerialTrsm::invoke(one, A, B); // Second, compute X by solving the system U*X = Y for X - r_val[1] = SerialTrsm::invoke(one, A, B); - } else if (std::is_same::value || - std::is_same::value) { + r_val[1] = SerialTrsm::invoke(one, A, B); + } else if (std::is_same::value || std::is_same::value) { // First, compute Y (= L'*X) by solving the system U'*Y = B for Y - r_val[0] = SerialTrsm::invoke(one, A, B); + r_val[0] = SerialTrsm::invoke(one, A, B); // Second, compute X by solving the system L'*X = Y for X - r_val[1] = SerialTrsm::invoke(one, A, B); + r_val[1] = SerialTrsm::invoke(one, A, B); } return r_val[0] + r_val[1]; } @@ -58,26 +52,23 @@ template struct TeamSolveLU { // no piv version template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { int r_val[2] = {}; const typename AViewType::non_const_value_type one(1.0); if (std::is_same::value) { // First, compute Y (= U*X) by solving the system L*Y = B for Y - r_val[0] = TeamTrsm::invoke(member, one, A, B); + r_val[0] = + TeamTrsm::invoke(member, one, A, B); // Second, compute X by solving the system U*X = Y for X - r_val[1] = TeamTrsm::invoke(member, one, A, B); - } else if (std::is_same::value || - std::is_same::value) { + r_val[1] = + TeamTrsm::invoke(member, one, A, B); + } else if (std::is_same::value || std::is_same::value) { // First, compute Y (= L'*X) by solving the system U'*Y = B for Y - r_val[0] = TeamTrsm::invoke(member, one, A, B); + r_val[0] = + TeamTrsm::invoke(member, one, A, B); // Second, compute X by solving the system L'*X = Y for X - r_val[1] = TeamTrsm::invoke(member, one, A, B); + r_val[1] = + TeamTrsm::invoke(member, one, A, B); } return r_val[0] + r_val[1]; } @@ -86,14 +77,11 @@ struct TeamSolveLU 
{ /// /// Selective Interface /// -template +template struct SolveLU { // no piv version template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, - const AViewType &A, - const BViewType &B) { + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B) { int r_val = 0; if (std::is_same::value) { r_val = SerialSolveLU::invoke(A, B); diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveUTV_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveUTV_Decl.hpp index e55836de6cdd..c881a0b0f7d2 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveUTV_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_SolveUTV_Decl.hpp @@ -46,13 +46,11 @@ namespace KokkosBatched { template struct TeamVectorSolveUTV { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int matrix_rank, const UViewType &U, - const TViewType &T, const VViewType &V, const pViewType &p, - const XViewType &X, const BViewType &B, const wViewType &w); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int matrix_rank, const UViewType &U, + const TViewType &T, const VViewType &V, const pViewType &p, + const XViewType &X, const BViewType &B, const wViewType &w); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Tbsv.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Tbsv.hpp new file mode 100644 index 000000000000..f7d700be44ca --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Tbsv.hpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSBATCHED_TBSV_HPP_ +#define KOKKOSBATCHED_TBSV_HPP_ + +#include + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +namespace KokkosBatched { + +/// \brief Serial Batched Tbsv: +/// +/// Solve Ab_l x_l = b_l for all l = 0, ..., N +/// using the triangular solve algorithm Tbsv. Ab is an n by n unit, or +/// non-unit, upper or lower triangular band matrix, with ( k + 1 ) +/// diagonals. +/// +/// \tparam AViewType: Input type for the matrix, needs to be a 2D view +/// \tparam XViewType: Input type for the right-hand side and the solution, +/// needs to be a 1D view +/// +/// \param A [in]: A is a lda by n banded matrix, with ( k + 1 ) diagonals +/// \param X [inout]: right-hand side and the solution, a rank 1 view +/// \param k [in]: k specifies the number of superdiagonals or subdiagonals of +/// matrix A. k >= 0 +/// +/// No nested parallel_for is used inside of the function. 
+/// + +template +struct SerialTbsv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &X, const int k); +}; + +} // namespace KokkosBatched + +#include "KokkosBatched_Tbsv_Serial_Impl.hpp" + +#endif // KOKKOSBATCHED_TBSV_HPP_ diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trmm_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trmm_Decl.hpp index 81d1f8d07394..c284ed63b259 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trmm_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trmm_Decl.hpp @@ -22,13 +22,10 @@ namespace KokkosBatched { -template +template struct SerialTrmm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B); }; } // namespace KokkosBatched #endif // __KOKKOSBATCHED_TRMM_DECL_HPP__ diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsm_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsm_Decl.hpp index e0aee4659f9b..d2220953cce4 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsm_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsm_Decl.hpp @@ -23,54 +23,42 @@ namespace KokkosBatched { -template +template struct SerialTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A, - const BViewType &B); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A, const BViewType &B); }; -template +template struct TeamTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B); }; -template +template struct 
TeamVectorTrsm { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B); }; /// /// Selective Interface /// -template +template struct Trsm { template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const BViewType &B) { int r_val = 0; if (std::is_same::value) { - r_val = SerialTrsm::invoke( - alpha, A, B); + r_val = SerialTrsm::invoke(alpha, A, B); } else if (std::is_same::value) { - r_val = TeamTrsm::invoke(member, alpha, A, B); + r_val = TeamTrsm::invoke(member, alpha, A, B); } return r_val; } diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsv_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsv_Decl.hpp index ed9f5cca268c..e3da43a95d32 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsv_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Trsv_Decl.hpp @@ -27,12 +27,10 @@ namespace KokkosBatched { /// Serial Trsv /// -template +template struct SerialTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType /*alpha*/, - const AViewType & /*A*/, + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType /*alpha*/, const AViewType & /*A*/, const bViewType & /*b*/) { assert(false && "Error: encounter dummy impl"); return 0; @@ -43,14 +41,11 @@ struct SerialTrsv { /// Team Trsv /// -template +template struct TeamTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const bViewType & /*b*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType 
/*alpha*/, + const AViewType & /*A*/, const bViewType & /*b*/) { assert(false && "Error: encounter dummy impl"); return 0; } @@ -60,14 +55,11 @@ struct TeamTrsv { /// TeamVector Trsv /// -template +template struct TeamVectorTrsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, - const ScalarType /*alpha*/, - const AViewType & /*A*/, - const bViewType & /*b*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, const ScalarType /*alpha*/, + const AViewType & /*A*/, const bViewType & /*b*/) { assert(false && "Error: encounter dummy impl"); return 0; } @@ -76,24 +68,19 @@ struct TeamVectorTrsv { /// /// Selective Interface /// -template +template struct Trsv { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A, const bViewType &b) { int r_val = 0; if (std::is_same::value) { - r_val = - SerialTrsv::invoke(alpha, A, b); + r_val = SerialTrsv::invoke(alpha, A, b); } else if (std::is_same::value) { - r_val = TeamTrsv::invoke( - member, alpha, A, b); + r_val = TeamTrsv::invoke(member, alpha, A, b); } else if (std::is_same::value) { - r_val = TeamVectorTrsv::invoke(member, alpha, A, b); + r_val = TeamVectorTrsv::invoke(member, alpha, A, b); } return r_val; } @@ -105,116 +92,98 @@ struct Trsv { #include "KokkosBatched_Trsv_Team_Impl.hpp" #include "KokkosBatched_Trsv_TeamVector_Impl.hpp" -#define KOKKOSBATCHED_SERIAL_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::SerialTrsvInternalLower::invoke( \ - DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) - -#define KOKKOSBATCHED_SERIAL_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::SerialTrsvInternalUpper::invoke( \ - DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, 
B, BS) - -#define KOKKOSBATCHED_SERIAL_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::SerialTrsvInternalUpper::invoke( \ - DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) - -#define KOKKOSBATCHED_SERIAL_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::SerialTrsvInternalLower::invoke( \ - DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) - -#define KOKKOSBATCHED_TEAM_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamTrsvInternalLower::invoke( \ - MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) - -#define KOKKOSBATCHED_TEAM_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamTrsvInternalUpper::invoke( \ - MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) - -#define KOKKOSBATCHED_TEAM_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamTrsvInternalUpper::invoke( \ - MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) - -#define KOKKOSBATCHED_TEAM_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamTrsvInternalLower::invoke( \ - MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) - -#define KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamVectorTrsvInternalLower::invoke( \ - MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) - -#define KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamVectorTrsvInternalUpper::invoke( \ - MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) - -#define 
KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamVectorTrsvInternalUpper::invoke( \ - MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) - -#define KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - KokkosBatched::TeamVectorTrsvInternalLower::invoke( \ - MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) - -#define KOKKOSBATCHED_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - if (std::is_same::value) { \ - KOKKOSBATCHED_SERIAL_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ +#define KOKKOSBATCHED_SERIAL_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ + KokkosBatched::SerialTrsvInternalLower::invoke(DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) + +#define KOKKOSBATCHED_SERIAL_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ + KokkosBatched::SerialTrsvInternalUpper::invoke(DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) + +#define KOKKOSBATCHED_SERIAL_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ + KokkosBatched::SerialTrsvInternalUpper::invoke(DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) + +#define KOKKOSBATCHED_SERIAL_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ + KokkosBatched::SerialTrsvInternalLower::invoke(DIAG::use_unit_diag, N, ALPHA, A, AS1, 
AS0, B, BS) + +#define KOKKOSBATCHED_TEAM_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, \ + B, BS) \ + KokkosBatched::TeamTrsvInternalLower::invoke(MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) + +#define KOKKOSBATCHED_TEAM_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, \ + BS) \ + KokkosBatched::TeamTrsvInternalUpper::invoke(MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) + +#define KOKKOSBATCHED_TEAM_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, \ + B, BS) \ + KokkosBatched::TeamTrsvInternalUpper::invoke(MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, BS) + +#define KOKKOSBATCHED_TEAM_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, \ + BS) \ + KokkosBatched::TeamTrsvInternalLower::invoke(MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, BS) + +#define KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, \ + AS1, B, BS) \ + KokkosBatched::TeamVectorTrsvInternalLower::invoke(MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, \ + BS) + +#define KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, \ + AS1, B, BS) \ + KokkosBatched::TeamVectorTrsvInternalUpper::invoke(MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, \ + BS) + +#define KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, \ + AS1, B, BS) \ + KokkosBatched::TeamVectorTrsvInternalUpper::invoke(MEMBER, DIAG::use_unit_diag, M, ALPHA, A, AS0, AS1, B, \ + BS) + +#define KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, \ + AS1, B, BS) \ + KokkosBatched::TeamVectorTrsvInternalLower::invoke(MEMBER, DIAG::use_unit_diag, N, ALPHA, A, AS1, AS0, B, \ + BS) + +#define 
KOKKOSBATCHED_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, \ + AS1, B, BS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAM_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, \ + BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, \ + B, BS); \ } -#define KOKKOSBATCHED_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - if (std::is_same::value) { \ - KOKKOSBATCHED_SERIAL_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ +#define KOKKOSBATCHED_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, \ + B, BS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAM_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAMVECTOR_TRSV_LOWER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, \ + BS); \ } -#define KOKKOSBATCHED_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - if (std::is_same::value) { \ - 
KOKKOSBATCHED_SERIAL_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ +#define KOKKOSBATCHED_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, \ + AS1, B, BS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAM_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, \ + BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_NO_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, \ + B, BS); \ } -#define KOKKOSBATCHED_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS) \ - if (std::is_same::value) { \ - KOKKOSBATCHED_SERIAL_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAM_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ - } else if (std::is_same::value) { \ - KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE( \ - ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ +#define KOKKOSBATCHED_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(MODETYPE, ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, \ + B, BS) \ + if (std::is_same::value) { \ + KOKKOSBATCHED_SERIAL_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ + } else if (std::is_same::value) { \ + 
KOKKOSBATCHED_TEAM_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, BS); \ + } else if (std::is_same::value) { \ + KOKKOSBATCHED_TEAMVECTOR_TRSV_UPPER_TRANSPOSE_INTERNAL_INVOKE(ALGOTYPE, MEMBER, DIAG, M, N, ALPHA, A, AS0, AS1, B, \ + BS); \ } #endif diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_UTV_Decl.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_UTV_Decl.hpp index 792236a14fef..bae2780e1089 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_UTV_Decl.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_UTV_Decl.hpp @@ -57,12 +57,10 @@ namespace KokkosBatched { template struct TeamVectorUTV { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const AViewType &A, const pViewType &p, - const UViewType &U, const VViewType &V, const wViewType &w, - int &matrix_rank); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const pViewType &p, + const UViewType &U, const VViewType &V, const wViewType &w, + int &matrix_rank); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector.hpp index 71d159cb0369..e44af7bc04fc 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector.hpp @@ -143,9 +143,7 @@ struct DefaultInternalVectorLength { }; template struct DefaultInternalVectorLength { - enum : int { - value = DefaultVectorLength::value - }; + enum : int { value = DefaultVectorLength::value }; }; #if defined(KOKKOS_ENABLE_CUDA) @@ -174,13 +172,11 @@ struct DefaultInternalVectorLength { enum : int { value = 2 }; }; template <> -struct DefaultInternalVectorLength, - Kokkos::CudaUVMSpace> { +struct DefaultInternalVectorLength, Kokkos::CudaUVMSpace> { enum : int { value = 2 }; }; 
template <> -struct DefaultInternalVectorLength, - Kokkos::CudaUVMSpace> { +struct DefaultInternalVectorLength, Kokkos::CudaUVMSpace> { enum : int { value = 1 }; }; #endif @@ -256,18 +252,12 @@ class ArithTraits, l>> { typedef typename ArithTraits::val_type val_scalar_type; typedef typename ArithTraits::mag_type mag_scalar_type; - typedef KokkosBatched::Vector, l> - val_type; - typedef KokkosBatched::Vector, l> - mag_type; + typedef KokkosBatched::Vector, l> val_type; + typedef KokkosBatched::Vector, l> mag_type; - static KOKKOS_FORCEINLINE_FUNCTION mag_type real(const val_type &val) { - return val; - } + static KOKKOS_FORCEINLINE_FUNCTION mag_type real(const val_type &val) { return val; } - static KOKKOS_FORCEINLINE_FUNCTION val_type conj(const val_type &val) { - return val; - } + static KOKKOS_FORCEINLINE_FUNCTION val_type conj(const val_type &val) { return val; } static KOKKOS_FORCEINLINE_FUNCTION val_type abs(const val_type &val) { using KAT = ArithTraits; @@ -286,17 +276,13 @@ class ArithTraits, l>> { }; template -class ArithTraits< - KokkosBatched::Vector>, l>> { +class ArithTraits>, l>> { public: typedef typename ArithTraits::val_type val_scalar_type; typedef typename ArithTraits::mag_type mag_scalar_type; - typedef KokkosBatched::Vector< - KokkosBatched::SIMD>, l> - val_type; - typedef KokkosBatched::Vector, l> - mag_type; + typedef KokkosBatched::Vector>, l> val_type; + typedef KokkosBatched::Vector, l> mag_type; static KOKKOS_FORCEINLINE_FUNCTION mag_type real(const val_type &val) { mag_type r_val; diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector_SIMD.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector_SIMD.hpp index 753904dbb9e0..52a73deda4f6 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector_SIMD.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Vector_SIMD.hpp @@ -63,8 +63,7 @@ class Vector, l> { for (int i = 0; i < vector_length; ++i) _data[i] = val; } template - 
KOKKOS_INLINE_FUNCTION Vector( - const Vector, vector_length> &b) { + KOKKOS_INLINE_FUNCTION Vector(const Vector, vector_length> &b) { KOKKOSKERNELS_FORCE_SIMD for (int i = 0; i < vector_length; ++i) _data[i] = b[i]; } @@ -140,8 +139,7 @@ class Vector, 2> { } template - KOKKOS_INLINE_FUNCTION Vector( - const Vector, vector_length> &b) { + KOKKOS_INLINE_FUNCTION Vector(const Vector, vector_length> &b) { _data.x = b[0]; _data.y = b[1]; } @@ -183,9 +181,7 @@ class Vector, 2> { } KOKKOS_INLINE_FUNCTION - value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; template <> @@ -232,8 +228,7 @@ class Vector, 2> { } template - KOKKOS_INLINE_FUNCTION Vector( - const Vector, vector_length> &b) { + KOKKOS_INLINE_FUNCTION Vector(const Vector, vector_length> &b) { _data.x = b[0]; _data.y = b[1]; } @@ -275,9 +270,7 @@ class Vector, 2> { } KOKKOS_INLINE_FUNCTION - value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; template <> @@ -334,8 +327,7 @@ class Vector, 4> { } template - KOKKOS_INLINE_FUNCTION Vector( - const Vector, vector_length> &b) { + KOKKOS_INLINE_FUNCTION Vector(const Vector, vector_length> &b) { _data.x = b[0]; _data.y = b[1]; _data.z = b[2]; @@ -389,9 +381,7 @@ class Vector, 4> { } KOKKOS_INLINE_FUNCTION - value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; template <> @@ -448,8 +438,7 @@ class Vector, 4> { } template - KOKKOS_INLINE_FUNCTION Vector( - const Vector, vector_length> &b) { + KOKKOS_INLINE_FUNCTION Vector(const Vector, vector_length> &b) { _data.x = b[0]; _data.y = b[1]; _data.z = b[2]; @@ -503,9 +492,7 @@ class Vector, 4> { } KOKKOS_INLINE_FUNCTION - value_type &operator[](const int &i) const 
{ - return reinterpret_cast(&_data)[i]; - } + value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; } // namespace KokkosBatched @@ -580,13 +567,9 @@ class Vector, 4> { inline void storeAligned(value_type *p) const { _mm256_store_pd(p, _data); } - inline void storeUnaligned(value_type *p) const { - _mm256_storeu_pd(p, _data); - } + inline void storeUnaligned(value_type *p) const { _mm256_storeu_pd(p, _data); } - inline value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + inline value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; template <> @@ -657,17 +640,11 @@ class Vector >, 2> { return *this; } - inline void storeAligned(value_type *p) const { - _mm256_store_pd((mag_type *)p, _data); - } + inline void storeAligned(value_type *p) const { _mm256_store_pd((mag_type *)p, _data); } - inline void storeUnaligned(value_type *p) const { - _mm256_storeu_pd((mag_type *)p, _data); - } + inline void storeUnaligned(value_type *p) const { _mm256_storeu_pd((mag_type *)p, _data); } - inline value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + inline value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; } // namespace KokkosBatched #endif /* #if defined(__AVX__) || defined(__AVX2__) */ @@ -737,13 +714,9 @@ class Vector, 8> { inline void storeAligned(value_type *p) const { _mm512_store_pd(p, _data); } - inline void storeUnaligned(value_type *p) const { - _mm512_storeu_pd(p, _data); - } + inline void storeUnaligned(value_type *p) const { _mm512_storeu_pd(p, _data); } - inline value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + inline value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; template <> @@ -767,13 +740,11 @@ class Vector >, 4> { public: inline Vector() { _data = _mm512_setzero_pd(); } inline Vector(const value_type &val) { - _data = 
_mm512_mask_broadcast_f64x4(_mm512_set1_pd(val.imag()), 0x55, - _mm256_set1_pd(val.real())); + _data = _mm512_mask_broadcast_f64x4(_mm512_set1_pd(val.imag()), 0x55, _mm256_set1_pd(val.real())); KOKKOSKERNELS_GNU_COMPILER_FENCE } inline Vector(const mag_type &val) { - _data = _mm512_mask_broadcast_f64x4(_mm512_setzero_pd(), 0x55, - _mm256_set1_pd(val)); + _data = _mm512_mask_broadcast_f64x4(_mm512_setzero_pd(), 0x55, _mm256_set1_pd(val)); KOKKOSKERNELS_GNU_COMPILER_FENCE } inline Vector(const type &b) { _data = b._data; } @@ -810,17 +781,11 @@ class Vector >, 4> { return *this; } - inline void storeAligned(value_type *p) const { - _mm512_store_pd((mag_type *)p, _data); - } + inline void storeAligned(value_type *p) const { _mm512_store_pd((mag_type *)p, _data); } - inline void storeUnaligned(value_type *p) const { - _mm512_storeu_pd((mag_type *)p, _data); - } + inline void storeUnaligned(value_type *p) const { _mm512_storeu_pd((mag_type *)p, _data); } - inline value_type &operator[](const int &i) const { - return reinterpret_cast(&_data)[i]; - } + inline value_type &operator[](const int &i) const { return reinterpret_cast(&_data)[i]; } }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Xpay.hpp b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Xpay.hpp index 1e9a08623b13..51418fd81a87 100644 --- a/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Xpay.hpp +++ b/packages/kokkos-kernels/batched/dense/src/KokkosBatched_Xpay.hpp @@ -44,9 +44,7 @@ namespace KokkosBatched { struct SerialXpay { template - KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, - const ViewType &X, - const ViewType &Y); + KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, const ViewType &X, const ViewType &Y); }; /// \brief Team Batched XPAY: @@ -72,9 +70,7 @@ struct SerialXpay { template struct TeamXpay { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const 
alphaViewType &alpha, - const ViewType &X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, const ViewType &X, const ViewType &Y); }; @@ -102,9 +98,7 @@ struct TeamXpay { template struct TeamVectorXpay { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const alphaViewType &alpha, - const ViewType &X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, const ViewType &X, const ViewType &Y); }; diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm.hpp index 3c00b4f47721..6c2c359f00d7 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm.hpp @@ -25,14 +25,10 @@ using namespace KokkosBatched; namespace Test { -template -void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, - const int N, const int matAdim1, - const int matAdim2, const int matBdim1, - const int matBdim2, const int matCdim1, - const int matCdim2, ScalarType alpha, - ScalarType beta) { +template +void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, const int N, const int matAdim1, + const int matAdim2, const int matBdim1, const int matBdim2, const int matCdim1, + const int matCdim2, ScalarType alpha, ScalarType beta) { using execution_space = typename DeviceType::execution_space; using transA = typename ParamTagType::transA; using transB = typename ParamTagType::transB; @@ -43,15 +39,11 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, auto algo_type = batchedGemmHandle->get_kernel_algo_type(); ViewType a_expected, a_actual, b_expected, b_actual, c_expected, c_actual; std::string fmsg; - std::string fmsg_rhs = - "algo_type:" + batchedGemmHandle->get_kernel_algo_type_str() + ", "; + 
std::string fmsg_rhs = "algo_type:" + batchedGemmHandle->get_kernel_algo_type_str() + ", "; fmsg_rhs += ("N:" + std::to_string(N) + ", "); - fmsg_rhs += - ("A:" + std::to_string(matAdim1) + "x" + std::to_string(matAdim2) + ", "); - fmsg_rhs += - ("B:" + std::to_string(matBdim1) + "x" + std::to_string(matBdim2) + ", "); - fmsg_rhs += - ("C:" + std::to_string(matCdim1) + "x" + std::to_string(matCdim2) + "\n"); + fmsg_rhs += ("A:" + std::to_string(matAdim1) + "x" + std::to_string(matAdim2) + ", "); + fmsg_rhs += ("B:" + std::to_string(matBdim1) + "x" + std::to_string(matBdim2) + ", "); + fmsg_rhs += ("C:" + std::to_string(matCdim1) + "x" + std::to_string(matCdim2) + "\n"); if (std::is_same::value) { a_expected = ViewType("a_expected", N, matAdim1, matAdim2); @@ -86,10 +78,8 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, // Check for DblBuf runtime errors related to team_size try { fmsg = kk_failure_str(__FILE__, __FUNCTION__, __LINE__); - Impl::BatchedDblBufGemm( + Impl::BatchedDblBufGemm( batchedGemmHandle, alpha, a_actual, b_actual, beta, c_actual) .invoke(); FAIL() << (fmsg + fmsg_rhs); @@ -100,11 +90,9 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, // Check for DblBuf runtime errors related to vector_len try { fmsg = kk_failure_str(__FILE__, __FUNCTION__, __LINE__); - Impl::BatchedDblBufGemm< - transA, transB, batchLayout, BatchedGemmHandle, ScalarType, - decltype(a_actual), decltype(b_actual), decltype(c_actual), - BoundsCheck::No, AlphaTag::No, 65536, 65536 * 2, 65536>( - batchedGemmHandle, alpha, a_actual, b_actual, beta, c_actual) + Impl::BatchedDblBufGemm(batchedGemmHandle, alpha, a_actual, b_actual, beta, c_actual) .invoke(); FAIL() << (fmsg + fmsg_rhs); } catch (const std::runtime_error& error) { @@ -123,9 +111,8 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, #endif fmsg = kk_failure_str(__FILE__, __FUNCTION__, __LINE__); - ret = BatchedGemm( - 
batchedGemmHandle, alpha, a_actual, b_actual, beta, - c_actual); // Compute c_actual + ret = BatchedGemm(batchedGemmHandle, alpha, a_actual, b_actual, beta, + c_actual); // Compute c_actual } catch (const std::runtime_error& error) { std::string error_msg = error.what(); if (algo_type == BaseHeuristicAlgos::SQUARE && matCdim1 != matCdim2) { @@ -135,8 +122,7 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, auto ninter = batchedGemmHandle->get_tpl_params()[0]; // No runtime errors expected since layout is valid, double is a supported // type, and ninter != 0 - if (std::is_same::value && - ninter != 0) { + if (std::is_same::value && ninter != 0) { FAIL() << (error_msg + fmsg + fmsg_rhs); } #else @@ -149,12 +135,10 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, } ASSERT_EQ(ret, 0) << (fmsg + fmsg_rhs); - Functor_BatchedVanillaGEMM - vgemm; - vgemm.A_t = std::is_same::value; - vgemm.B_t = std::is_same::value; - vgemm.batch_size_last_dim = - std::is_same::value; + Functor_BatchedVanillaGEMM vgemm; + vgemm.A_t = std::is_same::value; + vgemm.B_t = std::is_same::value; + vgemm.batch_size_last_dim = std::is_same::value; vgemm.A_c = vgemm.B_c = false; vgemm.A = a_expected; vgemm.B = b_expected; @@ -165,10 +149,8 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, Kokkos::fence(); - typename ViewType::HostMirror c_expected_host = - Kokkos::create_mirror_view(c_expected); - typename ViewType::HostMirror c_actual_host = - Kokkos::create_mirror_view(c_actual); + typename ViewType::HostMirror c_expected_host = Kokkos::create_mirror_view(c_expected); + typename ViewType::HostMirror c_actual_host = Kokkos::create_mirror_view(c_actual); // Copy to host Kokkos::deep_copy(c_expected_host, c_expected); @@ -205,26 +187,21 @@ void impl_test_batched_gemm_with_handle(BatchedGemmHandle* batchedGemmHandle, EXPECT_NEAR_KK(diff / sum, 0, eps, fmsg + fmsg_rhs); } -template -void 
impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, - const int matBdim1, const int matBdim2, +template +void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, const int matBdim1, const int matBdim2, const int matCdim1, const int matCdim2) { { BatchedGemmHandle batchedGemmHandle; - ASSERT_EQ(batchedGemmHandle.get_kernel_algo_type(), - BaseHeuristicAlgos::SQUARE); + ASSERT_EQ(batchedGemmHandle.get_kernel_algo_type(), BaseHeuristicAlgos::SQUARE); ASSERT_EQ(batchedGemmHandle.teamSz, 0); ASSERT_EQ(batchedGemmHandle.vecLen, 0); #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) cublasHandle_t cublas_handle; - BatchedGemmHandle batchedGemmHandleCublas(cublas_handle, - GemmTplAlgos::CUBLAS, 0, 0); + BatchedGemmHandle batchedGemmHandleCublas(cublas_handle, GemmTplAlgos::CUBLAS, 0, 0); ASSERT_EQ(&cublas_handle, batchedGemmHandleCublas.get_tpl_params()); - ASSERT_EQ(batchedGemmHandleCublas.get_kernel_algo_type(), - (int)GemmTplAlgos::CUBLAS); + ASSERT_EQ(batchedGemmHandleCublas.get_kernel_algo_type(), (int)GemmTplAlgos::CUBLAS); ASSERT_EQ(batchedGemmHandleCublas.teamSz, 0); ASSERT_EQ(batchedGemmHandleCublas.vecLen, 0); #endif @@ -232,53 +209,37 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, // FIXME temporary workaround to run this magma test only if cublas is not // enabled the design of the BatchedGemmHandle currently does not allow // simultanous testing in this way. 
See issue #2177 -#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) +#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && !defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) magma_queue_t magma_queue; - BatchedGemmHandle batchedGemmHandleMagma(magma_queue, GemmTplAlgos::MAGMA, - 0, 0); + BatchedGemmHandle batchedGemmHandleMagma(magma_queue, GemmTplAlgos::MAGMA, 0, 0); ASSERT_EQ(&magma_queue, batchedGemmHandleMagma.get_tpl_params()); - ASSERT_EQ(batchedGemmHandleMagma.get_kernel_algo_type(), - (int)GemmTplAlgos::MAGMA); + ASSERT_EQ(batchedGemmHandleMagma.get_kernel_algo_type(), (int)GemmTplAlgos::MAGMA); ASSERT_EQ(batchedGemmHandleMagma.teamSz, 0); ASSERT_EQ(batchedGemmHandleMagma.vecLen, 0); #endif } - for (int algo_type = BaseHeuristicAlgos::SQUARE; - algo_type < GemmKokkosBatchedAlgos::N; ++algo_type) { + for (int algo_type = BaseHeuristicAlgos::SQUARE; algo_type < GemmKokkosBatchedAlgos::N; ++algo_type) { { try { BatchedGemmHandle batchedGemmHandle(algo_type); ASSERT_EQ(batchedGemmHandle.get_kernel_algo_type(), algo_type); - if (algo_type == BaseTplAlgos::ARMPL || - algo_type == BaseKokkosBatchedAlgos::KK_SERIAL || - algo_type == GemmKokkosBatchedAlgos::KK_SERIAL_RANK0 || - algo_type == GemmKokkosBatchedAlgos::KK_DBLBUF) { - impl_test_batched_gemm_with_handle( - &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, - matCdim1, matCdim2, 1.5, 3.0); + if (algo_type == BaseTplAlgos::ARMPL || algo_type == BaseKokkosBatchedAlgos::KK_SERIAL || + algo_type == GemmKokkosBatchedAlgos::KK_SERIAL_RANK0 || algo_type == GemmKokkosBatchedAlgos::KK_DBLBUF) { + impl_test_batched_gemm_with_handle( + &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, matCdim1, matCdim2, 1.5, 3.0); } else if (algo_type == BaseHeuristicAlgos::SQUARE) { // Invoke 4 times to ensure we cover all paths for alpha and beta - impl_test_batched_gemm_with_handle( - &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, - matCdim1, matCdim2, 0.0, 0.0); - 
impl_test_batched_gemm_with_handle( - &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, - matCdim1, matCdim2, 1.0, 0.0); - impl_test_batched_gemm_with_handle( - &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, - matCdim1, matCdim2, 0.0, 1.0); - impl_test_batched_gemm_with_handle( - &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, - matCdim1, matCdim2, 1.5, 3.0); + impl_test_batched_gemm_with_handle( + &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, matCdim1, matCdim2, 0.0, 0.0); + impl_test_batched_gemm_with_handle( + &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, matCdim1, matCdim2, 1.0, 0.0); + impl_test_batched_gemm_with_handle( + &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, matCdim1, matCdim2, 0.0, 1.0); + impl_test_batched_gemm_with_handle( + &batchedGemmHandle, N, matAdim1, matAdim2, matBdim1, matBdim2, matCdim1, matCdim2, 1.5, 3.0); } else { try { // Allocate these views to invoke BatchedGemm with an unsupported @@ -291,8 +252,7 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, using bl = typename ParamTagType::batchLayout; ScalarType alpha = 0.34; ScalarType beta = 0.43; - BatchedGemm(&batchedGemmHandle, alpha, a_actual, - b_actual, beta, c_actual); + BatchedGemm(&batchedGemmHandle, alpha, a_actual, b_actual, beta, c_actual); std::string fmsg = kk_failure_str(__FILE__, __FUNCTION__, __LINE__); FAIL() << fmsg; } catch (const std::runtime_error& error) { @@ -314,26 +274,21 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, } } // namespace Test -template +template void test_batched_gemm_with_layout(int N) { // Square cases { int i = 0; - Test::impl_test_batched_gemm(N, i, i, i, i, i, i); + Test::impl_test_batched_gemm(N, i, i, i, i, i, i); i = 10; - Test::impl_test_batched_gemm(N, i, i, i, i, i, i); + Test::impl_test_batched_gemm(N, i, i, i, i, i, i); i = 25; - Test::impl_test_batched_gemm(N, i, i, i, i, 
i, i); + Test::impl_test_batched_gemm(N, i, i, i, i, i, i); i = 32; - Test::impl_test_batched_gemm(N, i, i, i, i, i, i); + Test::impl_test_batched_gemm(N, i, i, i, i, i, i); } // Non-square cases @@ -341,63 +296,42 @@ void test_batched_gemm_with_layout(int N) { int dimM = 1 * i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::impl_test_batched_gemm(N, dimM, dimK, dimK, dimN, - dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::impl_test_batched_gemm(N, dimM, dimK, dimK, dimN, dimM, + dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::impl_test_batched_gemm(N, dimM, dimK, dimN, dimK, - dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::impl_test_batched_gemm(N, dimM, dimK, dimN, dimK, dimM, + dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::impl_test_batched_gemm(N, dimK, dimM, dimK, dimN, - dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::impl_test_batched_gemm(N, dimK, dimM, dimK, dimN, dimM, + dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::impl_test_batched_gemm(N, dimK, dimM, dimN, dimK, - dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::impl_test_batched_gemm(N, dimK, dimM, dimN, dimK, dimM, + dimN); } } } -template +template int test_batched_gemm() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - if constexpr (std::is_same_v) { - using param_tag_type = ::Test::SharedParamTag; + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + if constexpr (std::is_same_v) { + using param_tag_type = + ::Test::SharedParamTag; typedef Kokkos::View llVt; - test_batched_gemm_with_layout(0); - test_batched_gemm_with_layout(1); - test_batched_gemm_with_layout(4); - test_batched_gemm_with_layout(8); - 
test_batched_gemm_with_layout(16); + test_batched_gemm_with_layout(0); + test_batched_gemm_with_layout(1); + test_batched_gemm_with_layout(4); + test_batched_gemm_with_layout(8); + test_batched_gemm_with_layout(16); } else { std::cerr << "TEST SKIPPED since BatchLayout is not Right." << std::endl; } @@ -406,24 +340,16 @@ int test_batched_gemm() { #endif // KOKKOSKERNELS_INST_LAYOUTLEFT #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - if constexpr (std::is_same_v) { - using param_tag_type = ::Test::SharedParamTag; + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + if constexpr (std::is_same_v) { + using param_tag_type = + ::Test::SharedParamTag; typedef Kokkos::View lrVt; - test_batched_gemm_with_layout(0); - test_batched_gemm_with_layout(1); - test_batched_gemm_with_layout(4); - test_batched_gemm_with_layout(8); - test_batched_gemm_with_layout(16); + test_batched_gemm_with_layout(0); + test_batched_gemm_with_layout(1); + test_batched_gemm_with_layout(4); + test_batched_gemm_with_layout(8); + test_batched_gemm_with_layout(16); } else { std::cerr << "TEST SKIPPED since BatchLayout is not Left." 
<< std::endl; } diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Complex.hpp index 3c58f432ec2a..4e9bfa42eff3 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Complex.hpp @@ -16,139 +16,89 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) /********************* BatchLayout::Left *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_scomplex_scomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_scomplex_scomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_scomplex_scomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_scomplex_scomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } /********************* BatchLayout::Right *********************/ -TEST_F(TestCategory, - batched_scalar_batched_gemm_nt_nt_scomplex_scomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; +TEST_F(TestCategory, 
batched_scalar_batched_gemm_nt_nt_scomplex_scomplex_right) { + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_scomplex_scomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_scomplex_scomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_scomplex_scomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, Kokkos::complex, - param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) /********************* BatchLayout::Left *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_dcomplex_dcomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_dcomplex_dcomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_dcomplex_dcomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef 
::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_dcomplex_dcomplex_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } /********************* BatchLayout::Right *********************/ -TEST_F(TestCategory, - batched_scalar_batched_gemm_nt_nt_dcomplex_dcomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; +TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_dcomplex_dcomplex_right) { + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_dcomplex_dcomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_dcomplex_dcomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_dcomplex_dcomplex_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Real.hpp index 
62a4a291a86b..d2e9fe48d796 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_BatchedGemm_Real.hpp @@ -16,206 +16,140 @@ // We do not ETI half-types. Only test this if ETI ONLY is off // and bhalf_t is not an alias to float. -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) && \ +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) && \ defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT /********************* BatchLayout::Left *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_bhalf_bhalf_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_bhalf_bhalf_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_bhalf_bhalf_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_bhalf_bhalf_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } /********************* BatchLayout::Right *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_bhalf_bhalf_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_bhalf_bhalf_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; 
- test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_bhalf_bhalf_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_bhalf_bhalf_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } #endif // KOKKOS_BHALF_T_IS_FLOAT // We do not ETI half-types. Only test this if ETI ONLY is off // and half_t is not an alias to float. -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) && \ +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) && \ defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT /********************* BatchLayout::Left *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_half_half_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_half_half_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_half_half_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_half_half_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } /********************* BatchLayout::Right *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_half_half_right) { - typedef ::Test::SharedParamTag - 
param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_half_half_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_half_half_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_half_half_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; - test_batched_gemm(); + test_batched_gemm(); } #endif // KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOSKERNELS_INST_FLOAT) /********************* BatchLayout::Left *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_float_float_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_float_float_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_float_float_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_float_float_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } /********************* BatchLayout::Right *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_float_float_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, 
batched_scalar_batched_gemm_t_nt_float_float_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_float_float_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_float_float_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } @@ -224,59 +158,43 @@ TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_float_float_right) { #if defined(KOKKOSKERNELS_INST_DOUBLE) /********************* BatchLayout::Left *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_double_double_left) { - using param_tag_type = - ::Test::SharedParamTag; + using param_tag_type = ::Test::SharedParamTag; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_double_double_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_double_double_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_double_double_left) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } /********************* BatchLayout::Right *********************/ TEST_F(TestCategory, batched_scalar_batched_gemm_nt_nt_double_double_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_nt_double_double_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag 
param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_nt_t_double_double_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_batched_gemm_t_t_double_double_right) { - typedef ::Test::SharedParamTag - param_tag_type; + typedef ::Test::SharedParamTag param_tag_type; test_batched_gemm(); } diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_Dense.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_Dense.hpp index cf9b3c23f402..76215b58f836 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_Dense.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_Dense.hpp @@ -42,10 +42,16 @@ #include "Test_Batched_SerialTrsv.hpp" #include "Test_Batched_SerialTrsv_Real.hpp" #include "Test_Batched_SerialTrsv_Complex.hpp" +#include "Test_Batched_SerialTbsv.hpp" +#include "Test_Batched_SerialTbsv_Real.hpp" +#include "Test_Batched_SerialTbsv_Complex.hpp" #include "Test_Batched_SerialTrtri.hpp" #include "Test_Batched_SerialTrtri_Real.hpp" #include "Test_Batched_SerialTrtri_Complex.hpp" #include "Test_Batched_SerialSVD.hpp" +#include "Test_Batched_SerialPttrf.hpp" +#include "Test_Batched_SerialPttrf_Real.hpp" +#include "Test_Batched_SerialPttrf_Complex.hpp" // Team Kernels #include "Test_Batched_TeamAxpy.hpp" diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_DenseUtils.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_DenseUtils.hpp index 6a96bd193a80..f536f220d3a5 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_DenseUtils.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_DenseUtils.hpp @@ -16,16 +16,13 @@ #ifndef TEST_BATCHED_DENSE_HELPER_HPP #define TEST_BATCHED_DENSE_HELPER_HPP +#include "KokkosBatched_Util.hpp" + namespace KokkosBatched { template -void 
create_tridiagonal_batched_matrices(const MatrixViewType &A, - const VectorViewType &B) { - Kokkos::Random_XorShift64_Pool< - typename VectorViewType::device_type::execution_space> - random(13718); - Kokkos::fill_random( - B, random, - Kokkos::reduction_identity::prod()); +void create_tridiagonal_batched_matrices(const MatrixViewType& A, const VectorViewType& B) { + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(B, random, Kokkos::reduction_identity::prod()); auto A_host = Kokkos::create_mirror_view(A); @@ -54,6 +51,100 @@ void create_tridiagonal_batched_matrices(const MatrixViewType &A, Kokkos::fence(); } + +template +void create_banded_triangular_matrix(InViewType& in, OutViewType& out, int k = 1, bool band_storage = true) { + auto h_in = Kokkos::create_mirror_view(in); + auto h_out = Kokkos::create_mirror_view(out); + const int N = in.extent(0), BlkSize = in.extent(1); + + Kokkos::deep_copy(h_in, in); + if (band_storage) { + assert(out.extent(0) == in.extent(0)); + assert(out.extent(1) == static_cast(k + 1)); + assert(out.extent(2) == in.extent(2)); + if constexpr (std::is_same_v) { + for (int i0 = 0; i0 < N; i0++) { + for (int i1 = 0; i1 < k + 1; i1++) { + for (int i2 = i1; i2 < BlkSize; i2++) { + h_out(i0, k - i1, i2) = h_in(i0, i2 - i1, i2); + } + } + } + } else { + for (int i0 = 0; i0 < N; i0++) { + for (int i1 = 0; i1 < k + 1; i1++) { + for (int i2 = 0; i2 < BlkSize - i1; i2++) { + h_out(i0, i1, i2) = h_in(i0, i2 + i1, i2); + } + } + } + } + } else { + for (std::size_t i = 0; i < InViewType::rank(); i++) { + assert(out.extent(i) == in.extent(i)); + } + + if constexpr (std::is_same_v) { + for (int i0 = 0; i0 < N; i0++) { + for (int i1 = 0; i1 < BlkSize; i1++) { + for (int i2 = i1; i2 < Kokkos::min(i1 + k + 1, BlkSize); i2++) { + h_out(i0, i1, i2) = h_in(i0, i1, i2); + } + } + } + } else { + for (int i0 = 0; i0 < N; i0++) { + for (int i1 = 0; i1 < BlkSize; i1++) { + for (int i2 = Kokkos::max(0, i1 - k); i2 <= i1; i2++) { + h_out(i0, i1, 
i2) = h_in(i0, i1, i2); + } + } + } + } + } + Kokkos::deep_copy(out, h_out); +} + +/// \brief Create a diagonal matrix from an input vector: +/// Copies the input vector into the diagonal of the output matrix specified +/// by the parameter k. k > 0 means that the matrix is upper-diagonal and +/// k < 0 means the lower-diagonal. k = 0 means the diagonal. +/// +/// \tparam InViewType: Input type for the vector, needs to be a 2D view +/// \tparam OutViewType: Output type for the matrix, needs to be a 3D view +/// +/// \param in [in]: Input batched vector, a rank 2 view +/// \param out [out]: Output batched matrix, where the diagonal component +/// specified by k is filled with the input vector, a rank 3 view +/// \param k [in]: The diagonal offset to be filled (default is 0). +/// +template +void create_diagonal_matrix(InViewType& in, OutViewType& out, int k = 0) { + auto h_in = Kokkos::create_mirror_view(in); + auto h_out = Kokkos::create_mirror_view(out); + const int N = in.extent(0), BlkSize = in.extent(1); + + assert(out.extent(0) == in.extent(0)); + assert(out.extent(1) == in.extent(1) + abs(k)); + + int i1_start = k >= 0 ? 0 : -k; + int i2_start = k >= 0 ?
k : 0; + + // Zero clear the output matrix + using ScalarType = typename OutViewType::non_const_value_type; + Kokkos::deep_copy(h_out, ScalarType(0.0)); + + Kokkos::deep_copy(h_in, in); + for (int i0 = 0; i0 < N; i0++) { + for (int i1 = 0; i1 < BlkSize; i1++) { + h_out(i0, i1 + i1_start, i1 + i2_start) = h_in(i0, i1); + } + } + + Kokkos::deep_copy(out, h_out); +} + } // namespace KokkosBatched #endif // TEST_BATCHED_DENSE_HELPER_HPP diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy.hpp index 90ce5addc3e7..df6f0ee069c4 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy.hpp @@ -36,8 +36,7 @@ struct Functor_TestBatchedSerialAxpy { const ViewType _Y; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialAxpy(const alphaViewType &alpha, const ViewType &X, - const ViewType &Y) + Functor_TestBatchedSerialAxpy(const alphaViewType &alpha, const ViewType &X, const ViewType &Y) : _alpha(alpha), _X(X), _Y(Y) {} KOKKOS_INLINE_FUNCTION @@ -68,13 +67,11 @@ void impl_test_batched_axpy(const int N, const int BlkSize) { typedef typename alphaViewType::const_value_type alpha_const_value_type; typedef Kokkos::ArithTraits ats; - ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), - Y1("y1", N, BlkSize); + ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), Y1("y1", N, BlkSize); alphaViewType alpha("alpha", N); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(X0, random, const_value_type(1.0)); Kokkos::fill_random(Y0, random, const_value_type(1.0)); Kokkos::fill_random(alpha, random, alpha_const_value_type(1.0)); @@ -94,12 +91,9 @@ void impl_test_batched_axpy(const int N, const int BlkSize) { Kokkos::deep_copy(Y0_host, Y0); for (int l = 0; l < N; ++l) 
- for (int i = 0; i < BlkSize; ++i) - Y0_host(l, i) += alpha_host(l) * X0_host(l, i); + for (int i = 0; i < BlkSize; ++i) Y0_host(l, i) += alpha_host(l) * X0_host(l, i); - Functor_TestBatchedSerialAxpy(alpha, X1, - Y1) - .run(); + Functor_TestBatchedSerialAxpy(alpha, X1, Y1).run(); Kokkos::fence(); @@ -128,25 +122,20 @@ int test_batched_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::Axpy::impl_test_batched_axpy( - 1024, i); + Test::Axpy::impl_test_batched_axpy(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View ViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::Axpy::impl_test_batched_axpy( - 1024, i); + Test::Axpy::impl_test_batched_axpy(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Complex.hpp index ed647f1e3b1c..7d1b3301f195 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Complex.hpp @@ -16,8 +16,7 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_axpy_nt_dcomplex_dcomplex) { - test_batched_axpy, - Kokkos::complex>(); + test_batched_axpy, Kokkos::complex>(); } TEST_F(TestCategory, batched_scalar_serial_axpy_nt_dcomplex_double) { diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Real.hpp index 3f1f6af2fdf8..a0c49287f740 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Real.hpp +++ 
b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialAxpy_Real.hpp @@ -15,13 +15,9 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_serial_axpy_nt_float_float) { - test_batched_axpy(); -} +TEST_F(TestCategory, batched_scalar_serial_axpy_nt_float_float) { test_batched_axpy(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_serial_axpy_nt_double_double) { - test_batched_axpy(); -} +TEST_F(TestCategory, batched_scalar_serial_axpy_nt_double_double) { test_batched_axpy(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm.hpp index 7f27fa7dcf7d..144bb2251e78 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Serial_Impl.hpp" @@ -37,8 +37,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_TestBatchedSerialGemm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -46,8 +45,7 @@ struct Functor_TestBatchedSerialGemm { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialGemm(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_TestBatchedSerialGemm(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} @@ -57,8 +55,8 @@ struct Functor_TestBatchedSerialGemm { auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(_c, k, Kokkos::ALL(), Kokkos::ALL()); - 
SerialGemm::invoke(_alpha, aa, bb, _beta, cc); + SerialGemm::invoke(_alpha, aa, bb, _beta, + cc); } inline void run() { @@ -73,10 +71,8 @@ struct Functor_TestBatchedSerialGemm { } }; -template -void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, - const int matBdim1, const int matBdim2, +template +void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, const int matBdim1, const int matBdim2, const int matCdim1, const int matCdim2) { using execution_space = typename DeviceType::execution_space; using transA = typename ParamTagType::transA; @@ -88,12 +84,9 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, ScalarType alpha = ScalarType(1.5); ScalarType beta = ScalarType(3.0); - ViewType a_expected("a_expected", N, matAdim1, matAdim2), - a_actual("a_actual", N, matAdim1, matAdim2), - b_expected("b_expected", N, matBdim1, matBdim2), - b_actual("b_actual", N, matBdim1, matBdim2), - c_expected("c_expected", N, matCdim1, matCdim2), - c_actual("c_actual", N, matCdim1, matCdim2); + ViewType a_expected("a_expected", N, matAdim1, matAdim2), a_actual("a_actual", N, matAdim1, matAdim2), + b_expected("b_expected", N, matBdim1, matBdim2), b_actual("b_actual", N, matBdim1, matBdim2), + c_expected("c_expected", N, matCdim1, matCdim2), c_actual("c_actual", N, matCdim1, matCdim2); Kokkos::Random_XorShift64_Pool random(13718); @@ -107,8 +100,7 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, Kokkos::deep_copy(b_actual, b_expected); Kokkos::deep_copy(c_actual, c_expected); - Functor_BatchedVanillaGEMM - vgemm; + Functor_BatchedVanillaGEMM vgemm; vgemm.A_t = std::is_same::value; vgemm.B_t = std::is_same::value; vgemm.A_c = vgemm.B_c = false; @@ -118,15 +110,12 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, vgemm.alpha = alpha; vgemm.beta = beta; vgemm.run(); // Compute c_expected - Functor_TestBatchedSerialGemm(alpha, a_actual, 
b_actual, beta, - c_actual) + Functor_TestBatchedSerialGemm(alpha, a_actual, b_actual, + beta, c_actual) .run(); - typename ViewType::HostMirror c_expected_host = - Kokkos::create_mirror_view(c_expected); - typename ViewType::HostMirror c_actual_host = - Kokkos::create_mirror_view(c_actual); + typename ViewType::HostMirror c_expected_host = Kokkos::create_mirror_view(c_expected); + typename ViewType::HostMirror c_actual_host = Kokkos::create_mirror_view(c_actual); // Copy to host for comparison Kokkos::deep_copy(c_expected_host, c_expected); @@ -157,57 +146,41 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2, } // namespace Gemm } // namespace Test -template +template int test_batched_gemm() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::Gemm::impl_test_batched_gemm(0, 10, 10, 10, - 10, 10, 10); + typedef Kokkos::View ViewType; + Test::Gemm::impl_test_batched_gemm(0, 10, 10, 10, 10, + 10, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::Gemm::impl_test_batched_gemm(1024, i, i, - i, i, i, i); + Test::Gemm::impl_test_batched_gemm(1024, i, i, i, i, + i, i); } for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); int dimM = i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimM, dimK, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimM, dimK, dimN, dimK, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, 
dimK, dimM, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimK, dimM, dimN, dimK, dimM, dimN); } } @@ -215,52 +188,37 @@ int test_batched_gemm() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::Gemm::impl_test_batched_gemm(0, 10, 10, 10, - 10, 10, 10); + typedef Kokkos::View ViewType; + Test::Gemm::impl_test_batched_gemm(0, 10, 10, 10, 10, + 10, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::Gemm::impl_test_batched_gemm(1024, i, i, - i, i, i, i); + Test::Gemm::impl_test_batched_gemm(1024, i, i, i, i, + i, i); } for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); int dimM = i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimM, dimK, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimM, dimK, dimN, dimK, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimK, dimM, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::Gemm::impl_test_batched_gemm( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::Gemm::impl_test_batched_gemm( 1024, dimK, dimM, dimN, dimK, dimM, dimN); } } diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Complex.hpp 
b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Complex.hpp index f671292c989d..f785965602cb 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Complex.hpp @@ -18,32 +18,24 @@ /// dcomplex, dcomplex TEST_F(TestCategory, batched_scalar_serial_gemm_nt_nt_dcomplex_dcomplex) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_nt_dcomplex_dcomplex) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_gemm_nt_t_dcomplex_dcomplex) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_t_dcomplex_dcomplex) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_gemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_serial_gemm_ct_nt_dcomplex_dcomplex ) { // typedef ::Test::Gemm::ParamTag @@ -59,32 +51,24 @@ TEST_F(TestCategory, 
batched_scalar_serial_gemm_t_t_dcomplex_dcomplex) { /// dcomplex, double TEST_F(TestCategory, batched_scalar_serial_gemm_nt_nt_dcomplex_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, double, param_tag_type, - algo_tag_type>(); + test_batched_gemm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_nt_dcomplex_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, double, param_tag_type, - algo_tag_type>(); + test_batched_gemm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_gemm_nt_t_dcomplex_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, double, param_tag_type, - algo_tag_type>(); + test_batched_gemm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_t_dcomplex_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm, double, param_tag_type, - algo_tag_type>(); + test_batched_gemm, double, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_serial_gemm_ct_nt_dcomplex_double ) { // typedef ::Test::Gemm::ParamTag diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Real.hpp index 6f074867d996..afe574468852 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGemm_Real.hpp @@ -15,112 +15,88 @@ //@HEADER #if 
defined(KOKKOS_BHALF_T_IS_FLOAT) TEST_F(TestCategory, batched_scalar_serial_gemm_nt_nt_bhalf_bhalf) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_nt_bhalf_bhalf) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_nt_t_bhalf_bhalf) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_t_bhalf_bhalf) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); } #endif // KOKKOS_BHALF_T_IS_FLOAT #if defined(KOKKOS_HALF_T_IS_FLOAT) TEST_F(TestCategory, batched_scalar_serial_gemm_nt_nt_half_half) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); + test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_nt_half_half) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); + test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_nt_t_half_half) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); + test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_t_half_half) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; - test_batched_gemm(); - test_batched_gemm(); + test_batched_gemm(); + 
test_batched_gemm(); } #endif // KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_serial_gemm_nt_nt_float_float) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_nt_float_float) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_nt_t_float_float) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_t_float_float) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; test_batched_gemm(); } @@ -128,31 +104,23 @@ TEST_F(TestCategory, batched_scalar_serial_gemm_t_t_float_float) { #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_gemm_nt_nt_double_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_t_nt_double_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, batched_scalar_serial_gemm_nt_t_double_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm(); + test_batched_gemm(); } TEST_F(TestCategory, 
batched_scalar_serial_gemm_t_t_double_double) { - typedef ::Test::Gemm::ParamTag - param_tag_type; + typedef ::Test::Gemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_gemm(); + test_batched_gemm(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGesv.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGesv.hpp index bb05fab3bbc1..8ec0dd818931 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGesv.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialGesv.hpp @@ -32,8 +32,7 @@ using namespace KokkosBatched; namespace Test { namespace Gesv { -template +template struct Functor_TestBatchedSerialGesv { using execution_space = typename DeviceType::execution_space; const MatrixType _A; @@ -42,8 +41,7 @@ struct Functor_TestBatchedSerialGesv { const VectorType _B; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialGesv(const MatrixType &A, const MatrixType &tmp, - const VectorType &X, const VectorType &B) + Functor_TestBatchedSerialGesv(const MatrixType &A, const MatrixType &tmp, const VectorType &X, const VectorType &B) : _A(A), _tmp(tmp), _X(X), _B(B) {} KOKKOS_INLINE_FUNCTION @@ -68,21 +66,18 @@ struct Functor_TestBatchedSerialGesv { } }; -template +template void impl_test_batched_gesv(const int N, const int BlkSize) { typedef typename MatrixType::value_type value_type; typedef Kokkos::ArithTraits ats; using MagnitudeType = typename Kokkos::ArithTraits::mag_type; - using NormViewType = - Kokkos::View; + using NormViewType = Kokkos::View; NormViewType sqr_norm_j("sqr_norm_j", N); auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); - MatrixType A("A", N, BlkSize, BlkSize), A2("A", N, BlkSize, BlkSize), - tmp("tmp", N, BlkSize, BlkSize + 4); + MatrixType A("A", N, BlkSize, BlkSize), A2("A", N, BlkSize, BlkSize), tmp("tmp", N, BlkSize, BlkSize + 4); VectorType B("b", N, BlkSize), B2("b", N, BlkSize), X("x", N, 
BlkSize); create_tridiagonal_batched_matrices(A, B); @@ -98,23 +93,18 @@ void impl_test_batched_gesv(const int N, const int BlkSize) { Kokkos::fence(); - Functor_TestBatchedSerialGesv(A, tmp, X, B) - .run(); + Functor_TestBatchedSerialGesv(A, tmp, X, B).run(); Kokkos::fence(); Kokkos::deep_copy(X_host, X); for (int l = 0; l < N; ++l) - KokkosBlas::SerialGemv:: - invoke(-1, Kokkos::subview(A_host, l, Kokkos::ALL, Kokkos::ALL), - Kokkos::subview(X_host, l, Kokkos::ALL), 1, - Kokkos::subview(B_host, l, Kokkos::ALL)); + KokkosBlas::SerialGemv::invoke( + -1, Kokkos::subview(A_host, l, Kokkos::ALL, Kokkos::ALL), Kokkos::subview(X_host, l, Kokkos::ALL), 1, + Kokkos::subview(B_host, l, Kokkos::ALL)); - KokkosBatched::SerialDot::invoke(B_host, B_host, - sqr_norm_j_host); + KokkosBatched::SerialDot::invoke(B_host, B_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e3 * ats::epsilon(); @@ -127,27 +117,21 @@ template int test_batched_gesv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixType; - typedef Kokkos::View - VectorType; + typedef Kokkos::View MatrixType; + typedef Kokkos::View VectorType; for (int i = 3; i < 10; ++i) { - Test::Gesv::impl_test_batched_gesv(1024, i); + Test::Gesv::impl_test_batched_gesv(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixType; - typedef Kokkos::View - VectorType; + typedef Kokkos::View MatrixType; + typedef Kokkos::View VectorType; for (int i = 3; i < 10; ++i) { - Test::Gesv::impl_test_batched_gesv(1024, i); + Test::Gesv::impl_test_batched_gesv(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU.hpp index 23ded73e2516..6f1115447129 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU.hpp @@ 
-19,14 +19,14 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Serial_Impl.hpp" #include "KokkosBatched_LU_Decl.hpp" #include "KokkosBatched_LU_Serial_Impl.hpp" #include "KokkosBatched_InverseLU_Decl.hpp" -//#include "KokkosBatched_InverseLU_Serial_Impl.hpp" +// #include "KokkosBatched_InverseLU_Serial_Impl.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -41,8 +41,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_BatchedSerialGemm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -50,8 +49,7 @@ struct Functor_BatchedSerialGemm { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_BatchedSerialGemm(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_BatchedSerialGemm(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} @@ -63,8 +61,8 @@ struct Functor_BatchedSerialGemm { for (int i = 0; i < static_cast(aa.extent(0)); ++i) aa(i, i) += 10.0; - SerialGemm::invoke(_alpha, aa, bb, _beta, cc); + SerialGemm::invoke(_alpha, aa, bb, _beta, + cc); } inline void run() { @@ -108,16 +106,14 @@ struct Functor_BatchedSerialLU { } }; -template +template struct Functor_TestBatchedSerialInverseLU { using execution_space = typename DeviceType::execution_space; AViewType _a; WViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialInverseLU(const AViewType &a, const WViewType &w) - : _a(a), _w(w) {} + Functor_TestBatchedSerialInverseLU(const AViewType &a, const WViewType &w) : _a(a), _w(w) {} KOKKOS_INLINE_FUNCTION void operator()(const int k) const { @@ -139,8 +135,7 @@ struct Functor_TestBatchedSerialInverseLU { } }; -template +template void impl_test_batched_inverselu(const int N, const int BlkSize) { 
typedef typename AViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -151,8 +146,7 @@ void impl_test_batched_inverselu(const int N, const int BlkSize) { WViewType w("w", N, BlkSize * BlkSize); AViewType c0("c0", N, BlkSize, BlkSize); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fence(); @@ -162,16 +156,12 @@ void impl_test_batched_inverselu(const int N, const int BlkSize) { Functor_BatchedSerialLU(a1).run(); - Functor_TestBatchedSerialInverseLU(a1, w) - .run(); + Functor_TestBatchedSerialInverseLU(a1, w).run(); value_type alpha = 1.0, beta = 0.0; - typedef SerialInverseLU::ParamTag - param_tag_type; + typedef SerialInverseLU::ParamTag param_tag_type; - Functor_BatchedSerialGemm(alpha, a0, a1, beta, c0) + Functor_BatchedSerialGemm(alpha, a0, a1, beta, c0) .run(); Kokkos::fence(); @@ -202,31 +192,21 @@ template int test_batched_inverselu() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - AViewType; - typedef Kokkos::View - WViewType; - Test::SerialInverseLU::impl_test_batched_inverselu( - 0, 10); + typedef Kokkos::View AViewType; + typedef Kokkos::View WViewType; + Test::SerialInverseLU::impl_test_batched_inverselu(0, 10); for (int i = 0; i < 10; ++i) { - Test::SerialInverseLU::impl_test_batched_inverselu< - DeviceType, AViewType, WViewType, AlgoTagType>(1024, i); + Test::SerialInverseLU::impl_test_batched_inverselu(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - AViewType; - typedef Kokkos::View - WViewType; - Test::SerialInverseLU::impl_test_batched_inverselu( - 0, 10); + typedef Kokkos::View AViewType; + typedef Kokkos::View WViewType; + Test::SerialInverseLU::impl_test_batched_inverselu(0, 10); for (int i = 0; i < 10; ++i) { - Test::SerialInverseLU::impl_test_batched_inverselu< - DeviceType, AViewType, WViewType, AlgoTagType>(1024, i); + 
Test::SerialInverseLU::impl_test_batched_inverselu(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU_Complex.hpp index 243ed219086e..01e63724719f 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialInverseLU_Complex.hpp @@ -18,11 +18,9 @@ TEST_F(TestCategory, batched_scalar_serial_inverselu_dcomplex) { // printf("Batched serial inverse LU - double complex - algorithm type: // Unblocked\n"); - test_batched_inverselu, - Algo::InverseLU::Unblocked>(); + test_batched_inverselu, Algo::InverseLU::Unblocked>(); // printf("Batched serial inverse LU - double complex - algorithm type: // Blocked\n"); - test_batched_inverselu, - Algo::InverseLU::Blocked>(); + test_batched_inverselu, Algo::InverseLU::Blocked>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialLU.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialLU.hpp index 87224aa8880b..33e079dd9bc6 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialLU.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialLU.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_LU_Decl.hpp" #include "KokkosBatched_LU_Serial_Impl.hpp" @@ -67,16 +67,14 @@ void impl_test_batched_lu(const int N, const int BlkSize) { /// randomized input testing views ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fence(); Kokkos::deep_copy(a1, a0); - 
Functor_TestBatchedSerialLU(a0) - .run(); + Functor_TestBatchedSerialLU(a0).run(); Functor_TestBatchedSerialLU(a1).run(); Kokkos::fence(); @@ -107,8 +105,7 @@ template int test_batched_lu() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; Test::impl_test_batched_lu(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); @@ -118,8 +115,7 @@ int test_batched_lu() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; Test::impl_test_batched_lu(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf.hpp new file mode 100644 index 000000000000..11274fc311de --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf.hpp @@ -0,0 +1,422 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) +#include +#include +#include + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Pttrf.hpp" +#include "Test_Batched_DenseUtils.hpp" + +using namespace KokkosBatched; + +namespace Test { +namespace Pttrf { + +template +struct Functor_BatchedSerialPttrf { + using execution_space = typename DeviceType::execution_space; + DViewType _d; + EViewType _e; + + KOKKOS_INLINE_FUNCTION + Functor_BatchedSerialPttrf(const DViewType &d, const EViewType &e) : _d(d), _e(e) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int k, int &info) const { + auto dd = Kokkos::subview(_d, k, Kokkos::ALL()); + auto ee = Kokkos::subview(_e, k, Kokkos::ALL()); + + info += KokkosBatched::SerialPttrf::invoke(dd, ee); + } + + inline int run() { + using value_type = typename DViewType::non_const_value_type; + std::string name_region("KokkosBatched::Test::SerialPttrf"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + int info_sum = 0; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::RangePolicy policy(0, _d.extent(0)); + Kokkos::parallel_reduce(name.c_str(), policy, *this, info_sum); + Kokkos::Profiling::popRegion(); + return info_sum; + } +}; + +template +struct Functor_BatchedSerialGemm { + using execution_space = typename DeviceType::execution_space; + AViewType _a; + BViewType _b; + CViewType _c; + ScalarType _alpha, _beta; + + KOKKOS_INLINE_FUNCTION + Functor_BatchedSerialGemm(const ScalarType alpha, const AViewType &a, const BViewType &b, const ScalarType beta, + const CViewType &c) + : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int k) const { + auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); + auto cc = Kokkos::subview(_c, k, Kokkos::ALL(), 
Kokkos::ALL()); + + KokkosBatched::SerialGemm::invoke(_alpha, aa, bb, _beta, cc); + } + + inline void run() { + using value_type = typename AViewType::non_const_value_type; + std::string name_region("KokkosBatched::Test::SerialPttrf"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::RangePolicy policy(0, _a.extent(0)); + Kokkos::parallel_for(name.c_str(), policy, *this); + } +}; + +template +/// \brief Implementation details of batched pttrf test for random matrix +/// +/// \param N [in] Batch size of matrix A +/// \param BlkSize [in] Block size of matrix A +void impl_test_batched_pttrf(const int N, const int BlkSize) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; + using RealView2DType = Kokkos::View; + using View2DType = Kokkos::View; + using View3DType = Kokkos::View; + + View3DType A("A", N, BlkSize, BlkSize), A_reconst("A_reconst", N, BlkSize, BlkSize); + View3DType EL("EL", N, BlkSize, BlkSize), EU("EU", N, BlkSize, BlkSize), D("D", N, BlkSize, BlkSize), + LD("LD", N, BlkSize, BlkSize), L("L", N, BlkSize, BlkSize), I("I", N, BlkSize, BlkSize); + RealView2DType d("d", N, BlkSize), // Diagonal components + ones(Kokkos::view_alloc("ones", Kokkos::WithoutInitializing), N, BlkSize); + View2DType e_upper("e_upper", N, BlkSize - 1), e_lower("e_lower", N, + BlkSize - 1); // upper and lower diagonal components + + using execution_space = typename DeviceType::execution_space; + Kokkos::Random_XorShift64_Pool rand_pool(13718); + RealType realRandStart, realRandEnd; + ScalarType randStart, randEnd; + + KokkosKernels::Impl::getRandomBounds(1.0, realRandStart, realRandEnd); + KokkosKernels::Impl::getRandomBounds(1.0, randStart, randEnd); + + // Add BlkSize to ensure positive definiteness + Kokkos::fill_random(d, rand_pool, realRandStart + BlkSize, realRandEnd + BlkSize); + Kokkos::fill_random(e_upper, rand_pool, randStart, randEnd); + + auto h_e_upper = 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), e_upper); + auto h_e_lower = Kokkos::create_mirror_view(e_lower); + + for (int ib = 0; ib < N; ib++) { + for (int i = 0; i < BlkSize - 1; i++) { + // Fill the lower diagonal with conjugate of the upper diagonal + h_e_lower(ib, i) = Kokkos::ArithTraits::conj(h_e_upper(ib, i)); + } + } + + Kokkos::deep_copy(e_lower, h_e_lower); + Kokkos::deep_copy(ones, RealType(1.0)); + + // Reconstruct Tridiagonal matrix A + // A = D + EL + EU + create_diagonal_matrix(e_lower, EL, -1); + create_diagonal_matrix(e_upper, EU, 1); + create_diagonal_matrix(d, D); + create_diagonal_matrix(ones, I); + + // Matrix matrix addition by Gemm + // D + EU by D * I + EU (result stored in EU) + Functor_BatchedSerialGemm(1.0, D, I, + 1.0, EU) + .run(); + + // Copy EL to A + Kokkos::deep_copy(A, EL); + + // EU + EL by EU * I + A (result stored in A) + Functor_BatchedSerialGemm(1.0, EU, I, + 1.0, A) + .run(); + + // Factorize matrix A -> L * D * L**H + // d and e are updated by pttrf + auto info = Functor_BatchedSerialPttrf(d, e_lower).run(); + + Kokkos::fence(); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + EXPECT_EQ(info, 0); +#endif + + // Reconstruct L and D from factorized matrix + create_diagonal_matrix(e_lower, EL, -1); + create_diagonal_matrix(d, D); + + // Copy I to L + Kokkos::deep_copy(L, I); + + // EL + I by EL * I + L (result stored in L) + Functor_BatchedSerialGemm(1.0, EL, I, + 1.0, L) + .run(); + + // Reconstruct A by L*D*L**H + // Gemm to compute L*D -> LD + Functor_BatchedSerialGemm(1.0, L, D, + 0.0, LD) + .run(); + + // FIXME: We should use SerialGemm Trans::ConjTranspose. + // For the moment, we compute the complex conjugate of L and + // then use Trans::Transpose. 
+ // Gemm to compute (L*D)*L**H -> A_reconst + // Functor_BatchedSerialGemm(1.0, LD, L, 0.0, + // A_reconst) + // .run(); + + // Compute the complex conjugate of L + // L -> conj(L) + auto h_L = Kokkos::create_mirror_view(L); + Kokkos::deep_copy(h_L, L); + for (int ib = 0; ib < N; ib++) { + for (int i = 0; i < BlkSize; i++) { + for (int j = 0; j < BlkSize; j++) { + h_L(ib, i, j) = Kokkos::ArithTraits::conj(h_L(ib, i, j)); + } + } + } + Kokkos::deep_copy(L, h_L); + + // Gemm to compute (L*D)*(conj(L))**T -> A_reconst + Functor_BatchedSerialGemm( + 1.0, LD, L, 0.0, A_reconst) + .run(); + + Kokkos::fence(); + + // this eps is about 10^-14 + RealType eps = 1.0e3 * ats::epsilon(); + + auto h_A = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A); + auto h_A_reconst = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_reconst); + + // Check A = L*D*L**H + for (int ib = 0; ib < N; ib++) { + for (int i = 0; i < BlkSize; i++) { + for (int j = 0; j < BlkSize; j++) { + EXPECT_NEAR_KK(h_A_reconst(ib, i, j), h_A(ib, i, j), eps); + } + } + } +} + +template +/// \brief Implementation details of batched pttrf test for early return +/// BlkSize must be 0 or 1 +/// +/// \param N [in] Batch size of matrix A +/// \param BlkSize [in] Block size of matrix A +void impl_test_batched_pttrf_quick_return(const int N, const int BlkSize) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; + using RealView2DType = Kokkos::View; + using View2DType = Kokkos::View; + + if (BlkSize > 1) return; + + const int BlkSize_minus_1 = BlkSize > 0 ? 
BlkSize - 1 : 0; + + RealView2DType d("d", N, BlkSize), d2("d2", N, BlkSize); // Diagonal components + View2DType e("e", N, + BlkSize_minus_1); // lower diagonal components + + const RealType reference_value = 4.0; + + Kokkos::deep_copy(d, reference_value); + Kokkos::deep_copy(d2, -reference_value); + Kokkos::deep_copy(e, ScalarType(1.0)); + + // Factorize matrix A -> L * D * L**H + // d and e are updated by pttrf + // Early return if BlkSize is 0 or 1 + auto info = Functor_BatchedSerialPttrf(d, e).run(); + + // For negative values, info should be 1 for BlkSize = 1 + auto info2 = Functor_BatchedSerialPttrf(d2, e).run(); + + Kokkos::fence(); + + int expected_info2 = BlkSize == 0 ? 0 : N; + EXPECT_EQ(info, 0); + EXPECT_EQ(info2, expected_info2); + + // this eps is about 10^-14 + RealType eps = 1.0e3 * ats::epsilon(); + + auto h_d = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d); + auto h_d2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d2); + + // Check if d is unchanged + for (int ib = 0; ib < N; ib++) { + for (int i = 0; i < BlkSize; i++) { + EXPECT_NEAR_KK(h_d(ib, i), reference_value, eps); + EXPECT_NEAR_KK(h_d2(ib, i), -reference_value, eps); + } + } +} + +template +/// \brief Implementation details of batched pttrf test +/// +/// \param N [in] Batch size of matrix A +/// \param BlkSize [in] Block size of matrix A +void impl_test_batched_pttrf_analytical(const int N, const int BlkSize) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; + using RealView2DType = Kokkos::View; + using View2DType = Kokkos::View; + using View3DType = Kokkos::View; + + View3DType A("A", N, BlkSize, BlkSize), A_reconst("A_reconst", N, BlkSize, BlkSize); + View3DType EL("EL", N, BlkSize, BlkSize), EU("EU", N, BlkSize, BlkSize), D("D", N, BlkSize, BlkSize), + LD("LD", N, BlkSize, BlkSize), L("L", N, BlkSize, BlkSize), I("I", N, BlkSize, BlkSize); + RealView2DType d(Kokkos::view_alloc("d", Kokkos::WithoutInitializing), N, + 
BlkSize), // Diagonal components + ones(Kokkos::view_alloc("ones", Kokkos::WithoutInitializing), N, BlkSize); + View2DType e(Kokkos::view_alloc("e", Kokkos::WithoutInitializing), N, + BlkSize - 1); // Upper and lower diagonal components (identical) + + Kokkos::deep_copy(d, RealType(4.0)); + Kokkos::deep_copy(e, ScalarType(1.0)); + Kokkos::deep_copy(ones, RealType(1.0)); + + // Reconstruct Tridiaonal matrix A + // A = D + EL + EU + create_diagonal_matrix(e, EL, -1); + create_diagonal_matrix(e, EU, 1); + create_diagonal_matrix(d, D); + create_diagonal_matrix(ones, I); + + // Matrix matrix addition by Gemm + // D + EU by D * I + EU (result stored in EU) + Functor_BatchedSerialGemm(1.0, D, I, + 1.0, EU) + .run(); + + // Copy EL to A + Kokkos::deep_copy(A, EL); + + // EU + EL by EU * I + A (result stored in A) + Functor_BatchedSerialGemm(1.0, EU, I, + 1.0, A) + .run(); + + // Factorize matrix A -> L * D * L**T + // d and e are updated by pttrf + auto info = Functor_BatchedSerialPttrf(d, e).run(); + + Kokkos::fence(); + +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + EXPECT_EQ(info, 0); +#endif + + // Reconstruct L and D from factorized matrix + create_diagonal_matrix(e, EL, -1); + create_diagonal_matrix(d, D); + + // Copy I to L + Kokkos::deep_copy(L, I); + + // EL + I by EL * I + L (result stored in L) + Functor_BatchedSerialGemm(1.0, EL, I, + 1.0, L) + .run(); + + // Reconstruct A by L*D*L**T + // Gemm to compute L*D -> LD + Functor_BatchedSerialGemm(1.0, L, D, + 0.0, LD) + .run(); + + // Gemm to compute (L*D)*L**T -> A_reconst + Functor_BatchedSerialGemm( + 1.0, LD, L, 0.0, A_reconst) + .run(); + + Kokkos::fence(); + + // this eps is about 10^-14 + RealType eps = 1.0e3 * ats::epsilon(); + + auto h_A = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A); + auto h_A_reconst = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_reconst); + + // Check A = L*D*L.T + for (int ib = 0; ib < N; ib++) { + for (int i = 0; i < BlkSize; i++) { + for (int j = 0; j < BlkSize; 
j++) { + EXPECT_NEAR_KK(h_A_reconst(ib, i, j), h_A(ib, i, j), eps); + } + } + } +} + +} // namespace Pttrf +} // namespace Test + +template +int test_batched_pttrf() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + using LayoutType = Kokkos::LayoutLeft; + for (int i = 0; i < 2; i++) { + Test::Pttrf::impl_test_batched_pttrf_quick_return(1, i); + Test::Pttrf::impl_test_batched_pttrf_quick_return(2, i); + } + for (int i = 2; i < 10; i++) { + Test::Pttrf::impl_test_batched_pttrf(1, i); + Test::Pttrf::impl_test_batched_pttrf(2, i); + Test::Pttrf::impl_test_batched_pttrf_analytical(1, i); + Test::Pttrf::impl_test_batched_pttrf_analytical(2, i); + } + } +#endif +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + using LayoutType = Kokkos::LayoutRight; + for (int i = 0; i < 2; i++) { + Test::Pttrf::impl_test_batched_pttrf_quick_return(1, i); + Test::Pttrf::impl_test_batched_pttrf_quick_return(2, i); + } + for (int i = 2; i < 10; i++) { + Test::Pttrf::impl_test_batched_pttrf(1, i); + Test::Pttrf::impl_test_batched_pttrf(2, i); + Test::Pttrf::impl_test_batched_pttrf_analytical(1, i); + Test::Pttrf::impl_test_batched_pttrf_analytical(2, i); + } + } +#endif + + return 0; +} diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Complex.hpp new file mode 100644 index 000000000000..febccc5cb37c --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Complex.hpp @@ -0,0 +1,31 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) +TEST_F(TestCategory, test_batched_pttrf_fcomplex) { + using algo_tag_type = typename Algo::Pttrf::Unblocked; + + test_batched_pttrf(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) +TEST_F(TestCategory, test_batched_pttrf_dcomplex) { + using algo_tag_type = typename Algo::Pttrf::Unblocked; + + test_batched_pttrf(); +} +#endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Real.hpp new file mode 100644 index 000000000000..8b0fb658fe22 --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialPttrf_Real.hpp @@ -0,0 +1,31 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#if defined(KOKKOSKERNELS_INST_FLOAT) +TEST_F(TestCategory, test_batched_pttrf_float) { + using algo_tag_type = typename Algo::Pttrf::Unblocked; + + test_batched_pttrf(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, test_batched_pttrf_double) { + using algo_tag_type = typename Algo::Pttrf::Unblocked; + + test_batched_pttrf(); +} +#endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp index 099fa9219f0a..9bf9d43578e5 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp @@ -70,8 +70,7 @@ void verifyOrthogonal(const Mat& X) { } template -void verifySVD(const AView& A, const UView& U, const VtView& Vt, - const SigmaView& sigma) { +void verifySVD(const AView& A, const UView& U, const VtView& Vt, const SigmaView& sigma) { using Scalar = typename AView::non_const_value_type; using KAT = Kokkos::ArithTraits; // Check that U/V columns are unit length and orthogonal, and that U * @@ -85,10 +84,8 @@ void verifySVD(const AView& A, const UView& U, const VtView& Vt, verifyOrthogonal(Vt); Kokkos::View usvt("USV^T", m, n); for (int i = 0; i < maxrank; i++) { - auto Ucol = - Kokkos::subview(U, Kokkos::ALL(), Kokkos::make_pair(i, i + 1)); - auto Vtrow = - Kokkos::subview(Vt, Kokkos::make_pair(i, i + 1), Kokkos::ALL()); + auto Ucol = Kokkos::subview(U, Kokkos::ALL(), Kokkos::make_pair(i, i + 1)); + auto Vtrow = Kokkos::subview(Vt, Kokkos::make_pair(i, i + 1), Kokkos::ALL()); Test::vanillaGEMM(sigma(i), Ucol, Vtrow, 1.0, usvt); } for (int i = 0; i < m; i++) { @@ -113,8 +110,7 @@ Matrix createRandomMatrix(int m, int n, int deficiency, double maxval = 1.0) { auto mhost = Kokkos::create_mirror_view(mat); // Fill mat with random values first if (maxval 
!= 0.0) { - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Scalar minrand, maxrand; Test::getRandomBounds(maxval, minrand, maxrand); Kokkos::fill_random(mhost, rand_pool, minrand, maxrand); @@ -143,15 +139,14 @@ Matrix createRandomMatrix(int m, int n, int deficiency, double maxval = 1.0) { template struct SerialSVDFunctor_Full { - SerialSVDFunctor_Full(const Matrix& A_, const Matrix& U_, const Matrix& Vt_, - const Vector& sigma_, const Vector& work_) + SerialSVDFunctor_Full(const Matrix& A_, const Matrix& U_, const Matrix& Vt_, const Vector& sigma_, + const Vector& work_) : A(A_), U(U_), Vt(Vt_), sigma(sigma_), work(work_) {} // NOTE: this functor is only meant to be launched with a single element range // policy KOKKOS_INLINE_FUNCTION void operator()(int) const { - KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, sigma, - Vt, work); + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, sigma, Vt, work); } Matrix A; @@ -163,15 +158,13 @@ struct SerialSVDFunctor_Full { template struct SerialSVDFunctor_SingularValuesOnly { - SerialSVDFunctor_SingularValuesOnly(const Matrix& A_, const Vector& sigma_, - const Vector& work_) + SerialSVDFunctor_SingularValuesOnly(const Matrix& A_, const Vector& sigma_, const Vector& work_) : A(A_), sigma(sigma_), work(work_) {} // NOTE: this functor is only meant to be launched with a single element range // policy KOKKOS_INLINE_FUNCTION void operator()(int) const { - KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_S_Tag(), A, sigma, - work); + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_S_Tag(), A, sigma, work); } Matrix A; @@ -201,14 +194,12 @@ void testSerialSVD(int m, int n, int deficiency, double maxval = 1.0) { typename Matrix::HostMirror Acopy("Acopy", m, n); Kokkos::deep_copy(Acopy, A); // Run the SVD - Kokkos::parallel_for( - Kokkos::RangePolicy(0, 1), - SerialSVDFunctor_Full(A, U, Vt, sigma, work)); + 
Kokkos::parallel_for(Kokkos::RangePolicy(0, 1), + SerialSVDFunctor_Full(A, U, Vt, sigma, work)); // Get the results back - auto Uhost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), U); - auto Vthost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Vt); - auto sigmaHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma); + auto Uhost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), U); + auto Vthost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Vt); + auto sigmaHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma); // Verify the SVD is correct verifySVD(Acopy, Uhost, Vthost, sigmaHost); } @@ -237,22 +228,17 @@ void testSerialSVDSingularValuesOnly(int m, int n) { typename Matrix::HostMirror Acopy("Acopy", m, n); Kokkos::deep_copy(Acopy, A); // Run the SVD (full mode) - Kokkos::parallel_for( - Kokkos::RangePolicy(0, 1), - SerialSVDFunctor_Full(A, U, Vt, sigma1, work)); + Kokkos::parallel_for(Kokkos::RangePolicy(0, 1), + SerialSVDFunctor_Full(A, U, Vt, sigma1, work)); Kokkos::deep_copy(A, Acopy); // Run the same SVD (singular values only mode) - Kokkos::parallel_for( - Kokkos::RangePolicy(0, 1), - SerialSVDFunctor_SingularValuesOnly(A, sigma2, work)); - auto sigma1Host = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma1); - auto sigma2Host = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma2); + Kokkos::parallel_for(Kokkos::RangePolicy(0, 1), + SerialSVDFunctor_SingularValuesOnly(A, sigma2, work)); + auto sigma1Host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma1); + auto sigma2Host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma2); // Make sure they match for (int i = 0; i < maxrank; i++) { - Test::EXPECT_NEAR_KK(sigma1Host(i), sigma2Host(i), - Test::svdEpsilon()); + Test::EXPECT_NEAR_KK(sigma1Host(i), sigma2Host(i), Test::svdEpsilon()); } } @@ -279,9 +265,8 @@ void testSerialSVDZeroLastRow(int n) { Matrix BVt("UBVt", n, n); 
Test::vanillaGEMM(1.0, B, Vt, 0.0, BVt); // Run the routine (just on host) - KokkosBatched::SerialSVDInternal::svdZeroLastColumn( - B.data(), n, B.stride(0), B.stride(1), Vt.data(), Vt.stride(0), - Vt.stride(1)); + KokkosBatched::SerialSVDInternal::svdZeroLastColumn(B.data(), n, B.stride(0), B.stride(1), Vt.data(), + Vt.stride(0), Vt.stride(1)); // Check that B is still bidiagonal (to a tight tolerance, but not exactly // zero) for (int i = 0; i < n; i++) { @@ -292,8 +277,7 @@ void testSerialSVDZeroLastRow(int n) { } } // Check that the last superdiagonal is now zero - Test::EXPECT_NEAR_KK(B(n - 2, n - 1), KAT::zero(), - Test::svdEpsilon()); + Test::EXPECT_NEAR_KK(B(n - 2, n - 1), KAT::zero(), Test::svdEpsilon()); // Check that the product is still maintained Matrix BVt2("UBVt", n, n); Test::vanillaGEMM(1.0, B, Vt, 0.0, BVt2); @@ -312,8 +296,8 @@ void testSerialSVDZeroDiagonal(int n, int row) { // Generate a bidiagonal matrix using Matrix = Kokkos::View; using KAT = Kokkos::ArithTraits; - int m = n + 2; // Make U somewhat bigger to make sure the Givens transforms - // are applied correctly + int m = n + 2; // Make U somewhat bigger to make sure the Givens transforms + // are applied correctly Matrix B = createRandomMatrix(m, n, 0, 1.0); // Zero out entries to make B bidiagonal for (int i = 0; i < m; i++) { @@ -331,9 +315,8 @@ void testSerialSVDZeroDiagonal(int n, int row) { Matrix UB("UB", m, n); Test::vanillaGEMM(1.0, U, B, 0.0, UB); // Run the routine (just on host) - KokkosBatched::SerialSVDInternal::svdZeroRow( - row, B.data(), n, B.stride(0), B.stride(1), U.data(), m, U.stride(0), - U.stride(1)); + KokkosBatched::SerialSVDInternal::svdZeroRow(row, B.data(), n, B.stride(0), B.stride(1), U.data(), m, + U.stride(0), U.stride(1)); // Check that B is still bidiagonal (to a tight tolerance, but not exactly // zero) for (int i = 0; i < m; i++) { @@ -381,12 +364,9 @@ void testSVD() { template KOKKOS_INLINE_FUNCTION constexpr auto Determinant(ViewT F) - -> 
std::enable_if_t::value && ViewT::rank == 2, - double> { - return (F(0, 0) * F(1, 1) * F(2, 2) + F(0, 1) * F(1, 2) * F(2, 0) + - F(0, 2) * F(1, 0) * F(2, 1) - - (F(0, 2) * F(1, 1) * F(2, 0) + F(0, 1) * F(1, 0) * F(2, 2) + - F(0, 0) * F(1, 2) * F(2, 1))); + -> std::enable_if_t::value && ViewT::rank == 2, double> { + return (F(0, 0) * F(1, 1) * F(2, 2) + F(0, 1) * F(1, 2) * F(2, 0) + F(0, 2) * F(1, 0) * F(2, 1) - + (F(0, 2) * F(1, 1) * F(2, 0) + F(0, 1) * F(1, 0) * F(2, 2) + F(0, 0) * F(1, 2) * F(2, 1))); } template @@ -411,39 +391,31 @@ void testIssue1786() { using execution_space = typename Device::execution_space; using memory_space = typename Device::memory_space; constexpr int num_tests = 4; - Kokkos::View matrices("data", - num_tests); + Kokkos::View matrices("data", num_tests); GenerateTestData(matrices); - Kokkos::View Us("Us", - matrices.extent(0)); - Kokkos::View Ss("Ss", matrices.extent(0)); - Kokkos::View Vts("Vts", - matrices.extent(0)); + Kokkos::View Us("Us", matrices.extent(0)); + Kokkos::View Ss("Ss", matrices.extent(0)); + Kokkos::View Vts("Vts", matrices.extent(0)); // Make sure the 2nd dimension of works is contiguous - Kokkos::View works( - "works", matrices.extent(0)); - Kokkos::View matrices_copy( - "matrices_copy", matrices.extent(0)); + Kokkos::View works("works", matrices.extent(0)); + Kokkos::View matrices_copy("matrices_copy", matrices.extent(0)); // make a copy of the input data to avoid overwriting it Kokkos::deep_copy(matrices_copy, matrices); auto policy = Kokkos::RangePolicy(0, matrices.extent(0)); Kokkos::parallel_for( "polar decomposition", policy, KOKKOS_LAMBDA(int i) { - auto matrix_copy = - Kokkos::subview(matrices_copy, i, Kokkos::ALL(), Kokkos::ALL()); - auto U = Kokkos::subview(Us, i, Kokkos::ALL(), Kokkos::ALL()); - auto S = Kokkos::subview(Ss, i, Kokkos::ALL()); - auto Vt = Kokkos::subview(Vts, i, Kokkos::ALL(), Kokkos::ALL()); - auto work = Kokkos::subview(works, i, Kokkos::ALL()); - 
KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag{}, - matrix_copy, U, S, Vt, work); + auto matrix_copy = Kokkos::subview(matrices_copy, i, Kokkos::ALL(), Kokkos::ALL()); + auto U = Kokkos::subview(Us, i, Kokkos::ALL(), Kokkos::ALL()); + auto S = Kokkos::subview(Ss, i, Kokkos::ALL()); + auto Vt = Kokkos::subview(Vts, i, Kokkos::ALL(), Kokkos::ALL()); + auto work = Kokkos::subview(works, i, Kokkos::ALL()); + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag{}, matrix_copy, U, S, Vt, work); }); - auto Us_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, Us); - auto Ss_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, Ss); - auto Vts_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, Vts); - auto matrices_h = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, matrices); + auto Us_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, Us); + auto Ss_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, Ss); + auto Vts_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, Vts); + auto matrices_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, matrices); for (int i = 0; i < num_tests; i++) { auto A = Kokkos::subview(matrices_h, i, Kokkos::ALL(), Kokkos::ALL()); auto U = Kokkos::subview(Us_h, i, Kokkos::ALL(), Kokkos::ALL()); diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU.hpp index 43cb8fab2fce..734eda28bd0e 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU.hpp @@ -19,14 +19,14 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Serial_Impl.hpp" #include "KokkosBatched_LU_Decl.hpp" #include 
"KokkosBatched_LU_Serial_Impl.hpp" #include "KokkosBatched_SolveLU_Decl.hpp" -//#include "KokkosBatched_SolveLU_Serial_Impl.hpp" +// #include "KokkosBatched_SolveLU_Serial_Impl.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -41,8 +41,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_BatchedSerialGemm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -50,8 +49,7 @@ struct Functor_BatchedSerialGemm { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_BatchedSerialGemm(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_BatchedSerialGemm(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} @@ -63,8 +61,8 @@ struct Functor_BatchedSerialGemm { for (int i = 0; i < static_cast(aa.extent(0)); ++i) aa(i, i) += 10.0; - SerialGemm::invoke(_alpha, aa, bb, _beta, cc); + SerialGemm::invoke(_alpha, aa, bb, _beta, + cc); } inline void run() { @@ -108,16 +106,14 @@ struct Functor_BatchedSerialLU { } }; -template +template struct Functor_TestBatchedSerialSolveLU { using execution_space = typename DeviceType::execution_space; ViewType _a; ViewType _b; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialSolveLU(const ViewType &a, const ViewType &b) - : _a(a), _b(b) {} + Functor_TestBatchedSerialSolveLU(const ViewType &a, const ViewType &b) : _a(a), _b(b) {} KOKKOS_INLINE_FUNCTION void operator()(const int k) const { @@ -152,8 +148,7 @@ void impl_test_batched_solvelu(const int N, const int BlkSize) { // ViewType a0_T("a0_T", N, BlkSize, BlkSize); // ViewType b_T ("b_T", N, BlkSize, 5 ); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fill_random(x0, random, value_type(1.0)); @@ -165,15 +160,12 @@ void impl_test_batched_solvelu(const int N, const int BlkSize) { 
value_type alpha = 1.0, beta = 0.0; typedef ParamTag param_tag_type; - Functor_BatchedSerialGemm(alpha, a0, x0, beta, b) + Functor_BatchedSerialGemm(alpha, a0, x0, beta, b) .run(); Functor_BatchedSerialLU(a1).run(); - Functor_TestBatchedSerialSolveLU(a1, b) - .run(); + Functor_TestBatchedSerialSolveLU(a1, b).run(); Kokkos::fence(); @@ -230,25 +222,19 @@ template int test_batched_solvelu() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::SerialSolveLU::impl_test_batched_solvelu(0, 10); + typedef Kokkos::View ViewType; + Test::SerialSolveLU::impl_test_batched_solvelu(0, 10); for (int i = 0; i < 10; ++i) { - Test::SerialSolveLU::impl_test_batched_solvelu(1024, i); + Test::SerialSolveLU::impl_test_batched_solvelu(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::SerialSolveLU::impl_test_batched_solvelu(0, 10); + typedef Kokkos::View ViewType; + Test::SerialSolveLU::impl_test_batched_solvelu(0, 10); for (int i = 0; i < 10; ++i) { - Test::SerialSolveLU::impl_test_batched_solvelu(1024, i); + Test::SerialSolveLU::impl_test_batched_solvelu(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU_Complex.hpp index 6eaf9ca5aa16..66a99e28d25b 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSolveLU_Complex.hpp @@ -18,11 +18,9 @@ TEST_F(TestCategory, batched_scalar_serial_solvelu_dcomplex) { // printf("Batched serial solveLU - double complex - algorithm type: // Unblocked\n"); - test_batched_solvelu, - Algo::SolveLU::Unblocked>(); + test_batched_solvelu, Algo::SolveLU::Unblocked>(); // printf("Batched serial solveLU - double complex - algorithm type: // Blocked\n"); - test_batched_solvelu, - Algo::SolveLU::Blocked>(); 
+ test_batched_solvelu, Algo::SolveLU::Blocked>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv.hpp new file mode 100644 index 000000000000..cd52235dd6db --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv.hpp @@ -0,0 +1,312 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) +#include +#include +#include + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Tbsv.hpp" +#include "Test_Batched_DenseUtils.hpp" + +using namespace KokkosBatched; + +namespace Test { +namespace Tbsv { + +template +struct ParamTag { + using uplo = U; + using trans = T; + using diag = D; +}; + +template +struct Functor_BatchedSerialTrsv { + using execution_space = typename DeviceType::execution_space; + AViewType _a; + BViewType _b; + + ScalarType _alpha; + + KOKKOS_INLINE_FUNCTION + Functor_BatchedSerialTrsv(const ScalarType alpha, const AViewType &a, const BViewType &b) + : _a(a), _b(b), _alpha(alpha) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const ParamTagType &, const int k) const { + auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, k, Kokkos::ALL()); + + KokkosBatched::SerialTrsv::invoke(_alpha, aa, bb); + } + + inline void run() { + using value_type = typename AViewType::non_const_value_type; + std::string 
name_region("KokkosBatched::Test::SerialTbsv"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::RangePolicy policy(0, _b.extent(0)); + Kokkos::parallel_for(name.c_str(), policy, *this); + } +}; + +template +struct Functor_BatchedSerialTbsv { + using execution_space = typename DeviceType::execution_space; + AViewType _a; + BViewType _b; + int _k; + + KOKKOS_INLINE_FUNCTION + Functor_BatchedSerialTbsv(const AViewType &a, const BViewType &b, const int k) : _a(a), _b(b), _k(k) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const ParamTagType &, const int k) const { + auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, k, Kokkos::ALL()); + + KokkosBatched::SerialTbsv::invoke(aa, bb, _k); + } + + inline void run() { + using value_type = typename AViewType::non_const_value_type; + std::string name_region("KokkosBatched::Test::SerialTbsv"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::RangePolicy policy(0, _b.extent(0)); + Kokkos::parallel_for(name.c_str(), policy, *this); + Kokkos::Profiling::popRegion(); + } +}; + +template +/// \brief Implementation details of batched tbsv test +/// +/// \param N [in] Batch size of RHS (banded matrix can also be batched matrix) +/// \param k [in] Number of superdiagonals or subdiagonals of matrix A +/// \param BlkSize [in] Block size of matrix A +void impl_test_batched_tbsv(const int N, const int k, const int BlkSize) { + using execution_space = typename DeviceType::execution_space; + using View2DType = Kokkos::View; + using View3DType = Kokkos::View; + + // Reference is created by trsv from triangular matrix + View3DType A("A", N, BlkSize, BlkSize), Ref("Ref", N, BlkSize, BlkSize); + View3DType Ab("Ab", N, k + 1, BlkSize); // Banded matrix + View2DType x0("x0", N, BlkSize), x1("x1", N, 
BlkSize); // Solutions + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + ScalarType randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(Ref, rand_pool, randStart, randEnd); + Kokkos::fill_random(x0, rand_pool, randStart, randEnd); + + Kokkos::deep_copy(x1, x0); + + // Create triangluar or banded matrix + create_banded_triangular_matrix(Ref, A, k, false); + create_banded_triangular_matrix(Ref, Ab, k, true); + + // Reference trsv + Functor_BatchedSerialTrsv(1.0, A, + x0) + .run(); + + // tbsv + Functor_BatchedSerialTbsv(Ab, x1, k).run(); + + Kokkos::fence(); + + // this eps is about 10^-14 + using ats = typename Kokkos::ArithTraits; + using mag_type = typename ats::mag_type; + mag_type eps = 1.0e3 * ats::epsilon(); + + // Check x0 = x1 + auto h_x0 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), x0); + auto h_x1 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), x1); + for (int i = 0; i < N; i++) { + for (int j = 0; j < BlkSize; j++) { + EXPECT_NEAR_KK(h_x0(i, j), h_x1(i, j), eps); + } + } +} + +template +/// \brief Implementation details of batched tbsv test +/// +/// \param N [in] Batch size of RHS (banded matrix can also be batched matrix) +void impl_test_batched_tbsv_analytical(const std::size_t N) { + using execution_space = typename DeviceType::execution_space; + using View2DType = Kokkos::View; + using StridedView2DType = Kokkos::View; + using View3DType = Kokkos::View; + + // Reference is created by trsv from triangular matrix + constexpr std::size_t BlkSize = 3, k = 2, incx = 2; + + View3DType A("A", N, BlkSize, BlkSize), ref("Ref", N, BlkSize, BlkSize); + View3DType Ab("Ab", N, k + 1, BlkSize); // Banded matrix + View2DType x0("x0", N, BlkSize), x_ref("x_ref", N, BlkSize); // Solutions + + // Testing incx argument with strided Views + Kokkos::LayoutStride layout{N, incx, BlkSize, N * incx}; + StridedView2DType x1("x1", layout); // Solutions + + Kokkos::RangePolicy policy(0, N); + 
Kokkos::parallel_for( + "KokkosBatched::Test::SerialTbsv::Initialize", policy, KOKKOS_LAMBDA(const std::size_t ib) { + for (std::size_t i = 0; i < BlkSize; i++) { + for (std::size_t j = 0; j < BlkSize; j++) { + ref(ib, i, j) = i + 1; + } + } + for (std::size_t j = 0; j < BlkSize; j++) { + x0(ib, j) = 1; + x1(ib, j) = 1; + } + + if (std::is_same_v) { + if (std::is_same_v) { + if (std::is_same_v) { + x_ref(ib, 0) = 1.0 / 2.0; + x_ref(ib, 1) = 1.0 / 6.0; + x_ref(ib, 2) = 1.0 / 3.0; + } else { + x_ref(ib, 0) = 1.0; + x_ref(ib, 1) = -1.0; + x_ref(ib, 2) = 1.0; + } + } else { + if (std::is_same_v) { + x_ref(ib, 0) = 1.0; + x_ref(ib, 1) = 0.0; + x_ref(ib, 2) = 0.0; + } else { + x_ref(ib, 0) = 1.0; + x_ref(ib, 1) = 0.0; + x_ref(ib, 2) = 0.0; + } + } + } else { + if (std::is_same_v) { + if (std::is_same_v) { + x_ref(ib, 0) = 1.0; + x_ref(ib, 1) = -1.0 / 2.0; + x_ref(ib, 2) = -1.0 / 6.0; + } else { + x_ref(ib, 0) = 1.0; + x_ref(ib, 1) = -1.0; + x_ref(ib, 2) = 1.0; + } + } else { + if (std::is_same_v) { + x_ref(ib, 0) = 0.0; + x_ref(ib, 1) = 0.0; + x_ref(ib, 2) = 1.0 / 3.0; + } else { + x_ref(ib, 0) = 2.0; + x_ref(ib, 1) = -2.0; + x_ref(ib, 2) = 1.0; + } + } + } + }); + + Kokkos::fence(); + + // Create triangluar or banded matrix + create_banded_triangular_matrix(ref, A, k, false); + create_banded_triangular_matrix(ref, Ab, k, true); + + // tbsv + Functor_BatchedSerialTbsv(Ab, x0, k).run(); + + // tbsv with incx == 2 + Functor_BatchedSerialTbsv(Ab, x1, k).run(); + + Kokkos::fence(); + + // Check x0 = x_ref and x1 = x_ref + // Firstly, prepare contiguous views on host + auto h_x0 = Kokkos::create_mirror_view(x0); + auto h_x1 = Kokkos::create_mirror_view(x0); + + Kokkos::deep_copy(h_x0, x0); + + // Pack x1 into x0 for contiguous storage + Kokkos::parallel_for( + "KokkosBatched::Test::SerialTbsv::Copy", policy, KOKKOS_LAMBDA(const std::size_t ib) { + for (std::size_t j = 0; j < BlkSize; j++) { + x0(ib, j) = x1(ib, j); + } + }); + + Kokkos::fence(); + Kokkos::deep_copy(h_x1, x0); 
+ + // this eps is about 10^-14 + using ats = typename Kokkos::ArithTraits; + using mag_type = typename ats::mag_type; + mag_type eps = 1.0e3 * ats::epsilon(); + + auto h_x_ref = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), x_ref); + for (std::size_t ib = 0; ib < N; ib++) { + for (std::size_t j = 0; j < BlkSize; j++) { + // Check x0 = x_ref + EXPECT_NEAR_KK(h_x0(ib, j), h_x_ref(ib, j), eps); + + // Check x1 = x_ref + EXPECT_NEAR_KK(h_x1(ib, j), h_x_ref(ib, j), eps); + } + } +} + +} // namespace Tbsv +} // namespace Test + +template +int test_batched_tbsv() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + using LayoutType = Kokkos::LayoutLeft; + Test::Tbsv::impl_test_batched_tbsv_analytical(0); + Test::Tbsv::impl_test_batched_tbsv_analytical(1); + Test::Tbsv::impl_test_batched_tbsv(0, 1, 10); + for (int i = 0; i < 10; i++) { + Test::Tbsv::impl_test_batched_tbsv(1, 1, i); + } + } +#endif +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + using LayoutType = Kokkos::LayoutRight; + Test::Tbsv::impl_test_batched_tbsv_analytical(0); + Test::Tbsv::impl_test_batched_tbsv_analytical(1); + Test::Tbsv::impl_test_batched_tbsv(0, 1, 10); + for (int i = 0; i < 10; i++) { + Test::Tbsv::impl_test_batched_tbsv(1, 1, i); + } + } +#endif + + return 0; +} diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Complex.hpp new file mode 100644 index 000000000000..005a6e92c06e --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Complex.hpp @@ -0,0 +1,104 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) +// NO TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_nt_u_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_nt_n_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_nt_u_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_nt_n_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +// TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_t_u_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_t_n_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_t_u_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_t_n_dcomplex) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using 
algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, algo_tag_type>(); +} + +/* [FIXME] These tests need Trans::ConjTranspose in trsv. +// CONJUGATE TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_ct_u_dcomplex) { + using param_tag_type = + ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, + algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_ct_n_dcomplex) { + using param_tag_type = + ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, + algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_ct_u_dcomplex) { + using param_tag_type = + ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, + algo_tag_type>(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_ct_n_dcomplex) { + using param_tag_type = + ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv, param_tag_type, + algo_tag_type>(); +} +*/ +#endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Real.hpp new file mode 100644 index 000000000000..c8f10adf5c6a --- /dev/null +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTbsv_Real.hpp @@ -0,0 +1,121 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#if defined(KOKKOSKERNELS_INST_FLOAT) +// NO TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_nt_u_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_nt_n_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_nt_u_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_nt_n_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +// TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_t_u_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_t_n_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_t_u_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_t_n_float) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +// NO TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_nt_u_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_nt_n_double) { + using 
param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_nt_u_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_nt_n_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +// TRANSPOSE +TEST_F(TestCategory, batched_serial_tbsv_l_t_u_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_l_t_n_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_t_u_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +TEST_F(TestCategory, batched_serial_tbsv_u_t_n_double) { + using param_tag_type = ::Test::Tbsv::ParamTag; + using algo_tag_type = typename Algo::Tbsv::Unblocked; + + test_batched_tbsv(); +} +#endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm.hpp index 7a7e89ebf826..610f9e700a66 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm.hpp @@ -49,8 +49,7 @@ struct NonUnitDiagTRMM { KOKKOS_INLINE_FUNCTION void operator()(const int& i) const { A_(i, i) = A_(i, i) + 10; } }; -template +template struct VanillaGEMM { bool A_t, B_t, A_c, B_c; int N, K; @@ -67,12 +66,9 @@ struct VanillaGEMM { ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename 
Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { // GNU COMPILER BUG WORKAROUND -#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) +#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int i = team.league_rank(); #else const int i = team.league_rank(); @@ -110,8 +106,7 @@ struct ParamTag { typedef D diag; }; -template +template struct Functor_TestBatchedSerialTrmm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b; @@ -119,8 +114,7 @@ struct Functor_TestBatchedSerialTrmm { ScalarType _alpha; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialTrmm(const ScalarType alpha, const ViewType& a, - const ViewType& b) + Functor_TestBatchedSerialTrmm(const ScalarType alpha, const ViewType& a, const ViewType& b) : _a(a), _b(b), _alpha(alpha) {} KOKKOS_INLINE_FUNCTION @@ -128,9 +122,8 @@ struct Functor_TestBatchedSerialTrmm { auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); - SerialTrmm::invoke(_alpha, aa, bb); + SerialTrmm::invoke(_alpha, aa, bb); } inline void run() { @@ -145,10 +138,8 @@ struct Functor_TestBatchedSerialTrmm { } }; -template -void impl_test_batched_trmm(const int N, const int nRows, const int nCols, - const char* trans) { +template +void impl_test_batched_trmm(const int N, const int nRows, const int nCols, const char* trans) { typedef typename ViewType::value_type value_type; typedef typename DeviceType::execution_space execution_space; typedef Kokkos::ArithTraits ats; @@ -156,56 +147,40 @@ void impl_test_batched_trmm(const int N, const int nRows, const int nCols, ScalarType alpha(1.0); ScalarType beta(0.0); - const bool is_side_right = - std::is_same::value; - const bool is_A_lower = - std::is_same::value; - const int K = is_side_right ? 
nCols : nRows; - ViewType A("A", N, K, K), B_actual("B_actual", N, nRows, nCols), - B_expected("B_expected", N, nRows, nCols); - typename ViewType::HostMirror A_host = Kokkos::create_mirror_view(A); - typename ViewType::HostMirror B_actual_host = - Kokkos::create_mirror_view(B_actual); - typename ViewType::HostMirror B_expected_host = - Kokkos::create_mirror_view(B_expected); - uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - - using ViewTypeSubA = - decltype(Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL())); - using ViewTypeSubB = - decltype(Kokkos::subview(B_actual, 0, Kokkos::ALL(), Kokkos::ALL())); + const bool is_side_right = std::is_same::value; + const bool is_A_lower = std::is_same::value; + const int K = is_side_right ? nCols : nRows; + ViewType A("A", N, K, K), B_actual("B_actual", N, nRows, nCols), B_expected("B_expected", N, nRows, nCols); + typename ViewType::HostMirror A_host = Kokkos::create_mirror_view(A); + typename ViewType::HostMirror B_actual_host = Kokkos::create_mirror_view(B_actual); + typename ViewType::HostMirror B_expected_host = Kokkos::create_mirror_view(B_expected); + uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + + using ViewTypeSubA = decltype(Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL())); + using ViewTypeSubB = decltype(Kokkos::subview(B_actual, 0, Kokkos::ALL(), Kokkos::ALL())); Kokkos::Random_XorShift64_Pool rand_pool(seed); if (std::is_same::value) { // Initialize A with deterministic random numbers - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarType>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarType>::max()); using functor_type = UnitDiagTRMM; for (int k = 0; k < N; ++k) { functor_type udtrmm(Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL())); // Initialize As diag with 1s - Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRMM", - Kokkos::RangePolicy(0, K), udtrmm); + 
Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRMM", Kokkos::RangePolicy(0, K), udtrmm); } } else { //(diag[0]=='N')||(diag[0]=='n') // Initialize A with random numbers - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarType>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarType>::max()); using functor_type = NonUnitDiagTRMM; for (int k = 0; k < N; ++k) { functor_type nudtrmm(Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL())); // Initialize As diag with A(i,i)+10 - Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRMM", - Kokkos::RangePolicy(0, K), nudtrmm); + Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRMM", Kokkos::RangePolicy(0, K), nudtrmm); } } - Kokkos::fill_random(B_actual, rand_pool, - Kokkos::rand, - ScalarType>::max()); + Kokkos::fill_random(B_actual, rand_pool, Kokkos::rand, ScalarType>::max()); Kokkos::fence(); Kokkos::deep_copy(B_expected, B_actual); @@ -227,9 +202,7 @@ void impl_test_batched_trmm(const int N, const int nRows, const int nCols, if (!is_side_right) { // B_expected = alpha * op(A) * B + beta * C = 1 * op(A) * B + 0 * C - struct VanillaGEMM - vgemm; + struct VanillaGEMM vgemm; vgemm.A_t = (trans[0] != 'N') && (trans[0] != 'n'); vgemm.B_t = false; vgemm.A_c = (trans[0] == 'C') || (trans[0] == 'c'); @@ -244,15 +217,12 @@ void impl_test_batched_trmm(const int N, const int nRows, const int nCols, ; vgemm.C = Kokkos::subview(B_expected, i, Kokkos::ALL(), Kokkos::ALL()); ; - Kokkos::parallel_for( - "KokkosBlas::Test::VanillaGEMM", - Kokkos::TeamPolicy(nRows, Kokkos::AUTO, 16), vgemm); + Kokkos::parallel_for("KokkosBlas::Test::VanillaGEMM", + Kokkos::TeamPolicy(nRows, Kokkos::AUTO, 16), vgemm); } } else { // B_expected = alpha * B * op(A) + beta * C = 1 * B * op(A) + 0 * C - struct VanillaGEMM - vgemm; + struct VanillaGEMM vgemm; vgemm.A_t = false; vgemm.B_t = (trans[0] != 'N') && (trans[0] != 'n'); vgemm.A_c = false; @@ -267,14 +237,13 @@ void impl_test_batched_trmm(const int N, const int nRows, const int 
nCols, ; vgemm.C = Kokkos::subview(B_expected, i, Kokkos::ALL(), Kokkos::ALL()); ; - Kokkos::parallel_for( - "KokkosBlas::Test::VanillaGEMM", - Kokkos::TeamPolicy(nRows, Kokkos::AUTO, 16), vgemm); + Kokkos::parallel_for("KokkosBlas::Test::VanillaGEMM", + Kokkos::TeamPolicy(nRows, Kokkos::AUTO, 16), vgemm); } } - Functor_TestBatchedSerialTrmm(alpha, A, B_actual) + Functor_TestBatchedSerialTrmm(alpha, A, + B_actual) .run(); Kokkos::fence(); @@ -308,50 +277,35 @@ void impl_test_batched_trmm(const int N, const int nRows, const int nCols, } // namespace Trmm } // namespace Test -template +template int test_batched_trmm(int batchSize = 512) { - char trans = - std::is_same::value - ? 'N' - : std::is_same::value - ? 'T' - : std::is_same::value - ? 'C' - : 'E'; + char trans = std::is_same::value ? 'N' + : std::is_same::value ? 'T' + : std::is_same::value ? 'C' + : 'E'; #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - Test::Trmm::impl_test_batched_trmm(0, 10, 4, - &trans); + Test::Trmm::impl_test_batched_trmm(0, 10, 4, &trans); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::Trmm::impl_test_batched_trmm( - batchSize, i, 4, &trans); - Test::Trmm::impl_test_batched_trmm( - batchSize, i, 1, &trans); + Test::Trmm::impl_test_batched_trmm(batchSize, i, 4, + &trans); + Test::Trmm::impl_test_batched_trmm(batchSize, i, 1, + &trans); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::Trmm::impl_test_batched_trmm(0, 10, 4, - &trans); + typedef Kokkos::View ViewType; + Test::Trmm::impl_test_batched_trmm(0, 10, 4, &trans); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::Trmm::impl_test_batched_trmm( - batchSize, i, 4, &trans); - Test::Trmm::impl_test_batched_trmm( - batchSize, i, 1, &trans); + Test::Trmm::impl_test_batched_trmm(batchSize, i, 4, + &trans); + Test::Trmm::impl_test_batched_trmm(batchSize, i, 1, + &trans); } } 
#endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Complex.hpp index 8ab6e2810c34..2d9eab7c4cf9 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Complex.hpp @@ -17,353 +17,227 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) // NO TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); 
} TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } // TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_n_scomplex_scomplex) { - typedef 
::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } // CONJUGATE TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; 
typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_u_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_n_scomplex_scomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) // NO TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked 
algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } // TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( 
- 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } // CONJUGATE 
TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_n_dcomplex_dcomplex) { - typedef 
::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trmm, Kokkos::complex, param_tag_type, algo_tag_type>(128); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Real.hpp index 1cfc259dd35e..10a4f38ed24c 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrmm_Real.hpp @@ -17,147 +17,111 @@ #if defined(KOKKOSKERNELS_INST_FLOAT) // NO TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, 
batched_scalar_serial_trmm_r_u_nt_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } // TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } // CONJUGATE TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_n_float_float) 
{ - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; test_batched_trmm(); @@ -167,167 +131,113 @@ TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_n_float_float) { #if defined(KOKKOSKERNELS_INST_DOUBLE) // NO TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_nt_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef 
Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_nt_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_nt_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } // TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_t_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_t_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, 
batched_scalar_serial_trmm_r_u_t_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_t_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } // CONJUGATE TRANSPOSE TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_l_ct_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_l_u_ct_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } TEST_F(TestCategory, batched_scalar_serial_trmm_r_u_ct_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + 
typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trmm::Unblocked algo_tag_type; - test_batched_trmm(); + test_batched_trmm(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm.hpp index f9418a804ab6..62f4b4de6979 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm.hpp @@ -37,8 +37,7 @@ struct ParamTag { typedef D diag; }; -template +template struct Functor_TestBatchedSerialTrsm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b; @@ -46,8 +45,7 @@ struct Functor_TestBatchedSerialTrsm { ScalarType _alpha; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialTrsm(const ScalarType alpha, const ViewType &a, - const ViewType &b) + Functor_TestBatchedSerialTrsm(const ScalarType alpha, const ViewType &a, const ViewType &b) : _a(a), _b(b), _alpha(alpha) {} KOKKOS_INLINE_FUNCTION @@ -55,9 +53,8 @@ struct Functor_TestBatchedSerialTrsm { auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); - SerialTrsm::invoke(_alpha, aa, bb); + SerialTrsm::invoke(_alpha, aa, bb); } inline void run() { @@ -72,8 +69,7 @@ struct Functor_TestBatchedSerialTrsm { } }; -template +template void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { typedef typename ViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -81,15 +77,13 @@ void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { /// randomized input testing views ScalarType alpha(1.0); - const bool is_side_right = - std::is_same::value; - const int b_nrows = is_side_right ? NumCols : BlkSize; - const int b_ncols = is_side_right ? 
BlkSize : NumCols; - ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), - b0("b0", N, b_nrows, b_ncols), b1("b1", N, b_nrows, b_ncols); + const bool is_side_right = std::is_same::value; + const int b_nrows = is_side_right ? NumCols : BlkSize; + const int b_ncols = is_side_right ? BlkSize : NumCols; + ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), b0("b0", N, b_nrows, b_ncols), + b1("b1", N, b_nrows, b_ncols); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fill_random(b0, random, value_type(1.0)); @@ -98,12 +92,9 @@ void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { Kokkos::deep_copy(a1, a0); Kokkos::deep_copy(b1, b0); - Functor_TestBatchedSerialTrsm(alpha, a0, b0) - .run(); - Functor_TestBatchedSerialTrsm(alpha, a1, b1) + Functor_TestBatchedSerialTrsm(alpha, a0, b0) .run(); + Functor_TestBatchedSerialTrsm(alpha, a1, b1).run(); Kokkos::fence(); @@ -130,36 +121,27 @@ void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { } // namespace Trsm } // namespace Test -template +template int test_batched_trsm() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::Trsm::impl_test_batched_trsm(0, 10, 4); + typedef Kokkos::View ViewType; + Test::Trsm::impl_test_batched_trsm(0, 10, 4); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::Trsm::impl_test_batched_trsm(1024, i, 4); - Test::Trsm::impl_test_batched_trsm(1024, i, 1); + Test::Trsm::impl_test_batched_trsm(1024, i, 4); + Test::Trsm::impl_test_batched_trsm(1024, i, 1); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::Trsm::impl_test_batched_trsm(0, 10, 4); + typedef Kokkos::View ViewType; + Test::Trsm::impl_test_batched_trsm(0, 10, 4); for (int i = 0; i < 10; ++i) { // printf("Testing: 
LayoutRight, Blksize %d\n", i); - Test::Trsm::impl_test_batched_trsm(1024, i, 4); - Test::Trsm::impl_test_batched_trsm(1024, i, 1); + Test::Trsm::impl_test_batched_trsm(1024, i, 4); + Test::Trsm::impl_test_batched_trsm(1024, i, 1); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Complex.hpp index be0005a74ce9..d034ba1a539f 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Complex.hpp @@ -16,28 +16,19 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_serial_trsm_l_u_nt_n_dcomplex_dcomplex ) // { @@ -47,45 +38,30 @@ TEST_F(TestCategory, 
batched_scalar_serial_trsm_l_u_nt_u_dcomplex_dcomplex) { // test_batched_trsm,Kokkos::complex,param_tag_type,algo_tag_type>(); // } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } // TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_n_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_u_dcomplex_dcomplex) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } // TEST_F( 
TestCategory, batched_scalar_serial_trsm_l_u_t_n_dcomplex_dcomplex ) // { @@ -96,28 +72,19 @@ TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_u_dcomplex_dcomplex) { // } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_u_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_n_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_u_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_serial_trsm_l_u_nt_n_dcomplex_double ) { // typedef @@ -126,45 +93,30 @@ TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_u_dcomplex_double) { // test_batched_trsm,double,param_tag_type,algo_tag_type>(); // } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_u_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_n_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag 
param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } // TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_u_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_n_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_u_dcomplex_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm, double, param_tag_type, - algo_tag_type>(); + test_batched_trsm, double, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_serial_trsm_l_u_t_n_dcomplex_double ) { // typedef diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Real.hpp index 18b10a81e684..44cb80226337 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsm_Real.hpp @@ -16,73 +16,53 @@ #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; 
test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } // TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_n_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_u_float_float) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_n_float_float) { - 
typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; test_batched_trsm(); } @@ -90,84 +70,54 @@ TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_n_float_float) { #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_nt_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_nt_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_r_u_nt_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } // TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_u_double_double) { - typedef ::Test::Trmm::ParamTag - 
param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_l_t_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_u_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } TEST_F(TestCategory, batched_scalar_serial_trsm_l_u_t_n_double_double) { - typedef ::Test::Trmm::ParamTag - param_tag_type; + typedef ::Test::Trmm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_trsm(); + test_batched_trsm(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv.hpp index 512dce3bce94..c0a7de9e9999 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv.hpp @@ -22,7 +22,7 @@ #include "KokkosBatched_Util.hpp" #include "KokkosBatched_Trsv_Decl.hpp" -//#include "KokkosKernels_TestUtils.hpp" +// #include "KokkosKernels_TestUtils.hpp" using namespace KokkosBatched; @@ -36,8 +36,7 @@ struct ParamTag { typedef D diag; }; -template +template struct Functor_TestBatchedSerialTrsv { using execution_space = typename DeviceType::execution_space; ViewType _a, _b; @@ -45,8 +44,7 @@ struct Functor_TestBatchedSerialTrsv { ScalarType _alpha; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialTrsv(const ScalarType alpha, const ViewType &a, - const ViewType &b) + Functor_TestBatchedSerialTrsv(const ScalarType alpha, const ViewType &a, 
const ViewType &b) : _a(a), _b(b), _alpha(alpha) {} KOKKOS_INLINE_FUNCTION @@ -54,9 +52,8 @@ struct Functor_TestBatchedSerialTrsv { auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), 0); - SerialTrsv::invoke(_alpha, aa, - bb); + SerialTrsv::invoke(_alpha, aa, bb); } inline void run() { @@ -71,8 +68,7 @@ struct Functor_TestBatchedSerialTrsv { } }; -template +template void impl_test_batched_trsv(const int N, const int BlkSize) { typedef typename ViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -80,11 +76,10 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { /// randomized input testing views ScalarType alpha(1.5); - ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), - b0("b0", N, BlkSize, 1), b1("b1", N, BlkSize, 1); + ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), b0("b0", N, BlkSize, 1), + b1("b1", N, BlkSize, 1); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fill_random(b0, random, value_type(1.0)); @@ -95,12 +90,9 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { Kokkos::deep_copy(a1, a0); Kokkos::deep_copy(b1, b0); - Functor_TestBatchedSerialTrsv(alpha, a0, b0) - .run(); - Functor_TestBatchedSerialTrsv(alpha, a1, b1) + Functor_TestBatchedSerialTrsv(alpha, a0, b0) .run(); + Functor_TestBatchedSerialTrsv(alpha, a1, b1).run(); Kokkos::fence(); @@ -120,16 +112,14 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { /// check b0 and b1 are correct const value_type one(1); - const bool is_unit_diag = - std::is_same::value; + const bool is_unit_diag = std::is_same::value; for (int k = 0; k < N; ++k) { if (std::is_same::value) { if (std::is_same::value) { for (int i = 0; i < BlkSize; ++i) { value_type tmp(0); for (int j = 0; j <= i; ++j) { - const value_type aval = - (i == j && is_unit_diag ? 
one : a0_host(k, i, j)); + const value_type aval = (i == j && is_unit_diag ? one : a0_host(k, i, j)); const value_type bval = b0_host(k, j, 0); tmp += aval * bval; } @@ -138,20 +128,17 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { for (int i = 0; i < BlkSize; ++i) { value_type tmp(0); for (int j = 0; j <= i; ++j) { - const value_type aval = - (i == j && is_unit_diag ? one : a0_host(k, i, j)); + const value_type aval = (i == j && is_unit_diag ? one : a0_host(k, i, j)); const value_type bval = b1_host(k, j, 0); tmp += aval * bval; } EXPECT_NEAR(ats::abs(tmp), ats::abs(alpha), eps); } - } else if (std::is_same::value) { + } else if (std::is_same::value) { for (int i = 0; i < BlkSize; ++i) { value_type tmp(0); for (int j = i; j < BlkSize; ++j) { - const value_type aval = - (i == j && is_unit_diag ? one : a0_host(k, i, j)); + const value_type aval = (i == j && is_unit_diag ? one : a0_host(k, i, j)); const value_type bval = b0_host(k, j, 0); tmp += aval * bval; } @@ -160,8 +147,7 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { for (int i = 0; i < BlkSize; ++i) { value_type tmp(0); for (int j = i; j < BlkSize; ++j) { - const value_type aval = - (i == j && is_unit_diag ? one : a0_host(k, i, j)); + const value_type aval = (i == j && is_unit_diag ? 
one : a0_host(k, i, j)); const value_type bval = b1_host(k, j, 0); tmp += aval * bval; } @@ -183,15 +169,12 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { } // namespace Trsv } // namespace Test -template +template int test_batched_trsv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::Trsv::impl_test_batched_trsv(0, 10); + typedef Kokkos::View ViewType; + Test::Trsv::impl_test_batched_trsv(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d, Uplo %d, Trans %d, Diag // %d\n", @@ -200,17 +183,14 @@ int test_batched_trsv() { // std::is_same::value, std::is_same::value); - Test::Trsv::impl_test_batched_trsv(1, i); + Test::Trsv::impl_test_batched_trsv(1, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::Trsv::impl_test_batched_trsv(0, 10); + typedef Kokkos::View ViewType; + Test::Trsv::impl_test_batched_trsv(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d, Uplo %d, Trans %d, Diag // %d\n", @@ -219,8 +199,7 @@ int test_batched_trsv() { // std::is_same::value, std::is_same::value); - Test::Trsv::impl_test_batched_trsv(1, i); + Test::Trsv::impl_test_batched_trsv(1, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Complex.hpp index a524b9f97ea1..73f0e65ed943 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Complex.hpp @@ -16,60 +16,44 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, - Kokkos::complex, 
param_tag_type, algo_tag_type>(); + test_batched_trsv, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsv, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsv, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_n_dcomplex_dcomplex) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, - Kokkos::complex, param_tag_type, algo_tag_type>(); + test_batched_trsv, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_u_dcomplex_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, double, param_tag_type, - algo_tag_type>(); + test_batched_trsv, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_n_dcomplex_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, double, param_tag_type, - algo_tag_type>(); + test_batched_trsv, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_u_dcomplex_double) { - typedef 
::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, double, param_tag_type, - algo_tag_type>(); + test_batched_trsv, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_n_dcomplex_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv, double, param_tag_type, - algo_tag_type>(); + test_batched_trsv, double, param_tag_type, algo_tag_type>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Real.hpp index be1bf77b9ee6..599823260566 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrsv_Real.hpp @@ -16,26 +16,22 @@ #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_u_float_float) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; test_batched_trsv(); } TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_n_float_float) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; test_batched_trsv(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_u_float_float) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; test_batched_trsv(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_n_float_float) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; 
test_batched_trsv(); } @@ -43,31 +39,23 @@ TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_n_float_float) { #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_u_double_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv(); + test_batched_trsv(); } TEST_F(TestCategory, batched_scalar_serial_trsv_l_nt_n_double_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv(); + test_batched_trsv(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_u_double_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv(); + test_batched_trsv(); } TEST_F(TestCategory, batched_scalar_serial_trsv_u_nt_n_double_double) { - typedef ::Test::Trsv::ParamTag - param_tag_type; + typedef ::Test::Trsv::ParamTag param_tag_type; typedef Algo::Trsv::Blocked algo_tag_type; - test_batched_trsv(); + test_batched_trsv(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri.hpp index b09cadcb7e83..c4acbbfafbb5 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri.hpp @@ -51,8 +51,7 @@ struct NonUnitDiagTRTRI { KOKKOS_INLINE_FUNCTION void operator()(const int& i) const { A_(i, i) = A_(i, i) + 10; } }; -template +template struct VanillaGEMM { bool A_t, B_t, A_c, B_c; int N, K; @@ -69,12 +68,9 @@ struct VanillaGEMM { ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename 
Kokkos::TeamPolicy::member_type& team) const { // GNU COMPILER BUG WORKAROUND -#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) +#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int i = team.league_rank(); #else const int i = team.league_rank(); @@ -110,8 +106,7 @@ struct ParamTag { typedef D diag; }; -template +template struct Functor_TestBatchedSerialTrtri { using execution_space = typename DeviceType::execution_space; ViewType _a; @@ -123,8 +118,7 @@ struct Functor_TestBatchedSerialTrtri { void operator()(const ParamTagType&, const int k) const { auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); - SerialTrtri::invoke(aa); + SerialTrtri::invoke(aa); } inline void run() { @@ -139,8 +133,7 @@ struct Functor_TestBatchedSerialTrtri { } }; -template +template void impl_test_batched_trtri(const int N, const int K) { typedef typename ViewType::value_type value_type; typedef typename DeviceType::execution_space execution_space; @@ -155,8 +148,7 @@ void impl_test_batched_trtri(const int N, const int K) { bool fail_flag = false; ScalarType cur_check_val; // Either 1 or 0, to check A_I - const bool is_A_lower = - std::is_same::value; + const bool is_A_lower = std::is_same::value; ViewType A("A", N, K, K); ViewType A_original("A_original", N, K, K); ViewType A_I("A_I", N, K, K); @@ -164,39 +156,29 @@ void impl_test_batched_trtri(const int N, const int K) { typename ViewType::HostMirror I_host = Kokkos::create_mirror_view(A_I); typename ViewType::HostMirror A_host = Kokkos::create_mirror_view(A); - uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); + uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); - using ViewTypeSubA = - decltype(Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL())); + using ViewTypeSubA = decltype(Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL())); 
Kokkos::Random_XorShift64_Pool rand_pool(seed); if (std::is_same::value) { // Initialize A with deterministic random numbers - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarType>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarType>::max()); using functor_type = UnitDiagTRTRI; for (int k = 0; k < N; ++k) { functor_type udtrtri(Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL())); // Initialize As diag with 1s - Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRTRI", - Kokkos::RangePolicy(0, K), udtrtri); + Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRTRI", Kokkos::RangePolicy(0, K), udtrtri); } } else { //(diag[0]=='N')||(diag[0]=='n') // Initialize A with random numbers - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarType>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarType>::max()); using functor_type = NonUnitDiagTRTRI; for (int k = 0; k < N; ++k) { - functor_type nudtrtri( - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL())); + functor_type nudtrtri(Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL())); // Initialize As diag with A(i,i)+10 - Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRTRI", - Kokkos::RangePolicy(0, K), - nudtrtri); + Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRTRI", Kokkos::RangePolicy(0, K), nudtrtri); } } Kokkos::fence(); @@ -241,9 +223,7 @@ void impl_test_batched_trtri(const int N, const int K) { } #endif - Functor_TestBatchedSerialTrtri(A) - .run(); + Functor_TestBatchedSerialTrtri(A).run(); #if PRINT_MAT printf("A_original:\n"); @@ -271,8 +251,7 @@ void impl_test_batched_trtri(const int N, const int K) { Kokkos::fence(); - struct VanillaGEMM - vgemm; + struct VanillaGEMM vgemm; vgemm.A_t = false; vgemm.B_t = false; vgemm.A_c = false; @@ -287,9 +266,8 @@ void impl_test_batched_trtri(const int N, const int K) { ; vgemm.C = Kokkos::subview(A_I, i, Kokkos::ALL(), Kokkos::ALL()); ; - Kokkos::parallel_for( - "KokkosBlas::Test::VanillaGEMM", - Kokkos::TeamPolicy(K, 
Kokkos::AUTO, 16), vgemm); + Kokkos::parallel_for("KokkosBlas::Test::VanillaGEMM", Kokkos::TeamPolicy(K, Kokkos::AUTO, 16), + vgemm); } Kokkos::fence(); @@ -311,8 +289,7 @@ void impl_test_batched_trtri(const int N, const int K) { for (int k = 0; k < N; ++k) { for (int i = 0; i < K; ++i) { for (int j = 0; j < K; ++j) { - cur_check_val = - (i == j) ? ScalarType(1) : ScalarType(0); // ats::abs(host_A(i,j)); + cur_check_val = (i == j) ? ScalarType(1) : ScalarType(0); // ats::abs(host_A(i,j)); if (ats::abs(ats::abs(I_host(k, i, j)) - cur_check_val) > eps) { fail_flag = true; // printf(" Error: eps ( %g ), I_host ( %.15f ) != cur_check_val @@ -329,41 +306,29 @@ void impl_test_batched_trtri(const int N, const int K) { } // namespace Trtri } // namespace Test -template +template int test_batched_trtri(int batchSize = 512) { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - Test::Trtri::impl_test_batched_trtri(0, 10); + Test::Trtri::impl_test_batched_trtri(0, 10); // Test::impl_test_batched_trtri( // 1, 2); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::Trtri::impl_test_batched_trtri(batchSize, - i); - Test::Trtri::impl_test_batched_trtri(batchSize, - i); + Test::Trtri::impl_test_batched_trtri(batchSize, i); + Test::Trtri::impl_test_batched_trtri(batchSize, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::Trtri::impl_test_batched_trtri(0, 10); + typedef Kokkos::View ViewType; + Test::Trtri::impl_test_batched_trtri(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::Trtri::impl_test_batched_trtri(batchSize, - i); - Test::Trtri::impl_test_batched_trtri(batchSize, - i); + Test::Trtri::impl_test_batched_trtri(batchSize, i); + Test::Trtri::impl_test_batched_trtri(batchSize, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Complex.hpp 
b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Complex.hpp index 0d8f2c72a69f..ca5575c99f6d 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Complex.hpp @@ -20,29 +20,25 @@ TEST_F(TestCategory, batched_scalar_serial_trtri_u_n_scomplex_scomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trtri_u_u_scomplex_scomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trtri_l_n_scomplex_scomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trtri_l_u_scomplex_scomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, Kokkos::complex, - param_tag_type, algo_tag_type>(128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } #endif @@ -52,32 +48,24 @@ TEST_F(TestCategory, batched_scalar_serial_trtri_u_n_dcomplex_dcomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, 
batched_scalar_serial_trtri_u_u_dcomplex_dcomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trtri_l_n_dcomplex_dcomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } TEST_F(TestCategory, batched_scalar_serial_trtri_l_u_dcomplex_dcomplex) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri, - Kokkos::complex, param_tag_type, algo_tag_type>( - 128); + test_batched_trtri, Kokkos::complex, param_tag_type, algo_tag_type>(128); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Real.hpp index 952994d207cf..66fcd162ab67 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialTrtri_Real.hpp @@ -48,28 +48,24 @@ TEST_F(TestCategory, batched_scalar_serial_trtri_u_n_double_double) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri(); + test_batched_trtri(); } TEST_F(TestCategory, batched_scalar_serial_trtri_u_u_double_double) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri(); + test_batched_trtri(); } TEST_F(TestCategory, batched_scalar_serial_trtri_l_n_double_double) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri(); + 
test_batched_trtri(); } TEST_F(TestCategory, batched_scalar_serial_trtri_l_u_double_double) { typedef ::Test::Trtri::ParamTag param_tag_type; typedef Algo::Trtri::Unblocked algo_tag_type; - test_batched_trtri(); + test_batched_trtri(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy.hpp index b43b49860753..d33f83314648 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy.hpp @@ -37,8 +37,7 @@ struct Functor_TestBatchedTeamAxpy { const int _N_team; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamAxpy(const alphaViewType &alpha, const ViewType &X, - const ViewType &Y, const int N_team) + Functor_TestBatchedTeamAxpy(const alphaViewType &alpha, const ViewType &X, const ViewType &Y, const int N_team) : _alpha(alpha), _X(X), _Y(Y), _N_team(N_team) {} template @@ -46,16 +45,12 @@ struct Functor_TestBatchedTeamAxpy { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _X.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); - auto alpha = - Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto alpha = Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); KokkosBatched::TeamAxpy::invoke(member, alpha, x, y); } @@ -66,8 +61,7 @@ struct Functor_TestBatchedTeamAxpy { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_X.extent(0) / _N_team, - Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_X.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } @@ -80,13 +74,11 @@ void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { typedef typename alphaViewType::const_value_type alpha_const_value_type; typedef Kokkos::ArithTraits ats; - ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), - Y1("y1", N, BlkSize); + ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), Y1("y1", N, BlkSize); alphaViewType alpha("alpha", N); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(X0, random, const_value_type(1.0)); Kokkos::fill_random(Y0, random, const_value_type(1.0)); Kokkos::fill_random(alpha, random, alpha_const_value_type(1.0)); @@ -106,12 +98,9 @@ void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(Y0_host, Y0); for (int l = 0; l < N; ++l) - 
for (int i = 0; i < BlkSize; ++i) - Y0_host(l, i) += alpha_host(l) * X0_host(l, i); + for (int i = 0; i < BlkSize; ++i) Y0_host(l, i) += alpha_host(l) * X0_host(l, i); - Functor_TestBatchedTeamAxpy(alpha, X1, - Y1, N_team) - .run(); + Functor_TestBatchedTeamAxpy(alpha, X1, Y1, N_team).run(); Kokkos::fence(); @@ -140,25 +129,20 @@ int test_batched_team_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamAxpy::impl_test_batched_axpy(1024, i, 2); + Test::TeamAxpy::impl_test_batched_axpy(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View ViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamAxpy::impl_test_batched_axpy(1024, i, 2); + Test::TeamAxpy::impl_test_batched_axpy(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Complex.hpp index b95b769fcc2b..ba47fe739aa5 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Complex.hpp @@ -16,8 +16,7 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_team_axpy_nt_dcomplex_dcomplex) { - test_batched_team_axpy, - Kokkos::complex>(); + test_batched_team_axpy, Kokkos::complex>(); } TEST_F(TestCategory, batched_scalar_team_axpy_nt_dcomplex_double) { diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Real.hpp index ac458d4a553f..1fcbae03d6d6 100644 --- 
a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamAxpy_Real.hpp @@ -15,9 +15,7 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_team_axpy_nt_float_float) { - test_batched_team_axpy(); -} +TEST_F(TestCategory, batched_scalar_team_axpy_nt_float_float) { test_batched_team_axpy(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm.hpp index 2d952889c900..f283da2b68f0 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Serial_Impl.hpp" @@ -38,8 +38,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_TestBatchedTeamGemm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -47,24 +46,20 @@ struct Functor_TestBatchedTeamGemm { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamGemm(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_TestBatchedTeamGemm(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int k = member.league_rank(); auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = 
Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(_c, k, Kokkos::ALL(), Kokkos::ALL()); - KokkosBatched::TeamGemm::invoke(member, _alpha, aa, bb, _beta, - cc); + KokkosBatched::TeamGemm::invoke(member, _alpha, aa, bb, _beta, cc); } inline void run() { @@ -74,19 +69,15 @@ struct Functor_TestBatchedTeamGemm { std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template -void impl_test_batched_teamgemm(const int N, const int matAdim1, - const int matAdim2, const int matBdim1, - const int matBdim2, const int matCdim1, - const int matCdim2) { +template +void impl_test_batched_teamgemm(const int N, const int matAdim1, const int matAdim2, const int matBdim1, + const int matBdim2, const int matCdim1, const int matCdim2) { using transA = typename ParamTagType::transA; using transB = typename ParamTagType::transB; using execution_space = typename DeviceType::execution_space; @@ -96,15 +87,11 @@ void impl_test_batched_teamgemm(const int N, const int matAdim1, /// randomized input testing views ScalarType alpha = ScalarType(1.5), beta = ScalarType(3.0); - ViewType a_expected("a_expected", N, matAdim1, matAdim2), - a_actual("a_actual", N, matAdim1, matAdim2), - b_expected("b_expected", N, matBdim1, matBdim2), - b_actual("b_actual", N, matBdim1, matBdim2), - c_expected("c_expected", N, matCdim1, matCdim2), - c_actual("c_actual", N, matCdim1, matCdim2); + ViewType a_expected("a_expected", N, matAdim1, matAdim2), a_actual("a_actual", N, matAdim1, matAdim2), + b_expected("b_expected", N, matBdim1, matBdim2), b_actual("b_actual", N, matBdim1, matBdim2), + c_expected("c_expected", N, matCdim1, matCdim2), c_actual("c_actual", N, matCdim1, matCdim2); - 
Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a_expected, random, value_type(1.0)); Kokkos::fill_random(b_expected, random, value_type(1.0)); @@ -116,8 +103,7 @@ void impl_test_batched_teamgemm(const int N, const int matAdim1, Kokkos::deep_copy(b_actual, b_expected); Kokkos::deep_copy(c_actual, c_expected); - Functor_BatchedVanillaGEMM - vgemm; + Functor_BatchedVanillaGEMM vgemm; vgemm.A_t = std::is_same::value; vgemm.B_t = std::is_same::value; vgemm.A_c = vgemm.B_c = false; @@ -128,17 +114,14 @@ void impl_test_batched_teamgemm(const int N, const int matAdim1, vgemm.beta = beta; vgemm.run(); // Compute c_expected - Functor_TestBatchedTeamGemm(alpha, a_actual, b_actual, beta, - c_actual) + Functor_TestBatchedTeamGemm(alpha, a_actual, b_actual, + beta, c_actual) .run(); Kokkos::fence(); - typename ViewType::HostMirror c_expected_host = - Kokkos::create_mirror_view(c_expected); - typename ViewType::HostMirror c_actual_host = - Kokkos::create_mirror_view(c_actual); + typename ViewType::HostMirror c_expected_host = Kokkos::create_mirror_view(c_expected); + typename ViewType::HostMirror c_actual_host = Kokkos::create_mirror_view(c_actual); // Copy to host for comparision Kokkos::deep_copy(c_expected_host, c_expected); @@ -166,20 +149,16 @@ void impl_test_batched_teamgemm(const int N, const int matAdim1, // void (*impl_test)(const int, const int, const int, const int, const int, // const int, const int) -template +template int test_batched_teamgemm() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamGemm::impl_test_batched_teamgemm( + typedef Kokkos::View ViewType; + Test::TeamGemm::impl_test_batched_teamgemm( 0, 10, 10, 10, 10, 10, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + 
Test::TeamGemm::impl_test_batched_teamgemm( 1024, i, i, i, i, i, i); } for (int i = 0; i < 10; ++i) { @@ -187,36 +166,24 @@ int test_batched_teamgemm() { int dimM = i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimM, dimK, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimM, dimK, dimN, dimK, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimK, dimM, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimK, dimM, dimN, dimK, dimM, dimN); } } @@ -224,15 +191,12 @@ int test_batched_teamgemm() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamGemm::impl_test_batched_teamgemm( + typedef Kokkos::View ViewType; + Test::TeamGemm::impl_test_batched_teamgemm( 0, 10, 10, 10, 10, 10, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + 
Test::TeamGemm::impl_test_batched_teamgemm( 1024, i, i, i, i, i, i); } for (int i = 0; i < 10; ++i) { @@ -240,36 +204,24 @@ int test_batched_teamgemm() { int dimM = i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimM, dimK, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimM, dimK, dimN, dimK, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimK, dimM, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamGemm::impl_test_batched_teamgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamGemm::impl_test_batched_teamgemm( 1024, dimK, dimM, dimN, dimK, dimM, dimN); } } diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Complex.hpp index 09c7f3f2ccf3..a35351396781 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Complex.hpp @@ -19,36 +19,24 @@ /// dcomplex, dcomplex TEST_F(TestCategory, batched_scalar_team_gemm_nt_nt_dcomplex_dcomplex) { - typedef 
::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_teamgemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_nt_dcomplex_dcomplex) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_teamgemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_gemm_nt_t_dcomplex_dcomplex) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_teamgemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_t_dcomplex_dcomplex) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_teamgemm, Kokkos::complex, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_team_gemm_ct_nt_dcomplex_dcomplex ) { // typedef ::Test::TeamGemm::ParamTag @@ -64,32 +52,24 @@ TEST_F(TestCategory, batched_scalar_team_gemm_t_t_dcomplex_dcomplex) { /// dcomplex, double TEST_F(TestCategory, batched_scalar_team_gemm_nt_nt_dcomplex_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, double, - param_tag_type, algo_tag_type>(); + test_batched_teamgemm, double, 
param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_nt_dcomplex_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, double, - param_tag_type, algo_tag_type>(); + test_batched_teamgemm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_gemm_nt_t_dcomplex_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, double, - param_tag_type, algo_tag_type>(); + test_batched_teamgemm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_t_dcomplex_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm, double, - param_tag_type, algo_tag_type>(); + test_batched_teamgemm, double, param_tag_type, algo_tag_type>(); } // TEST_F( TestCategory, batched_scalar_team_gemm_ct_nt_dcomplex_double ) { // typedef ::Test::TeamGemm::ParamTag diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Real.hpp index b1a513501875..6f06638c2a29 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGemm_Real.hpp @@ -15,156 +15,116 @@ //@HEADER #if defined(KOKKOS_BHALF_T_IS_FLOAT) TEST_F(TestCategory, batched_scalar_team_gemm_nt_nt_bhalf_bhalf) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_nt_bhalf_bhalf) { - typedef 
::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_nt_t_bhalf_bhalf) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_t_bhalf_bhalf) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } #endif // KOKKOS_BHALF_T_IS_FLOAT #if defined(KOKKOS_HALF_T_IS_FLOAT) TEST_F(TestCategory, batched_scalar_team_gemm_nt_nt_half_half) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_nt_half_half) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_nt_t_half_half) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_t_half_half) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; - test_batched_teamgemm(); - test_batched_teamgemm(); } #endif // KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_team_gemm_nt_nt_float_float) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } TEST_F(TestCategory, 
batched_scalar_team_gemm_t_nt_float_float) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_nt_t_float_float) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_t_float_float) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_team_gemm_nt_nt_double_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_nt_double_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_nt_t_double_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } TEST_F(TestCategory, batched_scalar_team_gemm_t_t_double_double) { - typedef ::Test::TeamGemm::ParamTag - param_tag_type; + typedef ::Test::TeamGemm::ParamTag param_tag_type; typedef Algo::Gemm::Blocked algo_tag_type; - test_batched_teamgemm(); + test_batched_teamgemm(); } #endif diff --git 
a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv.hpp index dc3b4e53fbd5..d11930886264 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv.hpp @@ -32,8 +32,7 @@ using namespace KokkosBatched; namespace Test { namespace TeamGesv { -template +template struct Functor_TestBatchedTeamGesv { using execution_space = typename DeviceType::execution_space; const MatrixType _A; @@ -41,16 +40,14 @@ struct Functor_TestBatchedTeamGesv { const VectorType _B; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamGesv(const MatrixType &A, const VectorType &X, - const VectorType &B) - : _A(A), _X(X), _B(B) {} + Functor_TestBatchedTeamGesv(const MatrixType &A, const VectorType &X, const VectorType &B) : _A(A), _X(X), _B(B) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int matrix_id = static_cast(member.league_rank()); - auto A = Kokkos::subview(_A, matrix_id, Kokkos::ALL, Kokkos::ALL); - auto x = Kokkos::subview(_X, matrix_id, Kokkos::ALL); - auto b = Kokkos::subview(_B, matrix_id, Kokkos::ALL); + auto A = Kokkos::subview(_A, matrix_id, Kokkos::ALL, Kokkos::ALL); + auto x = Kokkos::subview(_X, matrix_id, Kokkos::ALL); + auto b = Kokkos::subview(_B, matrix_id, Kokkos::ALL); member.team_barrier(); KokkosBatched::TeamGesv::invoke(member, A, x, b); @@ -63,13 +60,10 @@ struct Functor_TestBatchedTeamGesv { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_X.extent(0), Kokkos::AUTO(), - Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_X.extent(0), Kokkos::AUTO(), Kokkos::AUTO()); - using MatrixViewType = - Kokkos::View; + using MatrixViewType = Kokkos::View; const int n = _A.extent(1); size_t bytes_0 = 
MatrixViewType::shmem_size(n, n + 4); @@ -80,15 +74,13 @@ struct Functor_TestBatchedTeamGesv { } }; -template +template void impl_test_batched_gesv(const int N, const int BlkSize) { typedef typename MatrixType::value_type value_type; typedef Kokkos::ArithTraits ats; using MagnitudeType = typename Kokkos::ArithTraits::mag_type; - using NormViewType = - Kokkos::View; + using NormViewType = Kokkos::View; NormViewType sqr_norm_j("sqr_norm_j", N); auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); @@ -109,23 +101,18 @@ void impl_test_batched_gesv(const int N, const int BlkSize) { Kokkos::fence(); - Functor_TestBatchedTeamGesv( - A, X, B) - .run(); + Functor_TestBatchedTeamGesv(A, X, B).run(); Kokkos::fence(); Kokkos::deep_copy(X_host, X); for (int l = 0; l < N; ++l) - KokkosBlas::SerialGemv:: - invoke(-1, Kokkos::subview(A_host, l, Kokkos::ALL, Kokkos::ALL), - Kokkos::subview(X_host, l, Kokkos::ALL), 1, - Kokkos::subview(B_host, l, Kokkos::ALL)); + KokkosBlas::SerialGemv::invoke( + -1, Kokkos::subview(A_host, l, Kokkos::ALL, Kokkos::ALL), Kokkos::subview(X_host, l, Kokkos::ALL), 1, + Kokkos::subview(B_host, l, Kokkos::ALL)); - KokkosBatched::SerialDot::invoke(B_host, B_host, - sqr_norm_j_host); + KokkosBatched::SerialDot::invoke(B_host, B_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e3 * ats::epsilon(); @@ -138,27 +125,21 @@ template int test_batched_team_gesv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixType; - typedef Kokkos::View - VectorType; + typedef Kokkos::View MatrixType; + typedef Kokkos::View VectorType; for (int i = 3; i < 10; ++i) { - Test::TeamGesv::impl_test_batched_gesv(1024, i); + Test::TeamGesv::impl_test_batched_gesv(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixType; - typedef Kokkos::View - VectorType; + typedef Kokkos::View MatrixType; + typedef Kokkos::View VectorType; for (int i = 3; i < 10; ++i) { - 
Test::TeamGesv::impl_test_batched_gesv(1024, i); + Test::TeamGesv::impl_test_batched_gesv(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv_Real.hpp index d0b04ea57cf1..6fd7241f0bf2 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamGesv_Real.hpp @@ -15,8 +15,7 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_team_gesv_static_pivoting_float) { - test_batched_team_gesv(); + test_batched_team_gesv(); } TEST_F(TestCategory, batched_scalar_team_gesv_no_pivoting_float) { test_batched_team_gesv(); @@ -25,8 +24,7 @@ TEST_F(TestCategory, batched_scalar_team_gesv_no_pivoting_float) { #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_team_gesv_static_pivoting_double) { - test_batched_team_gesv(); + test_batched_team_gesv(); } TEST_F(TestCategory, batched_scalar_team_gesv_no_pivoting_double) { test_batched_team_gesv(); diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU.hpp index a62e655d02e9..36d0aae738bf 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU.hpp @@ -19,14 +19,14 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Team_Impl.hpp" #include "KokkosBatched_LU_Decl.hpp" #include "KokkosBatched_LU_Team_Impl.hpp" #include "KokkosBatched_InverseLU_Decl.hpp" -//#include "KokkosBatched_InverseLU_Team_Impl.hpp" +// #include "KokkosBatched_InverseLU_Team_Impl.hpp" #include 
"KokkosKernels_TestUtils.hpp" @@ -41,8 +41,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_BatchedTeamGemm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -50,14 +49,12 @@ struct Functor_BatchedTeamGemm { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_BatchedTeamGemm(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_BatchedTeamGemm(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int k = member.league_rank(); auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); @@ -69,10 +66,8 @@ struct Functor_BatchedTeamGemm { } member.team_barrier(); - KokkosBatched::TeamGemm::invoke(member, _alpha, aa, bb, _beta, - cc); + KokkosBatched::TeamGemm::invoke(member, _alpha, aa, bb, _beta, cc); } inline void run() { @@ -83,8 +78,7 @@ struct Functor_BatchedTeamGemm { Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for((name + "::GemmFunctor").c_str(), policy, *this); Kokkos::Profiling::popRegion(); } @@ -124,15 +118,13 @@ struct Functor_BatchedTeamLU { } }; -template +template struct Functor_TestBatchedTeamInverseLU { AViewType _a; WViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamInverseLU(const AViewType &a, const WViewType &w) - : _a(a), _w(w) {} + Functor_TestBatchedTeamInverseLU(const AViewType &a, const WViewType &w) : _a(a), _w(w) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { @@ -140,8 +132,7 @@ struct 
Functor_TestBatchedTeamInverseLU { auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto ww = Kokkos::subview(_w, k, Kokkos::ALL()); - KokkosBatched::TeamInverseLU::invoke(member, aa, - ww); + KokkosBatched::TeamInverseLU::invoke(member, aa, ww); } inline void run() { @@ -158,8 +149,7 @@ struct Functor_TestBatchedTeamInverseLU { } }; -template +template void impl_test_batched_inverselu(const int N, const int BlkSize) { typedef typename AViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -170,8 +160,7 @@ void impl_test_batched_inverselu(const int N, const int BlkSize) { WViewType w("w", N, BlkSize * BlkSize); AViewType c0("c0", N, BlkSize, BlkSize); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fence(); @@ -181,15 +170,12 @@ void impl_test_batched_inverselu(const int N, const int BlkSize) { Functor_BatchedTeamLU(a1).run(); - Functor_TestBatchedTeamInverseLU(a1, w) - .run(); + Functor_TestBatchedTeamInverseLU(a1, w).run(); value_type alpha = 1.0, beta = 0.0; typedef ParamTag param_tag_type; - Functor_BatchedTeamGemm(alpha, a0, a1, beta, c0) + Functor_BatchedTeamGemm(alpha, a0, a1, beta, c0) .run(); Kokkos::fence(); @@ -220,33 +206,21 @@ template int test_batched_team_inverselu() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - AViewType; - typedef Kokkos::View - WViewType; - Test::TeamInverseLU::impl_test_batched_inverselu( - 0, 10); + typedef Kokkos::View AViewType; + typedef Kokkos::View WViewType; + Test::TeamInverseLU::impl_test_batched_inverselu(0, 10); for (int i = 0; i < 10; ++i) { - Test::TeamInverseLU::impl_test_batched_inverselu( - 1024, i); + Test::TeamInverseLU::impl_test_batched_inverselu(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - AViewType; - typedef Kokkos::View - WViewType; - Test::TeamInverseLU::impl_test_batched_inverselu( - 0, 10); + 
typedef Kokkos::View AViewType; + typedef Kokkos::View WViewType; + Test::TeamInverseLU::impl_test_batched_inverselu(0, 10); for (int i = 0; i < 10; ++i) { - Test::TeamInverseLU::impl_test_batched_inverselu( - 1024, i); + Test::TeamInverseLU::impl_test_batched_inverselu(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU_Complex.hpp index 7eb918beef27..cf670f2fc92f 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamInverseLU_Complex.hpp @@ -18,11 +18,9 @@ TEST_F(TestCategory, batched_scalar_team_inverselu_dcomplex) { // printf("Batched team inverse LU - double complex - algorithm type: // Unblocked\n"); - test_batched_inverselu, - Algo::InverseLU::Unblocked>(); + test_batched_inverselu, Algo::InverseLU::Unblocked>(); // printf("Batched team inverse LU - double complex - algorithm type: // Blocked\n"); - test_batched_inverselu, - Algo::InverseLU::Blocked>(); + test_batched_inverselu, Algo::InverseLU::Blocked>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamLU.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamLU.hpp index e20f3a7411ea..b662c4a365dd 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamLU.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamLU.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_LU_Decl.hpp" #include "KokkosBatched_LU_Serial_Impl.hpp" @@ -76,16 +76,14 @@ void impl_test_batched_lu(const int N, const int BlkSize) { /// randomized input testing views ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize); - 
Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fence(); Kokkos::deep_copy(a1, a0); - Functor_TestBatchedTeamLU(a0) - .run(); + Functor_TestBatchedTeamLU(a0).run(); Functor_TestBatchedTeamLU(a1).run(); Kokkos::fence(); @@ -117,27 +115,21 @@ template int test_batched_team_lu() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamLU::impl_test_batched_lu(0, - 10); + typedef Kokkos::View ViewType; + Test::TeamLU::impl_test_batched_lu(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamLU::impl_test_batched_lu( - 1024, i); + Test::TeamLU::impl_test_batched_lu(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamLU::impl_test_batched_lu(0, - 10); + typedef Kokkos::View ViewType; + Test::TeamLU::impl_test_batched_lu(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamLU::impl_test_batched_lu( - 1024, i); + Test::TeamLU::impl_test_batched_lu(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU.hpp index 445e10132f53..61a11e6be7f0 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU.hpp @@ -19,14 +19,14 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Team_Impl.hpp" #include "KokkosBatched_LU_Decl.hpp" #include "KokkosBatched_LU_Team_Impl.hpp" #include "KokkosBatched_SolveLU_Decl.hpp" -//#include "KokkosBatched_SolveLU_Team_Impl.hpp" +// #include 
"KokkosBatched_SolveLU_Team_Impl.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -41,8 +41,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_BatchedTeamGemm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -50,14 +49,12 @@ struct Functor_BatchedTeamGemm { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_BatchedTeamGemm(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_BatchedTeamGemm(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int k = member.league_rank(); auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); @@ -69,10 +66,8 @@ struct Functor_BatchedTeamGemm { } member.team_barrier(); - KokkosBatched::TeamGemm::invoke(member, _alpha, aa, bb, _beta, - cc); + KokkosBatched::TeamGemm::invoke(member, _alpha, aa, bb, _beta, cc); } inline void run() { @@ -82,8 +77,7 @@ struct Functor_BatchedTeamGemm { std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for((name + "::GemmFunctor").c_str(), policy, *this); Kokkos::Profiling::popRegion(); } @@ -120,16 +114,14 @@ struct Functor_BatchedTeamLU { Kokkos::Profiling::popRegion(); } }; -template +template struct Functor_TestBatchedTeamSolveLU { using execution_space = typename DeviceType::execution_space; ViewType _a; ViewType _b; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamSolveLU(const ViewType &a, const ViewType &b) - : _a(a), _b(b) {} + 
Functor_TestBatchedTeamSolveLU(const ViewType &a, const ViewType &b) : _a(a), _b(b) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { @@ -137,8 +129,7 @@ struct Functor_TestBatchedTeamSolveLU { auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); - KokkosBatched::TeamSolveLU::invoke( - member, aa, bb); + KokkosBatched::TeamSolveLU::invoke(member, aa, bb); } inline void run() { @@ -168,8 +159,7 @@ void impl_test_batched_solvelu(const int N, const int BlkSize) { // ViewType a0_T("a0_T", N, BlkSize, BlkSize); // ViewType b_T ("b_T", N, BlkSize, 5 ); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fill_random(x0, random, value_type(1.0)); @@ -181,15 +171,11 @@ void impl_test_batched_solvelu(const int N, const int BlkSize) { value_type alpha = 1.0, beta = 0.0; typedef ParamTag param_tag_type; - Functor_BatchedTeamGemm(alpha, a0, x0, beta, b) - .run(); + Functor_BatchedTeamGemm(alpha, a0, x0, beta, b).run(); Functor_BatchedTeamLU(a1).run(); - Functor_TestBatchedTeamSolveLU(a1, b) - .run(); + Functor_TestBatchedTeamSolveLU(a1, b).run(); Kokkos::fence(); @@ -246,25 +232,19 @@ template int test_batched_team_solvelu() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamSolveLU::impl_test_batched_solvelu(0, 10); + typedef Kokkos::View ViewType; + Test::TeamSolveLU::impl_test_batched_solvelu(0, 10); for (int i = 0; i < 10; ++i) { - Test::TeamSolveLU::impl_test_batched_solvelu(1024, i); + Test::TeamSolveLU::impl_test_batched_solvelu(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamSolveLU::impl_test_batched_solvelu(0, 10); + typedef Kokkos::View ViewType; + Test::TeamSolveLU::impl_test_batched_solvelu(0, 10); for (int i = 0; i < 10; ++i) { - 
Test::TeamSolveLU::impl_test_batched_solvelu(1024, i); + Test::TeamSolveLU::impl_test_batched_solvelu(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU_Complex.hpp index 865f58ef4315..f90498350937 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamSolveLU_Complex.hpp @@ -18,11 +18,9 @@ TEST_F(TestCategory, batched_scalar_team_solvelu_dcomplex) { // printf("Batched team solveLU - double complex - algorithm type: // Unblocked\n"); - test_batched_team_solvelu, - Algo::SolveLU::Unblocked>(); + test_batched_team_solvelu, Algo::SolveLU::Unblocked>(); // printf("Batched team solveLU - double complex - algorithm type: // Blocked\n"); - test_batched_team_solvelu, - Algo::SolveLU::Blocked>(); + test_batched_team_solvelu, Algo::SolveLU::Blocked>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm.hpp index 523bd02df481..5ae1e216d975 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Trsm_Decl.hpp" #include "KokkosBatched_Trsm_Serial_Impl.hpp" @@ -40,8 +40,7 @@ struct ParamTag { typedef D diag; }; -template +template struct Functor_TestBatchedTeamTrsm { using execution_space = typename DeviceType::execution_space; ViewType _a, _b; @@ -49,22 +48,20 @@ struct Functor_TestBatchedTeamTrsm { ScalarType _alpha; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamTrsm(const ScalarType alpha, const ViewType &a, - const ViewType 
&b) + Functor_TestBatchedTeamTrsm(const ScalarType alpha, const ViewType &a, const ViewType &b) : _a(a), _b(b), _alpha(alpha) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int k = member.league_rank(); auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); - KokkosBatched::TeamTrsm< - MemberType, typename ParamTagType::side, typename ParamTagType::uplo, - typename ParamTagType::trans, typename ParamTagType::diag, - AlgoTagType>::invoke(member, _alpha, aa, bb); + KokkosBatched::TeamTrsm::invoke(member, + _alpha, aa, + bb); } inline void run() { @@ -75,15 +72,13 @@ struct Functor_TestBatchedTeamTrsm { Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _b.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { typedef typename ViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -91,15 +86,13 @@ void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { /// randomized input testing views ScalarType alpha(1.0); - const bool is_side_right = - std::is_same::value; - const int b_nrows = is_side_right ? NumCols : BlkSize; - const int b_ncols = is_side_right ? BlkSize : NumCols; - ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), - b0("b0", N, b_nrows, b_ncols), b1("b1", N, b_nrows, b_ncols); + const bool is_side_right = std::is_same::value; + const int b_nrows = is_side_right ? NumCols : BlkSize; + const int b_ncols = is_side_right ? 
BlkSize : NumCols; + ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), b0("b0", N, b_nrows, b_ncols), + b1("b1", N, b_nrows, b_ncols); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fill_random(b0, random, value_type(1.0)); @@ -108,12 +101,9 @@ void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { Kokkos::deep_copy(a1, a0); Kokkos::deep_copy(b1, b0); - Functor_TestBatchedTeamTrsm(alpha, a0, b0) - .run(); - Functor_TestBatchedTeamTrsm(alpha, a1, b1) + Functor_TestBatchedTeamTrsm(alpha, a0, b0) .run(); + Functor_TestBatchedTeamTrsm(alpha, a1, b1).run(); Kokkos::fence(); @@ -140,40 +130,27 @@ void impl_test_batched_trsm(const int N, const int BlkSize, const int NumCols) { } // namespace TeamTrsm } // namespace Test -template +template int test_batched_team_trsm() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamTrsm::impl_test_batched_trsm(0, 10, 4); + typedef Kokkos::View ViewType; + Test::TeamTrsm::impl_test_batched_trsm(0, 10, 4); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamTrsm::impl_test_batched_trsm(1024, i, - 4); - Test::TeamTrsm::impl_test_batched_trsm(1024, i, - 1); + Test::TeamTrsm::impl_test_batched_trsm(1024, i, 4); + Test::TeamTrsm::impl_test_batched_trsm(1024, i, 1); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamTrsm::impl_test_batched_trsm(0, 10, 4); + typedef Kokkos::View ViewType; + Test::TeamTrsm::impl_test_batched_trsm(0, 10, 4); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::TeamTrsm::impl_test_batched_trsm(1024, i, - 4); - Test::TeamTrsm::impl_test_batched_trsm(1024, i, - 1); + Test::TeamTrsm::impl_test_batched_trsm(1024, i, 4); + Test::TeamTrsm::impl_test_batched_trsm(1024, i, 1); } } #endif diff 
--git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Complex.hpp index 0cf2761922b7..cf9cafeb9e42 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Complex.hpp @@ -16,176 +16,106 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_u_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_n_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_n_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, 
algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_u_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_n_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } // TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_u_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_n_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_u_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, 
batched_scalar_team_trsm_l_u_t_n_dcomplex_dcomplex) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, - Kokkos::complex, param_tag_type, - algo_tag_type>(); + test_batched_team_trsm, Kokkos::complex, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_u_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_n_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_u_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_n_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_u_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag 
param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_n_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } // TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_u_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_n_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_u_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, param_tag_type, algo_tag_type>(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_n_dcomplex_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm, double, - param_tag_type, algo_tag_type>(); + test_batched_team_trsm, double, 
param_tag_type, algo_tag_type>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Real.hpp index 6757617ddd11..cd1d2a72117b 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsm_Real.hpp @@ -16,168 +16,108 @@ #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_u_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_n_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_u_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_n_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_u_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_n_float_float) { - typedef ::Test::TeamTrsm::ParamTag - 
param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } // TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_u_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_n_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_u_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_n_float_float) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_u_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_nt_n_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_u_double_double) { - typedef 
::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_nt_n_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_u_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_r_u_nt_n_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } // TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_u_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_l_t_n_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_u_double_double) { - typedef ::Test::TeamTrsm::ParamTag - param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } TEST_F(TestCategory, batched_scalar_team_trsm_l_u_t_n_double_double) { - typedef ::Test::TeamTrsm::ParamTag - 
param_tag_type; + typedef ::Test::TeamTrsm::ParamTag param_tag_type; typedef Algo::Trsm::Blocked algo_tag_type; - test_batched_team_trsm(); + test_batched_team_trsm(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsv.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsv.hpp index 400e35deb8e8..37e8708bd29c 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsv.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamTrsv.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Trsv_Decl.hpp" #include "KokkosBatched_Trsv_Serial_Impl.hpp" @@ -38,8 +38,7 @@ struct ParamTag { typedef D diag; }; -template +template struct Functor_TestBatchedTeamTrsv { using execution_space = typename DeviceType::execution_space; ViewType _a, _b; @@ -47,22 +46,18 @@ struct Functor_TestBatchedTeamTrsv { ScalarType _alpha; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamTrsv(const ScalarType alpha, const ViewType &a, - const ViewType &b) + Functor_TestBatchedTeamTrsv(const ScalarType alpha, const ViewType &a, const ViewType &b) : _a(a), _b(b), _alpha(alpha) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int k = member.league_rank(); auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), 0); - KokkosBatched::TeamTrsv< - MemberType, typename ParamTagType::uplo, typename ParamTagType::trans, - typename ParamTagType::diag, AlgoTagType>::invoke(member, _alpha, aa, - bb); + KokkosBatched::TeamTrsv::invoke(member, _alpha, aa, bb); } inline void run() { @@ -73,15 +68,13 @@ struct Functor_TestBatchedTeamTrsv { 
Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _b.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_trsv(const int N, const int BlkSize) { typedef typename ViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -89,11 +82,10 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { /// randomized input testing views ScalarType alpha(1.5); - ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), - b0("b0", N, BlkSize, 1), b1("b1", N, BlkSize, 1); + ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), b0("b0", N, BlkSize, 1), + b1("b1", N, BlkSize, 1); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a0, random, value_type(1.0)); Kokkos::fill_random(b0, random, value_type(1.0)); @@ -104,12 +96,9 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { Kokkos::deep_copy(a1, a0); Kokkos::deep_copy(b1, b0); - Functor_TestBatchedTeamTrsv(alpha, a0, b0) - .run(); - Functor_TestBatchedTeamTrsv(alpha, a1, b1) + Functor_TestBatchedTeamTrsv(alpha, a0, b0) .run(); + Functor_TestBatchedTeamTrsv(alpha, a1, b1).run(); Kokkos::fence(); @@ -136,34 +125,25 @@ void impl_test_batched_trsv(const int N, const int BlkSize) { } // namespace TeamTrsv } // namespace Test -template +template int test_batched_team_trsv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamTrsv::impl_test_batched_trsv(0, 10); + typedef Kokkos::View ViewType; + Test::TeamTrsv::impl_test_batched_trsv(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamTrsv::impl_test_batched_trsv(1024, - i); + Test::TeamTrsv::impl_test_batched_trsv(1024, i); } } #endif #if 
defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamTrsv::impl_test_batched_trsv(0, 10); + typedef Kokkos::View ViewType; + Test::TeamTrsv::impl_test_batched_trsv(0, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::TeamTrsv::impl_test_batched_trsv(1024, - i); + Test::TeamTrsv::impl_test_batched_trsv(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy.hpp index fca0534b4b47..cd378745efda 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy.hpp @@ -37,9 +37,7 @@ struct Functor_TestBatchedTeamVectorAxpy { const int _N_team; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorAxpy(const alphaViewType &alpha, - const ViewType &X, const ViewType &Y, - const int N_team) + Functor_TestBatchedTeamVectorAxpy(const alphaViewType &alpha, const ViewType &X, const ViewType &Y, const int N_team) : _alpha(alpha), _X(X), _Y(Y), _N_team(N_team) {} template @@ -47,16 +45,12 @@ struct Functor_TestBatchedTeamVectorAxpy { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _X.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); - auto alpha = - Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto alpha = Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); KokkosBatched::TeamVectorAxpy::invoke(member, alpha, x, y); } @@ -67,8 +61,7 @@ struct Functor_TestBatchedTeamVectorAxpy { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_X.extent(0) / _N_team, - Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_X.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } @@ -81,13 +74,11 @@ void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { typedef typename alphaViewType::const_value_type alpha_const_value_type; typedef Kokkos::ArithTraits ats; - ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), - Y1("y1", N, BlkSize); + ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), Y1("y1", N, BlkSize); alphaViewType alpha("alpha", N); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(X0, random, const_value_type(1.0)); Kokkos::fill_random(Y0, random, const_value_type(1.0)); Kokkos::fill_random(alpha, random, alpha_const_value_type(1.0)); @@ -107,12 +98,9 @@ void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(Y0_host, Y0); for (int l = 0; l 
< N; ++l) - for (int i = 0; i < BlkSize; ++i) - Y0_host(l, i) += alpha_host(l) * X0_host(l, i); + for (int i = 0; i < BlkSize; ++i) Y0_host(l, i) += alpha_host(l) * X0_host(l, i); - Functor_TestBatchedTeamVectorAxpy( - alpha, X1, Y1, N_team) - .run(); + Functor_TestBatchedTeamVectorAxpy(alpha, X1, Y1, N_team).run(); Kokkos::fence(); @@ -141,25 +129,20 @@ int test_batched_teamvector_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorAxpy::impl_test_batched_axpy(1024, i, 2); + Test::TeamVectorAxpy::impl_test_batched_axpy(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View ViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorAxpy::impl_test_batched_axpy(1024, i, 2); + Test::TeamVectorAxpy::impl_test_batched_axpy(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy_Complex.hpp index b1f70a723e15..0e8cb013f1be 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorAxpy_Complex.hpp @@ -16,8 +16,7 @@ #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_teamvector_axpy_nt_dcomplex_dcomplex) { - test_batched_teamvector_axpy, - Kokkos::complex>(); + test_batched_teamvector_axpy, Kokkos::complex>(); } TEST_F(TestCategory, batched_scalar_teamvector_axpy_nt_dcomplex_double) { diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm.hpp index 
f2f3bc217d01..2ebc10f2e055 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm.hpp @@ -33,8 +33,7 @@ struct ParamTag { typedef TB transB; }; -template +template struct Functor_TestBatchedTeamVector { using execution_space = typename DeviceType::execution_space; ViewType _a, _b, _c; @@ -42,24 +41,20 @@ struct Functor_TestBatchedTeamVector { ScalarType _alpha, _beta; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVector(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, + Functor_TestBatchedTeamVector(const ScalarType alpha, const ViewType &a, const ViewType &b, const ScalarType beta, const ViewType &c) : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int k = member.league_rank(); auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(_c, k, Kokkos::ALL(), Kokkos::ALL()); - KokkosBatched::TeamVectorGemm::invoke(member, _alpha, aa, bb, - _beta, cc); + KokkosBatched::TeamVectorGemm::invoke(member, _alpha, aa, bb, _beta, cc); } inline void run() { @@ -69,19 +64,15 @@ struct Functor_TestBatchedTeamVector { std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template -void impl_test_batched_teamvectorgemm(const int N, const int matAdim1, - const int matAdim2, const int matBdim1, - const int matBdim2, const int matCdim1, - const 
int matCdim2) { +template +void impl_test_batched_teamvectorgemm(const int N, const int matAdim1, const int matAdim2, const int matBdim1, + const int matBdim2, const int matCdim1, const int matCdim2) { using transA = typename ParamTagType::transA; using transB = typename ParamTagType::transB; using execution_space = typename DeviceType::execution_space; @@ -91,15 +82,11 @@ void impl_test_batched_teamvectorgemm(const int N, const int matAdim1, /// randomized input testing views ScalarType alpha = ScalarType(1.5), beta = ScalarType(3.0); - ViewType a_expected("a_expected", N, matAdim1, matAdim2), - a_actual("a_actual", N, matAdim1, matAdim2), - b_expected("b_expected", N, matBdim1, matBdim2), - b_actual("b_actual", N, matBdim1, matBdim2), - c_expected("c_expected", N, matCdim1, matCdim2), - c_actual("c_actual", N, matCdim1, matCdim2); + ViewType a_expected("a_expected", N, matAdim1, matAdim2), a_actual("a_actual", N, matAdim1, matAdim2), + b_expected("b_expected", N, matBdim1, matBdim2), b_actual("b_actual", N, matBdim1, matBdim2), + c_expected("c_expected", N, matCdim1, matCdim2), c_actual("c_actual", N, matCdim1, matCdim2); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a_expected, random, value_type(1.0)); Kokkos::fill_random(b_expected, random, value_type(1.0)); @@ -114,8 +101,7 @@ void impl_test_batched_teamvectorgemm(const int N, const int matAdim1, // Functor_TestBatchedTeamVector(alpha, a_expected, b_expected, // beta, c_expected).run(); - Functor_BatchedVanillaGEMM - vgemm; + Functor_BatchedVanillaGEMM vgemm; vgemm.A_t = std::is_same::value; vgemm.B_t = std::is_same::value; vgemm.A_c = vgemm.B_c = false; @@ -126,17 +112,14 @@ void impl_test_batched_teamvectorgemm(const int N, const int matAdim1, vgemm.beta = beta; vgemm.run(); // Compute c_expected - Functor_TestBatchedTeamVector(alpha, a_actual, b_actual, beta, - c_actual) + Functor_TestBatchedTeamVector(alpha, a_actual, b_actual, + 
beta, c_actual) .run(); Kokkos::fence(); - typename ViewType::HostMirror c_expected_host = - Kokkos::create_mirror_view(c_expected); - typename ViewType::HostMirror c_actual_host = - Kokkos::create_mirror_view(c_actual); + typename ViewType::HostMirror c_expected_host = Kokkos::create_mirror_view(c_expected); + typename ViewType::HostMirror c_actual_host = Kokkos::create_mirror_view(c_actual); // Copy to host for comparison Kokkos::deep_copy(c_expected_host, c_expected); @@ -165,111 +148,80 @@ void impl_test_batched_teamvectorgemm(const int N, const int matAdim1, // void (*impl_test)(const int, const int, const int, const int, const int, // const int, const int) -template +template int test_batched_teamvectorgemm() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + typedef Kokkos::View ViewType; + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm( 0, 10, 10, 10, 10, 10, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, i, i, i, i, i, i); + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, i, i, i, i, i, i); } for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); int dimM = i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimM, dimK, dimK, dimN, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimM, dimK, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - 
DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimM, dimK, dimN, dimK, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimM, dimK, dimN, dimK, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimK, dimM, dimK, dimN, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimK, dimM, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimK, dimM, dimN, dimK, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimK, dimM, dimN, dimK, dimM, dimN); } } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( + typedef Kokkos::View ViewType; + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm( 0, 10, 10, 10, 10, 10, 10); for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, i, i, i, i, i, i); + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, i, i, i, i, i, i); } for (int i = 0; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); int dimM = i; int dimN = 2 * i; int dimK = 3 * i; - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 
1024, dimM, dimK, dimK, dimN, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimM, dimK, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimM, dimK, dimN, dimK, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimM, dimK, dimN, dimK, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimK, dimM, dimK, dimN, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimK, dimM, dimK, dimN, dimM, dimN); } - if ((std::is_same::value) && - (std::is_same::value)) { - Test::TeamVectorGemm::impl_test_batched_teamvectorgemm< - DeviceType, ViewType, ScalarType, ParamTagType, AlgoTagType>( - 1024, dimK, dimM, dimN, dimK, dimM, dimN); + if ((std::is_same::value) && + (std::is_same::value)) { + Test::TeamVectorGemm::impl_test_batched_teamvectorgemm(1024, dimK, dimM, dimN, dimK, dimM, dimN); } } } diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Complex.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Complex.hpp index cc6cbdd5119f..3d8bd949da51 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Complex.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Complex.hpp @@ -15,80 +15,62 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_nt_scomplex_scomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + 
typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_nt_scomplex_scomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_t_scomplex_scomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_t_scomplex_scomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_nt_dcomplex_dcomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, 
- Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_nt_dcomplex_dcomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_t_dcomplex_dcomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_t_dcomplex_dcomplex) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm,Kokkos::complex,param_tag_type,Algo::Gemm::Blocked>(); - test_batched_teamvectorgemm, - Kokkos::complex, param_tag_type, + test_batched_teamvectorgemm, Kokkos::complex, param_tag_type, Algo::Gemm::Unblocked>(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Real.hpp index e96bc1ac5c5a..74a32c13e90b 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGemm_Real.hpp @@ -15,152 +15,116 @@ //@HEADER #if defined(KOKKOS_BHALF_T_IS_FLOAT) TEST_F(TestCategory, 
batched_scalar_team_vector_gemm_nt_nt_bhalf_bhalf) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_nt_bhalf_bhalf) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_t_bhalf_bhalf) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_t_bhalf_bhalf) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } #endif // KOKKOS_BHALF_T_IS_FLOAT #if defined(KOKKOS_HALF_T_IS_FLOAT) TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_nt_half_half) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_nt_half_half) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_t_half_half) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_t_half_half) { - typedef ::Test::TeamVectorGemm::ParamTag - 
param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); } #endif // KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_nt_float_float) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_nt_float_float) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_t_float_float) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_t_float_float) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_nt_double_double) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_nt_double_double) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - 
test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_nt_t_double_double) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } TEST_F(TestCategory, batched_scalar_team_vector_gemm_t_t_double_double) { - typedef ::Test::TeamVectorGemm::ParamTag - param_tag_type; + typedef ::Test::TeamVectorGemm::ParamTag param_tag_type; // test_batched_teamvectorgemm(); - test_batched_teamvectorgemm(); + test_batched_teamvectorgemm(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv.hpp index ddb1a5c40d75..dba452da53e7 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv.hpp @@ -32,8 +32,7 @@ using namespace KokkosBatched; namespace Test { namespace TeamVectorGesv { -template +template struct Functor_TestBatchedTeamVectorGesv { using execution_space = typename DeviceType::execution_space; const MatrixType _A; @@ -41,20 +40,18 @@ struct Functor_TestBatchedTeamVectorGesv { const VectorType _B; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGesv(const MatrixType &A, const VectorType &X, - const VectorType &B) + Functor_TestBatchedTeamVectorGesv(const MatrixType &A, const VectorType &X, const VectorType &B) : _A(A), _X(X), _B(B) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int matrix_id = static_cast(member.league_rank()); - auto A = Kokkos::subview(_A, matrix_id, Kokkos::ALL, Kokkos::ALL); - auto x = Kokkos::subview(_X, matrix_id, Kokkos::ALL); - auto b = Kokkos::subview(_B, matrix_id, Kokkos::ALL); + auto A = Kokkos::subview(_A, matrix_id, Kokkos::ALL, 
Kokkos::ALL); + auto x = Kokkos::subview(_X, matrix_id, Kokkos::ALL); + auto b = Kokkos::subview(_B, matrix_id, Kokkos::ALL); member.team_barrier(); - KokkosBatched::TeamVectorGesv::invoke(member, A, x, - b); + KokkosBatched::TeamVectorGesv::invoke(member, A, x, b); member.team_barrier(); } @@ -64,13 +61,10 @@ struct Functor_TestBatchedTeamVectorGesv { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_X.extent(0), Kokkos::AUTO(), - Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_X.extent(0), Kokkos::AUTO(), Kokkos::AUTO()); - using MatrixViewType = - Kokkos::View; + using MatrixViewType = Kokkos::View; const int n = _A.extent(1); size_t bytes_0 = MatrixViewType::shmem_size(n, n + 4); @@ -81,15 +75,13 @@ struct Functor_TestBatchedTeamVectorGesv { } }; -template +template void impl_test_batched_gesv(const int N, const int BlkSize) { typedef typename MatrixType::value_type value_type; typedef Kokkos::ArithTraits ats; using MagnitudeType = typename Kokkos::ArithTraits::mag_type; - using NormViewType = - Kokkos::View; + using NormViewType = Kokkos::View; NormViewType sqr_norm_j("sqr_norm_j", N); auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); @@ -110,23 +102,18 @@ void impl_test_batched_gesv(const int N, const int BlkSize) { Kokkos::fence(); - Functor_TestBatchedTeamVectorGesv(A, X, B) - .run(); + Functor_TestBatchedTeamVectorGesv(A, X, B).run(); Kokkos::fence(); Kokkos::deep_copy(X_host, X); for (int l = 0; l < N; ++l) - KokkosBlas::SerialGemv:: - invoke(-1, Kokkos::subview(A_host, l, Kokkos::ALL, Kokkos::ALL), - Kokkos::subview(X_host, l, Kokkos::ALL), 1, - Kokkos::subview(B_host, l, Kokkos::ALL)); + KokkosBlas::SerialGemv::invoke( + -1, Kokkos::subview(A_host, l, Kokkos::ALL, Kokkos::ALL), Kokkos::subview(X_host, l, Kokkos::ALL), 1, + Kokkos::subview(B_host, l, Kokkos::ALL)); - KokkosBatched::SerialDot::invoke(B_host, 
B_host, - sqr_norm_j_host); + KokkosBatched::SerialDot::invoke(B_host, B_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e3 * ats::epsilon(); @@ -139,29 +126,21 @@ template int test_batched_teamvector_gesv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixType; - typedef Kokkos::View - VectorType; + typedef Kokkos::View MatrixType; + typedef Kokkos::View VectorType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorGesv::impl_test_batched_gesv( - 1024, i); + Test::TeamVectorGesv::impl_test_batched_gesv(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixType; - typedef Kokkos::View - VectorType; + typedef Kokkos::View MatrixType; + typedef Kokkos::View VectorType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorGesv::impl_test_batched_gesv( - 1024, i); + Test::TeamVectorGesv::impl_test_batched_gesv(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv_Real.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv_Real.hpp index 66c6fb3691f2..73a6281fe589 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv_Real.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorGesv_Real.hpp @@ -15,22 +15,18 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) TEST_F(TestCategory, batched_scalar_teamvector_gesv_static_pivoting_float) { - test_batched_teamvector_gesv(); + test_batched_teamvector_gesv(); } TEST_F(TestCategory, batched_scalar_teamvector_gesv_no_pivoting_float) { - test_batched_teamvector_gesv(); + test_batched_teamvector_gesv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_teamvector_gesv_static_pivoting_double) { - test_batched_teamvector_gesv(); + test_batched_teamvector_gesv(); } TEST_F(TestCategory, batched_scalar_teamvector_gesv_no_pivoting_double) { - test_batched_teamvector_gesv(); + 
test_batched_teamvector_gesv(); } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp index 84ccb396117d..2f4812179a8b 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp @@ -32,8 +32,8 @@ using namespace KokkosBatched; namespace Test { -template +template struct Functor_TestBatchedTeamVectorQR { using execution_space = typename DeviceType::execution_space; MatrixViewType _a; @@ -41,11 +41,8 @@ struct Functor_TestBatchedTeamVectorQR { WorkViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorQR(const MatrixViewType &a, - const VectorViewType &x, - const VectorViewType &b, - const VectorViewType &t, - const WorkViewType &w) + Functor_TestBatchedTeamVectorQR(const MatrixViewType &a, const VectorViewType &x, const VectorViewType &b, + const VectorViewType &t, const WorkViewType &w) : _a(a), _x(x), _b(b), _t(t), _w(w) {} template @@ -61,17 +58,15 @@ struct Functor_TestBatchedTeamVectorQR { auto ww = Kokkos::subview(_w, k, Kokkos::ALL()); // make diagonal dominant - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, aa.extent(0)), - [&](const int &i) { aa(i, i) += add_this; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, aa.extent(0)), [&](const int &i) { aa(i, i) += add_this; }); /// xx = 1 KokkosBlas::TeamVectorSet::invoke(member, one, xx); member.team_barrier(); /// bb = AA*xx - KokkosBlas::TeamVectorGemv::invoke(member, one, aa, - xx, zero, bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, xx, zero, + bb); member.team_barrier(); /// AA = QR @@ -83,13 +78,12 @@ struct Functor_TestBatchedTeamVectorQR { member.team_barrier(); /// xx = Q^{T}xx; - TeamVectorApplyQ::invoke(member, aa, tt, xx, ww); + TeamVectorApplyQ::invoke(member, aa, tt, xx, ww); member.team_barrier(); /// xx = R^{-1} xx - 
TeamVectorTrsv::invoke(member, one, aa, xx); + TeamVectorTrsv::invoke( + member, one, aa, xx); } inline void run() { @@ -107,8 +101,8 @@ struct Functor_TestBatchedTeamVectorQR { } }; -template +template void impl_test_batched_qr(const int N, const int BlkSize) { typedef typename MatrixViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ats; @@ -122,14 +116,12 @@ void impl_test_batched_qr(const int N, const int BlkSize) { Kokkos::fence(); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a, random, value_type(1.0)); Kokkos::fence(); - Functor_TestBatchedTeamVectorQR(a, x, b, t, w) + Functor_TestBatchedTeamVectorQR(a, x, b, t, w) .run(); Kokkos::fence(); @@ -157,35 +149,25 @@ template int test_batched_qr() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_qr(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_qr(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::impl_test_batched_qr(1024, i); + Test::impl_test_batched_qr(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_qr(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_qr(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::impl_test_batched_qr(1024, i); + Test::impl_test_batched_qr(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp 
b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp index 09427aa25ef3..f66cebe07dd7 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp @@ -32,8 +32,8 @@ using namespace KokkosBatched; namespace Test { -template +template struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { using execution_space = typename DeviceType::execution_space; MatrixViewType _a; @@ -42,9 +42,9 @@ struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { WorkViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorQR_WithColumnPivoting( - const MatrixViewType &a, const VectorViewType &x, const VectorViewType &b, - const VectorViewType &t, const PivotViewType &p, const WorkViewType &w) + Functor_TestBatchedTeamVectorQR_WithColumnPivoting(const MatrixViewType &a, const VectorViewType &x, + const VectorViewType &b, const VectorViewType &t, + const PivotViewType &p, const WorkViewType &w) : _a(a), _x(x), _b(b), _t(t), _p(p), _w(w) {} template @@ -69,15 +69,13 @@ struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { member.team_barrier(); /// bb = AA*xx - KokkosBlas::TeamVectorGemv::invoke(member, one, aa, - xx, zero, bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, xx, zero, + bb); member.team_barrier(); /// AA P^T = QR int matrix_rank(0); - TeamVectorQR_WithColumnPivoting::invoke( - member, aa, tt, pp, ww, matrix_rank); + TeamVectorQR_WithColumnPivoting::invoke(member, aa, tt, pp, ww, matrix_rank); member.team_barrier(); /// xx = bb; @@ -85,25 +83,22 @@ struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { member.team_barrier(); /// xx = Q^{T} xx; - TeamVectorApplyQ::invoke(member, aa, tt, xx, ww); + TeamVectorApplyQ::invoke(member, aa, tt, xx, ww); member.team_barrier(); /// xx = R^{-1} xx - TeamVectorTrsv::invoke(member, one, aa, xx); + 
TeamVectorTrsv::invoke( + member, one, aa, xx); member.team_barrier(); /// xx = P xx - TeamVectorApplyPivot::invoke( - member, pp, xx); + TeamVectorApplyPivot::invoke(member, pp, xx); member.team_barrier(); } inline void run() { typedef typename MatrixViewType::non_const_value_type value_type; - std::string name_region( - "KokkosBatched::Test::TeamVectorQR_WithColumnPivoting"); + std::string name_region("KokkosBatched::Test::TeamVectorQR_WithColumnPivoting"); const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); @@ -116,8 +111,8 @@ struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { } }; -template +template void impl_test_batched_qr_with_columnpivoting(const int N, const int BlkSize) { typedef typename MatrixViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ats; @@ -132,15 +127,13 @@ void impl_test_batched_qr_with_columnpivoting(const int N, const int BlkSize) { Kokkos::fence(); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a, random, value_type(1.0)); Kokkos::fence(); - Functor_TestBatchedTeamVectorQR_WithColumnPivoting< - DeviceType, MatrixViewType, VectorViewType, PivotViewType, WorkViewType, - AlgoTagType>(a, x, b, t, p, w) + Functor_TestBatchedTeamVectorQR_WithColumnPivoting(a, x, b, t, p, w) .run(); Kokkos::fence(); @@ -164,48 +157,35 @@ void impl_test_batched_qr_with_columnpivoting(const int N, const int BlkSize) { } } // namespace Test -template +template int test_batched_qr_with_columnpivoting() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - PivotViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_qr_with_columnpivoting< - DeviceType, MatrixViewType, VectorViewType, PivotViewType, WorkViewType, - AlgoTagType>(0, 10); + typedef 
Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivotViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_qr_with_columnpivoting(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::impl_test_batched_qr_with_columnpivoting< - DeviceType, MatrixViewType, VectorViewType, PivotViewType, - WorkViewType, AlgoTagType>(1024, i); + Test::impl_test_batched_qr_with_columnpivoting(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - PivotViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_qr_with_columnpivoting< - DeviceType, MatrixViewType, VectorViewType, PivotViewType, WorkViewType, - AlgoTagType>(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivotViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_qr_with_columnpivoting(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::impl_test_batched_qr_with_columnpivoting< - DeviceType, MatrixViewType, VectorViewType, PivotViewType, - WorkViewType, AlgoTagType>(1024, i); + Test::impl_test_batched_qr_with_columnpivoting(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV.hpp index 2f30c7d3c175..fdf482b4ab91 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV.hpp @@ -32,8 +32,8 @@ using namespace KokkosBatched; namespace Test { -template +template struct Functor_TestBatchedTeamVectorSolveUTV { using execution_space = typename DeviceType::execution_space; MatrixViewType _r, _a, _acopy, 
_u, _v; @@ -42,11 +42,9 @@ struct Functor_TestBatchedTeamVectorSolveUTV { WorkViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorSolveUTV( - const MatrixViewType &r, const MatrixViewType &a, - const MatrixViewType &acopy, const MatrixViewType &u, - const MatrixViewType &v, const PivViewType &p, const VectorViewType &x, - const VectorViewType &b, const WorkViewType &w) + Functor_TestBatchedTeamVectorSolveUTV(const MatrixViewType &r, const MatrixViewType &a, const MatrixViewType &acopy, + const MatrixViewType &u, const MatrixViewType &v, const PivViewType &p, + const VectorViewType &x, const VectorViewType &b, const WorkViewType &w) : _r(r), _a(a), _acopy(acopy), _u(u), _v(v), _p(p), _x(x), _b(b), _w(w) {} template @@ -72,22 +70,18 @@ struct Functor_TestBatchedTeamVectorSolveUTV { // make diagonal dominant and set xx = 1,2,3,4,5 const int m = aa.extent(0), r = rr.extent(1); if (m <= r) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { - aa(i, i) += add_this; - xx(i) = (i + 1); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { + aa(i, i) += add_this; + xx(i) = (i + 1); + }); } else { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m * m), - [=](const int &ij) { - const int i = ij / m, j = ij % m; - value_type tmp(0); - for (int l = 0; l < r; ++l) - tmp += rr(i, l) * rr(j, l); - aa(i, j) = tmp; - }); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { xx(i) = (i + 1); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m * m), [=](const int &ij) { + const int i = ij / m, j = ij % m; + value_type tmp(0); + for (int l = 0; l < r; ++l) tmp += rr(i, l) * rr(j, l); + aa(i, j) = tmp; + }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { xx(i) = (i + 1); }); } member.team_barrier(); // finish writing aa, xx @@ -95,9 +89,8 @@ struct Functor_TestBatchedTeamVectorSolveUTV { TeamVectorCopy::invoke(member, aa, ac); /// bb = 
AA*xx - KokkosBlas::TeamVectorGemv::invoke(member, one, aa, - xx, zero, bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, xx, zero, + bb); member.team_barrier(); /// Solving Ax = b using UTV transformation @@ -106,12 +99,10 @@ struct Functor_TestBatchedTeamVectorSolveUTV { /// UTV = A P^T int matrix_rank(0); - TeamVectorUTV::invoke(member, aa, pp, uu, vv, ww, - matrix_rank); + TeamVectorUTV::invoke(member, aa, pp, uu, vv, ww, matrix_rank); member.team_barrier(); - TeamVectorSolveUTV::invoke(member, matrix_rank, uu, - aa, vv, pp, xx, bb, ww); + TeamVectorSolveUTV::invoke(member, matrix_rank, uu, aa, vv, pp, xx, bb, ww); } inline void run() { @@ -129,8 +120,8 @@ struct Functor_TestBatchedTeamVectorSolveUTV { } }; -template +template void impl_test_batched_solve_utv(const int N, const int BlkSize) { typedef typename MatrixViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ats; @@ -148,8 +139,7 @@ void impl_test_batched_solve_utv(const int N, const int BlkSize) { Kokkos::fence(); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); if (BlkSize <= 3) Kokkos::fill_random(a, random, value_type(1.0)); else @@ -157,10 +147,8 @@ void impl_test_batched_solve_utv(const int N, const int BlkSize) { Kokkos::fence(); - Functor_TestBatchedTeamVectorSolveUTV( - r, a, acopy, u, v, p, x, b, w) + Functor_TestBatchedTeamVectorSolveUTV(r, a, acopy, u, v, p, x, b, w) .run(); Kokkos::fence(); @@ -203,48 +191,35 @@ void impl_test_batched_solve_utv(const int N, const int BlkSize) { } } // namespace Test -template +template int test_batched_solve_utv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - PivViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_solve_utv MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivViewType; + typedef Kokkos::View WorkViewType; + 
Test::impl_test_batched_solve_utv(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::impl_test_batched_solve_utv(1024, i); + Test::impl_test_batched_solve_utv(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - PivViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_solve_utv MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_solve_utv(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::impl_test_batched_solve_utv(1024, i); + Test::impl_test_batched_solve_utv(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV2.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV2.hpp index cf7084a92cbf..b38fb318e610 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV2.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorSolveUTV2.hpp @@ -32,8 +32,8 @@ using namespace KokkosBatched; namespace Test { -template +template struct Functor_TestBatchedTeamVectorSolveUTV2 { using execution_space = typename DeviceType::execution_space; MatrixViewType _r, _a, _acopy, _u, _v; @@ -42,11 +42,9 @@ struct Functor_TestBatchedTeamVectorSolveUTV2 { WorkViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorSolveUTV2( - const MatrixViewType &r, const MatrixViewType &a, - const MatrixViewType &acopy, const MatrixViewType &u, - const MatrixViewType &v, const PivViewType &p, const VectorViewType &x, - const VectorViewType &b, const WorkViewType &w) + Functor_TestBatchedTeamVectorSolveUTV2(const MatrixViewType &r, const MatrixViewType &a, const MatrixViewType &acopy, + const MatrixViewType &u, 
const MatrixViewType &v, const PivViewType &p, + const VectorViewType &x, const VectorViewType &b, const WorkViewType &w) : _r(r), _a(a), _acopy(acopy), _u(u), _v(v), _p(p), _x(x), _b(b), _w(w) {} template @@ -72,24 +70,20 @@ struct Functor_TestBatchedTeamVectorSolveUTV2 { // make diagonal dominant and set xx = 1,2,3,4,5 const int m = aa.extent(0), r = rr.extent(1); if (m <= r) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { - aa(i, i) += add_this; - for (int j = 0; j < 2; ++j) xx(i, j) = (i + 1); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { + aa(i, i) += add_this; + for (int j = 0; j < 2; ++j) xx(i, j) = (i + 1); + }); } else { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m * m), - [=](const int &ij) { - const int i = ij / m, j = ij % m; - value_type tmp(0); - for (int l = 0; l < r; ++l) - tmp += rr(i, l) * rr(j, l); - aa(i, j) = tmp; - }); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { - for (int j = 0; j < 2; ++j) xx(i, j) = (i + 1); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m * m), [=](const int &ij) { + const int i = ij / m, j = ij % m; + value_type tmp(0); + for (int l = 0; l < r; ++l) tmp += rr(i, l) * rr(j, l); + aa(i, j) = tmp; + }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { + for (int j = 0; j < 2; ++j) xx(i, j) = (i + 1); + }); } member.team_barrier(); // finish writing aa, xx @@ -97,11 +91,8 @@ struct Functor_TestBatchedTeamVectorSolveUTV2 { TeamVectorCopy::invoke(member, aa, ac); /// bb = AA*xx - KokkosBatched::TeamVectorGemm::invoke(member, one, - aa, xx, zero, - bb); + KokkosBatched::TeamVectorGemm::invoke( + member, one, aa, xx, zero, bb); member.team_barrier(); /// Solving Ax = b using UTV transformation @@ -110,12 +101,10 @@ struct Functor_TestBatchedTeamVectorSolveUTV2 { /// UTV = A P^T int matrix_rank(0); - TeamVectorUTV::invoke(member, aa, pp, uu, vv, ww, - matrix_rank); + 
TeamVectorUTV::invoke(member, aa, pp, uu, vv, ww, matrix_rank); member.team_barrier(); - TeamVectorSolveUTV::invoke(member, matrix_rank, uu, - aa, vv, pp, xx, bb, ww); + TeamVectorSolveUTV::invoke(member, matrix_rank, uu, aa, vv, pp, xx, bb, ww); } inline void run() { @@ -133,8 +122,8 @@ struct Functor_TestBatchedTeamVectorSolveUTV2 { } }; -template +template void impl_test_batched_solve_utv2(const int N, const int BlkSize) { typedef typename MatrixViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ats; @@ -152,8 +141,7 @@ void impl_test_batched_solve_utv2(const int N, const int BlkSize) { Kokkos::fence(); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); if (BlkSize <= 3) Kokkos::fill_random(a, random, value_type(1.0)); else @@ -161,10 +149,8 @@ void impl_test_batched_solve_utv2(const int N, const int BlkSize) { Kokkos::fence(); - Functor_TestBatchedTeamVectorSolveUTV2( - r, a, acopy, u, v, p, x, b, w) + Functor_TestBatchedTeamVectorSolveUTV2(r, a, acopy, u, v, p, x, b, w) .run(); Kokkos::fence(); @@ -210,48 +196,35 @@ void impl_test_batched_solve_utv2(const int N, const int BlkSize) { } } // namespace Test -template +template int test_batched_solve_utv2() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - PivViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_solve_utv2(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_solve_utv2(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::impl_test_batched_solve_utv2(1024, i); + Test::impl_test_batched_solve_utv2(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - 
VectorViewType; - typedef Kokkos::View - PivViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_solve_utv2(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_solve_utv2(0, 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::impl_test_batched_solve_utv2(1024, i); + Test::impl_test_batched_solve_utv2(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorUTV.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorUTV.hpp index eb45a70c8952..44f6ec394a69 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorUTV.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorUTV.hpp @@ -31,8 +31,8 @@ using namespace KokkosBatched; namespace Test { -template +template struct Functor_TestBatchedTeamVectorUTV { using execution_space = typename DeviceType::execution_space; MatrixViewType _r, _a, _acopy, _u, _v; @@ -41,11 +41,9 @@ struct Functor_TestBatchedTeamVectorUTV { WorkViewType _w; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorUTV( - const MatrixViewType &r, const MatrixViewType &a, - const MatrixViewType &acopy, const MatrixViewType &u, - const MatrixViewType &v, const PivViewType &p, const VectorViewType &x, - const VectorViewType &b, const WorkViewType &w) + Functor_TestBatchedTeamVectorUTV(const MatrixViewType &r, const MatrixViewType &a, const MatrixViewType &acopy, + const MatrixViewType &u, const MatrixViewType &v, const PivViewType &p, + const VectorViewType &x, const VectorViewType &b, const WorkViewType &w) : _r(r), _a(a), _acopy(acopy), _u(u), _v(v), _p(p), _x(x), _b(b), _w(w) {} template @@ -71,22 +69,18 @@ struct Functor_TestBatchedTeamVectorUTV { // make diagonal dominant and set xx = 1,2,3,4,5 const int m = aa.extent(0), r = 
rr.extent(1); if (m <= r) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { - aa(i, i) += add_this; - xx(i) = (i + 1); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { + aa(i, i) += add_this; + xx(i) = (i + 1); + }); } else { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m * m), - [=](const int &ij) { - const int i = ij / m, j = ij % m; - value_type tmp(0); - for (int l = 0; l < r; ++l) - tmp += rr(i, l) * rr(j, l); - aa(i, j) = tmp; - }); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { xx(i) = (i + 1); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m * m), [=](const int &ij) { + const int i = ij / m, j = ij % m; + value_type tmp(0); + for (int l = 0; l < r; ++l) tmp += rr(i, l) * rr(j, l); + aa(i, j) = tmp; + }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { xx(i) = (i + 1); }); } member.team_barrier(); // finish writing aa, xx @@ -94,9 +88,8 @@ struct Functor_TestBatchedTeamVectorUTV { TeamVectorCopy::invoke(member, aa, ac); /// bb = AA*xx - KokkosBlas::TeamVectorGemv::invoke(member, one, aa, - xx, zero, bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, xx, zero, + bb); member.team_barrier(); /// Solving Ax = b using UTV transformation @@ -105,46 +98,41 @@ struct Functor_TestBatchedTeamVectorUTV { /// UTV = A P^T int matrix_rank(0); - TeamVectorUTV::invoke(member, aa, pp, uu, vv, ww, - matrix_rank); + TeamVectorUTV::invoke(member, aa, pp, uu, vv, ww, matrix_rank); member.team_barrier(); const auto range_upto_rank = Kokkos::pair(0, matrix_rank); - auto um = Kokkos::subview(uu, Kokkos::ALL(), range_upto_rank); - auto am = Kokkos::subview(aa, range_upto_rank, range_upto_rank); - auto vm = Kokkos::subview(vv, range_upto_rank, Kokkos::ALL()); + auto um = Kokkos::subview(uu, Kokkos::ALL(), range_upto_rank); + auto am = Kokkos::subview(aa, range_upto_rank, range_upto_rank); + auto vm = Kokkos::subview(vv, 
range_upto_rank, Kokkos::ALL()); if (matrix_rank < m) { /// w = U^T b - KokkosBlas::TeamVectorGemv::invoke(member, one, um, - bb, zero, ww); + KokkosBlas::TeamVectorGemv::invoke(member, one, um, bb, zero, + ww); member.team_barrier(); /// w = T^{-1} w - TeamVectorTrsv::invoke(member, one, am, ww); + TeamVectorTrsv::invoke( + member, one, am, ww); member.team_barrier(); /// x = V^T w - KokkosBlas::TeamVectorGemv::invoke(member, one, vm, - ww, zero, xx); + KokkosBlas::TeamVectorGemv::invoke(member, one, vm, ww, zero, + xx); member.team_barrier(); } else { /// x = U^T b - KokkosBlas::TeamVectorGemv::invoke(member, one, um, - bb, zero, xx); + KokkosBlas::TeamVectorGemv::invoke(member, one, um, bb, zero, + xx); member.team_barrier(); /// x = T^{-1} x - TeamVectorTrsv::invoke(member, one, am, xx); + TeamVectorTrsv::invoke( + member, one, am, xx); member.team_barrier(); } /// x = P^T x - TeamVectorApplyPivot::invoke( - member, pp, xx); + TeamVectorApplyPivot::invoke(member, pp, xx); member.team_barrier(); } @@ -163,8 +151,8 @@ struct Functor_TestBatchedTeamVectorUTV { } }; -template +template void impl_test_batched_utv(const int N, const int BlkSize) { typedef typename MatrixViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ats; @@ -182,8 +170,7 @@ void impl_test_batched_utv(const int N, const int BlkSize) { Kokkos::fence(); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); if (BlkSize <= 3) Kokkos::fill_random(a, random, value_type(1.0)); else @@ -191,8 +178,7 @@ void impl_test_batched_utv(const int N, const int BlkSize) { Kokkos::fence(); - Functor_TestBatchedTeamVectorUTV( + Functor_TestBatchedTeamVectorUTV( r, a, acopy, u, v, p, x, b, w) .run(); @@ -236,46 +222,35 @@ void impl_test_batched_utv(const int N, const int BlkSize) { } } // namespace Test -template +template int test_batched_utv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View 
- VectorViewType; - typedef Kokkos::View - PivViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_utv(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_utv(0, + 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::impl_test_batched_utv(1024, - i); + Test::impl_test_batched_utv( + 1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - VectorViewType; - typedef Kokkos::View - PivViewType; - typedef Kokkos::View - WorkViewType; - Test::impl_test_batched_utv(0, 10); + typedef Kokkos::View MatrixViewType; + typedef Kokkos::View VectorViewType; + typedef Kokkos::View PivViewType; + typedef Kokkos::View WorkViewType; + Test::impl_test_batched_utv(0, + 10); for (int i = 1; i < 10; ++i) { // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::impl_test_batched_utv(1024, - i); + Test::impl_test_batched_utv( + 1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorArithmatic.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorArithmatic.hpp index 9d1205717f2a..654d19911781 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorArithmatic.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorArithmatic.hpp @@ -21,10 +21,8 @@ // to ensure it is not included in these // backends unit-test -#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && \ - !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ - !defined(TEST_SYCL_BATCHED_DENSE_CPP) && \ - !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) +#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ + !defined(TEST_SYCL_BATCHED_DENSE_CPP) && !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) #include "gtest/gtest.h" #include 
"Kokkos_Core.hpp" @@ -95,132 +93,91 @@ void impl_test_batched_vector_arithmatic() { { /// test : vec + vec c = a + b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] + b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] + b[k]), eps * ats::abs(c[k])); /// test : value + vec c = alpha + b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha + b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha + b[k]), eps * ats::abs(c[k])); /// test : vec + value c = b + alpha; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] + alpha), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] + alpha), eps * ats::abs(c[k])); /// test : vec + mag c = a + beta; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] + beta), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] + beta), eps * ats::abs(c[k])); /// test : mag + vec c = beta + a; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta + a[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta + a[k]), eps * ats::abs(c[k])); } { /// test : vec - vec c = a - b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] - b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] - b[k]), eps * ats::abs(c[k])); /// test : value - vec c = alpha - b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha - b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha - b[k]), eps * ats::abs(c[k])); /// 
test : vec + value c = b - alpha; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] - alpha), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] - alpha), eps * ats::abs(c[k])); /// test : vec - mag c = a - beta; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] - beta), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] - beta), eps * ats::abs(c[k])); /// test : mag - vec c = beta - a; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta - a[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta - a[k]), eps * ats::abs(c[k])); } { /// test : vec * vec c = a * b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] * b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] * b[k]), eps * ats::abs(c[k])); /// test : value * vec c = alpha * b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha * b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha * b[k]), eps * ats::abs(c[k])); /// test : vec + value c = b * alpha; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] * alpha), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] * alpha), eps * ats::abs(c[k])); /// test : vec * mag c = a * beta; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] * beta), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] * beta), eps * ats::abs(c[k])); /// test : mag * vec c = beta * a; - for (int k = 0; k < vector_length; ++k) - 
EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta * a[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta * a[k]), eps * ats::abs(c[k])); } { /// test : vec / vec c = a / b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] / b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] / b[k]), eps * ats::abs(c[k])); /// test : value / vec c = alpha / b; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha / b[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(alpha / b[k]), eps * ats::abs(c[k])); /// test : vec / value c = b / alpha; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] / alpha), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(b[k] / alpha), eps * ats::abs(c[k])); /// test : mag / vec c = beta / a; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta / a[k]), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(beta / a[k]), eps * ats::abs(c[k])); /// test : vec / value c = a / beta; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] / beta), - eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] / beta), eps * ats::abs(c[k])); } { /// test : vec -vec c = -a; - for (int k = 0; k < vector_length; ++k) - EXPECT_NEAR(ats::abs(c[k]), ats::abs(-a[k]), eps * ats::abs(c[k])); + for (int k = 0; k < vector_length; ++k) EXPECT_NEAR(ats::abs(c[k]), ats::abs(-a[k]), eps * ats::abs(c[k])); } #if defined(__DO_NOT_TEST__) { @@ -232,8 +189,7 @@ void impl_test_batched_vector_arithmatic() { c += vector_type(tiny) * vector_type(a >= 0); for (int k = 0; k < vector_length; ++k) - 
EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] < 0 ? -tiny : tiny), - eps * ats::abs(c[k])); + EXPECT_NEAR(ats::abs(c[k]), ats::abs(a[k] < 0 ? -tiny : tiny), eps * ats::abs(c[k])); } #endif } @@ -242,18 +198,16 @@ void impl_test_batched_vector_arithmatic() { template int test_batched_vector_arithmatic() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); Test::impl_test_batched_vector_arithmatic(); return 0; } template int test_batched_complex_real_imag_value() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); Test::impl_test_complex_real_imag_value(); return 0; @@ -297,65 +251,53 @@ TEST_F(TestCategory, batched_vector_arithmatic_simd_double8) { #define __DO_NOT_TEST__ #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) TEST_F(TestCategory, batched_vector_arithmatic_simd_scomplex3) { - test_batched_vector_arithmatic >, - 3>(); + test_batched_vector_arithmatic >, 3>(); } // avx TEST_F(TestCategory, batched_vector_arithmatic_simd_scomplex4) { - test_batched_vector_arithmatic >, - 4>(); + test_batched_vector_arithmatic >, 4>(); } // avx 512 TEST_F(TestCategory, batched_vector_arithmatic_simd_scomplex8) { - test_batched_vector_arithmatic >, - 8>(); + test_batched_vector_arithmatic >, 8>(); } TEST_F(TestCategory, batched_vector_scomplex_real_imag_value3) { - test_batched_complex_real_imag_value >, 3>(); + test_batched_complex_real_imag_value >, 3>(); } // avx TEST_F(TestCategory, batched_vector_scomplex_real_imag_value2) { - test_batched_complex_real_imag_value >, 2>(); + test_batched_complex_real_imag_value >, 2>(); } // avx 512 TEST_F(TestCategory, batched_vector_scomplex_real_imag_value4) { - test_batched_complex_real_imag_value >, 4>(); + 
test_batched_complex_real_imag_value >, 4>(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, batched_vector_arithmatic_simd_dcomplex3) { - test_batched_vector_arithmatic >, - 3>(); + test_batched_vector_arithmatic >, 3>(); } // avx TEST_F(TestCategory, batched_vector_arithmatic_simd_dcomplex2) { - test_batched_vector_arithmatic >, - 2>(); + test_batched_vector_arithmatic >, 2>(); } // avx 512 TEST_F(TestCategory, batched_vector_arithmatic_simd_dcomplex4) { - test_batched_vector_arithmatic >, - 4>(); + test_batched_vector_arithmatic >, 4>(); } TEST_F(TestCategory, batched_vector_dcomplex_real_imag_value3) { - test_batched_complex_real_imag_value >, 3>(); + test_batched_complex_real_imag_value >, 3>(); } // avx TEST_F(TestCategory, batched_vector_dcomplex_real_imag_value2) { - test_batched_complex_real_imag_value >, 2>(); + test_batched_complex_real_imag_value >, 2>(); } // avx 512 TEST_F(TestCategory, batched_vector_dcomplex_real_imag_value4) { - test_batched_complex_real_imag_value >, 4>(); + test_batched_complex_real_imag_value >, 4>(); } #endif #undef __DO_NOT_TEST__ diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorLogical.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorLogical.hpp index 5ab10bb5bd41..0427982a424e 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorLogical.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorLogical.hpp @@ -21,10 +21,8 @@ // to ensure it is not included in these // backends unit-test -#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && \ - !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ - !defined(TEST_SYCL_BATCHED_DENSE_CPP) && \ - !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) +#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ + !defined(TEST_SYCL_BATCHED_DENSE_CPP) && !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) #include "gtest/gtest.h" #include "Kokkos_Core.hpp" @@ 
-59,33 +57,30 @@ void impl_test_batched_vector_logical() { { #undef CHECK -#define CHECK(op) \ - { \ - const auto comparison = a op b; \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_EQ(comparison[i], a[i] op b[i]); \ +#define CHECK(op) \ + { \ + const auto comparison = a op b; \ + for (int i = 0; i < vector_length; ++i) EXPECT_EQ(comparison[i], a[i] op b[i]); \ } CHECK(||); CHECK(&&); #undef CHECK -#define CHECK(op) \ - { \ - const auto comparison = a op 0; \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_EQ(comparison[i], a[i] op 0); \ +#define CHECK(op) \ + { \ + const auto comparison = a op 0; \ + for (int i = 0; i < vector_length; ++i) EXPECT_EQ(comparison[i], a[i] op 0); \ } CHECK(||); CHECK(&&); #undef CHECK -#define CHECK(op) \ - { \ - const auto comparison = 0 op b; \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_EQ(comparison[i], 0 op b[i]); \ +#define CHECK(op) \ + { \ + const auto comparison = 0 op b; \ + for (int i = 0; i < vector_length; ++i) EXPECT_EQ(comparison[i], 0 op b[i]); \ } CHECK(||); @@ -100,9 +95,8 @@ void impl_test_batched_vector_logical() { template int test_batched_vector_logical() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); Test::impl_test_batched_vector_logical(); return 0; @@ -113,21 +107,13 @@ int test_batched_vector_logical() { /// #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_vector_logical_simd_float3) { - test_batched_vector_logical(); -} -TEST_F(TestCategory, batched_vector_logical_simd_float8) { - test_batched_vector_logical(); -} +TEST_F(TestCategory, batched_vector_logical_simd_float3) { test_batched_vector_logical(); } +TEST_F(TestCategory, batched_vector_logical_simd_float8) { test_batched_vector_logical(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, 
batched_vector_logical_simd_double3) { - test_batched_vector_logical(); -} -TEST_F(TestCategory, batched_vector_logical_simd_double4) { - test_batched_vector_logical(); -} +TEST_F(TestCategory, batched_vector_logical_simd_double3) { test_batched_vector_logical(); } +TEST_F(TestCategory, batched_vector_logical_simd_double4) { test_batched_vector_logical(); } #endif // #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMath.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMath.hpp index 02c943d58740..2cd9f02a497f 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMath.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMath.hpp @@ -21,10 +21,8 @@ // to ensure it is not included in these // backends unit-test -#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && \ - !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ - !defined(TEST_SYCL_BATCHED_DENSE_CPP) && \ - !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) +#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ + !defined(TEST_SYCL_BATCHED_DENSE_CPP) && !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) #include "gtest/gtest.h" #include "Kokkos_Core.hpp" @@ -67,11 +65,10 @@ void impl_test_batched_vector_math() { { #undef CHECK -#define CHECK(op) \ - { \ - a = op(aref); \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_NEAR_KK(a[i], ats::op(aref[i]), eps* a[i]); \ +#define CHECK(op) \ + { \ + a = op(aref); \ + for (int i = 0; i < vector_length; ++i) EXPECT_NEAR_KK(a[i], ats::op(aref[i]), eps* a[i]); \ } CHECK(sqrt); @@ -89,32 +86,29 @@ void impl_test_batched_vector_math() { CHECK(atan); #undef CHECK -#define CHECK \ - { \ - a = pow(aref, bref); \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_NEAR_KK(a[i], ats::pow(aref[i], bref[i]), eps* a[i]); \ - } \ +#define CHECK \ + { \ + a = pow(aref, bref); \ + for (int i = 0; i < vector_length; ++i) 
EXPECT_NEAR_KK(a[i], ats::pow(aref[i], bref[i]), eps* a[i]); \ + } \ CHECK; #undef CHECK -#define CHECK(op) \ - { \ - mag_type beta = mag_type(3.2); \ - a = op(aref, beta); \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_NEAR_KK(a[i], ats::op(aref[i], beta), eps* a[i]); \ +#define CHECK(op) \ + { \ + mag_type beta = mag_type(3.2); \ + a = op(aref, beta); \ + for (int i = 0; i < vector_length; ++i) EXPECT_NEAR_KK(a[i], ats::op(aref[i], beta), eps* a[i]); \ } CHECK(pow); #undef CHECK -#define CHECK(op) \ - { \ - value_type alpha = random.value() + 2.0; \ - a = op(alpha, bref); \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_NEAR_KK(a[i], ats::op(alpha, bref[i]), eps* a[i]); \ +#define CHECK(op) \ + { \ + value_type alpha = random.value() + 2.0; \ + a = op(alpha, bref); \ + for (int i = 0; i < vector_length; ++i) EXPECT_NEAR_KK(a[i], ats::op(alpha, bref[i]), eps* a[i]); \ } CHECK(pow); @@ -126,9 +120,8 @@ void impl_test_batched_vector_math() { template int test_batched_vector_math() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); Test::impl_test_batched_vector_math(); return 0; @@ -156,21 +149,13 @@ int test_batched_vector_math() { /// #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_vector_math_simd_float3) { - test_batched_vector_math, 3>(); -} -TEST_F(TestCategory, batched_vector_math_simd_float8) { - test_batched_vector_math, 8>(); -} +TEST_F(TestCategory, batched_vector_math_simd_float3) { test_batched_vector_math, 3>(); } +TEST_F(TestCategory, batched_vector_math_simd_float8) { test_batched_vector_math, 8>(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_vector_math_simd_double3) { - test_batched_vector_math, 3>(); -} -TEST_F(TestCategory, batched_vector_math_simd_double4) { - test_batched_vector_math, 4>(); -} 
+TEST_F(TestCategory, batched_vector_math_simd_double3) { test_batched_vector_math, 3>(); } +TEST_F(TestCategory, batched_vector_math_simd_double4) { test_batched_vector_math, 4>(); } #endif // using namespace Test; diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMisc.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMisc.hpp index 5f176ccba8b9..98d7f4e87e3a 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMisc.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorMisc.hpp @@ -21,10 +21,8 @@ // to ensure it is not included in these // backends unit-test -#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && \ - !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ - !defined(TEST_SYCL_BATCHED_DENSE_CPP) && \ - !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) +#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ + !defined(TEST_SYCL_BATCHED_DENSE_CPP) && !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) #include "gtest/gtest.h" #include "Kokkos_Core.hpp" @@ -159,9 +157,8 @@ void impl_test_batched_vector_misc() { template int test_batched_vector_misc() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); Test::impl_test_batched_vector_misc(); return 0; @@ -172,21 +169,13 @@ int test_batched_vector_misc() { /// #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_vector_misc_simd_float3) { - test_batched_vector_misc, 3>(); -} -TEST_F(TestCategory, batched_vector_misc_simd_float8) { - test_batched_vector_misc, 8>(); -} +TEST_F(TestCategory, batched_vector_misc_simd_float3) { test_batched_vector_misc, 3>(); } +TEST_F(TestCategory, batched_vector_misc_simd_float8) { test_batched_vector_misc, 8>(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, 
batched_vector_misc_simd_double3) { - test_batched_vector_misc, 3>(); -} -TEST_F(TestCategory, batched_vector_misc_simd_double4) { - test_batched_vector_misc, 4>(); -} +TEST_F(TestCategory, batched_vector_misc_simd_double3) { test_batched_vector_misc, 3>(); } +TEST_F(TestCategory, batched_vector_misc_simd_double4) { test_batched_vector_misc, 4>(); } #endif // #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorRelation.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorRelation.hpp index 1aff1b2d0fbd..e5c3139c5cf4 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorRelation.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorRelation.hpp @@ -21,10 +21,8 @@ // to ensure it is not included in these // backends unit-test -#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && \ - !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ - !defined(TEST_SYCL_BATCHED_DENSE_CPP) && \ - !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) +#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ + !defined(TEST_SYCL_BATCHED_DENSE_CPP) && !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) #include "gtest/gtest.h" #include "Kokkos_Core.hpp" @@ -60,11 +58,10 @@ void impl_test_batched_vector_relation() { { #undef CHECK -#define CHECK(op) \ - { \ - const auto comparison = a op b; \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_EQ(comparison[i], a[i] op b[i]); \ +#define CHECK(op) \ + { \ + const auto comparison = a op b; \ + for (int i = 0; i < vector_length; ++i) EXPECT_EQ(comparison[i], a[i] op b[i]); \ } CHECK(<); @@ -75,11 +72,10 @@ void impl_test_batched_vector_relation() { CHECK(!=); #undef CHECK -#define CHECK(op) \ - { \ - const auto comparison = a op value_type(0); \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_EQ(comparison[i], a[i] op value_type(0)); \ +#define CHECK(op) \ + { \ + const auto comparison = a op 
value_type(0); \ + for (int i = 0; i < vector_length; ++i) EXPECT_EQ(comparison[i], a[i] op value_type(0)); \ } CHECK(<); @@ -90,11 +86,10 @@ void impl_test_batched_vector_relation() { CHECK(!=); #undef CHECK -#define CHECK(op) \ - { \ - const auto comparison = value_type(0) op b; \ - for (int i = 0; i < vector_length; ++i) \ - EXPECT_EQ(comparison[i], value_type(0) op b[i]); \ +#define CHECK(op) \ + { \ + const auto comparison = value_type(0) op b; \ + for (int i = 0; i < vector_length; ++i) EXPECT_EQ(comparison[i], value_type(0) op b[i]); \ } CHECK(<); @@ -113,9 +108,8 @@ void impl_test_batched_vector_relation() { template int test_batched_vector_relation() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); Test::impl_test_batched_vector_relation(); return 0; diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorView.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorView.hpp index 74c7748cba7f..5d9047e57c85 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorView.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_VectorView.hpp @@ -21,10 +21,8 @@ // to ensure it is not included in these // backends unit-test -#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && \ - !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ - !defined(TEST_SYCL_BATCHED_DENSE_CPP) && \ - !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) +#if !defined(TEST_CUDA_BATCHED_DENSE_CPP) && !defined(TEST_HIP_BATCHED_DENSE_CPP) && \ + !defined(TEST_SYCL_BATCHED_DENSE_CPP) && !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) #include "gtest/gtest.h" #include "Kokkos_Core.hpp" @@ -62,100 +60,76 @@ void impl_init_vector_view(const VectorViewType& a) { for (int i7 = 0, i7end = b.extent(7); i7 < i7end; ++i7) template -void impl_verify_vector_view( - const 
VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0 / vl, i1, i2, i3, i4, i5, i6, i7)[i0 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0 / vl, i1, i2, i3, i4, i5, i6, i7)[i0 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1 / vl, i2, i3, i4, i5, i6, i7)[i1 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1 / vl, i2, i3, i4, i5, i6, i7)[i1 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1, i2 / vl, i3, i4, i5, i6, i7)[i2 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1, i2 / vl, i3, i4, i5, i6, i7)[i2 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { 
typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1, i2, i3 / vl, i4, i5, i6, i7)[i3 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1, i2, i3 / vl, i4, i5, i6, i7)[i3 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4 / vl, i5, i6, i7)[i4 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4 / vl, i5, i6, i7)[i4 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4, i5 / vl, i6, i7)[i5 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4, i5 / vl, i6, i7)[i5 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; 
const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4, i5, i6 / vl, i7)[i6 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4, i5, i6 / vl, i7)[i6 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template -void impl_verify_vector_view( - const VectorViewType& a, - const SimdViewAccess >& b) { +void impl_verify_vector_view(const VectorViewType& a, const SimdViewAccess >& b) { typedef typename VectorViewType::value_type vector_type; constexpr int vl = vector_type::vector_length; typedef Kokkos::ArithTraits ats; const typename ats::mag_type eps = 1.0e3 * ats::epsilon(); TEST_LOOP - EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4, i5, i6, i7 / vl)[i7 % vl], - b(i0, i1, i2, i3, i4, i5, i6, i7), eps); + EXPECT_NEAR_KK(a.access(i0, i1, i2, i3, i4, i5, i6, i7 / vl)[i7 % vl], b(i0, i1, i2, i3, i4, i5, i6, i7), eps); } template @@ -169,183 +143,90 @@ void impl_test_batched_vector_view() { { /// rank 1 array Kokkos::View a("a", test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); } { /// rank 2 array - Kokkos::View a("a", test_view_size, - test_view_size); + Kokkos::View a("a", test_view_size, test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, SimdViewAccess, PackDim<0> >( - a)); - impl_verify_vector_view( - a, SimdViewAccess, PackDim<1> >( - a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); } { /// rank 3 array - Kokkos::View a("a", test_view_size, - test_view_size, test_view_size); + Kokkos::View a("a", test_view_size, test_view_size, test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<0> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<1> >( - a)); - impl_verify_vector_view( - a, - 
SimdViewAccess, PackDim<2> >( - a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<2> >(a)); } { /// rank 4 array - Kokkos::View a( - "a", test_view_size, test_view_size, test_view_size, test_view_size); + Kokkos::View a("a", test_view_size, test_view_size, test_view_size, test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<0> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<1> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<2> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<3> >( - a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<2> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<3> >(a)); } { /// rank 5 array - Kokkos::View a( - "a", test_view_size, test_view_size, test_view_size, test_view_size, - test_view_size); + Kokkos::View a("a", test_view_size, test_view_size, test_view_size, test_view_size, + test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<0> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<1> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<2> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<3> >( - a)); - impl_verify_vector_view( - a, - SimdViewAccess, PackDim<4> >( - a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<2> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<3> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<4> >(a)); } { /// rank 6 array - Kokkos::View a( - "a", test_view_size, test_view_size, 
test_view_size, test_view_size, - test_view_size, test_view_size); + Kokkos::View a("a", test_view_size, test_view_size, test_view_size, test_view_size, + test_view_size, test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<0> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<1> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<2> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<3> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<4> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<5> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<2> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<3> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<4> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<5> >(a)); } { /// rank 7 array - Kokkos::View a( - "a", test_view_size, test_view_size, test_view_size, test_view_size, - test_view_size, test_view_size, test_view_size); + Kokkos::View a("a", test_view_size, test_view_size, test_view_size, test_view_size, + test_view_size, test_view_size, test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<0> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<1> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<2> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<3> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<4> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<5> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<6> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<2> >(a)); + 
impl_verify_vector_view(a, SimdViewAccess, PackDim<3> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<4> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<5> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<6> >(a)); } { /// rank 8 array - Kokkos::View a( - "a", test_view_size, test_view_size, test_view_size, test_view_size, - test_view_size, test_view_size, test_view_size, test_view_size); + Kokkos::View a("a", test_view_size, test_view_size, test_view_size, test_view_size, + test_view_size, test_view_size, test_view_size, test_view_size); impl_init_vector_view(a); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<0> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<1> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<2> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<3> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<4> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<5> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<6> >(a)); - impl_verify_vector_view( - a, SimdViewAccess, - PackDim<7> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<0> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<1> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<2> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<3> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<4> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<5> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<6> >(a)); + impl_verify_vector_view(a, SimdViewAccess, PackDim<7> >(a)); } } } // namespace Test template int test_batched_vector_view() { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "vector datatype is only tested on host space"); - Test::impl_test_batched_vector_view(); + static_assert(Kokkos::SpaceAccessibility::accessible, + "vector datatype is only tested on host space"); + 
Test::impl_test_batched_vector_view(); return 0; } @@ -355,18 +236,12 @@ int test_batched_vector_view() { /// #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_vector_view_simd_float8) { - test_batched_vector_view, 8>(); -} +TEST_F(TestCategory, batched_vector_view_simd_float8) { test_batched_vector_view, 8>(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_vector_view_simd_double4) { - test_batched_vector_view, 4>(); -} -TEST_F(TestCategory, batched_vector_view_simd_double8) { - test_batched_vector_view, 8>(); -} +TEST_F(TestCategory, batched_vector_view_simd_double4) { test_batched_vector_view, 4>(); } +TEST_F(TestCategory, batched_vector_view_simd_double8) { test_batched_vector_view, 8>(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) @@ -383,8 +258,7 @@ TEST_F(TestCategory, batched_vector_view_simd_dcomplex2) { test_batched_vector_view >, 2>(); } -#if defined(KOKKOS_COMPILER_INTEL) && \ - ((KOKKOS_COMPILER_INTEL > 1900) && (KOKKOS_COMPILER_INTEL <= 2021)) +#if defined(KOKKOS_COMPILER_INTEL) && ((KOKKOS_COMPILER_INTEL > 1900) && (KOKKOS_COMPILER_INTEL <= 2021)) TEST_F(TestCategory, batched_vector_view_simd_dcomplex4) { printf( "Skipped: intel compiler version > 19.0.05 && <= 2021\n" diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp index c11ad9695997..9aa4b95f2c1e 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp @@ -35,16 +35,14 @@ namespace KokkosBatched { /// template -template -KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandleType& handle, - const TMPViewType& _TMPView, const TMPNormViewType& _TMPNormView) { 
+KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const KrylovHandleType& handle, const TMPViewType& _TMPView, + const TMPNormViewType& _TMPNormView) { typedef int OrdinalType; - typedef typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type MagnitudeType; + typedef typename Kokkos::ArithTraits::mag_type MagnitudeType; const size_t maximum_iteration = handle.get_max_iteration(); const MagnitudeType tolerance = handle.get_tolerance(); @@ -59,14 +57,10 @@ KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( int offset_R = offset_Q + numRows; int offset_X = offset_R + numRows; - auto P = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_P, offset_P + numRows)); - auto Q = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_Q, offset_Q + numRows)); - auto R = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_R, offset_R + numRows)); - auto X = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_X, offset_X + numRows)); + auto P = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_P, offset_P + numRows)); + auto Q = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_Q, offset_Q + numRows)); + auto R = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_R, offset_R + numRows)); + auto X = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_X, offset_X + numRows)); auto sqr_norm_0 = Kokkos::subview(_TMPNormView, Kokkos::ALL, 0); auto sqr_norm_j = Kokkos::subview(_TMPNormView, Kokkos::ALL, 1); @@ -90,10 +84,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - mask(i) = - sqr_norm_0(i) > tolerance * tolerance ? 1. : 0; - }); + [&](const OrdinalType& i) { mask(i) = sqr_norm_0(i) > tolerance * tolerance ? 1. 
: 0; }); TeamVectorCopy1D::invoke(member, sqr_norm_0, sqr_norm_j); @@ -109,10 +100,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - alpha(i) = - mask(i) != 0. ? sqr_norm_j(i) / tmp(i) : 0.; - }); + [&](const OrdinalType& i) { alpha(i) = mask(i) != 0. ? sqr_norm_j(i) / tmp(i) : 0.; }); member.team_barrier(); // x_{j+1} := alpha p_j + x_j @@ -131,10 +119,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - alpha(i) = - mask(i) != 0. ? tmp(i) / sqr_norm_j(i) : 0.; - }); + [&](const OrdinalType& i) { alpha(i) = mask(i) != 0. ? tmp(i) / sqr_norm_j(i) : 0.; }); TeamVectorCopy1D::invoke(member, tmp, sqr_norm_j); @@ -167,55 +152,43 @@ KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( } template -template -KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int TeamVectorCG::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const KrylovHandleType& handle) { const int strategy = handle.get_memory_strategy(); if (strategy == 0) { - using ScratchPadVectorViewType = Kokkos::View< - typename VectorViewType::non_const_value_type**, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; - using ScratchPadNormViewType = Kokkos::View< - typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type**, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = + Kokkos::View; + using ScratchPadNormViewType = + Kokkos::View::mag_type**, + typename 
VectorViewType::execution_space::scratch_memory_space>; const int numMatrices = _X.extent(0); const int numRows = _X.extent(1); - ScratchPadVectorViewType _TMPView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - 4 * numRows); + ScratchPadVectorViewType _TMPView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 4 * numRows); - ScratchPadNormViewType _TMPNormView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); + ScratchPadNormViewType _TMPNormView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); - return invoke( - member, A, _B, _X, handle, _TMPView, _TMPNormView); + return invoke(member, A, _B, _X, handle, _TMPView, _TMPNormView); } if (strategy == 1) { const int first_matrix = handle.first_index(member.league_rank()); const int last_matrix = handle.last_index(member.league_rank()); - using ScratchPadNormViewType = Kokkos::View< - typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type**, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadNormViewType = + Kokkos::View::mag_type**, + typename VectorViewType::execution_space::scratch_memory_space>; const int numMatrices = _X.extent(0); - auto _TMPView = Kokkos::subview( - handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto _TMPView = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - ScratchPadNormViewType _TMPNormView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); + ScratchPadNormViewType _TMPNormView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); - return invoke( - member, A, _B, _X, handle, _TMPView, _TMPNormView); + return invoke(member, A, _B, _X, handle, _TMPView, _TMPNormView); } return 0; } diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp 
b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp index bf2f1d2e8676..82c62624c168 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp @@ -34,15 +34,14 @@ namespace KokkosBatched { /// template -template -KOKKOS_INLINE_FUNCTION int TeamCG::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandle& handle, - const TMPViewType& _TMPView, const TMPNormViewType& _TMPNormView) { +template +KOKKOS_INLINE_FUNCTION int TeamCG::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const KrylovHandle& handle, const TMPViewType& _TMPView, + const TMPNormViewType& _TMPNormView) { typedef int OrdinalType; - typedef typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type MagnitudeType; + typedef typename Kokkos::ArithTraits::mag_type MagnitudeType; size_t maximum_iteration = handle.get_max_iteration(); const MagnitudeType tolerance = handle.get_tolerance(); @@ -57,14 +56,10 @@ KOKKOS_INLINE_FUNCTION int TeamCG::invoke( int offset_R = offset_Q + numRows; int offset_X = offset_R + numRows; - auto P = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_P, offset_P + numRows)); - auto Q = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_Q, offset_Q + numRows)); - auto R = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_R, offset_R + numRows)); - auto X = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_X, offset_X + numRows)); + auto P = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_P, offset_P + numRows)); + auto Q = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_Q, offset_Q + numRows)); + auto R = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_R, offset_R + 
numRows)); + auto X = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_X, offset_X + numRows)); auto sqr_norm_0 = Kokkos::subview(_TMPNormView, Kokkos::ALL, 0); auto sqr_norm_j = Kokkos::subview(_TMPNormView, Kokkos::ALL, 1); @@ -88,10 +83,7 @@ KOKKOS_INLINE_FUNCTION int TeamCG::invoke( member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - mask(i) = - sqr_norm_0(i) > tolerance * tolerance ? 1. : 0; - }); + [&](const OrdinalType& i) { mask(i) = sqr_norm_0(i) > tolerance * tolerance ? 1. : 0; }); TeamCopy1D::invoke(member, sqr_norm_0, sqr_norm_j); @@ -107,10 +99,7 @@ KOKKOS_INLINE_FUNCTION int TeamCG::invoke( member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - alpha(i) = - mask(i) != 0. ? sqr_norm_j(i) / tmp(i) : 0.; - }); + [&](const OrdinalType& i) { alpha(i) = mask(i) != 0. ? sqr_norm_j(i) / tmp(i) : 0.; }); member.team_barrier(); // x_{j+1} := alpha p_j + x_j @@ -129,10 +118,7 @@ KOKKOS_INLINE_FUNCTION int TeamCG::invoke( member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - alpha(i) = - mask(i) != 0. ? tmp(i) / sqr_norm_j(i) : 0.; - }); + [&](const OrdinalType& i) { alpha(i) = mask(i) != 0. ? 
tmp(i) / sqr_norm_j(i) : 0.; }); TeamCopy1D::invoke(member, tmp, sqr_norm_j); @@ -165,55 +151,43 @@ KOKKOS_INLINE_FUNCTION int TeamCG::invoke( } template -template -KOKKOS_INLINE_FUNCTION int TeamCG::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int TeamCG::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const KrylovHandleType& handle) { const int strategy = handle.get_memory_strategy(); if (strategy == 0) { - using ScratchPadVectorViewType = Kokkos::View< - typename VectorViewType::non_const_value_type**, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; - using ScratchPadNormViewType = Kokkos::View< - typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type**, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = + Kokkos::View; + using ScratchPadNormViewType = + Kokkos::View::mag_type**, + typename VectorViewType::execution_space::scratch_memory_space>; const int numMatrices = _X.extent(0); const int numRows = _X.extent(1); - ScratchPadVectorViewType _TMPView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - 4 * numRows); + ScratchPadVectorViewType _TMPView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 4 * numRows); - ScratchPadNormViewType _TMPNormView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); + ScratchPadNormViewType _TMPNormView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); - return invoke( - member, A, _B, _X, handle, _TMPView, _TMPNormView); + return invoke(member, A, _B, _X, handle, _TMPView, _TMPNormView); } if (strategy == 1) { const int first_matrix = handle.first_index(member.league_rank()); const int last_matrix = 
handle.last_index(member.league_rank()); - using ScratchPadNormViewType = Kokkos::View< - typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type**, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadNormViewType = + Kokkos::View::mag_type**, + typename VectorViewType::execution_space::scratch_memory_space>; const int numMatrices = _X.extent(0); - auto _TMPView = Kokkos::subview( - handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto _TMPView = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - ScratchPadNormViewType _TMPNormView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); + ScratchPadNormViewType _TMPNormView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, 5); - return invoke( - member, A, _B, _X, handle, _TMPView, _TMPNormView); + return invoke(member, A, _B, _X, handle, _TMPView, _TMPNormView); } return 0; } diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Serial_Impl.hpp b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Serial_Impl.hpp index 923b67c10573..2d8c0cae0086 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Serial_Impl.hpp @@ -36,17 +36,12 @@ namespace KokkosBatched { /// Serial GMRES /// -template -KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const PrecOperatorType& P, - const KrylovHandleType& handle, - const int GMRES_id) { +template +KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const PrecOperatorType& P, + const KrylovHandleType& handle, const int GMRES_id) { typedef int OrdinalType; - typedef typename Kokkos::ArithTraits< - typename 
VectorViewType::non_const_value_type>::mag_type MagnitudeType; + typedef typename Kokkos::ArithTraits::mag_type MagnitudeType; typedef Kokkos::ArithTraits ATM; using SerialCopy1D = SerialCopy; @@ -55,9 +50,7 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, const OrdinalType numMatrices = _X.extent(0); const OrdinalType numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? handle.get_max_iteration() : numRows; const MagnitudeType tolerance = handle.get_tolerance(); const MagnitudeType max_tolerance = handle.get_max_tolerance(); @@ -72,15 +65,12 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, const int first_matrix = handle.first_index(GMRES_id); const int last_matrix = handle.last_index(GMRES_id); - auto V_view = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::make_pair(offset_V, offset_V + n_V)); - auto H_view = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::make_pair(offset_H, offset_H + n_H)); - auto Givens_view = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::make_pair(offset_Givens, offset_Givens + n_Givens)); + auto V_view = Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, + Kokkos::make_pair(offset_V, offset_V + n_V)); + auto H_view = Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, + Kokkos::make_pair(offset_H, offset_H + n_H)); + auto Givens_view = Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, + Kokkos::make_pair(offset_Givens, offset_Givens + n_Givens)); int n_G = maximum_iteration + 1; int n_W = numRows; @@ -91,18 +81,12 @@ 
KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, int offset_mask = offset_W + n_W; int offset_tmp = offset_mask + n_mask; - auto G = Kokkos::subview(handle.tmp_view, - Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::make_pair(offset_G, offset_G + n_G)); - auto W = Kokkos::subview(handle.tmp_view, - Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::make_pair(offset_W, offset_W + n_W)); - auto mask = Kokkos::subview(handle.tmp_view, - Kokkos::make_pair(first_matrix, last_matrix), - offset_mask); - auto tmp = - Kokkos::subview(handle.tmp_view, - Kokkos::make_pair(first_matrix, last_matrix), offset_tmp); + auto G = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::make_pair(offset_G, offset_G + n_G)); + auto W = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::make_pair(offset_W, offset_W + n_W)); + auto mask = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), offset_mask); + auto tmp = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), offset_tmp); // Deep copy of b into r_0: SerialCopy2D::invoke(_B, W); @@ -149,19 +133,14 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, if (handle.get_ortho_strategy() == 0) { for (OrdinalType l = 0; l < numMatrices; ++l) { auto W_l = Kokkos::subview(W, l, Kokkos::ALL); - auto V_old = Kokkos::subview( - V_view, l, Kokkos::make_pair(0, (int)j + 1), Kokkos::ALL); - auto H_old = - Kokkos::subview(H_view, l, j, Kokkos::make_pair(0, (int)j + 1)); + auto V_old = Kokkos::subview(V_view, l, Kokkos::make_pair(0, (int)j + 1), Kokkos::ALL); + auto H_old = Kokkos::subview(H_view, l, j, Kokkos::make_pair(0, (int)j + 1)); // Inner products - KokkosBlas::SerialGemv::invoke(1, V_old, W_l, 0, - H_old); + KokkosBlas::SerialGemv::invoke(1, V_old, W_l, 0, H_old); // Update - KokkosBlas::SerialGemv::invoke( - -1, V_old, H_old, 1, W_l); + 
KokkosBlas::SerialGemv::invoke(-1, V_old, H_old, 1, W_l); } } if (handle.get_ortho_strategy() == 1) { @@ -179,8 +158,7 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, for (OrdinalType i = 0; i < numMatrices; ++i) { H_view(i, j, j + 1) = ATM::sqrt(tmp(i)); - tmp(i) = - H_view(i, j, j + 1) > max_tolerance ? 1. / H_view(i, j, j + 1) : 0.; + tmp(i) = H_view(i, j, j + 1) > max_tolerance ? 1. / H_view(i, j, j + 1) : 0.; } if (j + 1 < maximum_iteration) { @@ -207,8 +185,7 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, } // Compute the new Givens rotation: - Kokkos::pair + Kokkos::pair G_new(1, 0); typename VectorViewType::non_const_value_type alpha = 0; SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha); @@ -241,8 +218,7 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, } bool all_converged = true; - for (OrdinalType l = 0; l < numMatrices; ++l) - all_converged = (all_converged && mask(l) == 0.); + for (OrdinalType l = 0; l < numMatrices; ++l) all_converged = (all_converged && mask(l) == 0.); if (all_converged) { maximum_iteration = j + 1; break; @@ -255,23 +231,19 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, auto A_l = Kokkos::subview(H_view, l, first_indices, first_indices); auto B_l = Kokkos::subview(G, l, first_indices); - SerialTrsm::invoke(1, A_l, B_l); + SerialTrsm::invoke(1, A_l, B_l); } if (handle.get_ortho_strategy() == 0) { for (OrdinalType l = 0; l < numMatrices; ++l) { KokkosBlas::SerialGemv::invoke( - 1, Kokkos::subview(V_view, l, first_indices, Kokkos::ALL), - Kokkos::subview(G, l, first_indices), 1, + 1, Kokkos::subview(V_view, l, first_indices, Kokkos::ALL), Kokkos::subview(G, l, first_indices), 1, Kokkos::subview(_X, l, Kokkos::ALL)); } } if (handle.get_ortho_strategy() == 1) { for (size_t j = 0; j < maximum_iteration; ++j) { - SerialAxpy::invoke(Kokkos::subview(G, Kokkos::ALL, j), - Kokkos::subview(V_view, Kokkos::ALL, j, Kokkos::ALL), - 
_X); + SerialAxpy::invoke(Kokkos::subview(G, Kokkos::ALL, j), Kokkos::subview(V_view, Kokkos::ALL, j, Kokkos::ALL), _X); } } @@ -289,12 +261,9 @@ KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, return status; } -template -KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int SerialGMRES::invoke(const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle) { Identity P; return invoke(A, _B, _X, P, handle); } diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp index a7219ecc91a6..8d37b2ac5eb9 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp @@ -39,17 +39,16 @@ namespace KokkosBatched { /// template -template -KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const PrecOperatorType& P, - const KrylovHandleType& handle, const ArnoldiViewType& _ArnoldiView, - const TMPViewType& _TMPView) { +KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const PrecOperatorType& P, + const KrylovHandleType& handle, + const ArnoldiViewType& _ArnoldiView, + const TMPViewType& _TMPView) { typedef int OrdinalType; - typedef typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type MagnitudeType; + typedef typename Kokkos::ArithTraits::mag_type MagnitudeType; typedef Kokkos::ArithTraits ATM; using TeamVectorCopy1D = TeamVectorCopy; @@ -57,9 +56,7 @@ KOKKOS_INLINE_FUNCTION int 
TeamVectorGMRES::invoke( const OrdinalType numMatrices = _X.extent(0); const OrdinalType numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? handle.get_max_iteration() : numRows; const MagnitudeType tolerance = handle.get_tolerance(); const MagnitudeType max_tolerance = handle.get_max_tolerance(); @@ -71,13 +68,10 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( int offset_H = offset_V + n_V; int offset_Givens = offset_H + n_H; - auto V_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, - Kokkos::make_pair(offset_V, offset_V + n_V)); - auto H_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, - Kokkos::make_pair(offset_H, offset_H + n_H)); - auto Givens_view = Kokkos::subview( - _ArnoldiView, Kokkos::ALL, Kokkos::ALL, - Kokkos::make_pair(offset_Givens, offset_Givens + n_Givens)); + auto V_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, Kokkos::make_pair(offset_V, offset_V + n_V)); + auto H_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, Kokkos::make_pair(offset_H, offset_H + n_H)); + auto Givens_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, + Kokkos::make_pair(offset_Givens, offset_Givens + n_Givens)); int n_G = maximum_iteration + 1; int n_W = numRows; @@ -88,10 +82,8 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( int offset_mask = offset_W + n_W; int offset_tmp = offset_mask + n_mask; - auto G = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_G, offset_G + n_G)); - auto W = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_W, offset_W + n_W)); + auto G = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_G, offset_G + n_G)); + auto W = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_W, offset_W + n_W)); auto mask = Kokkos::subview(_TMPView, Kokkos::ALL, offset_mask); 
auto tmp = Kokkos::subview(_TMPView, Kokkos::ALL, offset_tmp); @@ -109,33 +101,29 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( TeamVectorDot::invoke(member, W, W, tmp); member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - tmp(i) = ATM::sqrt(tmp(i)); - handle.set_norm(member.league_rank(), i, 0, tmp(i)); - if (tmp(i) > max_tolerance) { - mask(i) = 1; - G(i, 0) = tmp(i); - tmp(i) = 1. / tmp(i); - } else { - handle.set_iteration(member.league_rank(), i, 0); - mask(i) = 0; - G(i, 0) = 0.; - tmp(i) = 0.; - } - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), [&](const OrdinalType& i) { + tmp(i) = ATM::sqrt(tmp(i)); + handle.set_norm(member.league_rank(), i, 0, tmp(i)); + if (tmp(i) > max_tolerance) { + mask(i) = 1; + G(i, 0) = tmp(i); + tmp(i) = 1. / tmp(i); + } else { + handle.set_iteration(member.league_rank(), i, 0); + mask(i) = 0; + G(i, 0) = 0.; + tmp(i) = 0.; + } + }); member.team_barrier(); // Finish writing to tmp auto V_0 = Kokkos::subview(V_view, Kokkos::ALL, 0, Kokkos::ALL); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices( - iTemp, numRows, numMatrices, iRow, iMatrix); - V_0(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); + V_0(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); + }); int status = 1; // int number_not_converged = 0; @@ -151,20 +139,14 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( member.team_barrier(); if (handle.get_ortho_strategy() == 0) { - auto V_old = Kokkos::subview( - V_view, Kokkos::ALL, Kokkos::make_pair(0, (int)j + 1), Kokkos::ALL); - auto H_old = Kokkos::subview(H_view, Kokkos::ALL, j, - 
Kokkos::make_pair(0, (int)j + 1)); + auto V_old = Kokkos::subview(V_view, Kokkos::ALL, Kokkos::make_pair(0, (int)j + 1), Kokkos::ALL); + auto H_old = Kokkos::subview(H_view, Kokkos::ALL, j, Kokkos::make_pair(0, (int)j + 1)); // Inner products - TeamVectorGemv::invoke(member, 1, V_old, W, 0, - H_old); + TeamVectorGemv::invoke(member, 1, V_old, W, 0, H_old); member.team_barrier(); // Update - TeamVectorGemv::invoke(member, -1, V_old, H_old, 1, - W); + TeamVectorGemv::invoke(member, -1, V_old, H_old, 1, W); member.team_barrier(); // Finish writing to W } if (handle.get_ortho_strategy() == 1) { @@ -172,12 +154,10 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( auto V_i = Kokkos::subview(V_view, Kokkos::ALL, i, Kokkos::ALL); TeamVectorDot::invoke(member, W, V_i, tmp); member.team_barrier(); - TeamVectorCopy1D::invoke(member, tmp, - Kokkos::subview(H_view, Kokkos::ALL, j, i)); + TeamVectorCopy1D::invoke(member, tmp, Kokkos::subview(H_view, Kokkos::ALL, j, i)); member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), + [&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); }); member.team_barrier(); // Finish writing to tmp @@ -188,82 +168,71 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( TeamVectorDot::invoke(member, W, W, tmp); member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - H_view(i, j, j + 1) = ATM::sqrt(tmp(i)); - tmp(i) = H_view(i, j, j + 1) > max_tolerance - ? 1. / H_view(i, j, j + 1) - : 0.; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), [&](const OrdinalType& i) { + H_view(i, j, j + 1) = ATM::sqrt(tmp(i)); + tmp(i) = H_view(i, j, j + 1) > max_tolerance ? 1. 
/ H_view(i, j, j + 1) : 0.; + }); member.team_barrier(); if (j + 1 < maximum_iteration) { auto V_n = Kokkos::subview(V_view, Kokkos::ALL, j + 1, Kokkos::ALL); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices( - iTemp, numRows, numMatrices, iRow, iMatrix); - V_n(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); + V_n(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); + }); member.team_barrier(); } - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& l) { - // Apply the previous Givens rotations: - auto H_j = Kokkos::subview(H_view, l, j, Kokkos::ALL); - auto Givens_0_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 0); - auto Givens_1_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 1); - - if (mask(l) == 1.) 
{ - for (size_t i = 0; i < j; ++i) { - auto tmp1 = Givens_0_l(i) * H_j(i) + Givens_1_l(i) * H_j(i + 1); - auto tmp2 = -Givens_1_l(i) * H_j(i) + Givens_0_l(i) * H_j(i + 1); - H_j(i) = tmp1; - H_j(i + 1) = tmp2; - } - - // Compute the new Givens rotation: - Kokkos::pair - G_new(1, 0); - typename VectorViewType::non_const_value_type alpha = 0; - SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha); - - Givens_0_l(j) = G_new.first; - Givens_1_l(j) = G_new.second; - - // Apply the new Givens rotation: - auto tmp1 = Givens_0_l(j) * H_j(j) + Givens_1_l(j) * H_j(j + 1); - auto tmp2 = -Givens_1_l(j) * H_j(j) + Givens_0_l(j) * H_j(j + 1); - H_j(j) = tmp1; - H_j(j + 1) = tmp2; - - G(l, j + 1) = -Givens_1_l(j) * G(l, j); - G(l, j) *= Givens_0_l(j); - } else { - H_j(j) = 1.; - G(l, j + 1) = 0.; - } - - auto res_norm = - Kokkos::ArithTraits::abs(G(l, j + 1)) / G(l, 0); - - handle.set_norm(member.league_rank(), l, j + 1, res_norm); - - if (mask(l) == 1. && res_norm < tolerance) { - mask(l) = 0.; - G(l, j + 1) = 0.; - handle.set_iteration(member.league_rank(), l, j + 1); - } - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), [&](const OrdinalType& l) { + // Apply the previous Givens rotations: + auto H_j = Kokkos::subview(H_view, l, j, Kokkos::ALL); + auto Givens_0_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 0); + auto Givens_1_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 1); + + if (mask(l) == 1.) 
{ + for (size_t i = 0; i < j; ++i) { + auto tmp1 = Givens_0_l(i) * H_j(i) + Givens_1_l(i) * H_j(i + 1); + auto tmp2 = -Givens_1_l(i) * H_j(i) + Givens_0_l(i) * H_j(i + 1); + H_j(i) = tmp1; + H_j(i + 1) = tmp2; + } + + // Compute the new Givens rotation: + Kokkos::pair + G_new(1, 0); + typename VectorViewType::non_const_value_type alpha = 0; + SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha); + + Givens_0_l(j) = G_new.first; + Givens_1_l(j) = G_new.second; + + // Apply the new Givens rotation: + auto tmp1 = Givens_0_l(j) * H_j(j) + Givens_1_l(j) * H_j(j + 1); + auto tmp2 = -Givens_1_l(j) * H_j(j) + Givens_0_l(j) * H_j(j + 1); + H_j(j) = tmp1; + H_j(j + 1) = tmp2; + + G(l, j + 1) = -Givens_1_l(j) * G(l, j); + G(l, j) *= Givens_0_l(j); + } else { + H_j(j) = 1.; + G(l, j + 1) = 0.; + } + + auto res_norm = Kokkos::ArithTraits::abs(G(l, j + 1)) / G(l, 0); + + handle.set_norm(member.league_rank(), l, j + 1, res_norm); + + if (mask(l) == 1. && res_norm < tolerance) { + mask(l) = 0.; + G(l, j + 1) = 0.; + handle.set_iteration(member.league_rank(), l, j + 1); + } + }); member.team_barrier(); bool all_converged = true; - for (OrdinalType l = 0; l < numMatrices; ++l) - all_converged = (all_converged && mask(l) == 0.); + for (OrdinalType l = 0; l < numMatrices; ++l) all_converged = (all_converged && mask(l) == 0.); if (all_converged) { maximum_iteration = j + 1; break; @@ -274,30 +243,25 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( auto first_indices = Kokkos::make_pair(0, (int)maximum_iteration); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& l) { - auto A_l = Kokkos::subview(H_view, l, first_indices, first_indices); - auto B_l = Kokkos::subview(G, l, first_indices); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), [&](const OrdinalType& l) { + auto A_l = Kokkos::subview(H_view, l, first_indices, first_indices); + auto B_l = Kokkos::subview(G, l, first_indices); - 
SerialTrsm::invoke(1, A_l, B_l); - }); + SerialTrsm::invoke(1, A_l, B_l); + }); member.team_barrier(); // Finish writing to G if (handle.get_ortho_strategy() == 0) { TeamVectorGemv::invoke( - member, 1, - Kokkos::subview(V_view, Kokkos::ALL, first_indices, Kokkos::ALL), + member, 1, Kokkos::subview(V_view, Kokkos::ALL, first_indices, Kokkos::ALL), Kokkos::subview(G, Kokkos::ALL, first_indices), 1, _X); member.team_barrier(); // Finish writing to _X } if (handle.get_ortho_strategy() == 1) { for (size_t j = 0; j < maximum_iteration; ++j) { - TeamVectorAxpy::invoke( - member, Kokkos::subview(G, Kokkos::ALL, j), - Kokkos::subview(V_view, Kokkos::ALL, j, Kokkos::ALL), _X); + TeamVectorAxpy::invoke(member, Kokkos::subview(G, Kokkos::ALL, j), + Kokkos::subview(V_view, Kokkos::ALL, j, Kokkos::ALL), _X); member.team_barrier(); // Finish writing to _X } } @@ -305,128 +269,105 @@ KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( if (handle.get_compute_last_residual()) { TeamVectorCopy::invoke(member, _B, W); member.team_barrier(); - A.template apply(member, _X, W, -1, - 1); + A.template apply(member, _X, W, -1, 1); member.team_barrier(); P.template apply(member, W, W); member.team_barrier(); TeamVectorDot::invoke(member, W, W, tmp); member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - tmp(i) = ATM::sqrt(tmp(i)); - handle.set_last_norm(member.league_rank(), i, - tmp(i)); - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), [&](const OrdinalType& i) { + tmp(i) = ATM::sqrt(tmp(i)); + handle.set_last_norm(member.league_rank(), i, tmp(i)); + }); } return status; } template -template -KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const PrecOperatorType& P, - const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke(const MemberType& 
member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const PrecOperatorType& P, + const KrylovHandleType& handle) { const int strategy = handle.get_memory_strategy(); if (strategy == 0) { const int first_matrix = handle.first_index(member.league_rank()); const int last_matrix = handle.last_index(member.league_rank()); - auto _ArnoldiView = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::ALL); + auto _ArnoldiView = + Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, Kokkos::ALL); const int numMatrices = _X.extent(0); const int numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? handle.get_max_iteration() : numRows; int n_G = maximum_iteration + 1; int n_W = numRows; int n_mask = 1; int n_tmp = 1; - using ScratchPadVectorViewType = Kokkos::View< - typename VectorViewType::non_const_value_type**, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = + Kokkos::View; - ScratchPadVectorViewType _TMPView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - n_G + n_W + n_mask + n_tmp); + ScratchPadVectorViewType _TMPView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, + n_G + n_W + n_mask + n_tmp); - return invoke(member, A, _B, _X, P, handle, _ArnoldiView, - _TMPView); + return invoke(member, A, _B, _X, P, handle, + _ArnoldiView, _TMPView); } if (strategy == 1) { const int first_matrix = handle.first_index(member.league_rank()); const int last_matrix = handle.last_index(member.league_rank()); - auto _ArnoldiView = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::ALL); + auto _ArnoldiView = + 
Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, Kokkos::ALL); - auto _TMPView = Kokkos::subview( - handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto _TMPView = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - return invoke(member, A, _B, _X, P, handle, _ArnoldiView, - _TMPView); + return invoke(member, A, _B, _X, P, handle, + _ArnoldiView, _TMPView); } if (strategy == 2) { - using ScratchPadArnoldiViewType = Kokkos::View< - typename VectorViewType::non_const_value_type***, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadArnoldiViewType = + Kokkos::View; - using ScratchPadVectorViewType = Kokkos::View< - typename VectorViewType::non_const_value_type**, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = + Kokkos::View; const int numMatrices = _X.extent(0); const int numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? 
handle.get_max_iteration() : numRows; int n_G = maximum_iteration + 1; int n_W = numRows; int n_mask = 1; int n_tmp = 1; - ScratchPadArnoldiViewType _ArnoldiView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - maximum_iteration, numRows + maximum_iteration + 3); + ScratchPadArnoldiViewType _ArnoldiView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, + maximum_iteration, numRows + maximum_iteration + 3); - ScratchPadVectorViewType _TMPView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - n_G + n_W + n_mask + n_tmp); + ScratchPadVectorViewType _TMPView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, + n_G + n_W + n_mask + n_tmp); - return invoke(member, A, _B, _X, P, handle, _ArnoldiView, - _TMPView); + return invoke(member, A, _B, _X, P, handle, + _ArnoldiView, _TMPView); } return 0; } template -template -KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int TeamVectorGMRES::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const KrylovHandleType& handle) { Identity P; - return invoke(member, A, _B, _X, P, - handle); + return invoke(member, A, _B, _X, P, handle); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp index bb8f446f07c8..9fd9e09bd90c 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp @@ -38,17 +38,15 @@ namespace KokkosBatched { /// template -template -KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, 
- const VectorViewType& _X, const PrecOperatorType& P, - const KrylovHandleType& handle, const ArnoldiViewType& _ArnoldiView, - const TMPViewType& _TMPView) { +KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const PrecOperatorType& P, const KrylovHandleType& handle, + const ArnoldiViewType& _ArnoldiView, + const TMPViewType& _TMPView) { typedef int OrdinalType; - typedef typename Kokkos::ArithTraits< - typename VectorViewType::non_const_value_type>::mag_type MagnitudeType; + typedef typename Kokkos::ArithTraits::mag_type MagnitudeType; typedef Kokkos::ArithTraits ATM; using TeamCopy1D = TeamCopy; @@ -56,9 +54,7 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( const OrdinalType numMatrices = _X.extent(0); const OrdinalType numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? 
handle.get_max_iteration() : numRows; const MagnitudeType tolerance = handle.get_tolerance(); const MagnitudeType max_tolerance = handle.get_max_tolerance(); @@ -70,13 +66,10 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( int offset_H = offset_V + n_V; int offset_Givens = offset_H + n_H; - auto V_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, - Kokkos::make_pair(offset_V, offset_V + n_V)); - auto H_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, - Kokkos::make_pair(offset_H, offset_H + n_H)); - auto Givens_view = Kokkos::subview( - _ArnoldiView, Kokkos::ALL, Kokkos::ALL, - Kokkos::make_pair(offset_Givens, offset_Givens + n_Givens)); + auto V_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, Kokkos::make_pair(offset_V, offset_V + n_V)); + auto H_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, Kokkos::make_pair(offset_H, offset_H + n_H)); + auto Givens_view = Kokkos::subview(_ArnoldiView, Kokkos::ALL, Kokkos::ALL, + Kokkos::make_pair(offset_Givens, offset_Givens + n_Givens)); int n_G = maximum_iteration + 1; int n_W = numRows; @@ -87,10 +80,8 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( int offset_mask = offset_W + n_W; int offset_tmp = offset_mask + n_mask; - auto G = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_G, offset_G + n_G)); - auto W = Kokkos::subview(_TMPView, Kokkos::ALL, - Kokkos::make_pair(offset_W, offset_W + n_W)); + auto G = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_G, offset_G + n_G)); + auto W = Kokkos::subview(_TMPView, Kokkos::ALL, Kokkos::make_pair(offset_W, offset_W + n_W)); auto mask = Kokkos::subview(_TMPView, Kokkos::ALL, offset_mask); auto tmp = Kokkos::subview(_TMPView, Kokkos::ALL, offset_tmp); @@ -108,33 +99,29 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( TeamDot::invoke(member, W, W, tmp); member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - tmp(i) = 
ATM::sqrt(tmp(i)); - handle.set_norm(member.league_rank(), i, 0, tmp(i)); - if (tmp(i) > max_tolerance) { - mask(i) = 1; - G(i, 0) = tmp(i); - tmp(i) = 1. / tmp(i); - } else { - handle.set_iteration(member.league_rank(), i, 0); - mask(i) = 0; - G(i, 0) = 0.; - tmp(i) = 0.; - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), [&](const OrdinalType& i) { + tmp(i) = ATM::sqrt(tmp(i)); + handle.set_norm(member.league_rank(), i, 0, tmp(i)); + if (tmp(i) > max_tolerance) { + mask(i) = 1; + G(i, 0) = tmp(i); + tmp(i) = 1. / tmp(i); + } else { + handle.set_iteration(member.league_rank(), i, 0); + mask(i) = 0; + G(i, 0) = 0.; + tmp(i) = 0.; + } + }); member.team_barrier(); // Finish writing to tmp auto V_0 = Kokkos::subview(V_view, Kokkos::ALL, 0, Kokkos::ALL); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices( - iTemp, numRows, numMatrices, iRow, iMatrix); - V_0(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); + V_0(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); + }); int status = 1; // int number_not_converged = 0; @@ -150,18 +137,14 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( member.team_barrier(); if (handle.get_ortho_strategy() == 0) { - auto V_old = Kokkos::subview( - V_view, Kokkos::ALL, Kokkos::make_pair(0, (int)j + 1), Kokkos::ALL); - auto H_old = Kokkos::subview(H_view, Kokkos::ALL, j, - Kokkos::make_pair(0, (int)j + 1)); + auto V_old = Kokkos::subview(V_view, Kokkos::ALL, Kokkos::make_pair(0, (int)j + 1), Kokkos::ALL); + auto H_old = Kokkos::subview(H_view, Kokkos::ALL, j, Kokkos::make_pair(0, (int)j + 1)); // Inner products - TeamGemv::invoke( - member, 1, V_old, W, 0, H_old); + TeamGemv::invoke(member, 1, V_old, 
W, 0, H_old); member.team_barrier(); // Update - TeamGemv::invoke( - member, -1, V_old, H_old, 1, W); + TeamGemv::invoke(member, -1, V_old, H_old, 1, W); member.team_barrier(); // Finish writing to W } if (handle.get_ortho_strategy() == 1) { @@ -169,12 +152,10 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( auto V_i = Kokkos::subview(V_view, Kokkos::ALL, i, Kokkos::ALL); TeamDot::invoke(member, W, V_i, tmp); member.team_barrier(); - TeamCopy1D::invoke(member, tmp, - Kokkos::subview(H_view, Kokkos::ALL, j, i)); + TeamCopy1D::invoke(member, tmp, Kokkos::subview(H_view, Kokkos::ALL, j, i)); member.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); }); member.team_barrier(); // Finish writing to tmp @@ -185,82 +166,71 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( TeamDot::invoke(member, W, W, tmp); member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - H_view(i, j, j + 1) = ATM::sqrt(tmp(i)); - tmp(i) = H_view(i, j, j + 1) > max_tolerance - ? 1. / H_view(i, j, j + 1) - : 0.; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), [&](const OrdinalType& i) { + H_view(i, j, j + 1) = ATM::sqrt(tmp(i)); + tmp(i) = H_view(i, j, j + 1) > max_tolerance ? 1. 
/ H_view(i, j, j + 1) : 0.; + }); member.team_barrier(); if (j + 1 < maximum_iteration) { auto V_n = Kokkos::subview(V_view, Kokkos::ALL, j + 1, Kokkos::ALL); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices( - iTemp, numRows, numMatrices, iRow, iMatrix); - V_n(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); + V_n(iMatrix, iRow) = W(iMatrix, iRow) * tmp(iMatrix); + }); member.team_barrier(); } - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& l) { - // Apply the previous Givens rotations: - auto H_j = Kokkos::subview(H_view, l, j, Kokkos::ALL); - auto Givens_0_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 0); - auto Givens_1_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 1); - - if (mask(l) == 1.) 
{ - for (size_t i = 0; i < j; ++i) { - auto tmp1 = Givens_0_l(i) * H_j(i) + Givens_1_l(i) * H_j(i + 1); - auto tmp2 = -Givens_1_l(i) * H_j(i) + Givens_0_l(i) * H_j(i + 1); - H_j(i) = tmp1; - H_j(i + 1) = tmp2; - } - - // Compute the new Givens rotation: - Kokkos::pair - G_new(1, 0); - typename VectorViewType::non_const_value_type alpha = 0; - SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha); - - Givens_0_l(j) = G_new.first; - Givens_1_l(j) = G_new.second; - - // Apply the new Givens rotation: - auto tmp1 = Givens_0_l(j) * H_j(j) + Givens_1_l(j) * H_j(j + 1); - auto tmp2 = -Givens_1_l(j) * H_j(j) + Givens_0_l(j) * H_j(j + 1); - H_j(j) = tmp1; - H_j(j + 1) = tmp2; - - G(l, j + 1) = -Givens_1_l(j) * G(l, j); - G(l, j) *= Givens_0_l(j); - } else { - H_j(j) = 1.; - G(l, j + 1) = 0.; - } - - auto res_norm = - Kokkos::ArithTraits::abs(G(l, j + 1)) / G(l, 0); - - handle.set_norm(member.league_rank(), l, j + 1, res_norm); - - if (mask(l) == 1. && res_norm < tolerance) { - mask(l) = 0.; - G(l, j + 1) = 0.; - handle.set_iteration(member.league_rank(), l, j + 1); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), [&](const OrdinalType& l) { + // Apply the previous Givens rotations: + auto H_j = Kokkos::subview(H_view, l, j, Kokkos::ALL); + auto Givens_0_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 0); + auto Givens_1_l = Kokkos::subview(Givens_view, l, Kokkos::ALL, 1); + + if (mask(l) == 1.) 
{ + for (size_t i = 0; i < j; ++i) { + auto tmp1 = Givens_0_l(i) * H_j(i) + Givens_1_l(i) * H_j(i + 1); + auto tmp2 = -Givens_1_l(i) * H_j(i) + Givens_0_l(i) * H_j(i + 1); + H_j(i) = tmp1; + H_j(i + 1) = tmp2; + } + + // Compute the new Givens rotation: + Kokkos::pair + G_new(1, 0); + typename VectorViewType::non_const_value_type alpha = 0; + SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha); + + Givens_0_l(j) = G_new.first; + Givens_1_l(j) = G_new.second; + + // Apply the new Givens rotation: + auto tmp1 = Givens_0_l(j) * H_j(j) + Givens_1_l(j) * H_j(j + 1); + auto tmp2 = -Givens_1_l(j) * H_j(j) + Givens_0_l(j) * H_j(j + 1); + H_j(j) = tmp1; + H_j(j + 1) = tmp2; + + G(l, j + 1) = -Givens_1_l(j) * G(l, j); + G(l, j) *= Givens_0_l(j); + } else { + H_j(j) = 1.; + G(l, j + 1) = 0.; + } + + auto res_norm = Kokkos::ArithTraits::abs(G(l, j + 1)) / G(l, 0); + + handle.set_norm(member.league_rank(), l, j + 1, res_norm); + + if (mask(l) == 1. && res_norm < tolerance) { + mask(l) = 0.; + G(l, j + 1) = 0.; + handle.set_iteration(member.league_rank(), l, j + 1); + } + }); member.team_barrier(); bool all_converged = true; - for (OrdinalType l = 0; l < numMatrices; ++l) - all_converged = (all_converged && mask(l) == 0.); + for (OrdinalType l = 0; l < numMatrices; ++l) all_converged = (all_converged && mask(l) == 0.); if (all_converged) { maximum_iteration = j + 1; break; @@ -271,30 +241,25 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( auto first_indices = Kokkos::make_pair(0, (int)maximum_iteration); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices), - [&](const OrdinalType& l) { - auto A_l = Kokkos::subview(H_view, l, first_indices, first_indices); - auto B_l = Kokkos::subview(G, l, first_indices); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices), [&](const OrdinalType& l) { + auto A_l = Kokkos::subview(H_view, l, first_indices, first_indices); + auto B_l = Kokkos::subview(G, l, first_indices); - 
SerialTrsm::invoke(1, A_l, B_l); - }); + SerialTrsm::invoke(1, A_l, B_l); + }); member.team_barrier(); // Finish writing to G if (handle.get_ortho_strategy() == 0) { TeamGemv::invoke( - member, 1, - Kokkos::subview(V_view, Kokkos::ALL, first_indices, Kokkos::ALL), + member, 1, Kokkos::subview(V_view, Kokkos::ALL, first_indices, Kokkos::ALL), Kokkos::subview(G, Kokkos::ALL, first_indices), 1, _X); member.team_barrier(); // Finish writing to _X } if (handle.get_ortho_strategy() == 1) { for (size_t j = 0; j < maximum_iteration; ++j) { - TeamAxpy::invoke( - member, Kokkos::subview(G, Kokkos::ALL, j), - Kokkos::subview(V_view, Kokkos::ALL, j, Kokkos::ALL), _X); + TeamAxpy::invoke(member, Kokkos::subview(G, Kokkos::ALL, j), + Kokkos::subview(V_view, Kokkos::ALL, j, Kokkos::ALL), _X); member.team_barrier(); // Finish writing to _X } } @@ -309,120 +274,97 @@ KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( TeamDot::invoke(member, W, W, tmp); member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), - [&](const OrdinalType& i) { - tmp(i) = ATM::sqrt(tmp(i)); - handle.set_last_norm(member.league_rank(), i, - tmp(i)); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), [&](const OrdinalType& i) { + tmp(i) = ATM::sqrt(tmp(i)); + handle.set_last_norm(member.league_rank(), i, tmp(i)); + }); } return status; } template -template -KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const PrecOperatorType& P, - const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const PrecOperatorType& P, const KrylovHandleType& handle) { const int strategy = handle.get_memory_strategy(); if (strategy == 0) { const int first_matrix = handle.first_index(member.league_rank()); const int last_matrix = 
handle.last_index(member.league_rank()); - auto _ArnoldiView = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::ALL); + auto _ArnoldiView = + Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, Kokkos::ALL); const int numMatrices = _X.extent(0); const int numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? handle.get_max_iteration() : numRows; int n_G = maximum_iteration + 1; int n_W = numRows; int n_mask = 1; int n_tmp = 1; - using ScratchPadVectorViewType = Kokkos::View< - typename VectorViewType::non_const_value_type**, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = + Kokkos::View; - ScratchPadVectorViewType _TMPView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - n_G + n_W + n_mask + n_tmp); + ScratchPadVectorViewType _TMPView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, + n_G + n_W + n_mask + n_tmp); - return invoke(member, A, _B, _X, P, handle, _ArnoldiView, - _TMPView); + return invoke(member, A, _B, _X, P, handle, + _ArnoldiView, _TMPView); } if (strategy == 1) { const int first_matrix = handle.first_index(member.league_rank()); const int last_matrix = handle.last_index(member.league_rank()); - auto _ArnoldiView = Kokkos::subview( - handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL, Kokkos::ALL); + auto _ArnoldiView = + Kokkos::subview(handle.Arnoldi_view, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL, Kokkos::ALL); - auto _TMPView = Kokkos::subview( - handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto _TMPView = Kokkos::subview(handle.tmp_view, Kokkos::make_pair(first_matrix, last_matrix), 
Kokkos::ALL); - return invoke(member, A, _B, _X, P, handle, _ArnoldiView, - _TMPView); + return invoke(member, A, _B, _X, P, handle, + _ArnoldiView, _TMPView); } if (strategy == 2) { - using ScratchPadArnoldiViewType = Kokkos::View< - typename VectorViewType::non_const_value_type***, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadArnoldiViewType = + Kokkos::View; - using ScratchPadVectorViewType = Kokkos::View< - typename VectorViewType::non_const_value_type**, - typename VectorViewType::array_layout, - typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = + Kokkos::View; const int numMatrices = _X.extent(0); const int numRows = _X.extent(1); - size_t maximum_iteration = handle.get_max_iteration() < numRows - ? handle.get_max_iteration() - : numRows; + size_t maximum_iteration = handle.get_max_iteration() < numRows ? handle.get_max_iteration() : numRows; int n_G = maximum_iteration + 1; int n_W = numRows; int n_mask = 1; int n_tmp = 1; - ScratchPadArnoldiViewType _ArnoldiView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - maximum_iteration, numRows + maximum_iteration + 3); + ScratchPadArnoldiViewType _ArnoldiView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, + maximum_iteration, numRows + maximum_iteration + 3); - ScratchPadVectorViewType _TMPView( - member.team_scratch(handle.get_scratch_pad_level()), numMatrices, - n_G + n_W + n_mask + n_tmp); + ScratchPadVectorViewType _TMPView(member.team_scratch(handle.get_scratch_pad_level()), numMatrices, + n_G + n_W + n_mask + n_tmp); - return invoke(member, A, _B, _X, P, handle, _ArnoldiView, - _TMPView); + return invoke(member, A, _B, _X, P, handle, + _ArnoldiView, _TMPView); } return 0; } template -template -KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& 
_X, const KrylovHandleType& handle) { +template +KOKKOS_INLINE_FUNCTION int TeamGMRES::invoke(const MemberType& member, const OperatorType& A, + const VectorViewType& _B, const VectorViewType& _X, + const KrylovHandleType& handle) { Identity P; - return invoke(member, A, _B, _X, P, - handle); + return invoke(member, A, _B, _X, P, handle); } } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp index b96dc79a8008..3f76ee3d9fbe 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp @@ -26,35 +26,24 @@ namespace KokkosBatched { /// Serial Internal Impl /// ==================== struct SerialSpmvInternal { - template + template KOKKOS_INLINE_FUNCTION static int invoke( - const OrdinalType numMatrices, const OrdinalType numRows, - const ScalarType* KOKKOS_RESTRICT alpha, const OrdinalType alphas0, - const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, - const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, - const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, - const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1) { + const OrdinalType numMatrices, const OrdinalType numRows, const ScalarType* KOKKOS_RESTRICT alpha, + const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const 
OrdinalType xs0, const OrdinalType xs1, const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1) { for (OrdinalType iMatrix = 0; iMatrix < numMatrices; ++iMatrix) { for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; - ValueType sum = 0; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - sum += values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; + sum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; } sum *= alpha[iMatrix * alphas0]; @@ -62,8 +51,7 @@ struct SerialSpmvInternal { if (dobeta == 0) { Y[iMatrix * ys0 + iRow * ys1] = sum; } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; + Y[iMatrix * ys0 + iRow * ys1] = beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; } } } @@ -71,33 +59,26 @@ struct SerialSpmvInternal { return 0; } - template - KOKKOS_INLINE_FUNCTION static int invoke( - const OrdinalType numMatrices, const OrdinalType numRows, - const ScalarType alpha, const ValueType* KOKKOS_RESTRICT values, - const OrdinalType valuess0, const OrdinalType valuess1, - const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType 
ys0, - const OrdinalType ys1) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const OrdinalType numMatrices, const OrdinalType numRows, + const ScalarType alpha, const ValueType* KOKKOS_RESTRICT values, + const OrdinalType valuess0, const OrdinalType valuess1, + const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1) { for (OrdinalType iMatrix = 0; iMatrix < numMatrices; ++iMatrix) { for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; - ValueType sum = 0; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - sum += values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; + sum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; } sum *= alpha; @@ -105,8 +86,7 @@ struct SerialSpmvInternal { if (dobeta == 0) { Y[iMatrix * ys0 + iRow * ys1] = sum; } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + Y[iMatrix * ys0 + iRow * ys1] = beta * Y[iMatrix * ys0 + iRow * ys1] + sum; } } } @@ -117,253 +97,136 @@ struct SerialSpmvInternal { template <> struct SerialSpmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const alphaViewType& alpha, const ValuesViewType& values, - const IntView& row_ptr, const IntView& 
colIndices, const xViewType& X, - const betaViewType& beta, const yViewType& Y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType& alpha, const ValuesViewType& values, + const IntView& row_ptr, const IntView& colIndices, const xViewType& X, + const betaViewType& beta, const yViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: IntView is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: xViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: yViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: alphaViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: betaViewType is not a Kokkos::View."); - - static_assert(ValuesViewType::rank == 2, - "KokkosBatched::spmv: ValuesViewType must have rank 2."); - static_assert(IntView::rank == 1, - "KokkosBatched::spmv: IntView must have rank 2."); - static_assert(xViewType::rank == 2, - "KokkosBatched::spmv: xViewType must have rank 2."); - static_assert(yViewType::rank == 2, - "KokkosBatched::spmv: yViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::spmv: alphaViewType must have rank 1."); - static_assert(betaViewType::rank == 1, - "KokkosBatched::spmv: betaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: 
alphaViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: betaViewType is not a Kokkos::View."); + + static_assert(ValuesViewType::rank == 2, "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::rank == 1, "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::rank == 2, "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::rank == 2, "KokkosBatched::spmv: yViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::spmv: alphaViewType must have rank 1."); + static_assert(betaViewType::rank == 1, "KokkosBatched::spmv: betaViewType must have rank 1."); // Check compatibility of dimensions at run time. if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " - "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and alpha do not match: " - "X: %d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and alpha do not match: " "X: %d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } if (X.extent(0) != beta.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and beta do not match: X: " - "%d x %d, beta: %d\n", - 
(int)X.extent(0), (int)X.extent(1), (int)beta.extent(0)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and beta do not match: X: " "%d x %d, beta: %d\n", (int)X.extent(0), (int)X.extent(1), (int)beta.extent(0)); -#endif return 1; } if (X.extent(0) != values.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and the first dimension " - "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and the first dimension " "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), (int)values.extent(1)); return 1; } if (colIndices.extent(0) != values.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of colIndices and the second " - "dimension of values do not match: colIndices: %d , values: %d x " - "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of colIndices and the second " "dimension of values do not match: colIndices: %d , values: %d x " "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)colIndices.extent(0), (int)values.extent(0), (int)values.extent(1)); return 1; } if (row_ptr.extent(0) - 1 != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " - "of X do not match: colIndices (-1): %d , values: %d x %d\n", - (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " "of X do not match: 
colIndices (-1): %d , values: %d x %d\n", (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#endif return 1; } #endif return SerialSpmvInternal::template invoke< - typename alphaViewType::non_const_value_type, - typename ValuesViewType::non_const_value_type, - typename IntView::non_const_value_type, - typename ValuesViewType::array_layout, dobeta>( - X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), values.data(), - values.stride_0(), values.stride_1(), row_ptr.data(), - row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), - X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), - Y.stride_0(), Y.stride_1()); + typename alphaViewType::non_const_value_type, typename ValuesViewType::non_const_value_type, + typename IntView::non_const_value_type, typename ValuesViewType::array_layout, dobeta>( + X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), values.data(), values.stride_0(), values.stride_1(), + row_ptr.data(), row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), X.stride_0(), + X.stride_1(), beta.data(), beta.stride_0(), Y.data(), Y.stride_0(), Y.stride_1()); } - template + template KOKKOS_INLINE_FUNCTION static int invoke( - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type& alpha, - const ValuesViewType& values, const IntView& row_ptr, - const IntView& colIndices, const xViewType& X, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type& beta, + const typename Kokkos::ArithTraits::mag_type& alpha, + const ValuesViewType& values, const IntView& row_ptr, const IntView& colIndices, const xViewType& X, + const typename Kokkos::ArithTraits::mag_type& beta, const yViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: IntView is not a 
Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: xViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: yViewType is not a Kokkos::View."); - static_assert(ValuesViewType::rank == 2, - "KokkosBatched::spmv: ValuesViewType must have rank 2."); - static_assert(IntView::rank == 1, - "KokkosBatched::spmv: IntView must have rank 2."); - static_assert(xViewType::rank == 2, - "KokkosBatched::spmv: xViewType must have rank 2."); - static_assert(yViewType::rank == 2, - "KokkosBatched::spmv: yViewType must have rank 2."); + static_assert(ValuesViewType::rank == 2, "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::rank == 1, "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::rank == 2, "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::rank == 2, "KokkosBatched::spmv: yViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " - "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != values.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and the first dimension " - "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and the first dimension " "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), (int)values.extent(1)); return 1; } if (colIndices.extent(0) != values.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of colIndices and the second " - "dimension of values do not match: colIndices: %d , values: %d x " - "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of colIndices and the second " "dimension of values do not match: colIndices: %d , values: %d x " "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)colIndices.extent(0), (int)values.extent(0), (int)values.extent(1)); return 1; } if (row_ptr.extent(0) - 1 != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - 
KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " - "of X do not match: colIndices (-1): %d , values: %d x %d\n", - (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " "of X do not match: colIndices (-1): %d , values: %d x %d\n", (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#endif return 1; } #endif return SerialSpmvInternal::template invoke< - typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type, - typename ValuesViewType::non_const_value_type, - typename IntView::non_const_value_type, + typename Kokkos::ArithTraits::mag_type, + typename ValuesViewType::non_const_value_type, typename IntView::non_const_value_type, typename ValuesViewType::array_layout, dobeta>( - X.extent(0), X.extent(1), alpha, values.data(), values.stride_0(), - values.stride_1(), row_ptr.data(), row_ptr.stride_0(), - colIndices.data(), colIndices.stride_0(), X.data(), X.stride_0(), - X.stride_1(), beta, Y.data(), Y.stride_0(), Y.stride_1()); + X.extent(0), X.extent(1), alpha, values.data(), values.stride_0(), values.stride_1(), row_ptr.data(), + row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), X.stride_0(), X.stride_1(), beta, + Y.data(), Y.stride_0(), Y.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp index d7379777be0e..4df4b95e2c2c 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp @@ -27,50 +27,40 @@ namespace KokkosBatched { /// TeamVector Internal Impl /// ==================== struct TeamVectorSpmvInternal { - template + template KOKKOS_INLINE_FUNCTION static int invoke( - 
const MemberType& member, const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType* KOKKOS_RESTRICT alpha, - const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, - const OrdinalType valuess0, const OrdinalType valuess1, - const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, + const MemberType& member, const OrdinalType numMatrices, const OrdinalType numRows, + const ScalarType* KOKKOS_RESTRICT alpha, const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, + const OrdinalType valuess0, const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, + const ValueType* KOKKOS_RESTRICT X, const OrdinalType xs0, const OrdinalType xs1, const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1); - - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, - const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, - const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1); + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1); + + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OrdinalType numMatrices, + const OrdinalType numRows, const 
ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1); }; -template +template KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( - const MemberType& member, const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType* KOKKOS_RESTRICT alpha, - const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, - const OrdinalType valuess0, const OrdinalType valuess1, - const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, + const MemberType& member, const OrdinalType numMatrices, const OrdinalType numRows, + const ScalarType* KOKKOS_RESTRICT alpha, const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, + const OrdinalType valuess0, const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, + const ValueType* KOKKOS_RESTRICT X, const OrdinalType xs0, const OrdinalType xs1, const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1) { + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1) { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) if (member.team_size() == 1) { if (N_team > 1 && valuess0 == 1) { @@ -87,8 +77,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( 
beta_v.loadAligned(beta); for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; VectorType sum_v(0); @@ -96,11 +85,8 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( #pragma unroll #endif for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - values_v.loadAligned( - &values[(row_ptr[iRow * row_ptrs0] + iEntry) * valuess1]); - x_v.loadAligned(&X[colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]); + values_v.loadAligned(&values[(row_ptr[iRow * row_ptrs0] + iEntry) * valuess1]); + x_v.loadAligned(&X[colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]); sum_v += values_v * x_v; } sum_v *= alpha_v; @@ -113,20 +99,14 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( } else { for (unsigned iMatrix = 0; iMatrix < unsigned(numMatrices); ++iMatrix) { for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; ValueType sum = 0; Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(member, rowLength), [&](const OrdinalType& iEntry, ValueType& lsum) { - lsum += - values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; + lsum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; }, sum); @@ -135,63 +115,50 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( if (dobeta == 0) { Y[iMatrix * ys0 + iRow * ys1] = sum; } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta[iMatrix * betas0] * Y[iMatrix * ys0 
+ iRow * ys1] + sum; + Y[iMatrix * ys0 + iRow * ys1] = beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; } } } } } else { #endif - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices(iTemp, numRows, numMatrices, iRow, - iMatrix); - - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; - ValueType sum = 0; + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); + + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - sum += values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; - } + for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; + } - sum *= alpha[iMatrix * alphas0]; + sum *= alpha[iMatrix * alphas0]; - if (dobeta == 0) { - Y[iMatrix * ys0 + iRow * ys1] = sum; - } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; - } - }); + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + }); #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) } #endif return 0; } -template +template KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( - const MemberType& member, const OrdinalType numMatrices, - 
const OrdinalType numRows, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, - const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, - const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const MemberType& member, const OrdinalType numMatrices, const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, const OrdinalType valuess1, + const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1) { + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1) { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) if (member.team_size() == 1) { if (N_team > 1 && valuess0 == 1 && valuess1 % N_team == 0) { @@ -205,8 +172,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( VectorType alpha_v(alpha), beta_v(beta), values_v, y_v, x_v; for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; VectorType sum_v(0); @@ -214,11 +180,8 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( #pragma unroll #endif for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - values_v.loadAligned( - &values[(row_ptr[iRow * row_ptrs0] + iEntry) * valuess1]); - x_v.loadAligned(&X[colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]); + values_v.loadAligned(&values[(row_ptr[iRow * row_ptrs0] + iEntry) * valuess1]); + 
x_v.loadAligned(&X[colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]); sum_v += values_v * x_v; } sum_v *= alpha_v; @@ -231,20 +194,14 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( } else { for (unsigned iMatrix = 0; iMatrix < unsigned(numMatrices); ++iMatrix) { for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; ValueType sum = 0; Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(member, rowLength), [&](const OrdinalType& iEntry, ValueType& lsum) { - lsum += - values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; + lsum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; }, sum); @@ -253,45 +210,35 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( if (dobeta == 0) { Y[iMatrix * ys0 + iRow * ys1] = sum; } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + Y[iMatrix * ys0 + iRow * ys1] = beta * Y[iMatrix * ys0 + iRow * ys1] + sum; } } } } } else { #endif - Kokkos::parallel_for( - Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices(iTemp, numRows, numMatrices, iRow, - iMatrix); - - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; - ValueType sum = 0; + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); + + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * 
row_ptrs0]; + ValueType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - sum += values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; - } + for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; + } - sum *= alpha; + sum *= alpha; - if (dobeta == 0) { - Y[iMatrix * ys0 + iRow * ys1] = sum; - } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta * Y[iMatrix * ys0 + iRow * ys1] + sum; - } - }); + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + }); #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) } #endif @@ -300,272 +247,150 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( template struct TeamVectorSpmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const alphaViewType& alpha, - const ValuesViewType& values, const IntView& row_ptr, - const IntView& colIndices, const xViewType& X, const betaViewType& beta, - const yViewType& Y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const alphaViewType& alpha, + const ValuesViewType& values, const IntView& row_ptr, + const IntView& colIndices, const xViewType& X, const betaViewType& beta, + const yViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: IntView is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: xViewType is not a 
Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: yViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: alphaViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: betaViewType is not a Kokkos::View."); - - static_assert(ValuesViewType::rank == 2, - "KokkosBatched::spmv: ValuesViewType must have rank 2."); - static_assert(IntView::rank == 1, - "KokkosBatched::spmv: IntView must have rank 2."); - static_assert(xViewType::rank == 2, - "KokkosBatched::spmv: xViewType must have rank 2."); - static_assert(yViewType::rank == 2, - "KokkosBatched::spmv: yViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::spmv: alphaViewType must have rank 1."); - static_assert(betaViewType::rank == 1, - "KokkosBatched::spmv: betaViewType must have rank 1."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::spmv: alphaViewType must have rank 1."); - static_assert(betaViewType::rank == 1, - "KokkosBatched::spmv: betaViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: alphaViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: betaViewType is not a Kokkos::View."); + + static_assert(ValuesViewType::rank == 2, "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::rank == 1, "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::rank == 2, "KokkosBatched::spmv: xViewType must have rank 2."); + 
static_assert(yViewType::rank == 2, "KokkosBatched::spmv: yViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::spmv: alphaViewType must have rank 1."); + static_assert(betaViewType::rank == 1, "KokkosBatched::spmv: betaViewType must have rank 1."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::spmv: alphaViewType must have rank 1."); + static_assert(betaViewType::rank == 1, "KokkosBatched::spmv: betaViewType must have rank 1."); // Check compatibility of dimensions at run time. if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " - "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and alpha do not match: " - "X: %d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and alpha do not match: " "X: %d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } if (X.extent(0) != beta.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and beta do not match: X: " - "%d x %d, beta: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)beta.extent(0)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and beta do not match: X: " "%d x %d, beta: %d\n", (int)X.extent(0), (int)X.extent(1), (int)beta.extent(0)); 
-#endif return 1; } if (X.extent(0) != values.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and the first dimension " - "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and the first dimension " "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), (int)values.extent(1)); return 1; } if (colIndices.extent(0) != values.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of colIndices and the second " - "dimension of values do not match: colIndices: %d , values: %d x " - "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of colIndices and the second " "dimension of values do not match: colIndices: %d , values: %d x " "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)colIndices.extent(0), (int)values.extent(0), (int)values.extent(1)); return 1; } if (row_ptr.extent(0) - 1 != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " - "of X do not match: colIndices (-1): %d , values: %d x %d\n", - (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " "of X do not match: colIndices (-1): %d , values: %d x %d\n", (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#endif return 1; } #endif if (values.extent(0) == 1) { return KokkosSparse::Experimental::team_vector_spmv( - member, 
alpha.data()[0], Kokkos::subview(values, 0, Kokkos::ALL), - row_ptr, colIndices, Kokkos::subview(X, 0, Kokkos::ALL), - beta.data()[0], Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); + member, alpha.data()[0], Kokkos::subview(values, 0, Kokkos::ALL), row_ptr, colIndices, + Kokkos::subview(X, 0, Kokkos::ALL), beta.data()[0], Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); } return TeamVectorSpmvInternal::template invoke< - MemberType, typename alphaViewType::non_const_value_type, - typename ValuesViewType::non_const_value_type, - typename IntView::non_const_value_type, - typename ValuesViewType::array_layout, dobeta, N_team>( - member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), - values.data(), values.stride_0(), values.stride_1(), row_ptr.data(), - row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), - X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), - Y.stride_0(), Y.stride_1()); + MemberType, typename alphaViewType::non_const_value_type, typename ValuesViewType::non_const_value_type, + typename IntView::non_const_value_type, typename ValuesViewType::array_layout, dobeta, N_team>( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), values.data(), values.stride_0(), + values.stride_1(), row_ptr.data(), row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), + X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), Y.stride_0(), Y.stride_1()); } - template + template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType& member, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type& alpha, - const ValuesViewType& values, const IntView& row_ptr, - const IntView& colIndices, const xViewType& X, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type& beta, + const typename Kokkos::ArithTraits::mag_type& alpha, + const ValuesViewType& values, const IntView& row_ptr, const IntView& colIndices, 
const xViewType& X, + const typename Kokkos::ArithTraits::mag_type& beta, const yViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: IntView is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: xViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: yViewType is not a Kokkos::View."); - - static_assert(ValuesViewType::rank == 2, - "KokkosBatched::spmv: ValuesViewType must have rank 2."); - static_assert(IntView::rank == 1, - "KokkosBatched::spmv: IntView must have rank 2."); - static_assert(xViewType::rank == 2, - "KokkosBatched::spmv: xViewType must have rank 2."); - static_assert(yViewType::rank == 2, - "KokkosBatched::spmv: yViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + + static_assert(ValuesViewType::rank == 2, "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::rank == 1, "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::rank == 2, "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::rank == 2, "KokkosBatched::spmv: yViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " - "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != values.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and the first dimension " - "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and the first dimension " "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), (int)values.extent(1)); return 1; } if (colIndices.extent(0) != values.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of colIndices and the second " - "dimension of values do not match: colIndices: %d , values: %d x " - "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of colIndices and the second " "dimension of values do not match: colIndices: %d , values: %d x " "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)colIndices.extent(0), (int)values.extent(0), (int)values.extent(1)); return 1; } if (row_ptr.extent(0) - 1 != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - 
KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " - "of X do not match: colIndices (-1): %d , values: %d x %d\n", - (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " "of X do not match: colIndices (-1): %d , values: %d x %d\n", (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#endif return 1; } #endif if (values.extent(0) == 1) { return KokkosSparse::Experimental::team_vector_spmv( - member, alpha, Kokkos::subview(values, 0, Kokkos::ALL), row_ptr, - colIndices, Kokkos::subview(X, 0, Kokkos::ALL), beta, - Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); + member, alpha, Kokkos::subview(values, 0, Kokkos::ALL), row_ptr, colIndices, + Kokkos::subview(X, 0, Kokkos::ALL), beta, Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); } return TeamVectorSpmvInternal::template invoke< - MemberType, - typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type, - typename ValuesViewType::non_const_value_type, - typename IntView::non_const_value_type, + MemberType, typename Kokkos::ArithTraits::mag_type, + typename ValuesViewType::non_const_value_type, typename IntView::non_const_value_type, typename ValuesViewType::array_layout, dobeta, N_team>( - member, X.extent(0), X.extent(1), alpha, values.data(), - values.stride_0(), values.stride_1(), row_ptr.data(), - row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), - X.stride_0(), X.stride_1(), beta, Y.data(), Y.stride_0(), Y.stride_1()); + member, X.extent(0), X.extent(1), alpha, values.data(), values.stride_0(), values.stride_1(), row_ptr.data(), + row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), X.stride_0(), X.stride_1(), beta, + Y.data(), Y.stride_0(), Y.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp 
b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp index beb53521f08a..9e3286161223 100644 --- a/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp +++ b/packages/kokkos-kernels/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp @@ -27,396 +27,245 @@ namespace KokkosBatched { /// Team Internal Impl /// ==================== struct TeamSpmvInternal { - template + template KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType* KOKKOS_RESTRICT alpha, - const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, - const OrdinalType valuess0, const OrdinalType valuess1, - const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, + const MemberType& member, const OrdinalType numMatrices, const OrdinalType numRows, + const ScalarType* KOKKOS_RESTRICT alpha, const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, + const OrdinalType valuess0, const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, + const ValueType* KOKKOS_RESTRICT X, const OrdinalType xs0, const OrdinalType xs1, const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1); + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1); - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, - const OrdinalType valuess1, const OrdinalType* 
KOKKOS_RESTRICT row_ptr, - const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OrdinalType numMatrices, + const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1); }; -template +template KOKKOS_INLINE_FUNCTION int TeamSpmvInternal::invoke( - const MemberType& member, const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType* KOKKOS_RESTRICT alpha, - const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, - const OrdinalType valuess0, const OrdinalType valuess1, - const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, - const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, - const OrdinalType xs0, const OrdinalType xs1, + const MemberType& member, const OrdinalType numMatrices, const OrdinalType numRows, + const ScalarType* KOKKOS_RESTRICT alpha, const OrdinalType alphas0, const ValueType* KOKKOS_RESTRICT values, + const OrdinalType valuess0, const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, + const ValueType* KOKKOS_RESTRICT X, const OrdinalType xs0, const 
OrdinalType xs1, const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices(iTemp, numRows, numMatrices, iRow, - iMatrix); + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; - ValueType sum = 0; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - sum += values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; - } + for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; + } - sum *= alpha[iMatrix * alphas0]; + sum *= alpha[iMatrix * alphas0]; - if (dobeta == 0) { - Y[iMatrix * ys0 + iRow * ys1] = sum; - } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; - } - }); + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = beta[iMatrix * betas0] * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + }); return 0; } -template +template KOKKOS_INLINE_FUNCTION int TeamSpmvInternal::invoke( - const MemberType& member, 
const OrdinalType numMatrices, - const OrdinalType numRows, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, - const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, - const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, - const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const MemberType& member, const OrdinalType numMatrices, const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, const OrdinalType valuess1, + const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, - /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, - const OrdinalType ys1) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), - [&](const OrdinalType& iTemp) { - OrdinalType iRow, iMatrix; - getIndices(iTemp, numRows, numMatrices, iRow, - iMatrix); + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, iMatrix); - const OrdinalType rowLength = - row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; - ValueType sum = 0; + const OrdinalType rowLength = row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { - sum += values[iMatrix * valuess0 + - (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * - X[iMatrix * xs0 + - colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * - colIndicess0] * - xs1]; - } + for (OrdinalType 
iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * colIndicess0] * xs1]; + } - sum *= alpha; + sum *= alpha; - if (dobeta == 0) { - Y[iMatrix * ys0 + iRow * ys1] = sum; - } else { - Y[iMatrix * ys0 + iRow * ys1] = - beta * Y[iMatrix * ys0 + iRow * ys1] + sum; - } - }); + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + }); return 0; } template struct TeamSpmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const alphaViewType& alpha, - const ValuesViewType& values, const IntView& row_ptr, - const IntView& colIndices, const xViewType& X, const betaViewType& beta, - const yViewType& Y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const alphaViewType& alpha, + const ValuesViewType& values, const IntView& row_ptr, + const IntView& colIndices, const xViewType& X, const betaViewType& beta, + const yViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: IntView is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: xViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: yViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: alphaViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: betaViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: IntView is not a Kokkos::View."); + 
static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: alphaViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: betaViewType is not a Kokkos::View."); - static_assert(ValuesViewType::rank == 2, - "KokkosBatched::spmv: ValuesViewType must have rank 2."); - static_assert(IntView::rank == 1, - "KokkosBatched::spmv: IntView must have rank 2."); - static_assert(xViewType::rank == 2, - "KokkosBatched::spmv: xViewType must have rank 2."); - static_assert(yViewType::rank == 2, - "KokkosBatched::spmv: yViewType must have rank 2."); - static_assert(alphaViewType::rank == 1, - "KokkosBatched::spmv: alphaViewType must have rank 1."); - static_assert(betaViewType::rank == 1, - "KokkosBatched::spmv: betaViewType must have rank 1."); + static_assert(ValuesViewType::rank == 2, "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::rank == 1, "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::rank == 2, "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::rank == 2, "KokkosBatched::spmv: yViewType must have rank 2."); + static_assert(alphaViewType::rank == 1, "KokkosBatched::spmv: alphaViewType must have rank 1."); + static_assert(betaViewType::rank == 1, "KokkosBatched::spmv: betaViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " - "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != alpha.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and alpha do not match: " - "X: %d x %d, alpha: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and alpha do not match: " "X: %d x %d, alpha: %d\n", (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); -#endif return 1; } if (X.extent(0) != beta.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and beta do not match: X: " - "%d x %d, beta: %d\n", - (int)X.extent(0), (int)X.extent(1), (int)beta.extent(0)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and beta do not match: X: " "%d x %d, beta: %d\n", (int)X.extent(0), (int)X.extent(1), (int)beta.extent(0)); -#endif return 1; } if (X.extent(0) != values.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and the first dimension " - "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and the first dimension " "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), 
(int)values.extent(0), - (int)values.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), (int)values.extent(1)); return 1; } if (colIndices.extent(0) != values.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of colIndices and the second " - "dimension of values do not match: colIndices: %d , values: %d x " - "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of colIndices and the second " "dimension of values do not match: colIndices: %d , values: %d x " "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)colIndices.extent(0), (int)values.extent(0), (int)values.extent(1)); return 1; } if (row_ptr.extent(0) - 1 != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " - "of X do not match: colIndices (-1): %d , values: %d x %d\n", - (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " "of X do not match: colIndices (-1): %d , values: %d x %d\n", (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#endif return 1; } #endif if (values.extent(0) == 1) { return KokkosSparse::Experimental::team_spmv( - member, alpha.data()[0], Kokkos::subview(values, 0, Kokkos::ALL), - row_ptr, colIndices, Kokkos::subview(X, 0, Kokkos::ALL), - beta.data()[0], Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); + member, alpha.data()[0], Kokkos::subview(values, 0, Kokkos::ALL), row_ptr, colIndices, + Kokkos::subview(X, 0, Kokkos::ALL), beta.data()[0], Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); } return TeamSpmvInternal::template invoke< - MemberType, typename alphaViewType::non_const_value_type, - typename ValuesViewType::non_const_value_type, - typename 
IntView::non_const_value_type, - typename ValuesViewType::array_layout, dobeta>( - member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), - values.data(), values.stride_0(), values.stride_1(), row_ptr.data(), - row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), - X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), - Y.stride_0(), Y.stride_1()); + MemberType, typename alphaViewType::non_const_value_type, typename ValuesViewType::non_const_value_type, + typename IntView::non_const_value_type, typename ValuesViewType::array_layout, dobeta>( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), values.data(), values.stride_0(), + values.stride_1(), row_ptr.data(), row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), + X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), Y.stride_0(), Y.stride_1()); } - template + template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType& member, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type& alpha, - const ValuesViewType& values, const IntView& row_ptr, - const IntView& colIndices, const xViewType& X, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type& beta, + const typename Kokkos::ArithTraits::mag_type& alpha, + const ValuesViewType& values, const IntView& row_ptr, const IntView& colIndices, const xViewType& X, + const typename Kokkos::ArithTraits::mag_type& beta, const yViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: IntView is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: xViewType is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + 
static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBatched::spmv: yViewType is not a Kokkos::View."); - static_assert(ValuesViewType::rank == 2, - "KokkosBatched::spmv: ValuesViewType must have rank 2."); - static_assert(IntView::rank == 1, - "KokkosBatched::spmv: IntView must have rank 2."); - static_assert(xViewType::rank == 2, - "KokkosBatched::spmv: xViewType must have rank 2."); - static_assert(yViewType::rank == 2, - "KokkosBatched::spmv: yViewType must have rank 2."); + static_assert(ValuesViewType::rank == 2, "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::rank == 1, "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::rank == 2, "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::rank == 2, "KokkosBatched::spmv: yViewType must have rank 2."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " - "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " "%d, Y: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), - (int)Y.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); return 1; } if (X.extent(0) != values.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: First dimension of X and the first dimension " - "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: First dimension of X and the first dimension " "of values do not match: X: %d x %d, values: %d x %d\n", - (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), (int)values.extent(1)); return 1; } if (colIndices.extent(0) != values.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of colIndices and the second " - "dimension of values do not match: colIndices: %d , values: %d x " - "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of colIndices and the second " "dimension of values do not match: colIndices: %d , values: %d x " "%d\n", - (int)colIndices.extent(0), (int)values.extent(0), - (int)values.extent(1)); -#endif + (int)colIndices.extent(0), (int)values.extent(0), (int)values.extent(1)); return 1; } if (row_ptr.extent(0) - 1 != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - 
KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " - "of X do not match: colIndices (-1): %d , values: %d x %d\n", - (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#else Kokkos::printf( "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " "of X do not match: colIndices (-1): %d , values: %d x %d\n", (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); -#endif return 1; } #endif if (values.extent(0) == 1) { - return KokkosSparse::Experimental::team_spmv( - member, alpha, Kokkos::subview(values, 0, Kokkos::ALL), row_ptr, - colIndices, Kokkos::subview(X, 0, Kokkos::ALL), beta, - Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); + return KokkosSparse::Experimental::team_spmv(member, alpha, Kokkos::subview(values, 0, Kokkos::ALL), + row_ptr, colIndices, Kokkos::subview(X, 0, Kokkos::ALL), + beta, Kokkos::subview(Y, 0, Kokkos::ALL), dobeta); } return TeamSpmvInternal::template invoke< - MemberType, - typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type, - typename ValuesViewType::non_const_value_type, - typename IntView::non_const_value_type, + MemberType, typename Kokkos::ArithTraits::mag_type, + typename ValuesViewType::non_const_value_type, typename IntView::non_const_value_type, typename ValuesViewType::array_layout, dobeta>( - member, X.extent(0), X.extent(1), alpha, values.data(), - values.stride_0(), values.stride_1(), row_ptr.data(), - row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), - X.stride_0(), X.stride_1(), beta, Y.data(), Y.stride_0(), Y.stride_1()); + member, X.extent(0), X.extent(1), alpha, values.data(), values.stride_0(), values.stride_1(), row_ptr.data(), + row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), X.stride_0(), X.stride_1(), beta, + Y.data(), Y.stride_0(), Y.stride_1()); } }; diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CG.hpp 
b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CG.hpp index baa6dca42e75..cabf2eae987a 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CG.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CG.hpp @@ -42,22 +42,14 @@ namespace KokkosBatched { template struct CG { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const OperatorType &A, - const VectorViewType &B, - const VectorViewType &X, - const KrylovHandleType &handle) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const OperatorType &A, const VectorViewType &B, + const VectorViewType &X, const KrylovHandleType &handle) { int status = 0; if (std::is_same::value) { - status = - TeamCG::template invoke( - member, A, B, X, handle); + status = TeamCG::template invoke(member, A, B, X, handle); } else if (std::is_same::value) { - status = TeamVectorCG::template invoke( - member, A, B, X, handle); + status = TeamVectorCG::template invoke(member, A, B, X, handle); } return status; } diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CrsMatrix.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CrsMatrix.hpp index 92acc91a9ef9..0d880cd88011 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CrsMatrix.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_CrsMatrix.hpp @@ -42,8 +42,7 @@ class CrsMatrix { public: KOKKOS_INLINE_FUNCTION - CrsMatrix(const ValuesViewType &_values, const IntViewType &_row_ptr, - const IntViewType &_colIndices) + CrsMatrix(const ValuesViewType &_values, const IntViewType &_row_ptr, const IntViewType &_colIndices) : values(_values), row_ptr(_row_ptr), colIndices(_colIndices) { n_operators = _values.extent(0); n_rows = _row_ptr.extent(0) - 1; @@ -77,45 +76,40 @@ class CrsMatrix { /// \param beta [in]: input coefficient for Y (default value 0.) 
/// \param Y [in/out]: Output vector Y, a rank 2 view - template - KOKKOS_INLINE_FUNCTION void apply( - const MemberType &member, const XViewType &X, const YViewType &Y, - MagnitudeType alpha = Kokkos::ArithTraits::one(), - MagnitudeType beta = Kokkos::ArithTraits::zero()) const { + template + KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, const XViewType &X, const YViewType &Y, + MagnitudeType alpha = Kokkos::ArithTraits::one(), + MagnitudeType beta = Kokkos::ArithTraits::zero()) const { if (beta == Kokkos::ArithTraits::zero()) { if (member.team_size() == 1 && n_operators == 8) - KokkosBatched::TeamVectorSpmv::template invoke< - ValuesViewType, IntViewType, XViewType, YViewType, 0>( + KokkosBatched::TeamVectorSpmv::template invoke( member, alpha, values, row_ptr, colIndices, X, beta, Y); else - KokkosBatched::TeamVectorSpmv::template invoke< - ValuesViewType, IntViewType, XViewType, YViewType, 0>( + KokkosBatched::TeamVectorSpmv::template invoke( member, alpha, values, row_ptr, colIndices, X, beta, Y); } else { if (member.team_size() == 1 && n_operators == 8) - KokkosBatched::TeamVectorSpmv::template invoke< - ValuesViewType, IntViewType, XViewType, YViewType, 1>( + KokkosBatched::TeamVectorSpmv::template invoke( member, alpha, values, row_ptr, colIndices, X, beta, Y); else - KokkosBatched::TeamVectorSpmv::template invoke< - ValuesViewType, IntViewType, XViewType, YViewType, 1>( + KokkosBatched::TeamVectorSpmv::template invoke( member, alpha, values, row_ptr, colIndices, X, beta, Y); } } template - KOKKOS_INLINE_FUNCTION void apply( - const XViewType &X, const YViewType &Y, - MagnitudeType alpha = Kokkos::ArithTraits::one(), - MagnitudeType beta = Kokkos::ArithTraits::zero()) const { + KOKKOS_INLINE_FUNCTION void apply(const XViewType &X, const YViewType &Y, + MagnitudeType alpha = Kokkos::ArithTraits::one(), + MagnitudeType beta = Kokkos::ArithTraits::zero()) const { if (beta == Kokkos::ArithTraits::zero()) - KokkosBatched::SerialSpmv::template 
invoke< - ValuesViewType, IntViewType, XViewType, YViewType, 0>( + KokkosBatched::SerialSpmv::template invoke( alpha, values, row_ptr, colIndices, X, beta, Y); else - KokkosBatched::SerialSpmv::template invoke< - ValuesViewType, IntViewType, XViewType, YViewType, 1>( + KokkosBatched::SerialSpmv::template invoke( alpha, values, row_ptr, colIndices, X, beta, Y); } }; diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_GMRES.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_GMRES.hpp index 0d27bcd6fb48..a3f4eda8d362 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_GMRES.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_GMRES.hpp @@ -44,25 +44,16 @@ namespace KokkosBatched { template struct GMRES { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const OperatorType &A, - const VectorViewType &B, - const VectorViewType &X, - const KrylovHandleType &handle) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const OperatorType &A, const VectorViewType &B, + const VectorViewType &X, const KrylovHandleType &handle) { int status = 0; if (std::is_same::value) { - status = SerialGMRES::template invoke( - A, B, X, handle); + status = SerialGMRES::template invoke(A, B, X, handle); } else if (std::is_same::value) { - status = - TeamGMRES::template invoke( - member, A, B, X, handle); + status = TeamGMRES::template invoke(member, A, B, X, handle); } else if (std::is_same::value) { - status = TeamVectorGMRES::template invoke( - member, A, B, X, handle); + status = TeamVectorGMRES::template invoke(member, A, B, X, handle); } return status; } diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Identity.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Identity.hpp index 4e8e7c43087b..311ec09d5c60 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Identity.hpp +++ 
b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Identity.hpp @@ -34,26 +34,21 @@ class Identity { KOKKOS_INLINE_FUNCTION ~Identity() {} - template - KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, - const XViewType &X, - const YViewType &Y) const { + template + KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, const XViewType &X, const YViewType &Y) const { if (sameXY == 0) { if (std::is_same::value) { SerialCopy::invoke(X, Y); } else if (std::is_same::value) { TeamCopy::invoke(member, X, Y); - } else if (std::is_same::value) { + } else if (std::is_same::value) { TeamVectorCopy::invoke(member, X, Y); } } } - template - KOKKOS_INLINE_FUNCTION void apply(const XViewType &X, - const YViewType &Y) const { + template + KOKKOS_INLINE_FUNCTION void apply(const XViewType &X, const YViewType &Y) const { if (sameXY == 0) { SerialCopy::invoke(X, Y); } diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_JacobiPrec.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_JacobiPrec.hpp index 44a982525d10..580f85158bec 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_JacobiPrec.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_JacobiPrec.hpp @@ -75,15 +75,12 @@ class JacobiPrec { Kokkos::TeamThreadRange(member, 0, n_operators * n_rows), [&](const int &iTemp, int <ooSmall) { int i, j; - getIndices( - iTemp, n_rows, n_operators, j, i); - if (Kokkos::abs(diag_values_array[i * vs0 + j * vs1]) <= - epsilon) { + getIndices(iTemp, n_rows, n_operators, j, i); + if (Kokkos::abs(diag_values_array[i * vs0 + j * vs1]) <= epsilon) { ltooSmall++; diag_values_array[i * vs0 + j * vs1] = one; } else - diag_values_array[i * vs0 + j * vs1] = - one / diag_values_array[i * vs0 + j * vs1]; + diag_values_array[i * vs0 + j * vs1] = one / diag_values_array[i * vs0 + j * vs1]; }, tooSmall); } else if (std::is_same::value) { @@ -95,31 +92,21 @@ class JacobiPrec { Kokkos::TeamVectorRange(member, 0, n_operators * 
n_rows), [&](const int &iTemp, int <ooSmall) { int i, j; - getIndices( - iTemp, n_rows, n_operators, j, i); - if (Kokkos::abs(diag_values_array[i * vs0 + j * vs1]) <= - epsilon) { + getIndices(iTemp, n_rows, n_operators, j, i); + if (Kokkos::abs(diag_values_array[i * vs0 + j * vs1]) <= epsilon) { ltooSmall++; diag_values_array[i * vs0 + j * vs1] = one; } else - diag_values_array[i * vs0 + j * vs1] = - one / diag_values_array[i * vs0 + j * vs1]; + diag_values_array[i * vs0 + j * vs1] = one / diag_values_array[i * vs0 + j * vs1]; }, tooSmall); } if (tooSmall > 0) -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::JacobiPrec: %d entrie(s) has/have a too small " - "magnitude and have been replaced by one, \n", - (int)tooSmall); -#else Kokkos::printf( "KokkosBatched::JacobiPrec: %d entrie(s) has/have a too small " "magnitude and have been replaced by one, \n", (int)tooSmall); -#endif computed_inverse = true; } @@ -138,45 +125,32 @@ class JacobiPrec { } if (tooSmall > 0) -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBatched::JacobiPrec: %d entrie(s) has/have a too small " - "magnitude and have been replaced by one, \n", - (int)tooSmall); -#else Kokkos::printf( "KokkosBatched::JacobiPrec: %d entrie(s) has/have a too small " "magnitude and have been replaced by one, \n", (int)tooSmall); -#endif computed_inverse = true; } - template - KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, - const XViewType &X, - const YViewType &Y) const { + template + KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, const XViewType &X, const YViewType &Y) const { if (!computed_inverse) { this->computeInverse(member); member.team_barrier(); // Finish writing to this->diag_values } - KokkosBatched::HadamardProduct::template invoke< - ValuesViewType, XViewType, YViewType>(member, diag_values, X, Y); + KokkosBatched::HadamardProduct::template invoke( + member, diag_values, X, Y); } - template - KOKKOS_INLINE_FUNCTION void 
apply(const XViewType &X, - const YViewType &Y) const { + template + KOKKOS_INLINE_FUNCTION void apply(const XViewType &X, const YViewType &Y) const { if (!computed_inverse) { this->computeInverse(); } - KokkosBatched::SerialHadamardProduct::template invoke( - diag_values, X, Y); + KokkosBatched::SerialHadamardProduct::template invoke(diag_values, X, Y); } }; diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Handle.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Handle.hpp index 9992742dd8ff..c8e8392e1150 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Handle.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Handle.hpp @@ -51,8 +51,7 @@ class KrylovHandle { using norm_type = typename NormViewType::non_const_value_type; typedef ViewType3D ArnoldiViewType; - typedef Kokkos::View TemporaryViewType; @@ -81,8 +80,7 @@ class KrylovHandle { bool host_synchronised; public: - KrylovHandle(int _batched_size, int _N_team, int _max_iteration = 200, - bool _monitor_residual = false) + KrylovHandle(int _batched_size, int _N_team, int _max_iteration = 200, bool _monitor_residual = false) : max_iteration(_max_iteration), batched_size(_batched_size), N_team(_N_team), @@ -192,9 +190,7 @@ class KrylovHandle { /// \param batched_id [in]: Global batched ID KOKKOS_INLINE_FUNCTION - bool is_converged(int batched_id) const { - return (iteration_numbers(batched_id) != -1); - } + bool is_converged(int batched_id) const { return (iteration_numbers(batched_id) != -1); } /// \brief is_converged /// Test if one particular system has converged (host). 
@@ -226,9 +222,7 @@ class KrylovHandle { /// \param _max_tolerance [in]: New tolerance KOKKOS_INLINE_FUNCTION - void set_max_tolerance(norm_type _max_tolerance) { - max_tolerance = _max_tolerance; - } + void set_max_tolerance(norm_type _max_tolerance) { max_tolerance = _max_tolerance; } /// \brief get_max_tolerance /// Get the maximal tolerance of the batched Krylov solver @@ -310,9 +304,7 @@ class KrylovHandle { /// \param batched_id [in]: Global batched ID KOKKOS_INLINE_FUNCTION - int get_iteration(int batched_id) const { - return iteration_numbers(batched_id); - } + int get_iteration(int batched_id) const { return iteration_numbers(batched_id); } /// \brief get_iteration_host /// Get the number of iteration after convergence for one system (host) @@ -332,9 +324,7 @@ class KrylovHandle { /// \param _ortho_strategy [in]: used orthogonalization strategy KOKKOS_INLINE_FUNCTION - void set_ortho_strategy(int _ortho_strategy) { - ortho_strategy = _ortho_strategy; - } + void set_ortho_strategy(int _ortho_strategy) { ortho_strategy = _ortho_strategy; } /// \brief get_ortho_strategy /// Get the used orthogonalization strategy. @@ -350,9 +340,7 @@ class KrylovHandle { /// \param _scratch_pad_level [in]: used level KOKKOS_INLINE_FUNCTION - void set_scratch_pad_level(int _scratch_pad_level) { - scratch_pad_level = _scratch_pad_level; - } + void set_scratch_pad_level(int _scratch_pad_level) { scratch_pad_level = _scratch_pad_level; } /// \brief get_scratch_pad_level /// Get the scratch pad level used to store temporary variables. 
@@ -386,9 +374,7 @@ class KrylovHandle { } KOKKOS_INLINE_FUNCTION - void set_memory_strategy(int _memory_strategy) { - memory_strategy = _memory_strategy; - } + void set_memory_strategy(int _memory_strategy) { memory_strategy = _memory_strategy; } KOKKOS_INLINE_FUNCTION int get_memory_strategy() const { return memory_strategy; } @@ -415,10 +401,8 @@ class KrylovHandle { /// \param norm_i [in]: Norm to store KOKKOS_INLINE_FUNCTION - void set_norm(int team_id, int batched_id, int iteration_id, - norm_type norm_i) const { - if (monitor_residual) - residual_norms(team_id * N_team + batched_id, iteration_id) = norm_i; + void set_norm(int team_id, int batched_id, int iteration_id, norm_type norm_i) const { + if (monitor_residual) residual_norms(team_id * N_team + batched_id, iteration_id) = norm_i; } /// \brief set_last_norm @@ -429,8 +413,7 @@ class KrylovHandle { KOKKOS_INLINE_FUNCTION void set_last_norm(int batched_id, norm_type norm_i) const { - if (monitor_residual) - residual_norms(batched_id, max_iteration + 1) = norm_i; + if (monitor_residual) residual_norms(batched_id, max_iteration + 1) = norm_i; } /// \brief set_last_norm @@ -442,8 +425,7 @@ class KrylovHandle { KOKKOS_INLINE_FUNCTION void set_last_norm(int team_id, int batched_id, norm_type norm_i) const { - if (monitor_residual) - residual_norms(team_id * N_team + batched_id, max_iteration + 1) = norm_i; + if (monitor_residual) residual_norms(team_id * N_team + batched_id, max_iteration + 1) = norm_i; } /// \brief set_iteration @@ -453,9 +435,7 @@ class KrylovHandle { /// \param iteration_id [in]: Iteration ID KOKKOS_INLINE_FUNCTION - void set_iteration(int batched_id, int iteration_id) const { - iteration_numbers(batched_id) = iteration_id; - } + void set_iteration(int batched_id, int iteration_id) const { iteration_numbers(batched_id) = iteration_id; } /// \brief set_iteration /// Store the number of iteration after convergence for one system diff --git 
a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Solvers.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Solvers.hpp index 262167ee64e6..b07ed2b973d7 100644 --- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Solvers.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Krylov_Solvers.hpp @@ -20,110 +20,71 @@ namespace KokkosBatched { struct SerialGMRES { - template - KOKKOS_INLINE_FUNCTION static int invoke(const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const PrecOperatorType& P, - const KrylovHandleType& handle, + template + KOKKOS_INLINE_FUNCTION static int invoke(const OperatorType& A, const VectorViewType& _B, const VectorViewType& _X, + const PrecOperatorType& P, const KrylovHandleType& handle, const int GMRES_id); - template - KOKKOS_INLINE_FUNCTION static int invoke(const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, + template + KOKKOS_INLINE_FUNCTION static int invoke(const OperatorType& A, const VectorViewType& _B, const VectorViewType& _X, const KrylovHandleType& handle); }; template struct TeamGMRES { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const PrecOperatorType& P, - const KrylovHandleType& handle, const ArnoldiViewType& _ArnoldiView, - const TMPViewType& _TMPView); - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const PrecOperatorType& P, - const KrylovHandleType& handle); - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const 
PrecOperatorType& P, + const KrylovHandleType& handle, const ArnoldiViewType& _ArnoldiView, + const TMPViewType& _TMPView); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const PrecOperatorType& P, const KrylovHandleType& handle); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle); }; template struct TeamVectorGMRES { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const PrecOperatorType& P, - const KrylovHandleType& handle, const ArnoldiViewType& _ArnoldiView, - const TMPViewType& _TMPView); - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const PrecOperatorType& P, - const KrylovHandleType& handle); - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const PrecOperatorType& P, + const KrylovHandleType& handle, const ArnoldiViewType& _ArnoldiView, + const TMPViewType& _TMPView); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const PrecOperatorType& P, const KrylovHandleType& handle); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle); }; template struct TeamCG { - template - KOKKOS_INLINE_FUNCTION static 
int invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandleType& handle, - const TMPViewType& _TMPView, const TMPNormViewType& _TMPNormView); - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const KrylovHandleType& handle); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle, + const TMPViewType& _TMPView, const TMPNormViewType& _TMPNormView); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle); }; template struct TeamVectorCG { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const OperatorType& A, const VectorViewType& _B, - const VectorViewType& _X, const KrylovHandleType& handle, - const TMPViewType& _TMPView, const TMPNormViewType& _TMPNormView); - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const OperatorType& A, - const VectorViewType& _B, - const VectorViewType& _X, - const KrylovHandleType& handle); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle, + const TMPViewType& _TMPView, const TMPNormViewType& _TMPNormView); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, const KrylovHandleType& handle); }; } // namespace KokkosBatched diff --git a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Spmv.hpp b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Spmv.hpp index da70acb6bb2d..a93d0775beda 100644 
--- a/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Spmv.hpp +++ b/packages/kokkos-kernels/batched/sparse/src/KokkosBatched_Spmv.hpp @@ -64,23 +64,17 @@ namespace KokkosBatched { template struct SerialSpmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const alphaViewType &alpha, const ValuesViewType &values, - const IntView &row_ptr, const IntView &colIndices, const xViewType &x, - const betaViewType &beta, const yViewType &Y); + template + KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, const ValuesViewType &values, + const IntView &row_ptr, const IntView &colIndices, const xViewType &x, + const betaViewType &beta, const yViewType &Y); - template + template KOKKOS_INLINE_FUNCTION static int invoke( - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &X, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &beta, + const typename Kokkos::ArithTraits::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, const IntView &colIndices, const xViewType &X, + const typename Kokkos::ArithTraits::mag_type &beta, const yViewType &Y); }; @@ -126,25 +120,19 @@ struct SerialSpmv { template struct TeamSpmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const alphaViewType &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &x, const betaViewType &beta, - const yViewType &y); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &x, const betaViewType &beta, + const yViewType &y); - template + template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, - const 
typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &x, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &beta, + const typename Kokkos::ArithTraits::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, const IntView &colIndices, const xViewType &x, + const typename Kokkos::ArithTraits::mag_type &beta, const yViewType &y); }; @@ -189,28 +177,21 @@ struct TeamSpmv { /// (or one with TeamVectorRange) are used inside. /// -template +template struct TeamVectorSpmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const alphaViewType &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &x, const betaViewType &beta, - const yViewType &y); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &x, const betaViewType &beta, + const yViewType &y); - template + template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &x, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &beta, + const typename Kokkos::ArithTraits::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, const IntView &colIndices, const xViewType &x, + const typename Kokkos::ArithTraits::mag_type &beta, const yViewType &y); }; @@ -245,58 +226,47 @@ struct TeamVectorSpmv { template struct Spmv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, 
const alphaViewType &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &x, const betaViewType &beta, - const yViewType &y) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &x, const betaViewType &beta, + const yViewType &y) { int r_val = 0; if (std::is_same::value) { - r_val = SerialSpmv::template invoke< - ValuesViewType, IntView, xViewType, yViewType, alphaViewType, - betaViewType, dobeta>(alpha, values, row_ptr, colIndices, x, beta, y); + r_val = + SerialSpmv::template invoke(alpha, values, row_ptr, colIndices, x, beta, y); } else if (std::is_same::value) { - r_val = TeamSpmv::template invoke< - ValuesViewType, IntView, xViewType, yViewType, alphaViewType, - betaViewType, dobeta>(member, alpha, values, row_ptr, colIndices, x, - beta, y); + r_val = TeamSpmv::template invoke( + member, alpha, values, row_ptr, colIndices, x, beta, y); } else if (std::is_same::value) { - r_val = TeamVectorSpmv::template invoke< - ValuesViewType, IntView, xViewType, yViewType, alphaViewType, - betaViewType, dobeta>(member, alpha, values, row_ptr, colIndices, x, - beta, y); + r_val = TeamVectorSpmv::template invoke( + member, alpha, values, row_ptr, colIndices, x, beta, y); } return r_val; } - template + template KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &alpha, - const ValuesViewType &values, const IntView &row_ptr, - const IntView &colIndices, const xViewType &x, - const typename Kokkos::ArithTraits< - typename ValuesViewType::non_const_value_type>::mag_type &beta, + const typename Kokkos::ArithTraits::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, const IntView &colIndices, const xViewType &x, + const typename 
Kokkos::ArithTraits::mag_type &beta, const yViewType &y) { int r_val = 0; if (std::is_same::value) { - r_val = - SerialSpmv::template invoke( - alpha, values, row_ptr, colIndices, x, beta, y); + r_val = SerialSpmv::template invoke( + alpha, values, row_ptr, colIndices, x, beta, y); } else if (std::is_same::value) { - r_val = TeamSpmv::template invoke< - ValuesViewType, IntView, xViewType, yViewType, dobeta>( + r_val = TeamSpmv::template invoke( member, alpha, values, row_ptr, colIndices, x, beta, y); } else if (std::is_same::value) { - r_val = TeamVectorSpmv::template invoke< - ValuesViewType, IntView, xViewType, yViewType, dobeta>( - member, alpha, values, row_ptr, colIndices, x, beta, y); + r_val = + TeamVectorSpmv::template invoke( + member, alpha, values, row_ptr, colIndices, x, beta, y); } return r_val; } diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES.hpp index e28efb9b823d..3147caefae10 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES.hpp @@ -29,8 +29,8 @@ using namespace KokkosBatched; namespace Test { namespace GMRES { -template +template struct Functor_TestBatchedSerialGMRES { using execution_space = typename DeviceType::execution_space; const ValuesViewType _D; @@ -42,32 +42,19 @@ struct Functor_TestBatchedSerialGMRES { const int _N_team; KrylovHandleType _handle; - Functor_TestBatchedSerialGMRES(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B, - const VectorViewType &diag, const int N_team, + Functor_TestBatchedSerialGMRES(const ValuesViewType &D, const IntView &r, const IntView &c, const VectorViewType &X, + const VectorViewType &B, const VectorViewType &diag, const int N_team, KrylovHandleType &handle) - : _D(D), - _r(r), - _c(c), - _X(X), - _B(B), - 
_Diag(diag), - _N_team(N_team), - _handle(handle) {} + : _D(D), _r(r), _c(c), _X(X), _B(B), _Diag(diag), _N_team(N_team), _handle(handle) {} KOKKOS_INLINE_FUNCTION void operator()(const int k) const { const int first_matrix = _handle.first_index(k); const int last_matrix = _handle.last_index(k); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto diag = Kokkos::subview( - _Diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto diag = Kokkos::subview(_Diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; using PrecOperator = KokkosBatched::JacobiPrec; @@ -76,8 +63,7 @@ struct Functor_TestBatchedSerialGMRES { PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::SerialGMRES::template invoke( - A, b, x, P, _handle, k); + KokkosBatched::SerialGMRES::template invoke(A, b, x, P, _handle, k); } inline void run() { @@ -96,18 +82,16 @@ struct Functor_TestBatchedSerialGMRES { _handle.set_compute_last_residual(false); _handle.set_tolerance(1e-8); - _handle.Arnoldi_view = typename KrylovHandleType::ArnoldiViewType( - "", N, maximum_iteration, n + maximum_iteration + 3); - _handle.tmp_view = typename KrylovHandleType::TemporaryViewType( - "", N, n + maximum_iteration + 3); + _handle.Arnoldi_view = + typename KrylovHandleType::ArnoldiViewType("", N, maximum_iteration, n + maximum_iteration + 3); + _handle.tmp_view = typename KrylovHandleType::TemporaryViewType("", N, n + maximum_iteration + 3); 
Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -133,8 +117,7 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KrylovHandle; + using KrylovHandleType = KrylovHandle; NormViewType sqr_norm_0("sqr_norm_0", N); NormViewType sqr_norm_j("sqr_norm_j", N); @@ -153,12 +136,10 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { int current_index; for (int i = 0; i < BlkSize; ++i) { - for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); - ++current_index) { + for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); ++current_index) { if (colIndices_host(current_index) == i) break; } - for (int j = 0; j < N; ++j) - diag_values_host(j, i) = values_host(j, current_index); + for (int j = 0; j < N; ++j) diag_values_host(j, i) = values_host(j, current_index); } Kokkos::deep_copy(Diag, diag_values_host); @@ -188,13 +169,10 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { KrylovHandleType handle(N, N_team, n_iterations); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_0_host); - Functor_TestBatchedSerialGMRES( + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, 
sqr_norm_0_host); + Functor_TestBatchedSerialGMRES( D, r, c, X, B, Diag, N_team, handle) .run(); @@ -205,17 +183,13 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(X_host, X); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_j_host); + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e5 * ats::epsilon(); - for (int l = 0; l < N; ++l) - EXPECT_NEAR_KK( - std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); + for (int l = 0; l < N; ++l) EXPECT_NEAR_KK(std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); } } // namespace GMRES } // namespace Test @@ -226,26 +200,21 @@ int test_batched_serial_GMRES() { { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::GMRES::impl_test_batched_GMRES(1024, i, 2); + Test::GMRES::impl_test_batched_GMRES(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::GMRES::impl_test_batched_GMRES(1024, i, 2); + Test::GMRES::impl_test_batched_GMRES(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES_Real.hpp 
b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES_Real.hpp index ccfe3c37d521..2756e11a1f32 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES_Real.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialGMRES_Real.hpp @@ -15,13 +15,9 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_serial_GMRES_float) { - test_batched_serial_GMRES(); -} +TEST_F(TestCategory, batched_scalar_serial_GMRES_float) { test_batched_serial_GMRES(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_serial_GMRES_double) { - test_batched_serial_GMRES(); -} +TEST_F(TestCategory, batched_scalar_serial_GMRES_double) { test_batched_serial_GMRES(); } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialSpmv.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialSpmv.hpp index 05f2724c5b10..2f32b6294a84 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialSpmv.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SerialSpmv.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Spmv.hpp" #include "KokkosBatched_Spmv_Serial_Impl.hpp" @@ -37,9 +37,8 @@ struct ParamTag { typedef T trans; }; -template +template struct Functor_TestBatchedSerialSpmv { using execution_space = typename DeviceType::execution_space; const alphaViewType _alpha; @@ -51,10 +50,8 @@ struct Functor_TestBatchedSerialSpmv { const yViewType _Y; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedSerialSpmv(const alphaViewType &alpha, - const ValuesViewType &D, const IntView &r, - const IntView &c, const xViewType &X, - const betaViewType &beta, const yViewType &Y) + Functor_TestBatchedSerialSpmv(const alphaViewType &alpha, const ValuesViewType &D, const IntView 
&r, const IntView &c, + const xViewType &X, const betaViewType &beta, const yViewType &Y) : _alpha(alpha), _D(D), _r(r), _c(c), _X(X), _beta(beta), _Y(Y) {} KOKKOS_INLINE_FUNCTION @@ -66,8 +63,8 @@ struct Functor_TestBatchedSerialSpmv { auto y = Kokkos::subview(_Y, Kokkos::make_pair(k, k + 1), Kokkos::ALL); KokkosBatched::SerialSpmv::template invoke< - ValuesViewType, IntView, xViewType, yViewType, alphaViewType, - betaViewType, dobeta>(alpha, d, _r, _c, x, beta, y); + ValuesViewType, IntView, xViewType, yViewType, alphaViewType, betaViewType, dobeta>(alpha, d, _r, _c, x, beta, + y); } inline void run() { @@ -82,9 +79,8 @@ struct Functor_TestBatchedSerialSpmv { } }; -template +template void impl_test_batched_spmv(const int N, const int BlkSize) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -126,21 +122,15 @@ void impl_test_batched_spmv(const int N, const int BlkSize) { else Y0_host(l, i) *= beta_host(l); if (i != 0 && i != (BlkSize - 1)) - Y0_host(l, i) += - alpha_host(l) * - (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); else if (i == 0) - Y0_host(l, i) += - alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); else - Y0_host(l, i) += - alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); } - Functor_TestBatchedSerialSpmv(alpha, D, r, c, X1, beta, - Y1) + Functor_TestBatchedSerialSpmv(alpha, D, r, c, X1, beta, Y1) .run(); Kokkos::fence(); @@ -165,49 +155,37 @@ void impl_test_batched_spmv(const int N, const int BlkSize) { } // namespace Spmv } // namespace Test -template +template int test_batched_spmv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - alphaViewType; + 
typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::Spmv::impl_test_batched_spmv(1024, - i); + Test::Spmv::impl_test_batched_spmv(1024, i); } for (int i = 3; i < 10; ++i) { - Test::Spmv::impl_test_batched_spmv(1024, - i); + Test::Spmv::impl_test_batched_spmv(1024, i); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::Spmv::impl_test_batched_spmv(1024, - i); + Test::Spmv::impl_test_batched_spmv(1024, i); } for (int i = 3; i < 10; ++i) { - Test::Spmv::impl_test_batched_spmv(1024, - i); + Test::Spmv::impl_test_batched_spmv(1024, i); } } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SparseUtils.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SparseUtils.hpp index 98bc25894f01..808f95a9a736 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SparseUtils.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_SparseUtils.hpp @@ -18,21 +18,12 @@ namespace KokkosBatched { template -void create_tridiagonal_batched_matrices(const int nnz, const int BlkSize, - const int N, const IntView &r, - const IntView &c, - const VectorViewType &D, - const VectorViewType &X, +void create_tridiagonal_batched_matrices(const int nnz, const int BlkSize, const int N, const IntView &r, + const IntView &c, const VectorViewType &D, const VectorViewType &X, const VectorViewType &B) { - Kokkos::Random_XorShift64_Pool< - typename VectorViewType::device_type::execution_space> - random(13718); - Kokkos::fill_random( - X, random, - Kokkos::reduction_identity::prod()); - Kokkos::fill_random( - B, random, - Kokkos::reduction_identity::prod()); + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(X, random, Kokkos::reduction_identity::prod()); + 
Kokkos::fill_random(B, random, Kokkos::reduction_identity::prod()); auto D_host = Kokkos::create_mirror_view(D); auto r_host = Kokkos::create_mirror_view(r); diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG.hpp index b05f3db61f45..3c0b194fafd7 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG.hpp @@ -28,8 +28,8 @@ using namespace KokkosBatched; namespace Test { namespace TeamCG { -template +template struct Functor_TestBatchedTeamCG { using execution_space = typename DeviceType::execution_space; const ValuesViewType _D; @@ -40,32 +40,21 @@ struct Functor_TestBatchedTeamCG { const int _N_team; KrylovHandleType handle; - Functor_TestBatchedTeamCG(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, + Functor_TestBatchedTeamCG(const ValuesViewType &D, const IntView &r, const IntView &c, const VectorViewType &X, const VectorViewType &B, const int N_team) - : _D(D), - _r(r), - _c(c), - _X(X), - _B(B), - _N_team(N_team), - handle(KrylovHandleType(_D.extent(0), _N_team)) {} + : _D(D), _r(r), _c(c), _X(X), _B(B), _N_team(N_team), handle(KrylovHandleType(_D.extent(0), _N_team)) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _D.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; @@ -80,8 +69,7 @@ struct Functor_TestBatchedTeamCG { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, - Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); size_t bytes_0 = ValuesViewType::shmem_size(_N_team, _X.extent(1)); size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 1); @@ -92,8 +80,7 @@ struct Functor_TestBatchedTeamCG { } }; -template +template void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -118,8 +105,7 @@ void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KrylovHandle; + using KrylovHandleType = KrylovHandle; NormViewType sqr_norm_0("sqr_norm_0", N); NormViewType sqr_norm_j("sqr_norm_j", N); @@ -147,13 +133,11 @@ void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(D_host, D); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename 
VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_0_host); - Functor_TestBatchedTeamCG(D, r, c, X, B, N_team) + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_0_host); + Functor_TestBatchedTeamCG(D, r, c, X, B, + N_team) .run(); Kokkos::fence(); @@ -163,16 +147,13 @@ void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(X_host, X); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_j_host); + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e3 * ats::epsilon(); - for (int l = 0; l < N; ++l) - EXPECT_NEAR_KK(sqr_norm_j_host(l) / sqr_norm_0_host(l), 0, eps); + for (int l = 0; l < N; ++l) EXPECT_NEAR_KK(sqr_norm_j_host(l) / sqr_norm_0_host(l), 0, eps); } } // namespace TeamCG } // namespace Test @@ -183,26 +164,21 @@ int test_batched_team_CG() { { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamCG::impl_test_batched_CG(1024, i, 2); + Test::TeamCG::impl_test_batched_CG(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + 
typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamCG::impl_test_batched_CG(1024, i, 2); + Test::TeamCG::impl_test_batched_CG(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG_Real.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG_Real.hpp index 1bdb6bc95acb..9d51be581b93 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG_Real.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamCG_Real.hpp @@ -15,13 +15,9 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_team_CG_float) { - test_batched_team_CG(); -} +TEST_F(TestCategory, batched_scalar_team_CG_float) { test_batched_team_CG(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_team_CG_double) { - test_batched_team_CG(); -} +TEST_F(TestCategory, batched_scalar_team_CG_double) { test_batched_team_CG(); } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES.hpp index de1a7f4fc28d..e2250bab9549 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES.hpp @@ -29,8 +29,8 @@ using namespace KokkosBatched; namespace Test { namespace TeamGMRES { -template +template struct Functor_TestBatchedTeamGMRES { using execution_space = typename DeviceType::execution_space; const ValuesViewType _D; @@ -42,37 +42,23 @@ struct Functor_TestBatchedTeamGMRES { const int _N_team; KrylovHandleType _handle; - Functor_TestBatchedTeamGMRES(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B, - const VectorViewType &diag, const 
int N_team, + Functor_TestBatchedTeamGMRES(const ValuesViewType &D, const IntView &r, const IntView &c, const VectorViewType &X, + const VectorViewType &B, const VectorViewType &diag, const int N_team, KrylovHandleType &handle) - : _D(D), - _r(r), - _c(c), - _X(X), - _B(B), - _Diag(diag), - _N_team(N_team), - _handle(handle) {} + : _D(D), _r(r), _c(c), _X(X), _B(B), _Diag(diag), _N_team(N_team), _handle(handle) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _D.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); - - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto diag = Kokkos::subview( - _Diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto diag = Kokkos::subview(_Diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; using PrecOperator = KokkosBatched::JacobiPrec; @@ -81,9 +67,7 @@ struct Functor_TestBatchedTeamGMRES { PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::TeamGMRES::template invoke( - member, A, b, x, P, _handle); + KokkosBatched::TeamGMRES::template invoke(member, A, b, x, P, _handle); } inline void run() { @@ -92,8 +76,7 @@ struct Functor_TestBatchedTeamGMRES { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, - Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); const int N = _D.extent(0); const int n = _X.extent(1); @@ -103,8 +86,8 @@ struct Functor_TestBatchedTeamGMRES { _handle.set_compute_last_residual(false); _handle.set_tolerance(1e-8); - _handle.Arnoldi_view = typename KrylovHandleType::ArnoldiViewType( - "", N, maximum_iteration, n + maximum_iteration + 3); + _handle.Arnoldi_view = + typename KrylovHandleType::ArnoldiViewType("", N, maximum_iteration, n + maximum_iteration + 3); using ScalarType = typename ValuesViewType::non_const_value_type; using Layout = typename ValuesViewType::array_layout; @@ -122,16 +105,14 @@ struct Functor_TestBatchedTeamGMRES { size_t bytes_int = bytes_row_ptr + bytes_col_idc; size_t bytes_diag = bytes_2D_1; size_t bytes_tmp = 2 * bytes_2D_1 + 2 * bytes_1D + bytes_2D_2; - policy.set_scratch_size( - 0, Kokkos::PerTeam(bytes_tmp + 
bytes_diag + bytes_int)); + policy.set_scratch_size(0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -157,8 +138,7 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KrylovHandle; + using KrylovHandleType = KrylovHandle; NormViewType sqr_norm_0("sqr_norm_0", N); NormViewType sqr_norm_j("sqr_norm_j", N); @@ -177,12 +157,10 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { int current_index; for (int i = 0; i < BlkSize; ++i) { - for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); - ++current_index) { + for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); ++current_index) { if (colIndices_host(current_index) == i) break; } - for (int j = 0; j < N; ++j) - diag_values_host(j, i) = values_host(j, current_index); + for (int j = 0; j < N; ++j) diag_values_host(j, i) = values_host(j, current_index); } Kokkos::deep_copy(Diag, diag_values_host); @@ -212,13 +190,10 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { KrylovHandleType handle(N, N_team, n_iterations); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_0_host); - Functor_TestBatchedTeamGMRES( + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, 
D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_0_host); + Functor_TestBatchedTeamGMRES( D, r, c, X, B, Diag, N_team, handle) .run(); @@ -229,17 +204,13 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(X_host, X); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_j_host); + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e5 * ats::epsilon(); - for (int l = 0; l < N; ++l) - EXPECT_NEAR_KK( - std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); + for (int l = 0; l < N; ++l) EXPECT_NEAR_KK(std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); } } // namespace TeamGMRES } // namespace Test @@ -250,26 +221,21 @@ int test_batched_team_GMRES() { { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamGMRES::impl_test_batched_GMRES(1024, i, 2); + Test::TeamGMRES::impl_test_batched_GMRES(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamGMRES::impl_test_batched_GMRES(1024, i, 2); + Test::TeamGMRES::impl_test_batched_GMRES(1024, i, 2); } } #endif diff --git 
a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES_Real.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES_Real.hpp index f8aab13eeca7..3ca0466630e2 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES_Real.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamGMRES_Real.hpp @@ -15,13 +15,9 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_team_GMRES_float) { - test_batched_team_GMRES(); -} +TEST_F(TestCategory, batched_scalar_team_GMRES_float) { test_batched_team_GMRES(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_team_GMRES_double) { - test_batched_team_GMRES(); -} +TEST_F(TestCategory, batched_scalar_team_GMRES_double) { test_batched_team_GMRES(); } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamSpmv.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamSpmv.hpp index a6c9ac7ea899..228bd01afaf1 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamSpmv.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamSpmv.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Spmv.hpp" #include "KokkosBatched_Spmv_Team_Impl.hpp" @@ -38,9 +38,8 @@ struct ParamTag { typedef T trans; }; -template +template struct Functor_TestBatchedTeamSpmv { using execution_space = typename DeviceType::execution_space; const alphaViewType _alpha; @@ -53,45 +52,27 @@ struct Functor_TestBatchedTeamSpmv { const int _N_team; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamSpmv(const alphaViewType &alpha, - const ValuesViewType &D, const IntView &r, - const IntView &c, const xViewType &X, - const betaViewType &beta, const yViewType &Y, - const int N_team) - : _alpha(alpha), - _D(D), - _r(r), 
- _c(c), - _X(X), - _beta(beta), - _Y(Y), - _N_team(N_team) {} + Functor_TestBatchedTeamSpmv(const alphaViewType &alpha, const ValuesViewType &D, const IntView &r, const IntView &c, + const xViewType &X, const betaViewType &beta, const yViewType &Y, const int N_team) + : _alpha(alpha), _D(D), _r(r), _c(c), _X(X), _beta(beta), _Y(Y), _N_team(N_team) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _D.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); - - auto alpha = - Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto beta = - Kokkos::subview(_beta, Kokkos::make_pair(first_matrix, last_matrix)); - auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - - KokkosBatched::TeamSpmv:: - template invoke( - member, alpha, d, _r, _c, x, beta, y); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); + + auto alpha = Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto beta = Kokkos::subview(_beta, Kokkos::make_pair(first_matrix, last_matrix)); + auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + + KokkosBatched::TeamSpmv::template invoke< + ValuesViewType, IntView, xViewType, yViewType, alphaViewType, betaViewType, dobeta>(member, alpha, d, _r, _c, x, + beta, y); } inline void run() { @@ -100,16 +81,14 @@ struct Functor_TestBatchedTeamSpmv { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy( - _D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -151,20 +130,15 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { else Y0_host(l, i) *= beta_host(l); if (i != 0 && i != (BlkSize - 1)) - Y0_host(l, i) += - alpha_host(l) * - (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); else if (i == 0) - Y0_host(l, i) += - alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); else - Y0_host(l, i) += - alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); + Y0_host(l, i) 
+= alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); } - Functor_TestBatchedTeamSpmv(alpha, D, r, c, X1, beta, Y1, N_team) + Functor_TestBatchedTeamSpmv(alpha, D, r, c, X1, beta, Y1, N_team) .run(); Kokkos::fence(); @@ -189,50 +163,38 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { } // namespace TeamSpmv } // namespace Test -template +template int test_batched_team_spmv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamSpmv::impl_test_batched_spmv( - 1024, i, 2); + Test::TeamSpmv::impl_test_batched_spmv(1024, i, 2); } for (int i = 3; i < 10; ++i) { - Test::TeamSpmv::impl_test_batched_spmv( - 1024, i, 2); + Test::TeamSpmv::impl_test_batched_spmv(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamSpmv::impl_test_batched_spmv( - 1024, i, 2); + Test::TeamSpmv::impl_test_batched_spmv(1024, i, 2); } for (int i = 3; i < 10; ++i) { - Test::TeamSpmv::impl_test_batched_spmv( - 1024, i, 2); + Test::TeamSpmv::impl_test_batched_spmv(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG.hpp index 3ffd68209bc0..9ca4405b89b4 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG.hpp @@ -28,8 +28,8 @@ using namespace KokkosBatched; namespace Test { namespace TeamVectorCG { -template +template struct Functor_TestBatchedTeamVectorCG { using execution_space = typename 
DeviceType::execution_space; const ValuesViewType _D; @@ -40,40 +40,27 @@ struct Functor_TestBatchedTeamVectorCG { const int _N_team; KrylovHandleType handle; - Functor_TestBatchedTeamVectorCG(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, + Functor_TestBatchedTeamVectorCG(const ValuesViewType &D, const IntView &r, const IntView &c, const VectorViewType &X, const VectorViewType &B, const int N_team) - : _D(D), - _r(r), - _c(c), - _X(X), - _B(B), - _N_team(N_team), - handle(KrylovHandleType(_D.extent(0), _N_team)) {} + : _D(D), _r(r), _c(c), _X(X), _B(B), _N_team(N_team), handle(KrylovHandleType(_D.extent(0), _N_team)) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _D.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; Operator A(d, _r, _c); - KokkosBatched::TeamVectorCG::template invoke( - member, A, b, x, handle); + KokkosBatched::TeamVectorCG::template invoke(member, A, b, x, handle); } inline void run() { @@ -82,8 +69,7 @@ struct Functor_TestBatchedTeamVectorCG { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, - Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); size_t bytes_0 = ValuesViewType::shmem_size(_N_team, _X.extent(1)); size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 1); @@ -94,8 +80,7 @@ struct Functor_TestBatchedTeamVectorCG { } }; -template +template void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -120,8 +105,7 @@ void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KrylovHandle; + using KrylovHandleType = KrylovHandle; NormViewType sqr_norm_0("sqr_norm_0", N); NormViewType sqr_norm_j("sqr_norm_j", N); @@ -149,14 +133,11 @@ void 
impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(D_host, D); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_0_host); - Functor_TestBatchedTeamVectorCG(D, r, c, X, - B, N_team) + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_0_host); + Functor_TestBatchedTeamVectorCG(D, r, c, X, B, + N_team) .run(); Kokkos::fence(); @@ -166,16 +147,13 @@ void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(X_host, X); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_j_host); + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e3 * ats::epsilon(); - for (int l = 0; l < N; ++l) - EXPECT_NEAR_KK(sqr_norm_j_host(l) / sqr_norm_0_host(l), 0, eps); + for (int l = 0; l < N; ++l) EXPECT_NEAR_KK(sqr_norm_j_host(l) / sqr_norm_0_host(l), 0, eps); } } // namespace TeamVectorCG } // namespace Test @@ -186,26 +164,21 @@ int test_batched_teamvector_CG() { { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef 
Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorCG::impl_test_batched_CG(1024, i, 2); + Test::TeamVectorCG::impl_test_batched_CG(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorCG::impl_test_batched_CG(1024, i, 2); + Test::TeamVectorCG::impl_test_batched_CG(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG_Real.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG_Real.hpp index 859a1a885c88..85935e07f3df 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG_Real.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorCG_Real.hpp @@ -15,13 +15,9 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_teamvector_CG_float) { - test_batched_teamvector_CG(); -} +TEST_F(TestCategory, batched_scalar_teamvector_CG_float) { test_batched_teamvector_CG(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_teamvector_CG_double) { - test_batched_teamvector_CG(); -} +TEST_F(TestCategory, batched_scalar_teamvector_CG_double) { test_batched_teamvector_CG(); } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES.hpp index 084b623aa221..a14077f014e2 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES.hpp @@ -29,8 +29,8 @@ using namespace KokkosBatched; namespace Test { namespace TeamVectorGMRES { -template +template struct 
Functor_TestBatchedTeamVectorGMRES { using execution_space = typename DeviceType::execution_space; const ValuesViewType _D; @@ -42,37 +42,23 @@ struct Functor_TestBatchedTeamVectorGMRES { const int _N_team; KrylovHandleType _handle; - Functor_TestBatchedTeamVectorGMRES(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B, - const VectorViewType &diag, + Functor_TestBatchedTeamVectorGMRES(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const VectorViewType &diag, const int N_team, KrylovHandleType &handle) - : _D(D), - _r(r), - _c(c), - _X(X), - _B(B), - _Diag(diag), - _N_team(N_team), - _handle(handle) {} + : _D(D), _r(r), _c(c), _X(X), _B(B), _Diag(diag), _N_team(N_team), _handle(handle) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _D.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); - - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto diag = Kokkos::subview( - _Diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto diag = Kokkos::subview(_Diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; using PrecOperator = KokkosBatched::JacobiPrec; @@ -81,9 +67,7 @@ struct Functor_TestBatchedTeamVectorGMRES { PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::TeamVectorGMRES::template invoke( - member, A, b, x, P, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, P, _handle); } inline void run() { @@ -92,8 +76,7 @@ struct Functor_TestBatchedTeamVectorGMRES { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, - Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); const int N = _D.extent(0); const int n = _X.extent(1); @@ -103,8 +86,8 @@ struct Functor_TestBatchedTeamVectorGMRES { _handle.set_compute_last_residual(false); _handle.set_tolerance(1e-8); - _handle.Arnoldi_view = typename KrylovHandleType::ArnoldiViewType( - "", N, maximum_iteration, n + maximum_iteration + 3); + _handle.Arnoldi_view = + typename KrylovHandleType::ArnoldiViewType("", N, maximum_iteration, n + maximum_iteration + 3); using ScalarType = typename ValuesViewType::non_const_value_type; using Layout = typename ValuesViewType::array_layout; @@ -122,16 +105,14 @@ struct Functor_TestBatchedTeamVectorGMRES { size_t bytes_int = bytes_row_ptr + bytes_col_idc; size_t bytes_diag = bytes_2D_1; size_t bytes_tmp = 2 * bytes_2D_1 + 2 * bytes_1D + bytes_2D_2; - policy.set_scratch_size( 
- 0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); + policy.set_scratch_size(0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -157,8 +138,7 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KrylovHandle; + using KrylovHandleType = KrylovHandle; NormViewType sqr_norm_0("sqr_norm_0", N); NormViewType sqr_norm_j("sqr_norm_j", N); @@ -177,12 +157,10 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { int current_index; for (int i = 0; i < BlkSize; ++i) { - for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); - ++current_index) { + for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); ++current_index) { if (colIndices_host(current_index) == i) break; } - for (int j = 0; j < N; ++j) - diag_values_host(j, i) = values_host(j, current_index); + for (int j = 0; j < N; ++j) diag_values_host(j, i) = values_host(j, current_index); } Kokkos::deep_copy(Diag, diag_values_host); @@ -212,13 +190,10 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { KrylovHandleType handle(N, N_team, n_iterations); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_0_host); - Functor_TestBatchedTeamVectorGMRES( + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + 
typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_0_host); + Functor_TestBatchedTeamVectorGMRES( D, r, c, X, B, Diag, N_team, handle) .run(); @@ -229,17 +204,13 @@ void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(X_host, X); KokkosBatched::SerialSpmv::template invoke< - typename ValuesViewType::HostMirror, typename IntView::HostMirror, - typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, - 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); - KokkosBatched::SerialDot::invoke(R_host, R_host, - sqr_norm_j_host); + typename ValuesViewType::HostMirror, typename IntView::HostMirror, typename VectorViewType::HostMirror, + typename VectorViewType::HostMirror, 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, sqr_norm_j_host); const MagnitudeType eps = 1.0e5 * ats::epsilon(); - for (int l = 0; l < N; ++l) - EXPECT_NEAR_KK( - std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); + for (int l = 0; l < N; ++l) EXPECT_NEAR_KK(std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); } } // namespace TeamVectorGMRES } // namespace Test @@ -250,28 +221,21 @@ int test_batched_teamvector_GMRES() { { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorGMRES::impl_test_batched_GMRES( - 1024, i, 2); + Test::TeamVectorGMRES::impl_test_batched_GMRES(1024, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - VectorViewType; + typedef Kokkos::View VectorViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorGMRES::impl_test_batched_GMRES( - 1024, i, 2); + 
Test::TeamVectorGMRES::impl_test_batched_GMRES(1024, i, 2); } } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES_Real.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES_Real.hpp index 53b740deaa56..ab889844a932 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES_Real.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorGMRES_Real.hpp @@ -15,13 +15,9 @@ //@HEADER #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_teamvector_GMRES_float) { - test_batched_teamvector_GMRES(); -} +TEST_F(TestCategory, batched_scalar_teamvector_GMRES_float) { test_batched_teamvector_GMRES(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_teamvector_GMRES_double) { - test_batched_teamvector_GMRES(); -} +TEST_F(TestCategory, batched_scalar_teamvector_GMRES_double) { test_batched_teamvector_GMRES(); } #endif diff --git a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorSpmv.hpp b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorSpmv.hpp index 9cbba563701e..83a78228b39f 100644 --- a/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorSpmv.hpp +++ b/packages/kokkos-kernels/batched/sparse/unit_test/Test_Batched_TeamVectorSpmv.hpp @@ -19,7 +19,7 @@ #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" -//#include "KokkosBatched_Vector.hpp" +// #include "KokkosBatched_Vector.hpp" #include "KokkosBatched_Spmv.hpp" #include "KokkosBatched_Spmv_TeamVector_Impl.hpp" @@ -38,9 +38,8 @@ struct ParamTag { typedef T trans; }; -template +template struct Functor_TestBatchedTeamVectorSpmv { using execution_space = typename DeviceType::execution_space; const alphaViewType _alpha; @@ -53,52 +52,33 @@ struct Functor_TestBatchedTeamVectorSpmv { const int _N_team; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorSpmv(const 
alphaViewType &alpha, - const ValuesViewType &D, const IntView &r, - const IntView &c, const xViewType &X, - const betaViewType &beta, - const yViewType &Y, const int N_team) - : _alpha(alpha), - _D(D), - _r(r), - _c(c), - _X(X), - _beta(beta), - _Y(Y), - _N_team(N_team) {} + Functor_TestBatchedTeamVectorSpmv(const alphaViewType &alpha, const ValuesViewType &D, const IntView &r, + const IntView &c, const xViewType &X, const betaViewType &beta, const yViewType &Y, + const int N_team) + : _alpha(alpha), _D(D), _r(r), _c(c), _X(X), _beta(beta), _Y(Y), _N_team(N_team) {} template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, const MemberType &member) const { const int first_matrix = static_cast(member.league_rank()) * _N_team; const int N = _D.extent(0); const int last_matrix = - (static_cast(member.league_rank() + 1) * _N_team < N - ? static_cast(member.league_rank() + 1) * _N_team - : N); - - auto alpha = - Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto beta = - Kokkos::subview(_beta, Kokkos::make_pair(first_matrix, last_matrix)); - auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + (static_cast(member.league_rank() + 1) * _N_team < N ? 
static_cast(member.league_rank() + 1) * _N_team + : N); + + auto alpha = Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto beta = Kokkos::subview(_beta, Kokkos::make_pair(first_matrix, last_matrix)); + auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); if (last_matrix != N) - KokkosBatched::TeamVectorSpmv< - MemberType, typename ParamTagType::trans, - 2>::template invoke( - member, alpha, d, _r, _c, x, beta, y); + KokkosBatched::TeamVectorSpmv::template invoke< + ValuesViewType, IntView, xViewType, yViewType, alphaViewType, betaViewType, dobeta>(member, alpha, d, _r, _c, + x, beta, y); else - KokkosBatched::TeamVectorSpmv:: - template invoke( - member, alpha, d, _r, _c, x, beta, y); + KokkosBatched::TeamVectorSpmv::template invoke< + ValuesViewType, IntView, xViewType, yViewType, alphaViewType, betaViewType, dobeta>(member, alpha, d, _r, _c, + x, beta, y); } inline void run() { @@ -107,17 +87,15 @@ struct Functor_TestBatchedTeamVectorSpmv { const std::string name_value_type = Test::value_type_name(); std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy policy( - ceil(static_cast(_D.extent(0)) / _N_team), Kokkos::AUTO(), - Kokkos::AUTO()); + Kokkos::TeamPolicy policy(ceil(static_cast(_D.extent(0)) / _N_team), + Kokkos::AUTO(), Kokkos::AUTO()); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); } }; -template +template void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { typedef typename ValuesViewType::value_type value_type; typedef Kokkos::ArithTraits ats; @@ -159,21 +137,15 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { else Y0_host(l, i) *= beta_host(l); if (i != 0 
&& i != (BlkSize - 1)) - Y0_host(l, i) += - alpha_host(l) * - (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); else if (i == 0) - Y0_host(l, i) += - alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); else - Y0_host(l, i) += - alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); + Y0_host(l, i) += alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); } - Functor_TestBatchedTeamVectorSpmv( - alpha, D, r, c, X1, beta, Y1, N_team) + Functor_TestBatchedTeamVectorSpmv(alpha, D, r, c, X1, beta, Y1, N_team) .run(); Kokkos::fence(); @@ -198,45 +170,37 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { } // namespace TeamVectorSpmv } // namespace Test -template +template int test_batched_teamvector_spmv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorSpmv::impl_test_batched_spmv< - DeviceType, ParamTagType, ViewType, IntView, ViewType, ViewType, - alphaViewType, alphaViewType, 0>(1025, i, 2); + Test::TeamVectorSpmv::impl_test_batched_spmv(1025, i, 2); } for (int i = 3; i < 10; ++i) { - Test::TeamVectorSpmv::impl_test_batched_spmv< - DeviceType, ParamTagType, ViewType, IntView, ViewType, ViewType, - alphaViewType, alphaViewType, 1>(1025, i, 2); + Test::TeamVectorSpmv::impl_test_batched_spmv(1025, i, 2); } } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; typedef Kokkos::View IntView; - typedef Kokkos::View - alphaViewType; + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { - Test::TeamVectorSpmv::impl_test_batched_spmv< - DeviceType, ParamTagType, ViewType, IntView, ViewType, 
ViewType, - alphaViewType, alphaViewType, 0>(1025, i, 2); + Test::TeamVectorSpmv::impl_test_batched_spmv(1025, i, 2); } for (int i = 3; i < 10; ++i) { - Test::TeamVectorSpmv::impl_test_batched_spmv< - DeviceType, ParamTagType, ViewType, IntView, ViewType, ViewType, - alphaViewType, alphaViewType, 1>(1025, i, 2); + Test::TeamVectorSpmv::impl_test_batched_spmv(1025, i, 2); } } #endif diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_impl.hpp index 0334adbafe54..0c674f25f5e8 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_impl.hpp @@ -37,8 +37,7 @@ struct MV_Abs_Functor { RMV R_; XMV X_; - MV_Abs_Functor(const RMV& R, const XMV& X) - : numCols(X.extent(1)), R_(R), X_(X) { + MV_Abs_Functor(const RMV& R, const XMV& X) : numCols(X.extent(1)), R_(R), X_(X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Abs_Functor: RMV is not a Kokkos::View."); @@ -163,8 +162,7 @@ void MV_Abs_Generic(const execution_space& space, const RMV& R, const XMV& X) { const SizeType numRows = X.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); - if ((void*)(R.data()) == - (void*)(X.data())) { // if R and X are the same (alias one another) + if ((void*)(R.data()) == (void*)(X.data())) { // if R and X are the same (alias one another) MV_AbsSelf_Functor op(R); Kokkos::parallel_for("KokkosBlas::Abs::S0", policy, op); } else { @@ -192,8 +190,7 @@ void V_Abs_Generic(const execution_space& space, const RV& R, const XV& X) { const SizeType numRows = X.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); - if ((void*)(R.data()) == - (void*)(X.data())) { // if R and X are the same (alias one another) + if ((void*)(R.data()) == (void*)(X.data())) { // if R and X are the same (alias one another) V_AbsSelf_Functor op(R); Kokkos::parallel_for("KokkosBlas::Abs::S2", policy, op); } else { diff --git 
a/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_spec.hpp index a4695bd5058b..fb6357b38ee1 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_abs_spec.hpp @@ -42,17 +42,15 @@ struct abs_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_ABS_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct abs_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ABS_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct abs_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -62,18 +60,15 @@ struct abs_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_ABS_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct abs_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ABS_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct abs_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -85,10 +80,9 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template < - class execution_space, class RMV, class XMV, int rank = RMV::rank, - bool tpl_spec_avail = abs_tpl_spec_avail::value, - bool eti_spec_avail = abs_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = abs_eti_spec_avail::value> struct Abs { static void abs(const execution_space& space, const RMV& R, const XMV& X); }; @@ -96,8 +90,7 @@ struct Abs { #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Abs for single vectors (1-D Views). template -struct Abs { +struct Abs { using size_type = typename XMV::size_type; static void abs(const execution_space& space, const RMV& R, const XMV& X) { @@ -113,16 +106,13 @@ struct Abs: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::abs[ETI]" - : "KokkosBlas::abs[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::abs[ETI]" + : "KokkosBlas::abs[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::abs<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::abs<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::abs<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::abs<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -139,8 +129,7 @@ struct Abs -struct Abs { +struct Abs { using size_type = typename XMV::size_type; static void abs(const execution_space& space, const RMV& R, const XMV& X) { @@ -156,23 +145,19 @@ struct Abs: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::abs[ETI]" - : "KokkosBlas::abs[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::abs[ETI]" + : "KokkosBlas::abs[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::abs<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::abs<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::asb<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::asb<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { typedef int index_type; MV_Abs_Generic(space, R, X); } else { @@ -194,14 +179,12 @@ struct Abs, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_ABS_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Abs< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -209,14 +192,12 @@ struct Abs, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_ABS_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Abs< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -226,15 +207,12 @@ struct Abs, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_ABS_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Abs< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -242,15 +220,12 @@ struct Abs, \ - 
Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_ABS_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Abs< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_impl.hpp index b919d76a9479..6baed662cfef 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_impl.hpp @@ -25,14 +25,12 @@ namespace KokkosBlas { namespace Impl { template -constexpr typename std::enable_if, int>::type -axpbyVarExtent(T& v) { +constexpr typename std::enable_if, int>::type axpbyVarExtent(T& v) { return v.extent(0); } template -constexpr typename std::enable_if, int>::type -axpbyVarExtent(T&) { +constexpr typename std::enable_if, int>::type axpbyVarExtent(T&) { return 0; } @@ -58,8 +56,7 @@ axpbyVarExtent(T&) { // coefficients. Any literal coefficient of zero has BLAS semantics // of ignoring the corresponding (multi)vector entry. This does not // apply to coefficients in the a and b vectors, if they are used. 
-template +template struct Axpby_Functor { typedef typename YV::execution_space execution_space; typedef SizeType size_type; @@ -70,8 +67,7 @@ struct Axpby_Functor { AV m_a; BV m_b; - Axpby_Functor(const XV& x, const YV& y, const AV& av, const BV& bv, - const SizeType startingColumn) + Axpby_Functor(const XV& x, const YV& y, const AV& av, const BV& bv, const SizeType startingColumn) : m_x(x), m_y(y), m_a(av), m_b(bv) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_Functor(ABgeneric)" @@ -79,8 +75,7 @@ struct Axpby_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_Functor(ABgeneric)" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_Functor(ABgeneric)" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -90,18 +85,15 @@ struct Axpby_Functor { static_assert(YV::rank == 1, "KokkosBlas::Impl::Axpby_Functor(ABgeneric)" ": XV and YV must have rank 1."); - static_assert((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2), + static_assert((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2), "KokkosBlas::Impl::Axpby_Functor(ABgeneric)" ": scalar_x and/or scalar_y are out of range."); if (startingColumn != 0) { if (axpbyVarExtent(m_a) > 1) { - m_a = Kokkos::subview( - av, std::make_pair(startingColumn, SizeType(av.extent(0)))); + m_a = Kokkos::subview(av, std::make_pair(startingColumn, SizeType(av.extent(0)))); } if (axpbyVarExtent(m_b) > 1) { - m_b = Kokkos::subview( - bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); + m_b = Kokkos::subview(bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); } } } @@ -123,10 +115,8 @@ struct Axpby_Functor { } else if constexpr (scalar_y == 1) { // Nothing to do: m_y(i) = m_y(i); } else if constexpr (scalar_y == 2) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { - m_y(i) = - 
Kokkos::ArithTraits::zero(); + if (m_b(0) == Kokkos::ArithTraits::zero()) { + m_y(i) = Kokkos::ArithTraits::zero(); } else { m_y(i) = m_b(0) * m_y(i); } @@ -143,8 +133,7 @@ struct Axpby_Functor { } else if constexpr (scalar_y == 1) { m_y(i) = -m_x(i) + m_y(i); } else if constexpr (scalar_y == 2) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { m_y(i) = -m_x(i); } else { m_y(i) = -m_x(i) + m_b(0) * m_y(i); @@ -162,8 +151,7 @@ struct Axpby_Functor { } else if constexpr (scalar_y == 1) { m_y(i) = m_x(i) + m_y(i); } else if constexpr (scalar_y == 2) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { m_y(i) = m_x(i); } else { m_y(i) = m_x(i) + m_b(0) * m_y(i); @@ -181,8 +169,7 @@ struct Axpby_Functor { } else if constexpr (scalar_y == 1) { m_y(i) = m_a(0) * m_x(i) + m_y(i); } else if constexpr (scalar_y == 2) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { m_y(i) = m_a(0) * m_x(i); } else { m_y(i) = m_a(0) * m_x(i) + m_b(0) * m_y(i); @@ -209,8 +196,7 @@ struct Axpby_Functor { // of ignoring the corresponding (multi)vector entry. This does not // apply to coefficients in the a and b vectors, if they are used. 
template -struct Axpby_Functor { typedef typename YV::execution_space execution_space; typedef SizeType size_type; @@ -221,10 +207,8 @@ struct Axpby_Functor::value, "KokkosBlas::Impl::Axpby_Functor(ABscalars)" @@ -232,8 +216,7 @@ struct Axpby_Functor::value, "KokkosBlas::Impl::Axpby_Functor(ABscalars)" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_Functor(ABscalars)" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -243,8 +226,7 @@ struct Axpby_Functor -void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, - const BV& bv, const YV& y, const SizeType startingColumn, - int scalar_x = 2, int scalar_y = 2) { +template +void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, const BV& bv, const YV& y, + const SizeType startingColumn, int scalar_x = 2, int scalar_y = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Axpby_Generic: X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Axpby_Generic: Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_Generic: Y is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -344,8 +323,7 @@ void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, "KokkosBlas::Impl::Axpby_Generic: " "XV and YV must have rank 1."); - if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2)) { + if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2)) { // Ok } else { KokkosKernels::Impl::throw_runtime_exception( @@ -361,20 +339,16 @@ void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, // **************************************************************** if (scalar_x == 0) { if (scalar_y == 0) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S0", policy, op); } else if (scalar_y == -1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S1", policy, op); } else if (scalar_y == 1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S2", policy, op); } else if (scalar_y == 2) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S3", policy, op); } } @@ -383,20 +357,16 @@ void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, // **************************************************************** else if (scalar_x == -1) { if (scalar_y == 0) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S4", policy, op); } else if (scalar_y == -1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S5", 
policy, op); } else if (scalar_y == 1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S6", policy, op); } else if (scalar_y == 2) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S7", policy, op); } } @@ -405,20 +375,16 @@ void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, // **************************************************************** else if (scalar_x == 1) { if (scalar_y == 0) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S8", policy, op); } else if (scalar_y == -1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S9", policy, op); } else if (scalar_y == 1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S10", policy, op); } else if (scalar_y == 2) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S11", policy, op); } } @@ -427,20 +393,16 @@ void Axpby_Generic(const execution_space& space, const AV& av, const XV& x, // **************************************************************** else if (scalar_x == 2) { if (scalar_y == 0) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S12", policy, op); } else if (scalar_y == -1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S13", policy, op); } else if (scalar_y == 1) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + 
Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S14", policy, op); } else if (scalar_y == 2) { - Axpby_Functor op(x, y, av, bv, - startingColumn); + Axpby_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::S15", policy, op); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_mv_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_mv_impl.hpp index 7db7b0abe3c3..81c05fe7dfcc 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_mv_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_mv_impl.hpp @@ -66,8 +66,7 @@ struct Axpby_MV_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Functor(ABgeneric)" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Functor(ABgeneric)" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -83,8 +82,7 @@ struct Axpby_MV_Functor { static_assert(BV::rank == 1, "KokkosBlas::Impl::Axpby_MV_Functor(ABgeneric)" ": BV must have rank 1."); - static_assert((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2), + static_assert((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2), "KokkosBlas::Impl::Axpby_MV_Functor(ABgeneric)" ": scalar_x and/or scalar_y are out of range."); } @@ -123,8 +121,7 @@ struct Axpby_MV_Functor { // Nothing to do: Y(i,j) := Y(i,j) } else if constexpr (scalar_y == 2) { if (m_b.extent(0) == 1) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -132,8 +129,7 @@ struct Axpby_MV_Functor { #pragma vector always #endif for (size_type k = 0; k < numCols; ++k) { - m_y(i, k) = Kokkos::ArithTraits< - typename YMV::non_const_value_type>::zero(); + m_y(i, k) = Kokkos::ArithTraits::zero(); } } else { 
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP @@ -195,8 +191,7 @@ struct Axpby_MV_Functor { } } else if constexpr (scalar_y == 2) { if (m_b.extent(0) == 1) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -266,8 +261,7 @@ struct Axpby_MV_Functor { } } else if constexpr (scalar_y == 2) { if (m_b.extent(0) == 1) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -374,8 +368,7 @@ struct Axpby_MV_Functor { } else if constexpr (scalar_y == 2) { if (m_a.extent(0) == 1) { if (m_b.extent(0) == 1) { - if (m_b(0) == Kokkos::ArithTraits< - typename BV::non_const_value_type>::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -409,8 +402,7 @@ struct Axpby_MV_Functor { } } else { if (m_b.extent(0) == 1) { - if (m_b(0) == Kokkos::ArithTraits< - typename BV::non_const_value_type>::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -467,8 +459,7 @@ struct Axpby_MV_Functor { // This version works by partial specialization on AV and BV. // In this partial specialization, both AV and BV are scalars. 
template -struct Axpby_MV_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -479,8 +470,7 @@ struct Axpby_MV_Functor::value, @@ -489,8 +479,7 @@ struct Axpby_MV_Functor::value, "KokkosBlas::Impl::Axpby_MV_Functor(ABscalars)" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Functor(ABscalars)" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -500,8 +489,7 @@ struct Axpby_MV_Functor +template struct Axpby_MV_Unroll_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -704,8 +691,7 @@ struct Axpby_MV_Unroll_Functor { AV m_a; BV m_b; - Axpby_MV_Unroll_Functor(const XMV& x, const YMV& y, const AV& av, - const BV& bv, const SizeType startingColumn) + Axpby_MV_Unroll_Functor(const XMV& x, const YMV& y, const AV& av, const BV& bv, const SizeType startingColumn) : m_x(x), m_y(y), m_a(av), m_b(bv) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABgeneric)" @@ -719,8 +705,7 @@ struct Axpby_MV_Unroll_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABgeneric)" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABgeneric)" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -736,19 +721,16 @@ struct Axpby_MV_Unroll_Functor { static_assert(BV::rank == 1, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABgeneric)" ": BV must have rank 1."); - static_assert((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2), + static_assert((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2), "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABgeneric)" ": scalar_x and/or scalar_y are out of range."); if (startingColumn != 0) { if 
(axpbyVarExtent(m_a) > 1) { - m_a = Kokkos::subview( - av, std::make_pair(startingColumn, SizeType(av.extent(0)))); + m_a = Kokkos::subview(av, std::make_pair(startingColumn, SizeType(av.extent(0)))); } if (axpbyVarExtent(m_b) > 1) { - m_b = Kokkos::subview( - bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); + m_b = Kokkos::subview(bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); } } } @@ -781,14 +763,12 @@ struct Axpby_MV_Unroll_Functor { // Nothing to do: Y(i,j) := Y(i,j) } else if constexpr (scalar_y == 2) { if (m_b.extent(0) == 1) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif for (int k = 0; k < UNROLL; ++k) { - m_y(i, k) = Kokkos::ArithTraits< - typename YMV::non_const_value_type>::zero(); + m_y(i, k) = Kokkos::ArithTraits::zero(); } } else { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL @@ -835,8 +815,7 @@ struct Axpby_MV_Unroll_Functor { } } else if constexpr (scalar_y == 2) { if (m_b.extent(0) == 1) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif @@ -888,8 +867,7 @@ struct Axpby_MV_Unroll_Functor { } } else if constexpr (scalar_y == 2) { if (m_b.extent(0) == 1) { - if (m_b(0) == - Kokkos::ArithTraits::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif @@ -969,8 +947,7 @@ struct Axpby_MV_Unroll_Functor { } else if constexpr (scalar_y == 2) { if (m_a.extent(0) == 1) { if (m_b.extent(0) == 1) { - if (m_b(0) == Kokkos::ArithTraits< - typename BV::non_const_value_type>::zero()) { + if (m_b(0) == Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif @@ -995,8 +972,7 @@ struct Axpby_MV_Unroll_Functor { } } else { if (m_b.extent(0) == 1) { - if (m_b(0) == Kokkos::ArithTraits< - typename BV::non_const_value_type>::zero()) { + if (m_b(0) == 
Kokkos::ArithTraits::zero()) { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif @@ -1028,10 +1004,8 @@ struct Axpby_MV_Unroll_Functor { // Variant of Axpby_MV_Unroll_Functor for single coefficients (rather // than vectors of coefficients) a and b. The number of columns in X // and Y, UNROLL, is a compile-time constant. -template -struct Axpby_MV_Unroll_Functor +struct Axpby_MV_Unroll_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -1041,10 +1015,8 @@ struct Axpby_MV_Unroll_Functor::value, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABscalars)" @@ -1052,8 +1024,7 @@ struct Axpby_MV_Unroll_Functor::value, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABscalars)" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Unroll_Functor(ABscalars)" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -1063,8 +1034,7 @@ struct Axpby_MV_Unroll_Functor -void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, - const BV& bv, const YMV& y, - const SizeType startingColumn, int scalar_x = 2, - int scalar_y = 2) { +template +void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, const BV& bv, const YMV& y, + const SizeType startingColumn, int scalar_x = 2, int scalar_y = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Unrolled()" ": X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Unrolled()" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Unrolled()" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -1251,8 +1217,7 @@ void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, static_assert(YMV::rank == 2, 
"KokkosBlas::Impl::Axpby_MV_Unrolled()" ": XMV and YMV must have rank 2."); - if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2)) { + if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2)) { // Ok } else { KokkosKernels::Impl::throw_runtime_exception( @@ -1268,20 +1233,16 @@ void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, // **************************************************************** if (scalar_x == 0) { if (scalar_y == 0) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S0", policy, op); } else if (scalar_y == -1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S1", policy, op); } else if (scalar_y == 1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S2", policy, op); } else if (scalar_y == 2) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S3", policy, op); } } @@ -1290,20 +1251,16 @@ void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, // **************************************************************** else if (scalar_x == -1) { if (scalar_y == 0) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S4", policy, op); } else if (scalar_y == -1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S5", policy, op); } else if (scalar_y == 
1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S6", policy, op); } else if (scalar_y == 2) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S7", policy, op); } } @@ -1312,20 +1269,16 @@ void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, // **************************************************************** else if (scalar_x == 1) { if (scalar_y == 0) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S8", policy, op); } else if (scalar_y == -1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S9", policy, op); } else if (scalar_y == 1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S10", policy, op); } else if (scalar_y == 2) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S11", policy, op); } } @@ -1334,20 +1287,16 @@ void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, // **************************************************************** else if (scalar_x == 2) { if (scalar_y == 0) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S12", policy, op); } else if (scalar_y == -1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, 
av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S13", policy, op); } else if (scalar_y == 1) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S14", policy, op); } else if (scalar_y == 2) { - Axpby_MV_Unroll_Functor op( - x, y, av, bv, startingColumn); + Axpby_MV_Unroll_Functor op(x, y, av, bv, startingColumn); Kokkos::parallel_for("KokkosBlas::Axpby::MV::S15", policy, op); } } @@ -1372,19 +1321,16 @@ void Axpby_MV_Unrolled(const execution_space& space, const AV& av, const XMV& x, // coefficients in av and bv vectors, if they are used. // // Either av and bv are both 1-D Views, or av and bv are both scalars. -template -void Axpby_MV_Generic(const execution_space& space, const AV& av, const XMV& x, - const BV& bv, const YMV& y, int scalar_x = 2, - int scalar_y = 2) { +template +void Axpby_MV_Generic(const execution_space& space, const AV& av, const XMV& x, const BV& bv, const YMV& y, + int scalar_x = 2, int scalar_y = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Generic()" ": X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Generic()" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Generic()" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -1394,8 +1340,7 @@ void Axpby_MV_Generic(const execution_space& space, const AV& av, const XMV& x, static_assert(YMV::rank == 2, "KokkosBlas::Impl::Axpby_MV_Generic()" ": XMV and YMV must have rank 2."); - if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2)) { + if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2)) { // Ok } else { KokkosKernels::Impl::throw_runtime_exception( @@ -1499,20 +1444,17 @@ void 
Axpby_MV_Generic(const execution_space& space, const AV& av, const XMV& x, // coefficients in av and bv vectors, if they are used. // // Either av and bv are both 1-D Views, or av and bv are both scalars. -template +template struct Axpby_MV_Invoke_Left { - static void run(const execution_space& space, const AV& av, const XMV& x, - const BV& bv, const YMV& y, int scalar_x = 2, - int scalar_y = 2) { + static void run(const execution_space& space, const AV& av, const XMV& x, const BV& bv, const YMV& y, + int scalar_x = 2, int scalar_y = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Invoke_Left::run()" ": X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Invoke_Left::run()" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Invoke_Left::run()" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -1522,8 +1464,7 @@ struct Axpby_MV_Invoke_Left { static_assert(YMV::rank == 2, "KokkosBlas::Impl::Axpby_MV_Invoke_Left::run()" ": X and Y must have rank 2."); - if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2)) { + if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2)) { // Ok } else { KokkosKernels::Impl::throw_runtime_exception( @@ -1544,8 +1485,8 @@ struct Axpby_MV_Invoke_Left { // Passing in the starting column index lets the functor take // subviews of av and bv, if they are Views. If they are scalars, // the functor doesn't have to do anything to them. 
- Axpby_MV_Unrolled( - space, av, X_cur, bv, Y_cur, j, scalar_x, scalar_y); + Axpby_MV_Unrolled(space, av, X_cur, bv, Y_cur, j, scalar_x, + scalar_y); } for (; j + 4 <= numCols; j += 4) { XMV X_cur = Kokkos::subview(x, Kokkos::ALL(), std::make_pair(j, j + 4)); @@ -1554,8 +1495,8 @@ struct Axpby_MV_Invoke_Left { // Passing in the starting column index lets the functor take // subviews of av and bv, if they are Views. If they are scalars, // the functor doesn't have to do anything to them. - Axpby_MV_Unrolled( - space, av, X_cur, bv, Y_cur, j, scalar_x, scalar_y); + Axpby_MV_Unrolled(space, av, X_cur, bv, Y_cur, j, scalar_x, + scalar_y); } for (; j < numCols; ++j) { auto x_cur = Kokkos::subview(x, Kokkos::ALL(), j); @@ -1566,8 +1507,7 @@ struct Axpby_MV_Invoke_Left { // the functor doesn't have to do anything to them. typedef decltype(x_cur) XV; typedef decltype(y_cur) YV; - Axpby_Generic( - space, av, x_cur, bv, y_cur, j, scalar_x, scalar_y); + Axpby_Generic(space, av, x_cur, bv, y_cur, j, scalar_x, scalar_y); } } }; @@ -1591,20 +1531,17 @@ struct Axpby_MV_Invoke_Left { // coefficients in av and bv vectors, if they are used. // // Either av and bv are both 1-D Views, or av and bv are both scalars. 
-template +template struct Axpby_MV_Invoke_Right { - static void run(const execution_space& space, const AV& av, const XMV& x, - const BV& bv, const YMV& y, int scalar_x = 2, - int scalar_y = 2) { + static void run(const execution_space& space, const AV& av, const XMV& x, const BV& bv, const YMV& y, + int scalar_x = 2, int scalar_y = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Invoke_Right::run()" ": X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby_MV_Invoke_Right::run()" ": Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby_MV_Invoke_Right::run()" ": Y must be nonconst, since it is an output argument" " and we have to be able to write to its entries."); @@ -1614,8 +1551,7 @@ struct Axpby_MV_Invoke_Right { static_assert(YMV::rank == 2, "KokkosBlas::Impl::Axpby_MV_Invoke_Right::run()" ": X and Y must have rank 2."); - if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && - (scalar_y <= 2)) { + if ((-1 <= scalar_x) && (scalar_x <= 2) && (-1 <= scalar_y) && (scalar_y <= 2)) { // Ok } else { KokkosKernels::Impl::throw_runtime_exception( @@ -1629,11 +1565,9 @@ struct Axpby_MV_Invoke_Right { auto y_0 = Kokkos::subview(y, Kokkos::ALL(), 0); typedef decltype(x_0) XV; typedef decltype(y_0) YV; - Axpby_Generic( - space, av, x_0, bv, y_0, 0, scalar_x, scalar_y); + Axpby_Generic(space, av, x_0, bv, y_0, 0, scalar_x, scalar_y); } else { - Axpby_MV_Generic( - space, av, x, bv, y, scalar_x, scalar_y); + Axpby_MV_Generic(space, av, x, bv, y, scalar_x, scalar_y); } } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_spec.hpp index 3aff21e0bedc..f4f85c8f6b42 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_spec.hpp @@ -28,8 +28,7 @@ namespace KokkosBlas { namespace Impl 
{ // Specialization struct which defines whether a specialization exists -template +template struct axpby_eti_spec_avail { enum : bool { value = false }; }; @@ -43,36 +42,29 @@ struct axpby_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_AXPBY_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct axpby_eti_spec_avail< \ - EXEC_SPACE, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct axpby_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_AXPBY_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct axpby_eti_spec_avail< \ + EXEC_SPACE, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct axpby_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -82,36 +74,29 @@ struct axpby_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_AXPBY_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct axpby_eti_spec_avail< \ - EXEC_SPACE, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct axpby_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_AXPBY_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct axpby_eti_spec_avail< \ + EXEC_SPACE, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct axpby_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -146,21 +131,16 @@ namespace Impl { /// Any scalar coefficient of zero has BLAS semantics of /// ignoring the corresponding (multi)vector entry. This does NOT /// apply to coefficients in av and bv vectors, if they are used. 
-template ::value, - bool eti_spec_avail = - axpby_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = axpby_eti_spec_avail::value> struct Axpby { - static void axpby(const execution_space& space, const AV& av, const XMV& X, - const BV& bv, const YMV& Y); + static void axpby(const execution_space& space, const AV& av, const XMV& X, const BV& bv, const YMV& Y); }; template struct Axpby { - static void axpby(const execution_space& /*space*/, const AV& /* av */, - const XMV& /* X */, const BV& /* bv */, + static void axpby(const execution_space& /*space*/, const AV& /* av */, const XMV& /* X */, const BV& /* bv */, const YMV& /* Y */) { static_assert(YMV::rank == 0, "Oh My God"); } @@ -175,20 +155,17 @@ struct Axpby { // the unification process forces AV = view and BV = view // ********************************************************************** template -struct Axpby { +struct Axpby { using size_type = typename YMV::size_type; - static void axpby(const execution_space& space, const AV& av, const XMV& X, - const BV& bv, const YMV& Y) { + static void axpby(const execution_space& space, const AV& av, const XMV& X, const BV& bv, const YMV& Y) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Axpby::axpby: X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Axpby::axpby: Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby::axpby: Y is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -198,21 +175,17 @@ struct Axpby::axpby: " "X and Y must have rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::axpby[ETI]" - : "KokkosBlas::axpby[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::axpby[ETI]" + : "KokkosBlas::axpby[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf( - "KokkosBlas1::axpby<> ETI specialization for < %s , %s , %s , %s >\n", - typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), - typeid(YMV).name()); + printf("KokkosBlas1::axpby<> ETI specialization for < %s , %s , %s , %s >\n", typeid(AV).name(), + typeid(XMV).name(), typeid(BV).name(), typeid(YMV).name()); else { printf( "KokkosBlas1::axpby<> non-ETI specialization for < %s , %s , %s , %s " ">\n", - typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), - typeid(YMV).name()); + typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), typeid(YMV).name()); } #endif @@ -255,22 +228,19 @@ struct Axpby(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { - using index_type = int; - using Axpby_MV_Invoke_Layout = typename std::conditional< - std::is_same::value, - Axpby_MV_Invoke_Left, - Axpby_MV_Invoke_Right >::type; + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { + using index_type = int; + using Axpby_MV_Invoke_Layout = + typename std::conditional::value, + Axpby_MV_Invoke_Left, + Axpby_MV_Invoke_Right >::type; Axpby_MV_Invoke_Layout::run(space, av, X, bv, Y, scalar_x, scalar_y); } else { - using index_type = typename XMV::size_type; - using Axpby_MV_Invoke_Layout = typename std::conditional< - std::is_same::value, - Axpby_MV_Invoke_Left, - Axpby_MV_Invoke_Right >::type; + using index_type = typename XMV::size_type; + using Axpby_MV_Invoke_Layout = + typename std::conditional::value, + Axpby_MV_Invoke_Left, + Axpby_MV_Invoke_Right >::type; Axpby_MV_Invoke_Layout::run(space, av, X, bv, Y, scalar_x, scalar_y); } Kokkos::Profiling::popRegion(); @@ -285,25 +255,22 @@ struct Axpby -struct Axpby { +struct Axpby { using AV = typename XMV::non_const_value_type; using BV = typename YMV::non_const_value_type; using size_type = typename YMV::size_type; using ATA = 
Kokkos::ArithTraits; using ATB = Kokkos::ArithTraits; - static void axpby(const execution_space& space, const AV& alpha, const XMV& X, - const BV& beta, const YMV& Y) { + static void axpby(const execution_space& space, const AV& alpha, const XMV& X, const BV& beta, const YMV& Y) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby::axpby (MV): " "X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::Axpby::axpby (MV): " "Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby::axpby (MV): Y is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -313,22 +280,18 @@ struct Axpby ETI specialization for < %s , %s , %s , %s >\n", - typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), - typeid(YMV).name()); + printf("KokkosBlas1::axpby<> ETI specialization for < %s , %s , %s , %s >\n", typeid(AV).name(), + typeid(XMV).name(), typeid(BV).name(), typeid(YMV).name()); else { printf( "KokkosBlas1::axpby<> non-ETI specialization for < %s , %s , %s , %s " ">\n", - typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), - typeid(YMV).name()); + typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), typeid(YMV).name()); } #endif @@ -353,22 +316,19 @@ struct Axpby(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { - using index_type = int; - using Axpby_MV_Invoke_Layout = typename std::conditional< - std::is_same::value, - Axpby_MV_Invoke_Left, - Axpby_MV_Invoke_Right >::type; + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { + using index_type = int; + using Axpby_MV_Invoke_Layout = + typename std::conditional::value, + Axpby_MV_Invoke_Left, + Axpby_MV_Invoke_Right >::type; Axpby_MV_Invoke_Layout::run(space, alpha, X, beta, Y, scalar_x, scalar_y); } else { - using index_type = typename XMV::size_type; - using Axpby_MV_Invoke_Layout = typename 
std::conditional< - std::is_same::value, - Axpby_MV_Invoke_Left, - Axpby_MV_Invoke_Right >::type; + using index_type = typename XMV::size_type; + using Axpby_MV_Invoke_Layout = + typename std::conditional::value, + Axpby_MV_Invoke_Left, + Axpby_MV_Invoke_Right >::type; Axpby_MV_Invoke_Layout::run(space, alpha, X, beta, Y, scalar_x, scalar_y); } Kokkos::Profiling::popRegion(); @@ -383,15 +343,12 @@ struct Axpby -struct Axpby { +struct Axpby { using size_type = typename YV::size_type; - static void axpby(const execution_space& space, const AV& av, const XV& X, - const BV& bv, const YV& Y) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::axpby[ETI]" - : "KokkosBlas::axpby[noETI]"); + static void axpby(const execution_space& space, const AV& av, const XV& X, const BV& bv, const YV& Y) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::axpby[ETI]" + : "KokkosBlas::axpby[noETI]"); size_type const numRows = X.extent(0); @@ -433,12 +390,10 @@ struct Axpby(INT_MAX)) { using index_type = int; - Axpby_Generic( - space, av, X, bv, Y, 0, scalar_x, scalar_y); + Axpby_Generic(space, av, X, bv, Y, 0, scalar_x, scalar_y); } else { using index_type = typename XV::size_type; - Axpby_Generic( - space, av, X, bv, Y, 0, scalar_x, scalar_y); + Axpby_Generic(space, av, X, bv, Y, 0, scalar_x, scalar_y); } Kokkos::Profiling::popRegion(); @@ -453,8 +408,7 @@ struct Axpby -struct Axpby { using AV = typename XV::non_const_value_type; using BV = typename YV::non_const_value_type; @@ -462,16 +416,14 @@ struct Axpby; using ATB = Kokkos::ArithTraits; - static void axpby(const execution_space& space, const AV& alpha, const XV& X, - const BV& beta, const YV& Y) { + static void axpby(const execution_space& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Axpby::axpby: X is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" 
"Axpby::axpby: Y is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Axpby::axpby: Y is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -482,21 +434,17 @@ struct Axpby::axpby: " "X and Y must have rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::axpby[ETI]" - : "KokkosBlas::axpby[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::axpby[ETI]" + : "KokkosBlas::axpby[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf( - "KokkosBlas1::axpby<> ETI specialization for < %s , %s , %s , %s >\n", - typeid(AV).name(), typeid(XV).name(), typeid(BV).name(), - typeid(YV).name()); + printf("KokkosBlas1::axpby<> ETI specialization for < %s , %s , %s , %s >\n", typeid(AV).name(), + typeid(XV).name(), typeid(BV).name(), typeid(YV).name()); else { printf( "KokkosBlas1::axpby<> non-ETI specialization for < %s , %s , %s , %s " ">\n", - typeid(AV).name(), typeid(XV).name(), typeid(BV).name(), - typeid(YV).name()); + typeid(AV).name(), typeid(XV).name(), typeid(BV).name(), typeid(YV).name()); } #endif @@ -522,14 +470,12 @@ struct Axpby(INT_MAX)) { using index_type = int; - Axpby_Generic( - space, alpha, X, beta, Y, 0, scalar_x, scalar_y); + Axpby_Generic(space, alpha, X, beta, Y, 0, scalar_x, scalar_y); } else { using index_type = typename XV::size_type; - Axpby_Generic( - space, alpha, X, beta, Y, 0, scalar_x, scalar_y); + Axpby_Generic(space, alpha, X, beta, Y, 0, scalar_x, scalar_y); } Kokkos::Profiling::popRegion(); } @@ -548,54 +494,42 @@ struct Axpby, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; \ - extern template struct Axpby< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 
Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_AXPBY_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Axpby< \ + EXEC_SPACE, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1, false, true>; \ + extern template struct Axpby< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 1, false, true>; -#define KOKKOSBLAS1_AXPBY_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Axpby< \ - EXEC_SPACE, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; \ - template struct Axpby< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_AXPBY_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Axpby< \ + EXEC_SPACE, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1, false, true>; \ + template struct Axpby< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -606,56 +540,42 @@ struct Axpby, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; \ - extern template struct Axpby< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define 
KOKKOSBLAS1_AXPBY_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Axpby< \ + EXEC_SPACE, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 2, false, true>; \ + extern template struct Axpby< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 2, false, true>; -#define KOKKOSBLAS1_AXPBY_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct Axpby< \ - EXEC_SPACE, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; \ - template struct Axpby< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_AXPBY_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Axpby< \ + EXEC_SPACE, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 2, false, true>; \ + template struct Axpby< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_unification_attempt_traits.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_unification_attempt_traits.hpp index 9d200e892d91..0a0300780166 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_unification_attempt_traits.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_axpby_unification_attempt_traits.hpp @@ -53,8 +53,7 @@ constexpr typename std::enable_if, bool>::type Tr1s_val() { } template -constexpr 
typename std::enable_if, bool>::type -Tr1s_val() { +constexpr typename std::enable_if, bool>::type Tr1s_val() { return false; } @@ -66,8 +65,7 @@ constexpr typename std::enable_if, bool>::type Tr1d_val() { } template -constexpr typename std::enable_if, bool>::type -Tr1d_val() { +constexpr typename std::enable_if, bool>::type Tr1d_val() { return false; } @@ -105,8 +103,7 @@ struct AxpbyUnificationAttemptTraits { // - type names begin with upper case letters // ******************************************************************** public: - static constexpr bool onDevice = - KokkosKernels::Impl::kk_is_gpu_exec_space(); + static constexpr bool onDevice = KokkosKernels::Impl::kk_is_gpu_exec_space(); private: static constexpr bool onHost = !onDevice; @@ -139,23 +136,15 @@ struct AxpbyUnificationAttemptTraits { // ******************************************************************** // Declare 'AtInputScalarTypeA_nonConst' // ******************************************************************** - using ScalarTypeA2_onDevice = - typename getScalarTypeFromView::type; - using ScalarTypeA1_onDevice = - std::conditional_t; + using ScalarTypeA2_onDevice = typename getScalarTypeFromView::type; + using ScalarTypeA1_onDevice = std::conditional_t; - using ScalarTypeA2_onHost = - typename getScalarTypeFromView::type; - using ScalarTypeA1_onHost = - std::conditional_t; + using ScalarTypeA2_onHost = typename getScalarTypeFromView::type; + using ScalarTypeA1_onHost = std::conditional_t; - using AtInputScalarTypeA = - std::conditional_t; + using AtInputScalarTypeA = std::conditional_t; - using AtInputScalarTypeA_nonConst = - typename std::remove_const::type; + using AtInputScalarTypeA_nonConst = typename std::remove_const::type; // ******************************************************************** // Declare 'AtInputScalarTypeX_nonConst' @@ -167,23 +156,15 @@ struct AxpbyUnificationAttemptTraits { // ******************************************************************** // Declare 
'AtInputScalarTypeB_nonConst' // ******************************************************************** - using ScalarTypeB2_onDevice = - typename getScalarTypeFromView::type; - using ScalarTypeB1_onDevice = - std::conditional_t; + using ScalarTypeB2_onDevice = typename getScalarTypeFromView::type; + using ScalarTypeB1_onDevice = std::conditional_t; - using ScalarTypeB2_onHost = - typename getScalarTypeFromView::type; - using ScalarTypeB1_onHost = - std::conditional_t; + using ScalarTypeB2_onHost = typename getScalarTypeFromView::type; + using ScalarTypeB1_onHost = std::conditional_t; - using AtInputScalarTypeB = - std::conditional_t; + using AtInputScalarTypeB = std::conditional_t; - using AtInputScalarTypeB_nonConst = - typename std::remove_const::type; + using AtInputScalarTypeB_nonConst = typename std::remove_const::type; // ******************************************************************** // Declare 'AtInputScalarTypeY_nonConst' @@ -195,138 +176,115 @@ struct AxpbyUnificationAttemptTraits { // ******************************************************************** // Declare 'InternalLayoutX' and 'InternalLayoutY' // ******************************************************************** - using InternalLayoutX = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using InternalLayoutY = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - YMV, InternalLayoutX>::array_layout; + using InternalLayoutX = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using InternalLayoutY = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // ******************************************************************** // Declare 'InternalTypeA_tmp' // ******************************************************************** - using AtInputLayoutA = - typename getLayoutFromView::type; + using AtInputLayoutA = typename getLayoutFromView::type; public: - static constexpr bool atInputLayoutA_isStride = - std::is_same_v; + static constexpr 
bool atInputLayoutA_isStride = std::is_same_v; private: using InternalLayoutA = - std::conditional_t<(a_is_r1d || a_is_r1s) && atInputLayoutA_isStride, - AtInputLayoutA, InternalLayoutX>; - - static constexpr bool atInputScalarTypeA_mustRemain = - Kokkos::ArithTraits::is_complex && - !Kokkos::ArithTraits::is_complex; - - using InternalScalarTypeA = std::conditional_t< - atInputScalarTypeA_mustRemain || ((a_is_r1d || a_is_r1s) && xyRank2Case), - AtInputScalarTypeA_nonConst // Yes, keep the input scalar type - , - AtInputScalarTypeX_nonConst // Yes, instead of - // 'AtInputScalarTypeA_nonConst' - >; - - using InternalTypeA_onDevice = std::conditional_t< - a_is_scalar && b_is_scalar && onDevice, // Keep 'a' as scalar - InternalScalarTypeA, - Kokkos::View>>; - - using InternalTypeA_onHost = std::conditional_t< - (a_is_r1d || a_is_r1s) && xyRank2Case && onHost, - Kokkos::View>, - InternalScalarTypeA>; - - using InternalTypeA_tmp = - std::conditional_t; + std::conditional_t<(a_is_r1d || a_is_r1s) && atInputLayoutA_isStride, AtInputLayoutA, InternalLayoutX>; + + static constexpr bool atInputScalarTypeA_mustRemain = Kokkos::ArithTraits::is_complex && + !Kokkos::ArithTraits::is_complex; + + using InternalScalarTypeA = + std::conditional_t; + + using InternalTypeA_onDevice = + std::conditional_t>>; + + using InternalTypeA_onHost = + std::conditional_t<(a_is_r1d || a_is_r1s) && xyRank2Case && onHost, + Kokkos::View>, + InternalScalarTypeA>; + + using InternalTypeA_tmp = std::conditional_t; // ******************************************************************** // Declare 'InternalTypeX' // ******************************************************************** public: - using InternalTypeX = std::conditional_t< - x_is_r2, - Kokkos::View>, - Kokkos::View>>; + using InternalTypeX = + std::conditional_t>, + Kokkos::View>>; // ******************************************************************** // Declare 'InternalTypeB_tmp' // 
******************************************************************** private: - using AtInputLayoutB = - typename getLayoutFromView::type; + using AtInputLayoutB = typename getLayoutFromView::type; public: - static constexpr bool atInputLayoutB_isStride = - std::is_same_v; + static constexpr bool atInputLayoutB_isStride = std::is_same_v; private: using InternalLayoutB = - std::conditional_t<(b_is_r1d || b_is_r1s) && atInputLayoutB_isStride, - AtInputLayoutB, InternalLayoutY>; - - static constexpr bool atInputScalarTypeB_mustRemain = - Kokkos::ArithTraits::is_complex && - !Kokkos::ArithTraits::is_complex; - - using InternalScalarTypeB = std::conditional_t< - atInputScalarTypeB_mustRemain || ((b_is_r1d || b_is_r1s) && xyRank2Case), - AtInputScalarTypeB_nonConst // Yes, keep the input scalar type - , - AtInputScalarTypeY_nonConst // Yes, instead of - // 'AtInputScalarTypeB_nonConst' - >; - - using InternalTypeB_onDevice = std::conditional_t< - a_is_scalar && b_is_scalar && onDevice, // Keep 'b' as scalar - InternalScalarTypeB, - Kokkos::View>>; - - using InternalTypeB_onHost = std::conditional_t< - (b_is_r1d || b_is_r1s) && xyRank2Case && onHost, - Kokkos::View>, - InternalScalarTypeB>; - - using InternalTypeB_tmp = - std::conditional_t; + std::conditional_t<(b_is_r1d || b_is_r1s) && atInputLayoutB_isStride, AtInputLayoutB, InternalLayoutY>; + + static constexpr bool atInputScalarTypeB_mustRemain = Kokkos::ArithTraits::is_complex && + !Kokkos::ArithTraits::is_complex; + + using InternalScalarTypeB = + std::conditional_t; + + using InternalTypeB_onDevice = + std::conditional_t>>; + + using InternalTypeB_onHost = + std::conditional_t<(b_is_r1d || b_is_r1s) && xyRank2Case && onHost, + Kokkos::View>, + InternalScalarTypeB>; + + using InternalTypeB_tmp = std::conditional_t; // ******************************************************************** // Declare 'InternalTypeY' // ******************************************************************** public: - using InternalTypeY = 
std::conditional_t< - y_is_r2, - Kokkos::View>, - Kokkos::View>>; + using InternalTypeY = + std::conditional_t>, + Kokkos::View>>; // ******************************************************************** // Declare 'InternalTypeA': if 'InternalTypeB_tmp' is a view then // make sure 'InternalTypeA' is a view as well // ******************************************************************** - using InternalTypeA = std::conditional_t< - !Kokkos::is_view_v && - Kokkos::is_view_v, - Kokkos::View>, - InternalTypeA_tmp>; + using InternalTypeA = + std::conditional_t && Kokkos::is_view_v, + Kokkos::View>, + InternalTypeA_tmp>; // ******************************************************************** // Declare 'InternalTypeA_managed' with the same scalar type in @@ -336,23 +294,19 @@ struct AxpbyUnificationAttemptTraits { using InternalLayoutA_managed = InternalLayoutA; public: - using InternalTypeA_managed = std::conditional_t< - Kokkos::is_view_v, - Kokkos::View, - void>; + using InternalTypeA_managed = + std::conditional_t, + Kokkos::View, void>; // ******************************************************************** // Declare 'InternalTypeB' if 'InternalTypeA_tmp' is a view then // make sure 'InternalTypeB' is a view as well // ******************************************************************** - using InternalTypeB = std::conditional_t< - Kokkos::is_view_v && - !Kokkos::is_view_v, - Kokkos::View>, - InternalTypeB_tmp>; + using InternalTypeB = + std::conditional_t && !Kokkos::is_view_v, + Kokkos::View>, + InternalTypeB_tmp>; // ******************************************************************** // Declare 'InternalTypeB_managed' with the same scalar type in @@ -362,91 +316,72 @@ struct AxpbyUnificationAttemptTraits { using InternalLayoutB_managed = InternalLayoutB; public: - using InternalTypeB_managed = std::conditional_t< - Kokkos::is_view_v, - Kokkos::View, - void>; + using InternalTypeB_managed = + std::conditional_t, + Kokkos::View, void>; // 
******************************************************************** // Auxiliary Boolean results on internal types // ******************************************************************** private: - static constexpr bool internalTypeA_is_scalar = - !Kokkos::is_view_v; - static constexpr bool internalTypeA_is_r1d = Tr1d_val(); + static constexpr bool internalTypeA_is_scalar = !Kokkos::is_view_v; + static constexpr bool internalTypeA_is_r1d = Tr1d_val(); - static constexpr bool internalTypeB_is_scalar = - !Kokkos::is_view_v; - static constexpr bool internalTypeB_is_r1d = Tr1d_val(); + static constexpr bool internalTypeB_is_scalar = !Kokkos::is_view_v; + static constexpr bool internalTypeB_is_r1d = Tr1d_val(); public: - static constexpr bool internalTypesAB_bothScalars = - (internalTypeA_is_scalar && internalTypeB_is_scalar); - static constexpr bool internalTypesAB_bothViews = - (internalTypeA_is_r1d && internalTypeB_is_r1d); + static constexpr bool internalTypesAB_bothScalars = (internalTypeA_is_scalar && internalTypeB_is_scalar); + static constexpr bool internalTypesAB_bothViews = (internalTypeA_is_r1d && internalTypeB_is_r1d); // ******************************************************************** // Routine to perform checks (both compile time and run time) // ******************************************************************** - static void performChecks(const AV& a, const XMV& X, const BV& b, - const YMV& Y) { + static void performChecks(const AV& a, const XMV& X, const BV& b, const YMV& Y) { // ****************************************************************** // Check 1/6: General checks // ****************************************************************** - static_assert( - Kokkos::is_execution_space_v, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": tExecSpace must be a valid Kokkos execution space."); - - static_assert( - (xyRank1Case && !xyRank2Case) || (!xyRank1Case && xyRank2Case), - 
"KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": one must have either both X and Y as rank 1, or both X and Y as " - "rank 2"); - - if constexpr (!Kokkos::ArithTraits< - AtInputScalarTypeY_nonConst>::is_complex) { - static_assert( - (!Kokkos::ArithTraits::is_complex) && - (!Kokkos::ArithTraits::is_complex) && - (!Kokkos::ArithTraits::is_complex), - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": if Y is not complex, then A, X and B cannot be complex"); + static_assert(Kokkos::is_execution_space_v, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": tExecSpace must be a valid Kokkos execution space."); + + static_assert((xyRank1Case && !xyRank2Case) || (!xyRank1Case && xyRank2Case), + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": one must have either both X and Y as rank 1, or both X and Y as " + "rank 2"); + + if constexpr (!Kokkos::ArithTraits::is_complex) { + static_assert((!Kokkos::ArithTraits::is_complex) && + (!Kokkos::ArithTraits::is_complex) && + (!Kokkos::ArithTraits::is_complex), + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": if Y is not complex, then A, X and B cannot be complex"); } // ****************************************************************** // Check 2/6: YMV is valid // ****************************************************************** - static_assert( - Kokkos::is_view::value, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": Y is not a Kokkos::View."); - static_assert( - std::is_same::value, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": Y is const. 
It must be nonconst, " - "because it is an output argument " - "(we must be able to write to its entries)."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": XMV must be accessible from tExecSpace"); + static_assert(Kokkos::is_view::value, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": Y is not a Kokkos::View."); + static_assert(std::is_same::value, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": Y is const. It must be nonconst, " + "because it is an output argument " + "(we must be able to write to its entries)."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": XMV must be accessible from tExecSpace"); // ****************************************************************** // Check 3/6: XMV is valid // ****************************************************************** - static_assert( - Kokkos::is_view::value, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ": XMV must be accessible from tExecSpace"); + static_assert(Kokkos::is_view::value, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": X is not a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ": XMV must be accessible from tExecSpace"); if constexpr (xyRank1Case) { if (X.extent(0) != Y.extent(0)) { @@ -454,8 +389,7 @@ struct AxpbyUnificationAttemptTraits { msg << "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks(" ")" << ", invalid rank-1 X extent" - << ": X.extent(0) = " << X.extent(0) - << ", Y.extent(0) = " << Y.extent(0); + << ": X.extent(0) = " << X.extent(0) << ", Y.extent(0) 
= " << Y.extent(0); KokkosKernels::Impl::throw_runtime_exception(msg.str()); } } else { @@ -464,10 +398,8 @@ struct AxpbyUnificationAttemptTraits { msg << "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks(" ")" << ", invalid rank-2 X extents" - << ": X.extent(0) = " << X.extent(0) - << ", X.extent(1) = " << X.extent(1) - << ", Y.extent(0) = " << Y.extent(0) - << ", Y.extent(1) = " << Y.extent(1); + << ": X.extent(0) = " << X.extent(0) << ", X.extent(1) = " << X.extent(1) + << ", Y.extent(0) = " << Y.extent(0) << ", Y.extent(1) = " << Y.extent(1); KokkosKernels::Impl::throw_runtime_exception(msg.str()); } } @@ -476,10 +408,8 @@ struct AxpbyUnificationAttemptTraits { // Check 4/6: AV is valid // ****************************************************************** static_assert( - (a_is_scalar && !a_is_r0 && !a_is_r1s && !a_is_r1d) || - (!a_is_scalar && a_is_r0 && !a_is_r1s && !a_is_r1d) || - (!a_is_scalar && !a_is_r0 && a_is_r1s && !a_is_r1d) || - (!a_is_scalar && !a_is_r0 && !a_is_r1s && a_is_r1d), + (a_is_scalar && !a_is_r0 && !a_is_r1s && !a_is_r1d) || (!a_is_scalar && a_is_r0 && !a_is_r1s && !a_is_r1d) || + (!a_is_scalar && !a_is_r0 && a_is_r1s && !a_is_r1d) || (!a_is_scalar && !a_is_r0 && !a_is_r1s && a_is_r1d), "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" ": 'a' must be either scalar or rank 0 or rank 1 static or rank 1 " "dynamic"); @@ -495,8 +425,7 @@ struct AxpbyUnificationAttemptTraits { KokkosKernels::Impl::throw_runtime_exception(msg.str()); } } else { - if ((a.extent(0) == 1) || - (a.extent(0) == Y.extent(1))) { // Yes, 'Y' is the reference + if ((a.extent(0) == 1) || (a.extent(0) == Y.extent(1))) { // Yes, 'Y' is the reference // Ok } else { std::ostringstream msg; @@ -504,8 +433,7 @@ struct AxpbyUnificationAttemptTraits { "performChecks()" << ": view 'a' must have extent(0) == 1 or Y.extent(1) for " "xyRank2Case" - << ", a.extent(0) = " << a.extent(0) - << ", Y.extent(0) = " << Y.extent(0) + << ", a.extent(0) = " << 
a.extent(0) << ", Y.extent(0) = " << Y.extent(0) << ", Y.extent(1) = " << Y.extent(1); KokkosKernels::Impl::throw_runtime_exception(msg.str()); } @@ -516,10 +444,8 @@ struct AxpbyUnificationAttemptTraits { // Check 5/6: BV is valid // ****************************************************************** static_assert( - (b_is_scalar && !b_is_r0 && !b_is_r1s && !b_is_r1d) || - (!b_is_scalar && b_is_r0 && !b_is_r1s && !b_is_r1d) || - (!b_is_scalar && !b_is_r0 && b_is_r1s && !b_is_r1d) || - (!b_is_scalar && !b_is_r0 && !b_is_r1s && b_is_r1d), + (b_is_scalar && !b_is_r0 && !b_is_r1s && !b_is_r1d) || (!b_is_scalar && b_is_r0 && !b_is_r1s && !b_is_r1d) || + (!b_is_scalar && !b_is_r0 && b_is_r1s && !b_is_r1d) || (!b_is_scalar && !b_is_r0 && !b_is_r1s && b_is_r1d), "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" ": 'b' must be either scalar or rank 0 or rank 1 static or rank 1 " "dynamic"); @@ -543,8 +469,7 @@ struct AxpbyUnificationAttemptTraits { "performChecks()" << ": view 'b' must have extent(0) == 1 or Y.extent(1) for " "xyRank2Case" - << ", b.extent(0) = " << b.extent(0) - << ", Y.extent(0) = " << Y.extent(0) + << ", b.extent(0) = " << b.extent(0) << ", Y.extent(0) = " << Y.extent(0) << ", Y.extent(1) = " << Y.extent(1); KokkosKernels::Impl::throw_runtime_exception(msg.str()); } @@ -556,147 +481,115 @@ struct AxpbyUnificationAttemptTraits { // ****************************************************************** if constexpr (onHost) { if constexpr (xyRank1Case) { - constexpr bool internalTypeA_isOk = - (internalTypeA_is_scalar || internalTypeA_is_r1d); - static_assert( - internalTypeA_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank1Case: InternalTypeA is wrong"); - - constexpr bool internalTypeX_isOk = std::is_same_v< - InternalTypeX, - Kokkos::View>>; - static_assert( - internalTypeX_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank1Case: InternalTypeX is 
wrong"); - - constexpr bool internalTypeB_isOk = - (internalTypeB_is_scalar || internalTypeB_is_r1d); - static_assert( - internalTypeB_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank1Case: InternalTypeB is wrong"); - - constexpr bool internalTypeY_isOk = std::is_same_v< - InternalTypeY, - Kokkos::View>>; - static_assert( - internalTypeY_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank1Case: InternalTypeY is wrong"); + constexpr bool internalTypeA_isOk = (internalTypeA_is_scalar || internalTypeA_is_r1d); + static_assert(internalTypeA_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank1Case: InternalTypeA is wrong"); + + constexpr bool internalTypeX_isOk = + std::is_same_v>>; + static_assert(internalTypeX_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank1Case: InternalTypeX is wrong"); + + constexpr bool internalTypeB_isOk = (internalTypeB_is_scalar || internalTypeB_is_r1d); + static_assert(internalTypeB_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank1Case: InternalTypeB is wrong"); + + constexpr bool internalTypeY_isOk = + std::is_same_v>>; + static_assert(internalTypeY_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank1Case: InternalTypeY is wrong"); } else { - constexpr bool internalTypeA_isOk = - (internalTypeA_is_scalar || internalTypeA_is_r1d); - static_assert( - internalTypeA_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank2Case: InternalTypeA is wrong"); - - constexpr bool internalTypeX_isOk = std::is_same_v< - InternalTypeX, - Kokkos::View>>; - static_assert( - internalTypeX_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank2Case: InternalTypeX is wrong"); - - constexpr bool internalTypeB_isOk = - 
(internalTypeB_is_scalar || internalTypeB_is_r1d); - static_assert( - internalTypeB_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank2Case: InternalTypeB is wrong"); - - constexpr bool internalTypeY_isOk = std::is_same_v< - InternalTypeY, - Kokkos::View>>; - static_assert( - internalTypeY_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, xyRank2Case: InternalTypeY is wrong"); + constexpr bool internalTypeA_isOk = (internalTypeA_is_scalar || internalTypeA_is_r1d); + static_assert(internalTypeA_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank2Case: InternalTypeA is wrong"); + + constexpr bool internalTypeX_isOk = + std::is_same_v>>; + static_assert(internalTypeX_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank2Case: InternalTypeX is wrong"); + + constexpr bool internalTypeB_isOk = (internalTypeB_is_scalar || internalTypeB_is_r1d); + static_assert(internalTypeB_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank2Case: InternalTypeB is wrong"); + + constexpr bool internalTypeY_isOk = + std::is_same_v>>; + static_assert(internalTypeY_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, xyRank2Case: InternalTypeY is wrong"); } } else { if constexpr (xyRank1Case) { constexpr bool internalTypeA_isOk = - internalTypeA_is_r1d || - (a_is_scalar && b_is_scalar && internalTypeA_is_scalar); - static_assert( - internalTypeA_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank1Case: InternalTypeA is wrong"); - - constexpr bool internalTypeX_isOk = std::is_same_v< - InternalTypeX, - Kokkos::View>>; - static_assert( - internalTypeX_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank1Case: InternalTypeX is wrong"); + internalTypeA_is_r1d || 
(a_is_scalar && b_is_scalar && internalTypeA_is_scalar); + static_assert(internalTypeA_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank1Case: InternalTypeA is wrong"); + + constexpr bool internalTypeX_isOk = + std::is_same_v>>; + static_assert(internalTypeX_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank1Case: InternalTypeX is wrong"); constexpr bool internalTypeB_isOk = - internalTypeB_is_r1d || - (a_is_scalar && b_is_scalar && internalTypeA_is_scalar); - static_assert( - internalTypeB_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank1Case: InternalTypeB is wrong"); - - constexpr bool internalTypeY_isOk = std::is_same_v< - InternalTypeY, - Kokkos::View>>; - static_assert( - internalTypeY_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank1Case: InternalTypeY is wrong"); + internalTypeB_is_r1d || (a_is_scalar && b_is_scalar && internalTypeA_is_scalar); + static_assert(internalTypeB_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank1Case: InternalTypeB is wrong"); + + constexpr bool internalTypeY_isOk = + std::is_same_v>>; + static_assert(internalTypeY_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank1Case: InternalTypeY is wrong"); } else { constexpr bool internalTypeA_isOk = - internalTypeA_is_r1d || - (a_is_scalar && b_is_scalar && internalTypeA_is_scalar); - static_assert( - internalTypeA_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank2Case: InternalTypeA is wrong"); - - constexpr bool internalTypeX_isOk = std::is_same_v< - InternalTypeX, - Kokkos::View>>; - static_assert( - internalTypeX_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank2Case: InternalTypeX is wrong"); + internalTypeA_is_r1d 
|| (a_is_scalar && b_is_scalar && internalTypeA_is_scalar); + static_assert(internalTypeA_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank2Case: InternalTypeA is wrong"); + + constexpr bool internalTypeX_isOk = + std::is_same_v>>; + static_assert(internalTypeX_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank2Case: InternalTypeX is wrong"); constexpr bool internalTypeB_isOk = - internalTypeB_is_r1d || - (a_is_scalar && b_is_scalar && internalTypeB_is_scalar); - static_assert( - internalTypeB_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank2Case: InternalTypeB is wrong"); - - constexpr bool internalTypeY_isOk = std::is_same_v< - InternalTypeY, - Kokkos::View>>; - static_assert( - internalTypeY_isOk, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, xyRank2Case: InternalTypeY is wrong"); + internalTypeB_is_r1d || (a_is_scalar && b_is_scalar && internalTypeB_is_scalar); + static_assert(internalTypeB_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank2Case: InternalTypeB is wrong"); + + constexpr bool internalTypeY_isOk = + std::is_same_v>>; + static_assert(internalTypeY_isOk, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, xyRank2Case: InternalTypeY is wrong"); } } @@ -714,10 +607,9 @@ struct AxpbyUnificationAttemptTraits { // - [InternalTypeA, B] = [S_a, S_b], or // - [InternalTypeA, B] = [view, view] // **************************************************************** - static_assert( - internalTypesAB_bothScalars || internalTypesAB_bothViews, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onHost, invalid combination of types"); + static_assert(internalTypesAB_bothScalars || internalTypesAB_bothViews, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onHost, 
invalid combination of types"); } // If onHost else if constexpr (onDevice) { // **************************************************************** @@ -733,35 +625,25 @@ struct AxpbyUnificationAttemptTraits { // - [InternalTypeA, B] = [S_a, S_b], or // - [InternalTypeA, B] = [view, view] // **************************************************************** - static_assert( - internalTypesAB_bothViews || - (a_is_scalar && b_is_scalar && internalTypesAB_bothScalars), - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", onDevice, invalid combination of types"); + static_assert(internalTypesAB_bothViews || (a_is_scalar && b_is_scalar && internalTypesAB_bothScalars), + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", onDevice, invalid combination of types"); } - if constexpr (xyRank2Case && (a_is_r1d || a_is_r1s) && - atInputLayoutA_isStride) { - static_assert( - std::is_same_v< - typename getLayoutFromView< - InternalTypeA, Kokkos::is_view_v>::type, - Kokkos::LayoutStride>, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", xyRank2Case: coeff 'a' is rank-1 and has LayoutStride at input" - ", but no LayoutStride internally"); + if constexpr (xyRank2Case && (a_is_r1d || a_is_r1s) && atInputLayoutA_isStride) { + static_assert(std::is_same_v>::type, + Kokkos::LayoutStride>, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", xyRank2Case: coeff 'a' is rank-1 and has LayoutStride at input" + ", but no LayoutStride internally"); } - if constexpr (xyRank2Case && (b_is_r1d || b_is_r1s) && - atInputLayoutB_isStride) { - static_assert( - std::is_same_v< - typename getLayoutFromView< - InternalTypeB, Kokkos::is_view_v>::type, - Kokkos::LayoutStride>, - "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" - ", xyRank2Case: coeff 'b' is rank-1 and has LayoutStride at input" - ", but no LayoutStride internally"); + if constexpr (xyRank2Case && (b_is_r1d || b_is_r1s) && 
atInputLayoutB_isStride) { + static_assert(std::is_same_v>::type, + Kokkos::LayoutStride>, + "KokkosBlas::Impl::AxpbyUnificationAttemptTraits::performChecks()" + ", xyRank2Case: coeff 'b' is rank-1 and has LayoutStride at input" + ", but no LayoutStride internally"); } } // Constructor @@ -776,28 +658,20 @@ struct AxpbyUnificationAttemptTraits { //<< ", AV::non_const_data_type = " << // typeid(AV::non_const_data_type).name() << ", AtInputScalarTypeA = " << typeid(AtInputScalarTypeA).name() - << ", isConst = " - << std::is_const_v << ", isComplex = " + << ", isConst = " << std::is_const_v << ", isComplex = " << Kokkos::ArithTraits::is_complex - << ", AtInputScalarTypeA_nonConst = " - << typeid(AtInputScalarTypeA_nonConst).name() + << ", AtInputScalarTypeA_nonConst = " << typeid(AtInputScalarTypeA_nonConst).name() << ", InternalTypeA = " << typeid(InternalTypeA).name() << "\n" - << ", InternalTypeA_managed = " << typeid(InternalTypeA_managed).name() - << "\n" + << ", InternalTypeA_managed = " << typeid(InternalTypeA_managed).name() << "\n" << "\n" << "XMV = " << typeid(XMV).name() << "\n" - << "XMV::value_type = " << typeid(typename XMV::value_type).name() - << "\n" - << "XMV::const_data_type = " - << typeid(typename XMV::const_data_type).name() << "\n" - << "XMV::non_const_data_type = " - << typeid(typename XMV::non_const_data_type).name() << "\n" + << "XMV::value_type = " << typeid(typename XMV::value_type).name() << "\n" + << "XMV::const_data_type = " << typeid(typename XMV::const_data_type).name() << "\n" + << "XMV::non_const_data_type = " << typeid(typename XMV::non_const_data_type).name() << "\n" << "AtInputScalarTypeX = " << typeid(AtInputScalarTypeX).name() << "\n" << "isConst = " << std::is_const_v << "\n" - << "isComplex = " - << Kokkos::ArithTraits::is_complex << "\n" - << "AtInputScalarTypeX_nonConst = " - << typeid(AtInputScalarTypeX_nonConst).name() << "\n" + << "isComplex = " << Kokkos::ArithTraits::is_complex << "\n" + << "AtInputScalarTypeX_nonConst 
= " << typeid(AtInputScalarTypeX_nonConst).name() << "\n" << "InternalTypeX = " << typeid(InternalTypeX).name() << "\n" << "\n" << "BV = " @@ -806,28 +680,20 @@ struct AxpbyUnificationAttemptTraits { //<< ", BV::non_const_data_type = " << // typeid(BV::non_const_data_type).name() << ", AtInputScalarTypeB = " << typeid(AtInputScalarTypeB).name() - << ", isConst = " - << std::is_const_v << ", isComplex = " + << ", isConst = " << std::is_const_v << ", isComplex = " << Kokkos::ArithTraits::is_complex - << ", AtInputScalarTypeB_nonConst = " - << typeid(AtInputScalarTypeB_nonConst).name() + << ", AtInputScalarTypeB_nonConst = " << typeid(AtInputScalarTypeB_nonConst).name() << ", InternalTypeB = " << typeid(InternalTypeB).name() << "\n" - << ", InternalTypeB_managed = " << typeid(InternalTypeB_managed).name() - << "\n" + << ", InternalTypeB_managed = " << typeid(InternalTypeB_managed).name() << "\n" << "\n" << "YMV = " << typeid(YMV).name() << "\n" - << "YMV::value_type = " << typeid(typename YMV::value_type).name() - << "\n" - << "YMV::const_data_type = " - << typeid(typename YMV::const_data_type).name() << "\n" - << "YMV::non_const_data_type = " - << typeid(typename YMV::non_const_data_type).name() << "\n" + << "YMV::value_type = " << typeid(typename YMV::value_type).name() << "\n" + << "YMV::const_data_type = " << typeid(typename YMV::const_data_type).name() << "\n" + << "YMV::non_const_data_type = " << typeid(typename YMV::non_const_data_type).name() << "\n" << "AtInputScalarTypeY = " << typeid(AtInputScalarTypeY).name() << "\n" << "isConst = " << std::is_const_v << "\n" - << "isComplex = " - << Kokkos::ArithTraits::is_complex << "\n" - << "AtInputScalarTypeY_nonConst = " - << typeid(AtInputScalarTypeY_nonConst).name() << "\n" + << "isComplex = " << Kokkos::ArithTraits::is_complex << "\n" + << "AtInputScalarTypeY_nonConst = " << typeid(AtInputScalarTypeY_nonConst).name() << "\n" << "InternalTypeY = " << typeid(InternalTypeY).name() << "\n" << std::endl; } @@ -840,8 
+706,7 @@ struct AxpbyUnificationAttemptTraits { template struct getScalarValueFromVariableAtHost { getScalarValueFromVariableAtHost() { - static_assert((rankT == -1) || (rankT == 0) || (rankT == 1), - "Generic struct should not have been invoked!"); + static_assert((rankT == -1) || (rankT == 0) || (rankT == 1), "Generic struct should not have been invoked!"); } }; @@ -879,8 +744,7 @@ template size_t getStrideInCoefficient(T const& coeff) { size_t result = 1; if constexpr (Kokkos::is_view_v) { - if constexpr ((T::rank == 1) && (std::is_same_v)) { + if constexpr ((T::rank == 1) && (std::is_same_v)) { result = coeff.stride_0(); } } @@ -890,8 +754,7 @@ size_t getStrideInCoefficient(T const& coeff) { // -------------------------------- template -static void populateRank1Stride1ViewWithScalarOrNonStrideView( - T_in const& coeff_in, T_out& coeff_out) { +static void populateRank1Stride1ViewWithScalarOrNonStrideView(T_in const& coeff_in, T_out& coeff_out) { // *********************************************************************** // 'coeff_out' is assumed to be rank-1, of LayoutLeft or LayoutRight // @@ -899,8 +762,7 @@ static void populateRank1Stride1ViewWithScalarOrNonStrideView( // - a coeff_in that deals with 'double', and // - a coeff_out deals with 'complex' // *********************************************************************** - using ScalarOutType = - typename std::remove_const::type; + using ScalarOutType = typename std::remove_const::type; if constexpr (!Kokkos::is_view_v) { // ********************************************************************* @@ -924,17 +786,13 @@ static void populateRank1Stride1ViewWithScalarOrNonStrideView( std::ostringstream msg; msg << "In populateRank1Stride1ViewWithScalarOrNonStrideView()" << ": 'in' and 'out' should have the same extent(0)" - << ", T_in = " << typeid(T_in).name() - << ", coeff_in.label() = " << coeff_in.label() - << ", coeff_in.extent(0) = " << coeff_in.extent(0) - << ", T_out = " << typeid(T_out).name() - << ", 
coeff_out.label() = " << coeff_out.label() - << ", coeff_out.extent(0) = " << coeff_out.extent(0); + << ", T_in = " << typeid(T_in).name() << ", coeff_in.label() = " << coeff_in.label() + << ", coeff_in.extent(0) = " << coeff_in.extent(0) << ", T_out = " << typeid(T_out).name() + << ", coeff_out.label() = " << coeff_out.label() << ", coeff_out.extent(0) = " << coeff_out.extent(0); KokkosKernels::Impl::throw_runtime_exception(msg.str()); } - using ScalarInType = - typename std::remove_const::type; + using ScalarInType = typename std::remove_const::type; if constexpr (std::is_same_v) { coeff_out = coeff_in; } else if (coeff_out.extent(0) == 1) { @@ -946,14 +804,10 @@ static void populateRank1Stride1ViewWithScalarOrNonStrideView( std::ostringstream msg; msg << "In populateRank1Stride1ViewWithScalarOrNonStrideView()" << ": scalar types 'in' and 'out' should be the same" - << ", T_in = " << typeid(T_in).name() - << ", ScalarInType = " << typeid(ScalarInType).name() - << ", coeff_in.label() = " << coeff_in.label() - << ", coeff_in.extent(0) = " << coeff_in.extent(0) - << ", T_out = " << typeid(T_out).name() - << ", ScalarOutType = " << typeid(ScalarOutType).name() - << ", coeff_out.label() = " << coeff_out.label() - << ", coeff_out.extent(0) = " << coeff_out.extent(0); + << ", T_in = " << typeid(T_in).name() << ", ScalarInType = " << typeid(ScalarInType).name() + << ", coeff_in.label() = " << coeff_in.label() << ", coeff_in.extent(0) = " << coeff_in.extent(0) + << ", T_out = " << typeid(T_out).name() << ", ScalarOutType = " << typeid(ScalarOutType).name() + << ", coeff_out.label() = " << coeff_out.label() << ", coeff_out.extent(0) = " << coeff_out.extent(0); KokkosKernels::Impl::throw_runtime_exception(msg.str()); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_impl.hpp index 2003f7cc2c63..61e7307bc82a 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_impl.hpp +++ 
b/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_impl.hpp @@ -30,8 +30,7 @@ namespace Impl { /// \tparam YVector Type of the second vector y; 1-D View /// \tparam SizeType Type of the row index used in the dot product. /// For best performance, use int instead of size_t here. -template +template struct DotFunctor { typedef SizeType size_type; typedef typename AV::non_const_value_type avalue_type; @@ -44,26 +43,19 @@ struct DotFunctor { DotFunctor(const XVector& x, const YVector& y) : m_x(x), m_y(y) {} void run(const char* label, const execution_space& space, AV result) { - Kokkos::RangePolicy policy(space, 0, - m_x.extent(0)); + Kokkos::RangePolicy policy(space, 0, m_x.extent(0)); Kokkos::parallel_reduce(label, policy, *this, result); } // Prefer const size_type& to const size_type or size_type, // since the compiler has an easier time inlining the former. - KOKKOS_FORCEINLINE_FUNCTION void operator()(const size_type& i, - value_type& sum) const { + KOKKOS_FORCEINLINE_FUNCTION void operator()(const size_type& i, value_type& sum) const { Kokkos::Details::updateDot(sum, m_x(i), m_y(i)); // sum += m_x(i) * m_y(i) } - KOKKOS_INLINE_FUNCTION void init(value_type& update) const { - update = Kokkos::ArithTraits::zero(); - } + KOKKOS_INLINE_FUNCTION void init(value_type& update) const { update = Kokkos::ArithTraits::zero(); } - KOKKOS_INLINE_FUNCTION void join(value_type& update, - const value_type& source) const { - update += source; - } + KOKKOS_INLINE_FUNCTION void join(value_type& update, const value_type& source) const { update += source; } }; } // namespace Impl diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_mv_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_mv_impl.hpp index d19e512599ec..15db366cebc0 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_mv_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_mv_impl.hpp @@ -27,9 +27,8 @@ namespace Impl { template struct Dot_MV_Functor { - using Scalar = typename 
RV::non_const_value_type; - using IPT = Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type>; + using Scalar = typename RV::non_const_value_type; + using IPT = Kokkos::Details::InnerProductSpaceTraits; using dot_type = typename IPT::dot_type; using KAT = Kokkos::ArithTraits; @@ -39,8 +38,7 @@ struct Dot_MV_Functor { XV x; YV y; - size_type - teamsPerDot; // number of teams collectively performing a dot product + size_type teamsPerDot; // number of teams collectively performing a dot product Dot_MV_Functor(const RV& r_, const XV& x_, const YV& y_, int teamsPerDot_) : r(r_), x(x_), y(y_), teamsPerDot(teamsPerDot_) {} @@ -60,13 +58,11 @@ struct Dot_MV_Functor { Kokkos::parallel_reduce( Kokkos::TeamThreadRange(t, begin, end), [&](size_type k, dot_type& update) { - Kokkos::Details::updateDot(update, x.access(k, xcol), - y.access(k, ycol)); + Kokkos::Details::updateDot(update, x.access(k, xcol), y.access(k, ycol)); }, localResult); - Kokkos::single(Kokkos::PerTeam(t), - [&]() { Kokkos::atomic_add(&r(i), Scalar(localResult)); }); + Kokkos::single(Kokkos::PerTeam(t), [&]() { Kokkos::atomic_add(&r(i), Scalar(localResult)); }); } }; @@ -75,14 +71,12 @@ struct Dot_MV_Functor { template void MV_Dot_Invoke( const execution_space& space, const RV& r, const XV& x, const YV& y, - typename std::enable_if::accessible>::type* = + typename std::enable_if::accessible>::type* = nullptr) { size_type numDots = std::max(x.extent(1), y.extent(1)); if (x.extent(0) != y.extent(0)) { std::ostringstream oss; - oss << "KokkosBlas::dot (rank-2): x and y have different lengths (" - << x.extent(0) << " and " << y.extent(0) << ")"; + oss << "KokkosBlas::dot (rank-2): x and y have different lengths (" << x.extent(0) << " and " << y.extent(0) << ")"; throw std::runtime_error(oss.str()); } if ((x.extent(1) != size_t(1) && x.extent(1) != size_t(numDots)) || @@ -95,23 +89,17 @@ void MV_Dot_Invoke( } if (r.extent(0) != size_t(numDots)) { std::ostringstream oss; - oss << 
"KokkosBlas::dot (rank-2): result vector has wrong length (" - << r.extent(0) << ", but " << numDots + oss << "KokkosBlas::dot (rank-2): result vector has wrong length (" << r.extent(0) << ", but " << numDots << " dot products will be computed)"; throw std::runtime_error(oss.str()); } // Zero out the result vector - Kokkos::deep_copy( - space, r, Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(space, r, Kokkos::ArithTraits::zero()); size_type teamsPerDot; - KokkosBlas::Impl::multipleReductionWorkDistribution( - x.extent(0), numDots, teamsPerDot); + KokkosBlas::Impl::multipleReductionWorkDistribution(x.extent(0), numDots, teamsPerDot); size_type numTeams = numDots * teamsPerDot; Kokkos::TeamPolicy pol(space, numTeams, Kokkos::AUTO); - Kokkos::parallel_for("Dot_MV", pol, - Dot_MV_Functor( - r, x, y, teamsPerDot)); + Kokkos::parallel_for("Dot_MV", pol, Dot_MV_Functor(r, x, y, teamsPerDot)); } // Version for when a temporary result view is needed (implemented in terms of @@ -119,15 +107,11 @@ void MV_Dot_Invoke( template void MV_Dot_Invoke( const execution_space& space, const RV& r, const XV& x, const YV& y, - typename std::enable_if::accessible>::type* = - nullptr) { - Kokkos::View - tempResult( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Dot_MV temp result"), - r.extent(0)); - MV_Dot_Invoke( - space, tempResult, x, y); + typename std::enable_if< + !Kokkos::SpaceAccessibility::accessible>::type* = nullptr) { + Kokkos::View tempResult( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Dot_MV temp result"), r.extent(0)); + MV_Dot_Invoke(space, tempResult, x, y); Kokkos::deep_copy(space, r, tempResult); space.fence(); } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_spec.hpp index 02efee6bc545..982e2eaa0c7b 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_dot_spec.hpp @@ -54,15 +54,11 @@ struct 
DotAccumulatingScalar> { template struct HasSpecialAccumulator { - enum : bool { - value = !std::is_same::type>::value - }; + enum : bool { value = !std::is_same::type>::value }; }; // Specialization struct which defines whether a specialization exists -template +template struct dot_eti_spec_avail { enum : bool { value = false }; }; @@ -75,34 +71,27 @@ struct dot_eti_spec_avail { // the declarations of full specializations go in this header file. // We may spread out definitions (see _INST macro below) across one or // more .cpp files. -#define KOKKOSBLAS1_DOT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct dot_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 1> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct dot_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_DOT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct dot_eti_spec_avail>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 1> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct dot_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 1> { \ + enum : bool { value = true }; \ }; // @@ -112,55 +101,42 @@ struct dot_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_DOT_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct dot_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 2, 2> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct dot_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 2, 1> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct dot_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_DOT_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct dot_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 2, 2> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct dot_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 2, 1> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct dot_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -172,36 +148,28 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template ::value, - bool eti_spec_avail = - dot_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = dot_eti_spec_avail::value> struct Dot { - static void dot(const execution_space& 
space, const RV&, const XV& R, - const YV& X); + static void dot(const execution_space& space, const RV&, const XV& R, const YV& X); }; // This version never has TPL support, but it does use the same ETI system template ::value> + bool eti_spec_avail = dot_eti_spec_avail::value> struct DotSpecialAccumulator { // Note: not doing the static_asserts to validate RV, XV, YV since those // errors would have already arisen when building the library. - using size_type = typename YV::size_type; - using dot_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type>::dot_type; + using size_type = typename YV::size_type; + using dot_type = typename Kokkos::Details::InnerProductSpaceTraits::dot_type; using accum_type = typename DotAccumulatingScalar::type; // This is the same View type as RV, but using the special accumulator as the // value type - using RV_Result = Kokkos::View>; - static void dot(const execution_space& space, const RV_Result& R, const XV& X, - const YV& Y); + static void dot(const execution_space& space, const RV_Result& R, const XV& X, const YV& Y); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY @@ -209,8 +177,7 @@ struct DotSpecialAccumulator { // The rank-1 case is currently the only one that may use a different // accumulator type than InnerProductSpaceTraits::dot_type. template -struct Dot { +struct Dot { // Check some things about the template parameters at compile time to get nice // error messages, before using them under the assumption they are valid. static_assert(Kokkos::is_view::value, @@ -231,8 +198,7 @@ struct Dot: " "YV is not rank 1."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Dot<1D>: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -243,23 +209,18 @@ struct Dot> RV_Result; - static void dot(const execution_space& space, const RV& R, const XV& X, - const YV& Y) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::dot[ETI]" - : "KokkosBlas::dot[noETI]"); + static void dot(const execution_space& space, const RV& R, const XV& X, const YV& Y) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::dot[ETI]" + : "KokkosBlas::dot[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas::dot<> ETI specialization for < %s , %s >\n", - typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas::dot<> ETI specialization for < %s , %s >\n", typeid(XV).name(), typeid(YV).name()); else { - printf("KokkosBlas::dot<> non-ETI specialization for < %s , %s >\n", - typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas::dot<> non-ETI specialization for < %s , %s >\n", typeid(XV).name(), typeid(YV).name()); } #endif const size_type numElems = X.extent(0); @@ -282,8 +243,7 @@ struct Dot -struct DotSpecialAccumulator { +struct DotSpecialAccumulator { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "DotSpecialAccumulator: XV is not a Kokkos::View."); @@ -299,38 +259,30 @@ struct DotSpecialAccumulator::value, "KokkosBlas::Impl::" "DotSpecialAccumulator: RV is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::DotSpecialAccumulator: X and Y have " "different scalar types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Dot<1D>: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - using size_type = typename YV::size_type; - using dot_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type>::dot_type; + using size_type = typename YV::size_type; + using dot_type = typename Kokkos::Details::InnerProductSpaceTraits::dot_type; using accum_type = typename DotAccumulatingScalar::type; // This is the same View type as RV, but using the special accumulator as the // value type - using RV_Result = Kokkos::View>; - static void dot(const execution_space& space, const RV_Result& R, const XV& X, - const YV& Y) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::dot[ETI]" - : "KokkosBlas::dot[noETI]"); + static void dot(const execution_space& space, const RV_Result& R, const XV& X, const YV& Y) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::dot[ETI]" + : "KokkosBlas::dot[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas::dot<> ETI specialization for < %s , %s >\n", - typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas::dot<> ETI specialization for < %s , %s >\n", typeid(XV).name(), typeid(YV).name()); else { - printf("KokkosBlas::dot<> non-ETI specialization for < %s , %s >\n", - typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas::dot<> non-ETI specialization for < %s , %s >\n", typeid(XV).name(), typeid(YV).name()); } #endif const size_type numElems = X.extent(0); @@ -348,10 +300,8 @@ struct DotSpecialAccumulator -struct Dot { +template +struct Dot { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Dot<2-D>: XV is not a Kokkos::View."); @@ -367,29 +317,25 @@ struct Dot - static auto getFirstColumn( - const V& v, typename std::enable_if::type* = nullptr) { + static auto getFirstColumn(const V& v, typename std::enable_if::type* = nullptr) { 
return Kokkos::subview(v, Kokkos::ALL(), 0); } template - static V getFirstColumn( - const V& v, typename std::enable_if::type* = nullptr) { + static V getFirstColumn(const V& v, typename std::enable_if::type* = nullptr) { return v; } - static void dot(const execution_space& space, const RV& R, const XV& X, - const YV& Y) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::dot[ETI]" - : "KokkosBlas::dot[noETI]"); + static void dot(const execution_space& space, const RV& R, const XV& X, const YV& Y) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::dot[ETI]" + : "KokkosBlas::dot[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::dot<> ETI specialization for < %s , %s , %s >\n", - typeid(RV).name(), typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas1::dot<> ETI specialization for < %s , %s , %s >\n", typeid(RV).name(), typeid(XV).name(), + typeid(YV).name()); else { - printf("KokkosBlas1::dot<> non-ETI specialization for < %s , %s , %s >\n", - typeid(RV).name(), typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas1::dot<> non-ETI specialization for < %s , %s , %s >\n", typeid(RV).name(), typeid(XV).name(), + typeid(YV).name()); } #endif @@ -401,20 +347,15 @@ struct Dot(INT_MAX)) { typedef int index_type; - DotFunctor - f(X0, Y0); + DotFunctor f(X0, Y0); f.run("KokkosBlas::dot<1D>", space, R0); } else { typedef int64_t index_type; - DotFunctor - f(X0, Y0); + DotFunctor f(X0, Y0); f.run("KokkosBlas::dot<1D>", space, R0); } } else { - if (numRows < static_cast(INT_MAX) && - numRows * numDots < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numDots < static_cast(INT_MAX)) { typedef int index_type; MV_Dot_Invoke(space, R, X, Y); } else { @@ -437,95 +378,68 @@ struct Dot>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 1, false, true>; \ - 
extern template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 1, false, true>; \ - extern template struct DotSpecialAccumulator< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true>; \ - extern template struct DotSpecialAccumulator< \ - EXEC_SPACE, \ - Kokkos::View>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_DOT_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Dot>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 1, false, true>; \ + extern template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 1, false, true>; \ + extern template struct DotSpecialAccumulator< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true>; \ + extern template struct DotSpecialAccumulator< \ + EXEC_SPACE, Kokkos::View>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ true>; -#define KOKKOSBLAS1_DOT_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Dot>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 1, false, true>; \ - template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, 1, false, true>; \ - template struct DotSpecialAccumulator< \ - EXEC_SPACE, \ - Kokkos::View>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 
true>; \ - template struct DotSpecialAccumulator< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_DOT_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Dot>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 1, false, true>; \ + template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, 1, false, true>; \ + template struct DotSpecialAccumulator< \ + EXEC_SPACE, Kokkos::View>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true>; \ + template struct DotSpecialAccumulator< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ true>; // @@ -534,88 +448,62 @@ struct Dot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 2, 2, false, true>; \ - extern template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 2, 1, false, true>; \ - extern template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_DOT_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 2, 2, false, true>; \ + extern template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 
Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 2, 1, false, true>; \ + extern template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ 1, 2, false, true>; -#define KOKKOSBLAS1_DOT_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 2, 2, false, true>; \ - template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 2, 1, false, true>; \ - template struct Dot< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_DOT_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 2, 2, false, true>; \ + template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 2, 1, false, true>; \ + template struct Dot< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ 1, 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_impl.hpp index 4c7a3fcc0cb5..bef00fad8c7e 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_impl.hpp @@ -29,8 +29,7 @@ namespace Impl { /// \tparam XV 1-D input View 
/// \tparam MagType Magnitude type /// \tparam SizeType Index type. Use int (32 bits) if possible. -template +template struct V_Iamax_Functor { using size_type = SizeType; using mag_type = MagType; @@ -47,8 +46,7 @@ struct V_Iamax_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_Iamax_Functor: " "X is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_Iamax_Functor: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -57,8 +55,7 @@ struct V_Iamax_Functor { "RV must have rank 0 and XV must have rank 1."); } - KOKKOS_INLINE_FUNCTION void operator()(const size_type i, - value_type& lmaxloc) const { + KOKKOS_INLINE_FUNCTION void operator()(const size_type i, value_type& lmaxloc) const { mag_type val = IPT::norm(m_x(i - 1)); mag_type maxval = IPT::norm(m_x(lmaxloc - 1)); if (val > maxval) lmaxloc = i; @@ -68,8 +65,7 @@ struct V_Iamax_Functor { update = Kokkos::reduction_identity::max() + 1; } - KOKKOS_INLINE_FUNCTION void join(value_type& update, - const value_type& source) const { + KOKKOS_INLINE_FUNCTION void join(value_type& update, const value_type& source) const { mag_type source_val = IPT::norm(m_x(source - 1)); mag_type update_val = IPT::norm(m_x(update - 1)); if (update_val < source_val) update = source; @@ -107,8 +103,7 @@ void MV_Iamax_Invoke(const execution_space& space, const RV& r, const XMV& X) { for (size_t i = 0; i < X.extent(1); i++) { auto ri = Kokkos::subview(r, i); auto Xi = Kokkos::subview(X, Kokkos::ALL(), i); - V_Iamax_Invoke( - space, ri, Xi); + V_Iamax_Invoke(space, ri, Xi); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_spec.hpp index 341b949050f5..80e4cb603643 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_iamax_spec.hpp @@ 
-43,39 +43,29 @@ struct iamax_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(INDEX_TYPE, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct iamax_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct iamax_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct iamax_eti_spec_avail< \ + EXEC_SPACE, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct iamax_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -#define KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(unsigned long, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(unsigned int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(unsigned int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_AVAIL_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) // // Macro for declaration of full specialization availability @@ -84,39 +74,29 @@ struct iamax_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // 
more .cpp files. // -#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST( \ - INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct iamax_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct iamax_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST(INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct iamax_eti_spec_avail< \ + EXEC_SPACE, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct iamax_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; -#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST( \ - unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST(unsigned int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST(int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST(unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST(unsigned int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_AVAIL_INDEX_HOST(int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) // Include the actual specialization declarations #include @@ -128,10 +108,8 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - iamax_eti_spec_avail::value> + bool tpl_spec_avail = 
iamax_tpl_spec_avail::value, + bool eti_spec_avail = iamax_eti_spec_avail::value> struct Iamax { static void iamax(const execution_space& space, const RMV& R, const XMV& X); }; @@ -139,8 +117,7 @@ struct Iamax { #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Iamax for single vectors (1-D Views). template -struct Iamax { +struct Iamax { typedef typename XMV::size_type size_type; static void iamax(const execution_space& space, const RMV& R, const XMV& X) { @@ -156,16 +133,13 @@ struct Iamax: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::iamax[ETI]" - : "KokkosBlas::iamax[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::iamax[ETI]" + : "KokkosBlas::iamax[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::iamax<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::iamax<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::iamax<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::iamax<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -181,8 +155,7 @@ struct Iamax -struct Iamax { +struct Iamax { typedef typename XMV::size_type size_type; static void iamax(const execution_space& space, const RV& R, const XMV& X) { @@ -198,23 +171,19 @@ struct Iamax: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::iamax[ETI]" - : "KokkosBlas::iamax[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::iamax[ETI]" + : "KokkosBlas::iamax[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::iamax<> ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::iamax<> ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::iamax<> non-ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::iamax<> non-ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { MV_Iamax_Invoke(space, R, X); } else { typedef std::int64_t index_type; @@ -235,64 +204,46 @@ struct Iamax >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; \ - extern template struct Iamax< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Iamax< \ + EXEC_SPACE, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; \ + extern template struct Iamax, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; -#define KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(unsigned long, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(unsigned int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL(SCALAR, LAYOUT, 
EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(unsigned int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_DECL_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) // // Macro for definition of full specialization of // KokkosBlas::Impl::Iamax for rank == 1. This is NOT for users!!! We // use this macro in one or more .cpp files in this directory. // -#define KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(INDEX_TYPE, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - template struct Iamax< \ - EXEC_SPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; \ - template struct Iamax< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Iamax >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; \ + template struct Iamax, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; -#define KOKKOSBLAS1_IAMAX_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(unsigned long, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(unsigned int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBLAS1_IAMAX_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(unsigned int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_ETI_SPEC_INST_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) // // Macro for declaration of full specialization of @@ -301,66 +252,46 @@ 
struct Iamax >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; \ - extern template struct Iamax< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; +#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Iamax< \ + EXEC_SPACE, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; \ + extern template struct Iamax, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; -#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(unsigned long, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(unsigned int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(unsigned int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) // // Macro for definition of full specialization of // KokkosBlas::Impl::Iamax for rank == 2. This is NOT for users!!! We // use this macro in one or more .cpp files in this directory. 
// -#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(INDEX_TYPE, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - template struct Iamax< \ - EXEC_SPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; \ - template struct Iamax< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; +#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Iamax< \ + EXEC_SPACE, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; \ + template struct Iamax, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; -#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(unsigned long, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(unsigned int, SCALAR, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) +#define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(unsigned long, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(unsigned int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX(int, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_impl.hpp index 048db395b095..3584240e7073 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_impl.hpp @@ -34,8 +34,7 @@ namespace Impl { /// /// C(i,j) = c * C(i,j) + ab * A(i) * B(i,j), subject to the usual /// BLAS update rules. 
-template +template struct MV_MultFunctor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -47,8 +46,8 @@ struct MV_MultFunctor { AV m_A; BMV m_B; - MV_MultFunctor(typename CMV::const_value_type& c, const CMV& C, - typename AV::const_value_type& ab, const AV& A, const BMV& B) + MV_MultFunctor(typename CMV::const_value_type& c, const CMV& C, typename AV::const_value_type& ab, const AV& A, + const BMV& B) : m_n(C.extent(1)), m_c(c), m_C(C), m_ab(ab), m_A(A), m_B(B) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type& i) const { @@ -101,8 +100,7 @@ struct MV_MultFunctor { /// /// C(i) = c * C(i) + ab * A(i) * B(i), subject to the usual /// BLAS update rules. -template +template struct V_MultFunctor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -113,8 +111,8 @@ struct V_MultFunctor { AV m_A; BV m_B; - V_MultFunctor(typename CV::const_value_type& c, const CV& C, - typename AV::const_value_type& ab, const AV& A, const BV& B) + V_MultFunctor(typename CV::const_value_type& c, const CV& C, typename AV::const_value_type& ab, const AV& A, + const BV& B) : m_c(c), m_C(C), m_ab(ab), m_A(A), m_B(B) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type& i) const { @@ -145,10 +143,8 @@ struct V_MultFunctor { /// C(i) = c * C(i) + ab * A(i) * B(i), subject to the usual BLAS /// update rules. template -void V_Mult_Generic(const execution_space& space, - typename CV::const_value_type& c, const CV& C, - typename AV::const_value_type& ab, const AV& A, - const BV& B) { +void V_Mult_Generic(const execution_space& space, typename CV::const_value_type& c, const CV& C, + typename AV::const_value_type& ab, const AV& A, const BV& B) { using Kokkos::ALL; using Kokkos::subview; typedef Kokkos::ArithTraits ATA; @@ -192,10 +188,8 @@ void V_Mult_Generic(const execution_space& space, /// C(i,j) = c * C(i,j) + ab * A(i) * B(i,j), subject to the usual /// BLAS update rules. 
template -void MV_Mult_Generic(const execution_space& space, - typename CMV::const_value_type& c, const CMV& C, - typename AV::const_value_type& ab, const AV& A, - const BMV& B) { +void MV_Mult_Generic(const execution_space& space, typename CMV::const_value_type& c, const CMV& C, + typename AV::const_value_type& ab, const AV& A, const BMV& B) { typedef Kokkos::ArithTraits ATA; typedef Kokkos::ArithTraits ATC; @@ -205,8 +199,7 @@ void MV_Mult_Generic(const execution_space& space, typedef decltype(C_0) CV; typedef decltype(B_0) BV; - V_Mult_Generic(space, c, C_0, ab, A, - B_0); + V_Mult_Generic(space, c, C_0, ab, A, B_0); return; } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_spec.hpp index c81e00a6b03b..3cd847dc1de6 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_mult_spec.hpp @@ -27,8 +27,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct mult_eti_spec_avail { enum : bool { value = false }; }; @@ -42,20 +41,17 @@ struct mult_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_MULT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct mult_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_MULT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct mult_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -65,21 +61,17 @@ struct mult_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_MULT_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct mult_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_MULT_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct mult_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -101,32 +93,24 @@ namespace Impl { /// Y(i,j) = alpha*A(i,j)*X(i,j) + gamma*Y(i,j) /// /// with special cases for alpha, or gamma = 0. 
-template ::value, - bool eti_spec_avail = - mult_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = mult_eti_spec_avail::value> struct Mult { - static void mult(const execution_space& space, - const typename YMV::non_const_value_type& gamma, - const YMV& Y, - const typename XMV::non_const_value_type& alpha, const AV& A, - const XMV& X); + static void mult(const execution_space& space, const typename YMV::non_const_value_type& gamma, const YMV& Y, + const typename XMV::non_const_value_type& alpha, const AV& A, const XMV& X); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY // Partial specialization for YMV, AV, and XMV rank-2 Views. template -struct Mult { +struct Mult { typedef typename YMV::size_type size_type; typedef typename YMV::non_const_value_type YMV_scalar; typedef typename XMV::non_const_value_type XMV_scalar; - static void mult(const execution_space& space, const YMV_scalar& gamma, - const YMV& Y, const XMV_scalar& alpha, const AV& A, - const XMV& X) { + static void mult(const execution_space& space, const YMV_scalar& gamma, const YMV& Y, const XMV_scalar& alpha, + const AV& A, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Mult::mult: Y is not a Kokkos::View."); @@ -136,8 +120,7 @@ struct Mult::value, "KokkosBlas::Impl::" "Mult::mult: X is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Mult::mult: Y is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -149,31 +132,26 @@ struct Mult::mult: " "AV must have rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::mult[ETI]" - : "KokkosBlas::mult[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::mult[ETI]" + : "KokkosBlas::mult[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::mult<> ETI specialization for < %s , %s , %s >\n", - typeid(YMV).name(), typeid(AV).name(), typeid(XMV).name()); + printf("KokkosBlas1::mult<> ETI specialization for < %s , %s , %s >\n", typeid(YMV).name(), typeid(AV).name(), + typeid(XMV).name()); else { - printf( - "KokkosBlas1::mult<> non-ETI specialization for < %s , %s , %s >\n", - typeid(YMV).name(), typeid(AV).name(), typeid(XMV).name()); + printf("KokkosBlas1::mult<> non-ETI specialization for < %s , %s , %s >\n", typeid(YMV).name(), typeid(AV).name(), + typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { - MV_Mult_Generic(space, gamma, Y, - alpha, A, X); + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { + MV_Mult_Generic(space, gamma, Y, alpha, A, X); } else { - MV_Mult_Generic(space, gamma, Y, - alpha, A, X); + MV_Mult_Generic(space, gamma, Y, alpha, A, X); } Kokkos::Profiling::popRegion(); } @@ -181,15 +159,13 @@ struct Mult -struct Mult { +struct Mult { typedef typename YV::size_type size_type; typedef typename YV::non_const_value_type YV_scalar; typedef typename XV::non_const_value_type XV_scalar; - static void mult(const execution_space& space, const YV_scalar& gamma, - const YV& Y, const XV_scalar& alpha, const AV& A, - const XV& X) { + static void mult(const execution_space& space, const YV_scalar& gamma, const YV& Y, const XV_scalar& alpha, + const AV& A, const XV& X) { // YV, AV, and XV must be Kokkos::View specializations. static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" @@ -201,35 +177,30 @@ struct Mult::mult: X is not a Kokkos::View."); // XV must be nonconst (else it can't be an output argument). 
- static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Mult::mult: Y is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); static_assert((int)XV::rank == (int)YV::rank && (int)AV::rank == 1, "KokkosBlas::Impl::Mult::mult: " "X, Y, and Z must have rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::mult[ETI]" - : "KokkosBlas::mult[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::mult[ETI]" + : "KokkosBlas::mult[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::mult<> ETI specialization for < %s , %s , %s >\n", - typeid(YV).name(), typeid(AV).name(), typeid(XV).name()); + printf("KokkosBlas1::mult<> ETI specialization for < %s , %s , %s >\n", typeid(YV).name(), typeid(AV).name(), + typeid(XV).name()); else { - printf( - "KokkosBlas1::mult<> non-ETI specialization for < %s , %s , %s >\n", - typeid(YV).name(), typeid(AV).name(), typeid(XV).name()); + printf("KokkosBlas1::mult<> non-ETI specialization for < %s , %s , %s >\n", typeid(YV).name(), typeid(AV).name(), + typeid(XV).name()); } #endif const size_type numRows = Y.extent(0); if (numRows < static_cast(INT_MAX)) { - V_Mult_Generic(space, gamma, Y, alpha, - A, X); + V_Mult_Generic(space, gamma, Y, alpha, A, X); } else { - V_Mult_Generic(space, gamma, Y, - alpha, A, X); + V_Mult_Generic(space, gamma, Y, alpha, A, X); } Kokkos::Profiling::popRegion(); } @@ -248,30 +219,24 @@ struct Mult, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_MULT_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Mult< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits 
>, \ 1, false, true>; -#define KOKKOSBLAS1_MULT_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Mult< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_MULT_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Mult< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -282,32 +247,24 @@ struct Mult, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_MULT_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Mult< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; -#define KOKKOSBLAS1_MULT_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct Mult< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_MULT_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Mult< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_impl.hpp index a88c01023ec6..8ba857c9e9c8 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_impl.hpp @@ -50,8 +50,7 @@ struct V_Nrm1_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_Nrm1_Functor: " "X is not a 
Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_Nrm1_Functor: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -80,11 +79,9 @@ struct Nrm1_MV_Functor { RV r; XV x; - size_type - teamsPerVec; // number of teams collectively performing a dot product + size_type teamsPerVec; // number of teams collectively performing a dot product - Nrm1_MV_Functor(const RV& r_, const XV& x_, int teamsPerVec_) - : r(r_), x(x_), teamsPerVec(teamsPerVec_) {} + Nrm1_MV_Functor(const RV& r_, const XV& x_, int teamsPerVec_) : r(r_), x(x_), teamsPerVec(teamsPerVec_) {} KOKKOS_INLINE_FUNCTION void operator()(const TeamMem& t) const { @@ -103,9 +100,7 @@ struct Nrm1_MV_Functor { }, localResult); - Kokkos::single(Kokkos::PerTeam(t), [&]() { - Kokkos::atomic_add(&r(i), rvalue_type(localResult)); - }); + Kokkos::single(Kokkos::PerTeam(t), [&]() { Kokkos::atomic_add(&r(i), rvalue_type(localResult)); }); } }; @@ -128,27 +123,23 @@ void V_Nrm1_Invoke(const execution_space& space, const RV& r, const XV& X) { template void MV_Nrm1_Invoke( const execution_space& space, const RV& r, const XV& x, - typename std::enable_if::accessible>::type* = + typename std::enable_if::accessible>::type* = nullptr) { if (r.extent(0) != x.extent(1)) { std::ostringstream oss; - oss << "KokkosBlas::nrm1 (rank-2): result vector has wrong length (" - << r.extent(0) << ", but x has " << x.extent(1) << " columns)"; + oss << "KokkosBlas::nrm1 (rank-2): result vector has wrong length (" << r.extent(0) << ", but x has " << x.extent(1) + << " columns)"; throw std::runtime_error(oss.str()); } // Zero out the result vector - Kokkos::deep_copy( - space, r, Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(space, r, Kokkos::ArithTraits::zero()); size_type teamsPerVec; - KokkosBlas::Impl::multipleReductionWorkDistribution( - x.extent(0), x.extent(1), teamsPerVec); + 
KokkosBlas::Impl::multipleReductionWorkDistribution(x.extent(0), x.extent(1), + teamsPerVec); size_type numTeams = x.extent(1) * teamsPerVec; Kokkos::TeamPolicy pol(space, numTeams, Kokkos::AUTO); - Kokkos::parallel_for( - "KokkosBlas1::Nrm1::S1", pol, - Nrm1_MV_Functor(r, x, teamsPerVec)); + Kokkos::parallel_for("KokkosBlas1::Nrm1::S1", pol, + Nrm1_MV_Functor(r, x, teamsPerVec)); } // Version for when a temporary result view is needed (implemented in terms of @@ -156,15 +147,11 @@ void MV_Nrm1_Invoke( template void MV_Nrm1_Invoke( const execution_space& space, const RV& r, const XV& x, - typename std::enable_if::accessible>::type* = - nullptr) { - Kokkos::View - tempResult( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm1 temp result"), - r.extent(0)); - MV_Nrm1_Invoke( - space, tempResult, x); + typename std::enable_if< + !Kokkos::SpaceAccessibility::accessible>::type* = nullptr) { + Kokkos::View tempResult( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm1 temp result"), r.extent(0)); + MV_Nrm1_Invoke(space, tempResult, x); Kokkos::deep_copy(space, r, tempResult); // Fence needed to ensure that the deep_copy // above finishes before we exit this function diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_spec.hpp index 24f093c736c0..3977c5225c71 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm1_spec.hpp @@ -43,19 +43,15 @@ struct nrm1_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_NRM1_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct nrm1_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM1_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrm1_eti_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -65,22 +61,17 @@ struct nrm1_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_NRM1_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct nrm1_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM1_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrm1_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -92,10 +83,9 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template < - class execution_space, class RMV, class XMV, int rank = XMV::rank, - bool tpl_spec_avail = nrm1_tpl_spec_avail::value, - bool eti_spec_avail = nrm1_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = nrm1_eti_spec_avail::value> struct Nrm1 { static void nrm1(const execution_space& space, const RMV& R, const XMV& X); }; @@ -103,8 +93,7 @@ struct Nrm1 { #if 
!defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Nrm1 for single vectors (1-D Views). template -struct Nrm1 { +struct Nrm1 { using size_type = typename XMV::size_type; static void nrm1(const execution_space& space, const RMV& R, const XMV& X) { @@ -120,16 +109,13 @@ struct Nrm1: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::nrm1[ETI]" - : "KokkosBlas::nrm1[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::nrm1[ETI]" + : "KokkosBlas::nrm1[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrm1<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm1<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrm1<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm1<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -145,8 +131,7 @@ struct Nrm1 -struct Nrm1 { +struct Nrm1 { using size_type = typename XMV::size_type; static void nrm1(const execution_space& space, const RV& R, const XMV& X) { @@ -165,32 +150,26 @@ struct Nrm1 ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm1<> ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrm1<> non-ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm1<> non-ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); } #endif - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosBlas::nrm1[ETI]" - : "KokkosBlas::nrm1[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::nrm1[ETI]" + : "KokkosBlas::nrm1[noETI]"); const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); if (numCols == Kokkos::ArithTraits::one()) { auto R0 = Kokkos::subview(R, 0); auto X0 = Kokkos::subview(X, Kokkos::ALL(), 0); if (numRows < static_cast(INT_MAX)) { - V_Nrm1_Invoke(space, - R0, X0); + V_Nrm1_Invoke(space, R0, X0); } else { typedef std::int64_t index_type; - V_Nrm1_Invoke( - space, R0, X0); + V_Nrm1_Invoke(space, R0, X0); } } else { - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { MV_Nrm1_Invoke(space, R, X); } else { using index_type = std::int64_t; @@ -212,34 +191,26 @@ struct Nrm1::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRM1_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Nrm1::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for definition of full specialization of // KokkosBlas::Impl::Nrm1 for rank == 2. This is NOT for users!!! We // use this macro in one or more .cpp files in this directory. 
// -#define KOKKOSBLAS1_NRM1_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Nrm1< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRM1_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Nrm1::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for declaration of full specialization of @@ -248,19 +219,14 @@ struct Nrm1::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_NRM1_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Nrm1< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -268,20 +234,14 @@ struct Nrm1::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; +#define KOKKOSBLAS1_NRM1_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Nrm1::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_impl.hpp index 276023c17198..e840d0bfd4a1 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_impl.hpp @@ -51,8 +51,7 @@ struct V_Nrm2_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_Nrm2_Functor: " "X is not a Kokkos::View."); - static_assert(std::is_same::value, + 
static_assert(std::is_same::value, "KokkosBlas::Impl::V_Nrm2_Functor: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -67,19 +66,12 @@ struct V_Nrm2_Functor { sum += tmp * tmp; } - KOKKOS_INLINE_FUNCTION void init(value_type& update) const { - update = AT::zero(); - } + KOKKOS_INLINE_FUNCTION void init(value_type& update) const { update = AT::zero(); } - KOKKOS_INLINE_FUNCTION void join(value_type& update, - const value_type& source) const { - update += source; - } + KOKKOS_INLINE_FUNCTION void join(value_type& update, const value_type& source) const { update += source; } KOKKOS_INLINE_FUNCTION void final(value_type& update) const { - if (m_take_sqrt) - update = - Kokkos::ArithTraits::sqrt(update); + if (m_take_sqrt) update = Kokkos::ArithTraits::sqrt(update); } }; @@ -102,11 +94,9 @@ struct Nrm2_MV_Functor { RV r; XV x; - size_type - teamsPerVec; // number of teams collectively performing a dot product + size_type teamsPerVec; // number of teams collectively performing a dot product - Nrm2_MV_Functor(const RV& r_, const XV& x_, int teamsPerVec_) - : r(r_), x(x_), teamsPerVec(teamsPerVec_) {} + Nrm2_MV_Functor(const RV& r_, const XV& x_, int teamsPerVec_) : r(r_), x(x_), teamsPerVec(teamsPerVec_) {} KOKKOS_INLINE_FUNCTION void operator()(const TeamMem& t) const { @@ -127,17 +117,14 @@ struct Nrm2_MV_Functor { }, localResult); - Kokkos::single(Kokkos::PerTeam(t), [&]() { - Kokkos::atomic_add(&r(i), rvalue_type(localResult)); - }); + Kokkos::single(Kokkos::PerTeam(t), [&]() { Kokkos::atomic_add(&r(i), rvalue_type(localResult)); }); } }; /// \brief Compute the 2-norm (or its square) of the single vector (1-D /// View) X, and store the result in the 0-D View r. 
template -void V_Nrm2_Invoke(const execution_space& space, const RV& r, const XV& X, - const bool& take_sqrt) { +void V_Nrm2_Invoke(const execution_space& space, const RV& r, const XV& X, const bool& take_sqrt) { const SizeType numRows = static_cast(X.extent(0)); Kokkos::RangePolicy policy(space, 0, numRows); @@ -153,32 +140,26 @@ void V_Nrm2_Invoke(const execution_space& space, const RV& r, const XV& X, template void MV_Nrm2_Invoke( const execution_space& space, const RV& r, const XV& x, bool take_sqrt, - typename std::enable_if::accessible>::type* = + typename std::enable_if::accessible>::type* = nullptr) { if (r.extent(0) != x.extent(1)) { std::ostringstream oss; - oss << "KokkosBlas::nrm2 (rank-2): result vector has wrong length (" - << r.extent(0) << ", but x has " << x.extent(1) << " columns)"; + oss << "KokkosBlas::nrm2 (rank-2): result vector has wrong length (" << r.extent(0) << ", but x has " << x.extent(1) + << " columns)"; throw std::runtime_error(oss.str()); } // Zero out the result vector - Kokkos::deep_copy( - space, r, Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(space, r, Kokkos::ArithTraits::zero()); size_type teamsPerVec; - KokkosBlas::Impl::multipleReductionWorkDistribution( - x.extent(0), x.extent(1), teamsPerVec); + KokkosBlas::Impl::multipleReductionWorkDistribution(x.extent(0), x.extent(1), + teamsPerVec); size_type numTeams = x.extent(1) * teamsPerVec; Kokkos::TeamPolicy pol(space, numTeams, Kokkos::AUTO); - Kokkos::parallel_for( - "KokkosBlas1::Nrm2::S1", pol, - Nrm2_MV_Functor(r, x, teamsPerVec)); + Kokkos::parallel_for("KokkosBlas1::Nrm2::S1", pol, + Nrm2_MV_Functor(r, x, teamsPerVec)); if (take_sqrt) { - Kokkos::parallel_for( - "KokkosBlas1::Nrm2::Sqrt", - Kokkos::RangePolicy(space, 0, r.extent(0)), - TakeSqrtFunctor(r)); + Kokkos::parallel_for("KokkosBlas1::Nrm2::Sqrt", Kokkos::RangePolicy(space, 0, r.extent(0)), + TakeSqrtFunctor(r)); } } @@ -187,15 +168,11 @@ void MV_Nrm2_Invoke( template void MV_Nrm2_Invoke( const 
execution_space& space, const RV& r, const XV& x, bool take_sqrt, - typename std::enable_if::accessible>::type* = - nullptr) { - Kokkos::View - tempResult( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm2 temp result"), - r.extent(0)); - MV_Nrm2_Invoke( - space, tempResult, x, take_sqrt); + typename std::enable_if< + !Kokkos::SpaceAccessibility::accessible>::type* = nullptr) { + Kokkos::View tempResult( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm2 temp result"), r.extent(0)); + MV_Nrm2_Invoke(space, tempResult, x, take_sqrt); Kokkos::deep_copy(space, r, tempResult); space.fence(); } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_spec.hpp index 6c21e551a8cc..4d0b2e139673 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2_spec.hpp @@ -43,19 +43,15 @@ struct nrm2_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_NRM2_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct nrm2_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM2_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrm2_eti_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -65,22 +61,17 @@ struct nrm2_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_NRM2_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct nrm2_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM2_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrm2_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -92,24 +83,20 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template < - class execution_space, class RMV, class XMV, int rank = XMV::rank, - bool tpl_spec_avail = nrm2_tpl_spec_avail::value, - bool eti_spec_avail = nrm2_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = nrm2_eti_spec_avail::value> struct Nrm2 { - static void nrm2(const execution_space& space, const RMV& R, const XMV& X, - const bool& take_sqrt); + static void nrm2(const execution_space& space, const RMV& R, const XMV& X, const bool& take_sqrt); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Nrm2 for single vectors (1-D Views). template -struct Nrm2 { +struct Nrm2 { typedef typename XMV::size_type size_type; - static void nrm2(const execution_space& space, const RMV& R, const XMV& X, - const bool& take_sqrt) { + static void nrm2(const execution_space& space, const RMV& R, const XMV& X, const bool& take_sqrt) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Nrm2<1-D>: RMV is not a Kokkos::View."); @@ -122,16 +109,13 @@ struct Nrm2: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosBlas::nrm2[ETI]" - : "KokkosBlas::nrm2[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::nrm2[ETI]" + : "KokkosBlas::nrm2[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrm2<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrm2<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -140,20 +124,17 @@ struct Nrm2(space, R, X, take_sqrt); } else { typedef std::int64_t index_type; - V_Nrm2_Invoke(space, R, X, - take_sqrt); + V_Nrm2_Invoke(space, R, X, take_sqrt); } Kokkos::Profiling::popRegion(); } }; template -struct Nrm2 { +struct Nrm2 { typedef typename XMV::size_type size_type; - static void nrm2(const execution_space& space, const RV& R, const XMV& X, - const bool& take_sqrt) { + static void nrm2(const execution_space& space, const RV& R, const XMV& X, const bool& take_sqrt) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Nrm2<2-D>: RV is not a Kokkos::View."); @@ -166,16 +147,13 @@ struct Nrm2: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::nrm2[ETI]" - : "KokkosBlas::nrm2[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::nrm2[ETI]" + : "KokkosBlas::nrm2[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrm2<> ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2<> ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrm2<> non-ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2<> non-ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); } #endif @@ -185,21 +163,17 @@ struct Nrm2(INT_MAX)) { - V_Nrm2_Invoke( - space, R0, X0, take_sqrt); + V_Nrm2_Invoke(space, R0, X0, take_sqrt); } else { typedef std::int64_t index_type; - V_Nrm2_Invoke( - space, R0, X0, take_sqrt); + V_Nrm2_Invoke(space, R0, X0, take_sqrt); } } else { - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { MV_Nrm2_Invoke(space, R, X, take_sqrt); } else { typedef std::int64_t index_type; - MV_Nrm2_Invoke(space, R, X, - take_sqrt); + MV_Nrm2_Invoke(space, R, X, take_sqrt); } } Kokkos::Profiling::popRegion(); @@ -217,34 +191,26 @@ struct Nrm2::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRM2_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Nrm2::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for definition of full specialization of // KokkosBlas::Impl::Nrm2 for rank == 2. This is NOT for users!!! We // use this macro in one or more .cpp files in this directory. 
// -#define KOKKOSBLAS1_NRM2_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Nrm2< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRM2_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Nrm2::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for declaration of full specialization of @@ -253,19 +219,14 @@ struct Nrm2::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_NRM2_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Nrm2< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -273,20 +234,14 @@ struct Nrm2::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; +#define KOKKOSBLAS1_NRM2_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Nrm2::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_impl.hpp index fb9b1f7858b5..979ba2cec38e 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_impl.hpp @@ -46,16 +46,14 @@ struct V_Nrm2w_Functor { typename XV::const_type m_x, m_w; bool m_take_sqrt; - V_Nrm2w_Functor(const XV& x, const XV& w, bool take_sqrt) - : m_x(x), m_w(w), m_take_sqrt(take_sqrt) { 
+ V_Nrm2w_Functor(const XV& x, const XV& w, bool take_sqrt) : m_x(x), m_w(w), m_take_sqrt(take_sqrt) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_Nrm2w_Functor: " "R is not a Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_Nrm2w_Functor: " "X is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_Nrm2w_Functor: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -71,19 +69,12 @@ struct V_Nrm2w_Functor { ; } - KOKKOS_INLINE_FUNCTION void init(value_type& update) const { - update = AT::zero(); - } + KOKKOS_INLINE_FUNCTION void init(value_type& update) const { update = AT::zero(); } - KOKKOS_INLINE_FUNCTION void join(value_type& update, - const value_type& source) const { - update += source; - } + KOKKOS_INLINE_FUNCTION void join(value_type& update, const value_type& source) const { update += source; } KOKKOS_INLINE_FUNCTION void final(value_type& update) const { - if (m_take_sqrt) - update = - Kokkos::ArithTraits::sqrt(update); + if (m_take_sqrt) update = Kokkos::ArithTraits::sqrt(update); } }; @@ -101,8 +92,7 @@ struct Nrm2w_MV_Functor { XV x; XV w; - size_type - teamsPerVec; // number of teams collectively performing a dot product + size_type teamsPerVec; // number of teams collectively performing a dot product Nrm2w_MV_Functor(const RV& r_, const XV& x_, const XV& w_, int teamsPerVec_) : r(r_), x(x_), w(w_), teamsPerVec(teamsPerVec_) {} @@ -120,23 +110,19 @@ struct Nrm2w_MV_Functor { Kokkos::parallel_reduce( Kokkos::TeamThreadRange(t, begin, end), [&](size_type k, value_type& update) { - const typename IPT::mag_type tmp = - IPT::norm(x(k, i)) / IPT::norm(w(k, i)); + const typename IPT::mag_type tmp = IPT::norm(x(k, i)) / IPT::norm(w(k, i)); update += tmp * tmp; }, localResult); - Kokkos::single(Kokkos::PerTeam(t), [&]() { - Kokkos::atomic_add(&r(i), rvalue_type(localResult)); - }); + 
Kokkos::single(Kokkos::PerTeam(t), [&]() { Kokkos::atomic_add(&r(i), rvalue_type(localResult)); }); } }; /// \brief Compute the 2-norm (or its square) of the single vector (1-D /// View) X, and store the result in the 0-D View r. template -void V_Nrm2w_Invoke(const execution_space& space, const RV& r, const XV& X, - const XV& W, const bool& take_sqrt) { +void V_Nrm2w_Invoke(const execution_space& space, const RV& r, const XV& X, const XV& W, const bool& take_sqrt) { const SizeType numRows = static_cast(X.extent(0)); Kokkos::RangePolicy policy(space, 0, numRows); @@ -151,34 +137,27 @@ void V_Nrm2w_Invoke(const execution_space& space, const RV& r, const XV& X, // be computed in-place template void MV_Nrm2w_Invoke( - const execution_space& space, const RV& r, const XV& x, const XV& w, - bool take_sqrt, - typename std::enable_if::accessible>::type* = + const execution_space& space, const RV& r, const XV& x, const XV& w, bool take_sqrt, + typename std::enable_if::accessible>::type* = nullptr) { if (r.extent(0) != x.extent(1)) { std::ostringstream oss; - oss << "KokkosBlas::nrm2w (rank-2): result vector has wrong length (" - << r.extent(0) << ", but x has " << x.extent(1) << " columns)"; + oss << "KokkosBlas::nrm2w (rank-2): result vector has wrong length (" << r.extent(0) << ", but x has " + << x.extent(1) << " columns)"; throw std::runtime_error(oss.str()); } // Zero out the result vector - Kokkos::deep_copy( - space, r, Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(space, r, Kokkos::ArithTraits::zero()); size_type teamsPerVec; - KokkosBlas::Impl::multipleReductionWorkDistribution( - x.extent(0), x.extent(1), teamsPerVec); + KokkosBlas::Impl::multipleReductionWorkDistribution(x.extent(0), x.extent(1), + teamsPerVec); size_type numTeams = x.extent(1) * teamsPerVec; Kokkos::TeamPolicy pol(space, numTeams, Kokkos::AUTO); Kokkos::parallel_for("KokkosBlas1::Nrm2w::S1", pol, - Nrm2w_MV_Functor( - r, x, w, teamsPerVec)); + Nrm2w_MV_Functor(r, x, w, teamsPerVec)); if 
(take_sqrt) { - Kokkos::parallel_for( - "KokkosBlas1::Nrm2w::Sqrt", - Kokkos::RangePolicy(space, 0, r.extent(0)), - TakeSqrtFunctor(r)); + Kokkos::parallel_for("KokkosBlas1::Nrm2w::Sqrt", Kokkos::RangePolicy(space, 0, r.extent(0)), + TakeSqrtFunctor(r)); } } @@ -186,17 +165,12 @@ void MV_Nrm2w_Invoke( // the other version) template void MV_Nrm2w_Invoke( - const execution_space& space, const RV& r, const XV& x, const XV& w, - bool take_sqrt, - typename std::enable_if::accessible>::type* = - nullptr) { - Kokkos::View - tempResult( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm2w temp result"), - r.extent(0)); - MV_Nrm2w_Invoke(space, tempResult, x, w, - take_sqrt); + const execution_space& space, const RV& r, const XV& x, const XV& w, bool take_sqrt, + typename std::enable_if< + !Kokkos::SpaceAccessibility::accessible>::type* = nullptr) { + Kokkos::View tempResult( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm2w temp result"), r.extent(0)); + MV_Nrm2w_Invoke(space, tempResult, x, w, take_sqrt); Kokkos::deep_copy(space, r, tempResult); space.fence(); } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_spec.hpp index f4bbe286ef22..566083213946 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrm2w_spec.hpp @@ -42,20 +42,15 @@ struct nrm2w_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_NRM2W_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct nrm2w_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM2W_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrm2w_eti_spec_avail::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -65,20 +60,16 @@ struct nrm2w_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_NRM2W_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct nrm2w_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View::mag_type*, \ - LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM2W_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrm2w_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -91,24 +82,19 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - nrm2w_eti_spec_avail::value> + bool tpl_spec_avail = nrm2w_tpl_spec_avail::value, + bool eti_spec_avail = nrm2w_eti_spec_avail::value> struct Nrm2w { - static void nrm2w(const execution_space& space, const RMV& R, const XMV& X, - const XMV& W, const bool& take_sqrt); + static void nrm2w(const execution_space& space, const RMV& R, const XMV& X, const XMV& W, const bool& 
take_sqrt); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Nrm2w for single vectors (1-D Views). template -struct Nrm2w { +struct Nrm2w { using size_type = typename XMV::size_type; - static void nrm2w(const execution_space& space, const RMV& R, const XMV& X, - const XMV& W, const bool& take_sqrt) { + static void nrm2w(const execution_space& space, const RMV& R, const XMV& X, const XMV& W, const bool& take_sqrt) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Nrm2w<1-D>: RMV is not a Kokkos::View."); @@ -121,16 +107,13 @@ struct Nrm2w: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::nrm2w[ETI]" - : "KokkosBlas::nrm2w[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::nrm2w[ETI]" + : "KokkosBlas::nrm2w[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrm2w<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2w<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrm2w<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2w<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -139,20 +122,17 @@ struct Nrm2w(space, R, X, W, take_sqrt); } else { typedef std::int64_t index_type; - V_Nrm2w_Invoke(space, R, X, W, - take_sqrt); + V_Nrm2w_Invoke(space, R, X, W, take_sqrt); } Kokkos::Profiling::popRegion(); } }; template -struct Nrm2w { +struct Nrm2w { using size_type = typename XMV::size_type; - static void nrm2w(const execution_space& space, const RV& R, const XMV& X, - const XMV& W, const bool& take_sqrt) { + static void nrm2w(const execution_space& space, const RV& R, 
const XMV& X, const XMV& W, const bool& take_sqrt) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Nrm2w<2-D>: RV is not a Kokkos::View."); @@ -165,16 +145,13 @@ struct Nrm2w: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::nrm2w[ETI]" - : "KokkosBlas::nrm2w[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::nrm2w[ETI]" + : "KokkosBlas::nrm2w[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrm2w<> ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2w<> ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrm2w<> non-ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrm2w<> non-ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); } #endif @@ -185,22 +162,17 @@ struct Nrm2w(INT_MAX)) { - V_Nrm2w_Invoke( - space, R0, X0, W0, take_sqrt); + V_Nrm2w_Invoke(space, R0, X0, W0, take_sqrt); } else { typedef std::int64_t index_type; - V_Nrm2w_Invoke( - space, R0, X0, W0, take_sqrt); + V_Nrm2w_Invoke(space, R0, X0, W0, take_sqrt); } } else { - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { - MV_Nrm2w_Invoke(space, R, X, W, - take_sqrt); + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { + MV_Nrm2w_Invoke(space, R, X, W, take_sqrt); } else { typedef std::int64_t index_type; - MV_Nrm2w_Invoke(space, R, X, W, - take_sqrt); + MV_Nrm2w_Invoke(space, R, X, W, take_sqrt); } } Kokkos::Profiling::popRegion(); @@ -218,33 +190,25 @@ struct Nrm2w::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRM2W_ETI_SPEC_DECL(SCALAR, LAYOUT, 
EXEC_SPACE, MEM_SPACE) \ + extern template struct Nrm2w::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for definition of full specialization of // KokkosBlas::Impl::Nrm2w for rank == 2. This is NOT for users!!! We // use this macro in one or more .cpp files in this directory. // -#define KOKKOSBLAS1_NRM2W_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Nrm2w< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRM2W_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Nrm2w::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for declaration of full specialization of @@ -253,17 +217,13 @@ struct Nrm2w::mag_type*, \ - LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_NRM2W_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Nrm2w< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -271,18 +231,13 @@ struct Nrm2w::mag_type*, \ - LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; +#define KOKKOSBLAS1_NRM2W_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Nrm2w::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_impl.hpp index 
b8431ac8ea61..e7479e6697be 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_impl.hpp @@ -50,8 +50,7 @@ struct V_NrmInf_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_NrmInf_Functor: " "X is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_NrmInf_Functor: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -85,8 +84,7 @@ void V_NrmInf_Invoke(const execution_space& space, const RV& r, const XV& X) { typedef V_NrmInf_Functor functor_type; functor_type op(X); - Kokkos::parallel_reduce("KokkosBlas::NrmInf::S0", policy, op, - Kokkos::Max(r())); + Kokkos::parallel_reduce("KokkosBlas::NrmInf::S0", policy, op, Kokkos::Max(r())); } /// \brief Compute the 2-norms (or their square) of the columns of the @@ -96,8 +94,7 @@ void MV_NrmInf_Invoke(const execution_space& space, const RV& r, const XMV& X) { for (size_t i = 0; i < X.extent(1); i++) { auto ri = Kokkos::subview(r, i); auto Xi = Kokkos::subview(X, Kokkos::ALL(), i); - V_NrmInf_Invoke( - space, ri, Xi); + V_NrmInf_Invoke(space, ri, Xi); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_spec.hpp index 3659d61f19eb..e7b365ce854c 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_nrminf_spec.hpp @@ -43,20 +43,15 @@ struct nrminf_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_NRMINF_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct nrminf_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRMINF_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrminf_eti_spec_avail::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -66,22 +61,17 @@ struct nrminf_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_NRMINF_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct nrminf_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRMINF_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct nrminf_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -94,10 +84,8 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - nrminf_eti_spec_avail::value> + bool tpl_spec_avail = nrminf_tpl_spec_avail::value, + bool eti_spec_avail = nrminf_eti_spec_avail::value> struct NrmInf { static void nrminf(const execution_space& space, const RMV& R, const XMV& X); }; @@ -105,8 +93,7 @@ struct NrmInf { #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! 
Full specialization of NrmInf for single vectors (1-D Views). template -struct NrmInf { +struct NrmInf { typedef typename XMV::size_type size_type; static void nrminf(const execution_space& space, const RMV& R, const XMV& X) { @@ -122,16 +109,13 @@ struct NrmInf: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::nrminf[ETI]" - : "KokkosBlas::nrminf[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::nrminf[ETI]" + : "KokkosBlas::nrminf[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrminf<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrminf<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrminf<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrminf<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -147,8 +131,7 @@ struct NrmInf -struct NrmInf { +struct NrmInf { typedef typename XMV::size_type size_type; static void nrminf(const execution_space& space, const RV& R, const XMV& X) { @@ -164,23 +147,19 @@ struct NrmInf: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::nrminf[ETI]" - : "KokkosBlas::nrminf[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::nrminf[ETI]" + : "KokkosBlas::nrminf[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::nrminf<> ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrminf<> ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::nrminf<> non-ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::nrminf<> non-ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { MV_NrmInf_Invoke(space, R, X); } else { typedef std::int64_t index_type; @@ -201,36 +180,26 @@ struct NrmInf::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRMINF_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct NrmInf::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for definition of full specialization of // KokkosBlas::Impl::NrmInf for rank == 2. This is NOT for users!!! We // use this macro in one or more .cpp files in this directory. 
// -#define KOKKOSBLAS1_NRMINF_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct NrmInf< \ - EXEC_SPACE, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, false, true>; +#define KOKKOSBLAS1_NRMINF_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct NrmInf::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; // // Macro for declaration of full specialization of @@ -239,19 +208,14 @@ struct NrmInf::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_NRMINF_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct NrmInf< \ + EXEC_SPACE, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -259,20 +223,14 @@ struct NrmInf::mag_type*, \ - LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; +#define KOKKOSBLAS1_NRMINF_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct NrmInf::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_impl.hpp index 21f736ac4f7b..7ad6ab95dbea 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_impl.hpp @@ -37,8 +37,7 @@ struct MV_Reciprocal_Functor { RMV R_; XMV X_; - MV_Reciprocal_Functor(const RMV& R, const XMV& X) - : numCols(X.extent(1)), R_(R), X_(X) { 
+ MV_Reciprocal_Functor(const RMV& R, const XMV& X) : numCols(X.extent(1)), R_(R), X_(X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Reciprocal_Functor: RMV is not a Kokkos::View."); @@ -148,8 +147,7 @@ struct V_ReciprocalSelf_Functor { // Invoke the "generic" (not unrolled) multivector functor that // computes entry-wise reciprocalolute value. template -void MV_Reciprocal_Generic(const execution_space& space, const RMV& R, - const XMV& X) { +void MV_Reciprocal_Generic(const execution_space& space, const RMV& R, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Reciprocal_Generic: RMV is not a Kokkos::View."); @@ -177,8 +175,7 @@ void MV_Reciprocal_Generic(const execution_space& space, const RMV& R, // Variant of MV_Reciprocal_Generic for single vectors (1-D Views) R and X. template -void V_Reciprocal_Generic(const execution_space& space, const RV& R, - const XV& X) { +void V_Reciprocal_Generic(const execution_space& space, const RV& R, const XV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "V_Reciprocal_Generic: RV is not a Kokkos::View."); diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_spec.hpp index 08fc8bc341d8..988043511b0f 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_reciprocal_spec.hpp @@ -42,18 +42,15 @@ struct reciprocal_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_RECIPROCAL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct reciprocal_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_RECIPROCAL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct reciprocal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -63,18 +60,15 @@ struct reciprocal_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_RECIPROCAL_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct reciprocal_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_RECIPROCAL_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct reciprocal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -87,24 +81,19 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - reciprocal_eti_spec_avail::value> + bool tpl_spec_avail = reciprocal_tpl_spec_avail::value, + bool eti_spec_avail = reciprocal_eti_spec_avail::value> struct Reciprocal { - static void reciprocal(const execution_space& space, const RMV& R, - const XMV& X); + static void reciprocal(const execution_space& space, const RMV& R, const XMV& X); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Reciprocal for single vectors (1-D Views). 
template -struct Reciprocal { +struct Reciprocal { typedef typename XMV::size_type size_type; - static void reciprocal(const execution_space& space, const RMV& R, - const XMV& X) { + static void reciprocal(const execution_space& space, const RMV& R, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Reciprocal<1-D>: RMV is not a Kokkos::View."); @@ -117,17 +106,14 @@ struct Reciprocal: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::reciprocal[ETI]" - : "KokkosBlas::reciprocal[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::reciprocal[ETI]" + : "KokkosBlas::reciprocal[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::reciprocal<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::reciprocal<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf( - "KokkosBlas1::reciprocal<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::reciprocal<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), + typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -144,12 +130,10 @@ struct Reciprocal -struct Reciprocal { +struct Reciprocal { typedef typename XMV::size_type size_type; - static void reciprocal(const execution_space& space, const RMV& R, - const XMV& X) { + static void reciprocal(const execution_space& space, const RMV& R, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Reciprocal<2-D>: RMV is not a Kokkos::View."); @@ -162,23 +146,19 @@ struct Reciprocal: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosBlas::reciprocal[ETI]" - : "KokkosBlas::reciprocal[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::reciprocal[ETI]" + : "KokkosBlas::reciprocal[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::reciprocal<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::reciprocal<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::asb<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::asb<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { typedef int index_type; MV_Reciprocal_Generic(space, R, X); } else { @@ -200,15 +180,12 @@ struct Reciprocal, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_RECIPROCAL_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Reciprocal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -216,15 +193,12 @@ struct Reciprocal, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_RECIPROCAL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Reciprocal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -234,15 +208,12 @@ struct Reciprocal, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_RECIPROCAL_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, 
EXEC_SPACE, MEM_SPACE) \ + extern template struct Reciprocal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -250,15 +221,12 @@ struct Reciprocal, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_RECIPROCAL_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Reciprocal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_impl.hpp index 93d3b3d9b964..e139e916be00 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_impl.hpp @@ -30,8 +30,7 @@ struct rot_functor { VectorView X, Y; ScalarView c, s; - rot_functor(VectorView const& X_, VectorView const& Y_, ScalarView const& c_, - ScalarView const& s_) + rot_functor(VectorView const& X_, VectorView const& Y_, ScalarView const& c_, ScalarView const& s_) : X(X_), Y(Y_), c(c_), s(s_) {} KOKKOS_INLINE_FUNCTION @@ -43,8 +42,8 @@ struct rot_functor { }; template -void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, - VectorView const& Y, ScalarView const& c, ScalarView const& s) { +void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, + ScalarView const& s) { Kokkos::RangePolicy rot_policy(space, 0, X.extent(0)); rot_functor rot_func(X, Y, c, s); Kokkos::parallel_for("KokkosBlas::rot", rot_policy, rot_func); diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_spec.hpp index 214e0399e5d5..4ca4d8d1ef97 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rot_spec.hpp @@ -43,16 +43,14 @@ struct 
rot_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_ROT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct rot_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rot_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -64,36 +62,28 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - rot_eti_spec_avail::value> + bool tpl_spec_avail = rot_tpl_spec_avail::value, + bool eti_spec_avail = rot_eti_spec_avail::value> struct Rot { - static void rot(ExecutionSpace const& space, VectorView const& X, - VectorView const& Y, ScalarView const& c, + static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, ScalarView const& s); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rot. template -struct Rot { - static void rot(ExecutionSpace const& space, VectorView const& X, - VectorView const& Y, ScalarView const& c, +struct Rot { + static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, ScalarView const& s) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::rot[ETI]" - : "KokkosBlas::rot[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::rot[ETI]" + : "KokkosBlas::rot[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::rot<> ETI specialization for < %s, %s, %s >\n", - typeid(ExecutionSpace).name(), typeid(VectorView).name(), - typeid(ScalarView).name()); + printf("KokkosBlas1::rot<> ETI specialization for < %s, %s, %s >\n", typeid(ExecutionSpace).name(), + typeid(VectorView).name(), typeid(ScalarView).name()); else { - printf("KokkosBlas1::rot<> non-ETI specialization for < %s, %s, %s >\n", - typeid(ExecutionSpace).name(), typeid(VectorView).name(), - typeid(ScalarView).name()); + printf("KokkosBlas1::rot<> non-ETI specialization for < %s, %s, %s >\n", typeid(ExecutionSpace).name(), + typeid(VectorView).name(), typeid(ScalarView).name()); } #endif Rot_Invoke(space, X, Y, c, s); @@ -112,14 +102,12 @@ struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROT_ETI_SPEC_DECL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + extern template struct Rot< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ false, true>; // @@ -127,14 +115,12 @@ struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROT_ETI_SPEC_INST(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template struct Rot< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_impl.hpp index ff7830e147d3..834c773a8d16 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_impl.hpp @@ -24,10 +24,8 @@ namespace KokkosBlas 
{ namespace Impl { template ::is_complex, - bool>::type = true> -KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, - Scalar* s) { + typename std::enable_if::is_complex, bool>::type = true> +KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, Scalar* s) { const Scalar one = Kokkos::ArithTraits::one(); const Scalar zero = Kokkos::ArithTraits::zero(); @@ -40,12 +38,11 @@ KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, } else { const Scalar scaled_a = *a / numerical_scaling; const Scalar scaled_b = *b / numerical_scaling; - Scalar norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * - numerical_scaling; - Scalar sign = Kokkos::abs(*a) > Kokkos::abs(*b) ? *a : *b; - norm = Kokkos::copysign(norm, sign); - *c = *a / norm; - *s = *b / norm; + Scalar norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * numerical_scaling; + Scalar sign = Kokkos::abs(*a) > Kokkos::abs(*b) ? *a : *b; + norm = Kokkos::copysign(norm, sign); + *c = *a / norm; + *s = *b / norm; Scalar z = one; if (Kokkos::abs(*a) > Kokkos::abs(*b)) { @@ -60,10 +57,8 @@ KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, } template ::is_complex, - bool>::type = true> -KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, - Scalar* s) { + typename std::enable_if::is_complex, bool>::type = true> +KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, Scalar* s) { using mag_type = typename Kokkos::ArithTraits::mag_type; const Scalar one = Kokkos::ArithTraits::one(); @@ -78,13 +73,11 @@ KOKKOS_INLINE_FUNCTION void rotg_impl(Scalar* a, Scalar* b, Magnitude* c, } else { const Scalar scaled_a = Kokkos::abs(*a / numerical_scaling); const Scalar scaled_b = Kokkos::abs(*b / numerical_scaling); - mag_type norm = - Kokkos::abs(Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b)) * - numerical_scaling; - Scalar unit_a = *a / Kokkos::abs(*a); - *c = Kokkos::abs(*a) / 
norm; - *s = unit_a * Kokkos::conj(*b) / norm; - *a = unit_a * norm; + mag_type norm = Kokkos::abs(Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b)) * numerical_scaling; + Scalar unit_a = *a / Kokkos::abs(*a); + *c = Kokkos::abs(*a) / norm; + *s = unit_a * Kokkos::conj(*b) / norm; + *a = unit_a * norm; } } @@ -94,20 +87,17 @@ struct rotg_functor { MViewType c; SViewType s; - rotg_functor(SViewType const& a_, SViewType const& b_, MViewType const& c_, - SViewType const& s_) + rotg_functor(SViewType const& a_, SViewType const& b_, MViewType const& c_, SViewType const& s_) : a(a_), b(b_), c(c_), s(s_) {} KOKKOS_INLINE_FUNCTION - void operator()(int const) const { - rotg_impl(a.data(), b.data(), c.data(), s.data()); - } + void operator()(int const) const { rotg_impl(a.data(), b.data(), c.data(), s.data()); } }; /// \brief Compute Givens rotation coefficients. template -void Rotg_Invoke(ExecutionSpace const& space, SViewType const& a, - SViewType const& b, MViewType const& c, SViewType const& s) { +void Rotg_Invoke(ExecutionSpace const& space, SViewType const& a, SViewType const& b, MViewType const& c, + SViewType const& s) { Kokkos::RangePolicy rotg_policy(space, 0, 1); rotg_functor rotg_func(a, b, c, s); Kokkos::parallel_for("KokkosBlas::rotg", rotg_policy, rotg_func); diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_spec.hpp index bdf313e3d0aa..87618f12c991 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotg_spec.hpp @@ -42,16 +42,14 @@ struct rotg_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_ROTG_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct rotg_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTG_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rotg_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -63,34 +61,28 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - rotg_eti_spec_avail::value> + bool tpl_spec_avail = rotg_tpl_spec_avail::value, + bool eti_spec_avail = rotg_eti_spec_avail::value> struct Rotg { - static void rotg(ExecutionSpace const& space, SViewType const& a, - SViewType const& b, MViewType const& c, SViewType const& s); + static void rotg(ExecutionSpace const& space, SViewType const& a, SViewType const& b, MViewType const& c, + SViewType const& s); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rotg. template -struct Rotg { - static void rotg(ExecutionSpace const& space, SViewType const& a, - SViewType const& b, MViewType const& c, SViewType const& s) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::rotg[ETI]" - : "KokkosBlas::rotg[noETI]"); +struct Rotg { + static void rotg(ExecutionSpace const& space, SViewType const& a, SViewType const& b, MViewType const& c, + SViewType const& s) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::rotg[ETI]" + : "KokkosBlas::rotg[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::rotg<> ETI specialization for < %s, %s, %s >\n", - typeid(ExecutionSpace).name(), typeid(SViewType).name(), - typeid(MViewType).name()); + printf("KokkosBlas1::rotg<> ETI specialization for < %s, %s, %s >\n", typeid(ExecutionSpace).name(), + typeid(SViewType).name(), typeid(MViewType).name()); else { - printf("KokkosBlas1::rotg<> non-ETI specialization for < %s, %s, %s >\n", - typeid(ExecutionSpace).name(), typeid(SViewType).name(), - typeid(MViewType).name()); + printf("KokkosBlas1::rotg<> non-ETI specialization for < %s, %s, %s >\n", typeid(ExecutionSpace).name(), + typeid(SViewType).name(), typeid(MViewType).name()); } #endif Rotg_Invoke(space, a, b, c, s); @@ -109,14 +101,12 @@ struct Rotg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROTG_ETI_SPEC_DECL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + extern template struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ false, true>; // @@ -124,14 +114,12 @@ struct Rotg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROTG_ETI_SPEC_INST(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_impl.hpp index 91a2c7a1d8de..697cb7902ffa 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_impl.hpp @@ -36,9 +36,7 @@ struct 
rotm_functor { VectorView X, Y; ParamView param; - rotm_functor(VectorView const& X_, VectorView const& Y_, - ParamView const& param_) - : X(X_), Y(Y_), param(param_) {} + rotm_functor(VectorView const& X_, VectorView const& Y_, ParamView const& param_) : X(X_), Y(Y_), param(param_) {} KOKKOS_INLINE_FUNCTION void operator()(const minus_one_tag&, const int idx) const { @@ -63,11 +61,9 @@ struct rotm_functor { }; template -void Rotm_Invoke(execution_space const& space, VectorView const& X, - VectorView const& Y, ParamView const& param) { +void Rotm_Invoke(execution_space const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { using Scalar = typename VectorView::value_type; - static_assert(!Kokkos::ArithTraits::is_complex, - "rotm is not defined for complex types!"); + static_assert(!Kokkos::ArithTraits::is_complex, "rotm is not defined for complex types!"); Scalar const zero = Kokkos::ArithTraits::zero(); Scalar const one = Kokkos::ArithTraits::one(); @@ -82,24 +78,19 @@ void Rotm_Invoke(execution_space const& space, VectorView const& X, if (flag == -two) { return; } else if (flag == -one) { - Kokkos::RangePolicy< - execution_space, - typename rotm_functor::minus_one_tag> - rotm_policy(space, 0, X.extent(0)); + Kokkos::RangePolicy::minus_one_tag> rotm_policy( + space, 0, X.extent(0)); Kokkos::parallel_for("KokkosBlas1::rotm_minus_one", rotm_policy, myFunc); } else if (flag == zero) { - Kokkos::RangePolicy::zero_tag> - rotm_policy(space, 0, X.extent(0)); + Kokkos::RangePolicy::zero_tag> rotm_policy( + space, 0, X.extent(0)); Kokkos::parallel_for("KokkosBlas1::rotm_zero", rotm_policy, myFunc); } else if (flag == one) { - Kokkos::RangePolicy::one_tag> - rotm_policy(space, 0, X.extent(0)); + Kokkos::RangePolicy::one_tag> rotm_policy( + space, 0, X.extent(0)); Kokkos::parallel_for("KokkosBlas1::rotm_one", rotm_policy, myFunc); } else { - throw std::runtime_error( - "KokkosBlas::rotm: param(0) is not -2, -1, 0 or 1!"); + throw 
std::runtime_error("KokkosBlas::rotm: param(0) is not -2, -1, 0 or 1!"); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_spec.hpp index 854f2abacc49..5000b35fc37a 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotm_spec.hpp @@ -41,16 +41,14 @@ struct rotm_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_ROTM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct rotm_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotm_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -61,34 +59,27 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template < - class execution_space, class VectorView, class ParamView, - bool tpl_spec_avail = - rotm_tpl_spec_avail::value, - bool eti_spec_avail = - rotm_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = rotm_eti_spec_avail::value> struct Rotm { - static void rotm(execution_space const& space, VectorView const& X, - VectorView const& Y, ParamView const& param); + static void rotm(execution_space const& space, VectorView const& X, VectorView const& Y, ParamView const& param); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rotm. 
template -struct Rotm { - static void rotm(execution_space const& space, VectorView const& X, - VectorView const& Y, ParamView const& param) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::rotm[ETI]" - : "KokkosBlas::rotm[noETI]"); +struct Rotm { + static void rotm(execution_space const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::rotm[ETI]" + : "KokkosBlas::rotm[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::rotm<> ETI specialization for < %s, %s >\n", - typeid(VectorView).name(), typeid(ParamView).name()); + printf("KokkosBlas1::rotm<> ETI specialization for < %s, %s >\n", typeid(VectorView).name(), + typeid(ParamView).name()); else { - printf("KokkosBlas1::rotm<> non-ETI specialization for < %s, %s >\n", - typeid(VectorView).name(), typeid(ParamView).name()); + printf("KokkosBlas1::rotm<> non-ETI specialization for < %s, %s >\n", typeid(VectorView).name(), + typeid(ParamView).name()); } #endif Rotm_Invoke(space, X, Y, param); @@ -107,14 +98,12 @@ struct Rotm, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROTM_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; // @@ -122,14 +111,12 @@ struct Rotm, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROTM_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_impl.hpp index 
b35fd62ece46..558020e5a448 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_impl.hpp @@ -25,8 +25,7 @@ namespace KokkosBlas { namespace Impl { template -KOKKOS_INLINE_FUNCTION void rotmg_impl(DXView const& d1, DXView const& d2, - DXView const& x1, YView const& y1, +KOKKOS_INLINE_FUNCTION void rotmg_impl(DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, PView const& param) { using Scalar = typename DXView::non_const_value_type; @@ -133,8 +132,7 @@ KOKKOS_INLINE_FUNCTION void rotmg_impl(DXView const& d1, DXView const& d2, // Rescale d2, h21 and h22 if (d2() != zero) { - while ((Kokkos::abs(d2()) <= gammasqinv) || - (Kokkos::abs(d2()) >= gammasq)) { + while ((Kokkos::abs(d2()) <= gammasqinv) || (Kokkos::abs(d2()) >= gammasq)) { if (flag == zero) { h11 = one; h22 = one; @@ -182,8 +180,7 @@ struct rotmg_functor { YView y1; PView param; - rotmg_functor(DXView& d1_, DXView& d2_, DXView& x1_, const YView& y1_, - PView& param_) + rotmg_functor(DXView& d1_, DXView& d2_, DXView& x1_, const YView& y1_, PView& param_) : d1(d1_), d2(d2_), x1(x1_), y1(y1_), param(param_) {} KOKKOS_INLINE_FUNCTION @@ -191,12 +188,10 @@ struct rotmg_functor { }; template -void Rotmg_Invoke(execution_space const& space, DXView const& d1, - DXView const& d2, DXView const& x1, YView const& y1, +void Rotmg_Invoke(execution_space const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, PView const& param) { using Scalar = typename DXView::value_type; - static_assert(!Kokkos::ArithTraits::is_complex, - "rotmg is not defined for complex types!"); + static_assert(!Kokkos::ArithTraits::is_complex, "rotmg is not defined for complex types!"); rotmg_functor myFunc(d1, d2, x1, y1, param); Kokkos::RangePolicy rotmg_policy(space, 0, 1); diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_spec.hpp index 
b90a1586547b..caa44dda5dbb 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_rotmg_spec.hpp @@ -41,19 +41,16 @@ struct rotmg_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_ROTMG_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotmg_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTMG_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotmg_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -64,38 +61,30 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template < - class execution_space, class DXView, class YView, class PView, - bool tpl_spec_avail = - rotmg_tpl_spec_avail::value, - bool eti_spec_avail = - rotmg_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = rotmg_eti_spec_avail::value> struct Rotmg { - static void rotmg(execution_space const& space, DXView& d1, DXView& d2, - DXView& x1, YView& y1, PView& param); + static void rotmg(execution_space const& space, DXView& d1, DXView& d2, DXView& x1, YView& y1, PView& param); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rotmg. template -struct Rotmg { - static void rotmg(execution_space const& space, DXView& d1, DXView& d2, - DXView& x1, YView& y1, PView& param) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosBlas::rotmg[ETI]" - : "KokkosBlas::rotmg[noETI]"); +struct Rotmg { + static void rotmg(execution_space const& space, DXView& d1, DXView& d2, DXView& x1, YView& y1, PView& param) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::rotmg[ETI]" + : "KokkosBlas::rotmg[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::rotmg<> ETI specialization for < %s, %s, %s >\n", - typeid(DXView).name(), typeid(YView).name(), typeid(PView).name()); + printf("KokkosBlas1::rotmg<> ETI specialization for < %s, %s, %s >\n", typeid(DXView).name(), + typeid(YView).name(), typeid(PView).name()); else { - printf("KokkosBlas1::rotmg<> non-ETI specialization for < %s, %s, %s >\n", - typeid(DXView).name(), typeid(YView).name(), typeid(PView).name()); + printf("KokkosBlas1::rotmg<> non-ETI specialization for < %s, %s, %s >\n", typeid(DXView).name(), + typeid(YView).name(), typeid(PView).name()); } #endif - Rotmg_Invoke(space, d1, d2, x1, y1, - param); + Rotmg_Invoke(space, d1, d2, x1, y1, param); Kokkos::Profiling::popRegion(); } }; @@ -111,16 +100,13 @@ struct Rotmg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROTMG_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; // @@ -128,16 +114,13 @@ struct Rotmg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_ROTMG_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; #include diff 
--git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_impl.hpp index 541d9a4934f8..510ca3808f17 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_impl.hpp @@ -51,23 +51,16 @@ struct V_Scal_Functor { XV m_x; AV m_a; - V_Scal_Functor(const RV& r, const XV& x, const AV& a, - const SizeType startingColumn) - : m_r(r), m_x(x), m_a(a) { - static_assert(Kokkos::is_view::value, - "V_Scal_Functor: RV is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "V_Scal_Functor: AV is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "V_Scal_Functor: XV is not a Kokkos::View."); + V_Scal_Functor(const RV& r, const XV& x, const AV& a, const SizeType startingColumn) : m_r(r), m_x(x), m_a(a) { + static_assert(Kokkos::is_view::value, "V_Scal_Functor: RV is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "V_Scal_Functor: AV is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "V_Scal_Functor: XV is not a Kokkos::View."); static_assert(RV::rank == 1, "V_Scal_Functor: RV is not rank 1."); static_assert(AV::rank == 1, "V_Scal_Functor: AV is not rank 1."); static_assert(XV::rank == 1, "V_Scal_Functor: XV is not rank 1."); if (startingColumn != 0) { - m_a = Kokkos::subview( - a, - std::make_pair(startingColumn, static_cast(a.extent(0)))); + m_a = Kokkos::subview(a, std::make_pair(startingColumn, static_cast(a.extent(0)))); } } @@ -98,8 +91,7 @@ struct V_Scal_Functor { // 1. Y(i) = alpha*X(i) for alpha in -1,0,1 // 2. 
Y(i) = a*X(i) template -struct V_Scal_Functor { +struct V_Scal_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -107,8 +99,7 @@ struct V_Scal_Functor -void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av, - const XV& x, const SizeType startingColumn, int a = 2) { - static_assert(Kokkos::is_view::value, - "V_Scal_Generic: RV is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "V_Scal_Generic: XV is not a Kokkos::View."); +void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av, const XV& x, const SizeType startingColumn, + int a = 2) { + static_assert(Kokkos::is_view::value, "V_Scal_Generic: RV is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "V_Scal_Generic: XV is not a Kokkos::View."); static_assert(RV::rank == 1, "V_Scal_Generic: RV is not rank 1."); static_assert(XV::rank == 1, "V_Scal_Generic: XV is not rank 1."); diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_mv_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_mv_impl.hpp index da4d7a514949..a729e8502561 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_mv_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_mv_impl.hpp @@ -42,8 +42,7 @@ namespace Impl { // coefficient of zero has BLAS semantics of ignoring the // corresponding (multi)vector entry. This does not apply to // coefficients in the a vector, if they are used. 
-template +template struct MV_Scal_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -53,13 +52,11 @@ struct MV_Scal_Functor { XMV X_; aVector a_; - MV_Scal_Functor(const RMV& R, const XMV& X, const aVector& a, - const SizeType startingColumn) + MV_Scal_Functor(const RMV& R, const XMV& X, const aVector& a, const SizeType startingColumn) : numCols(X.extent(1)), R_(R), X_(X), a_(a) { if (startingColumn != 0) { - auto rng = - std::make_pair(startingColumn, static_cast(a.extent(0))); - a_ = Kokkos::subview(a, rng); + auto rng = std::make_pair(startingColumn, static_cast(a.extent(0))); + a_ = Kokkos::subview(a, rng); } } @@ -124,8 +121,7 @@ struct MV_Scal_Functor { // This version works by partial specialization on aVector. // In this partial specialization, aVector is a scalar. template -struct MV_Scal_Functor { +struct MV_Scal_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -134,8 +130,7 @@ struct MV_Scal_Functor +template struct MV_Scal_Unroll_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -203,13 +197,11 @@ struct MV_Scal_Unroll_Functor { XMV m_x; aVector m_a; - MV_Scal_Unroll_Functor(const RMV& r, const XMV& x, const aVector& a, - const SizeType startingColumn) + MV_Scal_Unroll_Functor(const RMV& r, const XMV& x, const aVector& a, const SizeType startingColumn) : m_r(r), m_x(x), m_a(a) { if (startingColumn != 0) { - auto rng = - std::make_pair(startingColumn, static_cast(a.extent(0))); - m_a = Kokkos::subview(a, rng); + auto rng = std::make_pair(startingColumn, static_cast(a.extent(0))); + m_a = Kokkos::subview(a, rng); } } @@ -254,8 +246,7 @@ struct MV_Scal_Unroll_Functor { // than a vector of coefficients) a. The number of columns in X, // UNROLL, is a compile-time constant. 
template -struct MV_Scal_Unroll_Functor { +struct MV_Scal_Unroll_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -263,8 +254,7 @@ struct MV_Scal_Unroll_Functor -void MV_Scal_Unrolled(const execution_space& space, const RMV& r, - const aVector& av, const XMV& x, +template +void MV_Scal_Unrolled(const execution_space& space, const RMV& r, const aVector& av, const XMV& x, const SizeType startingColumn, int a = 2) { if (a == 0) { - MV_Scal_Unroll_Functor op( - r, x, av, startingColumn); + MV_Scal_Unroll_Functor op(r, x, av, startingColumn); const SizeType numRows = x.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); Kokkos::parallel_for("KokkosBlas::Scal::MV::S0", policy, op); return; } if (a == -1) { - MV_Scal_Unroll_Functor op( - r, x, av, startingColumn); + MV_Scal_Unroll_Functor op(r, x, av, startingColumn); const SizeType numRows = x.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); Kokkos::parallel_for("KokkosBlas::Scal::MV::S1", policy, op); return; } if (a == 1) { - MV_Scal_Unroll_Functor op( - r, x, av, startingColumn); + MV_Scal_Unroll_Functor op(r, x, av, startingColumn); const SizeType numRows = x.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); Kokkos::parallel_for("KokkosBlas::Scal::MV::S2", policy, op); @@ -350,8 +335,7 @@ void MV_Scal_Unrolled(const execution_space& space, const RMV& r, } // a arbitrary (not -1, 0, or 1) - MV_Scal_Unroll_Functor op( - r, x, av, startingColumn); + MV_Scal_Unroll_Functor op(r, x, av, startingColumn); const SizeType numRows = x.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); Kokkos::parallel_for("KokkosBlas::Scal::MV::S3", policy, op); @@ -371,36 +355,30 @@ void MV_Scal_Unrolled(const execution_space& space, const RMV& r, // Any literal coefficient of zero has BLAS semantics of ignoring the // corresponding (multi)vector entry. This does NOT apply to // coefficient(s) in av, if used. 
-template -void MV_Scal_Generic(const execution_space& space, const RVector& r, - const aVector& av, const XVector& x, +template +void MV_Scal_Generic(const execution_space& space, const RVector& r, const aVector& av, const XVector& x, const SizeType startingColumn, int a = 2) { const SizeType numRows = x.extent(0); Kokkos::RangePolicy policy(space, 0, numRows); if (a == 0) { - MV_Scal_Functor op(r, x, av, - startingColumn); + MV_Scal_Functor op(r, x, av, startingColumn); Kokkos::parallel_for("KokkosBlas::Scal::MV::S4", policy, op); return; } if (a == -1) { - MV_Scal_Functor op(r, x, av, - startingColumn); + MV_Scal_Functor op(r, x, av, startingColumn); Kokkos::parallel_for("KokkosBlas::Scal::MV::S5", policy, op); return; } if (a == 1) { - MV_Scal_Functor op(r, x, av, - startingColumn); + MV_Scal_Functor op(r, x, av, startingColumn); Kokkos::parallel_for("KokkosBlas::Scal::MV::S6", policy, op); return; } // a arbitrary (not -1, 0, or 1) - MV_Scal_Functor op(r, x, av, - startingColumn); + MV_Scal_Functor op(r, x, av, startingColumn); Kokkos::parallel_for("KokkosBlas::Scal::MV::S7", policy, op); } @@ -419,8 +397,7 @@ void MV_Scal_Generic(const execution_space& space, const RVector& r, // corresponding (multi)vector entry. This does NOT apply to // coefficient(s) in av, if used. 
template -void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, - const AV& av, const XMV& x, int a = 2) { +void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, const AV& av, const XMV& x, int a = 2) { const SizeType numCols = x.extent(1); #if KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL <= 2 @@ -437,8 +414,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(X_cur) XMV2D; typedef decltype(R_cur) RMV2D; - MV_Scal_Unrolled( - space, R_cur, av, X_cur, j, a); + MV_Scal_Unrolled(space, R_cur, av, X_cur, j, a); } for (; j + 4 <= numCols; j += 4) { const std::pair rng(j, j + 4); @@ -447,8 +423,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(X_cur) XMV2D; typedef decltype(R_cur) RMV2D; - MV_Scal_Unrolled( - space, R_cur, av, X_cur, j, a); + MV_Scal_Unrolled(space, R_cur, av, X_cur, j, a); } for (; j < numCols; ++j) { // RMV and XMV need to turn 1-D. @@ -457,8 +432,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(r_cur) RV; typedef decltype(x_cur) XV; - V_Scal_Generic(space, r_cur, av, - x_cur, j, a); + V_Scal_Generic(space, r_cur, av, x_cur, j, a); } #else // KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL > 2 @@ -470,73 +444,25 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(r_0) RV; typedef decltype(x_0) XV; - V_Scal_Generic(space, r_0, av, x_0, - 0, a); + V_Scal_Generic(space, r_0, av, x_0, 0, a); break; } - case 2: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 3: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 4: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 5: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 6: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 7: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 8: - MV_Scal_Unrolled(space, r, av, - x, 0, a); - break; - case 9: - MV_Scal_Unrolled(space, r, av, - x, 0, a); 
- break; - case 10: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - case 11: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - case 12: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - case 13: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - case 14: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - case 15: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - case 16: - MV_Scal_Unrolled( - space, r, av, x, 0, a); - break; - default: - MV_Scal_Generic(space, r, av, x, - 0, a); + case 2: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 3: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 4: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 5: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 6: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 7: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 8: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 9: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 10: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 11: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 12: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 13: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 14: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 15: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + case 16: MV_Scal_Unrolled(space, r, av, x, 0, a); break; + default: MV_Scal_Generic(space, r, av, x, 0, a); } #endif // KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL @@ -556,27 +482,23 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, // Any literal coefficient of zero has BLAS semantics of ignoring the // corresponding (multi)vector entry. This does NOT apply to // coefficient(s) in av, if used. 
-template -void MV_Scal_Invoke_Right(const execution_space& space, const RMV& r, - const aVector& av, const XMV& x, int a = 2) { +template +void MV_Scal_Invoke_Right(const execution_space& space, const RMV& r, const aVector& av, const XMV& x, int a = 2) { const SizeType numCols = x.extent(1); if (numCols == 1) { - typedef Kokkos::View + typedef Kokkos::View RV; - typedef Kokkos::View + typedef Kokkos::View XV; RV r_0 = Kokkos::subview(r, Kokkos::ALL(), 0); XV x_0 = Kokkos::subview(x, Kokkos::ALL(), 0); - V_Scal_Generic(space, r_0, - av, x_0, a); + V_Scal_Generic(space, r_0, av, x_0, a); } else { - MV_Scal_Generic(space, r, av, - x, a); + MV_Scal_Generic(space, r, av, x, a); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_spec.hpp index 38972b222328..70a95d33e2b3 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_scal_spec.hpp @@ -29,8 +29,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct scal_eti_spec_avail { enum : bool { value = false }; }; @@ -44,18 +43,16 @@ struct scal_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_SCAL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct scal_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SCAL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct scal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -65,33 +62,27 @@ struct scal_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct scal_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct scal_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct scal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct scal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -103,28 +94,22 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template ::value, - bool 
eti_spec_avail = - scal_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = scal_eti_spec_avail::value> struct Scal { - static void scal(const execution_space& space, const RV& R, const AV& A, - const XV& X); + static void scal(const execution_space& space, const RV& R, const AV& A, const XV& X); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Scal for single vectors (1-D Views). template -struct Scal { +struct Scal { typedef typename XV::non_const_value_type AV; typedef typename XV::size_type size_type; typedef Kokkos::ArithTraits ATA; - static void scal(const execution_space& space, const RV& R, const AV& alpha, - const XV& X) { + static void scal(const execution_space& space, const RV& R, const AV& alpha, const XV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Scal<1-D>: RV is not a Kokkos::View."); @@ -137,18 +122,16 @@ struct Scal: " "XV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::scal[ETI]" - : "KokkosBlas::scal[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::scal[ETI]" + : "KokkosBlas::scal[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::scal<1D> ETI specialization for < %s , %s , %s >\n", - typeid(RV).name(), typeid(AV).name(), typeid(XV).name()); + printf("KokkosBlas1::scal<1D> ETI specialization for < %s , %s , %s >\n", typeid(RV).name(), typeid(AV).name(), + typeid(XV).name()); else - printf( - "KokkosBlas1::scal<1D> non-ETI specialization for < %s , %s , %s >\n", - typeid(RV).name(), typeid(AV).name(), typeid(XV).name()); + printf("KokkosBlas1::scal<1D> non-ETI specialization for < %s , %s , %s >\n", typeid(RV).name(), + typeid(AV).name(), typeid(XV).name()); #endif const size_type numRows = X.extent(0); @@ -163,12 +146,10 @@ struct Scal(INT_MAX)) { typedef int index_type; - V_Scal_Generic(space, R, alpha, - X, a); + V_Scal_Generic(space, R, alpha, X, a); } else { typedef typename XV::size_type index_type; - V_Scal_Generic(space, R, alpha, - X, a); + V_Scal_Generic(space, R, alpha, X, a); } Kokkos::Profiling::popRegion(); } @@ -181,13 +162,11 @@ struct Scal -struct Scal { +struct Scal { typedef typename XMV::size_type size_type; typedef Kokkos::ArithTraits ATA; - static void scal(const execution_space& space, const RMV& R, const AV& av, - const XMV& X) { + static void scal(const execution_space& space, const RMV& R, const AV& av, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Scal<2-D>: RMV is not a Kokkos::View."); @@ -206,31 +185,26 @@ struct Scal: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::scal[ETI]" - : "KokkosBlas::scal[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::scal[ETI]" + : "KokkosBlas::scal[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::scal<2D> ETI specialization for < %s , %s , %s >\n", - typeid(RMV).name(), typeid(AV).name(), typeid(XMV).name()); + printf("KokkosBlas1::scal<2D> ETI specialization for < %s , %s , %s >\n", typeid(RMV).name(), typeid(AV).name(), + typeid(XMV).name()); else - printf( - "KokkosBlas1::scal<2D> non-ETI specialization for < %s , %s , %s >\n", - typeid(RMV).name(), typeid(AV).name(), typeid(XMV).name()); + printf("KokkosBlas1::scal<2D> non-ETI specialization for < %s , %s , %s >\n", typeid(RMV).name(), + typeid(AV).name(), typeid(XMV).name()); #endif const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); const int a = (av.extent(0) == 0) ? 0 : 2; - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { typedef int index_type; - MV_Scal_Invoke_Left(space, R, - av, X, a); + MV_Scal_Invoke_Left(space, R, av, X, a); } else { typedef typename XMV::size_type index_type; - MV_Scal_Invoke_Left(space, R, - av, X, a); + MV_Scal_Invoke_Left(space, R, av, X, a); } Kokkos::Profiling::popRegion(); } @@ -243,14 +217,13 @@ struct Scal -struct Scal { +struct Scal { typedef typename XMV::non_const_value_type AV; typedef typename XMV::size_type size_type; typedef Kokkos::ArithTraits ATA; - static void scal(const execution_space& space, const RMV& R, const AV& alpha, - const XMV& X) { + static void scal(const execution_space& space, const RMV& R, const AV& alpha, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Scal<2-D, AV=scalar>: RMV is not a Kokkos::View."); @@ -263,18 +236,16 @@ struct Scal: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosBlas::scal[ETI]" - : "KokkosBlas::scal[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::scal[ETI]" + : "KokkosBlas::scal[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::scal<2D> ETI specialization for < %s , %s , %s >\n", - typeid(RMV).name(), typeid(AV).name(), typeid(XMV).name()); + printf("KokkosBlas1::scal<2D> ETI specialization for < %s , %s , %s >\n", typeid(RMV).name(), typeid(AV).name(), + typeid(XMV).name()); else - printf( - "KokkosBlas1::scal<2D> non-ETI specialization for < %s , %s , %s >\n", - typeid(RMV).name(), typeid(AV).name(), typeid(XMV).name()); + printf("KokkosBlas1::scal<2D> non-ETI specialization for < %s , %s , %s >\n", typeid(RMV).name(), + typeid(AV).name(), typeid(XMV).name()); #endif const size_type numRows = X.extent(0); @@ -288,17 +259,14 @@ struct Scal(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { typedef int index_type; - MV_Scal_Invoke_Left( - space, R, alpha, X, a); + MV_Scal_Invoke_Left(space, R, alpha, X, + a); } else { typedef typename XMV::size_type index_type; - MV_Scal_Invoke_Left( - space, R, alpha, X, a); + MV_Scal_Invoke_Left(space, R, alpha, X, + a); } Kokkos::Profiling::popRegion(); } @@ -315,26 +283,22 @@ struct Scal, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SCAL_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; -#define KOKKOSBLAS1_SCAL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct Scal< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define 
KOKKOSBLAS1_SCAL_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -343,50 +307,38 @@ struct Scal, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; \ - extern template struct Scal< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; \ + extern template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; -#define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct Scal< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2, false, true>; \ - template struct Scal< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2, false, true>; \ + template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_serial_scal_impl.hpp 
b/packages/kokkos-kernels/blas/impl/KokkosBlas1_serial_scal_impl.hpp index 4de4f18cc28d..d78384192939 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_serial_scal_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_serial_scal_impl.hpp @@ -28,8 +28,7 @@ namespace Impl { struct SerialScaleInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -39,10 +38,8 @@ struct SerialScaleInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (as0 > as1) for (int i = 0; i < m; ++i) invoke(n, alpha, A + i * as0, as1); else diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_set_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_set_impl.hpp index 38604dc4b251..037720253bca 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_set_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_set_impl.hpp @@ -30,8 +30,7 @@ namespace Impl { struct SerialSetInternal { template KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -41,10 +40,8 @@ struct SerialSetInternal { } template - KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ScalarType alpha, + 
/* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (as0 > as1) for (int i = 0; i < m; ++i) invoke(n, alpha, A + i * as0, as1); else @@ -59,32 +56,22 @@ struct SerialSetInternal { /// ================== struct TeamSetInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), - [&](const int &i) { A[i * as0] = alpha; }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { A[i * as0] = alpha; }); // member.team_barrier(); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (m > n) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - SerialSetInternal::invoke(n, alpha, A + i * as0, as1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), + [&](const int &i) { SerialSetInternal::invoke(n, alpha, A + i * as0, as1); }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int &j) { - SerialSetInternal::invoke(m, alpha, A + j * as1, as0); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), + [&](const int &j) { SerialSetInternal::invoke(m, alpha, A + j * as1, as0); }); } // member.team_barrier(); return 0; @@ -96,36 +83,24 @@ struct TeamSetInternal { /// ======================== struct TeamVectorSetInternal { template - KOKKOS_INLINE_FUNCTION static 
int invoke(const MemberType &member, - const int m, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { A[i * as0] = alpha; }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { A[i * as0] = alpha; }); // member.team_barrier(); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (m > n) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { A[i * as0 + j * as1] = alpha; }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), [&](const int &j) { A[i * as0 + j * as1] = alpha; }); + }); } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, m), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), - [&](const int &j) { A[i * as0 + j * as1] = alpha; }); - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), [&](const int &i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { A[i * as0 + j * as1] = alpha; }); + }); } // member.team_barrier(); return 0; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_impl.hpp index 864c9835414d..222982dc24c8 100644 --- 
a/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_impl.hpp @@ -51,8 +51,7 @@ struct V_Sum_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::V_Sum_Functor: " "X is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_Sum_Functor: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -75,11 +74,9 @@ struct Sum_MV_Functor { RV r; XV x; - size_type - teamsPerVec; // number of teams collectively performing a dot product + size_type teamsPerVec; // number of teams collectively performing a dot product - Sum_MV_Functor(const RV& r_, const XV& x_, int teamsPerVec_) - : r(r_), x(x_), teamsPerVec(teamsPerVec_) {} + Sum_MV_Functor(const RV& r_, const XV& x_, int teamsPerVec_) : r(r_), x(x_), teamsPerVec(teamsPerVec_) {} KOKKOS_INLINE_FUNCTION void operator()(const TeamMem& t) const { @@ -92,12 +89,10 @@ struct Sum_MV_Functor { value_type localResult = AT::zero(); Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(t, begin, end), - [&](size_type k, value_type& update) { update += x(k, i); }, + Kokkos::TeamThreadRange(t, begin, end), [&](size_type k, value_type& update) { update += x(k, i); }, localResult); - Kokkos::single(Kokkos::PerTeam(t), - [&]() { Kokkos::atomic_add(&r(i), localResult); }); + Kokkos::single(Kokkos::PerTeam(t), [&]() { Kokkos::atomic_add(&r(i), localResult); }); } }; @@ -120,27 +115,23 @@ void V_Sum_Invoke(const execution_space& space, const RV& r, const XV& X) { template void MV_Sum_Invoke( const execution_space& space, const RV& r, const XV& x, - typename std::enable_if::accessible>::type* = + typename std::enable_if::accessible>::type* = nullptr) { if (r.extent(0) != x.extent(1)) { std::ostringstream oss; - oss << "KokkosBlas::Sum (rank-2): result vector has wrong length (" - << r.extent(0) << ", but x has " << x.extent(1) << " columns)"; 
+ oss << "KokkosBlas::Sum (rank-2): result vector has wrong length (" << r.extent(0) << ", but x has " << x.extent(1) + << " columns)"; throw std::runtime_error(oss.str()); } // Zero out the result vector - Kokkos::deep_copy( - space, r, Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(space, r, Kokkos::ArithTraits::zero()); size_type teamsPerVec; - KokkosBlas::Impl::multipleReductionWorkDistribution( - x.extent(0), x.extent(1), teamsPerVec); + KokkosBlas::Impl::multipleReductionWorkDistribution(x.extent(0), x.extent(1), + teamsPerVec); size_type numTeams = x.extent(1) * teamsPerVec; Kokkos::TeamPolicy pol(space, numTeams, Kokkos::AUTO); - Kokkos::parallel_for( - "KokkosBlas1::Sum::S1", pol, - Sum_MV_Functor(r, x, teamsPerVec)); + Kokkos::parallel_for("KokkosBlas1::Sum::S1", pol, + Sum_MV_Functor(r, x, teamsPerVec)); } // Version for when a temporary result view is needed (implemented in terms of @@ -148,15 +139,11 @@ void MV_Sum_Invoke( template void MV_Sum_Invoke( const execution_space& space, const RV& r, const XV& x, - typename std::enable_if::accessible>::type* = - nullptr) { - Kokkos::View - tempResult( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Sum temp result"), - r.extent(0)); - MV_Sum_Invoke( - space, tempResult, x); + typename std::enable_if< + !Kokkos::SpaceAccessibility::accessible>::type* = nullptr) { + Kokkos::View tempResult( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Sum temp result"), r.extent(0)); + MV_Sum_Invoke(space, tempResult, x); Kokkos::deep_copy(space, r, tempResult); space.fence(); } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_spec.hpp index 458e7ffdb792..6df41e030902 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_sum_spec.hpp @@ -43,17 +43,14 @@ struct sum_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_SUM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct sum_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SUM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct sum_eti_spec_avail >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -63,20 +60,16 @@ struct sum_eti_spec_avail { // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_SUM_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct sum_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SUM_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct sum_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -88,10 +81,9 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template < - class execution_space, class RMV, class XMV, int rank = XMV::rank, - bool tpl_spec_avail = sum_tpl_spec_avail::value, - bool eti_spec_avail = sum_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = sum_eti_spec_avail::value> struct Sum { static void sum(const execution_space& space, const RMV& R, const XMV& X); }; @@ -99,8 +91,7 @@ struct Sum { #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Sum for single vectors (1-D Views). 
template -struct Sum { +struct Sum { typedef typename XMV::size_type size_type; static void sum(const execution_space& space, const RMV& R, const XMV& X) { @@ -116,17 +107,14 @@ struct Sum: " "XMV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::sum[ETI]" - : "KokkosBlas::sum[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::sum[ETI]" + : "KokkosBlas::sum[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::sum<> ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::sum<> ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::sum<> non-ETI specialization for < %s , %s >\n", - typeid(RMV).name(), typeid(XMV).name()); + printf("KokkosBlas1::sum<> non-ETI specialization for < %s , %s >\n", typeid(RMV).name(), typeid(XMV).name()); } #endif const size_type numRows = X.extent(0); @@ -142,8 +130,7 @@ struct Sum -struct Sum { +struct Sum { typedef typename XMV::size_type size_type; static void sum(const execution_space& space, const RV& R, const XMV& X) { @@ -159,16 +146,13 @@ struct Sum: " "XMV is not rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::sum[ETI]" - : "KokkosBlas::sum[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::sum[ETI]" + : "KokkosBlas::sum[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::sum<> ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::sum<> ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); else { - printf("KokkosBlas1::sum<> non-ETI specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XMV).name()); + printf("KokkosBlas1::sum<> non-ETI specialization for < %s , %s >\n", typeid(RV).name(), typeid(XMV).name()); } #endif @@ -178,16 +162,13 @@ struct Sum(INT_MAX)) { - V_Sum_Invoke(space, - R0, X0); + V_Sum_Invoke(space, R0, X0); } else { typedef std::int64_t index_type; - V_Sum_Invoke( - space, R0, X0); + V_Sum_Invoke(space, R0, X0); } } else { - if (numRows < static_cast(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { MV_Sum_Invoke(space, R, X); } else { typedef std::int64_t index_type; @@ -209,14 +190,11 @@ struct Sum >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SUM_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Sum< \ + EXEC_SPACE, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -224,13 +202,11 @@ struct Sum >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SUM_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Sum >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -240,17 +216,13 @@ struct Sum, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SUM_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Sum< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; // @@ -258,17 +230,13 @@ 
struct Sum, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_SUM_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Sum< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_impl.hpp index 32a13d646988..7d4d22b514ca 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_impl.hpp @@ -42,8 +42,7 @@ struct swap_functor { }; template -void Swap_Invoke(ExecutionSpace const& space, XVector const& X, - YVector const& Y) { +void Swap_Invoke(ExecutionSpace const& space, XVector const& X, YVector const& Y) { Kokkos::RangePolicy swap_policy(space, 0, X.extent(0)); swap_functor swap_func(X, Y); Kokkos::parallel_for("KokkosBlas::swap", swap_policy, swap_func); diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_spec.hpp index db09a62f8f0a..749552a81c30 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_swap_spec.hpp @@ -44,15 +44,13 @@ struct swap_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_SWAP_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct swap_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SWAP_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct swap_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -64,34 +62,26 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - swap_eti_spec_avail::value> + bool tpl_spec_avail = swap_tpl_spec_avail::value, + bool eti_spec_avail = swap_eti_spec_avail::value> struct Swap { - static void swap(ExecutionSpace const& space, XVector const& X, - YVector const& Y); + static void swap(ExecutionSpace const& space, XVector const& X, YVector const& Y); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Swap. template -struct Swap { - static void swap(ExecutionSpace const& space, XVector const& X, - YVector const& Y) { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::swap[ETI]" - : "KokkosBlas::swap[noETI]"); +struct Swap { + static void swap(ExecutionSpace const& space, XVector const& X, YVector const& Y) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::swap[ETI]" + : "KokkosBlas::swap[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::swap<> ETI specialization for < %s, %s, %s >\n", - typeid(ExecutionSpace).name(), typeid(XVector).name(), - typeid(YVector).name()); + printf("KokkosBlas1::swap<> ETI specialization for < %s, %s, %s >\n", typeid(ExecutionSpace).name(), + typeid(XVector).name(), typeid(YVector).name()); else { - printf("KokkosBlas1::swap<> non-ETI specialization for < %s, %s, %s >\n", - typeid(ExecutionSpace).name(), typeid(XVector).name(), - typeid(YVector).name()); + printf("KokkosBlas1::swap<> non-ETI specialization for < %s, %s, %s >\n", typeid(ExecutionSpace).name(), + typeid(XVector).name(), typeid(YVector).name()); } #endif Swap_Invoke(space, X, Y); @@ -110,13 +100,11 @@ struct Swap, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_SWAP_ETI_SPEC_DECL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + extern template struct Swap< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; // @@ -124,13 +112,11 @@ struct Swap, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSBLAS1_SWAP_ETI_SPEC_INST(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template struct Swap< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_abs_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_abs_spec.hpp index bcd95457384b..a5140a9b34a5 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_abs_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_abs_spec.hpp @@ -32,24 +32,20 @@ struct team_abs_tpl_spec_avail { }; // Unification and Specialization layer -template ::value> +template ::value> struct TeamAbs { typedef Kokkos::ArithTraits ATS; - static 
KOKKOS_INLINE_FUNCTION void team_abs(const TeamType& team, const RV& R, - const XV& X); + static KOKKOS_INLINE_FUNCTION void team_abs(const TeamType& team, const RV& R, const XV& X); }; template struct TeamAbs { typedef Kokkos::ArithTraits ATS; - static KOKKOS_INLINE_FUNCTION void team_abs(const TeamType& team, const RV& R, - const XV& X) { + static KOKKOS_INLINE_FUNCTION void team_abs(const TeamType& team, const RV& R, const XV& X) { int N = X.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), - [&](const int& i) { R(i) = ATS::abs(X(i)); }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { R(i) = ATS::abs(X(i)); }); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_axpby_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_axpby_spec.hpp index 356be339c303..4cd42ae37d41 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_axpby_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_axpby_spec.hpp @@ -33,24 +33,20 @@ struct team_axpby_tpl_spec_avail { // Unification and Specialization layer template ::value> + bool tpl_spec_avail = team_axpby_tpl_spec_avail::value> struct TeamAXPBY { - static KOKKOS_INLINE_FUNCTION void team_axpby( - const TeamType& team, const typename XVector::non_const_value_type& a, - const XVector& x, const typename YVector::non_const_value_type& b, - const YVector& y); + static KOKKOS_INLINE_FUNCTION void team_axpby(const TeamType& team, const typename XVector::non_const_value_type& a, + const XVector& x, const typename YVector::non_const_value_type& b, + const YVector& y); }; template struct TeamAXPBY { - static KOKKOS_INLINE_FUNCTION void team_axpby( - const TeamType& team, const typename XVector::non_const_value_type& a, - const XVector& x, const typename YVector::non_const_value_type& b, - const YVector& y) { + static KOKKOS_INLINE_FUNCTION void team_axpby(const TeamType& team, const typename XVector::non_const_value_type& a, + const XVector& x, 
const typename YVector::non_const_value_type& b, + const YVector& y) { const int N = x.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), - [&](const int& i) { y(i) = b * y(i) + a * x(i); }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { y(i) = b * y(i) + a * x(i); }); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_dot_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_dot_spec.hpp index 041920d109c0..5c5e4ea85d75 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_dot_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_dot_spec.hpp @@ -32,27 +32,20 @@ struct team_dot_tpl_spec_avail { }; // Unification and Specialization layer -template ::value> +template ::value> struct TeamDot { - typedef Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type> - IPT; + typedef Kokkos::Details::InnerProductSpaceTraits IPT; typedef typename IPT::dot_type dot_type; - static KOKKOS_INLINE_FUNCTION dot_type team_dot(const TeamType& team, - const XV& X, const YV& Y); + static KOKKOS_INLINE_FUNCTION dot_type team_dot(const TeamType& team, const XV& X, const YV& Y); }; template struct TeamDot { - typedef Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type> - IPT; + typedef Kokkos::Details::InnerProductSpaceTraits IPT; typedef typename IPT::dot_type dot_type; - static KOKKOS_INLINE_FUNCTION dot_type team_dot(const TeamType& team, - const XV& X, const YV& Y) { + static KOKKOS_INLINE_FUNCTION dot_type team_dot(const TeamType& team, const XV& X, const YV& Y) { dot_type result = 0.0; // Kokkos::ArithTraitszero(); int N = X.extent(0); Kokkos::parallel_reduce( diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_mult_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_mult_spec.hpp index 381802eeb06b..6138257582f4 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_mult_spec.hpp +++ 
b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_mult_spec.hpp @@ -33,25 +33,23 @@ struct team_mult_tpl_spec_avail { // Unification and Specialization layer template ::value> + bool tpl_spec_avail = team_mult_tpl_spec_avail::value> struct TeamMult { - static KOKKOS_INLINE_FUNCTION void team_mult( - const TeamType& team, const typename YVector::non_const_value_type& gamma, - const YVector& y, const typename AVector::non_const_value_type& alpha, - const AVector& a, const XVector& x); + static KOKKOS_INLINE_FUNCTION void team_mult(const TeamType& team, + const typename YVector::non_const_value_type& gamma, const YVector& y, + const typename AVector::non_const_value_type& alpha, const AVector& a, + const XVector& x); }; template struct TeamMult { - static KOKKOS_INLINE_FUNCTION void team_mult( - const TeamType& team, const typename YVector::non_const_value_type& gamma, - const YVector& y, const typename AVector::non_const_value_type& alpha, - const AVector& a, const XVector& x) { + static KOKKOS_INLINE_FUNCTION void team_mult(const TeamType& team, + const typename YVector::non_const_value_type& gamma, const YVector& y, + const typename AVector::non_const_value_type& alpha, const AVector& a, + const XVector& x) { const int N = x.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { - y(i) = gamma * y(i) + alpha * a(i) * x(i); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), + [&](const int& i) { y(i) = gamma * y(i) + alpha * a(i) * x(i); }); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_nrm2_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_nrm2_spec.hpp index ef050cb73bf1..bf486d88e804 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_nrm2_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_nrm2_spec.hpp @@ -32,31 +32,22 @@ struct team_nrm2_tpl_spec_avail { }; // Unification and Specialization layer -template ::value> +template ::value> struct TeamNrm2 { - 
typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type>::mag_type mag_type; - typedef Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type> - IPT; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; + typedef Kokkos::Details::InnerProductSpaceTraits IPT; typedef Kokkos::ArithTraits AT; - static KOKKOS_INLINE_FUNCTION mag_type team_nrm2(const TeamType& team, - const XV& X); + static KOKKOS_INLINE_FUNCTION mag_type team_nrm2(const TeamType& team, const XV& X); }; template struct TeamNrm2 { - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type>::mag_type mag_type; - typedef Kokkos::Details::InnerProductSpaceTraits< - typename XV::non_const_value_type> - IPT; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; + typedef Kokkos::Details::InnerProductSpaceTraits IPT; typedef Kokkos::ArithTraits AT; - static KOKKOS_INLINE_FUNCTION mag_type team_nrm2(const TeamType& team, - const XV& X) { + static KOKKOS_INLINE_FUNCTION mag_type team_nrm2(const TeamType& team, const XV& X) { mag_type result = 0.0; // Kokkos::ArithTraitszero(); int N = X.extent(0); Kokkos::parallel_reduce( diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_impl.hpp index dc3aa4d42e2e..2ce2eece5edd 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_impl.hpp @@ -28,32 +28,22 @@ namespace Impl { /// ==================== struct TeamScaleInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), - [&](const int &i) { A[i * as0] *= alpha; }); + KOKKOS_INLINE_FUNCTION static int invoke(const 
MemberType &member, const int m, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { A[i * as0] *= alpha; }); // member.team_barrier(); return 0; } template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (m > n) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - SerialScaleInternal::invoke(n, alpha, A + i * as0, as1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), + [&](const int &i) { SerialScaleInternal::invoke(n, alpha, A + i * as0, as1); }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), [&](const int &j) { - SerialScaleInternal::invoke(m, alpha, A + j * as1, as0); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), + [&](const int &j) { SerialScaleInternal::invoke(m, alpha, A + j * as1, as0); }); } // member.team_barrier(); return 0; @@ -65,36 +55,25 @@ struct TeamScaleInternal { /// ======================== struct TeamVectorScaleInternal { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), - [&](const int &i) { A[i * as0] *= alpha; }); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { A[i * as0] *= alpha; }); // member.team_barrier(); return 0; } template - 
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const int m, const int n, - const ScalarType alpha, - /* */ ValueType *KOKKOS_RESTRICT A, - const int as0, const int as1) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ScalarType alpha, + /* */ ValueType *KOKKOS_RESTRICT A, const int as0, const int as1) { if (as0 > as1) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, m), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, n), - [&](const int &j) { A[i * as0 + j * as1] *= alpha; }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n), + [&](const int &j) { A[i * as0 + j * as1] *= alpha; }); + }); } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, m), [&](const int &i) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, n), - [&](const int &j) { A[i * as0 + j * as1] *= alpha; }); - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), [&](const int &i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { A[i * as0 + j * as1] *= alpha; }); + }); } // member.team_barrier(); return 0; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_spec.hpp index ac6d36306aef..3782fb4081cb 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_scal_spec.hpp @@ -32,22 +32,18 @@ struct team_scal_tpl_spec_avail { }; // Unification and Specialization layer -template ::value> +template ::value> struct TeamScal { - static KOKKOS_INLINE_FUNCTION void team_scal( - const TeamType& team, const RV& R, - const typename XV::non_const_value_type& a, const XV& X); + static KOKKOS_INLINE_FUNCTION void team_scal(const TeamType& team, const RV& R, + const typename 
XV::non_const_value_type& a, const XV& X); }; template struct TeamScal { - static KOKKOS_INLINE_FUNCTION void team_scal( - const TeamType& team, const RV& R, - const typename XV::non_const_value_type& a, const XV& X) { + static KOKKOS_INLINE_FUNCTION void team_scal(const TeamType& team, const RV& R, + const typename XV::non_const_value_type& a, const XV& X) { const int N = X.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), - [&](const int& i) { R(i) = a * X(i); }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { R(i) = a * X(i); }); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_update_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_update_spec.hpp index 94a9221f4eaf..2fbf071d984f 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_update_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_team_update_spec.hpp @@ -33,27 +33,24 @@ struct team_update_tpl_spec_avail { // Unification and Specialization layer template ::value> + bool tpl_spec_avail = team_update_tpl_spec_avail::value> struct TeamUpdate { - static KOKKOS_INLINE_FUNCTION void team_update( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const XVector& x, const typename YVector::non_const_value_type& beta, - const YVector& y, const typename ZVector::non_const_value_type& gamma, - const ZVector& z); + static KOKKOS_INLINE_FUNCTION void team_update(const TeamType& team, + const typename XVector::non_const_value_type& alpha, const XVector& x, + const typename YVector::non_const_value_type& beta, const YVector& y, + const typename ZVector::non_const_value_type& gamma, const ZVector& z); }; template struct TeamUpdate { - static KOKKOS_INLINE_FUNCTION void team_update( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const XVector& x, const typename YVector::non_const_value_type& beta, - const YVector& y, const typename ZVector::non_const_value_type& 
gamma, - const ZVector& z) { + static KOKKOS_INLINE_FUNCTION void team_update(const TeamType& team, + const typename XVector::non_const_value_type& alpha, const XVector& x, + const typename YVector::non_const_value_type& beta, const YVector& y, + const typename ZVector::non_const_value_type& gamma, + const ZVector& z) { const int N = x.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { - z(i) = gamma * z(i) + alpha * x(i) + beta * y(i); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), + [&](const int& i) { z(i) = gamma * z(i) + alpha * x(i) + beta * y(i); }); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_impl.hpp index 96aca5c70e43..31502bee8ba4 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_impl.hpp @@ -40,8 +40,8 @@ namespace Impl { // corresponding input coefficient. Any literal coefficient of zero // has BLAS semantics of ignoring the corresponding (multi)vector // entry. 
-template +template struct MV_Update_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -54,19 +54,10 @@ struct MV_Update_Functor { const typename ZMV::non_const_value_type gamma_; ZMV Z_; - MV_Update_Functor(const typename XMV::non_const_value_type& alpha, - const XMV& X, - const typename YMV::non_const_value_type& beta, - const YMV& Y, - const typename ZMV::non_const_value_type& gamma, - const ZMV& Z) - : numCols(X.extent(1)), - alpha_(alpha), - X_(X), - beta_(beta), - Y_(Y), - gamma_(gamma), - Z_(Z) { + MV_Update_Functor(const typename XMV::non_const_value_type& alpha, const XMV& X, + const typename YMV::non_const_value_type& beta, const YMV& Y, + const typename ZMV::non_const_value_type& gamma, const ZMV& Z) + : numCols(X.extent(1)), alpha_(alpha), X_(X), beta_(beta), Y_(Y), gamma_(gamma), Z_(Z) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Update_Functor: X is not a Kokkos::View."); @@ -76,17 +67,15 @@ struct MV_Update_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Update_Functor: Z is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::MV_Update_Functor: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); // Casting enum values to int avoids compiler warnings about // comparing different kinds of enum values. - static_assert( - (int)ZMV::rank == (int)XMV::rank && (int)ZMV::rank == (int)YMV::rank, - "KokkosBlas::Impl::MV_Update_Functor: " - "X, Y, and Z must have the same rank."); + static_assert((int)ZMV::rank == (int)XMV::rank && (int)ZMV::rank == (int)YMV::rank, + "KokkosBlas::Impl::MV_Update_Functor: " + "X, Y, and Z must have the same rank."); static_assert(ZMV::rank == 2, "KokkosBlas::Impl::MV_Update_Functor: " "XMV, YMV, and ZMV must have rank 2."); @@ -209,8 +198,8 @@ struct MV_Update_Functor { // coefficients. 
The value 2 tells the functor to use the // corresponding input coefficient. Any literal coefficient of zero // has BLAS semantics of ignoring the corresponding vector entry. -template +template struct V_Update_Functor { typedef SizeType size_type; typedef Kokkos::ArithTraits ATS; @@ -226,13 +215,7 @@ struct V_Update_Functor { V_Update_Functor(const typename XV::non_const_value_type& alpha, const XV& X, const typename YV::non_const_value_type& beta, const YV& Y, const typename ZV::non_const_value_type& gamma, const ZV& Z) - : numCols(X.extent(1)), - alpha_(alpha), - X_(X), - beta_(beta), - Y_(Y), - gamma_(gamma), - Z_(Z) { + : numCols(X.extent(1)), alpha_(alpha), X_(X), beta_(beta), Y_(Y), gamma_(gamma), Z_(Z) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "V_Update_Functor: X is not a Kokkos::View."); @@ -242,17 +225,15 @@ struct V_Update_Functor { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "V_Update_Functor: Z is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_Update_Functor: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); // Casting to int avoids compiler warnings about comparing // different kinds of enum values. - static_assert( - (int)ZV::rank == (int)XV::rank && (int)ZV::rank == (int)YV::rank, - "KokkosBlas::Impl::V_Update_Functor: " - "X, Y, and Z must have the same rank."); + static_assert((int)ZV::rank == (int)XV::rank && (int)ZV::rank == (int)YV::rank, + "KokkosBlas::Impl::V_Update_Functor: " + "X, Y, and Z must have the same rank."); static_assert(ZV::rank == 1, "KokkosBlas::Impl::V_Update_Functor: " "XV, YV, and ZV must have rank 1."); @@ -314,15 +295,10 @@ struct V_Update_Functor { // // Any literal coefficient of zero has BLAS semantics of ignoring the // corresponding multivector entry. 
-template -void MV_Update_Generic(const execution_space& space, - const typename XMV::non_const_value_type& alpha, - const XMV& X, - const typename YMV::non_const_value_type& beta, - const YMV& Y, - const typename ZMV::non_const_value_type& gamma, - const ZMV& Z, int a = 2, int b = 2, int c = 2) { +template +void MV_Update_Generic(const execution_space& space, const typename XMV::non_const_value_type& alpha, const XMV& X, + const typename YMV::non_const_value_type& beta, const YMV& Y, + const typename ZMV::non_const_value_type& gamma, const ZMV& Z, int a = 2, int b = 2, int c = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Update_Generic: X is not a Kokkos::View."); @@ -332,17 +308,15 @@ void MV_Update_Generic(const execution_space& space, static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "MV_Update_Generic: Z is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::MV_Update_Generic: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); // Casting to int avoids compiler warnings about comparing different // kinds of enum values. 
- static_assert( - (int)ZMV::rank == (int)XMV::rank && (int)ZMV::rank == (int)YMV::rank, - "KokkosBlas::Impl::MV_Update_Generic: " - "X, Y, and Z must have the same rank."); + static_assert((int)ZMV::rank == (int)XMV::rank && (int)ZMV::rank == (int)YMV::rank, + "KokkosBlas::Impl::MV_Update_Generic: " + "X, Y, and Z must have the same rank."); static_assert(ZMV::rank == 2, "KokkosBlas::Impl::MV_Update_Generic: " "XMV, YMV, and ZMV must have rank 2."); @@ -353,22 +327,18 @@ void MV_Update_Generic(const execution_space& space, if (a == 0) { if (b == 0) { if (c == 0) { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } else { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } } else { if (c == 0) { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } else { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } } @@ -379,22 +349,18 @@ void MV_Update_Generic(const execution_space& space, else { if (b == 0) { if (c == 0) { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } else { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } } else { if (c == 0) { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } else { - MV_Update_Functor op(alpha, X, beta, - Y, gamma, Z); + MV_Update_Functor 
op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } } @@ -417,13 +383,9 @@ void MV_Update_Generic(const execution_space& space, // Any literal coefficient of zero has BLAS semantics of ignoring the // corresponding vector entry. template -void V_Update_Generic(const execution_space& space, - const typename XV::non_const_value_type& alpha, - const XV& X, - const typename YV::non_const_value_type& beta, - const YV& Y, - const typename ZV::non_const_value_type& gamma, - const ZV& Z, int a = 2, int b = 2, int c = 2) { +void V_Update_Generic(const execution_space& space, const typename XV::non_const_value_type& alpha, const XV& X, + const typename YV::non_const_value_type& beta, const YV& Y, + const typename ZV::non_const_value_type& gamma, const ZV& Z, int a = 2, int b = 2, int c = 2) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "V_Update_Generic: X is not a Kokkos::View."); @@ -433,17 +395,15 @@ void V_Update_Generic(const execution_space& space, static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "V_Update_Generic: Z is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::V_Update_Generic: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); // Casting to int avoids compiler warnings about comparing // different kinds of enum values. 
- static_assert( - (int)ZV::rank == (int)XV::rank && (int)ZV::rank == (int)YV::rank, - "KokkosBlas::Impl::V_Update_Generic: " - "X, Y, and Z must have the same rank."); + static_assert((int)ZV::rank == (int)XV::rank && (int)ZV::rank == (int)YV::rank, + "KokkosBlas::Impl::V_Update_Generic: " + "X, Y, and Z must have the same rank."); static_assert(ZV::rank == 1, "KokkosBlas::Impl::V_Update_Generic: " "XV, YV, and ZV must have rank 1."); @@ -454,22 +414,18 @@ void V_Update_Generic(const execution_space& space, if (a == 0) { if (b == 0) { if (c == 0) { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update<0,0,0>", policy, op); } else { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update<0,0,c>", policy, op); } } else { if (c == 0) { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update<0,b,0>", policy, op); } else { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update<0,b,c>", policy, op); } } @@ -480,22 +436,18 @@ void V_Update_Generic(const execution_space& space, else { if (b == 0) { if (c == 0) { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } else { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } } else { if (c == 0) { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } else { - V_Update_Functor op(alpha, X, beta, Y, - gamma, Z); + V_Update_Functor op(alpha, 
X, beta, Y, gamma, Z); Kokkos::parallel_for("KokkosBlas::update", policy, op); } } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_spec.hpp index 9a54888012bd..b031a529b8d9 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas1_update_spec.hpp @@ -27,8 +27,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct update_eti_spec_avail { enum : bool { value = false }; }; @@ -42,21 +41,17 @@ struct update_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_UPDATE_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct update_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_UPDATE_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct update_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; // @@ -66,21 +61,17 @@ struct update_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS1_UPDATE_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct update_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 2> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_UPDATE_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct update_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -103,39 +94,27 @@ namespace Impl { /// Z(i,j) = alpha*X(i,j) + beta*Y(i,j) + gamma*Z(i,j), /// /// with special cases for alpha, beta, or gamma = 0. -template ::value, - bool eti_spec_avail = - update_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = update_eti_spec_avail::value> struct Update { - static void update(const execution_space& space, - const typename XMV::non_const_value_type& alpha, - const XMV& X, - const typename YMV::non_const_value_type& beta, - const YMV& Y, - const typename ZMV::non_const_value_type& gamma, - const ZMV& Z); + static void update(const execution_space& space, const typename XMV::non_const_value_type& alpha, const XMV& X, + const typename YMV::non_const_value_type& beta, const YMV& Y, + const typename ZMV::non_const_value_type& gamma, const ZMV& Z); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY // Partial specialization for XMV, YMV, and ZMV rank-2 Views. 
template -struct Update { +struct Update { typedef typename XMV::size_type size_type; typedef Kokkos::ArithTraits ATA; typedef Kokkos::ArithTraits ATB; typedef Kokkos::ArithTraits ATC; - static void update(const execution_space& space, - const typename XMV::non_const_value_type& alpha, - const XMV& X, - const typename YMV::non_const_value_type& beta, - const YMV& Y, - const typename ZMV::non_const_value_type& gamma, - const ZMV& Z) { + static void update(const execution_space& space, const typename XMV::non_const_value_type& alpha, const XMV& X, + const typename YMV::non_const_value_type& beta, const YMV& Y, + const typename ZMV::non_const_value_type& gamma, const ZMV& Z) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" "Update::update: X is not a Kokkos::View."); @@ -145,32 +124,28 @@ struct Update::value, "KokkosBlas::Impl::" "Update::update: Z is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Update::update: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); // Casting to int avoids compiler warnings about comparing // different kinds of enum values. - static_assert( - (int)ZMV::rank == (int)XMV::rank && (int)ZMV::rank == (int)YMV::rank, - "KokkosBlas::Impl::Update::update: " - "X, Y, and Z must have the same rank."); + static_assert((int)ZMV::rank == (int)XMV::rank && (int)ZMV::rank == (int)YMV::rank, + "KokkosBlas::Impl::Update::update: " + "X, Y, and Z must have the same rank."); static_assert(ZMV::rank == 2, "KokkosBlas::Impl::Update::update: " "XMV, YMV, and ZMV must have rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::update[ETI]" - : "KokkosBlas::update[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::update[ETI]" + : "KokkosBlas::update[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::update<> ETI specialization for < %s , %s , %s >\n", - typeid(XMV).name(), typeid(YMV).name(), typeid(ZMV).name()); + printf("KokkosBlas1::update<> ETI specialization for < %s , %s , %s >\n", typeid(XMV).name(), typeid(YMV).name(), + typeid(ZMV).name()); else { - printf( - "KokkosBlas1::update<> non-ETI specialization for < %s , %s , %s >\n", - typeid(XMV).name(), typeid(YMV).name(), typeid(ZMV).name()); + printf("KokkosBlas1::update<> non-ETI specialization for < %s , %s , %s >\n", typeid(XMV).name(), + typeid(YMV).name(), typeid(ZMV).name()); } #endif @@ -203,24 +178,20 @@ struct Update(INT_MAX)) { typedef int index_type; - V_Update_Generic(space, alpha, X_0, beta, - Y_0, gamma, Z_0, a, b, c); + V_Update_Generic( + space, alpha, X_0, beta, Y_0, gamma, Z_0, a, b, c); } else { typedef typename XMV::size_type index_type; - V_Update_Generic(space, alpha, X_0, beta, - Y_0, gamma, Z_0, a, b, c); + V_Update_Generic( + space, alpha, X_0, beta, Y_0, gamma, Z_0, a, b, c); } } else { if (numRows * numCols < static_cast(INT_MAX)) { typedef int index_type; - MV_Update_Generic( - space, alpha, X, beta, Y, gamma, Z, a, b, c); + MV_Update_Generic(space, alpha, X, beta, Y, gamma, Z, a, b, c); } else { typedef typename XMV::size_type index_type; - MV_Update_Generic( - space, alpha, X, beta, Y, gamma, Z, a, b, c); + MV_Update_Generic(space, alpha, X, beta, Y, gamma, Z, a, b, c); } } Kokkos::Profiling::popRegion(); @@ -229,19 +200,15 @@ struct Update -struct Update { +struct Update { typedef typename XV::size_type size_type; typedef Kokkos::ArithTraits ATA; typedef Kokkos::ArithTraits ATB; typedef Kokkos::ArithTraits ATC; - static void update(const execution_space& space, - const typename XV::non_const_value_type& alpha, - const XV& X, const typename YV::non_const_value_type& beta, - const YV& Y, - const typename 
ZV::non_const_value_type& gamma, - const ZV& Z) { + static void update(const execution_space& space, const typename XV::non_const_value_type& alpha, const XV& X, + const typename YV::non_const_value_type& beta, const YV& Y, + const typename ZV::non_const_value_type& gamma, const ZV& Z) { // XV, YV, and ZV must be Kokkos::View specializations. static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" @@ -253,29 +220,25 @@ struct Update::update: Z is not a Kokkos::View."); // ZV must be nonconst (else it can't be an output argument). - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::Impl::Update::update: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert( - (int)ZV::rank == (int)XV::rank && (int)ZV::rank == (int)YV::rank, - "KokkosBlas::Impl::Update::update: " - "X, Y, and Z must have the same rank."); + static_assert((int)ZV::rank == (int)XV::rank && (int)ZV::rank == (int)YV::rank, + "KokkosBlas::Impl::Update::update: " + "X, Y, and Z must have the same rank."); static_assert(ZV::rank == 1, "KokkosBlas::Impl::Update::update: " "XV, YV, and ZV must have rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::update[ETI]" - : "KokkosBlas::update[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::update[ETI]" + : "KokkosBlas::update[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - printf("KokkosBlas1::update<> ETI specialization for < %s , %s , %s >\n", - typeid(XV).name(), typeid(YV).name(), typeid(ZV).name()); + printf("KokkosBlas1::update<> ETI specialization for < %s , %s , %s >\n", typeid(XV).name(), typeid(YV).name(), + typeid(ZV).name()); else { - printf( - "KokkosBlas1::update<> non-ETI specialization for < %s , %s , %s >\n", - typeid(XV).name(), typeid(YV).name(), typeid(ZV).name()); + printf("KokkosBlas1::update<> non-ETI specialization for < %s , %s , %s >\n", typeid(XV).name(), + typeid(YV).name(), typeid(ZV).name()); } #endif @@ -299,15 +262,12 @@ struct Update(INT_MAX) && - numRows * numCols < static_cast(INT_MAX)) { + if (numRows < static_cast(INT_MAX) && numRows * numCols < static_cast(INT_MAX)) { typedef int index_type; - V_Update_Generic( - space, alpha, X, beta, Y, gamma, Z, a, b, c); + V_Update_Generic(space, alpha, X, beta, Y, gamma, Z, a, b, c); } else { typedef typename XV::size_type index_type; - V_Update_Generic( - space, alpha, X, beta, Y, gamma, Z, a, b, c); + V_Update_Generic(space, alpha, X, beta, Y, gamma, Z, a, b, c); } Kokkos::Profiling::popRegion(); } @@ -326,32 +286,24 @@ struct Update, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_UPDATE_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Update< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 1, false, true>; -#define KOKKOSBLAS1_UPDATE_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct Update< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define 
KOKKOSBLAS1_UPDATE_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Update< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 1, false, true>; // @@ -362,32 +314,24 @@ struct Update, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_UPDATE_MV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct Update< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 2, false, true>; -#define KOKKOSBLAS1_UPDATE_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template struct Update< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS1_UPDATE_MV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct Update< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ 2, false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_impl.hpp index dc0f531583a9..b1976e262273 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_impl.hpp @@ -29,35 +29,26 @@ namespace Impl { template struct SingleLevelNontransposeGEMV { using AlphaCoeffType = typename AViewType::non_const_value_type; using BetaCoeffType = typename YViewType::non_const_value_type; using y_value_type = typename YViewType::non_const_value_type; - using AccumScalar = typename std::conditional< - std::is_same::value || - std::is_same::value, - float, y_value_type>::type; - - 
SingleLevelNontransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A, - const XViewType& x, const BetaCoeffType& beta, - const YViewType& y) + using AccumScalar = typename std::conditional::value || + std::is_same::value, + float, y_value_type>::type; + + SingleLevelNontransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A, const XViewType& x, + const BetaCoeffType& beta, const YViewType& y) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - static_assert(std::is_integral::value, - "IndexType must be an integer."); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(std::is_integral::value, "IndexType must be an integer."); static_assert(alphaPreset == 0 || alphaPreset == 1 || alphaPreset == -1, "Invalid alphaPreset value; valid values are 0, 1, and -1."); static_assert(betaPreset == 0 || betaPreset == 1 || betaPreset == -1, @@ -112,43 +103,29 @@ struct SingleLevelNontransposeGEMV { template struct SingleLevelTransposeGEMV { using y_value_type = typename YViewType::non_const_value_type; using AlphaCoeffType = typename 
AViewType::non_const_value_type; using BetaCoeffType = typename YViewType::non_const_value_type; - using AccumScalar = typename std::conditional< - std::is_same::value || - std::is_same::value, - float, y_value_type>::type; + using AccumScalar = typename std::conditional::value || + std::is_same::value, + float, y_value_type>::type; typedef AccumScalar value_type[]; IndexType value_count; // Kokkos needs this for reductions w/ array results - SingleLevelTransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A, - const XViewType& x, const BetaCoeffType& beta, - const YViewType& y) - : value_count(A.extent(1)), - alpha_(alpha), - A_(A), - x_(x), - beta_(beta), - y_(y) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - static_assert(std::is_integral::value, - "IndexType must be an integer."); + SingleLevelTransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A, const XViewType& x, + const BetaCoeffType& beta, const YViewType& y) + : value_count(A.extent(1)), alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y) { + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + 
static_assert(std::is_integral::value, "IndexType must be an integer."); static_assert(alphaPreset == 0 || alphaPreset == 1 || alphaPreset == -1, "Invalid alphaPreset value; valid values are 0, 1, and -1."); static_assert(betaPreset == 0 || betaPreset == 1 || betaPreset == -1, @@ -178,8 +155,7 @@ struct SingleLevelTransposeGEMV { } } - KOKKOS_INLINE_FUNCTION void operator()(const IndexType& i, - value_type y_cur) const { + KOKKOS_INLINE_FUNCTION void operator()(const IndexType& i, value_type y_cur) const { using Kokkos::ArithTraits; using KAT = ArithTraits; @@ -199,27 +175,18 @@ struct SingleLevelTransposeGEMV { }; // Single-level parallel version of GEMV. -template -void singleLevelGemv(const ExecutionSpace& space, const char trans[], - typename AViewType::const_value_type& alpha, - const AViewType& A, const XViewType& x, - typename YViewType::const_value_type& beta, +template +void singleLevelGemv(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, + const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - static_assert(std::is_integral::value, - "IndexType must be an integer"); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 
2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(std::is_integral::value, "IndexType must be an integer"); using y_value_type = typename YViewType::non_const_value_type; using policy_type = Kokkos::RangePolicy; @@ -242,12 +209,9 @@ void singleLevelGemv(const ExecutionSpace& space, const char trans[], // "Fake out" a scal() by using the non-transpose alpha=0, // general beta case. This assumes that the functor doesn't // check dimensions. - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", - policy_type(0, A.extent(1)), functor); + Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", policy_type(0, A.extent(1)), functor); } return; } @@ -260,49 +224,35 @@ void singleLevelGemv(const ExecutionSpace& space, const char trans[], } else if (beta == Kokkos::ArithTraits::one()) { // Do nothing (y := 1 * y) } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } } else if (alpha == Kokkos::ArithTraits::one()) { if (beta == Kokkos::ArithTraits::zero()) { - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } else if (beta == Kokkos::ArithTraits::one()) { - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } else { // beta != 0 && beta != 1 - using 
functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } } else { // alpha != 0 and alpha != 1 if (beta == Kokkos::ArithTraits::zero()) { - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } else if (beta == Kokkos::ArithTraits::one()) { - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range, functor); } @@ -315,58 +265,37 @@ void singleLevelGemv(const ExecutionSpace& space, const char trans[], } else if (beta == Kokkos::ArithTraits::one()) { // Do nothing (y := 1 * y) } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } } else if (alpha == Kokkos::ArithTraits::one()) { if (beta == Kokkos::ArithTraits::zero()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else if (beta == 
Kokkos::ArithTraits::one()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } } else { // alpha != 0 and alpha != 1 if (beta == Kokkos::ArithTraits::zero()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else if (beta == Kokkos::ArithTraits::one()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } } } else if (tr == 'C' || tr == 'c' || tr == 'H' || tr == 'h') { // conj xpose @@ -377,58 +306,37 @@ void singleLevelGemv(const ExecutionSpace& space, const char trans[], } else if 
(beta == Kokkos::ArithTraits::one()) { // Do nothing (y := 1 * y) } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } } else if (alpha == Kokkos::ArithTraits::one()) { if (beta == Kokkos::ArithTraits::zero()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else if (beta == Kokkos::ArithTraits::one()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } } else { // alpha != 0 and alpha != 1 if (beta == Kokkos::ArithTraits::zero()) { - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else if (beta == Kokkos::ArithTraits::one()) { - using 
functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } else { // beta != 0 && beta != 1 - using functor_type = - SingleLevelTransposeGEMV; + using functor_type = SingleLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, - functor); + Kokkos::parallel_reduce("KokkosBlas::gemv[SingleLevelTranspose]", range, functor); } } } @@ -440,38 +348,29 @@ struct TwoLevelGEMV_LayoutRightTag {}; // --------------------------------------------------------------------------------------------- // Functor for a two-level parallel_reduce version of GEMV (non-transpose), // designed for performance on GPU. Kernel depends on the layout of A. -template +template struct TwoLevelGEMV { using y_value_type = typename YViewType::non_const_value_type; using AlphaCoeffType = typename AViewType::non_const_value_type; using BetaCoeffType = typename YViewType::non_const_value_type; - using AccumScalar = typename std::conditional< - std::is_same::value || - std::is_same::value, - float, y_value_type>::type; + using AccumScalar = typename std::conditional::value || + std::is_same::value, + float, y_value_type>::type; using policy_type = Kokkos::TeamPolicy; using member_type = typename policy_type::member_type; - TwoLevelGEMV(const AlphaCoeffType& alpha, const AViewType& A, - const XViewType& x, const BetaCoeffType& beta, + TwoLevelGEMV(const AlphaCoeffType& alpha, const AViewType& A, const XViewType& x, const BetaCoeffType& beta, const YViewType& y) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - 
static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - static_assert(std::is_integral::value, - "IndexType must be an integer."); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(std::is_integral::value, "IndexType must be an integer."); } public: @@ -480,15 +379,12 @@ struct TwoLevelGEMV { // -Groups of 32 threads handle N/teamsize columns sequentially, placing // results into shared. -Then individual thread results are combined with // parallel_reduce. 
- KOKKOS_INLINE_FUNCTION void operator()(TwoLevelGEMV_LayoutLeftTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TwoLevelGEMV_LayoutLeftTag, const member_type& team) const { using KAT = Kokkos::ArithTraits; using AKAT = Kokkos::ArithTraits; // Allocate a Scalar in shared for each thread - AccumScalar* blockResult = - (AccumScalar*)team.team_shmem().get_shmem(32 * sizeof(AccumScalar)); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 32), - [&](int i) { blockResult[i] = AKAT::zero(); }); + AccumScalar* blockResult = (AccumScalar*)team.team_shmem().get_shmem(32 * sizeof(AccumScalar)); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 32), [&](int i) { blockResult[i] = AKAT::zero(); }); team.team_barrier(); // Which block this thread will work on int block = team.team_rank() / 32; @@ -498,9 +394,7 @@ struct TwoLevelGEMV { AccumScalar localSum = AKAT::zero(); // compute local sum if (row < (IndexType)A_.extent(0)) { - for (IndexType col = blockColStart; - col < blockColStart + columnsPerThread && col < A_.extent(1); - col++) { + for (IndexType col = blockColStart; col < blockColStart + columnsPerThread && col < A_.extent(1); col++) { // A access is coalesced, x access is a broadcast localSum += AccumScalar(A_(row, col)) * AccumScalar(x_(col)); } @@ -514,15 +408,13 @@ struct TwoLevelGEMV { if (beta_ == KAT::zero()) y_(yrow) = y_value_type(alpha_ * blockResult[i]); else - y_(yrow) = y_value_type(beta_ * AccumScalar(y_(yrow)) + - alpha_ * blockResult[i]); + y_(yrow) = y_value_type(beta_ * AccumScalar(y_(yrow)) + alpha_ * blockResult[i]); } }); } // LayoutRight version: one team per row - KOKKOS_INLINE_FUNCTION void operator()(TwoLevelGEMV_LayoutRightTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TwoLevelGEMV_LayoutRightTag, const member_type& team) const { using KAT = Kokkos::ArithTraits; const IndexType N = A_.extent(1); @@ -532,10 +424,7 @@ struct TwoLevelGEMV { AccumScalar val; 
Kokkos::parallel_reduce( Kokkos::TeamThreadRange(team, N), - [&](const int j, AccumScalar& update) { - update += AccumScalar(A_(i, j)) * x_(j); - }, - val); + [&](const int j, AccumScalar& update) { update += AccumScalar(A_(i, j)) * x_(j); }, val); // compute yj = beta*yj + alpha*val Kokkos::single(Kokkos::PerTeam(team), [&]() { @@ -561,39 +450,29 @@ struct TwoLevelGEMV { // transpose GEMV. The functor uses parallel-for over the columns of the input // matrix A and each team uses parallel-reduce over the row of its column. // The output vector y is the reduction result. -template struct TwoLevelTransposeGEMV { using y_value_type = typename YViewType::non_const_value_type; using AlphaCoeffType = typename AViewType::non_const_value_type; using BetaCoeffType = typename YViewType::non_const_value_type; - using AccumScalar = typename std::conditional< - std::is_same::value || - std::is_same::value, - float, y_value_type>::type; + using AccumScalar = typename std::conditional::value || + std::is_same::value, + float, y_value_type>::type; using policy_type = Kokkos::TeamPolicy; using member_type = typename policy_type::member_type; - TwoLevelTransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A, - const XViewType& x, const BetaCoeffType& beta, + TwoLevelTransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A, const XViewType& x, const BetaCoeffType& beta, const YViewType& y) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - static_assert(std::is_integral::value, - 
"IndexType must be an integer."); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(std::is_integral::value, "IndexType must be an integer."); } public: @@ -634,27 +513,18 @@ struct TwoLevelTransposeGEMV { }; // Two-level parallel version of GEMV. -template -void twoLevelGemv(const ExecutionSpace& space, const char trans[], - typename AViewType::const_value_type& alpha, - const AViewType& A, const XViewType& x, - typename YViewType::const_value_type& beta, +template +void twoLevelGemv(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, + const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - static_assert(std::is_integral::value, - "IndexType must be an integer"); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, 
"AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(std::is_integral::value, "IndexType must be an integer"); using y_value_type = typename YViewType::non_const_value_type; using team_policy_type = Kokkos::TeamPolicy; @@ -681,40 +551,33 @@ void twoLevelGemv(const ExecutionSpace& space, const char trans[], // "Fake out" a scal() by using the non-transpose alpha=0, // general beta case. This assumes that the functor doesn't // check dimensions. - using functor_type = - SingleLevelNontransposeGEMV; + using functor_type = SingleLevelNontransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", - range_policy_type(space, 0, y.extent(0)), functor); + Kokkos::parallel_for("KokkosBlas::gemv[SingleLevel]", range_policy_type(space, 0, y.extent(0)), functor); } return; } if (tr == 'N') { - constexpr bool isLayoutLeft = std::is_same::value; + constexpr bool isLayoutLeft = std::is_same::value; // Both kernels work for both layouts - the only difference is access // pattern. 
using layout_tag = - typename std::conditional::type; + typename std::conditional::type; using tagged_policy = Kokkos::TeamPolicy; - using functor_type = TwoLevelGEMV; + using functor_type = TwoLevelGEMV; functor_type functor(alpha, A, x, beta, y); tagged_policy team; if constexpr (isLayoutLeft) { - using AccumScalar = typename std::conditional< - std::is_same::value || - std::is_same::value, - float, y_value_type>::type; + using AccumScalar = + typename std::conditional::value || + std::is_same::value, + float, y_value_type>::type; size_t sharedPerTeam = 32 * sizeof(AccumScalar); IndexType numTeams = (A.extent(0) + 31) / 32; tagged_policy temp(space, 1, 1); temp.set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam)); - int teamSize = - temp.team_size_recommended(functor, Kokkos::ParallelForTag()); + int teamSize = temp.team_size_recommended(functor, Kokkos::ParallelForTag()); // make sure teamSize is a multiple of 32 teamSize -= teamSize % 32; // don't make teamSize larger than what's useful @@ -728,8 +591,7 @@ void twoLevelGemv(const ExecutionSpace& space, const char trans[], #endif int numBlocks = teamSize / 32; functor.columnsPerThread = (A.extent(1) + numBlocks - 1) / numBlocks; - team = tagged_policy(space, numTeams, teamSize) - .set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam)); + team = tagged_policy(space, numTeams, teamSize).set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam)); } else { // LayoutRight: one team per row team = tagged_policy(space, A.extent(0), Kokkos::AUTO); @@ -744,21 +606,15 @@ void twoLevelGemv(const ExecutionSpace& space, const char trans[], } else if (tr == 'T') { // transpose, and not conj transpose team_policy_type team(space, A.extent(1), Kokkos::AUTO); - using functor_type = - TwoLevelTransposeGEMV; + using functor_type = TwoLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_for("KokkosBlas::gemv[twoLevelTranspose]", team, - functor); + Kokkos::parallel_for("KokkosBlas::gemv[twoLevelTranspose]", team, 
functor); } else if (tr == 'C' || tr == 'H') { // conjugate transpose team_policy_type team(space, A.extent(1), Kokkos::AUTO); - using functor_type = - TwoLevelTransposeGEMV; + using functor_type = TwoLevelTransposeGEMV; functor_type functor(alpha, A, x, beta, y); - Kokkos::parallel_for("KokkosBlas::gemv[twoLevelTranspose]", team, - functor); + Kokkos::parallel_for("KokkosBlas::gemv[twoLevelTranspose]", team, functor); } } } @@ -766,26 +622,18 @@ void twoLevelGemv(const ExecutionSpace& space, const char trans[], // generalGemv: use 1 level (Range) or 2 level (Team) implementation, // depending on whether execution space is CPU or GPU. enable_if makes sure // unused kernels are not instantiated. -template ()>::type* = nullptr> -void generalGemvImpl(const ExecutionSpace& space, const char trans[], - typename AViewType::const_value_type& alpha, - const AViewType& A, const XViewType& x, - typename YViewType::const_value_type& beta, +template ()>::type* = nullptr> +void generalGemvImpl(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, + const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { singleLevelGemv(space, trans, alpha, A, x, beta, y); } -template ()>::type* = nullptr> -void generalGemvImpl(const ExecutionSpace& space, const char trans[], - typename AViewType::const_value_type& alpha, - const AViewType& A, const XViewType& x, - typename YViewType::const_value_type& beta, +template ()>::type* = nullptr> +void generalGemvImpl(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, + const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { twoLevelGemv(space, trans, alpha, A, x, beta, y); } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_spec.hpp index 97e6e2717e2e..05e2d28bc7da 100644 --- 
a/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_gemv_spec.hpp @@ -41,19 +41,16 @@ struct gemv_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS2_GEMV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct gemv_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GEMV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct gemv_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -68,47 +65,32 @@ namespace Impl { // // Implementation of KokkosBlas::gemv. 
-template < - class ExecutionSpace, class AViewType, class XViewType, class YViewType, - bool tpl_spec_avail = gemv_tpl_spec_avail::value, - bool eti_spec_avail = gemv_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = gemv_eti_spec_avail::value> struct GEMV { - static void gemv(const ExecutionSpace& space, const char trans[], - typename AViewType::const_value_type& alpha, - const AViewType& A, const XViewType& x, - typename YViewType::const_value_type& beta, + static void gemv(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, + const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::gemv[ETI]" - : "KokkosBlas::gemv[noETI]"); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::gemv[ETI]" + : "KokkosBlas::gemv[noETI]"); typedef typename AViewType::size_type size_type; const size_type numRows = A.extent(0); const size_type numCols = A.extent(1); // Prefer int as the index type, but use a larger type if needed. - if (numRows < static_cast(INT_MAX) && - numCols < static_cast(INT_MAX)) { - generalGemvImpl( - space, trans, alpha, A, x, beta, y); + if (numRows < static_cast(INT_MAX) && numCols < static_cast(INT_MAX)) { + generalGemvImpl(space, trans, alpha, A, x, beta, y); } else { - generalGemvImpl( - space, trans, alpha, A, x, beta, y); + generalGemvImpl(space, trans, alpha, A, x, beta, y); } Kokkos::Profiling::popRegion(); } @@ -129,30 +111,24 @@ struct GEMV { // one or more .cpp files. // -#define KOKKOSBLAS2_GEMV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - extern template struct GEMV< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_GEMV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct GEMV< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSBLAS2_GEMV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct GEMV< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_GEMV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct GEMV< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_impl.hpp index 651db7f11a1d..94eb1868f97b 100644 --- 
a/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_impl.hpp @@ -34,8 +34,8 @@ struct ThreadParallelGER { using YComponentType = typename YViewType::non_const_value_type; using AComponentType = typename AViewType::non_const_value_type; - ThreadParallelGER(const bool justTranspose, const AlphaCoeffType& alpha, - const XViewType& x, const YViewType& y, const AViewType& A) + ThreadParallelGER(const bool justTranspose, const AlphaCoeffType& alpha, const XViewType& x, const YViewType& y, + const AViewType& A) : justTranspose_(justTranspose), alpha_(alpha), x_(x), y_(y), A_(A) { // Nothing to do } @@ -53,9 +53,7 @@ struct ThreadParallelGER { } } else { for (IndexType j = 0; j < N; ++j) { - A_(i, j) += - AComponentType(alpha_ * x_fixed * - Kokkos::ArithTraits::conj(y_(j))); + A_(i, j) += AComponentType(alpha_ * x_fixed * Kokkos::ArithTraits::conj(y_(j))); } } } @@ -70,14 +68,12 @@ struct ThreadParallelGER { }; // Thread parallel version of GER. 
-template +template void threadParallelGer(const ExecutionSpace& space, const char trans[], - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, + const typename AViewType::const_value_type& alpha, const XViewType& x, const YViewType& y, const AViewType& A) { - static_assert(std::is_integral::value, - "IndexType must be an integer"); + static_assert(std::is_integral::value, "IndexType must be an integer"); using AlphaCoeffType = typename AViewType::non_const_value_type; @@ -88,12 +84,10 @@ void threadParallelGer(const ExecutionSpace& space, const char trans[], } else if (alpha == Kokkos::ArithTraits::zero()) { // no entries to update } else { - Kokkos::RangePolicy rangePolicy(space, 0, - A.extent(0)); - ThreadParallelGER functor( - (trans[0] == 'T') || (trans[0] == 't'), alpha, x, y, A); - Kokkos::parallel_for("KokkosBlas::ger[threadParallel]", rangePolicy, - functor); + Kokkos::RangePolicy rangePolicy(space, 0, A.extent(0)); + ThreadParallelGER functor((trans[0] == 'T') || (trans[0] == 't'), alpha, + x, y, A); + Kokkos::parallel_for("KokkosBlas::ger[threadParallel]", rangePolicy, functor); } } @@ -104,8 +98,7 @@ struct TeamParallelGER_LayoutRightTag {}; // Functor for the team parallel version of GER, designed for // performance on GPU. The kernel depends on the layout of A. 
-template +template struct TeamParallelGER { using AlphaCoeffType = typename AViewType::non_const_value_type; using XComponentType = typename XViewType::non_const_value_type; @@ -115,16 +108,15 @@ struct TeamParallelGER { using policy_type = Kokkos::TeamPolicy; using member_type = typename policy_type::member_type; - TeamParallelGER(const bool justTranspose, const AlphaCoeffType& alpha, - const XViewType& x, const YViewType& y, const AViewType& A) + TeamParallelGER(const bool justTranspose, const AlphaCoeffType& alpha, const XViewType& x, const YViewType& y, + const AViewType& A) : justTranspose_(justTranspose), alpha_(alpha), x_(x), y_(y), A_(A) { // Nothing to do } public: // LayoutLeft version: one team per column - KOKKOS_INLINE_FUNCTION void operator()(TeamParallelGER_LayoutLeftTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TeamParallelGER_LayoutLeftTag, const member_type& team) const { if (alpha_ == Kokkos::ArithTraits::zero()) { // Nothing to do } else { @@ -132,24 +124,18 @@ struct TeamParallelGER { const IndexType j(team.league_rank()); if (justTranspose_) { const YComponentType y_fixed(y_(j)); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), + [&](const IndexType& i) { A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); }); } else { - const YComponentType y_fixed( - Kokkos::ArithTraits::conj(y_(j))); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); - }); + const YComponentType y_fixed(Kokkos::ArithTraits::conj(y_(j))); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), + [&](const IndexType& i) { A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); }); } } } // LayoutRight version: one team per row - KOKKOS_INLINE_FUNCTION void 
operator()(TeamParallelGER_LayoutRightTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TeamParallelGER_LayoutRightTag, const member_type& team) const { if (alpha_ == Kokkos::ArithTraits::zero()) { // Nothing to do } else { @@ -157,17 +143,12 @@ struct TeamParallelGER { const IndexType i(team.league_rank()); const XComponentType x_fixed(x_(i)); if (justTranspose_) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - A_(i, j) += AComponentType(alpha_ * x_fixed * y_(j)); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), + [&](const IndexType& j) { A_(i, j) += AComponentType(alpha_ * x_fixed * y_(j)); }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - A_(i, j) += AComponentType( - alpha_ * x_fixed * - Kokkos::ArithTraits::conj(y_(j))); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + A_(i, j) += AComponentType(alpha_ * x_fixed * Kokkos::ArithTraits::conj(y_(j))); + }); } } } @@ -181,14 +162,11 @@ struct TeamParallelGER { }; // Team parallel version of GER. 
-template -void teamParallelGer(const ExecutionSpace& space, const char trans[], - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { - static_assert(std::is_integral::value, - "IndexType must be an integer"); +template +void teamParallelGer(const ExecutionSpace& space, const char trans[], const typename AViewType::const_value_type& alpha, + const XViewType& x, const YViewType& y, const AViewType& A) { + static_assert(std::is_integral::value, "IndexType must be an integer"); using AlphaCoeffType = typename AViewType::non_const_value_type; @@ -203,11 +181,9 @@ void teamParallelGer(const ExecutionSpace& space, const char trans[], return; } - constexpr bool isLayoutLeft = - std::is_same::value; + constexpr bool isLayoutLeft = std::is_same::value; using layout_tag = - typename std::conditional::type; + typename std::conditional::type; using TeamPolicyType = Kokkos::TeamPolicy; TeamPolicyType teamPolicy; if (isLayoutLeft) { @@ -218,8 +194,8 @@ void teamParallelGer(const ExecutionSpace& space, const char trans[], teamPolicy = TeamPolicyType(space, A.extent(0), Kokkos::AUTO); } - TeamParallelGER - functor((trans[0] == 'T') || (trans[0] == 't'), alpha, x, y, A); + TeamParallelGER functor( + (trans[0] == 'T') || (trans[0] == 't'), alpha, x, y, A); Kokkos::parallel_for("KokkosBlas::ger[teamParallel]", teamPolicy, functor); } @@ -231,25 +207,17 @@ void teamParallelGer(const ExecutionSpace& space, const char trans[], // // The 'enable_if' makes sure unused kernels are not instantiated. 
-template ()>::type* = nullptr> -void generalGerImpl(const ExecutionSpace& space, const char trans[], - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { +template ()>::type* = nullptr> +void generalGerImpl(const ExecutionSpace& space, const char trans[], const typename AViewType::const_value_type& alpha, + const XViewType& x, const YViewType& y, const AViewType& A) { threadParallelGer(space, trans, alpha, x, y, A); } -template ()>::type* = nullptr> -void generalGerImpl(const ExecutionSpace& space, const char trans[], - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { +template ()>::type* = nullptr> +void generalGerImpl(const ExecutionSpace& space, const char trans[], const typename AViewType::const_value_type& alpha, + const XViewType& x, const YViewType& y, const AViewType& A) { teamParallelGer(space, trans, alpha, x, y, A); } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_spec.hpp index 9802194b9863..04e25ab42225 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_ger_spec.hpp @@ -40,19 +40,16 @@ struct ger_eti_spec_avail { // specializations go in this header file. We may spread out definitions (see // _INST macro below) across one or more .cpp files. 
// -#define KOKKOSBLAS2_GER_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct ger_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GER_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct ger_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -67,34 +64,26 @@ namespace Impl { // // Implementation of KokkosBlas::ger. -template ::value, - bool eti_spec_avail = ger_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = ger_eti_spec_avail::value> struct GER { - static void ger(const ExecutionSpace& space, const char trans[], - const typename AViewType::const_value_type& alpha, + static void ger(const ExecutionSpace& space, const char trans[], const typename AViewType::const_value_type& alpha, const XViewType& x, const YViewType& y, const AViewType& A) #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::ger[ETI]" - : "KokkosBlas::ger[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::ger[ETI]" + : "KokkosBlas::ger[noETI]"); typedef typename AViewType::size_type size_type; const size_type numRows = A.extent(0); const size_type numCols = A.extent(1); // Prefer int as the index type, but use a larger type if needed. 
- if ((numRows < static_cast(INT_MAX)) && - (numCols < static_cast(INT_MAX))) { - generalGerImpl( - space, trans, alpha, x, y, A); + if ((numRows < static_cast(INT_MAX)) && (numCols < static_cast(INT_MAX))) { + generalGerImpl(space, trans, alpha, x, y, A); } else { - generalGerImpl( - space, trans, alpha, x, y, A); + generalGerImpl(space, trans, alpha, x, y, A); } Kokkos::Profiling::popRegion(); @@ -115,30 +104,24 @@ struct GER { // We may spread out definitions (see _DEF macro below) across one or more .cpp // files. // -#define KOKKOSBLAS2_GER_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - extern template struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_GER_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSBLAS2_GER_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_GER_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_impl.hpp index 1fec8769cb93..79f49fdd0e4d 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_impl.hpp @@ -25,13 +25,9 @@ namespace KokkosBlas { template struct 
SerialGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType /*alpha*/, - const AViewType & /*A*/, - const xViewType & /*x*/, - const ScalarType /*beta*/, - const yViewType & /*y*/); + template + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType /*alpha*/, const AViewType & /*A*/, const xViewType & /*x*/, + const ScalarType /*beta*/, const yViewType & /*y*/); }; } // namespace KokkosBlas @@ -49,27 +45,21 @@ namespace KokkosBlas { /// template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), - x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke(A.extent(0), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), x.data(), x.stride_0(), + beta, y.data(), y.stride_0()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), - x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke(A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); } /// @@ -77,27 +67,21 @@ SerialGemv::invoke( /// template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const 
ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), - x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke(A.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), x.data(), x.stride_0(), + beta, y.data(), y.stride_0()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), - x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke(A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); } /// @@ -105,27 +89,21 @@ SerialGemv::invoke( /// template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - Impl::OpConj(), A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke(Impl::OpConj(), A.extent(1), A.extent(0), 
alpha, + A.data(), A.stride_1(), A.stride_0(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - Impl::OpConj(), A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke(Impl::OpConj(), A.extent(1), A.extent(0), alpha, + A.data(), A.stride_1(), A.stride_0(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_inner_multiple_dot.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_inner_multiple_dot.hpp index aa7efc912246..1b70413119be 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_inner_multiple_dot.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_inner_multiple_dot.hpp @@ -41,24 +41,17 @@ struct InnerMultipleDotProduct { const int _as0, _as1, _xs0, _ys0; KOKKOS_INLINE_FUNCTION - InnerMultipleDotProduct(const int as0, const int as1, const int xs0, - const int ys0) + InnerMultipleDotProduct(const int as0, const int as1, const int xs0, const int ys0) : _as0(as0), _as1(as1), _xs0(xs0), _ys0(ys0) {} - template - KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, - const int n, + template + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int n, ValueYType *KOKKOS_RESTRICT y); - template - KOKKOS_INLINE_FUNCTION int 
serial_invoke(const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, - const int m, const int n, + template + KOKKOS_INLINE_FUNCTION int serial_invoke(const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int m, const int n, ValueYType *KOKKOS_RESTRICT y); }; @@ -67,16 +60,14 @@ struct InnerMultipleDotProduct { /// ==================== template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<5>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<5>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int n, + ValueYType *KOKKOS_RESTRICT y) { if (n <= 0) return 0; - const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, - i4 = 4 * _as0; + const int i0 = 0 * _as0, i1 = 1 * _as0, i2 = 2 * _as0, i3 = 3 * _as0, i4 = 4 * _as0; // unroll by rows ValueYType y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0, y_4 = 0; @@ -105,12 +96,11 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<5>::serial_invoke( } template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<4>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<4>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int n, + ValueYType *KOKKOS_RESTRICT y) { if (!n) return 0; OpA op; @@ -141,12 +131,11 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<4>::serial_invoke( } template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<3>::serial_invoke( - const ScalarType alpha, const 
ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<3>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int n, + ValueYType *KOKKOS_RESTRICT y) { if (n <= 0) return 0; OpA op; @@ -175,12 +164,11 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<3>::serial_invoke( } template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<2>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<2>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int n, + ValueYType *KOKKOS_RESTRICT y) { if (n <= 0) return 0; OpA op; @@ -207,12 +195,11 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<2>::serial_invoke( } template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<1>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<1>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int n, + ValueYType *KOKKOS_RESTRICT y) { if (n <= 0) return 0; OpA op; @@ -230,12 +217,11 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<1>::serial_invoke( } template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<5>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int m, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int 
InnerMultipleDotProduct<5>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int m, + const int n, ValueYType *KOKKOS_RESTRICT y) { if (m <= 0 || n <= 0) return 0; switch (m) { case 5: { @@ -268,12 +254,11 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<5>::serial_invoke( } template <> -template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<4>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int m, const int n, - ValueYType *KOKKOS_RESTRICT y) { +template +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<4>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int m, + const int n, ValueYType *KOKKOS_RESTRICT y) { if (m <= 0 || n <= 0) return 0; switch (m) { case 4: { @@ -301,13 +286,12 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<4>::serial_invoke( } template <> -template +template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<3>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int m, const int n, - ValueYType *KOKKOS_RESTRICT y) { +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<3>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int m, + const int n, ValueYType *KOKKOS_RESTRICT y) { if (m <= 0 || n <= 0) return 0; switch (m) { case 3: { @@ -330,13 +314,12 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<3>::serial_invoke( } template <> -template +template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<2>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int m, const int n, - ValueYType *KOKKOS_RESTRICT y) { +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<2>::serial_invoke(const 
ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int m, + const int n, ValueYType *KOKKOS_RESTRICT y) { if (m <= 0 || n <= 0) return 0; switch (m) { case 2: { @@ -354,13 +337,12 @@ KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<2>::serial_invoke( } template <> -template +template -KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<1>::serial_invoke( - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const ValueXType *KOKKOS_RESTRICT x, const int m, const int n, - ValueYType *KOKKOS_RESTRICT y) { +KOKKOS_INLINE_FUNCTION int InnerMultipleDotProduct<1>::serial_invoke(const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, + const ValueXType *KOKKOS_RESTRICT x, const int m, + const int n, ValueYType *KOKKOS_RESTRICT y) { if (m <= 0 || n <= 0) return 0; switch (m) { case 1: { diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_internal.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_internal.hpp index 2d78102c7a81..912972c7ee4c 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_internal.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_serial_gemv_internal.hpp @@ -31,33 +31,27 @@ namespace Impl { template struct SerialGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - OpA op, const int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + template + KOKKOS_INLINE_FUNCTION static int invoke(OpA op, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); // default OpA = OpID - template - KOKKOS_INLINE_FUNCTION static int invoke( - const 
int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { return invoke(OpID(), m, n, alpha, A, as0, as1, x, xs0, beta, y, ys0); } }; template <> -template +template KOKKOS_INLINE_FUNCTION int SerialGemvInternal::invoke( - OpA op, const int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + OpA op, const int m, const int n, const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -91,12 +85,10 @@ KOKKOS_INLINE_FUNCTION int SerialGemvInternal::invoke( } template <> -template +template KOKKOS_INLINE_FUNCTION int SerialGemvInternal::invoke( - OpA /* op */, const int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + OpA /* op */, const int m, const int n, const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -116,8 +108,7 @@ KOKKOS_INLINE_FUNCTION int SerialGemvInternal::invoke( Impl::InnerMultipleDotProduct 
inner(as0, as1, xs0, ys0); const int mb = mbAlgo; for (int i = 0; i < m; i += mb) - inner.serial_invoke(alpha, A + i * as0, x, - (i + mb) > m ? (m - i) : mb, n, y + i * ys0); + inner.serial_invoke(alpha, A + i * as0, x, (i + mb) > m ? (m - i) : mb, n, y + i * ys0); } return 0; } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_impl.hpp index 69284e9547a0..7bcb0069ab1c 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_impl.hpp @@ -27,16 +27,14 @@ namespace Impl { // Functor for the thread parallel version of SYR2. // This functor parallelizes over rows of the input matrix A. -template +template struct ThreadParallelSYR2 { using AlphaCoeffType = typename AViewType::non_const_value_type; using XComponentType = typename XViewType::non_const_value_type; using YComponentType = typename YViewType::non_const_value_type; using AComponentType = typename AViewType::non_const_value_type; - ThreadParallelSYR2(const AlphaCoeffType& alpha, const XViewType& x, - const YViewType& y, const AViewType& A) + ThreadParallelSYR2(const AlphaCoeffType& alpha, const XViewType& x, const YViewType& y, const AViewType& A) : alpha_(alpha), x_(x), y_(y), A_(A) { // Nothing to do } @@ -55,16 +53,14 @@ struct ThreadParallelSYR2 { if constexpr (tJustTranspose) { if (x_fixed != Kokkos::ArithTraits::zero()) { for (IndexType j = 0; j < N; ++j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { A_(i, j) += AComponentType(alpha_ * x_fixed * y_(j)); } } } if (y_fixed != Kokkos::ArithTraits::zero()) { for (IndexType j = 0; j < N; ++j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { A_(i, j) += AComponentType(alpha_ * y_fixed * x_(j)); } } 
@@ -72,21 +68,16 @@ struct ThreadParallelSYR2 { } else { if (x_fixed != Kokkos::ArithTraits::zero()) { for (IndexType j = 0; j < N; ++j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - alpha_ * x_fixed * - Kokkos::ArithTraits::conj(y_(j))); + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_fixed * Kokkos::ArithTraits::conj(y_(j))); } } } if (y_fixed != Kokkos::ArithTraits::zero()) { for (IndexType j = 0; j < N; ++j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - Kokkos::ArithTraits::conj(alpha_) * y_fixed * - Kokkos::ArithTraits::conj(x_(j))); + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(Kokkos::ArithTraits::conj(alpha_) * y_fixed * + Kokkos::ArithTraits::conj(x_(j))); } } } @@ -102,14 +93,11 @@ struct ThreadParallelSYR2 { }; // Thread parallel version of SYR2. 
-template -void threadParallelSyr2(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { - static_assert(std::is_integral::value, - "IndexType must be an integer"); +template +void threadParallelSyr2(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, + const XViewType& x, const YViewType& y, const AViewType& A) { + static_assert(std::is_integral::value, "IndexType must be an integer"); using AlphaCoeffType = typename AViewType::non_const_value_type; @@ -120,13 +108,9 @@ void threadParallelSyr2(const ExecutionSpace& space, } else if (alpha == Kokkos::ArithTraits::zero()) { // no entries to update } else { - Kokkos::RangePolicy rangePolicy(space, 0, - A.extent(0)); - ThreadParallelSYR2 - functor(alpha, x, y, A); - Kokkos::parallel_for("KokkosBlas::syr2[threadParallel]", rangePolicy, - functor); + Kokkos::RangePolicy rangePolicy(space, 0, A.extent(0)); + ThreadParallelSYR2 functor(alpha, x, y, A); + Kokkos::parallel_for("KokkosBlas::syr2[threadParallel]", rangePolicy, functor); } } @@ -137,8 +121,8 @@ struct TeamParallelSYR2_LayoutRightTag {}; // Functor for the team parallel version of SYR2, designed for // performance on GPUs. The kernel depends on the layout of A. 
-template +template struct TeamParallelSYR2 { using AlphaCoeffType = typename AViewType::non_const_value_type; using XComponentType = typename XViewType::non_const_value_type; @@ -148,16 +132,14 @@ struct TeamParallelSYR2 { using policy_type = Kokkos::TeamPolicy; using member_type = typename policy_type::member_type; - TeamParallelSYR2(const AlphaCoeffType& alpha, const XViewType& x, - const YViewType& y, const AViewType& A) + TeamParallelSYR2(const AlphaCoeffType& alpha, const XViewType& x, const YViewType& y, const AViewType& A) : alpha_(alpha), x_(x), y_(y), A_(A) { // Nothing to do } public: // LayoutLeft version: one team per column - KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR2_LayoutLeftTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR2_LayoutLeftTag, const member_type& team) const { if (alpha_ == Kokkos::ArithTraits::zero()) { // Nothing to do } else { @@ -171,47 +153,35 @@ struct TeamParallelSYR2 { const XComponentType x_fixed(x_(j)); const YComponentType y_fixed(y_(j)); if (y_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); + } + }); } if (x_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * y_(i) * x_fixed); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + 
A_(i, j) += AComponentType(alpha_ * y_(i) * x_fixed); + } + }); } } else { - const XComponentType x_fixed( - Kokkos::ArithTraits::conj(x_(j))); - const YComponentType y_fixed( - Kokkos::ArithTraits::conj(y_(j))); + const XComponentType x_fixed(Kokkos::ArithTraits::conj(x_(j))); + const YComponentType y_fixed(Kokkos::ArithTraits::conj(y_(j))); if (y_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_(i) * y_fixed); + } + }); } if (x_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - Kokkos::ArithTraits::conj(alpha_) * - y_(i) * x_fixed); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(Kokkos::ArithTraits::conj(alpha_) * y_(i) * x_fixed); + } + }); } } } @@ -219,8 +189,7 @@ struct TeamParallelSYR2 { } // LayoutRight version: one team per row - KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR2_LayoutRightTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR2_LayoutRightTag, const member_type& team) const { if (alpha_ == Kokkos::ArithTraits::zero()) { // Nothing to do } else { @@ -234,46 +203,34 @@ struct TeamParallelSYR2 { const YComponentType y_fixed(y_(i)); if constexpr (tJustTranspose) { if (x_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - 
Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * x_fixed * y_(j)); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_fixed * y_(j)); + } + }); } if (y_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * y_fixed * x_(j)); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * y_fixed * x_(j)); + } + }); } } else { if (x_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - alpha_ * x_fixed * - Kokkos::ArithTraits::conj(y_(j))); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_fixed * Kokkos::ArithTraits::conj(y_(j))); + } + }); } if (y_fixed != Kokkos::ArithTraits::zero()) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - Kokkos::ArithTraits::conj(alpha_) * - y_fixed * - Kokkos::ArithTraits::conj(x_(j))); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i 
>= j))) { + A_(i, j) += AComponentType(Kokkos::ArithTraits::conj(alpha_) * y_fixed * + Kokkos::ArithTraits::conj(x_(j))); + } + }); } } } @@ -288,14 +245,11 @@ struct TeamParallelSYR2 { }; // Team parallel version of SYR2. -template -void teamParallelSyr2(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { - static_assert(std::is_integral::value, - "IndexType must be an integer"); +template +void teamParallelSyr2(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, + const XViewType& x, const YViewType& y, const AViewType& A) { + static_assert(std::is_integral::value, "IndexType must be an integer"); using AlphaCoeffType = typename AViewType::non_const_value_type; @@ -310,11 +264,9 @@ void teamParallelSyr2(const ExecutionSpace& space, return; } - constexpr bool isLayoutLeft = - std::is_same::value; + constexpr bool isLayoutLeft = std::is_same::value; using layout_tag = - typename std::conditional::type; + typename std::conditional::type; using TeamPolicyType = Kokkos::TeamPolicy; TeamPolicyType teamPolicy; if (isLayoutLeft) { @@ -325,9 +277,8 @@ void teamParallelSyr2(const ExecutionSpace& space, teamPolicy = TeamPolicyType(space, A.extent(0), Kokkos::AUTO); } - TeamParallelSYR2 - functor(alpha, x, y, A); + TeamParallelSYR2 functor( + alpha, x, y, A); Kokkos::parallel_for("KokkosBlas::syr2[teamParallel]", teamPolicy, functor); } @@ -339,28 +290,22 @@ void teamParallelSyr2(const ExecutionSpace& space, // // The 'enable_if' makes sure unused kernels are not instantiated. 
-template ()>::type* = nullptr> -void generalSyr2Impl(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { - threadParallelSyr2(space, alpha, x, y, A); +template ()>::type* = nullptr> +void generalSyr2Impl(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, const XViewType& x, + const YViewType& y, const AViewType& A) { + threadParallelSyr2(space, alpha, + x, y, A); } -template ()>::type* = nullptr> -void generalSyr2Impl(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, - const AViewType& A) { - teamParallelSyr2(space, alpha, x, y, A); +template ()>::type* = nullptr> +void generalSyr2Impl(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, const XViewType& x, + const YViewType& y, const AViewType& A) { + teamParallelSyr2(space, alpha, x, + y, A); } } // namespace Impl diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_spec.hpp index 01637ba1d446..a8ae741edec2 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr2_spec.hpp @@ -40,19 +40,16 @@ struct syr2_eti_spec_avail { // specializations go in this header file. We may spread out definitions (see // _INST macro below) across one or more .cpp files. 
// -#define KOKKOSBLAS2_SYR2_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct syr2_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR2_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr2_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -67,22 +64,17 @@ namespace Impl { // // Implementation of KokkosBlas::syr2. -template < - class ExecutionSpace, class XViewType, class YViewType, class AViewType, - bool tpl_spec_avail = syr2_tpl_spec_avail::value, - bool eti_spec_avail = syr2_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = syr2_eti_spec_avail::value> struct SYR2 { - static void syr2(const ExecutionSpace& space, const char trans[], - const char uplo[], - const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, const AViewType& A) + static void syr2(const ExecutionSpace& space, const char trans[], const char uplo[], + const typename AViewType::const_value_type& alpha, const XViewType& x, const YViewType& y, + const AViewType& A) #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::syr2[ETI]" - : "KokkosBlas::syr2[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::syr2[ETI]" + : "KokkosBlas::syr2[noETI]"); typedef typename AViewType::size_type size_type; const size_type numRows = A.extent(0); @@ -92,41 +84,33 @@ struct SYR2 { bool justUp = (uplo[0] == 'U') || (uplo[0] == 'u'); // Prefer int as the index type, but use a larsyr2 type if needed. 
- if ((numRows < static_cast(INT_MAX)) && - (numCols < static_cast(INT_MAX))) { + if ((numRows < static_cast(INT_MAX)) && (numCols < static_cast(INT_MAX))) { if (justTranspose) { if (justUp) { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } else { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } } else { if (justUp) { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } else { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } } } else { if (justTranspose) { if (justUp) { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } else { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } } else { if (justUp) { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, A); } else { - generalSyr2Impl(space, alpha, x, y, A); + generalSyr2Impl(space, alpha, x, y, + A); } } } @@ -149,30 +133,24 @@ struct SYR2 { // We may spread out definitions (see _DEF macro below) across one or more .cpp // files. 
// -#define KOKKOSBLAS2_SYR2_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - extern template struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_SYR2_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSBLAS2_SYR2_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_SYR2_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_impl.hpp index 685ca75997cc..7685fd4b4b3d 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_impl.hpp @@ -27,16 +27,13 @@ namespace Impl { // Functor for the thread parallel version of SYR. // This functor parallelizes over rows of the input matrix A. 
-template +template struct ThreadParallelSYR { using AlphaCoeffType = typename AViewType::non_const_value_type; using XComponentType = typename XViewType::non_const_value_type; using AComponentType = typename AViewType::non_const_value_type; - ThreadParallelSYR(const AlphaCoeffType& alpha, const XViewType& x, - const AViewType& A) - : alpha_(alpha), x_(x), A_(A) { + ThreadParallelSYR(const AlphaCoeffType& alpha, const XViewType& x, const AViewType& A) : alpha_(alpha), x_(x), A_(A) { // Nothing to do } @@ -50,18 +47,14 @@ struct ThreadParallelSYR { if constexpr (tJustTranspose) { for (IndexType j = 0; j < N; ++j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { A_(i, j) += AComponentType(alpha_ * x_fixed * x_(j)); } } } else { for (IndexType j = 0; j < N; ++j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - alpha_ * x_fixed * - Kokkos::ArithTraits::conj(x_(j))); + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_fixed * Kokkos::ArithTraits::conj(x_(j))); } } } @@ -75,13 +68,10 @@ struct ThreadParallelSYR { }; // Thread parallel version of SYR. 
-template -void threadParallelSyr(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, +template +void threadParallelSyr(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, const XViewType& x, const AViewType& A) { - static_assert(std::is_integral::value, - "IndexType must be an integer"); + static_assert(std::is_integral::value, "IndexType must be an integer"); using AlphaCoeffType = typename AViewType::non_const_value_type; @@ -90,12 +80,9 @@ void threadParallelSyr(const ExecutionSpace& space, } else if (alpha == Kokkos::ArithTraits::zero()) { // no entries to update } else { - Kokkos::RangePolicy rangePolicy(space, 0, - A.extent(0)); - ThreadParallelSYR - functor(alpha, x, A); - Kokkos::parallel_for("KokkosBlas::syr[threadParallel]", rangePolicy, - functor); + Kokkos::RangePolicy rangePolicy(space, 0, A.extent(0)); + ThreadParallelSYR functor(alpha, x, A); + Kokkos::parallel_for("KokkosBlas::syr[threadParallel]", rangePolicy, functor); } } @@ -106,8 +93,7 @@ struct TeamParallelSYR_LayoutRightTag {}; // Functor for the team parallel version of SYR, designed for // performance on GPUs. The kernel depends on the layout of A. 
-template +template struct TeamParallelSYR { using AlphaCoeffType = typename AViewType::non_const_value_type; using XComponentType = typename XViewType::non_const_value_type; @@ -116,16 +102,13 @@ struct TeamParallelSYR { using policy_type = Kokkos::TeamPolicy; using member_type = typename policy_type::member_type; - TeamParallelSYR(const AlphaCoeffType& alpha, const XViewType& x, - const AViewType& A) - : alpha_(alpha), x_(x), A_(A) { + TeamParallelSYR(const AlphaCoeffType& alpha, const XViewType& x, const AViewType& A) : alpha_(alpha), x_(x), A_(A) { // Nothing to do } public: // LayoutLeft version: one team per column - KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR_LayoutLeftTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR_LayoutLeftTag, const member_type& team) const { // Condition 'alpha_ == zero' has already been checked const IndexType j(team.league_rank()); if (x_(j) == Kokkos::ArithTraits::zero()) { @@ -134,30 +117,24 @@ struct TeamParallelSYR { const IndexType M(A_.extent(0)); if constexpr (tJustTranspose) { const XComponentType x_fixed(x_(j)); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * x_(i) * x_fixed); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_(i) * x_fixed); + } + }); } else { - const XComponentType x_fixed( - Kokkos::ArithTraits::conj(x_(j))); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * x_(i) * x_fixed); - } - }); + const XComponentType x_fixed(Kokkos::ArithTraits::conj(x_(j))); + 
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M), [&](const IndexType& i) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_(i) * x_fixed); + } + }); } } } // LayoutRight version: one team per row - KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR_LayoutRightTag, - const member_type& team) const { + KOKKOS_INLINE_FUNCTION void operator()(TeamParallelSYR_LayoutRightTag, const member_type& team) const { // Condition 'alpha_ == zero' has already been checked const IndexType i(team.league_rank()); if (x_(i) == Kokkos::ArithTraits::zero()) { @@ -166,23 +143,17 @@ struct TeamParallelSYR { const IndexType N(A_.extent(1)); const XComponentType x_fixed(x_(i)); if constexpr (tJustTranspose) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType(alpha_ * x_fixed * x_(j)); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_fixed * x_(j)); + } + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { - if (((tJustUp == true) && (i <= j)) || - ((tJustUp == false) && (i >= j))) { - A_(i, j) += AComponentType( - alpha_ * x_fixed * - Kokkos::ArithTraits::conj(x_(j))); - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const IndexType& j) { + if (((tJustUp == true) && (i <= j)) || ((tJustUp == false) && (i >= j))) { + A_(i, j) += AComponentType(alpha_ * x_fixed * Kokkos::ArithTraits::conj(x_(j))); + } + }); } } } @@ -194,13 +165,10 @@ struct TeamParallelSYR { }; // Team parallel version of SYR. 
-template -void teamParallelSyr(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const AViewType& A) { - static_assert(std::is_integral::value, - "IndexType must be an integer"); +template +void teamParallelSyr(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, const XViewType& x, + const AViewType& A) { + static_assert(std::is_integral::value, "IndexType must be an integer"); using AlphaCoeffType = typename AViewType::non_const_value_type; @@ -212,11 +180,9 @@ void teamParallelSyr(const ExecutionSpace& space, return; } - constexpr bool isLayoutLeft = - std::is_same_v; + constexpr bool isLayoutLeft = std::is_same_v; using layout_tag = - typename std::conditional::type; + typename std::conditional::type; using TeamPolicyType = Kokkos::TeamPolicy; TeamPolicyType teamPolicy; if (isLayoutLeft) { @@ -227,9 +193,7 @@ void teamParallelSyr(const ExecutionSpace& space, teamPolicy = TeamPolicyType(space, A.extent(0), Kokkos::AUTO); } - TeamParallelSYR - functor(alpha, x, A); + TeamParallelSYR functor(alpha, x, A); Kokkos::parallel_for("KokkosBlas::syr[teamParallel]", teamPolicy, functor); } @@ -241,26 +205,18 @@ void teamParallelSyr(const ExecutionSpace& space, // // The 'enable_if' makes sure unused kernels are not instantiated. 
-template ()>::type* = nullptr> -void generalSyrImpl(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const AViewType& A) { - threadParallelSyr(space, alpha, x, A); +template ()>::type* = nullptr> +void generalSyrImpl(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, const XViewType& x, + const AViewType& A) { + threadParallelSyr(space, alpha, x, A); } -template ()>::type* = nullptr> -void generalSyrImpl(const ExecutionSpace& space, - const typename AViewType::const_value_type& alpha, - const XViewType& x, const AViewType& A) { - teamParallelSyr(space, alpha, x, A); +template ()>::type* = nullptr> +void generalSyrImpl(const ExecutionSpace& space, const typename AViewType::const_value_type& alpha, const XViewType& x, + const AViewType& A) { + teamParallelSyr(space, alpha, x, A); } } // namespace Impl diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_spec.hpp index b07c3a144602..58c77536182a 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_syr_spec.hpp @@ -40,16 +40,14 @@ struct syr_eti_spec_avail { // specializations go in this header file. We may spread out definitions (see // _INST macro below) across one or more .cpp files. 
// -#define KOKKOSBLAS2_SYR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct syr_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -65,20 +63,15 @@ namespace Impl { // Implementation of KokkosBlas::syr. template ::value, - bool eti_spec_avail = - syr_eti_spec_avail::value> + bool tpl_spec_avail = syr_tpl_spec_avail::value, + bool eti_spec_avail = syr_eti_spec_avail::value> struct SYR { - static void syr(const ExecutionSpace& space, const char trans[], - const char uplo[], - const typename AViewType::const_value_type& alpha, - const XViewType& x, const AViewType& A) + static void syr(const ExecutionSpace& space, const char trans[], const char uplo[], + const typename AViewType::const_value_type& alpha, const XViewType& x, const AViewType& A) #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY { - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::syr[ETI]" - : "KokkosBlas::syr[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::syr[ETI]" + : "KokkosBlas::syr[noETI]"); typedef typename AViewType::size_type size_type; const size_type numRows = A.extent(0); @@ -88,41 +81,32 @@ struct SYR { bool justUp = (uplo[0] == 'U') || (uplo[0] == 'u'); // Prefer int as the index type, but use a larsyr type if needed. 
- if ((numRows < static_cast(INT_MAX)) && - (numCols < static_cast(INT_MAX))) { + if ((numRows < static_cast(INT_MAX)) && (numCols < static_cast(INT_MAX))) { if (justTranspose) { if (justUp) { - generalSyrImpl( - space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } else { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } } else { if (justUp) { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } else { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } } } else { if (justTranspose) { if (justUp) { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } else { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } } else { if (justUp) { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } else { - generalSyrImpl(space, alpha, x, A); + generalSyrImpl(space, alpha, x, A); } } } @@ -145,24 +129,20 @@ struct SYR { // We may spread out definitions (see _DEF macro below) across one or more .cpp // files. 
// -#define KOKKOSBLAS2_SYR_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - extern template struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_SYR_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + extern template struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSBLAS2_SYR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - template struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSBLAS2_SYR_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_impl.hpp index 5e43cae7d4bf..19e2bde931e6 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_impl.hpp @@ -26,51 +26,41 @@ namespace Impl { template struct TeamGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, OpA op, const int m, const int n, - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, - const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, OpA op, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, + const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); // default OpA = 
OpID - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, - const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { - return invoke(member, OpID{}, m, n, alpha, A, as0, as1, x, xs0, beta, y, - ys0); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + return invoke(member, OpID{}, m, n, alpha, A, as0, as1, x, xs0, beta, y, ys0); } }; template struct TeamVectorGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, OpA op, const int m, const int n, - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, - const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, OpA op, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, + const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); // default OpA = OpID - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, - const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, - const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { - return invoke(member, OpID{}, m, n, alpha, A, as0, as1, x, xs0, beta, y, - 
ys0); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + return invoke(member, OpID{}, m, n, alpha, A, as0, as1, x, xs0, beta, y, ys0); } }; @@ -79,13 +69,12 @@ struct TeamVectorGemvInternal { /// ==================== template <> -template +template KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, OpA op, const int m, const int n, - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, - const ScalarType beta, + const MemberType &member, OpA op, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -102,29 +91,26 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( if (beta != one) member.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m), - [&](const int &i) { - ValueYType t(0); - const ValueAType *KOKKOS_RESTRICT tA = (A + i * as0); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m), [&](const int &i) { + ValueYType t(0); + const ValueAType *KOKKOS_RESTRICT tA = (A + i * as0); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int j = 0; j < n; ++j) - t += op(tA[j * as1]) * x[j * xs0]; - y[i * ys0] += alpha * t; - }); + for (int j = 0; j < n; ++j) t += op(tA[j * as1]) * x[j * xs0]; + y[i * ys0] += alpha * t; + }); } return 0; } template <> -template +template KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, OpA /* op */, const int m, const int n, 
- const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, - const ScalarType beta, + const MemberType &member, OpA /* op */, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -149,13 +135,10 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( // Made this non-const in order to WORKAROUND issue #349 int mb = mb_a < mb_b ? mb_a : mb_b, mp = m % mb; - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, (m / mb) + (mp > 0)), - [&](const int &ii) { - const int i = ii * mb; - inner.serial_invoke(alpha, A + i * as0, x, - (i + mb) > m ? (m - i) : mb, - n, y + i * ys0); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, (m / mb) + (mp > 0)), [&](const int &ii) { + const int i = ii * mb; + inner.serial_invoke(alpha, A + i * as0, x, (i + mb) > m ? 
(m - i) : mb, n, y + i * ys0); + }); member.team_barrier(); } @@ -167,14 +150,12 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( /// ==================== template <> -template -KOKKOS_INLINE_FUNCTION int -TeamVectorGemvInternal::invoke( - const MemberType &member, OpA op, const int m, const int n, - const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, - const ScalarType beta, +template +KOKKOS_INLINE_FUNCTION int TeamVectorGemvInternal::invoke( + const MemberType &member, OpA op, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -196,12 +177,8 @@ TeamVectorGemvInternal::invoke( const ValueAType *KOKKOS_RESTRICT tA = (A + i * as0); Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(member, n), - [&](const int &j, ValueYType &update) { - update += op(tA[j * as1]) * x[j * xs0]; - }, - t); - Kokkos::single(Kokkos::PerThread(member), - [&]() { y[i * ys0] += alpha * t; }); + [&](const int &j, ValueYType &update) { update += op(tA[j * as1]) * x[j * xs0]; }, t); + Kokkos::single(Kokkos::PerThread(member), [&]() { y[i * ys0] += alpha * t; }); }); } return 0; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_spec.hpp index d46fb7be6f84..c3cf43b7432f 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas2_team_gemv_spec.hpp @@ -25,28 +25,19 @@ namespace KokkosBlas { -template +template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& /*member*/, - const ScalarType /*alpha*/, - const AViewType& /*A*/, - const xViewType& /*x*/, - const ScalarType 
/*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& /*member*/, const ScalarType /*alpha*/, + const AViewType& /*A*/, const xViewType& /*x*/, const ScalarType /*beta*/, const yViewType& /*y*/); }; template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& /*member*/, - const ScalarType /*alpha*/, - const AViewType& /*A*/, - const xViewType& /*x*/, - const ScalarType /*beta*/, + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& /*member*/, const ScalarType /*alpha*/, + const AViewType& /*A*/, const xViewType& /*x*/, const ScalarType /*beta*/, const yViewType& /*y*/); }; @@ -56,31 +47,25 @@ struct TeamVectorGemv { template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "KokkosBlas::TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "KokkosBlas::TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke(member, A.extent(0), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), x.data(), x.stride_0(), + beta, y.data(), y.stride_0()); } }; template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "KokkosBlas::TeamGemv requires rank-2 A matrix"); - return 
Impl::TeamGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "KokkosBlas::TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke(member, A.extent(0), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, + y.data(), y.stride_0()); } }; @@ -90,31 +75,25 @@ struct TeamGemv { template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke(member, A.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), x.data(), x.stride_0(), + beta, y.data(), y.stride_0()); } }; template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, A.extent(1), A.extent(0), alpha, 
A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke(member, A.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, + y.data(), y.stride_0()); } }; @@ -124,33 +103,25 @@ struct TeamGemv { template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), - y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke(member, Impl::OpConj{}, A.extent(1), A.extent(0), + alpha, A.data(), A.stride_1(), A.stride_0(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } }; template struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), 
A.stride_0(), x.data(), x.stride_0(), beta, y.data(), - y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke(member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, + A.data(), A.stride_1(), A.stride_0(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } }; @@ -160,16 +131,13 @@ struct TeamGemv { template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "Batched TeamVectorGemv requires rank-2 A matrix"); - return Impl::TeamVectorGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "Batched TeamVectorGemv requires rank-2 A matrix"); + return Impl::TeamVectorGemvInternal::invoke(member, A.extent(0), A.extent(1), alpha, + A.data(), A.stride_0(), A.stride_1(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } }; @@ -179,16 +147,13 @@ struct TeamVectorGemv { template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "Batched TeamVectorGemv requires rank-2 A matrix"); - return Impl::TeamVectorGemvInternal::invoke( - member, A.extent(1), 
A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "Batched TeamVectorGemv requires rank-2 A matrix"); + return Impl::TeamVectorGemvInternal::invoke(member, A.extent(1), A.extent(0), alpha, + A.data(), A.stride_1(), A.stride_0(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } }; @@ -198,17 +163,13 @@ struct TeamVectorGemv { template struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::rank == 2, - "Batched TeamVectorGemv requires rank-2 A matrix"); + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::rank == 2, "Batched TeamVectorGemv requires rank-2 A matrix"); return Impl::TeamVectorGemvInternal::invoke( - member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), - A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), - y.stride_0()); + member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), x.data(), + x.stride_0(), beta, y.data(), y.stride_0()); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_dotbased_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_dotbased_impl.hpp index 26c4c9624ada..15c3c74ecd66 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_dotbased_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_dotbased_impl.hpp @@ -57,8 +57,7 @@ struct DotBasedGEMM { const size_A dotSize; // 
the length of the vectors in the dot products - DotBasedGEMM(const scalar_A& alpha_, const AV& A_, const BV& B_, - const scalar_C& beta_, const CV& C_) + DotBasedGEMM(const scalar_A& alpha_, const AV& A_, const BV& B_, const scalar_C& beta_, const CV& C_) : A(A_), B(B_), C(C_), @@ -69,52 +68,39 @@ struct DotBasedGEMM { dotSize(A.extent(0)) {} void run(const ExecSpace& space, bool conjugateTranspose) { - multipleReductionWorkDistribution( - dotSize, numCrows * numCcols, numDivPerDot); + multipleReductionWorkDistribution(dotSize, numCrows * numCcols, numDivPerDot); const size_C ndots = numCrows * numCcols; // Number of dot products numTeams = ndots * numDivPerDot; // Initialize C matrix if beta != 1 if (beta == CVT::zero()) { - Kokkos::MDRangePolicy> policyInit( - space, {0, 0}, {numCrows, numCcols}); - Kokkos::parallel_for("Initialize C for Dot Product Based GEMM", - policyInit, *this); + Kokkos::MDRangePolicy> policyInit(space, {0, 0}, {numCrows, numCcols}); + Kokkos::parallel_for("Initialize C for Dot Product Based GEMM", policyInit, *this); } else if (beta != CVT::one()) { - Kokkos::MDRangePolicy> policyInit( - space, {0, 0}, {numCrows, numCcols}); - Kokkos::parallel_for("Initialize C for Dot Product Based GEMM", - policyInit, *this); + Kokkos::MDRangePolicy> policyInit(space, {0, 0}, {numCrows, numCcols}); + Kokkos::parallel_for("Initialize C for Dot Product Based GEMM", policyInit, *this); } // Multiply alpha*A^TB and add it to beta*C if (conjugateTranspose) { - Kokkos::TeamPolicy policyMult(space, numTeams, - Kokkos::AUTO); + Kokkos::TeamPolicy policyMult(space, numTeams, Kokkos::AUTO); Kokkos::parallel_for("Perform Dot Product Based GEMM", policyMult, *this); } else { - Kokkos::TeamPolicy policyMult(space, numTeams, - Kokkos::AUTO); + Kokkos::TeamPolicy policyMult(space, numTeams, Kokkos::AUTO); Kokkos::parallel_for("Perform Dot Product Based GEMM", policyMult, *this); } } KOKKOS_INLINE_FUNCTION - void operator()(const TagZero&, const size_C& rowId, - const 
size_C& colId) const { - C(rowId, colId) = CVT::zero(); - } + void operator()(const TagZero&, const size_C& rowId, const size_C& colId) const { C(rowId, colId) = CVT::zero(); } KOKKOS_INLINE_FUNCTION - void operator()(const TagInit&, const size_C& rowId, - const size_C& colId) const { + void operator()(const TagInit&, const size_C& rowId, const size_C& colId) const { C(rowId, colId) = beta * C(rowId, colId); } KOKKOS_INLINE_FUNCTION - void operator()(const TagMult&, - const typename Kokkos::TeamPolicy::member_type& - teamMember) const { + void operator()(const TagMult&, const typename Kokkos::TeamPolicy::member_type& teamMember) const { const size_C globalRank = teamMember.league_rank(); const size_C localRank = globalRank % numDivPerDot; const size_C i = globalRank / numDivPerDot; @@ -127,19 +113,13 @@ struct DotBasedGEMM { if (localRank == numDivPerDot - 1) end = dotSize; Kokkos::parallel_reduce( Kokkos::TeamThreadRange(teamMember, begin, end), - [&](const size_A k, scalar_C& update) { - update += alpha * A(k, rowId) * B(k, colId); - }, - result); + [&](const size_A k, scalar_C& update) { update += alpha * A(k, rowId) * B(k, colId); }, result); - Kokkos::single(Kokkos::PerTeam(teamMember), - [&]() { Kokkos::atomic_add(&C(rowId, colId), result); }); + Kokkos::single(Kokkos::PerTeam(teamMember), [&]() { Kokkos::atomic_add(&C(rowId, colId), result); }); } KOKKOS_INLINE_FUNCTION - void operator()(const TagMultCT&, - const typename Kokkos::TeamPolicy::member_type& - teamMember) const { + void operator()(const TagMultCT&, const typename Kokkos::TeamPolicy::member_type& teamMember) const { const size_C globalRank = teamMember.league_rank(); const size_C localRank = globalRank % numDivPerDot; const size_C i = globalRank / numDivPerDot; @@ -152,13 +132,9 @@ struct DotBasedGEMM { if (localRank == numDivPerDot - 1) end = dotSize; Kokkos::parallel_reduce( Kokkos::TeamThreadRange(teamMember, begin, end), - [&](const size_A k, scalar_C& update) { - update += alpha * 
AVT::conj(A(k, rowId)) * B(k, colId); - }, - result); + [&](const size_A k, scalar_C& update) { update += alpha * AVT::conj(A(k, rowId)) * B(k, colId); }, result); - Kokkos::single(Kokkos::PerTeam(teamMember), - [&]() { Kokkos::atomic_add(&C(rowId, colId), result); }); + Kokkos::single(Kokkos::PerTeam(teamMember), [&]() { Kokkos::atomic_add(&C(rowId, colId), result); }); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_impl.hpp index 1a0ab46bb392..675ef5d3a4c5 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_impl.hpp @@ -55,419 +55,320 @@ struct impl_gemm_choose_copy_layout { #endif // DeepCopy matrix block into scratch -template +template struct impl_deep_copy_matrix_block; -template -struct impl_deep_copy_matrix_block { +template +struct impl_deep_copy_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, - const ViewType& A, const int& offset_i, + static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, const ViewType& A, const int& offset_i, const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(0) && - offset_j + blockDim_j <= A.extent_int(1)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + if (offset_i + blockDim_i <= A.extent_int(0) && offset_j + blockDim_j <= A.extent_int(1)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_j = offset_j + j; + const int idx_j = offset_j + j; #endif - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), - [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { #ifdef 
KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_j = offset_j + j; + const int idx_j = offset_j + j; #endif - const int idx_i = offset_i + i; - A_scr(i, j) = A(idx_i, idx_j); - }); - }); + const int idx_i = offset_i + i; + A_scr(i, j) = A(idx_i, idx_j); + }); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_j = offset_j + j; + int idx_j = offset_j + j; #endif - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_j = offset_j + j; -#endif - const int idx_i = offset_i + i; - A_scr(i, j) = - idx_i < A.extent_int(0) && idx_j < A.extent_int(1) - ? A(idx_i, idx_j) - : ATV::zero(); - }); - }); + int idx_j = offset_j + j; +#endif + const int idx_i = offset_i + i; + A_scr(i, j) = idx_i < A.extent_int(0) && idx_j < A.extent_int(1) ? 
A(idx_i, idx_j) : ATV::zero(); + }); + }); } } }; -template -struct impl_deep_copy_matrix_block +struct impl_deep_copy_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, - const ViewType& A, const int& offset_i, + static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, const ViewType& A, const int& offset_i, const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(0) && - offset_j + blockDim_j <= A.extent_int(1)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { - const int idx_i = offset_i + i; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), - [&](const int j) { - const int idx_j = offset_j + j; - A_scr(i, j) = A(idx_i, idx_j); - }); - }); + if (offset_i + blockDim_i <= A.extent_int(0) && offset_j + blockDim_j <= A.extent_int(1)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + const int idx_i = offset_i + i; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { + const int idx_j = offset_j + j; + A_scr(i, j) = A(idx_i, idx_j); + }); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_i = offset_i + i; + int idx_i = offset_i + i; #endif - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_i = offset_i + i; -#endif - const int idx_j = offset_j + j; - A_scr(i, j) = - idx_i < A.extent_int(0) && idx_j < A.extent_int(1) - ? 
A(idx_i, idx_j) - : ATV::zero(); - }); - }); + int idx_i = offset_i + i; +#endif + const int idx_j = offset_j + j; + A_scr(i, j) = idx_i < A.extent_int(0) && idx_j < A.extent_int(1) ? A(idx_i, idx_j) : ATV::zero(); + }); + }); } } }; -template -struct impl_deep_copy_matrix_block { +template +struct impl_deep_copy_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, - const ViewType& A, const int& offset_i, + static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, const ViewType& A, const int& offset_i, const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(1) && - offset_j + blockDim_j <= A.extent_int(0)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + if (offset_i + blockDim_i <= A.extent_int(1) && offset_j + blockDim_j <= A.extent_int(0)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_j = offset_j + j; + const int idx_j = offset_j + j; #endif - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), - [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_j = offset_j + j; + const int idx_j = offset_j + j; #endif - const int idx_i = offset_i + i; - A_scr(i, j) = A(idx_j, idx_i); - }); - }); + const int idx_i = offset_i + i; + A_scr(i, j) = A(idx_j, idx_i); + }); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_j = offset_j + j; + int idx_j = offset_j + j; #endif - Kokkos::parallel_for( - 
Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_j = offset_j + j; -#endif - const int idx_i = offset_i + i; - A_scr(i, j) = - idx_i < A.extent_int(1) && idx_j < A.extent_int(0) - ? A(idx_j, idx_i) - : ATV::zero(); - }); - }); + int idx_j = offset_j + j; +#endif + const int idx_i = offset_i + i; + A_scr(i, j) = idx_i < A.extent_int(1) && idx_j < A.extent_int(0) ? A(idx_j, idx_i) : ATV::zero(); + }); + }); } } }; -template -struct impl_deep_copy_matrix_block +struct impl_deep_copy_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, - const ViewType& A, const int& offset_i, + static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, const ViewType& A, const int& offset_i, const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(1) && - offset_j + blockDim_j <= A.extent_int(0)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + if (offset_i + blockDim_i <= A.extent_int(1) && offset_j + blockDim_j <= A.extent_int(0)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_i = offset_i + i; + const int idx_i = offset_i + i; #endif - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), - [&](const int j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_i = offset_i + i; + const int idx_i = offset_i + i; #endif - const int idx_j = offset_j + j; - A_scr(i, j) = A(idx_j, idx_i); - }); - }); + const int idx_j = offset_j + j; + A_scr(i, j) = A(idx_j, idx_i); + }); + }); } else { - 
Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_i = offset_i + i; + int idx_i = offset_i + i; #endif - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_i = offset_i + i; -#endif - const int idx_j = offset_j + j; - A_scr(i, j) = - idx_i < A.extent_int(1) && idx_j < A.extent_int(0) - ? A(idx_j, idx_i) - : ATV::zero(); - }); - }); + int idx_i = offset_i + i; +#endif + const int idx_j = offset_j + j; + A_scr(i, j) = idx_i < A.extent_int(1) && idx_j < A.extent_int(0) ? A(idx_j, idx_i) : ATV::zero(); + }); + }); } } }; -template -struct impl_deep_copy_matrix_block { +template +struct impl_deep_copy_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, - const ViewType& A, const int& offset_i, + static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, const ViewType& A, const int& offset_i, const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(1) && - offset_j + blockDim_j <= A.extent_int(0)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + if (offset_i + blockDim_i <= A.extent_int(1) && offset_j + blockDim_j <= A.extent_int(0)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_j = offset_j + j; + const int idx_j = offset_j + j; #endif - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), - [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 
blockDim_i), [&](const int i) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_j = offset_j + j; + const int idx_j = offset_j + j; #endif - const int idx_i = offset_i + i; - A_scr(i, j) = ATV::conj(A(idx_j, idx_i)); - }); - }); + const int idx_i = offset_i + i; + A_scr(i, j) = ATV::conj(A(idx_j, idx_i)); + }); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_j = offset_j + j; + int idx_j = offset_j + j; #endif - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_j = offset_j + j; -#endif - const int idx_i = offset_i + i; - A_scr(i, j) = - idx_i < A.extent_int(1) && idx_j < A.extent_int(0) - ? ATV::conj(A(idx_j, idx_i)) - : ATV::zero(); - }); - }); + int idx_j = offset_j + j; +#endif + const int idx_i = offset_i + i; + A_scr(i, j) = idx_i < A.extent_int(1) && idx_j < A.extent_int(0) ? 
ATV::conj(A(idx_j, idx_i)) : ATV::zero(); + }); + }); } } }; -template -struct impl_deep_copy_matrix_block +struct impl_deep_copy_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, - const ViewType& A, const int& offset_i, + static void copy(const TeamHandle& team, const ViewTypeScratch& A_scr, const ViewType& A, const int& offset_i, const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(1) && - offset_j + blockDim_j <= A.extent_int(0)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + if (offset_i + blockDim_i <= A.extent_int(1) && offset_j + blockDim_j <= A.extent_int(0)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_i = offset_i + i; + const int idx_i = offset_i + i; #endif - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), - [&](const int j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { #ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - const int idx_i = offset_i + i; + const int idx_i = offset_i + i; #endif - const int idx_j = offset_j + j; - A_scr(i, j) = ATV::conj(A(idx_j, idx_i)); - }); - }); + const int idx_j = offset_j + j; + A_scr(i, j) = ATV::conj(A(idx_j, idx_i)); + }); + }); } else { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { #ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_i = offset_i + i; + int idx_i = offset_i + i; #endif - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { #ifdef 
KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND - int idx_i = offset_i + i; -#endif - const int idx_j = offset_j + j; - A_scr(i, j) = - idx_i < A.extent_int(1) && idx_j < A.extent_int(0) - ? ATV::conj(A(idx_j, idx_i)) - : ATV::zero(); - }); - }); + int idx_i = offset_i + i; +#endif + const int idx_j = offset_j + j; + A_scr(i, j) = idx_i < A.extent_int(1) && idx_j < A.extent_int(0) ? ATV::conj(A(idx_j, idx_i)) : ATV::zero(); + }); + }); } } }; -template +template struct impl_update_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void update(const TeamHandle& team, const value_type& beta, - const ViewType& A, const value_type& alpha, - const ViewTypeScratch& A_scr, const int& offset_i, - const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(0) && - offset_j + blockDim_j <= A.extent_int(1)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { - const int idx_j = offset_j + j; - if (beta == ATV::zero()) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), - [&](const int i) { - const int idx_i = offset_i + i; - A(idx_i, idx_j) = alpha * A_scr(i, j); - }); - } else { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), - [&](const int i) { - const int idx_i = offset_i + i; - A(idx_i, idx_j) = beta * A(idx_i, idx_j) + - alpha * A_scr(i, j); - }); - } + static void update(const TeamHandle& team, const value_type& beta, const ViewType& A, const value_type& alpha, + const ViewTypeScratch& A_scr, const int& offset_i, const int& offset_j) { + if (offset_i + blockDim_i <= A.extent_int(0) && offset_j + blockDim_j <= A.extent_int(1)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_j), [&](const int j) { + const int idx_j = offset_j + j; + if (beta == ATV::zero()) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { + const int idx_i = offset_i + i; + A(idx_i, 
idx_j) = alpha * A_scr(i, j); + }); + } else { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_i), [&](const int i) { + const int idx_i = offset_i + i; + A(idx_i, idx_j) = beta * A(idx_i, idx_j) + alpha * A_scr(i, j); }); + } + }); } else { - const int range_i = offset_i + blockDim_i <= A.extent_int(0) - ? blockDim_i - : A.extent_int(0) % blockDim_i; - const int range_j = offset_j + blockDim_j <= A.extent_int(1) - ? blockDim_j - : A.extent_int(1) % blockDim_j; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, range_j), [&](const int j) { - const int idx_j = offset_j + j; - if (beta == ATV::zero()) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, range_i), - [&](const int i) { - const int idx_i = offset_i + i; - A(idx_i, idx_j) = alpha * A_scr(i, j); - }); - } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, range_i), [&](const int i) { - const int idx_i = offset_i + i; - A(idx_i, idx_j) = - beta * A(idx_i, idx_j) + alpha * A_scr(i, j); - }); - } + const int range_i = offset_i + blockDim_i <= A.extent_int(0) ? blockDim_i : A.extent_int(0) % blockDim_i; + const int range_j = offset_j + blockDim_j <= A.extent_int(1) ? 
blockDim_j : A.extent_int(1) % blockDim_j; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, range_j), [&](const int j) { + const int idx_j = offset_j + j; + if (beta == ATV::zero()) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, range_i), [&](const int i) { + const int idx_i = offset_i + i; + A(idx_i, idx_j) = alpha * A_scr(i, j); }); + } else { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, range_i), [&](const int i) { + const int idx_i = offset_i + i; + A(idx_i, idx_j) = beta * A(idx_i, idx_j) + alpha * A_scr(i, j); + }); + } + }); } } }; -template -struct impl_update_matrix_block { +template +struct impl_update_matrix_block { typedef typename ViewType::non_const_value_type value_type; typedef Kokkos::ArithTraits ATV; KOKKOS_INLINE_FUNCTION - static void update(const TeamHandle& team, const value_type& beta, - const ViewType& A, const value_type& alpha, - const ViewTypeScratch& A_scr, const int& offset_i, - const int& offset_j) { - if (offset_i + blockDim_i <= A.extent_int(0) && - offset_j + blockDim_j <= A.extent_int(1)) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { - const int idx_i = offset_i + i; - if (beta == ATV::zero()) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), - [&](const int j) { - const int idx_j = offset_j + j; - A(idx_i, idx_j) = alpha * A_scr(i, j); - }); - } else { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), - [&](const int j) { - const int idx_j = offset_j + j; - A(idx_i, idx_j) = beta * A(idx_i, idx_j) + - alpha * A_scr(i, j); - }); - } + static void update(const TeamHandle& team, const value_type& beta, const ViewType& A, const value_type& alpha, + const ViewTypeScratch& A_scr, const int& offset_i, const int& offset_j) { + if (offset_i + blockDim_i <= A.extent_int(0) && offset_j + blockDim_j <= A.extent_int(1)) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockDim_i), [&](const int i) { + const int idx_i = offset_i + i; 
+ if (beta == ATV::zero()) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { + const int idx_j = offset_j + j; + A(idx_i, idx_j) = alpha * A_scr(i, j); + }); + } else { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockDim_j), [&](const int j) { + const int idx_j = offset_j + j; + A(idx_i, idx_j) = beta * A(idx_i, idx_j) + alpha * A_scr(i, j); }); + } + }); } else { - const int range_i = offset_i + blockDim_i <= A.extent_int(0) - ? blockDim_i - : A.extent_int(0) % blockDim_i; - const int range_j = offset_j + blockDim_j <= A.extent_int(1) - ? blockDim_j - : A.extent_int(1) % blockDim_j; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, range_i), [&](const int i) { - const int idx_i = offset_i + i; - if (beta == ATV::zero()) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, range_j), - [&](const int j) { - const int idx_j = offset_j + j; - A(idx_i, idx_j) = alpha * A_scr(i, j); - }); - } else { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, range_j), [&](const int j) { - const int idx_j = offset_j + j; - A(idx_i, idx_j) = - beta * A(idx_i, idx_j) + alpha * A_scr(i, j); - }); - } + const int range_i = offset_i + blockDim_i <= A.extent_int(0) ? blockDim_i : A.extent_int(0) % blockDim_i; + const int range_j = offset_j + blockDim_j <= A.extent_int(1) ? 
blockDim_j : A.extent_int(1) % blockDim_j; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, range_i), [&](const int i) { + const int idx_i = offset_i + i; + if (beta == ATV::zero()) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, range_j), [&](const int j) { + const int idx_j = offset_j + j; + A(idx_i, idx_j) = alpha * A_scr(i, j); + }); + } else { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, range_j), [&](const int j) { + const int idx_j = offset_j + j; + A(idx_i, idx_j) = beta * A(idx_i, idx_j) + alpha * A_scr(i, j); }); + } + }); } } }; @@ -475,14 +376,11 @@ struct impl_update_matrix_block -KOKKOS_INLINE_FUNCTION void impl_team_gemm_block(const TeamHandle& team, - const ViewTypeC& C, - const ViewTypeA& A, +KOKKOS_INLINE_FUNCTION void impl_team_gemm_block(const TeamHandle& team, const ViewTypeC& C, const ViewTypeA& A, const ViewTypeB& B) { typedef typename ViewTypeC::non_const_value_type ScalarC; // GNU COMPILER BUG WORKAROUND -#if defined(KOKKOS_COMPILER_GNU) && \ - (!defined(__CUDA_ARCH__) || !defined(__HIP_DEVICE_COMPILE__)) +#if defined(KOKKOS_COMPILER_GNU) && (!defined(__CUDA_ARCH__) || !defined(__HIP_DEVICE_COMPILE__)) int blockA0 = A.extent_int(0); int blockA1 = A.extent_int(1); int blockB1 = B.extent_int(1); @@ -491,36 +389,34 @@ KOKKOS_INLINE_FUNCTION void impl_team_gemm_block(const TeamHandle& team, const int blockA1 = A.extent_int(1); const int blockB1 = B.extent_int(1); #endif - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockA0), [&](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockA0), [&](const int i) { #ifndef KOKKOSKERNELS_ENABLE_OMP_SIMD - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockB1 / 4), - [&](const int B_j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockB1 / 4), [&](const int B_j) { #else #pragma omp simd for(int B_j=0; B_j @@ -565,8 +461,8 @@ struct impl_gemm_label<2, 2> { static constexpr const char* label = "KokkosBlas::gemm[CC]"; }; -template 
+template struct GEMMImpl { ViewTypeA A; ViewTypeB B; @@ -580,18 +476,14 @@ struct GEMMImpl { int scratch_level; ScalarC alpha, beta; - typedef Kokkos::View + typedef Kokkos::View ViewTypeAScratch; - typedef Kokkos::View + typedef Kokkos::View ViewTypeBScratch; - typedef Kokkos::View + typedef Kokkos::View ViewTypeCScratch; - GEMMImpl(const ScalarA& alpha_, const ViewTypeA& A_, const ViewTypeB& B_, - const ScalarC& beta_, const ViewTypeC& C_) + GEMMImpl(const ScalarA& alpha_, const ViewTypeA& A_, const ViewTypeB& B_, const ScalarC& beta_, const ViewTypeC& C_) : A(A_), B(B_), C(C_), @@ -602,12 +494,10 @@ struct GEMMImpl { beta = beta_; } - void run(const ExecSpace& space, int team_size, int vector_length, - int scr_level) { - scratch_level = scr_level; - int scratch_memory_size = ViewTypeAScratch::shmem_size() + - ViewTypeBScratch::shmem_size() + - ViewTypeCScratch::shmem_size(); + void run(const ExecSpace& space, int team_size, int vector_length, int scr_level) { + scratch_level = scr_level; + int scratch_memory_size = + ViewTypeAScratch::shmem_size() + ViewTypeBScratch::shmem_size() + ViewTypeCScratch::shmem_size(); #if defined(KOKKOS_ENABLE_HIP) // Note lbv, 10/29/20: The LaunchBounds<384, 2> leads @@ -616,23 +506,19 @@ struct GEMMImpl { // are allocated... Switching to LaunchBounds<384, 0> fixes // that problem but I'm not sure if that it a good perf // parameter or why it is set to 2 for Cuda? 
- Kokkos::TeamPolicy> policy( - space, num_blocks_0 * num_blocks_1, team_size, vector_length); + Kokkos::TeamPolicy> policy(space, num_blocks_0 * num_blocks_1, team_size, + vector_length); #else - Kokkos::TeamPolicy> policy( - space, num_blocks_0 * num_blocks_1, team_size, vector_length); + Kokkos::TeamPolicy> policy(space, num_blocks_0 * num_blocks_1, team_size, + vector_length); #endif - Kokkos::parallel_for( - impl_gemm_label::label, - policy.set_scratch_size(scratch_level, - Kokkos::PerTeam(scratch_memory_size)), - *this); + Kokkos::parallel_for(impl_gemm_label::label, + policy.set_scratch_size(scratch_level, Kokkos::PerTeam(scratch_memory_size)), *this); } KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { // This team is responsible for computing a single block of C const int league_rank = team.league_rank(); const int num_blocks = num_blocks_1; @@ -642,11 +528,9 @@ struct GEMMImpl { ViewTypeAScratch A_scr(team.team_scratch(scratch_level)); ViewTypeBScratch B_scr(team.team_scratch(scratch_level)); ViewTypeCScratch C_scr(team.team_scratch(scratch_level)); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, blockA0), [&](const int i) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockB1), - [&](const int j) { C_scr(i, j) = 0; }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, blockA0), [&](const int i) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, blockB1), [&](const int j) { C_scr(i, j) = 0; }); + }); team.team_barrier(); // Move along the inner dimension in blocks @@ -654,22 +538,16 @@ struct GEMMImpl { for (int A_j = 0; A_j < length; A_j += blockA1) { // Load A block into scratch - impl_deep_copy_matrix_block< - typename Kokkos::TeamPolicy::member_type, ViewTypeAScratch, - ViewTypeA, - typename impl_gemm_choose_copy_layout< - ExecSpace, typename ViewTypeA::array_layout, - typename 
ViewTypeAScratch::array_layout>::type, - blockA0, blockA1, TransposeA>::copy(team, A_scr, A, i_offset, A_j); + impl_deep_copy_matrix_block::member_type, ViewTypeAScratch, ViewTypeA, + typename impl_gemm_choose_copy_layout::type, + blockA0, blockA1, TransposeA>::copy(team, A_scr, A, i_offset, A_j); // Load B block into scratch - impl_deep_copy_matrix_block< - typename Kokkos::TeamPolicy::member_type, ViewTypeBScratch, - ViewTypeB, - typename impl_gemm_choose_copy_layout< - ExecSpace, typename ViewTypeB::array_layout, - typename ViewTypeBScratch::array_layout>::type, - blockA1, blockB1, TransposeB>::copy(team, B_scr, B, A_j, j_offset); + impl_deep_copy_matrix_block::member_type, ViewTypeBScratch, ViewTypeB, + typename impl_gemm_choose_copy_layout::type, + blockA1, blockB1, TransposeB>::copy(team, B_scr, B, A_j, j_offset); // Wait for A and B block to be in scratch memory team.team_barrier(); @@ -682,10 +560,9 @@ struct GEMMImpl { team.team_barrier(); } // Write back the C block from scratch to main memory - impl_update_matrix_block< - typename Kokkos::TeamPolicy::member_type, ViewTypeC, - ViewTypeCScratch, typename ViewTypeC::array_layout, blockA0, - blockB1>::update(team, beta, C, alpha, C_scr, i_offset, j_offset); + impl_update_matrix_block::member_type, ViewTypeC, ViewTypeCScratch, + typename ViewTypeC::array_layout, blockA0, blockB1>::update(team, beta, C, alpha, C_scr, + i_offset, j_offset); } }; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_spec.hpp index 367a8dad3f47..f085b5fc92f1 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_gemm_spec.hpp @@ -43,35 +43,27 @@ struct gemm_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUTA, LAYOUTB, \ - LAYOUTC, EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct gemm_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct gemm_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -#define KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutLeft, \ - Kokkos::LayoutLeft, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutLeft, \ - Kokkos::LayoutRight, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutRight, \ - Kokkos::LayoutLeft, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutRight, \ - Kokkos::LayoutRight, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutLeft, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutRight, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutLeft, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutRight, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) // Include the actual specialization declarations #include @@ -85,37 +77,24 @@ namespace Impl { // // Implementation of KokkosBlas::gemm. 
-template < - class execution_space, class AViewType, class BViewType, class CViewType, - bool tpl_spec_avail = gemm_tpl_spec_avail::value, - bool eti_spec_avail = gemm_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = gemm_eti_spec_avail::value> struct GEMM { - static void gemm(const execution_space& space, const char transA[], - const char transB[], - typename AViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B, - typename CViewType::const_value_type& beta, - const CViewType& C) + static void gemm(const execution_space& space, const char transA[], const char transB[], + typename AViewType::const_value_type& alpha, const AViewType& A, const BViewType& B, + typename CViewType::const_value_type& beta, const CViewType& C) #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "BViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "CViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 2, - "BViewType must have rank 2."); - static_assert(static_cast(CViewType::rank) == 2, - "CViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "CViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "BViewType must have rank 2."); + static_assert(static_cast(CViewType::rank) == 2, "CViewType must have rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosBlas::gemm[ETI]" - : "KokkosBlas::gemm[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::gemm[ETI]" + : "KokkosBlas::gemm[noETI]"); // Figure out Scalar Types typedef typename AViewType::non_const_value_type ScalarA; typedef typename BViewType::non_const_value_type ScalarB; @@ -125,29 +104,22 @@ struct GEMM { const int M = static_cast(C.extent(0)); const int N = static_cast(C.extent(1)); - const bool is_device_space = - KokkosKernels::Impl::kk_is_gpu_exec_space(); - const bool A_is_lr = std::is_same::value; - const bool A_is_tr = ((transA[0] == 'T') || (transA[0] == 't') || - (transA[0] == 'C') || (transA[0] == 'c')); - const bool B_is_tr = ((transB[0] == 'T') || (transB[0] == 't') || - (transB[0] == 'C') || (transB[0] == 'c')); + const bool is_device_space = KokkosKernels::Impl::kk_is_gpu_exec_space(); + const bool A_is_lr = std::is_same::value; + const bool A_is_tr = ((transA[0] == 'T') || (transA[0] == 't') || (transA[0] == 'C') || (transA[0] == 'c')); + const bool B_is_tr = ((transB[0] == 'T') || (transB[0] == 't') || (transB[0] == 'C') || (transB[0] == 'c')); // NOTE: these thresholds were copied from TPL CUBLAS, and may need to be // retuned constexpr int numDotsLayoutLeftThreshold = 1600; constexpr int numDotsLayoutRightThreshold = 100; - if (((!A_is_lr && A_is_tr && !B_is_tr && - M * N < numDotsLayoutLeftThreshold) || - (A_is_lr && A_is_tr && !B_is_tr && - M * N < numDotsLayoutRightThreshold)) && + if (((!A_is_lr && A_is_tr && !B_is_tr && M * N < numDotsLayoutLeftThreshold) || + (A_is_lr && A_is_tr && !B_is_tr && M * N < numDotsLayoutRightThreshold)) && is_device_space) { // call dot-based GEMM, only for C := beta * C + alpha * A^T * B, on // device bool A_is_conj = ((transA[0] == 'C') || (transA[0] == 'c')); - DotBasedGEMM - dotBasedGemm(alpha, A, B, beta, C); + DotBasedGEMM dotBasedGemm(alpha, A, B, beta, C); dotBasedGemm.run(space, A_is_conj); } else { @@ -155,116 +127,87 @@ struct GEMM { static constexpr 
int blockA0 = 24; static constexpr int blockB1 = 64; static constexpr int blockA1 = - (sizeof(ScalarA) * blockA0 * 16 + sizeof(ScalarB) * 16 * blockB1 + - sizeof(ScalarC) * blockA0 * blockB1 < + (sizeof(ScalarA) * blockA0 * 16 + sizeof(ScalarB) * 16 * blockB1 + sizeof(ScalarC) * blockA0 * blockB1 < 24000) ? 16 - : (sizeof(ScalarA) * blockA0 * 8 + sizeof(ScalarB) * 8 * blockB1 + - sizeof(ScalarC) * blockA0 * blockB1 < - 24000) - ? 8 - : (sizeof(ScalarA) * blockA0 * 4 + - sizeof(ScalarB) * 4 * blockB1 + - sizeof(ScalarC) * blockA0 * blockB1 < - 24000) - ? 4 - : 16; - int vector_length = blockB1 / 4; - int max_vector_length = - KokkosKernels::Impl::kk_get_max_vector_size(); + : (sizeof(ScalarA) * blockA0 * 8 + sizeof(ScalarB) * 8 * blockB1 + sizeof(ScalarC) * blockA0 * blockB1 < + 24000) + ? 8 + : (sizeof(ScalarA) * blockA0 * 4 + sizeof(ScalarB) * 4 * blockB1 + sizeof(ScalarC) * blockA0 * blockB1 < + 24000) + ? 4 + : 16; + int vector_length = blockB1 / 4; + int max_vector_length = KokkosKernels::Impl::kk_get_max_vector_size(); if (vector_length > max_vector_length) vector_length = max_vector_length; // Compute scratch space size - typedef KokkosBlas::Impl::GEMMImpl + typedef KokkosBlas::Impl::GEMMImpl gemm_dummy_type; - const int scratch_memory_size = - gemm_dummy_type::ViewTypeAScratch::required_allocation_size() + - gemm_dummy_type::ViewTypeBScratch::required_allocation_size() + - gemm_dummy_type::ViewTypeCScratch::required_allocation_size(); + const int scratch_memory_size = gemm_dummy_type::ViewTypeAScratch::required_allocation_size() + + gemm_dummy_type::ViewTypeBScratch::required_allocation_size() + + gemm_dummy_type::ViewTypeCScratch::required_allocation_size(); const int scratch_level = scratch_memory_size < 24000 ? 
0 : 1; // Figure out Team Sizes int team_size = 1; #if defined(KOKKOS_ENABLE_CUDA) - if (std::is_same::value) - team_size = blockA0; + if (std::is_same::value) team_size = blockA0; #endif #if defined(KOKKOS_ENABLE_HIP) - if (std::is_same::value) - team_size = blockA0; + if (std::is_same::value) team_size = blockA0; #endif #if defined(KOKKOS_ENABLE_ROCM) - if (std::is_same::value) - team_size = blockA0; + if (std::is_same::value) team_size = blockA0; #endif #if defined(KOKKOS_ENABLE_SYCL) - if (std::is_same::value) - team_size = blockA0; + if (std::is_same::value) team_size = blockA0; #endif // Call the correct kernel - if ((transA[0] == 'N' || transA[0] == 'n') && - (transB[0] == 'N' || transB[0] == 'n')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'N' || transA[0] == 'n') && (transB[0] == 'N' || transB[0] == 'n')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'T' || transA[0] == 't') && - (transB[0] == 'N' || transB[0] == 'n')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'T' || transA[0] == 't') && (transB[0] == 'N' || transB[0] == 'n')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'C' || transA[0] == 'c') && - (transB[0] == 'N' || transB[0] == 'n')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'C' || transA[0] == 'c') && (transB[0] == 'N' || transB[0] == 'n')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'N' || transA[0] == 'n') && - (transB[0] == 'T' || transB[0] == 't')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'N' || transA[0] == 'n') && (transB[0] == 'T' || transB[0] == 't')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'T' || transA[0] == 't') && - (transB[0] 
== 'T' || transB[0] == 't')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'T' || transA[0] == 't') && (transB[0] == 'T' || transB[0] == 't')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'C' || transA[0] == 'c') && - (transB[0] == 'T' || transB[0] == 't')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'C' || transA[0] == 'c') && (transB[0] == 'T' || transB[0] == 't')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'N' || transA[0] == 'n') && - (transB[0] == 'C' || transB[0] == 'c')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'N' || transA[0] == 'n') && (transB[0] == 'C' || transB[0] == 'c')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'T' || transA[0] == 't') && - (transB[0] == 'C' || transB[0] == 'c')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'T' || transA[0] == 't') && (transB[0] == 'C' || transB[0] == 'c')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } - if ((transA[0] == 'C' || transA[0] == 'c') && - (transB[0] == 'C' || transB[0] == 'c')) { - KokkosBlas::Impl::GEMMImpl + if ((transA[0] == 'C' || transA[0] == 'c') && (transB[0] == 'C' || transB[0] == 'c')) { + KokkosBlas::Impl::GEMMImpl gemm(alpha, A, B, beta, C); gemm.run(space, team_size, vector_length, scratch_level); } @@ -288,61 +231,45 @@ struct GEMM { // one or more .cpp files. 
// -#define KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, \ - LAYOUTC, EXEC_SPACE, MEM_SPACE) \ - extern template struct GEMM< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; +#define KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, EXEC_SPACE, MEM_SPACE) \ + extern template struct GEMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; -#define KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, \ - LAYOUTC, EXEC_SPACE, MEM_SPACE) \ - template struct GEMM< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; +#define KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, EXEC_SPACE, MEM_SPACE) \ + template struct GEMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; -#define KOKKOSBLAS3_GEMM_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutLeft, \ - Kokkos::LayoutLeft, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutLeft, \ - Kokkos::LayoutRight, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutRight, \ - Kokkos::LayoutLeft, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutRight, \ - Kokkos::LayoutRight, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBLAS3_GEMM_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutLeft, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + 
KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutRight, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutLeft, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutRight, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) -#define KOKKOSBLAS3_GEMM_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutLeft, \ - Kokkos::LayoutLeft, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutLeft, \ - Kokkos::LayoutRight, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutRight, \ - Kokkos::LayoutLeft, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutRight, \ - Kokkos::LayoutRight, LAYOUT, \ - EXEC_SPACE, MEM_SPACE) +#define KOKKOSBLAS3_GEMM_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutLeft, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutRight, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutLeft, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) \ + KOKKOSBLAS3_GEMM_ETI_SPEC_INST_LAYOUTS(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutRight, LAYOUT, EXEC_SPACE, \ + MEM_SPACE) #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_impl.hpp index a18367588902..8a1e9a7a4a45 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_impl.hpp @@ -32,10 +32,8 @@ namespace KokkosBlas { namespace Impl { template -void SerialTrmm_Invoke(const char side[], 
const char uplo[], const char trans[], - const char /*diag*/[], - typename BViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B) { +void SerialTrmm_Invoke(const char side[], const char uplo[], const char trans[], const char /*diag*/[], + typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { using KokkosBatched::Algo; using KokkosBatched::Diag; using KokkosBatched::SerialTrmmInternalLeftLower; @@ -43,8 +41,7 @@ void SerialTrmm_Invoke(const char side[], const char uplo[], const char trans[], using KokkosBatched::SerialTrmmInternalRightLower; using KokkosBatched::SerialTrmmInternalRightUpper; - char __side = tolower(side[0]), __uplo = tolower(uplo[0]), - __trans = tolower(trans[0]); + char __side = tolower(side[0]), __uplo = tolower(uplo[0]), __trans = tolower(trans[0]); //__diag = tolower(diag[0]); bool do_conj = true; @@ -53,79 +50,67 @@ void SerialTrmm_Invoke(const char side[], const char uplo[], const char trans[], //// Lower non-transpose //// if (__side == 'l' && __uplo == 'l' && __trans == 'n') SerialTrmmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); if (__side == 'r' && __uplo == 'l' && __trans == 'n') SerialTrmmInternalRightLower::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); //// Lower transpose ///// // Transpose A by simply swapping the dimensions (extent) and stride // 
parameters if (__side == 'l' && __uplo == 'l' && __trans == 't') SerialTrmmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); if (__side == 'r' && __uplo == 'l' && __trans == 't') SerialTrmmInternalRightUpper::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); //// Lower conjugate-transpose //// // Conjugate-Transpose A by simply swapping the dimensions (extent) and stride // parameters if (__side == 'l' && __uplo == 'l' && __trans == 'c') SerialTrmmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); if (__side == 'r' && __uplo == 'l' && __trans == 'c') SerialTrmmInternalRightUpper::invoke( - Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); //// Upper non-transpose //// if (__side == 'l' && __uplo == 'u' && __trans == 'n') 
SerialTrmmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); if (__side == 'r' && __uplo == 'u' && __trans == 'n') SerialTrmmInternalRightUpper::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(0), A.extent(1), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); //// Upper transpose // Transpose A by simply swapping the dimensions (extent) and stride // parameters if (__side == 'l' && __uplo == 'u' && __trans == 't') SerialTrmmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); if (__side == 'r' && __uplo == 'u' && __trans == 't') SerialTrmmInternalRightLower::invoke( - Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, !do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); //// Upper conjugate-transpose //// // Conjugate-Transpose A by simply swapping the dimensions (extent) and stride // parameters if (__side == 'l' && __uplo == 'u' && __trans == 'c') 
SerialTrmmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); if (__side == 'r' && __uplo == 'u' && __trans == 'c') SerialTrmmInternalRightLower::invoke( - Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), - B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), - B.data(), B.stride(0), B.stride(1)); + Diag::Unit::use_unit_diag, do_conj, A.extent(1), A.extent(0), B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); } } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_spec.hpp index 85a8b1c6dbde..6399f9e57e21 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trmm_spec.hpp @@ -36,25 +36,21 @@ struct trmm_eti_spec_avail { // // This Macro is for readability of the template arguments. 
// -#define KOKKOSBLAS3_TRMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUTA, LAYOUTB, \ - EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct trmm_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_TRMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUTA, LAYOUTB, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct trmm_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // // This Macros provides the ETI specialization of trmm // #define KOKKOSBLAS3_TRMM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_TRMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) + KOKKOSBLAS3_TRMM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, MEM_SPACE) // Include the actual specialization declarations #include @@ -69,33 +65,25 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - trmm_eti_spec_avail::value> + bool tpl_spec_avail = trmm_tpl_spec_avail::value, + bool eti_spec_avail = trmm_eti_spec_avail::value> struct TRMM { - static void trmm(const execution_space& space, const char side[], - const char uplo[], const char trans[], const char diag[], - typename BVIT::const_value_type& alpha, const AVIT& A, - const BVIT& B); + static void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], + const char diag[], typename BVIT::const_value_type& alpha, const AVIT& A, const BVIT& B); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY template -struct TRMM { - static void trmm(const execution_space& /*space*/, const char side[], - const char uplo[], const char trans[], const char diag[], - typename BVIT::const_value_type& alpha, const AVIT& A, - const BVIT& B) { +struct TRMM { + static void trmm(const execution_space& /*space*/, const char side[], const char 
uplo[], const char trans[], + const char diag[], typename BVIT::const_value_type& alpha, const AVIT& A, const BVIT& B) { static_assert(Kokkos::is_view::value, "AVIT must be a Kokkos::View."); static_assert(Kokkos::is_view::value, "BVIT must be a Kokkos::View."); static_assert(static_cast(AVIT::rank) == 2, "AVIT must have rank 2."); static_assert(static_cast(BVIT::rank) == 2, "BVIT must have rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::trmm[ETI]" - : "KokkosBlas::trmm[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::trmm[ETI]" + : "KokkosBlas::trmm[noETI]"); typename AVIT::HostMirror host_A = Kokkos::create_mirror_view(A); typename BVIT::HostMirror host_B = Kokkos::create_mirror_view(B); @@ -105,8 +93,8 @@ struct TRMM( - side, uplo, trans, diag, alpha, host_A, host_B); + SerialTrmm_Invoke(side, uplo, trans, diag, alpha, host_A, + host_B); // Copy host_B to B // no-op if B's MemorySpace is HostSpace @@ -124,27 +112,21 @@ struct TRMM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; - -#define KOKKOSBLAS3_TRMM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, \ - EXEC_SPACE, MEM_SPACE) \ - template struct TRMM< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; +#define KOKKOSBLAS3_TRMM_ETI_SPEC_DECL_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, EXEC_SPACE, MEM_SPACE) \ + extern template struct TRMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS3_TRMM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, EXEC_SPACE, MEM_SPACE) \ + template struct TRMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; // // These Macros are only included when we are not compiling libkokkoskernels but @@ -154,12 +136,10 @@ struct TRMM diff --git 
a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_impl.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_impl.hpp index 87cac8b86ab5..57c1342eb59d 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_impl.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_impl.hpp @@ -34,12 +34,9 @@ namespace KokkosBlas { namespace Impl { template -int SerialTrsmInternalLeftLowerConj(const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType* KOKKOS_RESTRICT B, - const int bs0, const int bs1) { +int SerialTrsmInternalLeftLowerConj(const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType* KOKKOS_RESTRICT B, const int bs0, const int bs1) { typedef Kokkos::ArithTraits AT; const ScalarType one(1.0), zero(0.0); @@ -47,8 +44,7 @@ int SerialTrsmInternalLeftLowerConj(const bool use_unit_diag, const int m, if (alpha == zero) SerialSetInternal::invoke(m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; for (int p = 0; p < m; ++p) { @@ -56,8 +52,7 @@ int SerialTrsmInternalLeftLowerConj(const bool use_unit_diag, const int m, const ValueType* KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + p * as1; - ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, - *KOKKOS_RESTRICT B2 = B + (p + 1) * bs0; + ValueType *KOKKOS_RESTRICT b1t = B + p * bs0, *KOKKOS_RESTRICT B2 = B + (p + 1) * bs0; if (!use_unit_diag) { const ValueType alpha11 = AT::conj(A[p * as0 + p * as1]); @@ -65,20 +60,16 @@ int SerialTrsmInternalLeftLowerConj(const bool use_unit_diag, const int m, } for (int i = 0; i < iend; ++i) - for (int j = 0; j < jend; ++j) - B2[i * bs0 + j * bs1] -= AT::conj(a21[i * as0]) * 
b1t[j * bs1]; + for (int j = 0; j < jend; ++j) B2[i * bs0 + j * bs1] -= AT::conj(a21[i * as0]) * b1t[j * bs1]; } } return 0; } template -int SerialTrsmInternalLeftUpperConj(const bool use_unit_diag, const int m, - const int n, const ScalarType alpha, - const ValueType* KOKKOS_RESTRICT A, - const int as0, const int as1, - /**/ ValueType* KOKKOS_RESTRICT B, - const int bs0, const int bs1) { +int SerialTrsmInternalLeftUpperConj(const bool use_unit_diag, const int m, const int n, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT A, const int as0, const int as1, + /**/ ValueType* KOKKOS_RESTRICT B, const int bs0, const int bs1) { typedef Kokkos::ArithTraits AT; const ScalarType one(1.0), zero(0.0); @@ -86,8 +77,7 @@ int SerialTrsmInternalLeftUpperConj(const bool use_unit_diag, const int m, if (alpha == zero) SerialSetInternal::invoke(m, n, zero, B, bs0, bs1); else { - if (alpha != one) - KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); + if (alpha != one) KokkosBlas::Impl::SerialScaleInternal::invoke(m, n, alpha, B, bs0, bs1); if (m <= 0 || n <= 0) return 0; ValueType* KOKKOS_RESTRICT B0 = B; @@ -105,8 +95,7 @@ int SerialTrsmInternalLeftUpperConj(const bool use_unit_diag, const int m, if (p > 0) { // Note: A workaround to produce correct results for // complex with Intel-18.2.199 for (int i = 0; i < iend; ++i) - for (int j = 0; j < jend; ++j) - B0[i * bs0 + j * bs1] -= AT::conj(a01[i * as0]) * b1t[j * bs1]; + for (int j = 0; j < jend; ++j) B0[i * bs0 + j * bs1] -= AT::conj(a01[i * as0]) * b1t[j * bs1]; } } } @@ -114,204 +103,146 @@ int SerialTrsmInternalLeftUpperConj(const bool use_unit_diag, const int m, } template -void SerialTrsm_Invoke(const char side[], const char uplo[], const char trans[], - const char diag[], - typename BViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B) { +void SerialTrsm_Invoke(const char side[], const char uplo[], const char trans[], const char diag[], + typename 
BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { using KokkosBatched::Algo; using KokkosBatched::Diag; // Side::Left, Uplo::Lower, Trans::NoTranspose - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); + Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(0), B.stride(1)); // Side::Left, Uplo::Lower, Trans::Transpose - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'U') || (diag[0] == 'u'))) 
KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); + Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(0), B.stride(1)); // Side::Left, Uplo::Lower, Trans::ConjTranspose - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) - SerialTrsmInternalLeftUpperConj( - Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) - SerialTrsmInternalLeftUpperConj( - Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'U') || (diag[0] == 'u'))) + 
SerialTrsmInternalLeftUpperConj(Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), + A.stride(0), B.data(), B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'N') || (diag[0] == 'n'))) + SerialTrsmInternalLeftUpperConj(Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); // Side::Left, Uplo::Upper, Trans::NoTranspose - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(0), B.stride(1)); + Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(0), B.stride(1)); // Side::Left, Uplo::Upper, Trans::Transpose - if (((side[0] == 
'L') || (side[0] == 'l')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); + Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(0), B.stride(1)); // Side::Left, Uplo::Upper, Trans::ConjTranspose - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) - SerialTrsmInternalLeftLowerConj( - Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); - if (((side[0] == 'L') || (side[0] == 'l')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) - SerialTrsmInternalLeftLowerConj( - Diag::NonUnit::use_unit_diag, 
B.extent(0), B.extent(1), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'U') || (diag[0] == 'u'))) + SerialTrsmInternalLeftLowerConj(Diag::Unit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), A.stride(1), + A.stride(0), B.data(), B.stride(0), B.stride(1)); + if (((side[0] == 'L') || (side[0] == 'l')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'N') || (diag[0] == 'n'))) + SerialTrsmInternalLeftLowerConj(Diag::NonUnit::use_unit_diag, B.extent(0), B.extent(1), alpha, A.data(), + A.stride(1), A.stride(0), B.data(), B.stride(0), B.stride(1)); //// // Side::Right, Uplo::Lower, Trans::NoTranspose - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(1), B.stride(0)); - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::NonUnit::use_unit_diag, B.extent(1), 
B.extent(0), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(1), B.stride(0)); + Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(1), B.stride(0)); // Side::Right, Uplo::Lower, Trans::Transpose - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); + Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(1), B.stride(0)); // Side::Right, Uplo::Lower, Trans::ConjTranspose - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) - SerialTrsmInternalLeftLowerConj( - Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - 
A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'L') || (uplo[0] == 'l')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) - SerialTrsmInternalLeftLowerConj( - Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'U') || (diag[0] == 'u'))) + SerialTrsmInternalLeftLowerConj(Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(0), + A.stride(1), B.data(), B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'L') || (uplo[0] == 'l')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'N') || (diag[0] == 'n'))) + SerialTrsmInternalLeftLowerConj(Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), + A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); // Side::Right, Uplo::Upper, Trans::NoTranspose - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(1), B.stride(0)); - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'N') || (trans[0] == 'n')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + 
B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'N') || (trans[0] == 'n')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftLower::invoke( - Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(1), A.stride(0), B.data(), B.stride(1), B.stride(0)); + Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(1), A.stride(0), B.data(), + B.stride(1), B.stride(0)); // Side::Right, Uplo::Upper, Trans::Transpose - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'U') || (diag[0] == 'u'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'T') || (trans[0] == 't')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) + Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'T') || (trans[0] == 't')) && ((diag[0] == 'N') || (diag[0] == 'n'))) KokkosBatched::SerialTrsmInternalLeftUpper::invoke( - Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); + Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(0), A.stride(1), B.data(), + B.stride(1), B.stride(0)); // Side::Right, Uplo::Upper, 
Trans::ConjTranspose - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'U') || (diag[0] == 'u'))) - SerialTrsmInternalLeftUpperConj( - Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); - if (((side[0] == 'R') || (side[0] == 'r')) && - ((uplo[0] == 'U') || (uplo[0] == 'u')) && - ((trans[0] == 'C') || (trans[0] == 'c')) && - ((diag[0] == 'N') || (diag[0] == 'n'))) - SerialTrsmInternalLeftUpperConj( - Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), - A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'U') || (diag[0] == 'u'))) + SerialTrsmInternalLeftUpperConj(Diag::Unit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), A.stride(0), + A.stride(1), B.data(), B.stride(1), B.stride(0)); + if (((side[0] == 'R') || (side[0] == 'r')) && ((uplo[0] == 'U') || (uplo[0] == 'u')) && + ((trans[0] == 'C') || (trans[0] == 'c')) && ((diag[0] == 'N') || (diag[0] == 'n'))) + SerialTrsmInternalLeftUpperConj(Diag::NonUnit::use_unit_diag, B.extent(1), B.extent(0), alpha, A.data(), + A.stride(0), A.stride(1), B.data(), B.stride(1), B.stride(0)); } } // namespace Impl diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_spec.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_spec.hpp index 93d01ed53b78..8c9088e97075 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_spec.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas3_trsm_spec.hpp @@ -42,22 +42,18 @@ struct trsm_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. 
// -#define KOKKOSBLAS3_TRSM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUTA, LAYOUTB, \ - EXEC_SPACE, MEM_SPACE) \ - template <> \ - struct trsm_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_TRSM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUTA, LAYOUTB, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct trsm_eti_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #define KOKKOSBLAS3_TRSM_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_TRSM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) + KOKKOSBLAS3_TRSM_ETI_SPEC_AVAIL_LAYOUT(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, MEM_SPACE) // Include the actual specialization declarations #include @@ -72,38 +68,28 @@ namespace Impl { // Unification layer template ::value, - bool eti_spec_avail = - trsm_eti_spec_avail::value> + bool tpl_spec_avail = trsm_tpl_spec_avail::value, + bool eti_spec_avail = trsm_eti_spec_avail::value> struct TRSM { - static void trsm(const execution_space& space, const char side[], - const char uplo[], const char trans[], const char diag[], - typename BViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B); + static void trsm(const execution_space& space, const char side[], const char uplo[], const char trans[], + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, + const BViewType& B); }; // Implementation of KokkosBlas::trsm. 
#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY template -struct TRSM { - static void trsm(const execution_space& /*space*/, const char side[], - const char uplo[], const char trans[], const char diag[], - typename BViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "BViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 2, - "BViewType must have rank 2."); - - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::trsm[ETI]" - : "KokkosBlas::trsm[noETI]"); +struct TRSM { + static void trsm(const execution_space& /*space*/, const char side[], const char uplo[], const char trans[], + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, + const BViewType& B) { + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "BViewType must have rank 2."); + + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? 
"KokkosBlas::trsm[ETI]" + : "KokkosBlas::trsm[noETI]"); typename AViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename BViewType::HostMirror h_B = Kokkos::create_mirror_view(B); @@ -111,9 +97,8 @@ struct TRSM(side, uplo, trans, diag, - alpha, h_A, h_B); + SerialTrsm_Invoke(side, uplo, trans, diag, alpha, + h_A, h_B); Kokkos::deep_copy(B, h_B); @@ -134,35 +119,27 @@ struct TRSM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; - -#define KOKKOSBLAS3_TRSM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, \ - EXEC_SPACE, MEM_SPACE) \ - template struct TRSM< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; +#define KOKKOSBLAS3_TRSM_ETI_SPEC_DECL_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, EXEC_SPACE, MEM_SPACE) \ + extern template struct TRSM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS3_TRSM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUTA, LAYOUTB, EXEC_SPACE, MEM_SPACE) \ + template struct TRSM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; #define KOKKOSBLAS3_TRSM_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_TRSM_ETI_SPEC_DECL_LAYOUTS(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) + KOKKOSBLAS3_TRSM_ETI_SPEC_DECL_LAYOUTS(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, MEM_SPACE) #define KOKKOSBLAS3_TRSM_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ - KOKKOSBLAS3_TRSM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) + KOKKOSBLAS3_TRSM_ETI_SPEC_INST_LAYOUTS(SCALAR, LAYOUT, LAYOUT, EXEC_SPACE, MEM_SPACE) #include diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_axpy.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_axpy.hpp index 344632b8eb85..83bb2b9c9861 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_axpy.hpp +++ 
b/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_axpy.hpp @@ -26,9 +26,8 @@ namespace Impl { /// Serial Internal Impl /// ==================== template -KOKKOS_INLINE_FUNCTION static void serial_axpy( - const int m, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT X, - /* */ ValueType *KOKKOS_RESTRICT Y, const int xs0, const int ys0) { +KOKKOS_INLINE_FUNCTION static void serial_axpy(const int m, const ScalarType alpha, const ValueType *KOKKOS_RESTRICT X, + /* */ ValueType *KOKKOS_RESTRICT Y, const int xs0, const int ys0) { #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -38,17 +37,14 @@ KOKKOS_INLINE_FUNCTION static void serial_axpy( } template -KOKKOS_INLINE_FUNCTION static void serial_axpy_mv( - const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT X, - /* */ ValueType *KOKKOS_RESTRICT Y, const int xs0, const int xs1, - const int ys0, const int ys1) { +KOKKOS_INLINE_FUNCTION static void serial_axpy_mv(const int m, const int n, const ScalarType alpha, + const ValueType *KOKKOS_RESTRICT X, + /* */ ValueType *KOKKOS_RESTRICT Y, const int xs0, const int xs1, + const int ys0, const int ys1) { if (xs0 > xs1) { - for (int i = 0; i < m; ++i) - serial_axpy(n, alpha, X + i * xs0, Y + i * ys0, xs1, ys1); + for (int i = 0; i < m; ++i) serial_axpy(n, alpha, X + i * xs0, Y + i * ys0, xs1, ys1); } else { - for (int j = 0; j < n; ++j) - serial_axpy(m, alpha, X + j * xs1, Y + j * ys1, xs0, ys0); + for (int j = 0; j < n; ++j) serial_axpy(m, alpha, X + j * xs1, Y + j * ys1, xs0, ys0); } return; diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_nrm2.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_nrm2.hpp index 1b40ea32a878..db17736c0f5d 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_nrm2.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas_serial_nrm2.hpp @@ -27,10 +27,8 @@ namespace Impl { /// Serial Internal Impl /// ==================== template -KOKKOS_INLINE_FUNCTION 
static - typename Kokkos::Details::InnerProductSpaceTraits::mag_type - serial_nrm2(const int m, const ValueType *KOKKOS_RESTRICT X, - const int xs0) { +KOKKOS_INLINE_FUNCTION static typename Kokkos::Details::InnerProductSpaceTraits::mag_type serial_nrm2( + const int m, const ValueType *KOKKOS_RESTRICT X, const int xs0) { using IPT = Kokkos::Details::InnerProductSpaceTraits; using norm_type = typename IPT::mag_type; @@ -39,21 +37,16 @@ KOKKOS_INLINE_FUNCTION static #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int i = 0; i < m; ++i) - nrm += IPT::norm(IPT::dot(X[i * xs0], X[i * xs0])); + for (int i = 0; i < m; ++i) nrm += IPT::norm(IPT::dot(X[i * xs0], X[i * xs0])); return Kokkos::ArithTraits::sqrt(nrm); } template KOKKOS_INLINE_FUNCTION static void serial_nrm2( - const int m, const int n, const ValueType *KOKKOS_RESTRICT X, const int xs0, - const int xs1, - typename Kokkos::Details::InnerProductSpaceTraits::mag_type - *KOKKOS_RESTRICT R, - const int ys0) { - for (int vecIdx = 0; vecIdx < n; ++vecIdx) - R[vecIdx * ys0] = serial_nrm2(m, X + vecIdx * xs1, xs0); + const int m, const int n, const ValueType *KOKKOS_RESTRICT X, const int xs0, const int xs1, + typename Kokkos::Details::InnerProductSpaceTraits::mag_type *KOKKOS_RESTRICT R, const int ys0) { + for (int vecIdx = 0; vecIdx < n; ++vecIdx) R[vecIdx * ys0] = serial_nrm2(m, X + vecIdx * xs1, xs0); return; } diff --git a/packages/kokkos-kernels/blas/impl/KokkosBlas_util.hpp b/packages/kokkos-kernels/blas/impl/KokkosBlas_util.hpp index 50173538fb55..885625673f4d 100644 --- a/packages/kokkos-kernels/blas/impl/KokkosBlas_util.hpp +++ b/packages/kokkos-kernels/blas/impl/KokkosBlas_util.hpp @@ -85,6 +85,7 @@ struct Algo { using SolveLU = Level3; using QR = Level3; using UTV = Level3; + using Pttrf = Level3; struct Level2 { struct Unblocked {}; @@ -116,6 +117,7 @@ struct Algo { using Gemv = Level2; using Trsv = Level2; using ApplyQ = Level2; + using Tbsv = Level2; }; namespace Impl { @@ -133,12 
+135,9 @@ namespace Impl { // Output params: // * teamsPerReduction: number of teams to use for each reduction template -void multipleReductionWorkDistribution(size_type length, - size_type numReductions, - size_type &teamsPerDot) { - constexpr size_type workPerTeam = 4096; // Amount of work per team - size_type appxNumTeams = - (length * numReductions) / workPerTeam; // Estimation for appxNumTeams +void multipleReductionWorkDistribution(size_type length, size_type numReductions, size_type &teamsPerDot) { + constexpr size_type workPerTeam = 4096; // Amount of work per team + size_type appxNumTeams = (length * numReductions) / workPerTeam; // Estimation for appxNumTeams // Adjust appxNumTeams in case it is too small or too large if (appxNumTeams < 1) appxNumTeams = 1; diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_abs.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_abs.hpp index bd63ccedf1e9..f3ea88bb03a0 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_abs.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_abs.hpp @@ -46,19 +46,14 @@ void abs(const execution_space& space, const RMV& R, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::abs: " "R is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::abs: RMV must be accessible from execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::abs: RMV must be accessible from execution space"); static_assert(Kokkos::is_view::value, "KokkosBlas::abs: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::abs: XMV must be accessible from execution space"); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::abs: XMV must be accessible from execution space"); + static_assert(std::is_same::value, "KokkosBlas::abs: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -73,30 +68,25 @@ void abs(const execution_space& space, const RMV& R, const XMV& X) { if (X.extent(0) != R.extent(0) || X.extent(1) != R.extent(1)) { std::ostringstream os; os << "KokkosBlas::abs (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << " x " << R.extent(1) << ", X: " << X.extent(0) - << " x " << X.extent(1); + << "R: " << R.extent(0) << " x " << R.extent(1) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Create unmanaged versions of the input Views. RMV and XMV may be // rank 1 or rank 2. - using RMV_Internal = Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename RMV::device_type, Kokkos::MemoryTraits >; - using XMV_Internal = Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XMV::device_type, Kokkos::MemoryTraits >; + using RMV_Internal = Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename RMV::device_type, Kokkos::MemoryTraits >; + using XMV_Internal = Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::device_type, Kokkos::MemoryTraits >; RMV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Abs::abs(space, R_internal, - X_internal); + Impl::Abs::abs(space, R_internal, X_internal); } /// \brief R(i,j) = abs(X(i,j)) diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_axpby.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_axpby.hpp index 5cd03dd7c7bc..788995679c0b 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_axpby.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_axpby.hpp @@ -60,10 +60,8 @@ namespace KokkosBlas { /// \param Y [in/out] View of type YMV in which the results will be /// 
stored. template -void axpby(const execution_space& exec_space, const AV& a, const XMV& X, - const BV& b, const YMV& Y) { - using AxpbyTraits = - Impl::AxpbyUnificationAttemptTraits; +void axpby(const execution_space& exec_space, const AV& a, const XMV& X, const BV& b, const YMV& Y) { + using AxpbyTraits = Impl::AxpbyUnificationAttemptTraits; using InternalTypeA = typename AxpbyTraits::InternalTypeA; using InternalTypeX = typename AxpbyTraits::InternalTypeX; using InternalTypeB = typename AxpbyTraits::InternalTypeB; @@ -95,37 +93,28 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // and 'b' become scalars as well, eventually changing precision in // order to match the precisions of 'X' and 'Y'. // ******************************************************************** - if constexpr (AxpbyTraits::a_is_scalar && AxpbyTraits::b_is_scalar && - AxpbyTraits::onDevice) { + if constexpr (AxpbyTraits::a_is_scalar && AxpbyTraits::b_is_scalar && AxpbyTraits::onDevice) { // ****************************************************************** // We are in the exception situation for rule 2 // ****************************************************************** InternalTypeA internal_a(a); InternalTypeA internal_b(b); - Impl::Axpby::axpby(exec_space, internal_a, internal_X, - internal_b, internal_Y); + Impl::Axpby::axpby( + exec_space, internal_a, internal_X, internal_b, internal_Y); } else { // ****************************************************************** // We are in rule 1, that is, we are in a 'onHost' case now // ****************************************************************** - InternalTypeA internal_a(Impl::getScalarValueFromVariableAtHost< - AV, Impl::typeRank()>::getValue(a)); - InternalTypeB internal_b(Impl::getScalarValueFromVariableAtHost< - BV, Impl::typeRank()>::getValue(b)); + InternalTypeA internal_a(Impl::getScalarValueFromVariableAtHost()>::getValue(a)); + InternalTypeB 
internal_b(Impl::getScalarValueFromVariableAtHost()>::getValue(b)); - Impl::Axpby::axpby(exec_space, internal_a, internal_X, - internal_b, internal_Y); + Impl::Axpby::axpby( + exec_space, internal_a, internal_X, internal_b, internal_Y); } } else if constexpr (AxpbyTraits::internalTypesAB_bothViews) { - constexpr bool internalLayoutA_isStride( - std::is_same_v); - constexpr bool internalLayoutB_isStride( - std::is_same_v); + constexpr bool internalLayoutA_isStride(std::is_same_v); + constexpr bool internalLayoutB_isStride(std::is_same_v); const size_t numScalarsA(Impl::getAmountOfScalarsInCoefficient(a)); const size_t numScalarsB(Impl::getAmountOfScalarsInCoefficient(b)); @@ -143,8 +132,7 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // ****************************************************************** // Prepare internal_a // ****************************************************************** - typename AxpbyTraits::InternalTypeA_managed managed_a("managed_a", - layoutStrideA); + typename AxpbyTraits::InternalTypeA_managed managed_a("managed_a", layoutStrideA); if constexpr (AxpbyTraits::atInputLayoutA_isStride) { Kokkos::deep_copy(managed_a, a); } else { @@ -156,8 +144,7 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // **************************************************************** // Prepare internal_b // **************************************************************** - typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", - layoutStrideB); + typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", layoutStrideB); if constexpr (AxpbyTraits::atInputLayoutB_isStride) { Kokkos::deep_copy(managed_b, b); } else { @@ -168,16 +155,13 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // **************************************************************** // Call Impl::Axpby<...>::axpby(...) 
// **************************************************************** - Impl::Axpby::axpby(exec_space, internal_a, - internal_X, internal_b, - internal_Y); + Impl::Axpby::axpby( + exec_space, internal_a, internal_X, internal_b, internal_Y); } else { // **************************************************************** // Prepare internal_b // **************************************************************** - typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", - numScalarsB); + typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", numScalarsB); if constexpr (AxpbyTraits::atInputLayoutB_isStride) { Kokkos::deep_copy(managed_b, b); } else { @@ -188,17 +172,14 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // **************************************************************** // Call Impl::Axpby<...>::axpby(...) // **************************************************************** - Impl::Axpby::axpby(exec_space, internal_a, - internal_X, internal_b, - internal_Y); + Impl::Axpby::axpby( + exec_space, internal_a, internal_X, internal_b, internal_Y); } } else { // ****************************************************************** // Prepare internal_a // ****************************************************************** - typename AxpbyTraits::InternalTypeA_managed managed_a("managed_a", - numScalarsA); + typename AxpbyTraits::InternalTypeA_managed managed_a("managed_a", numScalarsA); if constexpr (AxpbyTraits::atInputLayoutA_isStride) { Kokkos::deep_copy(managed_a, a); } else { @@ -210,8 +191,7 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // **************************************************************** // Prepare internal_b // **************************************************************** - typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", - layoutStrideB); + typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", layoutStrideB); if constexpr 
(AxpbyTraits::atInputLayoutB_isStride) { Kokkos::deep_copy(managed_b, b); } else { @@ -222,16 +202,13 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // **************************************************************** // Call Impl::Axpby<...>::axpby(...) // **************************************************************** - Impl::Axpby::axpby(exec_space, internal_a, - internal_X, internal_b, - internal_Y); + Impl::Axpby::axpby( + exec_space, internal_a, internal_X, internal_b, internal_Y); } else { // **************************************************************** // Prepare internal_b // **************************************************************** - typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", - numScalarsB); + typename AxpbyTraits::InternalTypeB_managed managed_b("managed_b", numScalarsB); if constexpr (AxpbyTraits::atInputLayoutB_isStride) { Kokkos::deep_copy(managed_b, b); } else { @@ -242,10 +219,8 @@ void axpby(const execution_space& exec_space, const AV& a, const XMV& X, // **************************************************************** // Call Impl::Axpby<...>::axpby(...) // **************************************************************** - Impl::Axpby::axpby(exec_space, internal_a, - internal_X, internal_b, - internal_Y); + Impl::Axpby::axpby( + exec_space, internal_a, internal_X, internal_b, internal_Y); } } } @@ -299,10 +274,8 @@ void axpby(const AV& a, const XMV& X, const BV& b, const YMV& Y) { /// \param Y [in/out] View of type YMV in which the results will be /// stored. 
template -void axpy(const execution_space& exec_space, const AV& a, const XMV& X, - const YMV& Y) { - axpby(exec_space, a, X, - Kokkos::ArithTraits::one(), Y); +void axpy(const execution_space& exec_space, const AV& a, const XMV& X, const YMV& Y) { + axpby(exec_space, a, X, Kokkos::ArithTraits::one(), Y); } /// \brief Computes Y := a*X + Y @@ -334,23 +307,17 @@ void axpy(const AV& a, const XMV& X, const YMV& Y) { template KOKKOS_FUNCTION void serial_axpy(const scalar_type alpha, const XMV X, YMV Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBlas::serial_axpy: XMV is not a Kokkos::View"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::serial_axpy: YMV is not a Kokkos::View"); - static_assert(XMV::rank == 1 || XMV::rank == 2, - "KokkosBlas::serial_axpy: XMV must have rank 1 or 2."); - static_assert( - XMV::rank == YMV::rank, - "KokkosBlas::serial_axpy: XMV and YMV must have the same rank."); + static_assert(Kokkos::is_view::value, "KokkosBlas::serial_axpy: XMV is not a Kokkos::View"); + static_assert(Kokkos::is_view::value, "KokkosBlas::serial_axpy: YMV is not a Kokkos::View"); + static_assert(XMV::rank == 1 || XMV::rank == 2, "KokkosBlas::serial_axpy: XMV must have rank 1 or 2."); + static_assert(XMV::rank == YMV::rank, "KokkosBlas::serial_axpy: XMV and YMV must have the same rank."); if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { Kokkos::abort("KokkosBlas::serial_axpy: X and Y dimensions do not match"); } #endif // KOKKOSKERNELS_DEBUG_LEVEL - return Impl::serial_axpy_mv(X.extent(0), X.extent(1), alpha, X.data(), - Y.data(), X.stride_0(), X.stride_1(), + return Impl::serial_axpy_mv(X.extent(0), X.extent(1), alpha, X.data(), Y.data(), X.stride_0(), X.stride_1(), Y.stride_0(), Y.stride_1()); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_dot.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_dot.hpp index aa995836ebf9..6e1a428b5170 100644 --- 
a/packages/kokkos-kernels/blas/src/KokkosBlas1_dot.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_dot.hpp @@ -37,28 +37,19 @@ namespace KokkosBlas { /// /// \return The dot product result; a single value. template , - int>::type = 0> -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::dot_type -dot(const execution_space& space, const XVector& x, const YVector& y) { + typename std::enable_if, int>::type = 0> +typename Kokkos::Details::InnerProductSpaceTraits::dot_type dot( + const execution_space& space, const XVector& x, const YVector& y) { static_assert(Kokkos::is_execution_space_v, "KokkosBlas::dot: execution_space must be a valid Kokkos " "execution space."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::dot: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::dot: XVector must be accessible from execution_space"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::dot: YVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::dot: YVector must be accessible from execution_space"); - static_assert((int)XVector::rank == (int)YVector::rank, - "KokkosBlas::dot: Vector ranks do not match."); + static_assert(Kokkos::is_view::value, "KokkosBlas::dot: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::dot: XVector must be accessible from execution_space"); + static_assert(Kokkos::is_view::value, "KokkosBlas::dot: YVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::dot: YVector must be accessible from execution_space"); + static_assert((int)XVector::rank == (int)YVector::rank, "KokkosBlas::dot: Vector ranks do not match."); static_assert(XVector::rank == 1, "KokkosBlas::dot: " "Both Vector inputs must have rank 1."); @@ -72,29 +63,23 @@ dot(const execution_space& space, const XVector& x, const 
YVector& y) { KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using XVector_Internal = Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits>; - using YVector_Internal = Kokkos::View< - typename YVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits>; + using XVector_Internal = Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits>; + using YVector_Internal = Kokkos::View::array_layout, + typename YVector::device_type, Kokkos::MemoryTraits>; - using dot_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::dot_type; + using dot_type = typename Kokkos::Details::InnerProductSpaceTraits::dot_type; // result_type is usually just dot_type, except: // if dot_type is float, result_type is double // if dot_type is complex, result_type is complex // These special cases are to maintain accuracy. - using result_type = - typename KokkosBlas::Impl::DotAccumulatingScalar::type; + using result_type = typename KokkosBlas::Impl::DotAccumulatingScalar::type; using RVector_Internal = - Kokkos::View>; + Kokkos::View>; using RVector_Result = - Kokkos::View>; + Kokkos::View>; XVector_Internal X = x; YVector_Internal Y = y; @@ -108,24 +93,19 @@ dot(const execution_space& space, const XVector& x, const YVector& y) { // two different scalar types. result_type result{}; RVector_Result R = RVector_Result(&result); - Impl::DotSpecialAccumulator::dot(space, - R, X, - Y); + Impl::DotSpecialAccumulator::dot(space, R, X, + Y); space.fence(); // mfh 22 Jan 2020: We need the line below because // Kokkos::complex lacks a constructor that takes a // Kokkos::complex with U != T. 
- return Kokkos::Details::CastPossiblyComplex::cast( - result); + return Kokkos::Details::CastPossiblyComplex::cast(result); } else { dot_type result{}; RVector_Internal R = RVector_Internal(&result); - Impl::Dot::dot(space, R, X, Y); + Impl::Dot::dot(space, R, X, Y); space.fence(); - return Kokkos::Details::CastPossiblyComplex::cast( - result); + return Kokkos::Details::CastPossiblyComplex::cast(result); } } @@ -142,9 +122,8 @@ dot(const execution_space& space, const XVector& x, const YVector& y) { /// /// \return The dot product result; a single value. template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::dot_type -dot(const XVector& x, const YVector& y) { +typename Kokkos::Details::InnerProductSpaceTraits::dot_type dot( + const XVector& x, const YVector& y) { return dot(typename XVector::execution_space{}, x, y); } @@ -192,35 +171,26 @@ void dot(const execution_space& space, const RV& R, const XMV& X, const YMV& Y, static_assert(Kokkos::is_view::value, "KokkosBlas::dot: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::dot: XMV must be accessible from execution_space."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::dot: XMV must be accessible from execution_space."); static_assert(Kokkos::is_view::value, "KokkosBlas::dot: " "Y is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::dot: XMV must be accessible from execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::dot: XMV must be accessible from execution_space."); + static_assert(std::is_same::value, "KokkosBlas::dot: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(RV::rank == 0 || RV::rank == 1, - "KokkosBlas::dot: R must have rank 0 or 1."); - static_assert(XMV::rank == 1 || XMV::rank == 2, - "KokkosBlas::dot: X must have rank 1 or 2."); - static_assert(YMV::rank == 1 || YMV::rank == 2, - "KokkosBlas::dot: Y must have rank 1 or 2."); - static_assert((XMV::rank == 2 && YMV::rank == 2 && RV::rank == 1) || - (XMV::rank == 1 && YMV::rank == 1 && RV::rank == 0) || - (XMV::rank == 2 && YMV::rank == 1 && RV::rank == 1) || - (XMV::rank == 1 && YMV::rank == 2 && RV::rank == 1), - "KokkosBlas::dot: Ranks of RV, XMV, and YMV don't match. " - "See this function's documentation for the allowed " - "combinations of ranks."); + static_assert(RV::rank == 0 || RV::rank == 1, "KokkosBlas::dot: R must have rank 0 or 1."); + static_assert(XMV::rank == 1 || XMV::rank == 2, "KokkosBlas::dot: X must have rank 1 or 2."); + static_assert(YMV::rank == 1 || YMV::rank == 2, "KokkosBlas::dot: Y must have rank 1 or 2."); + static_assert( + (XMV::rank == 2 && YMV::rank == 2 && RV::rank == 1) || (XMV::rank == 1 && YMV::rank == 1 && RV::rank == 0) || + (XMV::rank == 2 && YMV::rank == 1 && RV::rank == 1) || (XMV::rank == 1 && YMV::rank == 2 && RV::rank == 1), + "KokkosBlas::dot: Ranks of RV, XMV, and YMV don't match. " + "See this function's documentation for the allowed " + "combinations of ranks."); // Check compatibility of dimensions at run time. @@ -228,8 +198,7 @@ void dot(const execution_space& space, const RV& R, const XMV& X, const YMV& Y, bool dimsMatch = true; if (X.extent(0) != Y.extent(0)) { dimsMatch = false; - } else if (X.extent(1) != Y.extent(1) && X.extent(1) != 1 && - Y.extent(1) != 1) { + } else if (X.extent(1) != Y.extent(1) && X.extent(1) != 1 && Y.extent(1) != 1) { // Numbers of columns don't match, and neither X nor Y have one column. 
dimsMatch = false; } @@ -244,43 +213,33 @@ void dot(const execution_space& space, const RV& R, const XMV& X, const YMV& Y, if (RV::rank == 1) { os << "R: " << R.extent(0) << " x " << X.extent(1) << ", "; } - os << "X: " << X.extent(0) << " x " << X.extent(1) << ", Y: " << Y.extent(0) - << " x " << Y.extent(1); + os << "X: " << X.extent(0) << " x " << X.extent(1) << ", Y: " << Y.extent(0) << " x " << Y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Create unmanaged versions of the input Views. - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; - typedef Kokkos::View::type, - UnifiedRVLayout, typename RV::device_type, - Kokkos::MemoryTraits> + typedef Kokkos::View::type, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits> RV_Internal; - typedef Kokkos::View< - typename std::conditional::type, - UnifiedXLayout, typename XMV::device_type, - Kokkos::MemoryTraits> + typedef Kokkos::View::type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits> XMV_Internal; - typedef Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YMV::device_type, Kokkos::MemoryTraits> + typedef Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename YMV::device_type, + Kokkos::MemoryTraits> YMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; YMV_Internal Y_internal = Y; - Impl::Dot::dot( - space, R_internal, X_internal, Y_internal); + Impl::Dot::dot(space, R_internal, X_internal, Y_internal); } /// \brief Compute the column-wise dot products of two multivectors. 
@@ -314,8 +273,7 @@ void dot(const execution_space& space, const RV& R, const XMV& X, const YMV& Y, /// doesn't confuse this version of dot() with the three-argument /// version of dot() in Kokkos_Blas1.hpp. template -void dot(const RV& R, const XMV& X, const YMV& Y, - typename std::enable_if::value, int>::type = 0) { +void dot(const RV& R, const XMV& X, const YMV& Y, typename std::enable_if::value, int>::type = 0) { dot(typename XMV::execution_space{}, R, X, Y); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_fill.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_fill.hpp index 403411f7b82a..486ee46c71c0 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_fill.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_fill.hpp @@ -33,8 +33,7 @@ namespace KokkosBlas { /// \param X [out] Output View (1-D or 2-D). /// \param val [in] Value with which to fill the entries of X. template -void fill(const execution_space& space, const XMV& X, - const typename XMV::non_const_value_type& val) { +void fill(const execution_space& space, const XMV& X, const typename XMV::non_const_value_type& val) { Kokkos::Profiling::pushRegion("KokkosBlas::fill"); Kokkos::deep_copy(space, X, val); Kokkos::Profiling::popRegion(); diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_iamax.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_iamax.hpp index cfaaaeed6353..4b69f8d507de 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_iamax.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_iamax.hpp @@ -36,43 +36,35 @@ namespace KokkosBlas { /// single value. /// Note: Returned index is 1-based for compatibility with Fortran. 
template , - int>::type = 0> -typename XVector::size_type iamax(const execution_space& space, - const XVector& x) { + typename std::enable_if, int>::type = 0> +typename XVector::size_type iamax(const execution_space& space, const XVector& x) { static_assert(Kokkos::is_execution_space_v, "KokkosBlas::iamax: execution_space must be a valid Kokkos " "execution space"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::iamax: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::iamax: XVector must be accessible from execution_space"); + static_assert(Kokkos::is_view::value, "KokkosBlas::iamax: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::iamax: XVector must be accessible from execution_space"); static_assert(XVector::rank == 1, "KokkosBlas::iamax: " "Both Vector inputs must have rank 1."); typedef typename XVector::size_type index_type; - typedef Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits > XVector_Internal; using layout_t = typename XVector_Internal::array_layout; - typedef Kokkos::View > + typedef Kokkos::View > RVector_Internal; index_type result; RVector_Internal R = RVector_Internal(&result, layout_t()); XVector_Internal X = x; - Impl::Iamax::iamax(space, - R, X); + Impl::Iamax::iamax(space, R, X); space.fence(); return result; } @@ -122,17 +114,13 @@ void iamax(const execution_space& space, const RV& R, const XMV& X, static_assert(Kokkos::is_view::value, "KokkosBlas::iamax: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::iamax: XMV must be accessible from execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + 
"KokkosBlas::iamax: XMV must be accessible from execution_space."); + static_assert(std::is_same::value, "KokkosBlas::iamax: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::iamax: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); @@ -146,41 +134,32 @@ void iamax(const execution_space& space, const RV& R, const XMV& X, if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::iamax (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV may be rank 0 or rank 1. // XMV may be rank 1 or rank 2. 
- typedef Kokkos::View::type, - UnifiedRVLayout, - typename std::conditional< - std::is_same::value, - Kokkos::HostSpace, typename RV::device_type>::type, - Kokkos::MemoryTraits > - RV_Internal; typedef Kokkos::View< - typename std::conditional::type, - UnifiedXLayout, typename XMV::device_type, + typename std::conditional::type, + UnifiedRVLayout, + typename std::conditional::value, + Kokkos::HostSpace, typename RV::device_type>::type, Kokkos::MemoryTraits > + RV_Internal; + typedef Kokkos::View::type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Iamax::iamax( - space, R_internal, X_internal); + Impl::Iamax::iamax(space, R_internal, X_internal); } /// \brief R(j) = iamax(X(i,j)) @@ -197,8 +176,7 @@ void iamax(const execution_space& space, const RV& R, const XMV& X, /// Note for TPL cuBLAS: When TPL cuBLAS iamax is used and returns result to a /// view, RMV must be 0-D view and XMV must be 1-D view. template -void iamax(const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void iamax(const RV& R, const XMV& X, typename std::enable_if::value, int>::type = 0) { iamax(typename XMV::execution_space{}, R, X); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_mult.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_mult.hpp index 32ede3090c2e..9d76d6a8226c 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_mult.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_mult.hpp @@ -41,75 +41,56 @@ namespace KokkosBlas { /// \param A [in] The vector to apply to X. /// \param X [in] The X vector. 
template -void mult(const execution_space& space, typename YMV::const_value_type& gamma, - const YMV& Y, typename AV::const_value_type& alpha, const AV& A, - const XMV& X) { +void mult(const execution_space& space, typename YMV::const_value_type& gamma, const YMV& Y, + typename AV::const_value_type& alpha, const AV& A, const XMV& X) { static_assert(Kokkos::is_execution_space_v, "KokkosBlas::mult: execution_space must be a valid Kokkos " "execution space."); static_assert(Kokkos::is_view::value, "KokkosBlas::mult: " "Y is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::mult: YMV must be accessible from execution_space."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::mult: YMV must be accessible from execution_space."); static_assert(Kokkos::is_view::value, "KokkosBlas::mult: " "A is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::mult: AV must be accessible from execution_space."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::mult: AV must be accessible from execution_space."); static_assert(Kokkos::is_view::value, "KokkosBlas::mult: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::mult: AV must be accessible from execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::mult: AV must be accessible from execution_space."); + static_assert(std::is_same::value, "KokkosBlas::mult: Y is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert( - (XMV::rank == 1 && YMV::rank == 1) || (XMV::rank == 2 && YMV::rank == 2), - "KokkosBlas::mult: Y and X must be either both rank 1, " - "or both rank 2."); + static_assert((XMV::rank == 1 && YMV::rank == 1) || (XMV::rank == 2 && YMV::rank == 2), + "KokkosBlas::mult: Y and X must be either both rank 1, " + "or both rank 2."); static_assert(AV::rank == 1, "KokkosBlas::mult: A must have rank 1."); // Check compatibility of dimensions at run time. - if (Y.extent(0) != A.extent(0) || Y.extent(0) != X.extent(0) || - Y.extent(1) != X.extent(1)) { + if (Y.extent(0) != A.extent(0) || Y.extent(0) != X.extent(0) || Y.extent(1) != X.extent(1)) { std::ostringstream os; os << "KokkosBlas::mult: Dimensions do not match: " - << "Y: " << Y.extent(0) << " x " << Y.extent(1) << ", A: " << A.extent(0) - << " x " << A.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "Y: " << Y.extent(0) << " x " << Y.extent(1) << ", A: " << A.extent(0) << " x " << A.extent(0) + << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using YUnifiedLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using AUnifiedLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - AV, YUnifiedLayout>::array_layout; - using XUnifiedLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - XMV, YUnifiedLayout>::array_layout; + using YUnifiedLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using AUnifiedLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + using XUnifiedLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. 
- typedef Kokkos::View > YMV_Internal; - typedef Kokkos::View > AV_Internal; - typedef Kokkos::View > XMV_Internal; @@ -117,8 +98,8 @@ void mult(const execution_space& space, typename YMV::const_value_type& gamma, AV_Internal A_internal = A; XMV_Internal X_internal = X; - Impl::Mult::mult( - space, gamma, Y_internal, alpha, A_internal, X_internal); + Impl::Mult::mult(space, gamma, Y_internal, alpha, + A_internal, X_internal); } /// \brief Element wise multiplication of two vectors: @@ -138,8 +119,8 @@ void mult(const execution_space& space, typename YMV::const_value_type& gamma, /// \param A [in] The vector to apply to X. /// \param X [in] The X vector. template -void mult(typename YMV::const_value_type& gamma, const YMV& Y, - typename AV::const_value_type& alpha, const AV& A, const XMV& X) { +void mult(typename YMV::const_value_type& gamma, const YMV& Y, typename AV::const_value_type& alpha, const AV& A, + const XMV& X) { mult(typename YMV::execution_space{}, gamma, Y, alpha, A, X); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm1.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm1.hpp index e9b26e6177f0..bf7119a5855a 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm1.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm1.hpp @@ -33,39 +33,30 @@ namespace KokkosBlas { /// \param x [in] Input 1-D View. /// /// \return The nrm1 product result; a single value. 
-template < - class execution_space, class XVector, - typename std::enable_if::value, - int>::type = 0> -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm1(const execution_space& space, const XVector& x) { - static_assert( - Kokkos::is_execution_space::value, - "KokkosBlas::nrm1: execution_space must be a Kokkos::execution_space."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::nrm1: XVector must be a Kokkos::View."); +template ::value, int>::type = 0> +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm1( + const execution_space& space, const XVector& x) { + static_assert(Kokkos::is_execution_space::value, + "KokkosBlas::nrm1: execution_space must be a Kokkos::execution_space."); + static_assert(Kokkos::is_view::value, "KokkosBlas::nrm1: XVector must be a Kokkos::View."); static_assert(XVector::rank == 1, "KokkosBlas::nrm1: " "Both Vector inputs must have rank 1."); - using mag_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type; + using mag_type = typename Kokkos::Details::InnerProductSpaceTraits::mag_type; - using XVector_Internal = Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits >; + using XVector_Internal = Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits >; using RVector_Internal = - Kokkos::View >; + Kokkos::View >; mag_type result; RVector_Internal R = RVector_Internal(&result); XVector_Internal X = x; - Impl::Nrm1::nrm1(space, - R, X); + Impl::Nrm1::nrm1(space, R, X); space.fence(); return result; } @@ -78,9 +69,8 @@ nrm1(const execution_space& space, const XVector& x) { /// /// \return The nrm1 product result; a single value. 
template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm1(const XVector& x) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm1( + const XVector& x) { return nrm1(typename XVector::execution_space{}, x); } @@ -109,22 +99,17 @@ void nrm1(const execution_space& space, const RV& R, const XMV& X, static_assert(Kokkos::is_view::value, "KokkosBlas::nrm1: " "X is not a Kokkos::View."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosBlas::nrm1: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::nrm1: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm1: execution_space cannot access data in XMV"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm1: execution_space cannot access data in XMV"); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; static_assert(std::is_same::value, "KokkosBlas::nrm1: R must have the magnitude type of" "the xvectors value_type it is an output argument " @@ -134,37 +119,28 @@ void nrm1(const execution_space& space, const RV& R, const XMV& X, if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::nrm1 (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - 
typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. - typedef Kokkos::View::type, - UnifiedRVLayout, typename RV::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::type, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; - typedef Kokkos::View< - typename std::conditional::type, - UnifiedXLayout, typename XMV::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Nrm1::nrm1( - space, R_internal, X_internal); + Impl::Nrm1::nrm1(space, R_internal, X_internal); } /// \brief R(j) = nrm1(X(i,j)) @@ -182,16 +158,14 @@ void nrm1(const execution_space& space, const RV& R, const XMV& X, /// \param R [out] Output 1-D View containing the result /// \param X [in] Input 1-D View. template -void nrm1(const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void nrm1(const RV& R, const XMV& X, typename std::enable_if::value, int>::type = 0) { nrm1(typename XMV::execution_space{}, R, X); } /// \brief Return the nrm1 of the vector x via asum (the actual blas name). 
template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -asum(const XVector& x) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type asum( + const XVector& x) { return nrm1(x); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2.hpp index 64643367a0f8..2e8558ba32ba 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2.hpp @@ -34,46 +34,36 @@ namespace KokkosBlas { /// \param x [in] Input 1-D View. /// /// \return The nrm2 product result; a single value. -template < - class execution_space, class XVector, - typename std::enable_if::value, - int>::type = 0> -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2(const execution_space& space, const XVector& x) { +template ::value, int>::type = 0> +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2( + const execution_space& space, const XVector& x) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2: execution_space must be a valid" " Kokkos execution space."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::nrm2: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2: XVector must be accessible from execution_space"); + static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2: XVector must be accessible from execution_space"); static_assert(XVector::rank == 1, "KokkosBlas::nrm2: " "XVector must have rank 1."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; - typedef Kokkos::View< - typename 
XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits > XVector_Internal; using layout_t = typename XVector_Internal::array_layout; - typedef Kokkos::View > + typedef Kokkos::View > RVector_Internal; mag_type result; RVector_Internal R = RVector_Internal(&result, layout_t()); XVector_Internal X = x; - Impl::Nrm2::nrm2( - space, R, X, true); + Impl::Nrm2::nrm2(space, R, X, true); space.fence(); return result; } @@ -89,9 +79,8 @@ nrm2(const execution_space& space, const XVector& x) { /// /// \return The nrm2 product result; a single value. template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2(const XVector& x) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2( + const XVector& x) { return nrm2(typename XVector::execution_space{}, x); } @@ -122,22 +111,17 @@ void nrm2(const execution_space& space, const RV& R, const XMV& X, static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2: X cannot be accessed from execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2: X cannot be accessed from execution_space."); + static_assert(std::is_same::value, "KokkosBlas::nrm2: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::nrm2: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; static_assert(std::is_same::value, "KokkosBlas::nrm2: R must have the magnitude type of" "the xvectors value_type it is an output argument " @@ -147,33 +131,26 @@ void nrm2(const execution_space& space, const RV& R, const XMV& X, if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::nrm2 (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. 
- typedef Kokkos::View > RV_Internal; - typedef Kokkos::View > XMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Nrm2::nrm2( - space, R_internal, X_internal, true); + Impl::Nrm2::nrm2(space, R_internal, X_internal, true); } /// \brief R(i,j) = nrm2(X(i,j)) @@ -193,8 +170,7 @@ void nrm2(const execution_space& space, const RV& R, const XMV& X, /// \param R [out] Output View containing results (rank 0 or 1). /// \param X [in] Input View (rank 1 or 2). template -void nrm2(const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void nrm2(const RV& R, const XMV& X, typename std::enable_if::value, int>::type = 0) { nrm2(typename XMV::execution_space{}, R, X); } @@ -202,14 +178,11 @@ void nrm2(const RV& R, const XMV& X, /// Serial nrm2 /// template -KOKKOS_INLINE_FUNCTION typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type +KOKKOS_INLINE_FUNCTION typename Kokkos::Details::InnerProductSpaceTraits::mag_type serial_nrm2(const XMV X) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBlas::serial_nrm2: XMV is not a Kokkos::View"); - static_assert(XMV::rank == 1, - "KokkosBlas::serial_nrm2: XMV must have rank 1"); + static_assert(Kokkos::is_view::value, "KokkosBlas::serial_nrm2: XMV is not a Kokkos::View"); + static_assert(XMV::rank == 1, "KokkosBlas::serial_nrm2: XMV must have rank 1"); #endif // KOKKOSKERNELS_DEBUG_LEVEL return Impl::serial_nrm2(X.extent(0), X.data(), X.stride_0()); @@ -219,45 +192,31 @@ template KOKKOS_INLINE_FUNCTION int serial_nrm2(const XMV X, const RV& R) { // Do some compile time check when debug is enabled #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "KokkosBlas::serial_nrm2: XMV is not a Kokkos::View"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::serial_nrm2: RV is not a Kokkos::View"); - static_assert(std::is_same::value, + static_assert(Kokkos::is_view::value, 
"KokkosBlas::serial_nrm2: XMV is not a Kokkos::View"); + static_assert(Kokkos::is_view::value, "KokkosBlas::serial_nrm2: RV is not a Kokkos::View"); + static_assert(std::is_same::value, "KokkosBlas::serial_nrm2: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::serial_nrm2: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - using norm_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type; - static_assert( - std::is_same::value, - "KokkosBlas::serial_nrm2: RV must have same value_type as" - " Kokkos::ArithTraits::mag_type"); + using norm_type = typename Kokkos::Details::InnerProductSpaceTraits::mag_type; + static_assert(std::is_same::value, + "KokkosBlas::serial_nrm2: RV must have same value_type as" + " Kokkos::ArithTraits::mag_type"); if (R.extent(0) != X.extent(1)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosBlas::serial_nrm2 (MV): Dimensions of R and X do not match," - " R: %d and X: %d x %d.\n", - R.extent_int(0), X.extent_int(0), X.extent_int(1)); -#else Kokkos::printf( "KokkosBlas::serial_nrm2 (MV): Dimensions of R and X do not match," " R: %d and X: %d x %d.\n", R.extent_int(0), X.extent_int(0), X.extent_int(1)); -#endif return 1; } #endif // KOKKOSKERNELS_DEBUG_LEVEL - Impl::serial_nrm2(X.extent(0), X.extent(1), X.data(), X.stride_0(), - X.stride_1(), R.data(), R.stride_0()); + Impl::serial_nrm2(X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), R.data(), R.stride_0()); return 0; } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2_squared.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2_squared.hpp index c065efb290bc..748ece36635d 100644 --- 
a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2_squared.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2_squared.hpp @@ -33,46 +33,36 @@ namespace KokkosBlas { /// \param x [in] Input 1-D View. /// /// \return The nrm2 product result; a single value. -template < - class execution_space, class XVector, - typename std::enable_if::value, - int>::type = 0> -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2_squared(const execution_space& space, const XVector& x) { +template ::value, int>::type = 0> +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2_squared( + const execution_space& space, const XVector& x) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2_squared: execution_space must be a valid" " Kokkos execution space"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::nrm2_squared: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2_squared: XVector must be accessible" - " from execution_space"); + static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2_squared: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2_squared: XVector must be accessible" + " from execution_space"); static_assert(XVector::rank == 1, "KokkosBlas::nrm2_squared: " "Both Vector inputs must have rank 1."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; - typedef Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits > XVector_Internal; - typedef Kokkos::View > + typedef Kokkos::View > RVector_Internal; 
mag_type result; RVector_Internal R = RVector_Internal(&result); XVector_Internal X = x; - Impl::Nrm2::nrm2( - space, R, X, false); + Impl::Nrm2::nrm2(space, R, X, false); space.fence(); return result; } @@ -88,9 +78,8 @@ nrm2_squared(const execution_space& space, const XVector& x) { /// /// \return The nrm2 product result; a single value. template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2_squared(const XVector& x) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2_squared( + const XVector& x) { return nrm2_squared(typename XVector::execution_space{}, x); } @@ -111,9 +100,8 @@ nrm2_squared(const XVector& x) { /// \param R [in] Output View (rank 0 or 1) that holds the result. /// \param X [in] Input View (rank 1 or 2). template -void nrm2_squared( - const execution_space& space, const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void nrm2_squared(const execution_space& space, const RV& R, const XMV& X, + typename std::enable_if::value, int>::type = 0) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2_squared: execution_space must be a valid" " Kokkos execution space"); @@ -123,22 +111,17 @@ void nrm2_squared( static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2_squared: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2_squared: XVector must be accessible" - " from execution_space"); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2_squared: XVector must be accessible" + " from execution_space"); + static_assert(std::is_same::value, "KokkosBlas::nrm2_squared: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::nrm2_squared: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; static_assert(std::is_same::value, "KokkosBlas::nrm2: R must have the magnitude type of" "the xvectors value_type it is an output argument " @@ -148,33 +131,26 @@ void nrm2_squared( if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::nrm2 (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. 
- typedef Kokkos::View > RV_Internal; - typedef Kokkos::View > XMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Nrm2::nrm2( - space, R_internal, X_internal, false); + Impl::Nrm2::nrm2(space, R_internal, X_internal, false); } /// \brief R(i,j) = nrm2(X(i,j)) @@ -190,9 +166,7 @@ void nrm2_squared( /// the same rank as RMV, and its entries must be assignable to /// those of RMV. template -void nrm2_squared( - const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void nrm2_squared(const RV& R, const XMV& X, typename std::enable_if::value, int>::type = 0) { nrm2_squared(typename XMV::execution_space{}, R, X); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w.hpp index c5eaa0621be4..5fea0c783cee 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w.hpp @@ -36,44 +36,35 @@ namespace KokkosBlas { /// /// \return The nrm2w product result; a single value. 
template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2w(const execution_space& space, const XVector& x, const XVector& w, - typename std::enable_if< - Kokkos::is_execution_space::value, int>::type = 0) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2w( + const execution_space& space, const XVector& x, const XVector& w, + typename std::enable_if::value, int>::type = 0) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2w: execution_space must be a valid" " Kokkos execution space."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::nrm2w: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2w: XVector must be accessible from execution_space"); + static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2w: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2w: XVector must be accessible from execution_space"); static_assert(XVector::rank == 1, "KokkosBlas::nrm2w: " "Both Vector inputs must have rank 1."); - using mag_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type; + using mag_type = typename Kokkos::Details::InnerProductSpaceTraits::mag_type; - using XVector_Internal = Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits >; + using XVector_Internal = Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits >; using layout_t = typename XVector_Internal::array_layout; using RVector_Internal = - Kokkos::View >; + Kokkos::View >; mag_type result; RVector_Internal R = RVector_Internal(&result, layout_t()); XVector_Internal X = x; XVector_Internal W = w; - Impl::Nrm2w::nrm2w( - space, R, X, W, true); + Impl::Nrm2w::nrm2w(space, R, X, W, true); 
space.fence(); return result; } @@ -90,9 +81,8 @@ nrm2w(const execution_space& space, const XVector& x, const XVector& w, /// /// \return The nrm2w product result; a single value. template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2w(const XVector& x, const XVector& w) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2w( + const XVector& x, const XVector& w) { return nrm2w(typename XVector::execution_space{}, x, w); } @@ -114,8 +104,7 @@ nrm2w(const XVector& x, const XVector& w) { /// \param X [in] Input View (rank 1 or 2). /// \param W [in] Input View (rank 1 or 2). template -void nrm2w(const execution_space& space, const RV& R, const XMV& X, - const XMV& W, +void nrm2w(const execution_space& space, const RV& R, const XMV& X, const XMV& W, typename std::enable_if::value, int>::type = 0) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2w: execution_space must be a valid" @@ -126,22 +115,17 @@ void nrm2w(const execution_space& space, const RV& R, const XMV& X, static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2w: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2w: XMV must be accessible from execution_space"); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2w: XMV must be accessible from execution_space"); + static_assert(std::is_same::value, "KokkosBlas::nrm2w: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::nrm2w: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; static_assert(std::is_same::value, "KokkosBlas::nrm2w: R must have the magnitude type of" "the xvectors value_type it is an output argument " @@ -151,25 +135,19 @@ void nrm2w(const execution_space& space, const RV& R, const XMV& X, if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::nrm2w (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. 
- typedef Kokkos::View > RV_Internal; - typedef Kokkos::View > XMV_Internal; @@ -177,8 +155,7 @@ void nrm2w(const execution_space& space, const RV& R, const XMV& X, XMV_Internal X_internal = X; XMV_Internal W_internal = W; - Impl::Nrm2w::nrm2w( - space, R_internal, X_internal, W_internal, true); + Impl::Nrm2w::nrm2w(space, R_internal, X_internal, W_internal, true); } /// \brief R(i,j) = nrm2w(X(i,j)) diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w_squared.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w_squared.hpp index a1fe10bf1ee8..375a55c2949d 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w_squared.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrm2w_squared.hpp @@ -34,49 +34,38 @@ namespace KokkosBlas { /// \param w [in] Input weights (1-D View). /// /// \return The nrm2w product result; a single value. -template < - class execution_space, class XVector, - typename std::enable_if::value, - int>::type = 0> -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2w_squared(const execution_space& space, const XVector& x, - const XVector& w) { +template ::value, int>::type = 0> +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2w_squared( + const execution_space& space, const XVector& x, const XVector& w) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2w_squared: execution_space must be a valid " "Kokkos execution space."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::nrm2w_squared: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2w_squared: XVector must be accessible from " - "execution_space."); + static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2w_squared: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2w_squared: XVector must be accessible from " + "execution_space."); 
static_assert(XVector::rank == 1, "KokkosBlas::nrm2w_squared: " "Both Vector inputs must have rank 1."); - using mag_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type; + using mag_type = typename Kokkos::Details::InnerProductSpaceTraits::mag_type; - using XVector_Internal = Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits >; + using XVector_Internal = Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits >; using layout_t = typename XVector_Internal::array_layout; using RVector_Internal = - Kokkos::View >; + Kokkos::View >; mag_type result; RVector_Internal R = RVector_Internal(&result, layout_t()); XVector_Internal X = x; XVector_Internal W = w; - Impl::Nrm2w::nrm2w( - space, R, X, W, false); + Impl::Nrm2w::nrm2w(space, R, X, W, false); space.fence(); return result; } @@ -93,9 +82,8 @@ nrm2w_squared(const execution_space& space, const XVector& x, /// /// \return The nrm2w product result; a single value. template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrm2w_squared(const XVector& x, const XVector& w) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrm2w_squared( + const XVector& x, const XVector& w) { return nrm2w_squared(typename XVector::execution_space(), x, w); } @@ -117,9 +105,8 @@ nrm2w_squared(const XVector& x, const XVector& w) { /// \param X [in] Input View (rank 1 or 2). /// \param W [in] Input View (rank 1 or 2). 
template -void nrm2w_squared( - const execution_space& space, const RV& R, const XMV& X, const XMV& W, - typename std::enable_if::value, int>::type = 0) { +void nrm2w_squared(const execution_space& space, const RV& R, const XMV& X, const XMV& W, + typename std::enable_if::value, int>::type = 0) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrm2w_squared: execution_space must be a valid " "Kokkos execution space."); @@ -129,22 +116,17 @@ void nrm2w_squared( static_assert(Kokkos::is_view::value, "KokkosBlas::nrm2w_squared: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrm2w_squared: XVector must be accessible from " - "execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrm2w_squared: XVector must be accessible from " + "execution_space."); + static_assert(std::is_same::value, "KokkosBlas::nrm2w_squared: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::nrm2w_squared: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - using mag_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type; + using mag_type = typename Kokkos::Details::InnerProductSpaceTraits::mag_type; static_assert(std::is_same::value, "KokkosBlas::nrm2w: R must have the magnitude type of" "the xvectors value_type it is an output argument " @@ -154,32 +136,25 @@ void nrm2w_squared( if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::nrm2w (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " 
<< X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. - using RV_Internal = Kokkos::View >; - using XMV_Internal = Kokkos::View >; RV_Internal R_internal = R; XMV_Internal X_internal = X; XMV_Internal W_internal = W; - Impl::Nrm2w::nrm2w( - space, R_internal, X_internal, W_internal, false); + Impl::Nrm2w::nrm2w(space, R_internal, X_internal, W_internal, false); } /// \brief R(i,j) = nrm2w(X(i,j)) @@ -199,9 +174,8 @@ void nrm2w_squared( /// \param X [in] Input View (rank 1 or 2). /// \param W [in] Input View (rank 1 or 2). template -void nrm2w_squared( - const RV& R, const XMV& X, const XMV& W, - typename std::enable_if::value, int>::type = 0) { +void nrm2w_squared(const RV& R, const XMV& X, const XMV& W, + typename std::enable_if::value, int>::type = 0) { nrm2w_squared(typename XMV::execution_space{}, R, X, W); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrminf.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrminf.hpp index c6f923aefe9b..ec3a98fa957d 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_nrminf.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_nrminf.hpp @@ -33,39 +33,31 @@ namespace KokkosBlas { /// \param x [in] Input 1-D View. /// /// \return The nrminf product result; a single value. 
-template < - class execution_space, class XVector, - typename std::enable_if::value, - int>::type = 0> -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrminf(const execution_space& space, const XVector& x) { - static_assert(Kokkos::is_view::value, - "KokkosBlas::nrminf: XVector must be a Kokkos::View."); +template ::value, int>::type = 0> +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrminf( + const execution_space& space, const XVector& x) { + static_assert(Kokkos::is_view::value, "KokkosBlas::nrminf: XVector must be a Kokkos::View."); static_assert(XVector::rank == 1, "KokkosBlas::nrminf: " "Both Vector inputs must have rank 1."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; - typedef Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits > XVector_Internal; using layout_t = typename XVector_Internal::array_layout; - typedef Kokkos::View > + typedef Kokkos::View > RVector_Internal; mag_type result; RVector_Internal R = RVector_Internal(&result, layout_t()); XVector_Internal X = x; - Impl::NrmInf::nrminf( - space, R, X); + Impl::NrmInf::nrminf(space, R, X); space.fence(); return result; } @@ -78,9 +70,8 @@ nrminf(const execution_space& space, const XVector& x) { /// /// \return The nrminf product result; a single value. 
template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type -nrminf(const XVector& x) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type nrminf( + const XVector& x) { return nrminf(typename XVector::execution_space{}, x); } @@ -95,9 +86,8 @@ nrminf(const XVector& x) { /// the same rank as RMV, and its entries must be assignable to /// those of RMV. template -void nrminf( - const execution_space& space, const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void nrminf(const execution_space& space, const RV& R, const XMV& X, + typename std::enable_if::value, int>::type = 0) { static_assert(Kokkos::is_execution_space::value, "KokkosBlas::nrminf: space is not an execution space instance"); static_assert(Kokkos::is_view::value, @@ -106,22 +96,17 @@ void nrminf( static_assert(Kokkos::is_view::value, "KokkosBlas::nrminf: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::nrminf: X is not accessible from execution_space"); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::nrminf: X is not accessible from execution_space"); + static_assert(std::is_same::value, "KokkosBlas::nrminf: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::nrminf: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename XMV::non_const_value_type>::mag_type mag_type; + typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; static_assert(std::is_same::value, "KokkosBlas::nrminf: R must have the magnitude type of" "the xvectors value_type it is an output argument " @@ -131,37 +116,28 @@ void nrminf( if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::nrminf (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. 
- typedef Kokkos::View::type, - UnifiedRVLayout, typename RV::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::type, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; - typedef Kokkos::View< - typename std::conditional::type, - UnifiedXLayout, typename XMV::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::NrmInf::nrminf( - space, R_internal, X_internal); + Impl::NrmInf::nrminf(space, R_internal, X_internal); } /// \brief R(j) = nrminf(X(i,j)) @@ -174,9 +150,7 @@ void nrminf( /// the same rank as RMV, and its entries must be assignable to /// those of RMV. template -void nrminf( - const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void nrminf(const RV& R, const XMV& X, typename std::enable_if::value, int>::type = 0) { nrminf(typename XMV::execution_space{}, R, X); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_reciprocal.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_reciprocal.hpp index ef73d268281a..477c885e5eca 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_reciprocal.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_reciprocal.hpp @@ -47,19 +47,14 @@ void reciprocal(const execution_space& space, const RMV& R, const XMV& X) { static_assert(Kokkos::is_view::value, "KokkosBlas::reciprocal: " "R is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::reciprocal: RMV must be accessible from execution_space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::reciprocal: RMV must be accessible from execution_space"); static_assert(Kokkos::is_view::value, "KokkosBlas::reciprocal: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::reciprocal: XMV must be accessible from 
execution_space"); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::reciprocal: XMV must be accessible from execution_space"); + static_assert(std::is_same::value, "KokkosBlas::reciprocal: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -74,32 +69,27 @@ void reciprocal(const execution_space& space, const RMV& R, const XMV& X) { if (X.extent(0) != R.extent(0) || X.extent(1) != R.extent(1)) { std::ostringstream os; os << "KokkosBlas::reciprocal (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << " x " << R.extent(1) << ", X: " << X.extent(0) - << " x " << X.extent(1); + << "R: " << R.extent(0) << " x " << R.extent(1) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Create unmanaged versions of the input Views. RMV and XMV may be // rank 1 or rank 2. - typedef Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename RMV::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename RMV::device_type, + Kokkos::MemoryTraits > RMV_Internal; - typedef Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XMV::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename XMV::device_type, + Kokkos::MemoryTraits > XMV_Internal; RMV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Reciprocal::reciprocal( - space, R_internal, X_internal); + Impl::Reciprocal::reciprocal(space, R_internal, X_internal); } /// \brief R(i,j) = reciprocal(X(i,j)) diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_rot.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_rot.hpp 
index d848617b6e1f..7bc3215604ba 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_rot.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_rot.hpp @@ -22,58 +22,45 @@ namespace KokkosBlas { template -void rot(execution_space const& space, VectorView const& X, VectorView const& Y, - ScalarView const& c, ScalarView const& s) { +void rot(execution_space const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, + ScalarView const& s) { static_assert(Kokkos::is_execution_space::value, "rot: execution_space template parameter is not a Kokkos " "execution space."); - static_assert(VectorView::rank == 1, - "rot: VectorView template parameter needs to be a rank 1 view"); - static_assert(ScalarView::rank == 0, - "rot: ScalarView template parameter needs to be a rank 0 view"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "rot: VectorView template parameter memory space needs to be accessible " - "from " - "execution_space template parameter"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "rot: VectorView template parameter memory space needs to be accessible " - "from " - "execution_space template parameter"); - static_assert( - std::is_same::value, - "rot: VectorView template parameter needs to store non-const values"); + static_assert(VectorView::rank == 1, "rot: VectorView template parameter needs to be a rank 1 view"); + static_assert(ScalarView::rank == 0, "rot: ScalarView template parameter needs to be a rank 0 view"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rot: VectorView template parameter memory space needs to be accessible " + "from " + "execution_space template parameter"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rot: VectorView template parameter memory space needs to be accessible " + "from " + "execution_space template parameter"); + static_assert(std::is_same::value, + "rot: VectorView template parameter needs to store non-const values"); - using 
VectorView_Internal = Kokkos::View< - typename VectorView::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + using VectorView_Internal = Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; - using ScalarView_Internal = Kokkos::View< - typename ScalarView::non_const_value_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + using ScalarView_Internal = Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; VectorView_Internal X_(X), Y_(Y); ScalarView_Internal c_(c), s_(s); Kokkos::Profiling::pushRegion("KokkosBlas::rot"); - Impl::Rot::rot( - space, X_, Y_, c_, s_); + Impl::Rot::rot(space, X_, Y_, c_, s_); Kokkos::Profiling::popRegion(); } template -void rot(VectorView const& X, VectorView const& Y, ScalarView const& c, - ScalarView const& s) { - const typename VectorView::execution_space space = - typename VectorView::execution_space(); +void rot(VectorView const& X, VectorView const& Y, ScalarView const& c, ScalarView const& s) { + const typename VectorView::execution_space space = typename VectorView::execution_space(); rot(space, X, Y, c, s); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_rotg.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_rotg.hpp index 3b66ae0115ba..1927bc2df9f5 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_rotg.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_rotg.hpp @@ -35,40 +35,28 @@ namespace KokkosBlas { /// rotation /// \param s [out] sine value associated with the rotation template -void rotg(execution_space const& space, SViewType const& a, SViewType const& b, - MViewType const& c, SViewType const& s) { - static_assert(SViewType::rank == 0, - "rotg: the inputs need to be rank 0 views"); - static_assert(MViewType::rank == 0, - "rotg: the inputs need to be rank 0 views"); - static_assert( - !Kokkos::ArithTraits::is_complex); - 
static_assert( - Kokkos::SpaceAccessibility::accessible, - "rotg: execution_space cannot access data in SViewType"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "rotg: execution_space cannot access data in MViewType"); +void rotg(execution_space const& space, SViewType const& a, SViewType const& b, MViewType const& c, + SViewType const& s) { + static_assert(SViewType::rank == 0, "rotg: the inputs need to be rank 0 views"); + static_assert(MViewType::rank == 0, "rotg: the inputs need to be rank 0 views"); + static_assert(!Kokkos::ArithTraits::is_complex); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rotg: execution_space cannot access data in SViewType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rotg: execution_space cannot access data in MViewType"); using SView_Internal = Kokkos::View< - typename SViewType::value_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + typename SViewType::value_type, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, Kokkos::MemoryTraits>; using MView_Internal = Kokkos::View< - typename MViewType::value_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + typename MViewType::value_type, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, Kokkos::MemoryTraits>; SView_Internal a_(a), b_(b), s_(s); MView_Internal c_(c); Kokkos::Profiling::pushRegion("KokkosBlas::rotg"); - Impl::Rotg::rotg(space, a, b, - c, s); + Impl::Rotg::rotg(space, a, b, c, s); Kokkos::Profiling::popRegion(); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_rotm.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_rotm.hpp index 077d3350fede..6f5442e931cb 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_rotm.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_rotm.hpp @@ -36,62 +36,45 @@ namespace KokkosBlas { /// \param 
param [in] output of rotmg contains rotation coefficients /// template -void rotm(execution_space const& space, VectorView const& X, - VectorView const& Y, ParamView const& param) { +void rotm(execution_space const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { static_assert(Kokkos::is_execution_space::value, "rotm: execution_space template parameter is not a Kokkos " "execution space."); - static_assert( - VectorView::rank == 1, - "rotm: VectorView template parameter needs to be a rank 1 view"); - static_assert(ParamView::rank == 1, - "rotm: ParamView template parameter needs to be a rank 1 view"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "rotm: VectorView template parameter memory space needs to be accessible " - "from execution_space template parameter"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "rotm: ScalarView template parameter memory space needs to be accessible " - "from execution_space template parameter"); - static_assert( - std::is_same::value, - "rotm: VectorView template parameter needs to store non-const values"); - static_assert( - !Kokkos::ArithTraits::is_complex, - "rotm: VectorView template parameter cannot use complex value_type"); - static_assert( - !Kokkos::ArithTraits::is_complex, - "rotm: ParamView template parameter cannot use complex value_type"); + static_assert(VectorView::rank == 1, "rotm: VectorView template parameter needs to be a rank 1 view"); + static_assert(ParamView::rank == 1, "rotm: ParamView template parameter needs to be a rank 1 view"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rotm: VectorView template parameter memory space needs to be accessible " + "from execution_space template parameter"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rotm: ScalarView template parameter memory space needs to be accessible " + "from execution_space template parameter"); + static_assert(std::is_same::value, + "rotm: VectorView template 
parameter needs to store non-const values"); + static_assert(!Kokkos::ArithTraits::is_complex, + "rotm: VectorView template parameter cannot use complex value_type"); + static_assert(!Kokkos::ArithTraits::is_complex, + "rotm: ParamView template parameter cannot use complex value_type"); - using VectorView_Internal = Kokkos::View< - typename VectorView::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + using VectorView_Internal = Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; using ParamView_Internal = Kokkos::View< - typename ParamView::const_value_type[5], - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + typename ParamView::const_value_type[5], typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, Kokkos::MemoryTraits>; VectorView_Internal X_(X), Y_(Y); ParamView_Internal param_(param); Kokkos::Profiling::pushRegion("KokkosBlas::rotm"); - Impl::Rotm::rotm( - space, X_, Y_, param_); + Impl::Rotm::rotm(space, X_, Y_, param_); Kokkos::Profiling::popRegion(); } template void rotm(VectorView const& X, VectorView const& Y, ParamView const& param) { - const typename VectorView::execution_space space = - typename VectorView::execution_space(); + const typename VectorView::execution_space space = typename VectorView::execution_space(); rotm(space, X, Y, param); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_rotmg.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_rotmg.hpp index 723b0eac1ada..a6c629f98705 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_rotmg.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_rotmg.hpp @@ -39,46 +39,39 @@ namespace KokkosBlas { /// \param param [out] /// template -void rotmg(execution_space const& space, DXView const& d1, DXView const& d2, - DXView const& x1, YView const& y1, PView const& param) { - static_assert( - 
Kokkos::SpaceAccessibility::accessible, - "rotmg: execution_space cannot access data in DXView"); +void rotmg(execution_space const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, + PView const& param) { + static_assert(Kokkos::SpaceAccessibility::accessible, + "rotmg: execution_space cannot access data in DXView"); - using DXView_Internal = Kokkos::View< - typename DXView::value_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + using DXView_Internal = + Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; - using YView_Internal = Kokkos::View< - typename YView::value_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + using YView_Internal = + Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; - using PView_Internal = Kokkos::View< - typename PView::value_type[5], - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits>; + using PView_Internal = + Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; DXView_Internal d1_(d1), d2_(d2), x1_(x1); YView_Internal y1_(y1); PView_Internal param_(param); Kokkos::Profiling::pushRegion("KokkosBlas::rotmg"); - Impl::Rotmg::rotmg(space, d1_, d2_, x1_, y1_, param_); + Impl::Rotmg::rotmg(space, d1_, d2_, x1_, y1_, + param_); Kokkos::Profiling::popRegion(); } template -void rotmg(DXView const& d1, DXView const& d2, DXView const& x1, - YView const& y1, PView const& param) { - const typename PView::execution_space space = - typename PView::execution_space(); +void rotmg(DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, PView const& param) { + const typename PView::execution_space space = typename PView::execution_space(); rotmg(space, d1, d2, x1, y1, param); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_scal.hpp 
b/packages/kokkos-kernels/blas/src/KokkosBlas1_scal.hpp index 39c197f352b7..561c5050358f 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_scal.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_scal.hpp @@ -44,31 +44,23 @@ namespace KokkosBlas { /// \param a [in] view of type AV, scaling parameter for X. /// \param X [in] input view of type XMV. template -void scal(const execution_space& space, const RMV& R, const AV& a, - const XMV& X) { +void scal(const execution_space& space, const RMV& R, const AV& a, const XMV& X) { static_assert(Kokkos::is_execution_space_v, "KokkosBlas::scal: execution_space must be a valid Kokkos " "execution space"); static_assert(Kokkos::is_view::value, "KokkosBlas::scal: " "R is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::scal: RMV must be accessible from execution_space."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::scal: RMV must be accessible from execution_space."); static_assert(Kokkos::is_view::value, "KokkosBlas::scal: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::scal: XMV must be accessible from execution_space"); - static_assert( - Kokkos::SpaceAccessibility::assignable, - "KokkosBlas::scal: XMV must be assignable to RMV"); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::scal: XMV must be accessible from execution_space"); + static_assert(Kokkos::SpaceAccessibility::assignable, + "KokkosBlas::scal: XMV must be assignable to RMV"); + static_assert(std::is_same::value, "KokkosBlas::scal: R is const. 
" "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -83,36 +75,27 @@ void scal(const execution_space& space, const RMV& R, const AV& a, if (X.extent(0) != R.extent(0) || X.extent(1) != R.extent(1)) { std::ostringstream os; os << "KokkosBlas::scal: Dimensions of R and X do not match: " - << "R: " << R.extent(0) << " x " << R.extent(1) << ", X: " << X.extent(0) - << " x " << X.extent(1); + << "R: " << R.extent(0) << " x " << R.extent(1) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedRLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - XMV, UnifiedRLayout>::array_layout; + using UnifiedRLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RMV and XMV may be // rank 1 or rank 2. AV may be either a rank-1 View, or a scalar // value. 
- using RMV_Internal = Kokkos::View >; - using XMV_Internal = Kokkos::View >; - using AV_Internal = - typename KokkosKernels::Impl::GetUnifiedScalarViewType::type; + using AV_Internal = typename KokkosKernels::Impl::GetUnifiedScalarViewType::type; RMV_Internal R_internal = R; AV_Internal a_internal = a; XMV_Internal X_internal = X; - Impl::Scal::scal( - space, R_internal, a_internal, X_internal); + Impl::Scal::scal(space, R_internal, a_internal, X_internal); } /// \brief Computes R := alpha*X @@ -140,10 +123,8 @@ void scal(const RMV& R, const AV& a, const XMV& X) { struct SerialScale { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType& A) { - return Impl::SerialScaleInternal::invoke( - A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType& A) { + return Impl::SerialScaleInternal::invoke(A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1()); } }; @@ -154,11 +135,8 @@ struct SerialScale { template struct TeamScale { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const ScalarType alpha, - const AViewType& A) { - return Impl::TeamScaleInternal::invoke(member, A.extent(0), A.extent(1), - alpha, A.data(), A.stride_0(), + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const AViewType& A) { + return Impl::TeamScaleInternal::invoke(member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1()); } }; @@ -170,12 +148,9 @@ struct TeamScale { template struct TeamVectorScale { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, - const ScalarType alpha, - const AViewType& A) { - return Impl::TeamVectorScaleInternal::invoke(member, A.extent(0), - A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, const ScalarType alpha, const 
AViewType& A) { + return Impl::TeamVectorScaleInternal::invoke(member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1()); } }; diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_set.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_set.hpp index ea31ff62824c..6a6a5e0f2274 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_set.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_set.hpp @@ -27,10 +27,8 @@ namespace KokkosBlas { struct SerialSet { template - KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, - const AViewType &A) { - return Impl::SerialSetInternal::invoke( - A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A) { + return Impl::SerialSetInternal::invoke(A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1()); } }; @@ -41,12 +39,8 @@ struct SerialSet { template struct TeamSet { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A) { - return Impl::TeamSetInternal::invoke(member, A.extent(0), A.extent(1), - alpha, A.data(), A.stride_0(), - A.stride_1()); + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A) { + return Impl::TeamSetInternal::invoke(member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1()); } }; @@ -57,11 +51,8 @@ struct TeamSet { template struct TeamVectorSet { template - KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, - const ScalarType alpha, - const AViewType &A) { - return Impl::TeamVectorSetInternal::invoke(member, A.extent(0), A.extent(1), - alpha, A.data(), A.stride_0(), + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha, const AViewType &A) { + return Impl::TeamVectorSetInternal::invoke(member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), 
A.stride_1()); } }; diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_sum.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_sum.hpp index 88c7b100217c..dffd19382e5a 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_sum.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_sum.hpp @@ -33,40 +33,32 @@ namespace KokkosBlas { /// /// \return The sum product result; a single value. template , - int>::type = 0> -typename XVector::non_const_value_type sum(const execution_space& space, - const XVector& x) { + typename std::enable_if, int>::type = 0> +typename XVector::non_const_value_type sum(const execution_space& space, const XVector& x) { static_assert(Kokkos::is_execution_space_v, "KokkosBlas::sum: execution_space must be a valid Kokkos " "execution space"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::sum: XVector must be a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::sum: XVector must be accessible from execution_space."); + static_assert(Kokkos::is_view::value, "KokkosBlas::sum: XVector must be a Kokkos::View."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::sum: XVector must be accessible from execution_space."); static_assert(XVector::rank == 1, "KokkosBlas::sum: " "Both Vector inputs must have rank 1."); - using XVector_Internal = Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, Kokkos::MemoryTraits >; + using XVector_Internal = Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits >; using layout_t = typename XVector_Internal::array_layout; - using RVector_Internal = - Kokkos::View >; + using RVector_Internal = Kokkos::View >; typename XVector::non_const_value_type result; RVector_Internal R = RVector_Internal(&result, layout_t()); XVector_Internal X = x; - Impl::Sum::sum(space, R, - X); + Impl::Sum::sum(space, R, X); space.fence(); 
return result; } @@ -113,17 +105,13 @@ void sum(const execution_space& space, const RV& R, const XMV& X, static_assert(Kokkos::is_view::value, "KokkosBlas::sum: " "X is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::sum: XMV must be accessible from execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::sum: XMV must be accessible from execution_space."); + static_assert(std::is_same::value, "KokkosBlas::sum: R is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); - static_assert(((RV::rank == 0) && (XMV::rank == 1)) || - ((RV::rank == 1) && (XMV::rank == 2)), + static_assert(((RV::rank == 0) && (XMV::rank == 1)) || ((RV::rank == 1) && (XMV::rank == 2)), "KokkosBlas::sum: " "RV and XMV must either have rank 0 and 1 or rank 1 and 2."); @@ -131,33 +119,26 @@ void sum(const execution_space& space, const RV& R, const XMV& X, if (X.extent(1) != R.extent(0)) { std::ostringstream os; os << "KokkosBlas::sum (MV): Dimensions of R and X do not match: " - << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " - << X.extent(1); + << "R: " << R.extent(0) << ", X: " << X.extent(0) << " x " << X.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using UnifiedXLayout = - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; - using UnifiedRVLayout = - typename KokkosKernels::Impl::GetUnifiedLayoutPreferring< - RV, UnifiedXLayout>::array_layout; + using UnifiedXLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. 
- typedef Kokkos::View > RV_Internal; - typedef Kokkos::View > XMV_Internal; RV_Internal R_internal = R; XMV_Internal X_internal = X; - Impl::Sum::sum(space, R_internal, - X_internal); + Impl::Sum::sum(space, R_internal, X_internal); } /// \brief R(j) = sum(X(i,j)) @@ -176,8 +157,7 @@ void sum(const execution_space& space, const RV& R, const XMV& X, /// \param R [out] Output View (rank 0 or 1) containing the results. /// \param X [in] Input View (rank 1 or 2). template -void sum(const RV& R, const XMV& X, - typename std::enable_if::value, int>::type = 0) { +void sum(const RV& R, const XMV& X, typename std::enable_if::value, int>::type = 0) { sum(typename XMV::execution_space{}, R, X); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_swap.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_swap.hpp index 9ddcd106dfb9..30155f5d44be 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_swap.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_swap.hpp @@ -42,44 +42,32 @@ namespace KokkosBlas { template void swap(execution_space const& space, XVector const& x, YVector const& y) { // Assert properties of XVector - static_assert(Kokkos::is_view::value, - "KokkosBlas::swap: XVector must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::swap: XVector must be a Kokkos::View."); static_assert(XVector::rank == 1, "KokkosBlas::swap: " "Input vector x must have rank 1."); - static_assert(std::is_same_v, + static_assert(std::is_same_v, "KokkosBlas::swap: XVector must store non const values."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "swap: execution_space cannot access data in XVector"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "swap: execution_space cannot access data in XVector"); // Assert properties of YVector, could probably use a function for this as // XVector and YVector are required to have identical properties... 
- static_assert(Kokkos::is_view::value, - "KokkosBlas::swap: YVector must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::swap: YVector must be a Kokkos::View."); static_assert(YVector::rank == 1, "KokkosBlas::swap: " "Input vector y must have rank 1."); - static_assert(std::is_same_v, + static_assert(std::is_same_v, "KokkosBlas::swap: YVector must store non const values."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "swap: execution_space cannot access data in YVector"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "swap: execution_space cannot access data in YVector"); using XVector_Internal = Kokkos::View< - typename XVector::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits >; + typename XVector::non_const_value_type*, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, Kokkos::MemoryTraits >; using YVector_Internal = Kokkos::View< - typename YVector::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - Kokkos::Device, - Kokkos::MemoryTraits >; + typename YVector::non_const_value_type*, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, Kokkos::MemoryTraits >; XVector_Internal X(x); YVector_Internal Y(y); @@ -92,8 +80,7 @@ void swap(execution_space const& space, XVector const& x, YVector const& y) { Kokkos::Profiling::pushRegion("KokkosBlas::swap"); // If X.extent(0) == 0, do nothing if (X.extent(0) != 0) { - Impl::Swap::swap(space, - X, Y); + Impl::Swap::swap(space, X, Y); } Kokkos::Profiling::popRegion(); } @@ -111,8 +98,7 @@ void swap(execution_space const& space, XVector const& x, YVector const& y) { /// executed on the default stream of the execution_space associted with x. 
template void swap(const XVector& x, const YVector& y) { - const typename XVector::execution_space space = - typename XVector::execution_space(); + const typename XVector::execution_space space = typename XVector::execution_space(); swap(space, x, y); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_abs.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_abs.hpp index 55dcc668db45..a7e808c7138c 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_abs.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_abs.hpp @@ -23,8 +23,7 @@ namespace KokkosBlas { namespace Experimental { template -void KOKKOS_INLINE_FUNCTION abs(const TeamType& team, const RVector& r, - const XVector& x) { +void KOKKOS_INLINE_FUNCTION abs(const TeamType& team, const RVector& r, const XVector& x) { Impl::TeamAbs::team_abs(team, r, x); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_axpby.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_axpby.hpp index 374bc4239011..1b8734a8523a 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_axpby.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_axpby.hpp @@ -23,21 +23,16 @@ namespace KokkosBlas { namespace Experimental { template -void KOKKOS_INLINE_FUNCTION -axpby(const TeamType& team, const typename XVector::non_const_value_type& a, - const XVector& x, const typename YVector::non_const_value_type& b, - const YVector& y) { - return Impl::TeamAXPBY::team_axpby(team, a, x, b, - y); +void KOKKOS_INLINE_FUNCTION axpby(const TeamType& team, const typename XVector::non_const_value_type& a, + const XVector& x, const typename YVector::non_const_value_type& b, const YVector& y) { + return Impl::TeamAXPBY::team_axpby(team, a, x, b, y); } template -void KOKKOS_INLINE_FUNCTION -axpy(const TeamType& team, const typename XVector::non_const_value_type& a, - const XVector& x, const YVector& y) { +void KOKKOS_INLINE_FUNCTION axpy(const TeamType& team, const typename 
XVector::non_const_value_type& a, + const XVector& x, const YVector& y) { KokkosBlas::Experimental::axpby( - team, a, x, - Kokkos::ArithTraits::one(), y); + team, a, x, Kokkos::ArithTraits::one(), y); } } // namespace Experimental diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_dot.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_dot.hpp index 25c5c05cfc33..53065b6fae8c 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_dot.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_dot.hpp @@ -23,9 +23,9 @@ namespace KokkosBlas { namespace Experimental { template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::dot_type KOKKOS_INLINE_FUNCTION -dot(const TeamType& team, const XVector& x, const YVector& y) { +typename Kokkos::Details::InnerProductSpaceTraits::dot_type + KOKKOS_INLINE_FUNCTION + dot(const TeamType& team, const XVector& x, const YVector& y) { return Impl::TeamDot::team_dot(team, x, y); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_mult.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_mult.hpp index 2737f835c011..08d9c6813ec1 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_mult.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_mult.hpp @@ -23,12 +23,10 @@ namespace KokkosBlas { namespace Experimental { template -void KOKKOS_INLINE_FUNCTION -mult(const TeamType& team, const typename YVector::non_const_value_type& gamma, - const YVector& y, const typename AVector::non_const_value_type& alpha, - const AVector& a, const XVector& x) { - return Impl::TeamMult::team_mult( - team, gamma, y, alpha, a, x); +void KOKKOS_INLINE_FUNCTION mult(const TeamType& team, const typename YVector::non_const_value_type& gamma, + const YVector& y, const typename AVector::non_const_value_type& alpha, + const AVector& a, const XVector& x) { + return Impl::TeamMult::team_mult(team, gamma, y, alpha, a, x); } } // namespace Experimental diff --git 
a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_nrm2.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_nrm2.hpp index ee58cd3331cf..f0ac33f4f2d9 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_nrm2.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_nrm2.hpp @@ -23,9 +23,9 @@ namespace KokkosBlas { namespace Experimental { template -typename Kokkos::Details::InnerProductSpaceTraits< - typename XVector::non_const_value_type>::mag_type KOKKOS_INLINE_FUNCTION -nrm2(const TeamType& team, const XVector& x) { +typename Kokkos::Details::InnerProductSpaceTraits::mag_type + KOKKOS_INLINE_FUNCTION + nrm2(const TeamType& team, const XVector& x) { return Impl::TeamNrm2::team_nrm2(team, x); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_scal.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_scal.hpp index b148e165f17e..31d0c63b6dd5 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_scal.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_scal.hpp @@ -23,9 +23,8 @@ namespace KokkosBlas { namespace Experimental { template -void KOKKOS_INLINE_FUNCTION -scal(const TeamType& team, const RVector& r, - const typename XVector::non_const_value_type& a, const XVector& x) { +void KOKKOS_INLINE_FUNCTION scal(const TeamType& team, const RVector& r, + const typename XVector::non_const_value_type& a, const XVector& x) { return Impl::TeamScal::team_scal(team, r, a, x); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_update.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_update.hpp index 069932b1e52e..587c492c6e9a 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_team_update.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_team_update.hpp @@ -23,13 +23,11 @@ namespace KokkosBlas { namespace Experimental { template -void KOKKOS_INLINE_FUNCTION -update(const TeamType& team, - const typename XVector::non_const_value_type& alpha, const XVector& x, - const typename 
YVector::non_const_value_type& beta, const YVector& y, - const typename ZVector::non_const_value_type& gamma, const ZVector& z) { - return Impl::TeamUpdate::team_update( - team, alpha, x, beta, y, gamma, z); +void KOKKOS_INLINE_FUNCTION update(const TeamType& team, const typename XVector::non_const_value_type& alpha, + const XVector& x, const typename YVector::non_const_value_type& beta, + const YVector& y, const typename ZVector::non_const_value_type& gamma, + const ZVector& z) { + return Impl::TeamUpdate::team_update(team, alpha, x, beta, y, gamma, z); } } // namespace Experimental diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas1_update.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas1_update.hpp index 889f9ede32cd..95d1a2d7e092 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas1_update.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas1_update.hpp @@ -44,8 +44,7 @@ namespace KokkosBlas { /// \param gamma [in] scaling parameter for Z /// \param Z [in/out] view of type ZMV in which the results will be stored. 
template -void update(const execution_space& space, - const typename XMV::non_const_value_type& alpha, const XMV& X, +void update(const execution_space& space, const typename XMV::non_const_value_type& alpha, const XMV& X, const typename YMV::non_const_value_type& beta, const YMV& Y, const typename ZMV::non_const_value_type& gamma, const ZMV& Z) { static_assert(Kokkos::is_execution_space_v, @@ -60,20 +59,13 @@ void update(const execution_space& space, static_assert(Kokkos::is_view::value, "KokkosBlas::update: " "Z is not a Kokkos::View."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::update: XMV must be accessible from execution_space."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::update: YMV must be accessible from execution_space."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::update: ZMV must be accessible from execution_space."); - static_assert(std::is_same::value, + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::update: XMV must be accessible from execution_space."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::update: YMV must be accessible from execution_space."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::update: ZMV must be accessible from execution_space."); + static_assert(std::is_same::value, "KokkosBlas::update: Z is const. " "It must be nonconst, because it is an output argument " "(we have to be able to write to its entries)."); @@ -88,37 +80,32 @@ void update(const execution_space& space, "XMV, YMV, and ZMV must either have rank 1 or rank 2."); // Check compatibility of dimensions at run time. 
- if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1) || - X.extent(0) != Z.extent(0) || X.extent(1) != Z.extent(1)) { + if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1) || X.extent(0) != Z.extent(0) || + X.extent(1) != Z.extent(1)) { std::ostringstream os; os << "KokkosBlas::update (MV): Dimensions of X, Y, and Z do not match: " - << "Z: " << Z.extent(0) << " x " << Z.extent(1) << ", X: " << X.extent(0) - << " x " << X.extent(1) << ", Y: " << Y.extent(0) << " x " - << Y.extent(1); + << "Z: " << Z.extent(0) << " x " << Z.extent(1) << ", X: " << X.extent(0) << " x " << X.extent(1) + << ", Y: " << Y.extent(0) << " x " << Y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Create unmanaged versions of the input Views. XMV, YMV, and ZMV // may be rank 1 or rank 2, but they must all have the same rank. - using XMV_Internal = Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XMV::device_type, Kokkos::MemoryTraits >; + using XMV_Internal = Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::device_type, Kokkos::MemoryTraits >; - using YMV_Internal = Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YMV::device_type, Kokkos::MemoryTraits >; + using YMV_Internal = Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename YMV::device_type, Kokkos::MemoryTraits >; - using ZMV_Internal = Kokkos::View< - typename std::conditional::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename ZMV::device_type, Kokkos::MemoryTraits >; + using ZMV_Internal = Kokkos::View::type, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename ZMV::device_type, Kokkos::MemoryTraits >; XMV_Internal X_internal = X; YMV_Internal Y_internal = Y; @@ -134,9 +121,8 @@ void update(const 
execution_space& space, << endl; #endif // KOKKOSKERNELS_PRINT_DEMANGLED_TYPE_INFO - Impl::Update::update(space, alpha, X_internal, beta, Y_internal, - gamma, Z_internal); + Impl::Update::update(space, alpha, X_internal, beta, + Y_internal, gamma, Z_internal); } /// \brief Compute Z := alpha*X + beta*Y + gamma*Z. diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas2_gemv.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas2_gemv.hpp index 614b48d47acc..22d2b7bbbfee 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas2_gemv.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas2_gemv.hpp @@ -49,56 +49,39 @@ namespace KokkosBlas { /// \param x [in] Input vector, as a 1-D Kokkos::View /// \param beta [in] Input coefficient of y /// \param y [in/out] Output vector, as a nonconst 1-D Kokkos::View -template -void gemv(const ExecutionSpace& space, const char trans[], - typename AViewType::const_value_type& alpha, const AViewType& A, - const XViewType& x, typename YViewType::const_value_type& beta, - const YViewType& y) { +template +void gemv(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, + const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { static_assert(Kokkos::is_execution_space_v, "KokkosBlas::gemv: ExecutionSpace must be a valid Kokkos " "execution space."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::gemv: AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::gemv: XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::gemv: YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "KokkosBlas::gemv: AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "KokkosBlas::gemv: XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "KokkosBlas::gemv: YViewType must have rank 1."); - 
static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemv: AViewType must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemv: XViewType must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemv: YViewType must be accessible from ExecutionSpace"); + static_assert(Kokkos::is_view::value, "KokkosBlas::gemv: AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::gemv: XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::gemv: YViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "KokkosBlas::gemv: AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "KokkosBlas::gemv: XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "KokkosBlas::gemv: YViewType must have rank 1."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::gemv: AViewType must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::gemv: XViewType must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::gemv: YViewType must be accessible from ExecutionSpace"); // Check compatibility of dimensions at run time. 
if (trans[0] == 'N' || trans[0] == 'n') { if (A.extent(0) != y.extent(0) || A.extent(1) != x.extent(0)) { std::ostringstream os; os << "KokkosBlas::gemv: Dimensions of A, x, and y do not match: " - << "A: " << A.extent(0) << " x " << A.extent(1) - << ", x: " << x.extent(0) << ", y: " << y.extent(0); + << "A: " << A.extent(0) << " x " << A.extent(1) << ", x: " << x.extent(0) << ", y: " << y.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - } else if (trans[0] == 'T' || trans[0] == 't' || trans[0] == 'C' || - trans[0] == 'c' || trans[0] == 'H' || trans[0] == 'h') { + } else if (trans[0] == 'T' || trans[0] == 't' || trans[0] == 'C' || trans[0] == 'c' || trans[0] == 'H' || + trans[0] == 'h') { if (A.extent(1) != y.extent(0) || A.extent(0) != x.extent(0)) { std::ostringstream os; os << "KokkosBlas::dot: Dimensions of A, x, and y do not match: " - << "A: " << A.extent(0) << " x " << A.extent(1) - << ", x: " << x.extent(0) << ", y: " << y.extent(0); + << "A: " << A.extent(0) << " x " << A.extent(1) << ", x: " << x.extent(0) << ", y: " << y.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } else { @@ -115,21 +98,16 @@ void gemv(const ExecutionSpace& space, const char trans[], // Minimize the number of Impl::GEMV instantiations, by // standardizing on particular View specializations for its template // parameters. 
- typedef Kokkos::View > AVT; typedef Kokkos::View::array_layout, - typename XViewType::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename XViewType::device_type, Kokkos::MemoryTraits > XVT; typedef Kokkos::View::array_layout, - typename YViewType::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename YViewType::device_type, Kokkos::MemoryTraits > YVT; // Degenerate case is essentially same as scal - use fallback impl @@ -139,42 +117,32 @@ void gemv(const ExecutionSpace& space, const char trans[], // If A is LayoutRight and we have the BLAS, cuBLAS or rocBLAS TPL, use // fallback because those only support LayoutLeft #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS - useFallback = useFallback || (tolower(*trans) == 'c' && - std::is_same::value && - std::is_same::value); + useFallback = useFallback || + (tolower(*trans) == 'c' && std::is_same::value && + std::is_same::value); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS - useFallback = - useFallback || - (tolower(*trans) == 'c' && - std::is_same::value && - std::is_same::value); + useFallback = useFallback || + (tolower(*trans) == 'c' && std::is_same::value && + std::is_same::value); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS - useFallback = useFallback || (tolower(*trans) == 'c' && - std::is_same::value && - std::is_same::value); + useFallback = useFallback || + (tolower(*trans) == 'c' && std::is_same::value && + std::is_same::value); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #ifdef KOKKOS_ENABLE_SYCL // oneMKL supports both row-major and column-major of A + // but only supports oneapi::mkl::transpose::nontrans op useFallback = - useFallback || !std::is_same_v; + useFallback || !std::is_same_v; #endif #endif if (useFallback) { - const bool eti_spec_avail = - KokkosBlas::Impl::gemv_eti_spec_avail::value; - typedef Impl::GEMV - fallback_impl_type; + const bool eti_spec_avail = 
KokkosBlas::Impl::gemv_eti_spec_avail::value; + typedef Impl::GEMV fallback_impl_type; fallback_impl_type::gemv(space, trans, alpha, A, x, beta, y); } else { typedef Impl::GEMV impl_type; @@ -199,8 +167,7 @@ void gemv(const ExecutionSpace& space, const char trans[], /// \param beta [in] Input coefficient of y /// \param y [in/out] Output vector, as a nonconst 1-D Kokkos::View template -void gemv(const char trans[], typename AViewType::const_value_type& alpha, - const AViewType& A, const XViewType& x, +void gemv(const char trans[], typename AViewType::const_value_type& alpha, const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { gemv(typename AViewType::execution_space{}, trans, alpha, A, x, beta, y); } @@ -211,46 +178,38 @@ namespace Experimental { /// template struct Gemv { - template - static void KOKKOS_INLINE_FUNCTION - invoke(const MemberType& member, const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, const ScalarType& beta, - const YVector& y); + template + static void KOKKOS_INLINE_FUNCTION invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y); }; template struct Gemv { - template - static void KOKKOS_INLINE_FUNCTION - invoke(const MemberType& /*member*/, const char trans, - const ScalarType& alpha, const MatrixType& A, const XVector& x, - const ScalarType& beta, const YVector& y) { + template + static void KOKKOS_INLINE_FUNCTION invoke(const MemberType& /*member*/, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { serial_gemv(trans, alpha, A, x, beta, y); } }; template struct Gemv { - template - static void KOKKOS_INLINE_FUNCTION - invoke(const MemberType& member, const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, const ScalarType& beta, - const YVector& y) 
{ + template + static void KOKKOS_INLINE_FUNCTION invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { team_gemv(member, trans, alpha, A, x, beta, y); } }; template struct Gemv { - template - static void KOKKOS_INLINE_FUNCTION - invoke(const MemberType& member, const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, const ScalarType& beta, - const YVector& y) { + template + static void KOKKOS_INLINE_FUNCTION invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { teamvector_gemv(member, trans, alpha, A, x, beta, y); } }; diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas2_ger.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas2_ger.hpp index 8650577faf67..88786649ba0a 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas2_ger.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas2_ger.hpp @@ -39,54 +39,38 @@ namespace KokkosBlas { /// \param x [in] Input vector, as a 1-D Kokkos::View /// \param y [in] Input vector, as a 1-D Kokkos::View /// \param A [in/out] Output matrix, as a nonconst 2-D Kokkos::View -template -void ger(const ExecutionSpace& space, const char trans[], - const typename AViewType::const_value_type& alpha, const XViewType& x, - const YViewType& y, const AViewType& A) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "AViewType memory space must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "XViewType memory space must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "YViewType memory space must be accessible from ExecutionSpace"); - - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); 
- static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, - "YViewType must have rank 1."); +template +void ger(const ExecutionSpace& space, const char trans[], const typename AViewType::const_value_type& alpha, + const XViewType& x, const YViewType& y, const AViewType& A) { + static_assert(Kokkos::SpaceAccessibility::accessible, + "AViewType memory space must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "XViewType memory space must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "YViewType memory space must be accessible from ExecutionSpace"); + + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { std::ostringstream os; os << "KokkosBlas::ger: Dimensions of A, x, and y do not match: " - << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " - << x.extent(0) << ", y has size " << y.extent(0); + << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " << x.extent(0) << ", y has size " + << y.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if ((trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'H') || - (trans[0] == 'h')) { + if ((trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'H') || (trans[0] == 'h')) { // Ok } else { std::ostringstream os; - os << "KokkosBlas::ger: invalid trans[0] = '" << trans[0] - << "'. It must be equalt to 'T' or 't' or 'H' or 'h'"; + os << "KokkosBlas::ger: invalid trans[0] = '" << trans[0] << "'. It must be equalt to 'T' or 't' or 'H' or 'h'"; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -99,21 +83,16 @@ void ger(const ExecutionSpace& space, const char trans[], // Minimize the number of Impl::GER instantiations, by standardizing // on particular View specializations for its template parameters. 
typedef Kokkos::View::array_layout, - typename XViewType::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename XViewType::device_type, Kokkos::MemoryTraits > XVT; typedef Kokkos::View::array_layout, - typename YViewType::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename YViewType::device_type, Kokkos::MemoryTraits > YVT; - typedef Kokkos::View > AVT; @@ -133,12 +112,10 @@ void ger(const ExecutionSpace& space, const char trans[], /// \param y [in] Input vector, as a 1-D Kokkos::View /// \param A [in/out] Output matrix, as a nonconst 2-D Kokkos::View template -void ger(const char trans[], const typename AViewType::const_value_type& alpha, - const XViewType& x, const YViewType& y, const AViewType& A) { - const typename AViewType::execution_space space = - typename AViewType::execution_space(); - ger( - space, trans, alpha, x, y, A); +void ger(const char trans[], const typename AViewType::const_value_type& alpha, const XViewType& x, const YViewType& y, + const AViewType& A) { + const typename AViewType::execution_space space = typename AViewType::execution_space(); + ger(space, trans, alpha, x, y, A); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas2_serial_gemv.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas2_serial_gemv.hpp index 12dbf61c3a68..2b52d6c5a9c7 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas2_serial_gemv.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas2_serial_gemv.hpp @@ -23,13 +23,9 @@ namespace KokkosBlas { namespace Experimental { -template -void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, - const ScalarType& alpha, - const MatrixType& A, const XVector& x, - const ScalarType& beta, - const YVector& y) { +template +void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, const ScalarType& alpha, const MatrixType& A, + const XVector& x, const 
ScalarType& beta, const YVector& y) { if (trans == 'N' || trans == 'n') { using mode = KokkosBlas::Trans::NoTranspose; KokkosBlas::SerialGemv::invoke(alpha, A, x, beta, y); @@ -46,11 +42,8 @@ void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, // default AlgoTag template -void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, - const ScalarType& alpha, - const MatrixType& A, const XVector& x, - const ScalarType& beta, - const YVector& y) { +void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, const ScalarType& alpha, const MatrixType& A, + const XVector& x, const ScalarType& beta, const YVector& y) { serial_gemv(trans, alpha, A, x, beta, y); } diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas2_syr.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas2_syr.hpp index 00d1d8b3def5..7cb226fd7ff3 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas2_syr.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas2_syr.hpp @@ -64,53 +64,39 @@ namespace KokkosBlas { /// \param A [in/out] Output matrix, as a nonconst 2-D Kokkos::View template void syr(const ExecutionSpace& space, const char trans[], const char uplo[], - const typename AViewType::const_value_type& alpha, const XViewType& x, - const AViewType& A) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "AViewType memory space must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "XViewType memory space must be accessible from ExecutionSpace"); - - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, - "XViewType must have rank 1."); + const typename AViewType::const_value_type& alpha, const XViewType& x, const AViewType& A) { + static_assert(Kokkos::SpaceAccessibility::accessible, + "AViewType memory 
space must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "XViewType memory space must be accessible from ExecutionSpace"); + + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); // Check compatibility of dimensions at run time. if ((A.extent(0) != x.extent(0)) || (A.extent(1) != x.extent(0))) { std::ostringstream os; os << "KokkosBlas::syr: Dimensions of A, x: " - << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " - << x.extent(0); + << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " << x.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if ((trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'H') || - (trans[0] == 'h')) { + if ((trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'H') || (trans[0] == 'h')) { // Ok } else { std::ostringstream os; - os << "KokkosBlas2::syr(): invalid trans[0] = '" << trans[0] - << "'. It must be equal to 'T' or 't' or 'H' or 'h'"; + os << "KokkosBlas2::syr(): invalid trans[0] = '" << trans[0] << "'. It must be equal to 'T' or 't' or 'H' or 'h'"; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if ((uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || - (uplo[0] == 'l')) { + if ((uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || (uplo[0] == 'l')) { // Ok } else { std::ostringstream oss; - oss << "KokkosBlas2::syr(): invalid uplo[0] = " << uplo[0] - << "'. It must be equal to 'U' or 'u' or 'L' or 'l'"; + oss << "KokkosBlas2::syr(): invalid uplo[0] = " << uplo[0] << "'. 
It must be equal to 'U' or 'u' or 'L' or 'l'"; throw std::runtime_error(oss.str()); } @@ -122,15 +108,11 @@ void syr(const ExecutionSpace& space, const char trans[], const char uplo[], // Minimize the number of Impl::SYR instantiations, by standardizing // on particular View specializations for its template parameters. - using XVT = - Kokkos::View::array_layout, - typename XViewType::device_type, - Kokkos::MemoryTraits >; - - using AVT = Kokkos::View::array_layout, + typename XViewType::device_type, Kokkos::MemoryTraits >; + + using AVT = Kokkos::View >; Impl::SYR::syr(space, trans, uplo, alpha, x, A); @@ -172,13 +154,10 @@ void syr(const ExecutionSpace& space, const char trans[], const char uplo[], /// \param x [in] Input vector, as a 1-D Kokkos::View /// \param A [in/out] Output matrix, as a nonconst 2-D Kokkos::View template -void syr(const char trans[], const char uplo[], - const typename AViewType::const_value_type& alpha, const XViewType& x, +void syr(const char trans[], const char uplo[], const typename AViewType::const_value_type& alpha, const XViewType& x, const AViewType& A) { - const typename AViewType::execution_space space = - typename AViewType::execution_space(); - syr( - space, trans, uplo, alpha, x, A); + const typename AViewType::execution_space space = typename AViewType::execution_space(); + syr(space, trans, uplo, alpha, x, A); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas2_syr2.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas2_syr2.hpp index d86abd31c17c..91f4b20deeca 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas2_syr2.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas2_syr2.hpp @@ -78,67 +78,49 @@ namespace KokkosBlas { /// \param x [in] Input vector, as a 1-D Kokkos::View /// \param y [in] Input vector, as a 1-D Kokkos::View /// \param A [in/out] Output matrix, as a nonconst 2-D Kokkos::View -template +template void syr2(const ExecutionSpace& space, const char trans[], const char uplo[], 
- const typename AViewType::const_value_type& alpha, const XViewType& x, - const YViewType& y, const AViewType& A) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "AViewType memory space must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "XViewType memory space must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "YViewType memory space must be accessible from ExecutionSpace"); - - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "YViewType must be a Kokkos::View."); - - static_assert(static_cast(AViewType::rank()) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank()) == 1, - "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank()) == 1, - "YViewType must have rank 1."); + const typename AViewType::const_value_type& alpha, const XViewType& x, const YViewType& y, + const AViewType& A) { + static_assert(Kokkos::SpaceAccessibility::accessible, + "AViewType memory space must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "XViewType memory space must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "YViewType memory space must be accessible from ExecutionSpace"); + + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + + static_assert(static_cast(AViewType::rank()) == 2, "AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank()) == 1, "XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank()) == 1, "YViewType must have rank 1."); // Check 
compatibility of dimensions at run time. - if ((A.extent(0) == A.extent(1)) && (A.extent(0) == x.extent(0)) && - (A.extent(0) == y.extent(0))) { + if ((A.extent(0) == A.extent(1)) && (A.extent(0) == x.extent(0)) && (A.extent(0) == y.extent(0))) { // Ok } else { std::ostringstream os; os << "KokkosBlas::syr2: Dimensions of A, x: " - << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " - << x.extent(0) << ", y has size " << y.extent(0); + << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " << x.extent(0) << ", y has size " + << y.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if ((trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'H') || - (trans[0] == 'h')) { + if ((trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'H') || (trans[0] == 'h')) { // Ok } else { std::ostringstream os; - os << "KokkosBlas2::syr2(): invalid trans[0] = '" << trans[0] - << "'. It must be equalt to 'T' or 't' or 'H' or 'h'"; + os << "KokkosBlas2::syr2(): invalid trans[0] = '" << trans[0] << "'. It must be equalt to 'T' or 't' or 'H' or 'h'"; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if ((uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || - (uplo[0] == 'l')) { + if ((uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || (uplo[0] == 'l')) { // Ok } else { std::ostringstream oss; - oss << "KokkosBlas2::syr2(): invalid uplo[0] = " << uplo[0] - << "'. It must be equalt to 'U' or 'u' or 'L' or 'l'"; + oss << "KokkosBlas2::syr2(): invalid uplo[0] = " << uplo[0] << "'. It must be equalt to 'U' or 'u' or 'L' or 'l'"; throw std::runtime_error(oss.str()); } @@ -151,26 +133,20 @@ void syr2(const ExecutionSpace& space, const char trans[], const char uplo[], // Minimize the number of Impl::SYR2 instantiations, by standardizing // on particular View specializations for its template parameters. 
typedef Kokkos::View::array_layout, - typename XViewType::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename XViewType::device_type, Kokkos::MemoryTraits > XVT; typedef Kokkos::View::array_layout, - typename YViewType::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename YViewType::device_type, Kokkos::MemoryTraits > YVT; - typedef Kokkos::View > AVT; - Impl::SYR2::syr2(space, trans, uplo, alpha, x, - y, A); + Impl::SYR2::syr2(space, trans, uplo, alpha, x, y, A); } /// \brief Rank-1 update (just lower portion or just upper portion) of a @@ -224,13 +200,10 @@ void syr2(const ExecutionSpace& space, const char trans[], const char uplo[], /// \param y [in] Input vector, as a 1-D Kokkos::View /// \param A [in/out] Output matrix, as a nonconst 2-D Kokkos::View template -void syr2(const char trans[], const char uplo[], - const typename AViewType::const_value_type& alpha, const XViewType& x, +void syr2(const char trans[], const char uplo[], const typename AViewType::const_value_type& alpha, const XViewType& x, const YViewType& y, const AViewType& A) { - const typename AViewType::execution_space space = - typename AViewType::execution_space(); - syr2( - space, trans, uplo, alpha, x, y, A); + const typename AViewType::execution_space space = typename AViewType::execution_space(); + syr2(space, trans, uplo, alpha, x, y, A); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas2_team_gemv.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas2_team_gemv.hpp index 09a1ae233079..a4a6dade2d71 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas2_team_gemv.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas2_team_gemv.hpp @@ -22,67 +22,48 @@ namespace KokkosBlas { namespace Experimental { -template -void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, - const ScalarType& alpha, - const 
MatrixType& A, const XVector& x, - const ScalarType& beta, - const YVector& y) { +template +void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, const YVector& y) { if (trans == 'N' || trans == 'n') - TeamGemv::invoke(team, alpha, A, x, - beta, y); + TeamGemv::invoke(team, alpha, A, x, beta, y); else if (trans == 'T' || trans == 't') - TeamGemv::invoke(team, alpha, A, x, - beta, y); + TeamGemv::invoke(team, alpha, A, x, beta, y); else if (trans == 'C' || trans == 'c') - TeamGemv::invoke(team, alpha, A, x, - beta, y); + TeamGemv::invoke(team, alpha, A, x, beta, y); else { Kokkos::abort("Matrix mode not supported"); } } // default AlgoTag -template -void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, - const ScalarType& alpha, - const MatrixType& A, const XVector& x, - const ScalarType& beta, - const YVector& y) { +template +void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, const YVector& y) { team_gemv(team, trans, alpha, A, x, beta, y); } -template -void KOKKOS_INLINE_FUNCTION -teamvector_gemv(const TeamType& team, const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, const ScalarType& beta, - const YVector& y) { +template +void KOKKOS_INLINE_FUNCTION teamvector_gemv(const TeamType& team, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { if (trans == 'N' || trans == 'n') { - KokkosBlas::TeamVectorGemv::invoke( - team, alpha, A, x, beta, y); + KokkosBlas::TeamVectorGemv::invoke(team, alpha, A, x, beta, y); } else if (trans == 'T' || trans == 't') { - KokkosBlas::TeamVectorGemv::invoke( - team, alpha, A, x, beta, y); + KokkosBlas::TeamVectorGemv::invoke(team, alpha, A, x, beta, y); } else if (trans == 'C' 
|| trans == 'c') { - KokkosBlas::TeamVectorGemv::invoke( - team, alpha, A, x, beta, y); + KokkosBlas::TeamVectorGemv::invoke(team, alpha, A, x, beta, y); } else { Kokkos::abort("Matrix mode not supported"); } } // default AlgoTag -template -void KOKKOS_INLINE_FUNCTION -team_vector_gemv(const TeamType& team, const char trans, - const ScalarType& alpha, const MatrixType& A, const XVector& x, - const ScalarType& beta, const YVector& y) { - teamvector_gemv(team, trans, alpha, A, x, - beta, y); +template +void KOKKOS_INLINE_FUNCTION team_vector_gemv(const TeamType& team, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { + teamvector_gemv(team, trans, alpha, A, x, beta, y); } } // namespace Experimental diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas3_gemm.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas3_gemm.hpp index febd39b1498e..b0bff7ea715d 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas3_gemm.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas3_gemm.hpp @@ -38,31 +38,21 @@ namespace Impl { // This case must be intercepted here rather than impl in order to call TPL // GEMV instead of TPL GEMM. This codepath was measured to be profitable with // cuBLAS. 
-template +template bool gemv_based_gemm( - const execution_space& space, const char transA[], const char transB[], - typename AViewType::const_value_type& alpha, const AViewType& A, - const BViewType& B, typename CViewType::const_value_type& beta, - const CViewType& C, - typename std::enable_if::value && - !std::is_same::value>::type* = + const execution_space& space, const char transA[], const char transB[], typename AViewType::const_value_type& alpha, + const AViewType& A, const BViewType& B, typename CViewType::const_value_type& beta, const CViewType& C, + typename std::enable_if::value && + !std::is_same::value>::type* = nullptr) { - if (toupper(transA[0]) == 'N' && toupper(transB[0]) == 'N' && - B.extent(1) == size_t(1)) { + if (toupper(transA[0]) == 'N' && toupper(transB[0]) == 'N' && B.extent(1) == size_t(1)) { // since B/C both have a single column and are not LayoutStride, // can create a raw contiguous rank-1 vector from them rather than using // subview. - Kokkos::View> Bvec(B.data(), B.extent(0)); - Kokkos::View> Cvec(C.data(), C.extent(0)); KokkosBlas::gemv(space, "N", alpha, A, Bvec, beta, Cvec); @@ -76,15 +66,11 @@ bool gemv_based_gemm( // tests. 
template bool gemv_based_gemm( - const typename CViewType::execution_space& /*space*/, - const char /*transA*/[], const char /*transB*/[], - typename AViewType::const_value_type& /*alpha*/, const AViewType& /*A*/, - const BViewType& /*B*/, typename CViewType::const_value_type& /*beta*/, - const CViewType& /*C*/, - typename std::enable_if::value || - std::is_same::value>::type* = + const typename CViewType::execution_space& /*space*/, const char /*transA*/[], const char /*transB*/[], + typename AViewType::const_value_type& /*alpha*/, const AViewType& /*A*/, const BViewType& /*B*/, + typename CViewType::const_value_type& /*beta*/, const CViewType& /*C*/, + typename std::enable_if::value || + std::is_same::value>::type* = nullptr) { return false; } @@ -108,52 +94,35 @@ bool gemv_based_gemm( /// \param B [in] Input matrix, as a 2-D Kokkos::View /// \param beta [in] Input coefficient of C /// \param C [in/out] Output vector, as a nonconst 2-D Kokkos::View -template -void gemm(const execution_space& space, const char transA[], - const char transB[], typename AViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B, +template +void gemm(const execution_space& space, const char transA[], const char transB[], + typename AViewType::const_value_type& alpha, const AViewType& A, const BViewType& B, typename CViewType::const_value_type& beta, const CViewType& C) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) static_assert(Kokkos::is_execution_space_v, "KokkosBlas::gemm: execution_space must be a valid Kokkos " "execution space"); - static_assert(Kokkos::is_view::value, - "KokkosBlas::gemm: AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::gemm: BViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::gemm: CViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "KokkosBlas::gemm: AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 
2, - "KokkosBlas::gemm: BViewType must have rank 2."); - static_assert(static_cast(CViewType::rank) == 2, - "KokkosBlas::gemm: CViewType must have rank 2."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemm: AViewType must be accessible from execution_space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemm: BViewType must be accessible from execution_space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemm: CViewType must be accessible from execution_space"); + static_assert(Kokkos::is_view::value, "KokkosBlas::gemm: AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::gemm: BViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::gemm: CViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "KokkosBlas::gemm: AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "KokkosBlas::gemm: BViewType must have rank 2."); + static_assert(static_cast(CViewType::rank) == 2, "KokkosBlas::gemm: CViewType must have rank 2."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::gemm: AViewType must be accessible from execution_space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::gemm: BViewType must be accessible from execution_space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosBlas::gemm: CViewType must be accessible from execution_space"); // Check validity of transpose argument - bool valid_transA = (transA[0] == 'N') || (transA[0] == 'n') || - (transA[0] == 'T') || (transA[0] == 't') || + bool valid_transA = (transA[0] == 'N') || (transA[0] == 'n') || (transA[0] == 'T') || (transA[0] == 't') || (transA[0] == 'C') || (transA[0] == 'c'); - bool valid_transB = (transB[0] == 'N') || (transB[0] == 'n') || - (transB[0] == 'T') || (transB[0] == 't') || + bool valid_transB = (transB[0] == 'N') || 
(transB[0] == 'n') || (transB[0] == 'T') || (transB[0] == 't') || (transB[0] == 'C') || (transB[0] == 'c'); if (!(valid_transA && valid_transB)) { std::ostringstream os; - os << "KokkosBlas::gemm: transA[0] = '" << transA[0] << " transB[0] = '" - << transB[0] << "'. " + os << "KokkosBlas::gemm: transA[0] = '" << transA[0] << " transB[0] = '" << transB[0] << "'. " << "Valid values include 'N' or 'n' (No transpose), 'T' or 't' " "(Transpose), " "and 'C' or 'c' (Conjugate transpose)."; @@ -172,13 +141,11 @@ void gemm(const execution_space& space, const char transA[], int64_t C0 = C.extent(0); int64_t C1 = C.extent(1); - if (((A_t ? A1 : A0) != C0) || ((B_t ? B_0 : B1) != C1) || - ((A_t ? A0 : A1) != (B_t ? B1 : B_0))) { + if (((A_t ? A1 : A0) != C0) || ((B_t ? B_0 : B1) != C1) || ((A_t ? A0 : A1) != (B_t ? B1 : B_0))) { std::ostringstream os; os << "KokkosBlas::gemm: Dimensions of A, B, and C do not match: " - << "transA: " << transA[0] << " transB: " << transB[0] - << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) - << " x " << B.extent(1) << " C: " << C.extent(0) << " x " << C.extent(1); + << "transA: " << transA[0] << " transB: " << transB[0] << " A: " << A.extent(0) << " x " << A.extent(1) + << " B: " << B.extent(0) << " x " << B.extent(1) << " C: " << C.extent(0) << " x " << C.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } #endif // KOKKOSKERNELS_DEBUG_LEVEL > 0 @@ -195,24 +162,19 @@ void gemm(const execution_space& space, const char transA[], } // Check if gemv code path is allowed and profitable, and if so run it. - if (Impl::gemv_based_gemm(space, transA, transB, alpha, A, B, beta, C)) - return; + if (Impl::gemv_based_gemm(space, transA, transB, alpha, A, B, beta, C)) return; // Minimize the number of Impl::GEMM instantiations, by // standardizing on particular View specializations for its template // parameters. 
- typedef Kokkos::View< - typename AViewType::const_value_type**, typename AViewType::array_layout, - typename AViewType::device_type, Kokkos::MemoryTraits> + typedef Kokkos::View> AVT; - typedef Kokkos::View< - typename BViewType::const_value_type**, typename BViewType::array_layout, - typename BViewType::device_type, Kokkos::MemoryTraits> + typedef Kokkos::View> BVT; - typedef Kokkos::View> + typedef Kokkos::View> CVT; typedef Impl::GEMM impl_type; impl_type::gemm(space, transA, transB, alpha, A, B, beta, C); @@ -236,12 +198,9 @@ void gemm(const execution_space& space, const char transA[], /// \param beta [in] Input coefficient of C /// \param C [in/out] Output vector, as a nonconst 2-D Kokkos::View template -void gemm(const char transA[], const char transB[], - typename AViewType::const_value_type& alpha, const AViewType& A, - const BViewType& B, typename CViewType::const_value_type& beta, - const CViewType& C) { - gemm(typename CViewType::execution_space{}, transA, transB, alpha, A, B, beta, - C); +void gemm(const char transA[], const char transB[], typename AViewType::const_value_type& alpha, const AViewType& A, + const BViewType& B, typename CViewType::const_value_type& beta, const CViewType& C) { + gemm(typename CViewType::execution_space{}, transA, transB, alpha, A, B, beta, C); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas3_trmm.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas3_trmm.hpp index bdc86d4d9e00..9da47b7160f0 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas3_trmm.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas3_trmm.hpp @@ -64,29 +64,19 @@ namespace KokkosBlas { /// On entry, M-by-N matrix /// On exit, overwritten with the solution template -void trmm(const execution_space& space, const char side[], const char uplo[], - const char trans[], const char diag[], - typename BViewType::const_value_type& alpha, const AViewType& A, - const BViewType& B) { - static_assert(Kokkos::is_view::value, - 
"AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "BViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 2, - "BViewType must have rank 2."); +void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], + typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "BViewType must have rank 2."); // Check validity of indicator argument - bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || - (side[0] == 'r'); - bool valid_uplo = (uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || - (uplo[0] == 'l'); - bool valid_trans = (trans[0] == 'N') || (trans[0] == 'n') || - (trans[0] == 'T') || (trans[0] == 't') || + bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || (side[0] == 'r'); + bool valid_uplo = (uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || (uplo[0] == 'l'); + bool valid_trans = (trans[0] == 'N') || (trans[0] == 'n') || (trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'C') || (trans[0] == 'c'); - bool valid_diag = (diag[0] == 'U') || (diag[0] == 'u') || (diag[0] == 'N') || - (diag[0] == 'n'); + bool valid_diag = (diag[0] == 'U') || (diag[0] == 'u') || (diag[0] == 'N') || (diag[0] == 'n'); if (!valid_side) { std::ostringstream os; os << "KokkosBlas::trmm: side = '" << side[0] << "'. " @@ -133,27 +123,20 @@ void trmm(const execution_space& space, const char side[], const char uplo[], if (A_m != A_n || (is_A_lower_triangle ? 
B_m : B_n) != A_n) { std::ostringstream os; os << "KokkosBlas::trmm: Dimensions of A and B do not match: " - << "side: " << side[0] << " A: " << A.extent(0) << " x " << A.extent(1) - << " B: " << B.extent(0) << " x " << B.extent(1); + << "side: " << side[0] << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) << " x " + << B.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Create A matrix view type alias - using AViewInternalType = - Kokkos::View >; + using AViewInternalType = Kokkos::View >; // Crease B matrix view type alias - using BViewInternalType = - Kokkos::View >; + using BViewInternalType = Kokkos::View >; - KokkosBlas::Impl::TRMM::trmm(space, side, uplo, trans, - diag, alpha, A, B); + KokkosBlas::Impl::TRMM::trmm(space, side, uplo, trans, diag, + alpha, A, B); } /// \brief Solve triangular linear system with multiple RHSs: @@ -186,11 +169,9 @@ void trmm(const execution_space& space, const char side[], const char uplo[], /// On entry, M-by-N matrix /// On exit, overwritten with the solution template -void trmm(const char side[], const char uplo[], const char trans[], - const char diag[], typename BViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B) { - trmm(typename AViewType::execution_space{}, side, uplo, trans, diag, alpha, A, - B); +void trmm(const char side[], const char uplo[], const char trans[], const char diag[], + typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { + trmm(typename AViewType::execution_space{}, side, uplo, trans, diag, alpha, A, B); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas3_trsm.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas3_trsm.hpp index 890b2ff6aa8f..fd0123174ef4 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas3_trsm.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas3_trsm.hpp @@ -60,29 +60,19 @@ namespace KokkosBlas { /// On entry, M-by-N matrix of multile RHS /// On 
exit, overwritten with the solution X template -void trsm(const execution_space& space, const char side[], const char uplo[], - const char trans[], const char diag[], - typename BViewType::const_value_type& alpha, const AViewType& A, - const BViewType& B) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "BViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 2, - "BViewType must have rank 2."); +void trsm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], + typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "BViewType must have rank 2."); // Check validity of indicator argument - bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || - (side[0] == 'r'); - bool valid_uplo = (uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || - (uplo[0] == 'l'); - bool valid_trans = (trans[0] == 'N') || (trans[0] == 'n') || - (trans[0] == 'T') || (trans[0] == 't') || + bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || (side[0] == 'r'); + bool valid_uplo = (uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || (uplo[0] == 'l'); + bool valid_trans = (trans[0] == 'N') || (trans[0] == 'n') || (trans[0] == 'T') || (trans[0] == 't') || (trans[0] == 'C') || (trans[0] == 'c'); - bool valid_diag = (diag[0] == 'U') || (diag[0] == 'u') || (diag[0] == 'N') || - (diag[0] == 'n'); + bool valid_diag = (diag[0] == 'U') || (diag[0] == 'u') || (diag[0] == 'N') || (diag[0] == 
'n'); if (!valid_side) { std::ostringstream os; os << "KokkosBlas::trsm: side = '" << side[0] << "'. " @@ -125,30 +115,23 @@ void trsm(const execution_space& space, const char side[], const char uplo[], if ((A0 != A1) || ((A_s ? B0 : B1) != A1)) { std::ostringstream os; os << "KokkosBlas::trsm: Dimensions of A and B do not match: " - << "side: " << side[0] << " A: " << A.extent(0) << " x " << A.extent(1) - << " B: " << B.extent(0) << " x " << B.extent(1); + << "side: " << side[0] << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) << " x " + << B.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } // Return if degenerated matrices are provided - if ((A.extent(0) == 0) || (A.extent(1) == 0) || (B.extent(0) == 0) || - (B.extent(1) == 0)) - return; + if ((A.extent(0) == 0) || (A.extent(1) == 0) || (B.extent(0) == 0) || (B.extent(1) == 0)) return; // Minimize the number of Impl::TRSM instantiations, by // standardizing on particular View specializations for its template // parameters. 
- using AVT = Kokkos::View >; - using BVT = Kokkos::View >; + using AVT = Kokkos::View >; + using BVT = Kokkos::View >; - KokkosBlas::Impl::TRSM::trsm( - space, side, uplo, trans, diag, alpha, A, B); + KokkosBlas::Impl::TRSM::trsm(space, side, uplo, trans, diag, alpha, A, B); } /// \brief Solve triangular linear system with multiple RHSs: @@ -179,11 +162,9 @@ void trsm(const execution_space& space, const char side[], const char uplo[], /// On entry, M-by-N matrix of multile RHS /// On exit, overwritten with the solution X template -void trsm(const char side[], const char uplo[], const char trans[], - const char diag[], typename BViewType::const_value_type& alpha, - const AViewType& A, const BViewType& B) { - trsm(typename AViewType::execution_space{}, side, uplo, trans, diag, alpha, A, - B); +void trsm(const char side[], const char uplo[], const char trans[], const char diag[], + typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { + trsm(typename AViewType::execution_space{}, side, uplo, trans, diag, alpha, A, B); } } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/src/KokkosBlas_trtri.hpp b/packages/kokkos-kernels/blas/src/KokkosBlas_trtri.hpp index d9771e3a16c5..34ca96b2d4cf 100644 --- a/packages/kokkos-kernels/blas/src/KokkosBlas_trtri.hpp +++ b/packages/kokkos-kernels/blas/src/KokkosBlas_trtri.hpp @@ -43,8 +43,7 @@ namespace KokkosBlas { // and the inversion could not be completed. 
// source: https://software.intel.com/en-us/mkl-developer-reference-c-trtri template -[[deprecated]] int trtri(const char uplo[], const char diag[], - const AViewType& A) { +[[deprecated]] int trtri(const char uplo[], const char diag[], const AViewType& A) { return KokkosLapack::trtri(uplo, diag, A); } diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_avail.hpp index e2b04e300db6..1ed52d35b8cd 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct axpby_tpl_spec_avail { enum : bool { value = false }; }; @@ -34,54 +33,44 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct axpby_tpl_spec_avail< \ - ExecSpace, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct axpby_tpl_spec_avail< \ + ExecSpace, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(double, 
Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct axpby_tpl_spec_avail< \ - ExecSpace, SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct axpby_tpl_spec_avail< \ + ExecSpace, SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_AXPBY_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_decl.hpp index 65154b998544..5ab29e632f7f 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_decl.hpp 
+++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_axpby_tpl_spec_decl.hpp @@ -27,8 +27,7 @@ inline void axpby_print_specialization() { printf( "KokkosBlas1::axpby<> TPL Blas specialization for < %s , %s , %s , %s " ">\n", - typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), - typeid(YMV).name()); + typeid(AV).name(), typeid(XMV).name(), typeid(BV).name(), typeid(YMV).name()); #endif } } // namespace @@ -40,158 +39,132 @@ inline void axpby_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby< \ - ExecSpace, double, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - double, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef double AV; \ - typedef double BV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YV; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_BLAS,double]"); \ - if ((X.extent(0) < INT_MAX) && (beta == 1.0)) { \ - axpby_print_specialization(); \ - int N = X.extent(0); \ - int one = 1; \ - HostBlas::axpy(N, alpha, X.data(), one, Y.data(), one); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby< \ + ExecSpace, double, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + double, \ + Kokkos::View, Kokkos::MemoryTraits >, 1, \ + true, ETI_SPEC_AVAIL> { \ + typedef double AV; \ + typedef double BV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YV; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_BLAS,double]"); \ + if ((X.extent(0) < INT_MAX) && (beta == 1.0)) { \ + axpby_print_specialization(); \ + int N = X.extent(0); \ + int one = 1; \ + HostBlas::axpy(N, alpha, X.data(), one, Y.data(), one); \ + } else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby< \ - ExecSpace, float, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - float, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef float AV; \ - typedef float BV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YV; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_BLAS,float]"); \ - if ((X.extent(0) < INT_MAX) && (beta == 1.0f)) { \ - axpby_print_specialization(); \ - int N = X.extent(0); \ - int one = 1; \ - HostBlas::axpy(N, alpha, X.data(), one, Y.data(), one); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby< \ + ExecSpace, float, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + float, \ + Kokkos::View, Kokkos::MemoryTraits >, 1, \ + true, ETI_SPEC_AVAIL> { \ + typedef float AV; \ + typedef float BV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YV; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_BLAS,float]"); \ + if ((X.extent(0) < INT_MAX) && (beta == 1.0f)) { \ + axpby_print_specialization(); \ + int N = X.extent(0); \ + int one = 1; \ + 
HostBlas::axpy(N, alpha, X.data(), one, Y.data(), one); \ + } else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::complex, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex AV; \ - typedef Kokkos::complex BV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - YV; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::axpby[TPL_BLAS,complex]"); \ - if ((X.extent(0) < INT_MAX) && (beta == 1.0f)) { \ - axpby_print_specialization(); \ - int N = X.extent(0); \ - int one = 1; \ - const std::complex alpha_val = alpha; \ - HostBlas >::axpy( \ - N, alpha_val, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::complex, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex AV; \ + typedef Kokkos::complex BV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + YV; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_BLAS,complex]"); \ 
+ if ((X.extent(0) < INT_MAX) && (beta == 1.0f)) { \ + axpby_print_specialization(); \ + int N = X.extent(0); \ + int one = 1; \ + const std::complex alpha_val = alpha; \ + HostBlas >::axpy(N, alpha_val, reinterpret_cast*>(X.data()), \ + one, reinterpret_cast*>(Y.data()), one); \ + } else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::complex, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex AV; \ - typedef Kokkos::complex BV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - YV; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::axpby[TPL_BLAS,complex]"); \ - if ((X.extent(0) < INT_MAX) && (beta == 1.0f)) { \ - axpby_print_specialization(); \ - int N = X.extent(0); \ - int one = 1; \ - const std::complex alpha_val = alpha; \ - HostBlas >::axpy( \ - N, alpha_val, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CAXPBY_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::complex, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex AV; \ + typedef Kokkos::complex BV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View*, LAYOUT, 
Kokkos::Device, \ + Kokkos::MemoryTraits > \ + YV; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_BLAS,complex]"); \ + if ((X.extent(0) < INT_MAX) && (beta == 1.0f)) { \ + axpby_print_specialization(); \ + int N = X.extent(0); \ + int one = 1; \ + const std::complex alpha_val = alpha; \ + HostBlas >::axpy(N, alpha_val, reinterpret_cast*>(X.data()), \ + one, reinterpret_cast*>(Y.data()), one); \ + } else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSBLAS1_DAXPBY_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) @@ -222,186 +195,152 @@ KOKKOSBLAS1_CAXPBY_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby< \ - ExecSpace, double, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - double, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef double AV; \ - typedef double BV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YV; \ - typedef typename XV::size_type size_type; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_CUBLAS,double]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && (beta == 1.0)) { \ - axpby_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasDaxpy(s.handle, N, &alpha, X.data(), one, Y.data(), one)); \ - 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby< \ + ExecSpace, double, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + double, \ + Kokkos::View, Kokkos::MemoryTraits >, 1, \ + true, ETI_SPEC_AVAIL> { \ + typedef double AV; \ + typedef double BV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YV; \ + typedef typename XV::size_type size_type; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_CUBLAS,double]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (beta == 1.0)) { \ + axpby_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDaxpy(s.handle, N, &alpha, X.data(), one, Y.data(), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby< \ - ExecSpace, float, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - float, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef float AV; \ - typedef float BV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YV; \ - typedef typename XV::size_type size_type; \ - \ - static void axpby(const ExecSpace& space, const AV& 
alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_CUBLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && (beta == 1.0f)) { \ - axpby_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSaxpy(s.handle, N, &alpha, X.data(), one, Y.data(), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby< \ + ExecSpace, float, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + float, \ + Kokkos::View, Kokkos::MemoryTraits >, 1, \ + true, ETI_SPEC_AVAIL> { \ + typedef float AV; \ + typedef float BV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YV; \ + typedef typename XV::size_type size_type; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_CUBLAS,float]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (beta == 1.0f)) { \ + axpby_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSaxpy(s.handle, N, &alpha, X.data(), one, Y.data(), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } 
else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::complex, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex AV; \ - typedef Kokkos::complex BV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - YV; \ - typedef typename XV::size_type size_type; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::axpby[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && (beta == 1.0f)) { \ - axpby_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZaxpy( \ - s.handle, N, reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::complex, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex AV; \ + typedef Kokkos::complex BV; \ + typedef Kokkos::View*, 
LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + YV; \ + typedef typename XV::size_type size_type; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_CUBLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (beta == 1.0f)) { \ + axpby_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZaxpy(s.handle, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Axpby, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::complex, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex AV; \ - typedef Kokkos::complex BV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - YV; \ - typedef typename XV::size_type size_type; \ - \ - static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, \ - const BV& beta, const YV& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::axpby[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && (beta == 1.0f)) { \ - 
axpby_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCaxpy( \ - s.handle, N, reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else \ - Axpby::axpby(space, alpha, X, beta, Y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CAXPBY_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Axpby, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::complex, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex AV; \ + typedef Kokkos::complex BV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + YV; \ + typedef typename XV::size_type size_type; \ + \ + static void axpby(const ExecSpace& space, const AV& alpha, const XV& X, const BV& beta, const YV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::axpby[TPL_CUBLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (beta == 1.0f)) { \ + axpby_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCaxpy(s.handle, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } 
else \ + Axpby::axpby(space, alpha, X, beta, Y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSBLAS1_DAXPBY_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_avail.hpp index 13cc2a6f92ae..8d5f1b939b1e 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct dot_tpl_spec_avail { enum : bool { value = false }; }; @@ -34,24 +33,20 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // double -#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct dot_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct dot_tpl_spec_avail >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace) // TODO: we met difficuties in FindTPLMKL.cmake to set the BLAS library properly // such that the test in CheckHostBlasReturnComplex.cmake could not be @@ -59,33 +54,28 @@ KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, // This resulted 
in segfault in dot() with MKL and complex. // So we just temporarily disable it until FindTPLMKL.cmake is fixed. #if !defined(KOKKOSKERNELS_ENABLE_TPL_MKL) -KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif #endif -#define KOKKOSBLAS1_DOT_TPL_SPEC(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct dot_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_DOT_TPL_SPEC(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct dot_tpl_spec_avail >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, 1> { \ + enum : bool { value = true }; \ }; -#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(LAYOUT, EXECSPACE, MEMSPACE) \ - KOKKOSBLAS1_DOT_TPL_SPEC(float, LAYOUT, EXECSPACE, MEMSPACE) \ - KOKKOSBLAS1_DOT_TPL_SPEC(double, LAYOUT, EXECSPACE, MEMSPACE) \ - KOKKOSBLAS1_DOT_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, \ - MEMSPACE) \ +#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_DOT_TPL_SPEC(float, LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_DOT_TPL_SPEC(double, LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_DOT_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, MEMSPACE) \ KOKKOSBLAS1_DOT_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, MEMSPACE) #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS @@ -100,13 +90,11 @@ KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Cuda, #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) 
+KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) #endif #if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL) -KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) #endif } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_decl.hpp index 247957b2c875..fa9d5fafce93 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_dot_tpl_spec_decl.hpp @@ -24,8 +24,8 @@ namespace { template inline void dot_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas1::dot<> TPL Blas specialization for < %s , %s , %s >\n", - typeid(RV).name(), typeid(XV).name(), typeid(YV).name()); + printf("KokkosBlas1::dot<> TPL Blas specialization for < %s , %s , %s >\n", typeid(RV).name(), typeid(XV).name(), + typeid(YV).name()); #endif } } // namespace @@ -39,59 +39,44 @@ inline void dot_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Dot >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void dot(const ExecSpace& space, RV& R, const XV& X, const XV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_BLAS," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < 
static_cast(INT_MAX)) { \ - dot_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - R() = HostBlas::dot( \ - N, reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one); \ - } else { \ - Dot::dot(space, R, \ - X, Y); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Dot >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void dot(const ExecSpace& space, RV& R, const XV& X, const XV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_BLAS," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + dot_print_specialization(); \ + int N = numElems; \ + int one = 1; \ + R() = HostBlas::dot(N, reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one); \ + } else { \ + Dot::dot(space, R, X, Y); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, float, float, \ - Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, double, double, \ - Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ - Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ - Kokkos::HostSpace, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, float, float, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + 
KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, double, double, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ + Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ + Kokkos::HostSpace, ETI_SPEC_AVAIL) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS_EXT(true) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS_EXT(false) @@ -108,69 +93,51 @@ KOKKOSBLAS1_DOT_TPL_SPEC_DECL_BLAS_EXT(false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - EXECSPACE, MEMSPACE, TPL_DOT, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Dot >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void dot(const EXECSPACE& space, RV& R, const XV& X, const XV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_CUBLAS," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - /* TODO: CUDA-12's 64-bit indices allow larger numElems */ \ - if (numElems <= \ - static_cast(std::numeric_limits::max())) { \ - dot_print_specialization(); \ - const int N = static_cast(numElems); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - TPL_DOT(s.handle, N, reinterpret_cast(X.data()), \ - 1, reinterpret_cast(Y.data()), 1, \ - reinterpret_cast(&R()))); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - Dot::dot(space, R, \ - X, Y); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(LAYOUT, 
KOKKOS_TYPE, TPL_TYPE, EXECSPACE, MEMSPACE, TPL_DOT, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Dot >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void dot(const EXECSPACE& space, RV& R, const XV& X, const XV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_CUBLAS," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + /* TODO: CUDA-12's 64-bit indices allow larger numElems */ \ + if (numElems <= static_cast(std::numeric_limits::max())) { \ + dot_print_specialization(); \ + const int N = static_cast(numElems); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(TPL_DOT(s.handle, N, reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, \ + reinterpret_cast(&R()))); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + Dot::dot(space, R, X, Y); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, float, float, \ - Kokkos::Cuda, Kokkos::CudaSpace, \ - cublasSdot, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, double, double, \ - Kokkos::Cuda, Kokkos::CudaSpace, \ - cublasDdot, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, cuComplex, Kokkos::Cuda, \ - Kokkos::CudaSpace, cublasCdotc, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, cuDoubleComplex, \ - Kokkos::Cuda, Kokkos::CudaSpace, cublasZdotc, ETI_SPEC_AVAIL) +#define 
KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, float, float, Kokkos::Cuda, Kokkos::CudaSpace, cublasSdot, \ + ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, double, double, Kokkos::Cuda, Kokkos::CudaSpace, \ + cublasDdot, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::complex, cuComplex, Kokkos::Cuda, \ + Kokkos::CudaSpace, cublasCdotc, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::complex, cuDoubleComplex, Kokkos::Cuda, \ + Kokkos::CudaSpace, cublasZdotc, ETI_SPEC_AVAIL) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS_EXT(true) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS_EXT(false) @@ -185,68 +152,50 @@ KOKKOSBLAS1_DOT_TPL_SPEC_DECL_CUBLAS_EXT(false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - EXECSPACE, MEMSPACE, TPL_DOT, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Dot >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void dot(const EXECSPACE& space, RV& R, const XV& X, const XV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_ROCBLAS," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems <= \ - static_cast(std::numeric_limits::max())) { \ - dot_print_specialization(); \ - const rocblas_int N = static_cast(numElems); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - TPL_DOT(s.handle, N, reinterpret_cast(X.data()), \ - 1, reinterpret_cast(Y.data()), 1, \ - 
reinterpret_cast(&R()))); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - Dot::dot(space, R, \ - X, Y); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, EXECSPACE, MEMSPACE, TPL_DOT, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Dot >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void dot(const EXECSPACE& space, RV& R, const XV& X, const XV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_ROCBLAS," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems <= static_cast(std::numeric_limits::max())) { \ + dot_print_specialization(); \ + const rocblas_int N = static_cast(numElems); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(TPL_DOT(s.handle, N, reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, \ + reinterpret_cast(&R()))); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + Dot::dot(space, R, X, Y); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, float, float, \ - Kokkos::HIP, Kokkos::HIPSpace, \ - rocblas_sdot, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, double, double, \ - Kokkos::HIP, Kokkos::HIPSpace, \ - rocblas_ddot, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, rocblas_float_complex, \ - Kokkos::HIP, Kokkos::HIPSpace, 
rocblas_cdotc, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, rocblas_double_complex, \ - Kokkos::HIP, Kokkos::HIPSpace, rocblas_zdotc, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, float, float, Kokkos::HIP, Kokkos::HIPSpace, rocblas_sdot, \ + ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, double, double, Kokkos::HIP, Kokkos::HIPSpace, \ + rocblas_ddot, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::complex, rocblas_float_complex, \ + Kokkos::HIP, Kokkos::HIPSpace, rocblas_cdotc, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::complex, rocblas_double_complex, \ + Kokkos::HIP, Kokkos::HIPSpace, rocblas_zdotc, ETI_SPEC_AVAIL) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS_EXT(true) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS_EXT(false) @@ -262,67 +211,50 @@ KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ROCBLAS_EXT(false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - EXECSPACE, MEMSPACE, TPL_DOT, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Dot >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void dot(const EXECSPACE& exec, RV& R, const XV& X, const XV& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_ONEMKL," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems <= \ - static_cast(std::numeric_limits::max())) { \ - dot_print_specialization(); \ - const std::int64_t N = static_cast(numElems); \ - TPL_DOT(exec.sycl_queue(), N, \ - reinterpret_cast(X.data()), 1, \ - 
reinterpret_cast(Y.data()), 1, \ - reinterpret_cast(&R())); \ - } else { \ - Dot::dot(exec, R, \ - X, Y); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL(LAYOUT, KOKKOS_TYPE, TPL_TYPE, EXECSPACE, MEMSPACE, TPL_DOT, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Dot >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void dot(const EXECSPACE& exec, RV& R, const XV& X, const XV& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::dot[TPL_ONEMKL," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems <= static_cast(std::numeric_limits::max())) { \ + dot_print_specialization(); \ + const std::int64_t N = static_cast(numElems); \ + TPL_DOT(exec.sycl_queue(), N, reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, reinterpret_cast(&R())); \ + } else { \ + Dot::dot(exec, R, X, Y); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, float, float, Kokkos::Experimental::SYCL, \ - Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::dot, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, double, double, Kokkos::Experimental::SYCL, \ - Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::dot, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::dotc, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ - 
Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::dotc, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, float, float, Kokkos::Experimental::SYCL, \ + Kokkos::Experimental::SYCLDeviceUSMSpace, oneapi::mkl::blas::row_major::dot, \ + ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, double, double, Kokkos::Experimental::SYCL, \ + Kokkos::Experimental::SYCLDeviceUSMSpace, oneapi::mkl::blas::row_major::dot, \ + ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ + Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + oneapi::mkl::blas::row_major::dotc, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ + Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + oneapi::mkl::blas::row_major::dotc, ETI_SPEC_AVAIL) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL_EXT(true) KOKKOSBLAS1_DOT_TPL_SPEC_DECL_ONEMKL_EXT(false) diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_avail.hpp index 616c26c87a20..36a5e5171f28 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_avail.hpp @@ -33,145 +33,96 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) // double -#define KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(INDEX_TYPE, SCALAR, LAYOUT, \ - MEMSPACE) \ - template \ - struct iamax_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(INDEX_TYPE, SCALAR, LAYOUT, MEMSPACE) \ + template \ + 
struct iamax_tpl_spec_avail< \ + ExecSpace, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_BLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif // cuBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) // double -#define KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(INDEX_TYPE, SCALAR, LAYOUT, \ - MEMSPACE) \ - template <> \ - struct iamax_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct iamax_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(INDEX_TYPE, SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct iamax_tpl_spec_avail< \ + Kokkos::Cuda, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct iamax_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; 
-KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, double, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, double, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, float, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, double, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, double, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, 
float, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, float, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, - Kokkos::LayoutLeft, +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_CUBLAS(unsigned int, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif // rocBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_ROCBLAS) -#define KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(INDEX_TYPE, SCALAR, LAYOUT, \ - MEMSPACE) \ - template <> \ - struct iamax_tpl_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ - }; \ - template <> \ - struct iamax_tpl_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - 
enum : bool { value = true }; \ +#define KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(INDEX_TYPE, SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct iamax_tpl_spec_avail< \ + Kokkos::HIP, Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ + }; \ + template <> \ + struct iamax_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, double, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, double, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, float, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, float, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) 
+KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned long, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_IAMAX_TPL_SPEC_AVAIL_ROCBLAS(unsigned int, Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) #endif diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_decl.hpp index 913ec5a15145..c85de4d18638 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_iamax_tpl_spec_decl.hpp @@ -23,15 +23,12 @@ template inline void iamax_print_specialization() { #if defined(KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION) #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) - printf("KokkosBlas1::iamax<> TPL cuBLAS specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XV).name()); + printf("KokkosBlas1::iamax<> TPL cuBLAS specialization for < %s , %s >\n", typeid(RV).name(), typeid(XV).name()); #elif defined(KOKKOSKERNELS_ENABLE_TPL_ROCBLAS) - printf("KokkosBlas1::iamax<> TPL rocBLAS specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XV).name()); + printf("KokkosBlas1::iamax<> TPL rocBLAS specialization for < %s , %s >\n", typeid(RV).name(), typeid(XV).name()); #else #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS - printf("KokkosBlas1::iamax<> TPL Blas specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XV).name()); + printf("KokkosBlas1::iamax<> TPL Blas specialization for < %s , %s >\n", typeid(RV).name(), typeid(XV).name()); #endif #endif #endif @@ -46,90 +43,63 @@ inline void iamax_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS( \ - SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Iamax >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef 
typename XV::size_type size_type; \ - \ - static void iamax(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::iamax[TPL_BLAS," #SCALAR_TYPE \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - R() = 0; \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - iamax_print_specialization(); \ - int N = static_cast(numElems); \ - const int XST = X.stride(0); \ - const int LDX = (XST == 0) ? 1 : XST; \ - int idx = HostBlas::iamax( \ - N, reinterpret_cast(X.data()), LDX); \ - R() = static_cast(idx); \ - } else { \ - Iamax::iamax(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Iamax >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void iamax(const ExecSpace& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::iamax[TPL_BLAS," #SCALAR_TYPE "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + R() = 0; \ + return; \ + } \ + if (numElems < static_cast(INT_MAX)) { \ + iamax_print_specialization(); \ + int N = static_cast(numElems); \ + const int XST = X.stride(0); \ + const int LDX = (XST == 0) ? 
1 : XST; \ + int idx = HostBlas::iamax(N, reinterpret_cast(X.data()), LDX); \ + R() = static_cast(idx); \ + } else { \ + Iamax::iamax(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(double, double, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(float, float, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::complex, \ - std::complex, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::complex, \ - std::complex, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(double, double, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(float, float, LAYOUT, MEMSPACE, 
ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::complex, std::complex, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::complex, std::complex, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -145,227 +115,155 @@ namespace Impl { using CUBLAS_DEVICE_TYPE = Kokkos::Device; #if defined(KOKKOS_ENABLE_CUDA_UVM) -using CUBLASUVM_DEVICE_TYPE = - Kokkos::Device; +using CUBLASUVM_DEVICE_TYPE = Kokkos::Device; #endif -#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER( \ - SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, EXEC_SPACE, \ - MEMSPACE, ETI_SPEC_AVAIL, RET_DEVICE_TYPE, CUBLAS_PTR_MODE_1, \ - CUBLAS_PTR_MODE_2) \ - template <> \ - struct Iamax >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void iamax(const EXEC_SPACE& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::iamax[TPL_CUBLAS," #SCALAR_TYPE "]"); \ - 
const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - Kokkos::deep_copy(R, 0); \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - iamax_print_specialization(); \ - const int N = static_cast(numElems); \ - const int XST = X.stride(0); \ - const int LDX = (XST == 0) ? 1 : XST; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - cublasPointerMode_t prevPtrMode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(s.handle, &prevPtrMode)); \ - if (prevPtrMode == CUBLAS_PTR_MODE_2) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, CUBLAS_PTR_MODE_1)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ - s.handle, N, reinterpret_cast(X.data()), \ - LDX, reinterpret_cast(R.data()))); \ - if (prevPtrMode == CUBLAS_PTR_MODE_2) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, CUBLAS_PTR_MODE_2)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } \ - } else { \ - Iamax::iamax(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + EXEC_SPACE, MEMSPACE, ETI_SPEC_AVAIL, RET_DEVICE_TYPE, \ + CUBLAS_PTR_MODE_1, CUBLAS_PTR_MODE_2) \ + template <> \ + struct Iamax >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void iamax(const EXEC_SPACE& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::iamax[TPL_CUBLAS," #SCALAR_TYPE "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + Kokkos::deep_copy(R, 0); \ + return; \ + } \ + if (numElems < static_cast(INT_MAX)) { \ + 
iamax_print_specialization(); \ + const int N = static_cast(numElems); \ + const int XST = X.stride(0); \ + const int LDX = (XST == 0) ? 1 : XST; \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + cublasPointerMode_t prevPtrMode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(s.handle, &prevPtrMode)); \ + if (prevPtrMode == CUBLAS_PTR_MODE_2) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, CUBLAS_PTR_MODE_1)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN(s.handle, N, reinterpret_cast(X.data()), LDX, \ + reinterpret_cast(R.data()))); \ + if (prevPtrMode == CUBLAS_PTR_MODE_2) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, CUBLAS_PTR_MODE_2)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } \ + } else { \ + Iamax::iamax(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, \ - CUBLAS_FN, INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER( \ - SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ - Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, Kokkos::HostSpace, \ - CUBLAS_POINTER_MODE_HOST, CUBLAS_POINTER_MODE_DEVICE) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER( \ - SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ - Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, CUBLAS_DEVICE_TYPE, \ - CUBLAS_POINTER_MODE_DEVICE, CUBLAS_POINTER_MODE_HOST) +#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, Kokkos::HostSpace, \ + CUBLAS_POINTER_MODE_HOST, CUBLAS_POINTER_MODE_DEVICE) 
\ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, CUBLAS_DEVICE_TYPE, \ + CUBLAS_POINTER_MODE_DEVICE, CUBLAS_POINTER_MODE_HOST) #if defined(KOKKOS_ENABLE_CUDA_UVM) -#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM( \ - SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER( \ - SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ - Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, Kokkos::HostSpace, \ - CUBLAS_POINTER_MODE_HOST, CUBLAS_POINTER_MODE_DEVICE) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER( \ - SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ - Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, CUBLASUVM_DEVICE_TYPE, \ - CUBLAS_POINTER_MODE_DEVICE, CUBLAS_POINTER_MODE_HOST) +#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, Kokkos::HostSpace, \ + CUBLAS_POINTER_MODE_HOST, CUBLAS_POINTER_MODE_DEVICE) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_WRAPPER(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, INDEX_TYPE, LAYOUT, \ + Kokkos::Cuda, MEMSPACE, ETI_SPEC_AVAIL, CUBLASUVM_DEVICE_TYPE, \ + CUBLAS_POINTER_MODE_DEVICE, CUBLAS_POINTER_MODE_HOST) #endif -#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(double, double, cublasIdamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(float, float, cublasIsamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -#define 
KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS( \ - Kokkos::complex, cuDoubleComplex, cublasIzamax, INDEX_TYPE, \ - LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, cuComplex, \ - cublasIcamax, INDEX_TYPE, LAYOUT, \ +#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(double, double, cublasIdamax, INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(float, float, cublasIsamax, INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, cuDoubleComplex, cublasIzamax, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, cuComplex, cublasIcamax, INDEX_TYPE, LAYOUT, \ MEMSPACE, ETI_SPEC_AVAIL) #if defined(KOKKOS_ENABLE_CUDA_UVM) -#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(double, double, cublasIdamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, \ +#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(double, double, cublasIdamax, INDEX_TYPE, LAYOUT, MEMSPACE, \ ETI_SPEC_AVAIL) -#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(float, float, 
cublasIsamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(float, float, cublasIsamax, INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(Kokkos::complex, cuDoubleComplex, cublasIzamax, INDEX_TYPE, \ + LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM( \ - Kokkos::complex, cuDoubleComplex, cublasIzamax, INDEX_TYPE, \ - LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM( \ - Kokkos::complex, cuComplex, cublasIcamax, INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(Kokkos::complex, cuComplex, cublasIcamax, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL) #endif -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - 
Kokkos::CudaSpace, true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + 
+KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) #if defined(KOKKOS_ENABLE_CUDA_UVM) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - 
-KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned long, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + 
+KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_CUBLAS_UVM(unsigned int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) #endif } // namespace Impl @@ -382,144 +280,100 @@ namespace Impl { using ROCBLAS_DEVICE_TYPE = Kokkos::Device; -#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS_WRAPPER( \ - SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL, RET_DEVICE_TYPE, ROCBLAS_PTR_MODE_1, \ - ROCBLAS_PTR_MODE_2) \ - template <> \ - struct Iamax >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using execution_space = Kokkos::HIP; \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void iamax(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::iamax[TPL_ROCBLAS," #SCALAR_TYPE "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - Kokkos::deep_copy(R, 0); \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - iamax_print_specialization(); \ - const int N = static_cast(numElems); \ - const int XST = X.stride(0); \ - const int LDX = (XST == 0) ? 
1 : XST; \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - rocblas_pointer_mode prevPtrMode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(s.handle, &prevPtrMode)); \ - if (prevPtrMode == ROCBLAS_PTR_MODE_2) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, ROCBLAS_PTR_MODE_1)); \ - } \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - ROCBLAS_FN(s.handle, N, \ - reinterpret_cast(X.data()), \ - LDX, reinterpret_cast(R.data()))); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - if (prevPtrMode == ROCBLAS_PTR_MODE_2) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, ROCBLAS_PTR_MODE_2)); \ - } \ - } else { \ - Iamax::iamax(space, \ - R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS_WRAPPER(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, \ + LAYOUT, MEMSPACE, ETI_SPEC_AVAIL, RET_DEVICE_TYPE, \ + ROCBLAS_PTR_MODE_1, ROCBLAS_PTR_MODE_2) \ + template <> \ + struct Iamax >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + using execution_space = Kokkos::HIP; \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void iamax(const execution_space& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::iamax[TPL_ROCBLAS," #SCALAR_TYPE "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + Kokkos::deep_copy(R, 0); \ + return; \ + } \ + if (numElems < static_cast(INT_MAX)) { \ + iamax_print_specialization(); \ + const int N = static_cast(numElems); \ + const int XST = X.stride(0); \ + const int LDX = (XST == 0) ? 
1 : XST; \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + rocblas_pointer_mode prevPtrMode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(s.handle, &prevPtrMode)); \ + if (prevPtrMode == ROCBLAS_PTR_MODE_2) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, ROCBLAS_PTR_MODE_1)); \ + } \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN(s.handle, N, reinterpret_cast(X.data()), \ + LDX, reinterpret_cast(R.data()))); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + if (prevPtrMode == ROCBLAS_PTR_MODE_2) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, ROCBLAS_PTR_MODE_2)); \ + } \ + } else { \ + Iamax::iamax(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS( \ - SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS_WRAPPER( \ - SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL, Kokkos::HostSpace, rocblas_pointer_mode_host, \ - rocblas_pointer_mode_device) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS_WRAPPER( \ - SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL, ROCBLAS_DEVICE_TYPE, \ - rocblas_pointer_mode_device, rocblas_pointer_mode_host) - -#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(double, double, rocblas_idamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(float, float, rocblas_isamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -#define 
KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::complex, rocblas_double_complex, rocblas_izamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::complex, rocblas_float_complex, rocblas_icamax, \ - INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) - -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - 
-KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) +#define KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS_WRAPPER(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL, Kokkos::HostSpace, \ + rocblas_pointer_mode_host, rocblas_pointer_mode_device) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS_WRAPPER(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, INDEX_TYPE, LAYOUT, \ + MEMSPACE, ETI_SPEC_AVAIL, ROCBLAS_DEVICE_TYPE, \ + rocblas_pointer_mode_device, rocblas_pointer_mode_host) + +#define KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(double, double, rocblas_idamax, INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(float, float, rocblas_isamax, INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, rocblas_double_complex, rocblas_izamax, \ + INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(INDEX_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XIAMAX_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, rocblas_float_complex, rocblas_icamax, INDEX_TYPE, \ + LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + 
+KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned long, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CIAMAX_TPL_SPEC_DECL_ROCBLAS(unsigned int, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_mult_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_mult_tpl_spec_avail.hpp index 8d3fc0f4d2e8..3924e0da21df 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_mult_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_mult_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct mult_tpl_spec_avail { 
enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp index be0a45c7be61..37876d0129e0 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp @@ -33,116 +33,87 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // double -#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct nrm1_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrm1_tpl_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS // double -#define 
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct nrm1_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrm1_tpl_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct nrm1_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrm1_tpl_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + 
Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) #endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS // oneMKL #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#if defined(KOKKOS_ENABLE_SYCL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) - -#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct nrm1_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) + +#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrm1_tpl_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( - double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( - float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) 
-KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( - Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( - Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCLDeviceUSMSpace) #endif // KOKKOS_ENABLE_SYCL #endif // KOKKOSKERNELS_ENABLE_TPL_MKL diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index c695eaee1e33..1bf740b3fbc4 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -24,8 +24,7 @@ namespace { template inline void nrm1_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas1::nrm1<> TPL Blas specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XV).name()); + printf("KokkosBlas1::nrm1<> TPL Blas specialization for < %s , %s >\n", typeid(RV).name(), typeid(XV).name()); #endif } } // namespace @@ -39,87 +38,64 @@ inline void nrm1_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct Nrm1< \ - EXECSPACE, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, true, \ - nrm1_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - 
Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using mag_type = typename Kokkos::ArithTraits::mag_type; \ - using RV = Kokkos::View>; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using size_type = typename XV::size_type; \ - \ - static void nrm1(const EXECSPACE& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_BLAS," #SCALAR "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - if constexpr (Kokkos::ArithTraits::is_complex) { \ - R() = HostBlas>::asum( \ - N, reinterpret_cast*>(X.data()), \ - one); \ - } else { \ - R() = HostBlas::asum(N, X.data(), one); \ - } \ - } else { \ - Nrm1::value>::nrm1(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct Nrm1::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, \ + nrm1_eti_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using mag_type = typename Kokkos::ArithTraits::mag_type; \ + using RV = Kokkos::View>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ + \ + static void nrm1(const EXECSPACE& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_BLAS," #SCALAR "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm1_print_specialization(); \ + int N = numElems; \ + int one = 1; \ + if constexpr (Kokkos::ArithTraits::is_complex) { \ + R() = HostBlas>::asum(N, reinterpret_cast*>(X.data()), \ + one); \ + } else { \ + R() = HostBlas::asum(N, X.data(), one); \ + } \ + } else { \ + Nrm1::value>::nrm1(space, R, X); \ + } \ + 
Kokkos::Profiling::popRegion(); \ + } \ }; #if defined(KOKKOS_ENABLE_SERIAL) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) #endif #if defined(KOKKOS_ENABLE_OPENMP) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) #endif #if defined(KOKKOS_ENABLE_THREADS) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Threads, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, 
Kokkos::LayoutLeft, Kokkos::Threads, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads, Kokkos::HostSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) #endif } // namespace Impl @@ -135,99 +111,74 @@ namespace KokkosBlas { namespace Impl { template -void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, - const XViewType& X) { +void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, const XViewType& X) { using XScalar = typename XViewType::non_const_value_type; nrm1_print_specialization(); - const int N = static_cast(X.extent(0)); - constexpr int one = 1; - KokkosBlas::Impl::CudaBlasSingleton& s = - KokkosBlas::Impl::CudaBlasSingleton::singleton(); + const int N = static_cast(X.extent(0)); + constexpr int one = 1; + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); if constexpr (std::is_same_v) { - KOKKOS_CUBLAS_SAFE_CALL_IMPL( - cublasSasum(s.handle, N, X.data(), one, R.data())); + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSasum(s.handle, N, X.data(), one, R.data())); } if constexpr (std::is_same_v) { - KOKKOS_CUBLAS_SAFE_CALL_IMPL( - cublasDasum(s.handle, N, X.data(), one, R.data())); + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDasum(s.handle, N, X.data(), one, R.data())); } if constexpr (std::is_same_v>) { KOKKOS_CUBLAS_SAFE_CALL_IMPL( - cublasScasum(s.handle, N, reinterpret_cast(X.data()), - one, 
R.data())); + cublasScasum(s.handle, N, reinterpret_cast(X.data()), one, R.data())); } if constexpr (std::is_same_v>) { - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDzasum( - s.handle, N, reinterpret_cast(X.data()), one, - R.data())); + KOKKOS_CUBLAS_SAFE_CALL_IMPL( + cublasDzasum(s.handle, N, reinterpret_cast(X.data()), one, R.data())); } KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); } -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct Nrm1< \ - Kokkos::Cuda, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, true, \ - nrm1_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using execution_space = Kokkos::Cuda; \ - using RV = Kokkos::View::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits>; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using size_type = typename XV::size_type; \ - \ - static void nrm1(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS," #SCALAR \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - cublasAsumWrapper(space, R, X); \ - } else { \ - Nrm1::value>::nrm1(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct Nrm1::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, \ + nrm1_eti_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using execution_space = Kokkos::Cuda; \ + using RV = Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>; \ + using XV 
= Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ + \ + static void nrm1(const execution_space& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS," #SCALAR "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + cublasAsumWrapper(space, R, X); \ + } else { \ + Nrm1::value>::nrm1(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, 
Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -242,89 +193,67 @@ namespace KokkosBlas { namespace Impl { template -void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, - const XViewType& X) { +void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, const XViewType& X) { using XScalar = typename XViewType::non_const_value_type; nrm1_print_specialization(); - const int N = static_cast(X.extent(0)); - constexpr int one = 1; - KokkosBlas::Impl::RocBlasSingleton& s = - KokkosBlas::Impl::RocBlasSingleton::singleton(); + const int N = static_cast(X.extent(0)); + constexpr int one = 1; + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( - rocblas_set_stream(s.handle, space.hip_stream())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( - rocblas_sasum(s.handle, N, X.data(), one, R.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_sasum(s.handle, N, X.data(), one, R.data())); } if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( - rocblas_dasum(s.handle, N, X.data(), one, R.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dasum(s.handle, N, X.data(), one, R.data())); } if constexpr (std::is_same_v>) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_scasum( - s.handle, N, reinterpret_cast(X.data()), - one, R.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_scasum(s.handle, N, reinterpret_cast(X.data()), one, R.data())); } if constexpr (std::is_same_v>) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dzasum( - s.handle, N, reinterpret_cast(X.data()), - one, R.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_dzasum(s.handle, N, reinterpret_cast(X.data()), one, R.data())); } KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct Nrm1< \ - Kokkos::HIP, \ - 
Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, true, \ - nrm1_eti_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using RV = Kokkos::View::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits>; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using size_type = typename XV::size_type; \ - \ - static void nrm1(const Kokkos::HIP& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS," #SCALAR \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - rocblasAsumWrapper(space, R, X); \ - } else { \ - Nrm1::value>::nrm1(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct Nrm1::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, \ + nrm1_eti_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using RV = Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ + \ + static void nrm1(const Kokkos::HIP& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS," #SCALAR "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + rocblasAsumWrapper(space, R, X); \ + } else { \ + Nrm1::value>::nrm1(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(double, 
Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosBlas @@ -334,8 +263,7 @@ KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, // oneMKL #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#if defined(KOKKOS_ENABLE_SYCL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) #include #include @@ -344,8 +272,7 @@ namespace KokkosBlas { namespace Impl { template -void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, - const XViewType& X) { +void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, const XViewType& X) { using XScalar = typename XViewType::non_const_value_type; using KAT_X = Kokkos::ArithTraits; using layout_t = typename XViewType::array_layout; @@ -353,100 +280,75 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, const std::int64_t N = static_cast(X.extent(0)); // Create temp view on device to store the result - Kokkos::View::mag_type, - typename XViewType::memory_space> - res("sycl asum result"); + Kokkos::View::mag_type, typename XViewType::memory_space> res( + "sycl asum result"); // Decide to call row_major or column_major function if constexpr (std::is_same_v) { if constexpr (KAT_X::is_complex) { - oneapi::mkl::blas::row_major::asum( - space.sycl_queue(), N, - reinterpret_cast*>( - X.data()), - 1, res.data()); - } else { - 
oneapi::mkl::blas::row_major::asum(space.sycl_queue(), N, X.data(), 1, + oneapi::mkl::blas::row_major::asum(space.sycl_queue(), N, + reinterpret_cast*>(X.data()), 1, res.data()); + } else { + oneapi::mkl::blas::row_major::asum(space.sycl_queue(), N, X.data(), 1, res.data()); } } else { if constexpr (KAT_X::is_complex) { - oneapi::mkl::blas::column_major::asum( - space.sycl_queue(), N, - reinterpret_cast*>( - X.data()), - 1, res.data()); + oneapi::mkl::blas::column_major::asum(space.sycl_queue(), N, + reinterpret_cast*>(X.data()), + 1, res.data()); } else { - oneapi::mkl::blas::column_major::asum(space.sycl_queue(), X.extent_int(0), - X.data(), 1, res.data()); + oneapi::mkl::blas::column_major::asum(space.sycl_queue(), X.extent_int(0), X.data(), 1, res.data()); } } // Bring result back to host Kokkos::deep_copy(space, R, res); } -#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct Nrm1< \ - Kokkos::Experimental::SYCL, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, true, \ - nrm1_eti_spec_avail< \ - Kokkos::Experimental::SYCL, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using execution_space = Kokkos::Experimental::SYCL; \ - using RV = Kokkos::View::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits>; \ - using XV = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using size_type = typename XV::size_type; \ - \ - static void nrm1(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ONEMKL," #SCALAR \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - onemklAsumWrapper(space, R, X); \ - } else { \ - Nrm1::value>::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, 
MEMSPACE) \ + template <> \ + struct Nrm1< \ + Kokkos::Experimental::SYCL, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, \ + nrm1_eti_spec_avail::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using execution_space = Kokkos::Experimental::SYCL; \ + using RV = Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ + \ + static void nrm1(const execution_space& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ONEMKL," #SCALAR "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + onemklAsumWrapper(space, R, X); \ + } else { \ + Nrm1::value>::nrm1( \ + space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_SYCLSHAREDSPACE) -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLSharedUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(double, 
Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLSharedUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLSharedUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp index de930f61075a..4d1a238740a8 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp @@ -32,64 +32,50 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // double -#define KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct nrm2_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrm2_tpl_spec_avail::mag_type, LAYOUT, \ + Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) 
-KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif -#define KOKKOSBLAS1_NRM2_TPL_SPEC(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct nrm2_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRM2_TPL_SPEC(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct nrm2_tpl_spec_avail::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -#define KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(LAYOUT, EXECSPACE, MEMSPACE) \ - KOKKOSBLAS1_NRM2_TPL_SPEC(float, LAYOUT, EXECSPACE, MEMSPACE) \ - KOKKOSBLAS1_NRM2_TPL_SPEC(double, LAYOUT, EXECSPACE, MEMSPACE) \ - KOKKOSBLAS1_NRM2_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - KOKKOSBLAS1_NRM2_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, \ - MEMSPACE) +#define KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_NRM2_TPL_SPEC(float, LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_NRM2_TPL_SPEC(double, LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_NRM2_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, MEMSPACE) \ + KOKKOSBLAS1_NRM2_TPL_SPEC(Kokkos::complex, LAYOUT, EXECSPACE, MEMSPACE) #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) 
+KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) #endif -#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ defined(KOKKOS_ENABLE_SYCL) KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp index 736523aa8d34..dfd6150914b9 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp @@ -24,8 +24,7 @@ namespace { template inline void nrm2_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas1::nrm2<> TPL Blas specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XV).name()); + printf("KokkosBlas1::nrm2<> TPL Blas specialization for < %s , %s >\n", typeid(RV).name(), typeid(XV).name()); #endif } } // namespace @@ -39,175 +38,131 @@ inline void nrm2_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm2< \ - ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm2(const ExecSpace& space, RV& R, const XV& X, \ - const bool& take_sqrt) { \ - 
Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_BLAS,double]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm2_print_specialization(); \ - int N = numElems; \ - int int_one = 1; \ - R() = HostBlas::nrm2(N, X.data(), int_one); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Nrm2 >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm2(const ExecSpace& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_BLAS,double]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm2_print_specialization(); \ + int N = numElems; \ + int int_one = 1; \ + R() = HostBlas::nrm2(N, X.data(), int_one); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm2< \ - ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm2(const ExecSpace& space, RV& R, const XV& X, \ - const bool& take_sqrt) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_BLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm2_print_specialization(); \ - int N = numElems; \ - int int_one = 1; \ - R() = HostBlas::nrm2(N, 
X.data(), int_one); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Nrm2 >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm2(const ExecSpace& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_BLAS,float]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm2_print_specialization(); \ + int N = numElems; \ + int int_one = 1; \ + R() = HostBlas::nrm2(N, X.data(), int_one); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm2 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm2(const ExecSpace& space, RV& R, const XV& X, \ - const bool& take_sqrt) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm2[TPL_BLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm2_print_specialization(); \ - int N = numElems; \ - int int_one = 1; \ - R() = HostBlas >::nrm2( \ - N, reinterpret_cast*>(X.data()), \ - int_one); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define 
KOKKOSBLAS1_ZNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Nrm2 >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm2(const ExecSpace& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_BLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm2_print_specialization(); \ + int N = numElems; \ + int int_one = 1; \ + R() = HostBlas >::nrm2(N, reinterpret_cast*>(X.data()), \ + int_one); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm2 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm2(const ExecSpace& space, RV& R, const XV& X, \ - const bool& take_sqrt) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm2[TPL_BLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm2_print_specialization(); \ - int N = numElems; \ - int int_one = 1; \ - R() = HostBlas >::nrm2( \ - N, reinterpret_cast*>(X.data()), \ - int_one); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Nrm2 >, \ + Kokkos::View*, 
LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm2(const ExecSpace& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_BLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm2_print_specialization(); \ + int N = numElems; \ + int int_one = 1; \ + R() = \ + HostBlas >::nrm2(N, reinterpret_cast*>(X.data()), int_one); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_DNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +KOKKOSBLAS1_DNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_DNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS1_SNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_SNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +KOKKOSBLAS1_SNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_SNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS1_ZNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_ZNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +KOKKOSBLAS1_ZNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) 
+KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -220,66 +175,48 @@ KOKKOSBLAS1_CNRM2_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - EXECSPACE, MEMSPACE, TPL_NRM2, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Nrm2::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using RT = Kokkos::ArithTraits::mag_type; \ - using RV = Kokkos::View >; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using size_type = typename XV::size_type; \ - \ - static void nrm2(const EXECSPACE& space, RV& R, const XV& X, \ - const bool& take_sqrt) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_CUBLAS," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems <= \ - static_cast(std::numeric_limits::max())) { \ - nrm2_print_specialization(); \ - const int N = static_cast(numElems); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - TPL_NRM2(s.handle, N, reinterpret_cast(X.data()), \ - 1, &R())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, EXECSPACE, MEMSPACE, TPL_NRM2, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Nrm2::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + 
Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + using RT = Kokkos::ArithTraits::mag_type; \ + using RV = Kokkos::View >; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using size_type = typename XV::size_type; \ + \ + static void nrm2(const EXECSPACE& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_CUBLAS," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems <= static_cast(std::numeric_limits::max())) { \ + nrm2_print_specialization(); \ + const int N = static_cast(numElems); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(TPL_NRM2(s.handle, N, reinterpret_cast(X.data()), 1, &R())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, float, float, \ - Kokkos::Cuda, Kokkos::CudaSpace, \ - cublasSnrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, double, double, \ - Kokkos::Cuda, Kokkos::CudaSpace, \ - cublasDnrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, cuComplex, Kokkos::Cuda, \ - Kokkos::CudaSpace, cublasScnrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, cuDoubleComplex, \ - Kokkos::Cuda, Kokkos::CudaSpace, cublasDznrm2, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, float, float, Kokkos::Cuda, Kokkos::CudaSpace, \ + cublasSnrm2, ETI_SPEC_AVAIL) \ + 
KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, double, double, Kokkos::Cuda, Kokkos::CudaSpace, \ + cublasDnrm2, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::complex, cuComplex, Kokkos::Cuda, \ + Kokkos::CudaSpace, cublasScnrm2, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::complex, cuDoubleComplex, Kokkos::Cuda, \ + Kokkos::CudaSpace, cublasDznrm2, ETI_SPEC_AVAIL) KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS_EXT(true) KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS_EXT(false) @@ -295,66 +232,48 @@ KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_CUBLAS_EXT(false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - EXECSPACE, MEMSPACE, TPL_NRM2, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Nrm2::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using RT = Kokkos::ArithTraits::mag_type; \ - using RV = Kokkos::View >; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using size_type = typename XV::size_type; \ - \ - static void nrm2(const EXECSPACE& space, RV& R, const XV& X, \ - const bool& take_sqrt) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_ROCBLAS," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems <= \ - static_cast(std::numeric_limits::max())) { \ - nrm2_print_specialization(); \ - const rocblas_int N = static_cast(numElems); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - TPL_NRM2(s.handle, N, reinterpret_cast(X.data()), \ - 1, &R())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, 
R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(LAYOUT, KOKKOS_TYPE, TPL_TYPE, EXECSPACE, MEMSPACE, TPL_NRM2, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Nrm2::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + using RT = Kokkos::ArithTraits::mag_type; \ + using RV = Kokkos::View >; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using size_type = typename XV::size_type; \ + \ + static void nrm2(const EXECSPACE& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_ROCBLAS," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems <= static_cast(std::numeric_limits::max())) { \ + nrm2_print_specialization(); \ + const rocblas_int N = static_cast(numElems); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(TPL_NRM2(s.handle, N, reinterpret_cast(X.data()), 1, &R())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, float, float, \ - Kokkos::HIP, Kokkos::HIPSpace, \ - rocblas_snrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, double, double, \ - Kokkos::HIP, Kokkos::HIPSpace, \ - rocblas_dnrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, rocblas_float_complex, \ - Kokkos::HIP, Kokkos::HIPSpace, rocblas_scnrm2, ETI_SPEC_AVAIL) \ - 
KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::LayoutLeft, Kokkos::complex, rocblas_double_complex, \ - Kokkos::HIP, Kokkos::HIPSpace, rocblas_dznrm2, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, float, float, Kokkos::HIP, Kokkos::HIPSpace, \ + rocblas_snrm2, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, double, double, Kokkos::HIP, Kokkos::HIPSpace, \ + rocblas_dnrm2, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::complex, rocblas_float_complex, \ + Kokkos::HIP, Kokkos::HIPSpace, rocblas_scnrm2, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::complex, rocblas_double_complex, \ + Kokkos::HIP, Kokkos::HIPSpace, rocblas_dznrm2, ETI_SPEC_AVAIL) KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(true) KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(false) @@ -364,8 +283,7 @@ KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(false) #endif -#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ defined(KOKKOS_ENABLE_SYCL) #include #include @@ -374,64 +292,49 @@ KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL(LAYOUT, KOKKOS_TYPE, TPL_TYPE, \ - EXECSPACE, MEMSPACE, TPL_NRM2, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Nrm2::mag_type, LAYOUT, \ - Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using RT = Kokkos::ArithTraits::mag_type; \ - using RV = Kokkos::View >; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using size_type = typename XV::size_type; \ - \ - static void nrm2(const EXECSPACE& space, RV& R, const XV& X, \ - const bool& 
take_sqrt) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_ONEMKL," + \ - Kokkos::ArithTraits::name() + \ - "]"); \ - const size_type numElems = X.extent(0); \ - if (numElems <= \ - static_cast(std::numeric_limits::max())) { \ - nrm2_print_specialization(); \ - const std::int64_t N = static_cast(numElems); \ - TPL_NRM2(space.sycl_queue(), N, \ - reinterpret_cast(X.data()), 1, &R()); \ - if (!take_sqrt) R() = R() * R(); \ - } else { \ - Nrm2::nrm2(space, R, X, \ - take_sqrt); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL(LAYOUT, KOKKOS_TYPE, TPL_TYPE, EXECSPACE, MEMSPACE, TPL_NRM2, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Nrm2::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + using RT = Kokkos::ArithTraits::mag_type; \ + using RV = Kokkos::View >; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using size_type = typename XV::size_type; \ + \ + static void nrm2(const EXECSPACE& space, RV& R, const XV& X, const bool& take_sqrt) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm2[TPL_ONEMKL," + Kokkos::ArithTraits::name() + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems <= static_cast(std::numeric_limits::max())) { \ + nrm2_print_specialization(); \ + const std::int64_t N = static_cast(numElems); \ + TPL_NRM2(space.sycl_queue(), N, reinterpret_cast(X.data()), 1, &R()); \ + if (!take_sqrt) R() = R() * R(); \ + } else { \ + Nrm2::nrm2(space, R, X, take_sqrt); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL_EXT(ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, float, float, Kokkos::Experimental::SYCL, \ - Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::nrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, double, double, 
Kokkos::Experimental::SYCL, \ - Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::nrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::nrm2, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL( \ - Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ - oneapi::mkl::blas::row_major::nrm2, ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL_EXT(ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, float, float, Kokkos::Experimental::SYCL, \ + Kokkos::Experimental::SYCLDeviceUSMSpace, oneapi::mkl::blas::row_major::nrm2, \ + ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, double, double, Kokkos::Experimental::SYCL, \ + Kokkos::Experimental::SYCLDeviceUSMSpace, oneapi::mkl::blas::row_major::nrm2, \ + ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ + Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + oneapi::mkl::blas::row_major::nrm2, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL(Kokkos::LayoutLeft, Kokkos::complex, std::complex, \ + Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + oneapi::mkl::blas::row_major::nrm2, ETI_SPEC_AVAIL) KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL_EXT(true) KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ONEMKL_EXT(false) diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp index 88591fbf0c7a..27647eed11a6 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp @@ -33,28 +33,21 @@ namespace Impl 
{ // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // double -#define KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct nrminf_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View< \ - typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ - LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrminf_tpl_spec_avail::mag_type, \ + LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp index 17ec54e05767..0b2081fc27b9 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp @@ -24,8 +24,7 @@ namespace { template inline void nrminf_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - 
printf("KokkosBlas1::nrminf<> TPL Blas specialization for < %s , %s >\n", - typeid(RV).name(), typeid(XV).name()); + printf("KokkosBlas1::nrminf<> TPL Blas specialization for < %s , %s >\n", typeid(RV).name(), typeid(XV).name()); #endif } } // namespace @@ -39,201 +38,152 @@ inline void nrminf_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct NrmInf< \ - ExecSpace, \ - Kokkos::View>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View> \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XV; \ - typedef typename XV::size_type size_type; \ - typedef Kokkos::Details::InnerProductSpaceTraits IPT; \ - \ - static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_BLAS,double]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - R() = 0.0; \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - nrminf_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - int idx = HostBlas::iamax(N, X.data(), one) - 1; \ - R() = IPT::norm(X(idx)); \ - } else { \ - NrmInf::nrminf(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct NrmInf>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View> RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XV; \ + typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits IPT; \ + \ + static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_BLAS,double]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + R() = 0.0; \ + return; \ + } \ + if (numElems < 
static_cast(INT_MAX)) { \ + nrminf_print_specialization(); \ + int N = numElems; \ + int one = 1; \ + int idx = HostBlas::iamax(N, X.data(), one) - 1; \ + R() = IPT::norm(X(idx)); \ + } else { \ + NrmInf::nrminf(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct NrmInf< \ - ExecSpace, \ - Kokkos::View>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View> \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XV; \ - typedef typename XV::size_type size_type; \ - typedef Kokkos::Details::InnerProductSpaceTraits IPT; \ - \ - static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_BLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - R() = 0.0f; \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - nrminf_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - int idx = HostBlas::iamax(N, X.data(), one) - 1; \ - R() = IPT::norm(X(idx)); \ - } else { \ - NrmInf::nrminf(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct NrmInf>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View> RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XV; \ + typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits IPT; \ + \ + static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_BLAS,float]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + R() = 0.0f; \ + return; \ + } \ + if (numElems < static_cast(INT_MAX)) { \ + nrminf_print_specialization(); \ + int N = numElems; \ + int one 
= 1; \ + int idx = HostBlas::iamax(N, X.data(), one) - 1; \ + R() = IPT::norm(X(idx)); \ + } else { \ + NrmInf::nrminf(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct NrmInf>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View> \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits> \ - XV; \ - typedef typename XV::size_type size_type; \ - typedef Kokkos::Details::InnerProductSpaceTraits> \ - IPT; \ - \ - static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrminf[TPL_BLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - R() = 0.0; \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - nrminf_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - int idx = \ - HostBlas>::iamax( \ - N, reinterpret_cast*>(X.data()), \ - one) - \ - 1; \ - R() = IPT::norm(X(idx)); \ - } else { \ - NrmInf::nrminf(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct NrmInf>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View> RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits> \ + XV; \ + typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits> IPT; \ + \ + static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_BLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + R() = 0.0; \ + return; \ + } \ + if (numElems < static_cast(INT_MAX)) { \ + nrminf_print_specialization(); \ 
+ int N = numElems; \ + int one = 1; \ + int idx = \ + HostBlas>::iamax(N, reinterpret_cast*>(X.data()), one) - \ + 1; \ + R() = IPT::norm(X(idx)); \ + } else { \ + NrmInf::nrminf(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct NrmInf>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View> \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits> \ - XV; \ - typedef typename XV::size_type size_type; \ - typedef Kokkos::Details::InnerProductSpaceTraits> \ - IPT; \ - \ - static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrminf[TPL_BLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { \ - R() = 0.0f; \ - return; \ - } \ - if (numElems < static_cast(INT_MAX)) { \ - nrminf_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - int idx = \ - HostBlas>::iamax( \ - N, reinterpret_cast*>(X.data()), \ - one) - \ - 1; \ - R() = IPT::norm(X(idx)); \ - } else { \ - NrmInf::nrminf(space, R, \ - X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct NrmInf>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View> RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits> \ + XV; \ + typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits> IPT; \ + \ + static void nrminf(const ExecSpace& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_BLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { \ + R() = 0.0f; \ + return; \ + } \ + if (numElems 
< static_cast(INT_MAX)) { \ + nrminf_print_specialization(); \ + int N = numElems; \ + int one = 1; \ + int idx = \ + HostBlas>::iamax(N, reinterpret_cast*>(X.data()), one) - 1; \ + R() = IPT::norm(X(idx)); \ + } else { \ + NrmInf::nrminf(space, R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp index 59f1715e5446..fee65fce14b6 100644 --- 
a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp @@ -32,62 +32,46 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ - template <> \ - struct rot_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct rot_tpl_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial) KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP) KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) #endif #endif // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(SCALAR, 
LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct rot_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rot_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) #endif // rocBLAS diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp index 8c83f9a09628..404c5c0e3b9b 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp @@ -24,9 +24,8 @@ namespace { template inline void rot_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas::rot<> TPL Blas specialization for < %s, %s, %s >\n", - typeid(VectorView).name(), typeid(ScalarView).name(), - typeid(ExecutionSpace).name); + 
printf("KokkosBlas::rot<> TPL Blas specialization for < %s, %s, %s >\n", typeid(VectorView).name(), + typeid(ScalarView).name(), typeid(ExecutionSpace).name); #endif } } // namespace @@ -40,110 +39,76 @@ inline void rot_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,double]"); \ - HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, \ - c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,double]"); \ + HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - 
Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,float]"); \ - HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, \ - c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,float]"); \ + HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Rot, EXECSPACE, MEMSPACE, true, \ - ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rot[TPL_BLAS,complex]"); \ - HostBlas>::rot( \ - X.extent_int(0), reinterpret_cast*>(X.data()), \ - 1, reinterpret_cast*>(Y.data()), 1, c.data(), \ - s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Rot, EXECSPACE, MEMSPACE, true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + 
Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ + HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Rot, EXECSPACE, MEMSPACE, true, \ - ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rot[TPL_BLAS,complex]"); \ - HostBlas>::rot( \ - X.extent_int(0), reinterpret_cast*>(X.data()), \ - 1, reinterpret_cast*>(Y.data()), 1, c.data(), \ - s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Rot, EXECSPACE, MEMSPACE, true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ + HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -186,230 +151,149 @@ KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, 
Kokkos::OpenMP, false) namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rot< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,double]"); \ - rot_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - cublasDrot(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1, \ - c.data(), s.data()); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ScalarView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,double]"); \ + rot_print_specialization(); \ 
+ KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + cublasDrot(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,float]"); \ - rot_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - cublasSrot(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1, \ - c.data(), s.data()); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, 
EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ScalarView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,float]"); \ + rot_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + cublasSrot(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rot*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rot[TPL_CUBLAS,complex]"); \ - rot_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - 
KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - cublasZdrot(singleton.handle, X.extent_int(0), \ - reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), \ - s.data()); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot< \ + EXECSPACE, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using ScalarView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ + rot_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + cublasZdrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, c.data(), s.data()); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define 
KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rot*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, \ - VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rot[TPL_CUBLAS,complex]"); \ - rot_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - cublasCsrot(singleton.handle, X.extent_int(0), \ - reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), \ - s.data()); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot< \ + EXECSPACE, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using ScalarView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + ScalarView const& s) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ + rot_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + cublasCsrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, c.data(), s.data()); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, 
Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) 
+KOKKOSBLAS1_SROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, 
Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas #endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp index ea94ff04dc7a..f8b8184b8047 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp @@ -32,157 +32,90 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE, 
\ - MEMSPACE) \ - template <> \ - struct rotg_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rotg_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) 
+KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) 
+KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) #endif #endif // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct rotg_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rotg_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) 
+KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) 
+KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct rotg_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rotg_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, - Kokkos::HIP, Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) 
+KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp index ee6a6c8c04e8..e6583d5ae31e 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp @@ -24,8 +24,8 @@ namespace { template inline void rotg_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas1::rotg<> TPL Blas specialization for < %s, %s >\n", - typeid(Scalar).name(), typeid(ExecutionSpace).name); + printf("KokkosBlas1::rotg<> TPL Blas specialization for < %s, %s >\n", typeid(Scalar).name(), + typeid(ExecutionSpace).name); #endif } } // namespace @@ -39,184 +39,130 @@ inline void rotg_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - 
Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, \ - MViewType const& c, SViewType const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,double]"); \ - HostBlas::rotg(a.data(), b.data(), c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using SViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,double]"); \ + HostBlas::rotg(a.data(), b.data(), c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, \ - MViewType const& c, SViewType const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,float]"); \ - HostBlas::rotg(a.data(), b.data(), c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using SViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using MViewType = \ + 
Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,float]"); \ + HostBlas::rotg(a.data(), b.data(), c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg< \ - EXECSPACE, \ - Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, \ - MViewType const& c, SViewType const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rotg[TPL_BLAS,complex]"); \ - HostBlas>::rotg( \ - reinterpret_cast*>(a.data()), \ - reinterpret_cast*>(b.data()), c.data(), \ - reinterpret_cast*>(s.data())); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using SViewType = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,complex]"); \ + HostBlas>::rotg(reinterpret_cast*>(a.data()), \ + reinterpret_cast*>(b.data()), c.data(), \ + reinterpret_cast*>(s.data())); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define 
KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, \ - MViewType const& c, SViewType const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rotg[TPL_BLAS,complex]"); \ - HostBlas>::rotg( \ - reinterpret_cast*>(a.data()), \ - reinterpret_cast*>(b.data()), c.data(), \ - reinterpret_cast*>(s.data())); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using SViewType = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,complex]"); \ + HostBlas>::rotg(reinterpret_cast*>(a.data()), \ + reinterpret_cast*>(b.data()), c.data(), \ + reinterpret_cast*>(s.data())); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - 
Kokkos::HostSpace, false) - -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, false) - -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, false) - -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) 
+KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) - -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) - -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) 
-KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) - -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, 
Kokkos::HostSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) #endif } // namespace Impl @@ -231,231 +177,151 @@ KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_CUBLAS,double]"); \ - rotg_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDrotg(singleton.handle, a.data(), \ - b.data(), c.data(), s.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using SViewType = \ + 
Kokkos::View, Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_CUBLAS,double]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDrotg(singleton.handle, a.data(), b.data(), c.data(), s.data())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_CUBLAS,float]"); \ - rotg_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - 
cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSrotg(singleton.handle, a.data(), \ - b.data(), c.data(), s.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using SViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_CUBLAS,float]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSrotg(singleton.handle, a.data(), b.data(), c.data(), s.data())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg< \ - EXECSPACE, \ - Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - 
Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rotg[TPL_CUBLAS,complex]"); \ - rotg_print_specialization, EXECSPACE>(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZrotg( \ - singleton.handle, reinterpret_cast(a.data()), \ - reinterpret_cast(b.data()), c.data(), \ - reinterpret_cast(s.data()))); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using SViewType = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_CUBLAS,complex]"); \ + rotg_print_specialization, EXECSPACE>(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZrotg(singleton.handle, reinterpret_cast(a.data()), \ + reinterpret_cast(b.data()), c.data(), \ + reinterpret_cast(s.data()))); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rotg[TPL_CUBLAS,complex]"); \ - rotg_print_specialization, EXECSPACE>(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCrotg( \ - singleton.handle, reinterpret_cast(a.data()), \ - reinterpret_cast(b.data()), c.data(), \ - reinterpret_cast(s.data()))); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, 
ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using SViewType = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_CUBLAS,complex]"); \ + rotg_print_specialization, EXECSPACE>(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCrotg(singleton.handle, reinterpret_cast(a.data()), \ + reinterpret_cast(b.data()), c.data(), \ + reinterpret_cast(s.data()))); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) 
-KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) 
-KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) 
+KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, 
Kokkos::Cuda, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -469,201 +335,137 @@ KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_ROCBLAS,double]"); \ - rotg_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode( \ - singleton.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_drotg( \ - singleton.handle, a.data(), b.data(), c.data(), s.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using SViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, 
SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_ROCBLAS,double]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_drotg(singleton.handle, a.data(), b.data(), c.data(), s.data())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_ROCBLAS,float]"); \ - rotg_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode( \ - singleton.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_srotg( \ - 
singleton.handle, a.data(), b.data(), c.data(), s.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using SViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_ROCBLAS,float]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_srotg(singleton.handle, a.data(), b.data(), c.data(), s.data())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg< \ - EXECSPACE, \ - Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, 
SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rotg[TPL_ROCBLAS,complex]"); \ - rotg_print_specialization, EXECSPACE>(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode( \ - singleton.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zrotg( \ - singleton.handle, \ - reinterpret_cast(a.data()), \ - reinterpret_cast(b.data()), c.data(), \ - reinterpret_cast(s.data()))); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg< \ + EXECSPACE, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using SViewType = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_ROCBLAS,complex]"); \ + rotg_print_specialization, EXECSPACE>(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); 
\ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zrotg(singleton.handle, \ + reinterpret_cast(a.data()), \ + reinterpret_cast(b.data()), c.data(), \ + reinterpret_cast(s.data()))); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotg, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using SViewType = Kokkos::View, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotg(EXECSPACE const& space, SViewType const& a, \ - SViewType const& b, MViewType const& c, \ - SViewType const& s) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::rotg[TPL_ROCBLAS,complex]"); \ - rotg_print_specialization, EXECSPACE>(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode( \ - singleton.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_crotg( \ - singleton.handle, \ - reinterpret_cast(a.data()), \ - reinterpret_cast(b.data()), c.data(), \ - reinterpret_cast(s.data()))); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct 
Rotg< \ + EXECSPACE, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + ETI_SPEC_AVAIL> { \ + using SViewType = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void rotg(EXECSPACE const& space, SViewType const& a, SViewType const& b, MViewType const& c, \ + SViewType const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_ROCBLAS,complex]"); \ + rotg_print_specialization, EXECSPACE>(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(singleton.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_crotg(singleton.handle, \ + reinterpret_cast(a.data()), \ + reinterpret_cast(b.data()), c.data(), \ + reinterpret_cast(s.data()))); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - 
Kokkos::HIPSpace, true) -KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, 
Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_avail.hpp index 2a1ee21cc6f7..84e7452e6569 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_avail.hpp @@ -34,90 +34,65 @@ namespace Impl { // ARMPL is disabled as it does not detect some corner // cases correctly which leads to failing unit-tests #if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) -#define KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotm_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotm_tpl_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::Serial, 
Kokkos::HostSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) #endif #endif // KOKKOSKERNELS_ENABLE_TPL_BLAS // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotm_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotm_tpl_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + 
enum : bool { value = true }; \ }; -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotm_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotm_tpl_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, - Kokkos::HIP, Kokkos::HIPSpace) -KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) 
+KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_ROTM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_decl.hpp index ce8826e1ee80..7bde6d08357a 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotm_tpl_spec_decl.hpp @@ -24,8 +24,7 @@ namespace { template inline void rotm_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas1::rotm<> TPL Blas specialization for < %s >\n", - typeid(Scalar).name()); + printf("KokkosBlas1::rotm<> TPL Blas specialization for < %s >\n", typeid(Scalar).name()); #endif } } // namespace @@ -39,68 +38,45 @@ inline void rotm_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rotm< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ParamView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotm(EXEC_SPACE const& /* space */, VectorView& X, \ - VectorView& Y, ParamView& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_BLAS,SCALAR]"); \ - HostBlas::rotm(X.extent(0), X.data(), 1, Y.data(), 1, \ - param.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, 
Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ParamView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void rotm(EXEC_SPACE const& /* space */, VectorView& X, VectorView& Y, ParamView& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_BLAS,SCALAR]"); \ + HostBlas::rotm(X.extent(0), X.data(), 1, Y.data(), 1, param.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, 
Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, 
Kokkos::OpenMP, Kokkos::HostSpace, false) #endif } // namespace Impl @@ -115,101 +91,69 @@ KOKKOSBLAS1_ROTM_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotm< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ParamView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotm(EXEC_SPACE const& space, VectorView const& X, \ - VectorView const& Y, ParamView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_CUBLAS,double]"); \ - rotm_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(s.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDrotm( \ - s.handle, X.extent(0), X.data(), 1, Y.data(), 1, param.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ParamView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotm(EXEC_SPACE const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_CUBLAS,double]"); \ + rotm_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(s.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDrotm(s.handle, X.extent(0), X.data(), 1, Y.data(), 1, param.data())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) - -#define KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotm< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using ParamView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotm(EXEC_SPACE const& space, VectorView const& X, \ - VectorView const& Y, ParamView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_CUBLAS,float]"); \ - rotm_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - 
KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(s.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSrotm( \ - s.handle, X.extent(0), X.data(), 1, Y.data(), 1, param.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) + +#define KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ParamView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotm(EXEC_SPACE const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_CUBLAS,float]"); \ + rotm_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(s.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSrotm(s.handle, X.extent(0), X.data(), 1, Y.data(), 1, param.data())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, 
pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -223,103 +167,71 @@ KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotm< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotm(EXEC_SPACE const& space, VectorView const& X, \ - VectorView const& Y, PView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_ROCBLAS,double]"); \ - rotm_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ - 
KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_drotm(s.handle, static_cast(X.extent(0)), X.data(), 1, \ - Y.data(), 1, param.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotm(EXEC_SPACE const& space, VectorView const& X, VectorView const& Y, PView const& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_ROCBLAS,double]"); \ + rotm_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_drotm(s.handle, static_cast(X.extent(0)), X.data(), 1, Y.data(), 1, param.data())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) 
-KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) - -#define KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotm< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotm(EXEC_SPACE const& space, VectorView const& X, \ - VectorView const& Y, PView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_ROCBLAS,float]"); \ - rotm_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_srotm(s.handle, static_cast(X.extent(0)), X.data(), 1, \ - Y.data(), 1, param.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_DROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +#define KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotm< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ 
+ Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotm(EXEC_SPACE const& space, VectorView const& X, VectorView const& Y, PView const& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotm[TPL_ROCBLAS,float]"); \ + rotm_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_srotm(s.handle, static_cast(X.extent(0)), X.data(), 1, Y.data(), 1, param.data())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_SROTM_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git 
a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_avail.hpp index d4db1143f93a..3a2925fd4993 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_avail.hpp @@ -33,88 +33,66 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) // ARMPL is disabled as it does not detect some corner // cases correctly which leads to failing unit-tests -#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) -#define KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotmg_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && !defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) +#define KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotmg_tpl_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, 
Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) #endif #endif // KOKKOSKERNELS_ENABLE_TPL_BLAS // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotmg_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotmg_tpl_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(float, 
Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct rotmg_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROTMG_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct rotmg_tpl_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Turning off use of rocBLAS as it returns false results in some of the diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_decl.hpp index e911294df45b..0271cfd98165 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_rotmg_tpl_spec_decl.hpp @@ -24,8 +24,7 @@ namespace { template inline void rotmg_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas1::rotmg<> TPL Blas specialization for < 
%s >\n", - typeid(Scalar).name()); + printf("KokkosBlas1::rotmg<> TPL Blas specialization for < %s >\n", typeid(Scalar).name()); #endif } } // namespace @@ -33,80 +32,54 @@ inline void rotmg_print_specialization() { } // namespace KokkosBlas // Generic Host side BLAS (could be MKL or whatever) -#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && !defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) #include "KokkosBlas_Host_tpl.hpp" namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rotmg< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using DXView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rotmg(EXEC_SPACE const& /* space */, DXView& d1, DXView& d2, \ - DXView& x1, YView& y1, PView& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_BLAS,double]"); \ - HostBlas::rotmg(d1.data(), d2.data(), x1.data(), y1.data(), \ - param.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using DXView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void rotmg(EXEC_SPACE const& /* space */, DXView& d1, DXView& d2, DXView& x1, YView& y1, PView& param) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_BLAS,double]"); \ + HostBlas::rotmg(d1.data(), d2.data(), x1.data(), y1.data(), param.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) #endif #ifdef KOKKOS_ENABLE_OPENMP 
-KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace, false) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, true) -KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) #endif } // namespace Impl @@ -121,114 +94,77 @@ KOKKOSBLAS1_ROTMG_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - 
ETI_SPEC_AVAIL) \ - template <> \ - struct Rotmg< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using DXView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotmg(EXEC_SPACE const& space, DXView const& d1, \ - DXView const& d2, DXView const& x1, YView const& y1, \ - PView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_CUBLAS,double]"); \ - rotmg_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(s.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDrotmg(s.handle, d1.data(), \ - d2.data(), x1.data(), \ - y1.data(), param.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using DXView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotmg(EXEC_SPACE const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, \ + PView const& param) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_CUBLAS,double]"); \ + rotmg_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(s.handle, &pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDrotmg(s.handle, d1.data(), d2.data(), x1.data(), y1.data(), param.data())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -#define KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Rotmg< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using DXView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using 
PView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotmg(EXEC_SPACE const& space, DXView const& d1, \ - DXView const& d2, DXView const& x1, YView const& y1, \ - PView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_CUBLAS,float]"); \ - rotmg_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasGetPointerMode(s.handle, &pointer_mode)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSrotmg(s.handle, d1.data(), \ - d2.data(), x1.data(), \ - y1.data(), param.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetPointerMode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using DXView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotmg(EXEC_SPACE const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, \ + PView const& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_CUBLAS,float]"); \ + rotmg_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasGetPointerMode(s.handle, 
&pointer_mode)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSrotmg(s.handle, d1.data(), d2.data(), x1.data(), y1.data(), param.data())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetPointerMode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -242,114 +178,79 @@ KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, \ - MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rotmg< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using DXView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotmg(EXEC_SPACE const& space, DXView const& d1, \ - DXView const& d2, DXView const& x1, YView const& y1, \ - PView const& 
param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_ROCBLAS,double]"); \ - rotmg_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_drotmg(s.handle, d1.data(), \ - d2.data(), x1.data(), \ - y1.data(), param.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using DXView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotmg(EXEC_SPACE const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, \ + PView const& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_ROCBLAS,double]"); \ + rotmg_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + 
rocblas_drotmg(s.handle, d1.data(), d2.data(), x1.data(), y1.data(), param.data())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_DROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -#define KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, \ - MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rotmg< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using DXView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PView = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void rotmg(EXEC_SPACE const& space, DXView const& d1, \ - DXView const& d2, DXView const& x1, YView const& y1, \ - PView const& param) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_ROCBLAS,float]"); \ - rotmg_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - rocblas_pointer_mode 
pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_srotmg(s.handle, d1.data(), \ - d2.data(), x1.data(), \ - y1.data(), param.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rotmg< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using DXView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void rotmg(EXEC_SPACE const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, \ + PView const& param) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotmg[TPL_ROCBLAS,float]"); \ + rotmg_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_device)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_srotmg(s.handle, d1.data(), d2.data(), x1.data(), y1.data(), param.data())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) 
-KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_SROTMG_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp index 5c5a6008ec5c..b5efa5c3a403 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct scal_tpl_spec_avail { enum : bool { value = false }; }; @@ -34,98 +33,71 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) // double -#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct scal_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct scal_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { 
value = true }; \ }; -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif // cuBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) // double -#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct scal_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct scal_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(float, 
Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) #endif // rocBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_ROCBLAS) -#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct scal_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct scal_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - 
Kokkos::HIPSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) #endif diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp index da11555f7b1f..7083e28730b3 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp @@ -24,8 +24,8 @@ namespace { template inline void scal_print_specialization() { #if defined(KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION) - printf("KokkosBlas1::scal<> TPL Blas specialization for < %s , %s , %s >\n", - typeid(RV).name(), typeid(AS).name(), typeid(XV).name()); + printf("KokkosBlas1::scal<> TPL Blas specialization for < %s , %s , %s >\n", typeid(RV).name(), typeid(AS).name(), + typeid(XV).name()); #endif } } // namespace @@ -38,87 +38,63 @@ inline void scal_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, \ - LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Scal< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - RV; \ - typedef SCALAR_TYPE AS; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - 
XV; \ - typedef typename XV::size_type size_type; \ - \ - static void scal(const ExecSpace& space, const RV& R, const AS& alpha, \ - const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::scal[TPL_BLAS," #SCALAR_TYPE \ - "]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && \ - (R.data() == X.data())) { \ - scal_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - const BASE_SCALAR_TYPE alpha_b = static_cast(alpha); \ - HostBlas::scal( \ - N, alpha_b, reinterpret_cast(R.data()), one); \ - } else { \ - Scal::scal(space, R, \ - alpha, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct Scal, \ + Kokkos::MemoryTraits >, \ + SCALAR_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + RV; \ + typedef SCALAR_TYPE AS; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal(const ExecSpace& space, const RV& R, const AS& alpha, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::scal[TPL_BLAS," #SCALAR_TYPE "]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + int N = numElems; \ + int one = 1; \ + const BASE_SCALAR_TYPE alpha_b = static_cast(alpha); \ + HostBlas::scal(N, alpha_b, reinterpret_cast(R.data()), one); \ + } else { \ + Scal::scal(space, R, alpha, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(double, double, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(double, double, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) #define 
KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(float, float, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(float, float, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) #define KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(Kokkos::complex, \ - std::complex, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(Kokkos::complex, std::complex, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) #define KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(Kokkos::complex, \ - std::complex, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) - -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_BLAS(Kokkos::complex, std::complex, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) 
+KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) + +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -132,117 +108,81 @@ KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, \ - CUBLAS_FN, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Scal< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - RV; \ - typedef SCALAR_TYPE AS; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void scal(const ExecSpace& space, const RV& R, const AS& alpha, \ - const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::scal[TPL_CUBLAS," #SCALAR_TYPE "]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && \ - (R.data() == X.data())) { \ - scal_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ - s.handle, N, reinterpret_cast(&alpha), \ - reinterpret_cast(R.data()), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - Scal::scal(space, R, \ - alpha, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct Scal, \ 
+ Kokkos::MemoryTraits >, \ + SCALAR_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + RV; \ + typedef SCALAR_TYPE AS; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal(const ExecSpace& space, const RV& R, const AS& alpha, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::scal[TPL_CUBLAS," #SCALAR_TYPE "]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN(s.handle, N, reinterpret_cast(&alpha), \ + reinterpret_cast(R.data()), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + Scal::scal(space, R, alpha, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(double, double, cublasDscal, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(float, float, cublasSscal, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, \ - cuDoubleComplex, cublasZscal, LAYOUT, \ - MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, cuComplex, \ - cublasCscal, LAYOUT, MEMSPACE, \ +#define 
KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(double, double, cublasDscal, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(float, float, cublasSscal, LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, cuDoubleComplex, cublasZscal, LAYOUT, MEMSPACE, \ ETI_SPEC_AVAIL) -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - false) - -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - false) - -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - false) - -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, 
Kokkos::CudaUVMSpace, - false) +#define KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, cuComplex, cublasCscal, LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) + +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -256,105 +196,73 @@ KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS( \ - SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ 
- struct Scal< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - SCALAR_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using execution_space = EXECSPACE; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - RV; \ - typedef SCALAR_TYPE AS; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void scal(const execution_space& space, const RV& R, \ - const AS& alpha, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::scal[TPL_ROCBLAS," #SCALAR_TYPE "]"); \ - const size_type numElems = X.extent(0); \ - if ((numElems < static_cast(INT_MAX)) && \ - (R.data() == X.data())) { \ - scal_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - rocblas_pointer_mode pointer_mode; \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN( \ - s.handle, N, reinterpret_cast(&alpha), \ - reinterpret_cast(R.data()), one)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, pointer_mode)); \ - } else { \ - Scal::scal(space, R, \ - alpha, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, LAYOUT, EXECSPACE, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Scal, \ + Kokkos::MemoryTraits >, \ + SCALAR_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + using execution_space = EXECSPACE; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + RV; \ + typedef SCALAR_TYPE AS; \ + typedef 
Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal(const execution_space& space, const RV& R, const AS& alpha, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::scal[TPL_ROCBLAS," #SCALAR_TYPE "]"); \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast(INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + rocblas_pointer_mode pointer_mode; \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_get_pointer_mode(s.handle, &pointer_mode)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN(s.handle, N, reinterpret_cast(&alpha), \ + reinterpret_cast(R.data()), one)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, pointer_mode)); \ + } else { \ + Scal::scal(space, R, alpha, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(double, double, rocblas_dscal, \ - LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) +#define KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(double, double, rocblas_dscal, LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(float, float, rocblas_sscal, LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - 
KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(float, float, rocblas_sscal, LAYOUT, \ +#define KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, rocblas_double_complex, rocblas_zscal, LAYOUT, \ EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::complex, rocblas_double_complex, rocblas_zscal, LAYOUT, \ - EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) - -#define KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS( \ - Kokkos::complex, rocblas_float_complex, rocblas_cscal, LAYOUT, \ - EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) - -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) +#define KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS1_XSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, rocblas_float_complex, rocblas_cscal, LAYOUT, \ + EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) + +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) 
+KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp index 14ecce274060..de1fa19cb331 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp @@ -34,132 +34,83 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ - template <> \ - struct swap_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct swap_tpl_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial) KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Serial) 
-KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP) KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::OpenMP) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) #endif #endif // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct swap_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct swap_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) 
+KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) 
+KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ - template <> \ - struct swap_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct swap_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - 
Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, - Kokkos::HIP, Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp index 555c942c1266..e74b498c33d3 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp @@ -26,9 +26,8 @@ namespace { template inline void swap_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION - printf("KokkosBlas::swap<> TPL Blas specialization for < %s, %s, %s >\n", - typeid(XVector).name(), typeid(YVector).name(), - typeid(ExecutionSpace).name); + 
printf("KokkosBlas::swap<> TPL Blas specialization for < %s, %s, %s >\n", typeid(XVector).name(), + typeid(YVector).name(), typeid(ExecutionSpace).name); #endif } } // namespace @@ -42,110 +41,82 @@ inline void swap_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& /*space*/, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,double]"); \ - HostBlas::swap(X.extent_int(0), X.data(), 1, Y.data(), 1); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,double]"); \ + HostBlas::swap(X.extent_int(0), X.data(), 1, Y.data(), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& /*space*/, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,float]"); \ 
- HostBlas::swap(X.extent_int(0), X.data(), 1, Y.data(), 1); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,float]"); \ + HostBlas::swap(X.extent_int(0), X.data(), 1, Y.data(), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& /*space*/, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::swap[TPL_BLAS,complex]"); \ - HostBlas>::swap( \ - X.extent_int(0), reinterpret_cast*>(X.data()), \ - 1, reinterpret_cast*>(Y.data()), 1); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, 
YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,complex]"); \ + HostBlas>::swap(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& /*space*/, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::swap[TPL_BLAS,complex]"); \ - HostBlas>::swap( \ - X.extent_int(0), reinterpret_cast*>(X.data()), \ - 1, reinterpret_cast*>(Y.data()), 1); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,complex]"); \ + HostBlas>::swap(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -188,201 +159,131 @@ KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, false) namespace KokkosBlas { namespace Impl { -#define 
KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,double]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDswap( \ - singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,double]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDswap(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap< \ - EXECSPACE, \ - 
Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,float]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSswap( \ - singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,float]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSswap(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - 
using XVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::swap[TPL_CUBLAS,complex]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasZswap(singleton.handle, X.extent_int(0), \ - reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,complex]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZswap(singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - 
Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::swap[TPL_CUBLAS,complex]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(singleton.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCswap(singleton.handle, X.extent_int(0), \ - reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,complex]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCswap(singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - 
Kokkos::CudaSpace, true) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) 
-KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace, false) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) 
+KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, 
Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas #endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS @@ -394,169 +295,115 @@ KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,double]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dswap( \ - singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector 
= \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,double]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dswap(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,float]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_sswap( \ - singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + static void 
swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,float]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_sswap(singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::swap[TPL_ROCBLAS,complex_double]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zswap( \ - singleton.handle, X.extent_int(0), \ - reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = 
Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,complex_double]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zswap(singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Swap*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using XVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void swap(EXECSPACE const& space, XVector const& X, \ - YVector const& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::swap[TPL_ROCBLAS,complex_float]"); \ - swap_print_specialization(); \ - KokkosBlas::Impl::RocBlasSingleton& singleton = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(singleton.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cswap( \ - singleton.handle, X.extent_int(0), \ - reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + 
true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,complex_float]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cswap(singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, 
false) - -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace, false) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) + +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, 
false) } // namespace Impl } // namespace KokkosBlas #endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_update_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_update_tpl_spec_avail.hpp index 88a60e6d1911..55e1383ed72e 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_update_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_update_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct update_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp index 0820badd9a12..709f261b63c6 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp @@ -28,46 +28,34 @@ struct gemv_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTX, \ - LAYOUTY, MEMSPACE) \ - template \ - struct gemv_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTX, LAYOUTY, MEMSPACE) \ + template \ + struct gemv_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, 
Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) #endif @@ -75,20 +63,16 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTX, \ - LAYOUTY, MEMSPACE) \ - template \ - struct gemv_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - 
Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTX, LAYOUTY, MEMSPACE) \ + template \ + struct gemv_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Note BMK: We use the same layout for A, X and Y because the GEMV @@ -96,30 +80,22 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, // So this TPL version will match any layout combination, as long // as none are LayoutStride. -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, 
+KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) #endif @@ -127,75 +103,59 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT) \ - template \ - struct gemv_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT) \ + template \ + struct gemv_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft) KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft) KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight) KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight) 
-KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#if defined(KOKKOS_ENABLE_SYCL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) - -#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, LAYOUT) \ - template \ - struct gemv_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) + +#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, LAYOUT) \ + template \ + struct gemv_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(double, Kokkos::LayoutLeft) KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(float, Kokkos::LayoutLeft) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, - Kokkos::LayoutLeft) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, - Kokkos::LayoutLeft) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft) KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(double, Kokkos::LayoutRight) KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(float, Kokkos::LayoutRight) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, - Kokkos::LayoutRight) -KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, - Kokkos::LayoutRight) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, Kokkos::LayoutRight) +KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(Kokkos::complex, Kokkos::LayoutRight) #endif diff --git 
a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index 2ace06580873..4234afbd77b0 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -43,215 +43,157 @@ namespace Impl { transa = 'C'; \ } -#define KOKKOSBLAS2_DGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& /* space */, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,double]"); \ - KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ - HostBlas::gemv(transa, M, N, alpha, A.data(), LDA, X.data(), \ - one, beta, Y.data(), one); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& /* space */, 
const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,double]"); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + HostBlas::gemv(transa, M, N, alpha, A.data(), LDA, X.data(), one, beta, Y.data(), one); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& /* space */, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,float]"); \ - KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ - HostBlas::gemv(transa, M, N, alpha, A.data(), LDA, X.data(), one, \ - beta, Y.data(), one); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + 
static void gemv(const ExecSpace& /* space */, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,float]"); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + HostBlas::gemv(transa, M, N, alpha, A.data(), LDA, X.data(), one, beta, Y.data(), one); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTX, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTY, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& /* space */, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_BLAS,complex]"); \ - KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ - const std::complex alpha_val = alpha, beta_val = beta; \ - HostBlas >::gemv( \ - transa, M, N, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(X.data()), one, \ - beta_val, reinterpret_cast*>(Y.data()), one); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, 
Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& /* space */, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,complex]"); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + const std::complex alpha_val = alpha, beta_val = beta; \ + HostBlas >::gemv(transa, M, N, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(X.data()), one, beta_val, \ + reinterpret_cast*>(Y.data()), one); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTX, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTY, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& /* space */, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_BLAS,complex]"); \ - KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ - const std::complex alpha_val = alpha, beta_val = beta; \ - HostBlas >::gemv( \ - 
transa, M, N, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(X.data()), one, \ - beta_val, reinterpret_cast*>(Y.data()), one); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& /* space */, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,complex]"); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + const std::complex alpha_val = alpha, beta_val = beta; \ + HostBlas >::gemv(transa, M, N, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(X.data()), one, beta_val, \ + reinterpret_cast*>(Y.data()), one); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) - -KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) 
-KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) - -KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) - -KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) +KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, 
Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGEMV_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -284,238 +226,169 @@ namespace Impl { transa = CUBLAS_OP_C; \ } -#define KOKKOSBLAS2_DGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - 
Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,double]"); \ - KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDgemv(s.handle, transa, M, N, &alpha, \ - A.data(), LDA, X.data(), one, \ - &beta, Y.data(), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,double]"); \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasDgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - 
ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,float]"); \ - KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSgemv(s.handle, transa, M, N, &alpha, \ - A.data(), LDA, X.data(), one, \ - &beta, Y.data(), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,float]"); \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasSgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTX, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTY, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasZgemv(s.handle, transa, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(&beta), \ - reinterpret_cast(Y.data()), one)); \ - 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgemv( \ + s.handle, transa, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, reinterpret_cast(X.data()), \ + one, reinterpret_cast(&beta), reinterpret_cast(Y.data()), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTX, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTY, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - 
Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgemv( \ - s.handle, transa, M, N, reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(&beta), \ - reinterpret_cast(Y.data()), one)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ + 
KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasCgemv(s.handle, transa, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, reinterpret_cast(X.data()), \ + one, reinterpret_cast(&beta), reinterpret_cast(Y.data()), one)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) - -KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) - -KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) - 
-KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) +KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, 
Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CGEMV_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -548,198 +421,152 @@ namespace Impl { transa = rocblas_operation_conjugate_transpose; \ } -#define KOKKOSBLAS2_DGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,double]"); \ - KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_dgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, \ - X.data(), one, &beta, Y.data(), one)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, 
ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,double]"); \ + KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_dgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,float]"); \ - KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - 
KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_sgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, \ - X.data(), one, &beta, Y.data(), one)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,float]"); \ + KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_sgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits 
>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgemv( \ - s.handle, transa, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(&beta), \ - reinterpret_cast(Y.data()), one)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + 
Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgemv(s.handle, transa, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(&beta), \ + reinterpret_cast(Y.data()), one)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const ExecSpace& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgemv( \ - s.handle, transa, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(X.data()), one, \ - 
reinterpret_cast(&beta), \ - reinterpret_cast(Y.data()), one)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_GEMV_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgemv(s.handle, transa, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(&beta), \ + reinterpret_cast(Y.data()), one)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSBLAS2_DGEMV_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, true) @@ -767,8 +594,7 @@ KOKKOSBLAS2_CGEMV_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIPSpace, false) #endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS // ONEMKL -#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ +#if 
defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ defined(KOKKOS_ENABLE_SYCL) #include #include @@ -784,8 +610,7 @@ inline oneapi::mkl::transpose mode_kk_to_onemkl(char mode_kk) { case 'C': return oneapi::mkl::transpose::conjtrans; default:; } - throw std::invalid_argument( - "Invalid mode for oneMKL (should be one of N, T, C)"); + throw std::invalid_argument("Invalid mode for oneMKL (should be one of N, T, C)"); } template @@ -799,74 +624,58 @@ struct kokkos_to_std_type_map { using type = std::complex::mag_type>; }; -#define KOKKOSBLAS2_GEMV_ONEMKL(SCALAR, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - using device_type = Kokkos::Device; \ - using mem_traits = Kokkos::MemoryTraits; \ - using AViewType = \ - Kokkos::View; \ - using XViewType = \ - Kokkos::View; \ - using YViewType = Kokkos::View; \ - \ - static void gemv(const ExecSpace& exec, const char kk_trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - bool row_major = std::is_same::value; \ - const std::int64_t M = A.extent(0); \ - const std::int64_t N = A.extent(1); \ - oneapi::mkl::transpose trans = mode_kk_to_onemkl(kk_trans[0]); \ - const std::int64_t LDA = row_major ? 
A.stride(0) : A.stride(1); \ - std::string label = "KokkosBlas::gemv[TPL_ONEMKL," + \ - Kokkos::ArithTraits::name() + "]"; \ - \ - Kokkos::Profiling::pushRegion(label); \ - using mag_type = kokkos_to_std_type_map< \ - SCALAR, Kokkos::ArithTraits::is_complex>::type; \ - const mag_type* a = reinterpret_cast(A.data()); \ - const mag_type* x = reinterpret_cast(X.data()); \ - mag_type* y = reinterpret_cast(Y.data()); \ - if (row_major) { \ - oneapi::mkl::blas::row_major::gemv(exec.sycl_queue(), trans, M, N, \ - alpha, a, LDA, x, 1, beta, y, 1); \ - } else { \ - oneapi::mkl::blas::column_major::gemv( \ - exec.sycl_queue(), trans, M, N, alpha, a, LDA, x, 1, beta, y, 1); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_GEMV_ONEMKL(SCALAR, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMV, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + using device_type = Kokkos::Device; \ + using mem_traits = Kokkos::MemoryTraits; \ + using AViewType = Kokkos::View; \ + using XViewType = Kokkos::View; \ + using YViewType = Kokkos::View; \ + \ + static void gemv(const ExecSpace& exec, const char kk_trans[], typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + if (beta == Kokkos::ArithTraits::zero()) { \ + Kokkos::deep_copy(Y, Kokkos::ArithTraits::zero()); \ + } \ + \ + bool row_major = std::is_same::value; \ + const std::int64_t M = A.extent(0); \ + const std::int64_t N = A.extent(1); \ + oneapi::mkl::transpose trans = mode_kk_to_onemkl(kk_trans[0]); \ + const std::int64_t LDA = row_major ? 
A.stride(0) : A.stride(1); \ + std::string label = "KokkosBlas::gemv[TPL_ONEMKL," + Kokkos::ArithTraits::name() + "]"; \ + \ + Kokkos::Profiling::pushRegion(label); \ + using mag_type = kokkos_to_std_type_map::is_complex>::type; \ + const mag_type* a = reinterpret_cast(A.data()); \ + const mag_type* x = reinterpret_cast(X.data()); \ + mag_type* y = reinterpret_cast(Y.data()); \ + if (row_major) { \ + oneapi::mkl::blas::row_major::gemv(exec.sycl_queue(), trans, M, N, alpha, a, LDA, x, 1, beta, y, 1); \ + } else { \ + oneapi::mkl::blas::column_major::gemv(exec.sycl_queue(), trans, M, N, alpha, a, LDA, x, 1, beta, y, 1); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_GEMV_ONEMKL(float, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(float, Kokkos::LayoutRight, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(double, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(double, Kokkos::LayoutRight, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(float, Kokkos::LayoutRight, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(double, Kokkos::LayoutRight, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, 
Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS2_GEMV_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Experimental::SYCLDeviceUSMSpace, true) } // namespace Impl } // namespace KokkosBlas #endif diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_avail.hpp index 3013689f34d3..b6156c2d3ac8 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_avail.hpp @@ -28,62 +28,40 @@ struct ger_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct ger_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct ger_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, 
Kokkos::HostSpace) - -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) + +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) - -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) 
-KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) + +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) #endif #endif @@ -91,112 +69,68 @@ KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct ger_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct ger_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // We use the same layout for X, Y and Abecause the GER interface will // switch the layouts of X and Y to that of A. 
So this TPL version will // match any layout combination, as long as none are LayoutStride. -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) - -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) - -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) - -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, 
Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) + +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) + +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) + +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct ger_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define 
KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct ger_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) - -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) + +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_GER_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace 
Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp index bc1a10f61eac..680df7c464db 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp @@ -30,308 +30,225 @@ namespace Impl { constexpr int one = 1; \ const int LDA = A_is_lr ? A.stride(0) : A.stride(1); -#define KOKKOSBLAS2_DGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& /* space */ \ - , \ - const char /*trans*/[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_BLAS,double]"); \ - KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ - if (A_is_ll) { \ - HostBlas::ger(M, N, alpha, X.data(), one, Y.data(), one, \ - A.data(), LDA); \ - } else { \ - HostBlas::ger(M, N, alpha, Y.data(), one, X.data(), one, \ - A.data(), LDA); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + 
YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& /* space */ \ + , \ + const char /*trans*/[], typename AViewType::const_value_type& alpha, const XViewType& X, \ + const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_BLAS,double]"); \ + KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ + if (A_is_ll) { \ + HostBlas::ger(M, N, alpha, X.data(), one, Y.data(), one, A.data(), LDA); \ + } else { \ + HostBlas::ger(M, N, alpha, Y.data(), one, X.data(), one, A.data(), LDA); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& /* space */ \ - , \ - const char /*trans*/[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_BLAS,float]"); \ - KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ - if (A_is_ll) { \ - HostBlas::ger(M, N, alpha, X.data(), one, Y.data(), one, \ - A.data(), LDA); \ - } else { \ - HostBlas::ger(M, N, alpha, Y.data(), one, X.data(), one, \ - A.data(), LDA); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ 
+ typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& /* space */ \ + , \ + const char /*trans*/[], typename AViewType::const_value_type& alpha, const XViewType& X, \ + const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_BLAS,float]"); \ + KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ + if (A_is_ll) { \ + HostBlas::ger(M, N, alpha, X.data(), one, Y.data(), one, A.data(), LDA); \ + } else { \ + HostBlas::ger(M, N, alpha, Y.data(), one, X.data(), one, A.data(), LDA); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::ger[TPL_BLAS,complex"); \ - KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ - const std::complex alpha_val = \ - static_cast>(alpha); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (A_is_ll) { \ - if (justTranspose) { \ - HostBlas>::geru( \ - M, N, alpha_val, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } 
else { \ - HostBlas>::gerc( \ - M, N, alpha_val, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } \ - } else { \ - if (justTranspose) { \ - HostBlas>::geru( \ - M, N, alpha_val, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - /* blasgerc() + ~A_ll => call kokkos-kernels' implementation */ \ - GER::ger(space, trans, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_BLAS,complex"); \ + KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ + const std::complex alpha_val = static_cast>(alpha); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (A_is_ll) { \ + if (justTranspose) { \ + HostBlas>::geru(M, N, alpha_val, \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + HostBlas>::gerc(M, N, alpha_val, \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } \ + } else { \ + if (justTranspose) { \ + HostBlas>::geru(M, N, alpha_val, \ + 
reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + /* blasgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::ger[TPL_BLAS,complex"); \ - KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ - const std::complex alpha_val = \ - static_cast>(alpha); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (A_is_ll) { \ - if (justTranspose) { \ - HostBlas>::geru( \ - M, N, alpha_val, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - HostBlas>::gerc( \ - M, N, alpha_val, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } \ - } else { \ - if (justTranspose) { \ - HostBlas>::geru( \ - M, N, alpha_val, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - /* blasgerc() + ~A_ll => call kokkos-kernels' implementation */ \ - GER::ger(space, trans, 
alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CGER_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_BLAS,complex"); \ + KOKKOSBLAS2_GER_DETERMINE_ARGS(LAYOUT); \ + const std::complex alpha_val = static_cast>(alpha); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (A_is_ll) { \ + if (justTranspose) { \ + HostBlas>::geru(M, N, alpha_val, reinterpret_cast*>(X.data()), \ + one, reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + HostBlas>::gerc(M, N, alpha_val, reinterpret_cast*>(X.data()), \ + one, reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } \ + } else { \ + if (justTranspose) { \ + HostBlas>::geru(M, N, alpha_val, reinterpret_cast*>(Y.data()), \ + one, reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + /* blasgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) 
-KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) 
-KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, 
Kokkos::HostSpace, false) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGER_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp index 3f80144f62c5..fdb09d1c9170 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp @@ -30,324 +30,231 @@ namespace Impl { constexpr int one = 1; \ const int LDA = A_is_lr ? 
A.stride(0) : A.stride(1); -#define KOKKOSBLAS2_DGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char /*trans*/[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_CUBLAS,double]"); \ - KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDger(s.handle, M, N, &alpha, \ - X.data(), one, Y.data(), one, \ - A.data(), LDA)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDger(s.handle, M, N, &alpha, \ - Y.data(), one, X.data(), one, \ - A.data(), LDA)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const 
EXEC_SPACE& space, const char /*trans*/[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_CUBLAS,double]"); \ + KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDger(s.handle, M, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDger(s.handle, M, N, &alpha, Y.data(), one, X.data(), one, A.data(), LDA)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char /*trans*/[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_CUBLAS,float]"); \ - KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSger(s.handle, M, N, &alpha, \ - X.data(), one, Y.data(), one, \ - A.data(), 
LDA)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSger(s.handle, M, N, &alpha, \ - Y.data(), one, X.data(), one, \ - A.data(), LDA)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char /*trans*/[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_CUBLAS,float]"); \ + KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSger(s.handle, M, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSger(s.handle, M, N, &alpha, Y.data(), one, X.data(), one, A.data(), LDA)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - 
typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::ger[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - if (justTranspose) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgeru( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgerc( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } \ - } else { \ - if (justTranspose) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgeru( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - /* cublasZgerc() + ~A_ll => call kokkos-kernels' implementation */ \ - GER::ger(space, trans, alpha, X, Y, A); \ - } \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + 
Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + if (justTranspose) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgeru(s.handle, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgerc(s.handle, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } \ + } else { \ + if (justTranspose) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZgeru(s.handle, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + /* cublasZgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ + } \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct GER*, LAYOUT, \ - 
Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::ger[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - if (justTranspose) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgeru( \ - s.handle, M, N, reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgerc( \ - s.handle, M, N, reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } \ - } else { \ - if (justTranspose) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgeru( \ - s.handle, M, N, reinterpret_cast(&alpha), \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - /* cublasCgerc() + ~A_ll => call kokkos-kernels' implementation */ \ - GER::ger(space, trans, alpha, X, Y, A); \ - } \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ 
+#define KOKKOSBLAS2_CGER_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_GER_CUBLAS_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + if (justTranspose) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgeru(s.handle, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgerc(s.handle, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } \ + } else { \ + if (justTranspose) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCgeru(s.handle, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + /* cublasCgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ + } \ + } \ + 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_DGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, 
Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_SGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) 
+KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_ZGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_CGER_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp index c21b61befaf5..26a0da58649b 
100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp @@ -30,295 +30,221 @@ namespace Impl { constexpr int one = 1; \ const int LDA = A_is_lr ? A.stride(0) : A.stride(1); -#define KOKKOSBLAS2_DGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char /*trans*/[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_ROCBLAS,double]"); \ - KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - if (A_is_ll) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dger(s.handle, M, N, &alpha, \ - X.data(), one, Y.data(), \ - one, A.data(), LDA)); \ - } else { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dger(s.handle, M, N, &alpha, \ - Y.data(), one, X.data(), \ - one, A.data(), LDA)); \ - } \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + 
true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char /*trans*/[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_ROCBLAS,double]"); \ + KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + if (A_is_ll) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_dger(s.handle, M, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + } else { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_dger(s.handle, M, N, &alpha, Y.data(), one, X.data(), one, A.data(), LDA)); \ + } \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct GER< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char /*trans*/[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_ROCBLAS,float]"); \ - KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - 
KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - if (A_is_ll) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_sger(s.handle, M, N, &alpha, \ - X.data(), one, Y.data(), \ - one, A.data(), LDA)); \ - } else { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_sger(s.handle, M, N, &alpha, \ - Y.data(), one, X.data(), \ - one, A.data(), LDA)); \ - } \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char /*trans*/[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_ROCBLAS,float]"); \ + KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + if (A_is_ll) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_sger(s.handle, M, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + } else { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_sger(s.handle, M, N, &alpha, Y.data(), one, X.data(), one, A.data(), LDA)); \ + } \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; 
-#define KOKKOSBLAS2_ZGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct GER*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::ger[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - if (A_is_ll) { \ - if (justTranspose) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgeru( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgerc( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } \ - } else { \ - if (justTranspose) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgeru( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - /* rocblas_zgerc() + ~A_ll => call k-kernels' implementation */ \ - 
GER::ger(space, trans, alpha, X, Y, A); \ - } \ - } \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + if (A_is_ll) { \ + if (justTranspose) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgeru(s.handle, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgerc(s.handle, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } \ + } else { \ + if (justTranspose) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zgeru(s.handle, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(X.data()), one, \ + 
reinterpret_cast(A.data()), LDA)); \ + } else { \ + /* rocblas_zgerc() + ~A_ll => call k-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ + } \ + } \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct GER*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void ger(const EXEC_SPACE& space, const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::ger[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - if (A_is_ll) { \ - if (justTranspose) { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgeru( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgerc( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } \ - } else { \ - if (justTranspose) { \ - 
KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgeru( \ - s.handle, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - } else { \ - /* rocblas_cgerc() + ~A_ll => call k-kernels' implementation */ \ - GER::ger(space, trans, alpha, X, Y, A); \ - } \ - } \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CGER_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct GER*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void ger(const EXEC_SPACE& space, const char trans[], typename AViewType::const_value_type& alpha, \ + const XViewType& X, const YViewType& Y, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::ger[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_GER_ROCBLAS_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + if (A_is_ll) { \ + if (justTranspose) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgeru(s.handle, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgerc(s.handle, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + 
reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } \ + } else { \ + if (justTranspose) { \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cgeru(s.handle, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + } else { \ + /* rocblas_cgerc() + ~A_ll => call k-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ + } \ + } \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_DGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_SGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, 
Kokkos::HIPSpace, - true) -KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_ZGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_CGER_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp index 6f6a7a2e9f2a..d8944335403b 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp @@ -35,14 +35,12 @@ namespace Impl { // Note: using GEMM because there is no GEMV in MKL compact routines -#define __IMPL_KK_MKL_DGEMM_COMPACT(SCALAR, MKL_ROUTINE) \ - inline void kk_mkl_gemm_compact( \ - MKL_LAYOUT layout, MKL_TRANSPOSE transa, MKL_TRANSPOSE transb, \ - MKL_INT 
m, MKL_INT n, MKL_INT k, SCALAR alpha, const SCALAR *a, \ - MKL_INT ldap, const SCALAR *b, MKL_INT ldbp, SCALAR beta, SCALAR *c, \ - MKL_INT ldcp, MKL_COMPACT_PACK format, MKL_INT nm) { \ - MKL_ROUTINE(layout, transa, transb, m, n, k, alpha, a, ldap, b, ldbp, \ - beta, c, ldcp, format, nm); \ +#define __IMPL_KK_MKL_DGEMM_COMPACT(SCALAR, MKL_ROUTINE) \ + inline void kk_mkl_gemm_compact(MKL_LAYOUT layout, MKL_TRANSPOSE transa, MKL_TRANSPOSE transb, MKL_INT m, MKL_INT n, \ + MKL_INT k, SCALAR alpha, const SCALAR *a, MKL_INT ldap, const SCALAR *b, \ + MKL_INT ldbp, SCALAR beta, SCALAR *c, MKL_INT ldcp, MKL_COMPACT_PACK format, \ + MKL_INT nm) { \ + MKL_ROUTINE(layout, transa, transb, m, n, k, alpha, a, ldap, b, ldbp, beta, c, ldcp, format, nm); \ } __IMPL_KK_MKL_DGEMM_COMPACT(double, mkl_dgemm_compact) @@ -81,23 +79,17 @@ inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_AVX512; } -template -void kk_mkl_gemv(MKL_TRANSPOSE trans, const ScalarType alpha, - const AViewType &A, const xViewType &x, const ScalarType beta, - const yViewType &y) { +template +void kk_mkl_gemv(MKL_TRANSPOSE trans, const ScalarType alpha, const AViewType &A, const xViewType &x, + const ScalarType beta, const yViewType &y) { typedef typename yViewType::value_type vector_type; - static_assert(KokkosBatched::is_vector::value, - "value type is not vector type"); + static_assert(KokkosBatched::is_vector::value, "value type is not vector type"); using value_type = typename vector_type::value_type; - static_assert(std::is_same::value && - std::is_same::value, + static_assert(std::is_same::value && + std::is_same::value, "scalar type mismatch"); - if (A.stride_0() != 1 && A.stride_1() != 1 && x.stride_0() != 1 && - y.stride_0() != 1) { + if (A.stride_0() != 1 && A.stride_1() != 1 && x.stride_0() != 1 && y.stride_0() != 1) { Kokkos::abort("Strided inputs are not supported in MKL gemv/gemm"); } @@ -107,21 +99,18 @@ void kk_mkl_gemv(MKL_TRANSPOSE trans, const ScalarType alpha, const int n = 
1; const int k = A.extent_int(transposed ? 0 : 1); - const bool col_major = A.stride_0() == 1; - const MKL_LAYOUT layout = col_major ? MKL_COL_MAJOR : MKL_ROW_MAJOR; - const MKL_INT A_ld = KOKKOSKERNELS_MACRO_MAX(1, A.extent(col_major ? 0 : 1)); - const MKL_COMPACT_PACK format = - Impl::mkl_compact_format(); + const bool col_major = A.stride_0() == 1; + const MKL_LAYOUT layout = col_major ? MKL_COL_MAJOR : MKL_ROW_MAJOR; + const MKL_INT A_ld = KOKKOSKERNELS_MACRO_MAX(1, A.extent(col_major ? 0 : 1)); + const MKL_COMPACT_PACK format = Impl::mkl_compact_format(); // cast away simd-vector pointers auto A_data = reinterpret_cast(A.data()); auto x_data = reinterpret_cast(x.data()); auto y_data = reinterpret_cast(y.data()); - Impl::kk_mkl_gemm_compact(layout, trans, MKL_NOTRANS, m, n, k, - (value_type)alpha, A_data, A_ld, x_data, 1, - (value_type)beta, y_data, 1, format, - (MKL_INT)vector_type::vector_length); + Impl::kk_mkl_gemm_compact(layout, trans, MKL_NOTRANS, m, n, k, (value_type)alpha, A_data, A_ld, x_data, 1, + (value_type)beta, y_data, 1, format, (MKL_INT)vector_type::vector_length); } } // namespace Impl @@ -131,12 +120,9 @@ void kk_mkl_gemv(MKL_TRANSPOSE trans, const ScalarType alpha, /// template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { Impl::kk_mkl_gemv(MKL_NOTRANS, alpha, A, x, beta, y); return 0; } @@ -146,12 +132,9 @@ SerialGemv::invoke( /// template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const 
ScalarType beta, const yViewType &y) { Impl::kk_mkl_gemv(MKL_TRANS, alpha, A, x, beta, y); return 0; } @@ -161,12 +144,9 @@ SerialGemv::invoke( /// template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { +template +KOKKOS_INLINE_FUNCTION int SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { Impl::kk_mkl_gemv(MKL_CONJTRANS, alpha, A, x, beta, y); return 0; } diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_avail.hpp index 59fb154d3530..2c3cdc990ee1 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_avail.hpp @@ -28,66 +28,40 @@ struct syr2_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct syr2_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr2_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, 
Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) - -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) + +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) - -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::OpenMP, 
Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) + +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) #endif #endif @@ -95,108 +69,64 @@ KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct syr2_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr2_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; 
-KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) - -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) - -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) - -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::Cuda, Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) 
+KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) + +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) + +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) + +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct syr2_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) 
\ + template <> \ + struct syr2_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) - -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, - Kokkos::HIP, Kokkos::HIPSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) + +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR2_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace Impl diff --git 
a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_blas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_blas.hpp index f22e800bc5fa..4aa32b5b0eaf 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_blas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_blas.hpp @@ -29,286 +29,216 @@ namespace Impl { constexpr int one = 1; \ const int LDA = A_is_lr ? A.stride(0) : A.stride(1); -#define KOKKOSBLAS2_DSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_BLAS,double]"); \ - KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ - if (A_is_ll) { \ - HostBlas::syr2(uplo[0], N, alpha, X.data(), one, Y.data(), \ - one, A.data(), LDA); \ - } else { \ - /* blasDsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> 
\ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_BLAS,double]"); \ + KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ + if (A_is_ll) { \ + HostBlas::syr2(uplo[0], N, alpha, X.data(), one, Y.data(), one, A.data(), LDA); \ + } else { \ + /* blasDsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_BLAS,float]"); \ - KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ - if (A_is_ll) { \ - HostBlas::syr2(uplo[0], N, alpha, X.data(), one, Y.data(), \ - one, A.data(), LDA); \ - } else { \ - /* blasSsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SSYR2_BLAS(LAYOUT, 
EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_BLAS,float]"); \ + KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ + if (A_is_ll) { \ + HostBlas::syr2(uplo[0], N, alpha, X.data(), one, Y.data(), one, A.data(), LDA); \ + } else { \ + /* blasSsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - 
"KokkosBlas::syr2[TPL_BLAS,complex"); \ - KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - /* No blasZsyr2() => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } else { \ - if (A_is_ll) { \ - HostBlas>::her2( \ - uplo[0], N, alpha, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - /* blasZher2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_BLAS,complex"); \ + KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + /* No blasZsyr2() => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } else { \ + if (A_is_ll) { \ + HostBlas>::her2(uplo[0], N, alpha, \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else 
{ \ + /* blasZher2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr2[TPL_BLAS,complex"); \ - KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - /* No blasCsyr2() => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } else { \ - if (A_is_ll) { \ - HostBlas>::her2( \ - uplo[0], N, alpha, \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(Y.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - /* blasCher2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CSYR2_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + 
Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_BLAS,complex"); \ + KOKKOSBLAS2_SYR2_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + /* No blasCsyr2() => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } else { \ + if (A_is_ll) { \ + HostBlas>::her2(uplo[0], N, alpha, \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(Y.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + /* blasCher2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) - 
-KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, 
true) +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) 
+KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_SSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR2_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_cublas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_cublas.hpp index ca98fedf0d98..4dd95aa79a82 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_cublas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_cublas.hpp @@ -22,349 +22,257 @@ namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uploChar) \ - bool A_is_ll = std::is_same::value; \ - bool A_is_lr = std::is_same::value; \ - const int N = static_cast(A_is_lr ? A.extent(0) : A.extent(1)); \ - constexpr int one = 1; \ - const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ - cublasFillMode_t fillMode = (uploChar == 'L' || uploChar == 'l') \ - ? 
CUBLAS_FILL_MODE_LOWER \ - : CUBLAS_FILL_MODE_UPPER; +#define KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uploChar) \ + bool A_is_ll = std::is_same::value; \ + bool A_is_lr = std::is_same::value; \ + const int N = static_cast(A_is_lr ? A.extent(0) : A.extent(1)); \ + constexpr int one = 1; \ + const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ + cublasFillMode_t fillMode = (uploChar == 'L' || uploChar == 'l') ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER; -#define KOKKOSBLAS2_DSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_CUBLAS,double]"); \ - KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasDsyr2(s.handle, fillMode, N, &alpha, X.data(), one, \ - Y.data(), one, A.data(), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasDsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define 
KOKKOSBLAS2_DSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_CUBLAS,double]"); \ + KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasDsyr2(s.handle, fillMode, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasDsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - 
static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_CUBLAS,float]"); \ - KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSsyr2(s.handle, fillMode, N, &alpha, X.data(), one, \ - Y.data(), one, A.data(), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasSsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_CUBLAS,float]"); \ + KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasSsyr2(s.handle, fillMode, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasSsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr2[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZsyr2( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasZsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } else { \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZher2( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasZher2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = 
KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZsyr2(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasZsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } else { \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZher2(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasZher2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& 
alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr2[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCsyr2(s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasCsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } else { \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCher2(s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasCher2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CSYR2_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex 
SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_SYR2_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCsyr2(s.handle, fillMode, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasCsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } else { \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCher2(s.handle, fillMode, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasCher2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; 
-KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_DSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, 
Kokkos::CudaSpace, true) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_SSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) 
+KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_ZSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_CSYR2_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp index 869c065af286..84085224acc9 100644 --- 
a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp @@ -28,307 +28,233 @@ namespace Impl { const int N = static_cast(A_is_lr ? A.extent(0) : A.extent(1)); \ constexpr int one = 1; \ const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ - rocblas_fill fillMode = (uploChar == 'L' || uploChar == 'l') \ - ? rocblas_fill_lower \ - : rocblas_fill_upper; + rocblas_fill fillMode = (uploChar == 'L' || uploChar == 'l') ? rocblas_fill_lower : rocblas_fill_upper; -#define KOKKOSBLAS2_DSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_ROCBLAS,double]"); \ - KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_dsyr2(s.handle, fillMode, N, &alpha, X.data(), one, \ - Y.data(), one, A.data(), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_dsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, 
uplo, alpha, X, Y, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_ROCBLAS,double]"); \ + KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_dsyr2(s.handle, fillMode, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_dsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - 
Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_ROCBLAS,float]"); \ - KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_ssyr2(s.handle, fillMode, N, &alpha, X.data(), one, \ - Y.data(), one, A.data(), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_ssyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_ROCBLAS,float]"); \ + KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + 
KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_ssyr2(s.handle, fillMode, N, &alpha, X.data(), one, Y.data(), one, A.data(), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_ssyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr2[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zsyr2( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - 
reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_zsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zher2( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_zher2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 
'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zsyr2(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_zsyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zher2(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_zher2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR2*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ 
- \ - static void syr2(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const YViewType& Y, \ - const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr2[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_csyr2( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_csyr2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cher2( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(Y.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_cher2() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR2::syr2(space, trans, uplo, alpha, X, Y, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CSYR2_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR2*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits 
>, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr2(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const YViewType& Y, \ + const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr2[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_SYR2_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_csyr2(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_csyr2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cher2(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(Y.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + 
KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_cher2() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR2::syr2(space, trans, uplo, alpha, X, \ + Y, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_DSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_SSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) 
+KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_ZSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_CSYR2_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_avail.hpp index f537b3854abc..e1eb94e42568 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_avail.hpp @@ -28,59 +28,38 @@ struct syr_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct syr_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + 
Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) - -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) + +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, 
Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) - -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) + +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace) #endif #endif @@ -88,102 +67,60 @@ KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct syr_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ 
+#define KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) - -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) - -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaSpace) - -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - Kokkos::CudaUVMSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::Cuda, - 
Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) + +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace) + +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace) + +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace) #endif // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -#define KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct syr_tpl_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 
Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct syr_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) - -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) -KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) + +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS2_SYR_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, 
Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_blas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_blas.hpp index fc8fb949d7ce..5b0eb0ec52e5 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_blas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_blas.hpp @@ -29,254 +29,186 @@ namespace Impl { constexpr int one = 1; \ const int LDA = A_is_lr ? A.stride(0) : A.stride(1); -#define KOKKOSBLAS2_DSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_BLAS,double]"); \ - KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ - if (A_is_ll) { \ - HostBlas::syr(uplo[0], N, alpha, X.data(), one, A.data(), \ - LDA); \ - } else { \ - /* blasDsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr(const typename 
AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_BLAS,double]"); \ + KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ + if (A_is_ll) { \ + HostBlas::syr(uplo[0], N, alpha, X.data(), one, A.data(), LDA); \ + } else { \ + /* blasDsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_BLAS,float]"); \ - KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ - if (A_is_ll) { \ - HostBlas::syr(uplo[0], N, alpha, X.data(), one, A.data(), \ - LDA); \ - } else { \ - /* blasSsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr(const typename 
AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_BLAS,float]"); \ + KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ + if (A_is_ll) { \ + HostBlas::syr(uplo[0], N, alpha, X.data(), one, A.data(), LDA); \ + } else { \ + /* blasSsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr[TPL_BLAS,complex"); \ - KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - /* No blasZsyr() => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } else { \ - if (A_is_ll) { \ - HostBlas>::her( \ - uplo[0], N, alpha.real(), \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - /* blasZher() + [~A_ll or ~real alpha] => call kokkos-kernels' \ - * implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + 
struct SYR*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_BLAS,complex"); \ + KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + /* No blasZsyr() => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } else { \ + if (A_is_ll) { \ + HostBlas>::her(uplo[0], N, alpha.real(), \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + /* blasZher() + [~A_ll or ~real alpha] => call kokkos-kernels' \ + * implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits> \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr[TPL_BLAS,complex"); \ - KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ - bool 
justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - /* No blasCsyr() => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - HostBlas>::her( \ - uplo[0], N, alpha.real(), \ - reinterpret_cast*>(X.data()), one, \ - reinterpret_cast*>(A.data()), LDA); \ - } else { \ - /* blasCher() + [~A_ll or ~real alpha] => call kokkos-kernels' \ - * implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CSYR_BLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits> \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_BLAS,complex"); \ + KOKKOSBLAS2_SYR_DETERMINE_ARGS(LAYOUT); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + /* No blasCsyr() => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + HostBlas>::her(uplo[0], N, alpha.real(), \ + reinterpret_cast*>(X.data()), one, \ + reinterpret_cast*>(A.data()), LDA); \ + } else { \ + /* blasCher() + [~A_ll or ~real alpha] => call kokkos-kernels' \ + * implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL 
-KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, - false) -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, - false) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) 
+KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) + +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace, false) +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::Serial, Kokkos::HostSpace, false) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - false) -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) - -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, 
Kokkos::HostSpace, - false) -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - true) -KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, - false) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_DSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_SSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) + +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace, false) +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, true) +KOKKOSBLAS2_CSYR_BLAS(Kokkos::LayoutRight, Kokkos::OpenMP, Kokkos::HostSpace, false) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_cublas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_cublas.hpp index dad3c93dbc25..43b177d9a5f0 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_cublas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_cublas.hpp @@ -22,309 +22,224 @@ namespace 
KokkosBlas { namespace Impl { -#define KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uploChar) \ - bool A_is_ll = std::is_same::value; \ - bool A_is_lr = std::is_same::value; \ - const int N = static_cast(A_is_lr ? A.extent(0) : A.extent(1)); \ - constexpr int one = 1; \ - const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ - cublasFillMode_t fillMode = (uploChar == 'L' || uploChar == 'l') \ - ? CUBLAS_FILL_MODE_LOWER \ - : CUBLAS_FILL_MODE_UPPER; - -#define KOKKOSBLAS2_DSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_CUBLAS,double]"); \ - KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDsyr( \ - s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasDsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uploChar) \ + bool A_is_ll = std::is_same::value; \ + bool A_is_lr = std::is_same::value; \ + const int N = static_cast(A_is_lr ? 
A.extent(0) : A.extent(1)); \ + constexpr int one = 1; \ + const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ + cublasFillMode_t fillMode = (uploChar == 'L' || uploChar == 'l') ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER; + +#define KOKKOSBLAS2_DSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_CUBLAS,double]"); \ + KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDsyr(s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasDsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename 
AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_CUBLAS,float]"); \ - KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSsyr( \ - s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasSsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_CUBLAS,float]"); \ + KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSsyr(s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { 
\ + /* cublasSsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZsyr( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasZsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - const double alpha_val = alpha.real(); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZher( \ - s.handle, 
fillMode, N, &alpha_val, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasZher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ - * implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZsyr(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasZsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + const double alpha_val = alpha.real(); \ + KokkosBlas::Impl::CudaBlasSingleton& s = 
KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZher(s.handle, fillMode, N, &alpha_val, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasZher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ + * implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct SYR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr[TPL_CUBLAS,complex]"); \ - KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCsyr(s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasCsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ 
- space, trans, uplo, alpha, X, A); \ - } \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - const float alpha_val = alpha.real(); \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCher(s.handle, fillMode, N, &alpha_val, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - /* cublasCher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ - * implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CSYR_CUBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_CUBLAS,complex]"); \ + KOKKOSBLAS2_SYR_CUBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCsyr(s.handle, fillMode, N, reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasCsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + const float alpha_val = alpha.real(); \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCher(s.handle, fillMode, N, &alpha_val, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } else { \ + /* cublasCher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ + * implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) - -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) - -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) - 
-KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) - -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) - -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) - -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, - false) -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - true) -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, - false) - -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - true) -KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, - false) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, 
Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_DSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_SSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_ZSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) 
+KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, false) +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS2_CSYR_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_rocblas.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_rocblas.hpp index cf02e9e207ac..59c99c1225ec 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_rocblas.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_syr_tpl_spec_decl_rocblas.hpp @@ -28,283 +28,205 @@ namespace Impl { const int N = static_cast(A_is_lr ? A.extent(0) : A.extent(1)); \ constexpr int one = 1; \ const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ - rocblas_fill fillMode = (uploChar == 'L' || uploChar == 'l') \ - ? rocblas_fill_lower \ - : rocblas_fill_upper; + rocblas_fill fillMode = (uploChar == 'L' || uploChar == 'l') ? 
rocblas_fill_lower : rocblas_fill_upper; -#define KOKKOSBLAS2_DSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_ROCBLAS,double]"); \ - KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dsyr( \ - s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_dsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char 
uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_ROCBLAS,double]"); \ + KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dsyr(s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_dsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_SSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_ROCBLAS,float]"); \ - KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - 
KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_ssyr( \ - s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_ssyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_SSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_ROCBLAS,float]"); \ + KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_ssyr(s.handle, fillMode, N, &alpha, X.data(), one, A.data(), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_ssyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_ZSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ 
- Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zsyr( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_zsyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - const double alpha_val = alpha.real(); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zher( \ - s.handle, fillMode, N, &alpha_val, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - 
} else { \ - /* rocblas_zher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ - * implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_ZSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zsyr(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_zsyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + const double alpha_val = alpha.real(); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + 
KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zher(s.handle, fillMode, N, &alpha_val, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_zher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ + * implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS2_CSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct SYR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void syr(const typename AViewType::execution_space& space, \ - const char trans[], const char uplo[], \ - typename AViewType::const_value_type& alpha, \ - const XViewType& X, const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::syr[TPL_ROCBLAS,complex]"); \ - KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ - bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ - if (justTranspose) { \ - if (A_is_ll) { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_csyr( \ - s.handle, fillMode, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), 
LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_csyr() + ~A_ll => call kokkos-kernels' implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } else { \ - if (A_is_ll && (alpha.imag() == 0.)) { \ - const float alpha_val = alpha.real(); \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cher( \ - s.handle, fillMode, N, &alpha_val, \ - reinterpret_cast(X.data()), one, \ - reinterpret_cast(A.data()), LDA)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - /* rocblas_cher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ - * implementation */ \ - SYR::syr( \ - space, trans, uplo, alpha, X, A); \ - } \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CSYR_ROCBLAS(LAYOUT, EXEC_SPACE, MEM_SPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct SYR*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void syr(const typename AViewType::execution_space& space, const char trans[], const char uplo[], \ + typename AViewType::const_value_type& alpha, const XViewType& X, const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::syr[TPL_ROCBLAS,complex]"); \ + KOKKOSBLAS2_SYR_ROCBLAS_DETERMINE_ARGS(LAYOUT, uplo[0]); \ + bool justTranspose = (trans[0] == 'T') || (trans[0] == 't'); \ + if (justTranspose) { \ + if (A_is_ll) { \ + KokkosBlas::Impl::RocBlasSingleton& s = 
KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_csyr(s.handle, fillMode, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_csyr() + ~A_ll => call kokkos-kernels' implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } else { \ + if (A_is_ll && (alpha.imag() == 0.)) { \ + const float alpha_val = alpha.real(); \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_pointer_mode(s.handle, rocblas_pointer_mode_host)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cher(s.handle, fillMode, N, &alpha_val, \ + reinterpret_cast(X.data()), one, \ + reinterpret_cast(A.data()), LDA)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } else { \ + /* rocblas_cher() + [~A_ll or ~real alpha]=> call kokkos-kernels' \ + * implementation */ \ + SYR::syr(space, trans, uplo, alpha, X, A); \ + } \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutRight, 
Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_DSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_SSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) +KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_ZSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) -KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, - false) -KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - true) -KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, - false) +KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, false) 
+KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, true) +KOKKOSBLAS2_CSYR_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, Kokkos::HIPSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp index 8e96898b10ee..0dd3ef81e9be 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp @@ -28,46 +28,34 @@ struct gemm_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) -#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTB, \ - LAYOUTC, MEMSPACE) \ - template \ - struct gemm_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct gemm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, 
Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) #endif @@ -75,111 +63,78 @@ KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, // cuBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) -#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTB, \ - LAYOUTC, MEMSPACE) \ - template \ - struct gemm_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct gemm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = 
true }; \ }; -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - 
Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif // rocBLAS #if 
defined(KOKKOSKERNELS_ENABLE_TPL_ROCBLAS) -#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct gemm_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct gemm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) - -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, - Kokkos::HIPSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, - Kokkos::HIPSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIPSpace) -KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) + +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, 
Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HIPSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp index 68bf2708eca8..52123a9daf8c 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp @@ -23,130 +23,92 @@ namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS3_XGEMM_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ - LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - CViewType; \ - \ - static void gemm(const ExecSpace& /* space*/, const char transA[], \ - const char transB[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B, \ - typename CViewType::const_value_type& beta, \ - const CViewType& C) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemm[TPL_BLAS," #SCALAR_TYPE \ - "]"); \ - const bool A_t = (transA[0] != 'N') && (transA[0] != 'n'); \ - const KK_INT M = C.extent(0); \ - const KK_INT N = C.extent(1); \ - const KK_INT K = A.extent(A_t ? 0 : 1); \ - \ - bool A_is_lr = std::is_same::value; \ - bool B_is_lr = std::is_same::value; \ - bool C_is_lr = std::is_same::value; \ - \ - const KK_INT AST = A_is_lr ? A.stride(0) : A.stride(1), \ - LDA = AST == 0 ? 1 : AST; \ - const KK_INT BST = B_is_lr ? B.stride(0) : B.stride(1), \ - LDB = BST == 0 ? 1 : BST; \ - const KK_INT CST = C_is_lr ? 
C.stride(0) : C.stride(1), \ - LDC = CST == 0 ? 1 : CST; \ - \ - const BASE_SCALAR_TYPE alpha_val = alpha, beta_val = beta; \ - if (!A_is_lr && !B_is_lr && !C_is_lr) \ - HostBlas::gemm( \ - transA[0], transB[0], M, N, K, alpha_val, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB, \ - beta_val, reinterpret_cast(C.data()), LDC); \ - if (A_is_lr && B_is_lr && C_is_lr) \ - HostBlas::gemm( \ - transB[0], transA[0], N, M, K, alpha_val, \ - reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(A.data()), LDA, \ - beta_val, reinterpret_cast(C.data()), LDC); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_XGEMM_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static void gemm(const ExecSpace& /* space*/, const char transA[], const char transB[], \ + typename AViewType::const_value_type& alpha, const AViewType& A, const BViewType& B, \ + typename CViewType::const_value_type& beta, const CViewType& C) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemm[TPL_BLAS," #SCALAR_TYPE "]"); \ + const bool A_t = (transA[0] != 'N') && (transA[0] != 'n'); \ + const KK_INT M = C.extent(0); \ + const KK_INT N = C.extent(1); \ + const KK_INT K = A.extent(A_t ? 0 : 1); \ + \ + bool A_is_lr = std::is_same::value; \ + bool B_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + \ + const KK_INT AST = A_is_lr ? A.stride(0) : A.stride(1), LDA = AST == 0 ? 1 : AST; \ + const KK_INT BST = B_is_lr ? B.stride(0) : B.stride(1), LDB = BST == 0 ? 
1 : BST; \ + const KK_INT CST = C_is_lr ? C.stride(0) : C.stride(1), LDC = CST == 0 ? 1 : CST; \ + \ + const BASE_SCALAR_TYPE alpha_val = alpha, beta_val = beta; \ + if (!A_is_lr && !B_is_lr && !C_is_lr) \ + HostBlas::gemm(transA[0], transB[0], M, N, K, alpha_val, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB, beta_val, \ + reinterpret_cast(C.data()), LDC); \ + if (A_is_lr && B_is_lr && C_is_lr) \ + HostBlas::gemm(transB[0], transA[0], N, M, K, alpha_val, \ + reinterpret_cast(B.data()), LDB, \ + reinterpret_cast(A.data()), LDA, beta_val, \ + reinterpret_cast(C.data()), LDC); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_DGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_BLAS(double, double, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ +#define KOKKOSBLAS3_DGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_BLAS(double, double, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS3_SGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_BLAS(float, float, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) + +#define KOKKOSBLAS3_ZGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_BLAS(Kokkos::complex, std::complex, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_SGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_BLAS(float, float, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ +#define KOKKOSBLAS3_CGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_BLAS(Kokkos::complex, std::complex, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_ZGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_BLAS(Kokkos::complex, std::complex, \ - LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) - -#define 
KOKKOSBLAS3_CGEMM_BLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_BLAS(Kokkos::complex, std::complex, LAYOUTA, \ - LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) - -KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) - -KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) - -KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, false) - -KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace, true) -KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, 
Kokkos::HostSpace, false) +KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_DGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_SGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_ZGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -160,195 +122,131 @@ KOKKOSBLAS3_CGEMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, namespace KokkosBlas { namespace Impl { -#define 
KOKKOSBLAS3_XGEMM_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, \ - LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - CViewType; \ - \ - static void gemm(const ExecSpace& space, const char transA[], \ - const char transB[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B, \ - typename CViewType::const_value_type& beta, \ - const CViewType& C) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemm[TPL_CUBLAS," #SCALAR_TYPE "]"); \ - const bool A_t = (transA[0] != 'N') && (transA[0] != 'n'); \ - const int M = static_cast(C.extent(0)); \ - const int N = static_cast(C.extent(1)); \ - const int K = static_cast(A.extent(A_t ? 0 : 1)); \ - \ - bool A_is_lr = std::is_same::value; \ - bool B_is_lr = std::is_same::value; \ - bool C_is_lr = std::is_same::value; \ - \ - const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ - LDA = AST == 0 ? 1 : AST; \ - const int BST = B_is_lr ? B.stride(0) : B.stride(1), \ - LDB = BST == 0 ? 1 : BST; \ - const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ - LDC = CST == 0 ? 
1 : CST; \ - \ - cublasOperation_t transa = trans_mode_kk_to_cublas(transA); \ - cublasOperation_t transb = trans_mode_kk_to_cublas(transB); \ - \ - constexpr int numDotsLayoutLeftThreshold = 1600; \ - constexpr int numDotsLayoutRightThreshold = 100; \ - if ((!A_is_lr && transa != CUBLAS_OP_N && transb == CUBLAS_OP_N && \ - M * N < numDotsLayoutLeftThreshold) || \ - (A_is_lr && transa != CUBLAS_OP_N && transb == CUBLAS_OP_N && \ - M * N < numDotsLayoutRightThreshold)) { \ - DotBasedGEMM gemm( \ - alpha, A, B, beta, C); \ - bool conjT = (std::is_same::value || \ - std::is_same::value) \ - ? false \ - : (transa == CUBLAS_OP_C ? true : false); \ - gemm.run(space, conjT); \ - } else { \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (!A_is_lr && !B_is_lr && !C_is_lr) \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ - s.handle, transa, transb, M, N, K, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(&beta), \ - reinterpret_cast(C.data()), LDC)); \ - if (A_is_lr && B_is_lr && C_is_lr) \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ - s.handle, transb, transa, N, M, K, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(&beta), \ - reinterpret_cast(C.data()), LDC)); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_XGEMM_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + 
typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static void gemm(const ExecSpace& space, const char transA[], const char transB[], \ + typename AViewType::const_value_type& alpha, const AViewType& A, const BViewType& B, \ + typename CViewType::const_value_type& beta, const CViewType& C) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemm[TPL_CUBLAS," #SCALAR_TYPE "]"); \ + const bool A_t = (transA[0] != 'N') && (transA[0] != 'n'); \ + const int M = static_cast(C.extent(0)); \ + const int N = static_cast(C.extent(1)); \ + const int K = static_cast(A.extent(A_t ? 0 : 1)); \ + \ + bool A_is_lr = std::is_same::value; \ + bool B_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), LDA = AST == 0 ? 1 : AST; \ + const int BST = B_is_lr ? B.stride(0) : B.stride(1), LDB = BST == 0 ? 1 : BST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), LDC = CST == 0 ? 1 : CST; \ + \ + cublasOperation_t transa = trans_mode_kk_to_cublas(transA); \ + cublasOperation_t transb = trans_mode_kk_to_cublas(transB); \ + \ + constexpr int numDotsLayoutLeftThreshold = 1600; \ + constexpr int numDotsLayoutRightThreshold = 100; \ + if ((!A_is_lr && transa != CUBLAS_OP_N && transb == CUBLAS_OP_N && M * N < numDotsLayoutLeftThreshold) || \ + (A_is_lr && transa != CUBLAS_OP_N && transb == CUBLAS_OP_N && M * N < numDotsLayoutRightThreshold)) { \ + DotBasedGEMM gemm(alpha, A, B, beta, C); \ + bool conjT = (std::is_same::value || std::is_same::value) \ + ? false \ + : (transa == CUBLAS_OP_C ? 
true : false); \ + gemm.run(space, conjT); \ + } else { \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (!A_is_lr && !B_is_lr && !C_is_lr) \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ + s.handle, transa, transb, M, N, K, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB, \ + reinterpret_cast(&beta), reinterpret_cast(C.data()), LDC)); \ + if (A_is_lr && B_is_lr && C_is_lr) \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ + s.handle, transb, transa, N, M, K, reinterpret_cast(&alpha), \ + reinterpret_cast(B.data()), LDB, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(&beta), reinterpret_cast(C.data()), LDC)); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_DGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_CUBLAS(double, double, cublasDgemm, LAYOUTA, LAYOUTB, \ - LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_DGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_CUBLAS(double, double, cublasDgemm, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_SGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_CUBLAS(float, float, cublasSgemm, LAYOUTA, LAYOUTB, \ - LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_SGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_CUBLAS(float, float, cublasSgemm, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_ZGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_CUBLAS(Kokkos::complex, cuDoubleComplex, \ - cublasZgemm, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) +#define 
KOKKOSBLAS3_ZGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_CUBLAS(Kokkos::complex, cuDoubleComplex, cublasZgemm, LAYOUTA, LAYOUTB, LAYOUTC, \ + MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_CGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_CUBLAS(Kokkos::complex, cuComplex, cublasCgemm, \ - LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ +#define KOKKOSBLAS3_CGEMM_CUBLAS(LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_CUBLAS(Kokkos::complex, cuComplex, cublasCgemm, LAYOUTA, LAYOUTB, LAYOUTC, MEM_SPACE, \ ETI_SPEC_AVAIL) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) - -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - 
Kokkos::LayoutRight, Kokkos::CudaSpace, false) - -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) - -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, true) -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace, false) - -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - 
Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_DGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, 
false) +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_SGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_ZGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) 
+KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -362,120 +260,93 @@ KOKKOSBLAS3_CGEMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS3_XGEMM_ROCBLAS(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, \ - ROCBLAS_FN, LAYOUT, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - CViewType; \ - \ - static void gemm(const typename CViewType::execution_space& space, \ - const char transA[], const char transB[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B, \ - typename CViewType::const_value_type& beta, \ - const CViewType& C) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemm[TPL_ROCBLAS," #SCALAR_TYPE "]"); \ - \ - const bool A_t = (transA[0] != 'N') && (transA[0] != 'n'); \ - const int M = static_cast(C.extent(0)); \ - const int N = static_cast(C.extent(1)); \ - const int K = static_cast(A.extent(A_t ? 0 : 1)); \ - \ - bool is_lr = std::is_same::value; \ - \ - const int AST = is_lr ? A.stride(0) : A.stride(1), \ - LDA = AST == 0 ? 1 : AST; \ - const int BST = is_lr ? B.stride(0) : B.stride(1), \ - LDB = BST == 0 ? 1 : BST; \ - const int CST = is_lr ? C.stride(0) : C.stride(1), \ - LDC = CST == 0 ? 
1 : CST; \ - \ - rocblas_operation transa = trans_mode_kk_to_rocblas(transA); \ - rocblas_operation transb = trans_mode_kk_to_rocblas(transB); \ - \ - constexpr int numDotsLayoutLeftThreshold = 1600; \ - constexpr int numDotsLayoutRightThreshold = 100; \ - if ((!is_lr && transa != rocblas_operation_none && \ - transb == rocblas_operation_none && \ - M * N < numDotsLayoutLeftThreshold) || \ - (is_lr && transa != rocblas_operation_none && \ - transb == rocblas_operation_none && \ - M * N < numDotsLayoutRightThreshold)) { \ - DotBasedGEMM gemm( \ - alpha, A, B, beta, C); \ - bool conjT = \ - (std::is_same::value || \ - std::is_same::value) \ - ? false \ - : (transa == rocblas_operation_conjugate_transpose ? true \ - : false); \ - gemm.run(space, conjT); \ - } else { \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - if (!is_lr) \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN( \ - s.handle, transa, transb, M, N, K, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(&beta), \ - reinterpret_cast(C.data()), LDC)); \ - else \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN( \ - s.handle, transb, transa, N, M, K, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(&beta), \ - reinterpret_cast(C.data()), LDC)); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_XGEMM_ROCBLAS(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > 
\ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static void gemm(const typename CViewType::execution_space& space, const char transA[], const char transB[], \ + typename AViewType::const_value_type& alpha, const AViewType& A, const BViewType& B, \ + typename CViewType::const_value_type& beta, const CViewType& C) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemm[TPL_ROCBLAS," #SCALAR_TYPE "]"); \ + \ + const bool A_t = (transA[0] != 'N') && (transA[0] != 'n'); \ + const int M = static_cast(C.extent(0)); \ + const int N = static_cast(C.extent(1)); \ + const int K = static_cast(A.extent(A_t ? 0 : 1)); \ + \ + bool is_lr = std::is_same::value; \ + \ + const int AST = is_lr ? A.stride(0) : A.stride(1), LDA = AST == 0 ? 1 : AST; \ + const int BST = is_lr ? B.stride(0) : B.stride(1), LDB = BST == 0 ? 1 : BST; \ + const int CST = is_lr ? C.stride(0) : C.stride(1), LDC = CST == 0 ? 1 : CST; \ + \ + rocblas_operation transa = trans_mode_kk_to_rocblas(transA); \ + rocblas_operation transb = trans_mode_kk_to_rocblas(transB); \ + \ + constexpr int numDotsLayoutLeftThreshold = 1600; \ + constexpr int numDotsLayoutRightThreshold = 100; \ + if ((!is_lr && transa != rocblas_operation_none && transb == rocblas_operation_none && \ + M * N < numDotsLayoutLeftThreshold) || \ + (is_lr && transa != rocblas_operation_none && transb == rocblas_operation_none && \ + M * N < numDotsLayoutRightThreshold)) { \ + DotBasedGEMM gemm(alpha, A, B, beta, C); \ + bool conjT = (std::is_same::value || std::is_same::value) \ + ? false \ + : (transa == rocblas_operation_conjugate_transpose ? 
true : false); \ + gemm.run(space, conjT); \ + } else { \ + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); \ + if (!is_lr) \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN(s.handle, transa, transb, M, N, K, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB, \ + reinterpret_cast(&beta), \ + reinterpret_cast(C.data()), LDC)); \ + else \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(ROCBLAS_FN(s.handle, transb, transa, N, M, K, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(B.data()), LDB, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(&beta), \ + reinterpret_cast(C.data()), LDC)); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_DGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_ROCBLAS(double, double, rocblas_dgemm, LAYOUT, MEM_SPACE, \ - ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_DGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_ROCBLAS(double, double, rocblas_dgemm, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_SGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_ROCBLAS(float, float, rocblas_sgemm, LAYOUT, MEM_SPACE, \ - ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_SGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_ROCBLAS(float, float, rocblas_sgemm, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_ZGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_ROCBLAS(Kokkos::complex, rocblas_double_complex, \ - rocblas_zgemm, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_ZGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_ROCBLAS(Kokkos::complex, rocblas_double_complex, rocblas_zgemm, LAYOUT, MEM_SPACE, \ + ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_CGEMM_ROCBLAS(LAYOUT, MEM_SPACE, 
ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_XGEMM_ROCBLAS(Kokkos::complex, rocblas_float_complex, \ - rocblas_cgemm, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_CGEMM_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_XGEMM_ROCBLAS(Kokkos::complex, rocblas_float_complex, rocblas_cgemm, LAYOUT, MEM_SPACE, \ + ETI_SPEC_AVAIL) KOKKOSBLAS3_DGEMM_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, true) KOKKOSBLAS3_DGEMM_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, false) diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_avail.hpp index 010b44a154de..83e39a240e09 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_avail.hpp @@ -29,38 +29,26 @@ struct trmm_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTB, \ - MEMSPACE) \ - template \ - struct trmm_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTB, MEMSPACE) \ + template \ + struct trmm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, 
Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) #endif // KOKKOSKERNELS_ENABLE_TPL_BLAS @@ -68,61 +56,40 @@ KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTB, \ - MEMSPACE) \ - template \ - struct trmm_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTB, MEMSPACE) \ + template \ + struct trmm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - 
Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, 
Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_decl.hpp index 53c73f741631..4e68c08decfa 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trmm_tpl_spec_decl.hpp @@ -24,136 +24,103 @@ namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS3_TRMM_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, LAYOUTB, \ - MEM_SPACE, 
ETI_SPEC_AVAIL) \ - template \ - struct TRMM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trmm(const ExecSpace& /*space*/, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::trmm[TPL_BLAS," #SCALAR_TYPE \ - "]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_layout_left = \ - std::is_same::value; \ - bool B_is_layout_left = \ - std::is_same::value; \ - \ - const int AST = A_is_layout_left ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_layout_left ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - char side_; \ - char uplo_; \ - \ - if (A_is_layout_left) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'L'; \ - else \ - side_ = 'R'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'L'; \ - else \ - uplo_ = 'U'; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'R'; \ - else \ - side_ = 'L'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'U'; \ - else \ - uplo_ = 'L'; \ - } \ - \ - if (A_is_layout_left) \ - HostBlas::trmm( \ - side_, uplo_, trans[0], diag[0], M, N, alpha, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB); \ - else \ - HostBlas::trmm( \ - side_, uplo_, trans[0], diag[0], N, M, alpha, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_TRMM_BLAS(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trmm(const ExecSpace& /*space*/, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trmm[TPL_BLAS," #SCALAR_TYPE "]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_layout_left = std::is_same::value; \ + bool B_is_layout_left = std::is_same::value; \ + \ + const int AST = A_is_layout_left ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_layout_left ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 
1 : BST; \ + \ + char side_; \ + char uplo_; \ + \ + if (A_is_layout_left) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'L'; \ + else \ + side_ = 'R'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'L'; \ + else \ + uplo_ = 'U'; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'R'; \ + else \ + side_ = 'L'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'U'; \ + else \ + uplo_ = 'L'; \ + } \ + \ + if (A_is_layout_left) \ + HostBlas::trmm(side_, uplo_, trans[0], diag[0], M, N, alpha, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB); \ + else \ + HostBlas::trmm(side_, uplo_, trans[0], diag[0], N, M, alpha, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define KOKKOSBLAS3_DTRMM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_BLAS(double, double, LAYOUTA, LAYOUTB, MEM_SPACE, \ - ETI_SPEC_AVAIL) + KOKKOSBLAS3_TRMM_BLAS(double, double, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) #define KOKKOSBLAS3_STRMM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_BLAS(float, float, LAYOUTA, LAYOUTB, MEM_SPACE, \ - ETI_SPEC_AVAIL) + KOKKOSBLAS3_TRMM_BLAS(float, float, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) #define KOKKOSBLAS3_ZTRMM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_BLAS(Kokkos::complex, std::complex, \ - LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) + KOKKOSBLAS3_TRMM_BLAS(Kokkos::complex, std::complex, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_CTRMM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_BLAS(Kokkos::complex, std::complex, LAYOUTA, \ - LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSBLAS3_CTRMM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_TRMM_BLAS(Kokkos::complex, std::complex, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) // Explicitly define the TRMM class 
for all permutations listed below -KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) - -KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) - -KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) - -KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) +KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_DTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) 
+KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_STRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_ZTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -166,196 +133,143 @@ KOKKOSBLAS3_CTRMM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS3_TRMM_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, \ - LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRMM, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trmm(const ExecSpace& space, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::trmm[TPL_CUBLAS," #SCALAR_TYPE "]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = 
static_cast(B.extent(1)); \ - \ - bool A_is_layout_left = \ - std::is_same::value; \ - bool B_is_layout_left = \ - std::is_same::value; \ - \ - const int AST = A_is_layout_left ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_layout_left ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 1 : BST; \ - \ - cublasSideMode_t side_; \ - cublasFillMode_t uplo_; \ - cublasOperation_t trans_; \ - cublasDiagType_t diag_; \ - \ - if (A_is_layout_left) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_LEFT; \ - else \ - side_ = CUBLAS_SIDE_RIGHT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_RIGHT; \ - else \ - side_ = CUBLAS_SIDE_LEFT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - } \ - \ - if ((trans[0] == 'N') || (trans[0] == 'n')) \ - trans_ = CUBLAS_OP_N; \ - else if ((trans[0] == 'T') || (trans[0] == 't')) \ - trans_ = CUBLAS_OP_T; \ - else \ - trans_ = CUBLAS_OP_C; \ - if ((diag[0] == 'U') || (diag[0] == 'u')) \ - diag_ = CUBLAS_DIAG_UNIT; \ - else \ - diag_ = CUBLAS_DIAG_NON_UNIT; \ - \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_layout_left) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - CUBLAS_FN(s.handle, side_, uplo_, trans_, diag_, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), \ - LDA, reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(B.data()), LDB)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - CUBLAS_FN(s.handle, side_, uplo_, trans_, diag_, N, M, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), \ - LDA, reinterpret_cast(B.data()), LDB, \ - reinterpret_cast(B.data()), 
LDB)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_TRMM_CUBLAS(SCALAR_TYPE, CUDA_SCALAR_TYPE, CUBLAS_FN, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRMM, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trmm(const ExecSpace& space, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trmm[TPL_CUBLAS," #SCALAR_TYPE "]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_layout_left = std::is_same::value; \ + bool B_is_layout_left = std::is_same::value; \ + \ + const int AST = A_is_layout_left ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_layout_left ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 
1 : BST; \ + \ + cublasSideMode_t side_; \ + cublasFillMode_t uplo_; \ + cublasOperation_t trans_; \ + cublasDiagType_t diag_; \ + \ + if (A_is_layout_left) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_LEFT; \ + else \ + side_ = CUBLAS_SIDE_RIGHT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_RIGHT; \ + else \ + side_ = CUBLAS_SIDE_LEFT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + } \ + \ + if ((trans[0] == 'N') || (trans[0] == 'n')) \ + trans_ = CUBLAS_OP_N; \ + else if ((trans[0] == 'T') || (trans[0] == 't')) \ + trans_ = CUBLAS_OP_T; \ + else \ + trans_ = CUBLAS_OP_C; \ + if ((diag[0] == 'U') || (diag[0] == 'u')) \ + diag_ = CUBLAS_DIAG_UNIT; \ + else \ + diag_ = CUBLAS_DIAG_NON_UNIT; \ + \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_layout_left) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ + s.handle, side_, uplo_, trans_, diag_, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), \ + LDB, reinterpret_cast(B.data()), LDB)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(CUBLAS_FN( \ + s.handle, side_, uplo_, trans_, diag_, N, M, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), \ + LDB, reinterpret_cast(B.data()), LDB)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define KOKKOSBLAS3_DTRMM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_CUBLAS(double, double, cublasDtrmm, LAYOUTA, LAYOUTB, \ - MEM_SPACE, ETI_SPEC_AVAIL) + KOKKOSBLAS3_TRMM_CUBLAS(double, double, 
cublasDtrmm, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) #define KOKKOSBLAS3_STRMM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_CUBLAS(float, float, cublasStrmm, LAYOUTA, LAYOUTB, \ - MEM_SPACE, ETI_SPEC_AVAIL) + KOKKOSBLAS3_TRMM_CUBLAS(float, float, cublasStrmm, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) -#define KOKKOSBLAS3_ZTRMM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_CUBLAS(Kokkos::complex, cuDoubleComplex, \ - cublasZtrmm, LAYOUTA, LAYOUTB, MEM_SPACE, \ +#define KOKKOSBLAS3_ZTRMM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + KOKKOSBLAS3_TRMM_CUBLAS(Kokkos::complex, cuDoubleComplex, cublasZtrmm, LAYOUTA, LAYOUTB, MEM_SPACE, \ ETI_SPEC_AVAIL) #define KOKKOSBLAS3_CTRMM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - KOKKOSBLAS3_TRMM_CUBLAS(Kokkos::complex, cuComplex, cublasCtrmm, \ - LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) + KOKKOSBLAS3_TRMM_CUBLAS(Kokkos::complex, cuComplex, cublasCtrmm, LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) // Explicitly define the TRMM class for all permutations listed below -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) - -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) 
-KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) - -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) - -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) - -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) - -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - 
Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_DTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_STRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, 
Kokkos::CudaSpace, true) +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_ZTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_CTRMM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_avail.hpp index d1836809ec98..21289655de3b 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_avail.hpp @@ -29,38 +29,26 @@ struct trsm_tpl_spec_avail { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTB, \ - MEMSPACE) \ - template \ - struct trsm_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 
Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUTA, LAYOUTB, MEMSPACE) \ + template \ + struct trsm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, 
+KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) #endif @@ -68,61 +56,40 @@ KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, // cuBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#define KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTB, \ - MEMSPACE) \ - template \ - struct trsm_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUTA, LAYOUTB, MEMSPACE) \ + template \ + struct trsm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, 
+KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, 
+KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::LayoutRight, +KOKKOSBLAS3_TRSM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_decl.hpp index ec36388094be..7074a4e0e2ac 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas3_trsm_tpl_spec_decl.hpp @@ -23,329 +23,275 @@ namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS3_DTRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& /*space*/, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_BLAS,double]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - char side_; \ - char uplo_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'L'; \ - else \ - side_ = 'R'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'L'; \ - else \ - uplo_ = 'U'; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'R'; \ - else \ - side_ = 'L'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'U'; \ - else \ - uplo_ = 'L'; \ - } \ - \ - if (A_is_ll) \ - HostBlas::trsm(side_, uplo_, trans[0], diag[0], M, N, alpha, \ - A.data(), LDA, B.data(), LDB); \ - else \ - HostBlas::trsm(side_, uplo_, trans[0], diag[0], N, M, alpha, \ - A.data(), LDA, B.data(), LDB); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_DTRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trsm(const ExecSpace& /*space*/, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_BLAS,double]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 
1 : BST; \ + \ + char side_; \ + char uplo_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'L'; \ + else \ + side_ = 'R'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'L'; \ + else \ + uplo_ = 'U'; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'R'; \ + else \ + side_ = 'L'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'U'; \ + else \ + uplo_ = 'L'; \ + } \ + \ + if (A_is_ll) \ + HostBlas::trsm(side_, uplo_, trans[0], diag[0], M, N, alpha, A.data(), LDA, B.data(), LDB); \ + else \ + HostBlas::trsm(side_, uplo_, trans[0], diag[0], N, M, alpha, A.data(), LDA, B.data(), LDB); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_STRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& /*space*/, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_BLAS,float]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - char side_; \ - char uplo_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'L'; \ - else \ - side_ = 'R'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'L'; \ - else \ - uplo_ = 'U'; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'R'; \ - else \ - side_ = 'L'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'U'; \ - else \ - uplo_ = 'L'; \ - } \ - \ - if (A_is_ll) \ - HostBlas::trsm(side_, uplo_, trans[0], diag[0], M, N, alpha, \ - A.data(), LDA, B.data(), LDB); \ - else \ - HostBlas::trsm(side_, uplo_, trans[0], diag[0], N, M, alpha, \ - A.data(), LDA, B.data(), LDB); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_STRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trsm(const ExecSpace& /*space*/, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_BLAS,float]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 
1 : BST; \ + \ + char side_; \ + char uplo_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'L'; \ + else \ + side_ = 'R'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'L'; \ + else \ + uplo_ = 'U'; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'R'; \ + else \ + side_ = 'L'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'U'; \ + else \ + uplo_ = 'L'; \ + } \ + \ + if (A_is_ll) \ + HostBlas::trsm(side_, uplo_, trans[0], diag[0], M, N, alpha, A.data(), LDA, B.data(), LDB); \ + else \ + HostBlas::trsm(side_, uplo_, trans[0], diag[0], N, M, alpha, A.data(), LDA, B.data(), LDB); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_ZTRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUTB, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& /*space*/, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::trsm[TPL_BLAS,complex]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - char side_; \ - char uplo_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'L'; \ - else \ - side_ = 'R'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'L'; \ - else \ - uplo_ = 'U'; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'R'; \ - else \ - side_ = 'L'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'U'; \ - else \ - uplo_ = 'L'; \ - } \ - \ - const std::complex alpha_val = alpha; \ - if (A_is_ll) \ - HostBlas >::trsm( \ - side_, uplo_, trans[0], diag[0], M, N, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(B.data()), LDB); \ - else \ - HostBlas >::trsm( \ - side_, uplo_, trans[0], diag[0], N, M, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(B.data()), LDB); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_ZTRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTB, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trsm(const ExecSpace& /*space*/, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_BLAS,complex]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 
1 : BST; \ + \ + char side_; \ + char uplo_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'L'; \ + else \ + side_ = 'R'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'L'; \ + else \ + uplo_ = 'U'; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'R'; \ + else \ + side_ = 'L'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'U'; \ + else \ + uplo_ = 'L'; \ + } \ + \ + const std::complex alpha_val = alpha; \ + if (A_is_ll) \ + HostBlas >::trsm(side_, uplo_, trans[0], diag[0], M, N, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(B.data()), LDB); \ + else \ + HostBlas >::trsm(side_, uplo_, trans[0], diag[0], N, M, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(B.data()), LDB); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_CTRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUTB, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& /*space*/, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::trsm[TPL_BLAS,complex]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - char side_; \ - char uplo_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'L'; \ - else \ - side_ = 'R'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'L'; \ - else \ - uplo_ = 'U'; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = 'R'; \ - else \ - side_ = 'L'; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = 'U'; \ - else \ - uplo_ = 'L'; \ - } \ - \ - const std::complex alpha_val = alpha; \ - if (A_is_ll) \ - HostBlas >::trsm( \ - side_, uplo_, trans[0], diag[0], M, N, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(B.data()), LDB); \ - else \ - HostBlas >::trsm( \ - side_, uplo_, trans[0], diag[0], N, M, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(B.data()), LDB); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_CTRSM_BLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTB, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trsm(const ExecSpace& /*space*/, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_BLAS,complex]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 
1 : BST; \ + \ + char side_; \ + char uplo_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'L'; \ + else \ + side_ = 'R'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'L'; \ + else \ + uplo_ = 'U'; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = 'R'; \ + else \ + side_ = 'L'; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = 'U'; \ + else \ + uplo_ = 'L'; \ + } \ + \ + const std::complex alpha_val = alpha; \ + if (A_is_ll) \ + HostBlas >::trsm(side_, uplo_, trans[0], diag[0], M, N, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(B.data()), LDB); \ + else \ + HostBlas >::trsm(side_, uplo_, trans[0], diag[0], N, M, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(B.data()), LDB); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) +KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_DTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) -KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) 
+KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_STRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) -KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) +KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_ZTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) -KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) -KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, true) -KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::HostSpace, false) +KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -358,450 +304,370 @@ KOKKOSBLAS3_CTRSM_BLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, namespace KokkosBlas { namespace Impl 
{ -#define KOKKOSBLAS3_DTRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& space, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_CUBLAS,double]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - cublasSideMode_t side_; \ - cublasFillMode_t uplo_; \ - cublasOperation_t trans_; \ - cublasDiagType_t diag_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_LEFT; \ - else \ - side_ = CUBLAS_SIDE_RIGHT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_RIGHT; \ - else \ - side_ = CUBLAS_SIDE_LEFT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - } \ - \ - if ((trans[0] == 'N') || (trans[0] == 'n')) \ - trans_ = CUBLAS_OP_N; \ - else if ((trans[0] == 'T') || (trans[0] == 't')) \ - trans_ = CUBLAS_OP_T; \ - else \ - trans_ = CUBLAS_OP_C; \ - if ((diag[0] == 'U') || (diag[0] == 'u')) \ - diag_ = CUBLAS_DIAG_UNIT; \ - else \ - diag_ = CUBLAS_DIAG_NON_UNIT; \ - \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasDtrsm(s.handle, side_, uplo_, trans_, diag_, M, N, &alpha, \ - A.data(), LDA, B.data(), LDB)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasDtrsm(s.handle, side_, uplo_, trans_, diag_, N, M, &alpha, \ - A.data(), LDA, B.data(), LDB)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_DTRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + 
static void trsm(const ExecSpace& space, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_CUBLAS,double]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 1 : BST; \ + \ + cublasSideMode_t side_; \ + cublasFillMode_t uplo_; \ + cublasOperation_t trans_; \ + cublasDiagType_t diag_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_LEFT; \ + else \ + side_ = CUBLAS_SIDE_RIGHT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_RIGHT; \ + else \ + side_ = CUBLAS_SIDE_LEFT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + } \ + \ + if ((trans[0] == 'N') || (trans[0] == 'n')) \ + trans_ = CUBLAS_OP_N; \ + else if ((trans[0] == 'T') || (trans[0] == 't')) \ + trans_ = CUBLAS_OP_T; \ + else \ + trans_ = CUBLAS_OP_C; \ + if ((diag[0] == 'U') || (diag[0] == 'u')) \ + diag_ = CUBLAS_DIAG_UNIT; \ + else \ + diag_ = CUBLAS_DIAG_NON_UNIT; \ + \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasDtrsm(s.handle, side_, uplo_, trans_, diag_, M, N, &alpha, A.data(), LDA, B.data(), LDB)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasDtrsm(s.handle, 
side_, uplo_, trans_, diag_, N, M, &alpha, A.data(), LDA, B.data(), LDB)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_STRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& space, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_CUBLAS,float]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - cublasSideMode_t side_; \ - cublasFillMode_t uplo_; \ - cublasOperation_t trans_; \ - cublasDiagType_t diag_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_LEFT; \ - else \ - side_ = CUBLAS_SIDE_RIGHT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_RIGHT; \ - else \ - side_ = CUBLAS_SIDE_LEFT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - } \ - \ - if ((trans[0] == 'N') || (trans[0] == 'n')) \ - trans_ = CUBLAS_OP_N; \ - else if ((trans[0] == 'T') || (trans[0] == 't')) \ - trans_ = CUBLAS_OP_T; \ - else \ - trans_ = CUBLAS_OP_C; \ - if ((diag[0] == 'U') || (diag[0] == 'u')) \ - diag_ = CUBLAS_DIAG_UNIT; \ - else \ - diag_ = CUBLAS_DIAG_NON_UNIT; \ - \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasStrsm(s.handle, side_, uplo_, trans_, diag_, M, N, &alpha, \ - A.data(), LDA, B.data(), LDB)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasStrsm(s.handle, side_, uplo_, trans_, diag_, N, M, &alpha, \ - A.data(), LDA, B.data(), LDB)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_STRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + 
static void trsm(const ExecSpace& space, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_CUBLAS,float]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 1 : BST; \ + \ + cublasSideMode_t side_; \ + cublasFillMode_t uplo_; \ + cublasOperation_t trans_; \ + cublasDiagType_t diag_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_LEFT; \ + else \ + side_ = CUBLAS_SIDE_RIGHT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_RIGHT; \ + else \ + side_ = CUBLAS_SIDE_LEFT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + } \ + \ + if ((trans[0] == 'N') || (trans[0] == 'n')) \ + trans_ = CUBLAS_OP_N; \ + else if ((trans[0] == 'T') || (trans[0] == 't')) \ + trans_ = CUBLAS_OP_T; \ + else \ + trans_ = CUBLAS_OP_C; \ + if ((diag[0] == 'U') || (diag[0] == 'u')) \ + diag_ = CUBLAS_DIAG_UNIT; \ + else \ + diag_ = CUBLAS_DIAG_NON_UNIT; \ + \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasStrsm(s.handle, side_, uplo_, trans_, diag_, M, N, &alpha, A.data(), LDA, B.data(), LDB)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasStrsm(s.handle, 
side_, uplo_, trans_, diag_, N, M, &alpha, A.data(), LDA, B.data(), LDB)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_ZTRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUTB, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& space, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::trsm[TPL_CUBLAS,complex]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - cublasSideMode_t side_; \ - cublasFillMode_t uplo_; \ - cublasOperation_t trans_; \ - cublasDiagType_t diag_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_LEFT; \ - else \ - side_ = CUBLAS_SIDE_RIGHT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_RIGHT; \ - else \ - side_ = CUBLAS_SIDE_LEFT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - } \ - \ - if ((trans[0] == 'N') || (trans[0] == 'n')) \ - trans_ = CUBLAS_OP_N; \ - else if ((trans[0] == 'T') || (trans[0] == 't')) \ - trans_ = CUBLAS_OP_T; \ - else \ - trans_ = CUBLAS_OP_C; \ - if ((diag[0] == 'U') || (diag[0] == 'u')) \ - diag_ = CUBLAS_DIAG_UNIT; \ - else \ - diag_ = CUBLAS_DIAG_NON_UNIT; \ - \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZtrsm( \ - s.handle, side_, uplo_, trans_, diag_, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZtrsm( \ - s.handle, side_, uplo_, trans_, diag_, N, M, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_ZTRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTB, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef 
Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trsm(const ExecSpace& space, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_CUBLAS,complex]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 1 : BST; \ + \ + cublasSideMode_t side_; \ + cublasFillMode_t uplo_; \ + cublasOperation_t trans_; \ + cublasDiagType_t diag_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_LEFT; \ + else \ + side_ = CUBLAS_SIDE_RIGHT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_RIGHT; \ + else \ + side_ = CUBLAS_SIDE_LEFT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + } \ + \ + if ((trans[0] == 'N') || (trans[0] == 'n')) \ + trans_ = CUBLAS_OP_N; \ + else if ((trans[0] == 'T') || (trans[0] == 't')) \ + trans_ = CUBLAS_OP_T; \ + else \ + trans_ = CUBLAS_OP_C; \ + if ((diag[0] == 'U') || (diag[0] == 'u')) \ + diag_ = CUBLAS_DIAG_UNIT; \ + else \ + diag_ = CUBLAS_DIAG_NON_UNIT; \ + \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZtrsm(s.handle, side_, uplo_, trans_, diag_, M, N, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasZtrsm(s.handle, side_, uplo_, trans_, diag_, N, M, \ + reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(B.data()), LDB)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS3_CTRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRSM**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View**, LAYOUTB, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - \ - static void trsm(const ExecSpace& space, const char side[], \ - const char uplo[], const char trans[], const char diag[], \ - typename BViewType::const_value_type& alpha, \ - const AViewType& A, const BViewType& B) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::trsm[TPL_CUBLAS,complex]"); \ - const int M = static_cast(B.extent(0)); \ - const int N = static_cast(B.extent(1)); \ - \ - bool A_is_ll = std::is_same::value; \ - bool B_is_ll = std::is_same::value; \ - \ - const int AST = A_is_ll ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - const int BST = B_is_ll ? B.stride(1) : B.stride(0), \ - LDB = (BST == 0) ? 
1 : BST; \ - \ - cublasSideMode_t side_; \ - cublasFillMode_t uplo_; \ - cublasOperation_t trans_; \ - cublasDiagType_t diag_; \ - \ - if (A_is_ll) { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_LEFT; \ - else \ - side_ = CUBLAS_SIDE_RIGHT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - } else { \ - if ((side[0] == 'L') || (side[0] == 'l')) \ - side_ = CUBLAS_SIDE_RIGHT; \ - else \ - side_ = CUBLAS_SIDE_LEFT; \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = CUBLAS_FILL_MODE_UPPER; \ - else \ - uplo_ = CUBLAS_FILL_MODE_LOWER; \ - } \ - \ - if ((trans[0] == 'N') || (trans[0] == 'n')) \ - trans_ = CUBLAS_OP_N; \ - else if ((trans[0] == 'T') || (trans[0] == 't')) \ - trans_ = CUBLAS_OP_T; \ - else \ - trans_ = CUBLAS_OP_C; \ - if ((diag[0] == 'U') || (diag[0] == 'u')) \ - diag_ = CUBLAS_DIAG_UNIT; \ - else \ - diag_ = CUBLAS_DIAG_NON_UNIT; \ - \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - if (A_is_ll) { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCtrsm(s.handle, side_, uplo_, trans_, diag_, M, N, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB)); \ - } else { \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasCtrsm(s.handle, side_, uplo_, trans_, diag_, N, M, \ - reinterpret_cast(&alpha), \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(B.data()), LDB)); \ - } \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS3_CTRSM_CUBLAS(LAYOUTA, LAYOUTB, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRSM**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTB, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef 
Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + \ + static void trsm(const ExecSpace& space, const char side[], const char uplo[], const char trans[], \ + const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, \ + const BViewType& B) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::trsm[TPL_CUBLAS,complex]"); \ + const int M = static_cast(B.extent(0)); \ + const int N = static_cast(B.extent(1)); \ + \ + bool A_is_ll = std::is_same::value; \ + bool B_is_ll = std::is_same::value; \ + \ + const int AST = A_is_ll ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + const int BST = B_is_ll ? B.stride(1) : B.stride(0), LDB = (BST == 0) ? 1 : BST; \ + \ + cublasSideMode_t side_; \ + cublasFillMode_t uplo_; \ + cublasOperation_t trans_; \ + cublasDiagType_t diag_; \ + \ + if (A_is_ll) { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_LEFT; \ + else \ + side_ = CUBLAS_SIDE_RIGHT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + } else { \ + if ((side[0] == 'L') || (side[0] == 'l')) \ + side_ = CUBLAS_SIDE_RIGHT; \ + else \ + side_ = CUBLAS_SIDE_LEFT; \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = CUBLAS_FILL_MODE_UPPER; \ + else \ + uplo_ = CUBLAS_FILL_MODE_LOWER; \ + } \ + \ + if ((trans[0] == 'N') || (trans[0] == 'n')) \ + trans_ = CUBLAS_OP_N; \ + else if ((trans[0] == 'T') || (trans[0] == 't')) \ + trans_ = CUBLAS_OP_T; \ + else \ + trans_ = CUBLAS_OP_C; \ + if ((diag[0] == 'U') || (diag[0] == 'u')) \ + diag_ = CUBLAS_DIAG_UNIT; \ + else \ + diag_ = CUBLAS_DIAG_NON_UNIT; \ + \ + KokkosBlas::Impl::CudaBlasSingleton& s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); \ + if (A_is_ll) { \ + 
KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCtrsm( \ + s.handle, side_, uplo_, trans_, diag_, M, N, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), LDB)); \ + } else { \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasCtrsm( \ + s.handle, side_, uplo_, trans_, diag_, N, M, reinterpret_cast(&alpha), \ + reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), LDB)); \ + } \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_DTRSM_CUBLAS(Kokkos::LayoutRight, 
Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_STRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, 
Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_ZTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, false) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, true) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaSpace, false) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, 
Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, false) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, true) -KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS3_CTRSM_CUBLAS(Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, false) } // namespace Impl } // namespace KokkosBlas diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Cuda_tpl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Cuda_tpl.hpp index d85785316ec6..d80e3a23d8e1 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Cuda_tpl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Cuda_tpl.hpp @@ -24,8 +24,7 @@ namespace Impl { CudaBlasSingleton::CudaBlasSingleton() { cublasStatus_t stat = cublasCreate(&handle); - if (stat != CUBLAS_STATUS_SUCCESS) - Kokkos::abort("CUBLAS initialization failed\n"); + if (stat != CUBLAS_STATUS_SUCCESS) Kokkos::abort("CUBLAS initialization failed\n"); Kokkos::push_finalize_hook([&]() { cublasDestroy(handle); }); } diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.cpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.cpp index dc04ca7e6757..6989aea34d51 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -34,63 +34,41 @@ void F77_BLAS_MANGLE(sscal, SSCAL)(const KK_INT* N, const float* alpha, /* */ float* x, const KK_INT* x_inc); void F77_BLAS_MANGLE(dscal, DSCAL)(const KK_INT* N, const double* alpha, /* */ double* x, const KK_INT* x_inc); -void F77_BLAS_MANGLE(cscal, - CSCAL)(const KK_INT* N, const std::complex* alpha, - /* */ std::complex* x, const KK_INT* 
x_inc); -void F77_BLAS_MANGLE(zscal, - ZSCAL)(const KK_INT* N, const std::complex* alpha, - /* */ std::complex* x, const KK_INT* x_inc); +void F77_BLAS_MANGLE(cscal, CSCAL)(const KK_INT* N, const std::complex* alpha, + /* */ std::complex* x, const KK_INT* x_inc); +void F77_BLAS_MANGLE(zscal, ZSCAL)(const KK_INT* N, const std::complex* alpha, + /* */ std::complex* x, const KK_INT* x_inc); /// /// max /// -KK_INT F77_BLAS_MANGLE(isamax, ISAMAX)(const KK_INT* N, const float* x, - const KK_INT* x_inc); -KK_INT F77_BLAS_MANGLE(idamax, IDAMAX)(const KK_INT* N, const double* x, - const KK_INT* x_inc); -KK_INT F77_BLAS_MANGLE(icamax, ICAMAX)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc); -KK_INT F77_BLAS_MANGLE(izamax, IZAMAX)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc); +KK_INT F77_BLAS_MANGLE(isamax, ISAMAX)(const KK_INT* N, const float* x, const KK_INT* x_inc); +KK_INT F77_BLAS_MANGLE(idamax, IDAMAX)(const KK_INT* N, const double* x, const KK_INT* x_inc); +KK_INT F77_BLAS_MANGLE(icamax, ICAMAX)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc); +KK_INT F77_BLAS_MANGLE(izamax, IZAMAX)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc); /// /// nrm2 /// -float F77_BLAS_MANGLE(snrm2, SNRM2)(const KK_INT* N, const float* x, - const KK_INT* x_inc); -double F77_BLAS_MANGLE(dnrm2, DNRM2)(const KK_INT* N, const double* x, - const KK_INT* x_inc); -float F77_BLAS_MANGLE(scnrm2, SCNRM2)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc); -double F77_BLAS_MANGLE(dznrm2, DZNRM2)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc); +float F77_BLAS_MANGLE(snrm2, SNRM2)(const KK_INT* N, const float* x, const KK_INT* x_inc); +double F77_BLAS_MANGLE(dnrm2, DNRM2)(const KK_INT* N, const double* x, const KK_INT* x_inc); +float F77_BLAS_MANGLE(scnrm2, SCNRM2)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc); +double F77_BLAS_MANGLE(dznrm2, DZNRM2)(const KK_INT* N, const std::complex* x, const 
KK_INT* x_inc); /// /// sum /// -float F77_BLAS_MANGLE(sasum, SASUM)(const KK_INT* N, const float* x, - const KK_INT* x_inc); -double F77_BLAS_MANGLE(dasum, DASUM)(const KK_INT* N, const double* x, - const KK_INT* x_inc); -float F77_BLAS_MANGLE(scasum, SCASUM)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc); -double F77_BLAS_MANGLE(dzasum, DZASUM)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc); +float F77_BLAS_MANGLE(sasum, SASUM)(const KK_INT* N, const float* x, const KK_INT* x_inc); +double F77_BLAS_MANGLE(dasum, DASUM)(const KK_INT* N, const double* x, const KK_INT* x_inc); +float F77_BLAS_MANGLE(scasum, SCASUM)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc); +double F77_BLAS_MANGLE(dzasum, DZASUM)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc); /// /// dot /// -float F77_BLAS_MANGLE(sdot, SDOT)(const KK_INT* N, const float* x, - const KK_INT* x_inc, const float* y, +float F77_BLAS_MANGLE(sdot, SDOT)(const KK_INT* N, const float* x, const KK_INT* x_inc, const float* y, const KK_INT* y_inc); -double F77_BLAS_MANGLE(ddot, DDOT)(const KK_INT* N, const double* x, - const KK_INT* x_inc, const double* y, +double F77_BLAS_MANGLE(ddot, DDOT)(const KK_INT* N, const double* x, const KK_INT* x_inc, const double* y, const KK_INT* y_inc); #if defined(KOKKOSKERNELS_TPL_BLAS_RETURN_COMPLEX) // clang-format off @@ -106,77 +84,49 @@ typedef struct { double vals[2]; } _kk_double2; -_kk_float2 F77_BLAS_MANGLE(cdotu, CDOTU)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc, - const std::complex* y, - const KK_INT* y_inc); -_kk_double2 F77_BLAS_MANGLE(zdotu, ZDOTU)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc, - const std::complex* y, - const KK_INT* y_inc); -_kk_float2 F77_BLAS_MANGLE(cdotc, CDOTC)(const KK_INT* N, - const std::complex* x, - const KK_INT* x_inc, - const std::complex* y, - const KK_INT* y_inc); -_kk_double2 F77_BLAS_MANGLE(zdotc, ZDOTC)(const KK_INT* N, - const 
std::complex* x, - const KK_INT* x_inc, - const std::complex* y, - const KK_INT* y_inc); +_kk_float2 F77_BLAS_MANGLE(cdotu, CDOTU)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc, + const std::complex* y, const KK_INT* y_inc); +_kk_double2 F77_BLAS_MANGLE(zdotu, ZDOTU)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc, + const std::complex* y, const KK_INT* y_inc); +_kk_float2 F77_BLAS_MANGLE(cdotc, CDOTC)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc, + const std::complex* y, const KK_INT* y_inc); +_kk_double2 F77_BLAS_MANGLE(zdotc, ZDOTC)(const KK_INT* N, const std::complex* x, const KK_INT* x_inc, + const std::complex* y, const KK_INT* y_inc); #else -void F77_BLAS_MANGLE(cdotu, - CDOTU)(std::complex* res, const KK_INT* N, - const std::complex* x, const KK_INT* x_inc, - const std::complex* y, const KK_INT* y_inc); -void F77_BLAS_MANGLE(zdotu, - ZDOTU)(std::complex* res, const KK_INT* N, - const std::complex* x, const KK_INT* x_inc, - const std::complex* y, const KK_INT* y_inc); -void F77_BLAS_MANGLE(cdotc, - CDOTC)(std::complex* res, const KK_INT* N, - const std::complex* x, const KK_INT* x_inc, - const std::complex* y, const KK_INT* y_inc); -void F77_BLAS_MANGLE(zdotc, - ZDOTC)(std::complex* res, const KK_INT* N, - const std::complex* x, const KK_INT* x_inc, - const std::complex* y, const KK_INT* y_inc); +void F77_BLAS_MANGLE(cdotu, CDOTU)(std::complex* res, const KK_INT* N, const std::complex* x, + const KK_INT* x_inc, const std::complex* y, const KK_INT* y_inc); +void F77_BLAS_MANGLE(zdotu, ZDOTU)(std::complex* res, const KK_INT* N, const std::complex* x, + const KK_INT* x_inc, const std::complex* y, const KK_INT* y_inc); +void F77_BLAS_MANGLE(cdotc, CDOTC)(std::complex* res, const KK_INT* N, const std::complex* x, + const KK_INT* x_inc, const std::complex* y, const KK_INT* y_inc); +void F77_BLAS_MANGLE(zdotc, ZDOTC)(std::complex* res, const KK_INT* N, const std::complex* x, + const KK_INT* x_inc, const std::complex* y, 
const KK_INT* y_inc); #endif /// /// axpy /// -void F77_BLAS_MANGLE(saxpy, SAXPY)(const KK_INT* N, const float* alpha, - const float* x, const KK_INT* x_inc, +void F77_BLAS_MANGLE(saxpy, SAXPY)(const KK_INT* N, const float* alpha, const float* x, const KK_INT* x_inc, /* */ float* y, const KK_INT* y_inc); -void F77_BLAS_MANGLE(daxpy, DAXPY)(const KK_INT* N, const double* alpha, - const double* x, const KK_INT* x_inc, +void F77_BLAS_MANGLE(daxpy, DAXPY)(const KK_INT* N, const double* alpha, const double* x, const KK_INT* x_inc, /* */ double* y, const KK_INT* y_inc); -void F77_BLAS_MANGLE(caxpy, - CAXPY)(const KK_INT* N, const std::complex* alpha, - const std::complex* x, const KK_INT* x_inc, - /* */ std::complex* y, const KK_INT* y_inc); -void F77_BLAS_MANGLE(zaxpy, - ZAXPY)(const KK_INT* N, const std::complex* alpha, - const std::complex* x, const KK_INT* x_inc, - /* */ std::complex* y, const KK_INT* y_inc); +void F77_BLAS_MANGLE(caxpy, CAXPY)(const KK_INT* N, const std::complex* alpha, const std::complex* x, + const KK_INT* x_inc, + /* */ std::complex* y, const KK_INT* y_inc); +void F77_BLAS_MANGLE(zaxpy, ZAXPY)(const KK_INT* N, const std::complex* alpha, const std::complex* x, + const KK_INT* x_inc, + /* */ std::complex* y, const KK_INT* y_inc); /// /// rot /// -void F77_BLAS_MANGLE(srot, SROT)(KK_INT const* N, float* X, KK_INT const* incx, - float* Y, KK_INT const* incy, float* c, +void F77_BLAS_MANGLE(srot, SROT)(KK_INT const* N, float* X, KK_INT const* incx, float* Y, KK_INT const* incy, float* c, float* s); -void F77_BLAS_MANGLE(drot, DROT)(KK_INT const* N, double* X, KK_INT const* incx, - double* Y, KK_INT const* incy, double* c, - double* s); -void F77_BLAS_MANGLE(crot, CROT)(KK_INT const* N, std::complex* X, - KK_INT const* incx, std::complex* Y, +void F77_BLAS_MANGLE(drot, DROT)(KK_INT const* N, double* X, KK_INT const* incx, double* Y, KK_INT const* incy, + double* c, double* s); +void F77_BLAS_MANGLE(crot, CROT)(KK_INT const* N, std::complex* X, KK_INT 
const* incx, std::complex* Y, KK_INT const* incy, float* c, float* s); -void F77_BLAS_MANGLE(zrot, ZROT)(KK_INT const* N, std::complex* X, - KK_INT const* incx, std::complex* Y, +void F77_BLAS_MANGLE(zrot, ZROT)(KK_INT const* N, std::complex* X, KK_INT const* incx, std::complex* Y, KK_INT const* incy, double* c, double* s); /// @@ -184,106 +134,73 @@ void F77_BLAS_MANGLE(zrot, ZROT)(KK_INT const* N, std::complex* X, /// void F77_BLAS_MANGLE(srotg, SROTG)(float* a, float* b, float* c, float* s); void F77_BLAS_MANGLE(drotg, DROTG)(double* a, double* b, double* c, double* s); -void F77_BLAS_MANGLE(crotg, CROTG)(std::complex* a, - std::complex* b, float* c, - std::complex* s); -void F77_BLAS_MANGLE(zrotg, ZROTG)(std::complex* a, - std::complex* b, double* c, +void F77_BLAS_MANGLE(crotg, CROTG)(std::complex* a, std::complex* b, float* c, std::complex* s); +void F77_BLAS_MANGLE(zrotg, ZROTG)(std::complex* a, std::complex* b, double* c, std::complex* s); /// /// rotm /// -void F77_BLAS_MANGLE(srotm, SROTM)(const KK_INT* n, float* X, - const KK_INT* incx, float* Y, - const KK_INT* incy, float const* param); -void F77_BLAS_MANGLE(drotm, DROTM)(const KK_INT* n, double* X, - const KK_INT* incx, double* Y, - const KK_INT* incy, double const* param); +void F77_BLAS_MANGLE(srotm, SROTM)(const KK_INT* n, float* X, const KK_INT* incx, float* Y, const KK_INT* incy, + float const* param); +void F77_BLAS_MANGLE(drotm, DROTM)(const KK_INT* n, double* X, const KK_INT* incx, double* Y, const KK_INT* incy, + double const* param); /// /// rotmg /// -void F77_BLAS_MANGLE(srotmg, SROTMG)(float* d1, float* d2, float* x1, - const float* y1, float* param); -void F77_BLAS_MANGLE(drotmg, DROTMG)(double* d1, double* d2, double* x1, - const double* y1, double* param); +void F77_BLAS_MANGLE(srotmg, SROTMG)(float* d1, float* d2, float* x1, const float* y1, float* param); +void F77_BLAS_MANGLE(drotmg, DROTMG)(double* d1, double* d2, double* x1, const double* y1, double* param); /// /// swap /// -void 
F77_BLAS_MANGLE(sswap, SSWAP)(KK_INT const* N, float* X, - KK_INT const* incx, float* Y, - KK_INT const* incy); -void F77_BLAS_MANGLE(dswap, DSWAP)(KK_INT const* N, double* X, - KK_INT const* incx, double* Y, - KK_INT const* incy); -void F77_BLAS_MANGLE(cswap, CSWAP)(KK_INT const* N, std::complex* X, - KK_INT const* incx, std::complex* Y, - KK_INT const* incy); -void F77_BLAS_MANGLE(zswap, ZSWAP)(KK_INT const* N, std::complex* X, - KK_INT const* incx, std::complex* Y, +void F77_BLAS_MANGLE(sswap, SSWAP)(KK_INT const* N, float* X, KK_INT const* incx, float* Y, KK_INT const* incy); +void F77_BLAS_MANGLE(dswap, DSWAP)(KK_INT const* N, double* X, KK_INT const* incx, double* Y, KK_INT const* incy); +void F77_BLAS_MANGLE(cswap, CSWAP)(KK_INT const* N, std::complex* X, KK_INT const* incx, std::complex* Y, KK_INT const* incy); +void F77_BLAS_MANGLE(zswap, ZSWAP)(KK_INT const* N, std::complex* X, KK_INT const* incx, + std::complex* Y, KK_INT const* incy); /// /// Gemv /// -void F77_BLAS_MANGLE(sgemv, SGEMV)(const char*, KK_INT*, KK_INT*, const float*, - const float*, KK_INT*, const float*, KK_INT*, - const float*, +void F77_BLAS_MANGLE(sgemv, SGEMV)(const char*, KK_INT*, KK_INT*, const float*, const float*, KK_INT*, const float*, + KK_INT*, const float*, /* */ float*, KK_INT*); -void F77_BLAS_MANGLE(dgemv, DGEMV)(const char*, KK_INT*, KK_INT*, const double*, - const double*, KK_INT*, const double*, +void F77_BLAS_MANGLE(dgemv, DGEMV)(const char*, KK_INT*, KK_INT*, const double*, const double*, KK_INT*, const double*, KK_INT*, const double*, /* */ double*, KK_INT*); -void F77_BLAS_MANGLE(cgemv, CGEMV)(const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(cgemv, CGEMV)(const char*, KK_INT*, KK_INT*, const std::complex*, + const std::complex*, KK_INT*, const std::complex*, KK_INT*, const std::complex*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zgemv, ZGEMV)(const char*, KK_INT*, 
KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(zgemv, ZGEMV)(const char*, KK_INT*, KK_INT*, const std::complex*, + const std::complex*, KK_INT*, const std::complex*, KK_INT*, const std::complex*, /* */ std::complex*, KK_INT*); /// /// Ger /// -void F77_BLAS_MANGLE(sger, SGER)(KK_INT*, KK_INT*, const float*, const float*, - KK_INT*, const float*, KK_INT*, float*, - KK_INT*); -void F77_BLAS_MANGLE(dger, DGER)(KK_INT*, KK_INT*, const double*, const double*, - KK_INT*, const double*, KK_INT*, double*, +void F77_BLAS_MANGLE(sger, SGER)(KK_INT*, KK_INT*, const float*, const float*, KK_INT*, const float*, KK_INT*, float*, KK_INT*); -void F77_BLAS_MANGLE(cgeru, CGERU)(KK_INT*, KK_INT*, const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, - std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zgeru, ZGERU)(KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, - std::complex*, KK_INT*); -void F77_BLAS_MANGLE(cgerc, CGERC)(KK_INT*, KK_INT*, const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, - std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zgerc, ZGERC)(KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, - std::complex*, KK_INT*); +void F77_BLAS_MANGLE(dger, DGER)(KK_INT*, KK_INT*, const double*, const double*, KK_INT*, const double*, KK_INT*, + double*, KK_INT*); +void F77_BLAS_MANGLE(cgeru, CGERU)(KK_INT*, KK_INT*, const std::complex*, const std::complex*, KK_INT*, + const std::complex*, KK_INT*, std::complex*, KK_INT*); +void F77_BLAS_MANGLE(zgeru, ZGERU)(KK_INT*, KK_INT*, const std::complex*, const std::complex*, KK_INT*, + const std::complex*, KK_INT*, std::complex*, KK_INT*); +void F77_BLAS_MANGLE(cgerc, CGERC)(KK_INT*, KK_INT*, const std::complex*, const std::complex*, KK_INT*, + const std::complex*, KK_INT*, std::complex*, KK_INT*); +void 
F77_BLAS_MANGLE(zgerc, ZGERC)(KK_INT*, KK_INT*, const std::complex*, const std::complex*, KK_INT*, + const std::complex*, KK_INT*, std::complex*, KK_INT*); /// /// Syr /// -void F77_BLAS_MANGLE(ssyr, SSYR)(const char*, KK_INT*, const float*, - const float*, KK_INT*, float*, KK_INT*); -void F77_BLAS_MANGLE(dsyr, DSYR)(const char*, KK_INT*, const double*, - const double*, KK_INT*, double*, KK_INT*); +void F77_BLAS_MANGLE(ssyr, SSYR)(const char*, KK_INT*, const float*, const float*, KK_INT*, float*, KK_INT*); +void F77_BLAS_MANGLE(dsyr, DSYR)(const char*, KK_INT*, const double*, const double*, KK_INT*, double*, KK_INT*); // Although there is a cgeru, there is no csyru // Although there is a zgeru, there is no zsyru // Although there is a cgerc, there is no csyrc, but there is cher (see below) @@ -293,21 +210,17 @@ void F77_BLAS_MANGLE(dsyr, DSYR)(const char*, KK_INT*, const double*, /// Her /// -void F77_BLAS_MANGLE(cher, CHER)(const char*, KK_INT*, const float*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(cher, CHER)(const char*, KK_INT*, const float*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zher, ZHER)(const char*, KK_INT*, const double*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(zher, ZHER)(const char*, KK_INT*, const double*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); /// /// Syr2 /// -void F77_BLAS_MANGLE(ssyr2, SSYR2)(const char*, KK_INT*, const float*, - const float*, const KK_INT*, const float*, +void F77_BLAS_MANGLE(ssyr2, SSYR2)(const char*, KK_INT*, const float*, const float*, const KK_INT*, const float*, KK_INT*, float*, KK_INT*); -void F77_BLAS_MANGLE(dsyr2, DSYR2)(const char*, KK_INT*, const double*, - const double*, const KK_INT*, const double*, +void F77_BLAS_MANGLE(dsyr2, DSYR2)(const char*, KK_INT*, const double*, const double*, const KK_INT*, const double*, KK_INT*, double*, KK_INT*); // Although there is a cgeru, there is no csyr2u // Although there is a zgeru, there 
is no zsyr2u @@ -318,58 +231,42 @@ void F77_BLAS_MANGLE(dsyr2, DSYR2)(const char*, KK_INT*, const double*, /// Her2 /// -void F77_BLAS_MANGLE(cher2, CHER2)(const char*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(cher2, CHER2)(const char*, KK_INT*, const std::complex*, const std::complex*, + KK_INT*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zher2, ZHER2)(const char*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(zher2, ZHER2)(const char*, KK_INT*, const std::complex*, const std::complex*, + KK_INT*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); /// /// Trsv /// -void F77_BLAS_MANGLE(strsv, STRSV)(const char*, const char*, const char*, - KK_INT*, const float*, KK_INT*, +void F77_BLAS_MANGLE(strsv, STRSV)(const char*, const char*, const char*, KK_INT*, const float*, KK_INT*, /* */ float*, KK_INT*); -void F77_BLAS_MANGLE(dtrsv, DTRSV)(const char*, const char*, const char*, - KK_INT*, const double*, KK_INT*, +void F77_BLAS_MANGLE(dtrsv, DTRSV)(const char*, const char*, const char*, KK_INT*, const double*, KK_INT*, /* */ double*, KK_INT*); -void F77_BLAS_MANGLE(ctrsv, CTRSV)(const char*, const char*, const char*, - KK_INT*, const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(ctrsv, CTRSV)(const char*, const char*, const char*, KK_INT*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(ztrsv, ZTRSV)(const char*, const char*, const char*, - KK_INT*, const std::complex*, - KK_INT*, +void F77_BLAS_MANGLE(ztrsv, ZTRSV)(const char*, const char*, const char*, KK_INT*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); /// /// Gemm /// -void F77_BLAS_MANGLE(sgemm, SGEMM)(const char*, const char*, KK_INT*, KK_INT*, - KK_INT*, const float*, const float*, KK_INT*, - const float*, KK_INT*, const float*, +void F77_BLAS_MANGLE(sgemm, 
SGEMM)(const char*, const char*, KK_INT*, KK_INT*, KK_INT*, const float*, const float*, + KK_INT*, const float*, KK_INT*, const float*, /* */ float*, KK_INT*); -void F77_BLAS_MANGLE(dgemm, DGEMM)(const char*, const char*, KK_INT*, KK_INT*, - KK_INT*, const double*, const double*, - KK_INT*, const double*, KK_INT*, - const double*, +void F77_BLAS_MANGLE(dgemm, DGEMM)(const char*, const char*, KK_INT*, KK_INT*, KK_INT*, const double*, const double*, + KK_INT*, const double*, KK_INT*, const double*, /* */ double*, KK_INT*); -void F77_BLAS_MANGLE(cgemm, CGEMM)(const char*, const char*, KK_INT*, KK_INT*, - KK_INT*, const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(cgemm, CGEMM)(const char*, const char*, KK_INT*, KK_INT*, KK_INT*, const std::complex*, + const std::complex*, KK_INT*, const std::complex*, KK_INT*, const std::complex*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zgemm, ZGEMM)(const char*, const char*, KK_INT*, KK_INT*, - KK_INT*, const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(zgemm, ZGEMM)(const char*, const char*, KK_INT*, KK_INT*, KK_INT*, const std::complex*, + const std::complex*, KK_INT*, const std::complex*, KK_INT*, const std::complex*, /* */ std::complex*, KK_INT*); @@ -377,69 +274,51 @@ void F77_BLAS_MANGLE(zgemm, ZGEMM)(const char*, const char*, KK_INT*, KK_INT*, /// Herk /// -void F77_BLAS_MANGLE(ssyrk, SSYRK)(const char*, const char*, KK_INT*, KK_INT*, - const float*, const float*, KK_INT*, +void F77_BLAS_MANGLE(ssyrk, SSYRK)(const char*, const char*, KK_INT*, KK_INT*, const float*, const float*, KK_INT*, const float*, /* */ float*, KK_INT*); -void F77_BLAS_MANGLE(dsyrk, DSYRK)(const char*, const char*, KK_INT*, KK_INT*, - const double*, const double*, KK_INT*, +void F77_BLAS_MANGLE(dsyrk, DSYRK)(const char*, const char*, KK_INT*, KK_INT*, const double*, const double*, KK_INT*, const double*, /* */ double*, KK_INT*); 
-void F77_BLAS_MANGLE(cherk, CHERK)(const char*, const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, +void F77_BLAS_MANGLE(cherk, CHERK)(const char*, const char*, KK_INT*, KK_INT*, const std::complex*, + const std::complex*, KK_INT*, const std::complex*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(zherk, ZHERK)(const char*, const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, - const std::complex*, +void F77_BLAS_MANGLE(zherk, ZHERK)(const char*, const char*, KK_INT*, KK_INT*, const std::complex*, + const std::complex*, KK_INT*, const std::complex*, /* */ std::complex*, KK_INT*); /// /// Trmm /// -void F77_BLAS_MANGLE(strmm, STRMM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, const float*, +void F77_BLAS_MANGLE(strmm, STRMM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, const float*, const float*, KK_INT*, /* */ float*, KK_INT*); -void F77_BLAS_MANGLE(dtrmm, DTRMM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, const double*, +void F77_BLAS_MANGLE(dtrmm, DTRMM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, const double*, const double*, KK_INT*, /* */ double*, KK_INT*); -void F77_BLAS_MANGLE(ctrmm, CTRMM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(ctrmm, CTRMM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, + const std::complex*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(ztrmm, ZTRMM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(ztrmm, ZTRMM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, + const std::complex*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); /// /// 
Trsm /// -void F77_BLAS_MANGLE(strsm, STRSM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, const float*, +void F77_BLAS_MANGLE(strsm, STRSM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, const float*, const float*, KK_INT*, /* */ float*, KK_INT*); -void F77_BLAS_MANGLE(dtrsm, DTRSM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, const double*, +void F77_BLAS_MANGLE(dtrsm, DTRSM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, const double*, const double*, KK_INT*, /* */ double*, KK_INT*); -void F77_BLAS_MANGLE(ctrsm, CTRSM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(ctrsm, CTRSM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, + const std::complex*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); -void F77_BLAS_MANGLE(ztrsm, ZTRSM)(const char*, const char*, const char*, - const char*, KK_INT*, KK_INT*, - const std::complex*, - const std::complex*, KK_INT*, +void F77_BLAS_MANGLE(ztrsm, ZTRSM)(const char*, const char*, const char*, const char*, KK_INT*, KK_INT*, + const std::complex*, const std::complex*, KK_INT*, /* */ std::complex*, KK_INT*); } @@ -447,12 +326,10 @@ void F77_BLAS_MANGLE(sscal, SSCAL)(const KK_INT* N, const float* alpha, /* */ float* x, const KK_INT* x_inc); void F77_BLAS_MANGLE(dscal, DSCAL)(const KK_INT* N, const double* alpha, /* */ double* x, const KK_INT* x_inc); -void F77_BLAS_MANGLE(cscal, - CSCAL)(const KK_INT* N, const std::complex* alpha, - /* */ std::complex* x, const KK_INT* x_inc); -void F77_BLAS_MANGLE(zscal, - ZSCAL)(const KK_INT* N, const std::complex* alpha, - /* */ std::complex* x, const KK_INT* x_inc); +void F77_BLAS_MANGLE(cscal, CSCAL)(const KK_INT* N, const std::complex* alpha, + /* */ std::complex* x, const KK_INT* x_inc); +void F77_BLAS_MANGLE(zscal, ZSCAL)(const KK_INT* N, const 
std::complex* alpha, + /* */ std::complex* x, const KK_INT* x_inc); #define F77_FUNC_SSCAL F77_BLAS_MANGLE(sscal, SSCAL) #define F77_FUNC_DSCAL F77_BLAS_MANGLE(dscal, DSCAL) @@ -581,19 +458,17 @@ float HostBlas::asum(KK_INT n, const float* x, KK_INT x_inc) { return F77_FUNC_SASUM(&n, x, &x_inc); } template <> -float HostBlas::dot(KK_INT n, const float* x, KK_INT x_inc, - const float* y, KK_INT y_inc) { +float HostBlas::dot(KK_INT n, const float* x, KK_INT x_inc, const float* y, KK_INT y_inc) { return F77_FUNC_SDOT(&n, x, &x_inc, y, &y_inc); } template <> -void HostBlas::axpy(KK_INT n, const float alpha, const float* x, - KK_INT x_inc, +void HostBlas::axpy(KK_INT n, const float alpha, const float* x, KK_INT x_inc, /* */ float* y, KK_INT y_inc) { F77_FUNC_SAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } template <> -void HostBlas::rot(KK_INT const N, float* X, KK_INT const incx, float* Y, - KK_INT const incy, float* c, float* s) { +void HostBlas::rot(KK_INT const N, float* X, KK_INT const incx, float* Y, KK_INT const incy, float* c, + float* s) { F77_FUNC_SROT(&N, X, &incx, Y, &incy, c, s); } template <> @@ -601,81 +476,67 @@ void HostBlas::rotg(float* a, float* b, float* c, float* s) { F77_FUNC_SROTG(a, b, c, s); } template <> -void HostBlas::rotm(const KK_INT n, float* X, const KK_INT incx, - float* Y, const KK_INT incy, const float* param) { +void HostBlas::rotm(const KK_INT n, float* X, const KK_INT incx, float* Y, const KK_INT incy, + const float* param) { F77_FUNC_SROTM(&n, X, &incx, Y, &incy, param); } template <> -void HostBlas::rotmg(float* d1, float* d2, float* x1, const float* y1, - float* param) { +void HostBlas::rotmg(float* d1, float* d2, float* x1, const float* y1, float* param) { F77_FUNC_SROTMG(d1, d2, x1, y1, param); } template <> -void HostBlas::swap(KK_INT const N, float* X, KK_INT const incx, - float* Y, KK_INT const incy) { +void HostBlas::swap(KK_INT const N, float* X, KK_INT const incx, float* Y, KK_INT const incy) { F77_FUNC_SSWAP(&N, X, &incx, Y, 
&incy); } template <> -void HostBlas::gemv(const char trans, KK_INT m, KK_INT n, - const float alpha, const float* a, KK_INT lda, +void HostBlas::gemv(const char trans, KK_INT m, KK_INT n, const float alpha, const float* a, KK_INT lda, const float* b, KK_INT ldb, const float beta, /* */ float* c, KK_INT ldc) { F77_FUNC_SGEMV(&trans, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); } template <> -void HostBlas::ger(KK_INT m, KK_INT n, const float alpha, const float* x, - KK_INT incx, const float* y, KK_INT incy, float* a, - KK_INT lda) { +void HostBlas::ger(KK_INT m, KK_INT n, const float alpha, const float* x, KK_INT incx, const float* y, + KK_INT incy, float* a, KK_INT lda) { F77_FUNC_SGER(&m, &n, &alpha, x, &incx, y, &incy, a, &lda); } template <> -void HostBlas::syr(const char uplo, KK_INT n, const float alpha, - const float* x, KK_INT incx, float* a, KK_INT lda) { +void HostBlas::syr(const char uplo, KK_INT n, const float alpha, const float* x, KK_INT incx, float* a, + KK_INT lda) { F77_FUNC_SSYR(&uplo, &n, &alpha, x, &incx, a, &lda); } template <> -void HostBlas::syr2(const char uplo, KK_INT n, const float alpha, - const float* x, KK_INT incx, const float* y, +void HostBlas::syr2(const char uplo, KK_INT n, const float alpha, const float* x, KK_INT incx, const float* y, KK_INT incy, float* a, KK_INT lda) { F77_FUNC_SSYR2(&uplo, &n, &alpha, x, &incx, y, &incy, a, &lda); } template <> -void HostBlas::trsv(const char uplo, const char transa, const char diag, - KK_INT m, const float* a, KK_INT lda, +void HostBlas::trsv(const char uplo, const char transa, const char diag, KK_INT m, const float* a, KK_INT lda, /* */ float* b, KK_INT ldb) { F77_FUNC_STRSV(&uplo, &transa, &diag, &m, a, &lda, b, &ldb); } template <> -void HostBlas::gemm(const char transa, const char transb, KK_INT m, - KK_INT n, KK_INT k, const float alpha, - const float* a, KK_INT lda, const float* b, - KK_INT ldb, const float beta, +void HostBlas::gemm(const char transa, const char transb, KK_INT m, 
KK_INT n, KK_INT k, const float alpha, + const float* a, KK_INT lda, const float* b, KK_INT ldb, const float beta, /* */ float* c, KK_INT ldc) { - F77_FUNC_SGEMM(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, - c, &ldc); + F77_FUNC_SGEMM(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); } template <> -void HostBlas::herk(const char transa, const char transb, KK_INT n, - KK_INT k, const float alpha, const float* a, +void HostBlas::herk(const char transa, const char transb, KK_INT n, KK_INT k, const float alpha, const float* a, KK_INT lda, const float beta, /* */ float* c, KK_INT ldc) { F77_FUNC_SSYRK(&transa, &transb, &n, &k, &alpha, a, &lda, &beta, c, &ldc); } template <> -void HostBlas::trmm(const char side, const char uplo, const char transa, - const char diag, KK_INT m, KK_INT n, +void HostBlas::trmm(const char side, const char uplo, const char transa, const char diag, KK_INT m, KK_INT n, const float alpha, const float* a, KK_INT lda, /* */ float* b, KK_INT ldb) { - F77_FUNC_STRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, - &ldb); + F77_FUNC_STRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); } template <> -void HostBlas::trsm(const char side, const char uplo, const char transa, - const char diag, KK_INT m, KK_INT n, +void HostBlas::trsm(const char side, const char uplo, const char transa, const char diag, KK_INT m, KK_INT n, const float alpha, const float* a, KK_INT lda, /* */ float* b, KK_INT ldb) { - F77_FUNC_STRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, - &ldb); + F77_FUNC_STRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); } /// @@ -700,19 +561,17 @@ double HostBlas::asum(KK_INT n, const double* x, KK_INT x_inc) { return F77_FUNC_DASUM(&n, x, &x_inc); } template <> -double HostBlas::dot(KK_INT n, const double* x, KK_INT x_inc, - const double* y, KK_INT y_inc) { +double HostBlas::dot(KK_INT n, const double* x, KK_INT x_inc, const double* y, KK_INT 
y_inc) { return F77_FUNC_DDOT(&n, x, &x_inc, y, &y_inc); } template <> -void HostBlas::axpy(KK_INT n, const double alpha, const double* x, - KK_INT x_inc, +void HostBlas::axpy(KK_INT n, const double alpha, const double* x, KK_INT x_inc, /* */ double* y, KK_INT y_inc) { F77_FUNC_DAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } template <> -void HostBlas::rot(KK_INT const N, double* X, KK_INT const incx, - double* Y, KK_INT const incy, double* c, double* s) { +void HostBlas::rot(KK_INT const N, double* X, KK_INT const incx, double* Y, KK_INT const incy, double* c, + double* s) { F77_FUNC_DROT(&N, X, &incx, Y, &incy, c, s); } template <> @@ -720,82 +579,67 @@ void HostBlas::rotg(double* a, double* b, double* c, double* s) { F77_FUNC_DROTG(a, b, c, s); } template <> -void HostBlas::rotm(const KK_INT n, double* X, const KK_INT incx, - double* Y, const KK_INT incy, const double* param) { +void HostBlas::rotm(const KK_INT n, double* X, const KK_INT incx, double* Y, const KK_INT incy, + const double* param) { F77_FUNC_DROTM(&n, X, &incx, Y, &incy, param); } template <> -void HostBlas::rotmg(double* d1, double* d2, double* x1, - const double* y1, double* param) { +void HostBlas::rotmg(double* d1, double* d2, double* x1, const double* y1, double* param) { F77_FUNC_DROTMG(d1, d2, x1, y1, param); } template <> -void HostBlas::swap(KK_INT const N, double* X, KK_INT const incx, - double* Y, KK_INT const incy) { +void HostBlas::swap(KK_INT const N, double* X, KK_INT const incx, double* Y, KK_INT const incy) { F77_FUNC_DSWAP(&N, X, &incx, Y, &incy); } template <> -void HostBlas::gemv(const char trans, KK_INT m, KK_INT n, - const double alpha, const double* a, KK_INT lda, +void HostBlas::gemv(const char trans, KK_INT m, KK_INT n, const double alpha, const double* a, KK_INT lda, const double* b, KK_INT ldb, const double beta, /* */ double* c, KK_INT ldc) { F77_FUNC_DGEMV(&trans, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); } template <> -void HostBlas::ger(KK_INT m, KK_INT n, const 
double alpha, - const double* x, KK_INT incx, const double* y, +void HostBlas::ger(KK_INT m, KK_INT n, const double alpha, const double* x, KK_INT incx, const double* y, KK_INT incy, double* a, KK_INT lda) { F77_FUNC_DGER(&m, &n, &alpha, x, &incx, y, &incy, a, &lda); } template <> -void HostBlas::syr(const char uplo, KK_INT n, const double alpha, - const double* x, KK_INT incx, double* a, +void HostBlas::syr(const char uplo, KK_INT n, const double alpha, const double* x, KK_INT incx, double* a, KK_INT lda) { F77_FUNC_DSYR(&uplo, &n, &alpha, x, &incx, a, &lda); } template <> -void HostBlas::syr2(const char uplo, KK_INT n, const double alpha, - const double* x, KK_INT incx, const double* y, - KK_INT incy, double* a, KK_INT lda) { +void HostBlas::syr2(const char uplo, KK_INT n, const double alpha, const double* x, KK_INT incx, + const double* y, KK_INT incy, double* a, KK_INT lda) { F77_FUNC_DSYR2(&uplo, &n, &alpha, x, &incx, y, &incy, a, &lda); } template <> -void HostBlas::trsv(const char uplo, const char transa, const char diag, - KK_INT m, const double* a, KK_INT lda, +void HostBlas::trsv(const char uplo, const char transa, const char diag, KK_INT m, const double* a, KK_INT lda, /* */ double* b, KK_INT ldb) { F77_FUNC_DTRSV(&uplo, &transa, &diag, &m, a, &lda, b, &ldb); } template <> -void HostBlas::gemm(const char transa, const char transb, KK_INT m, - KK_INT n, KK_INT k, const double alpha, - const double* a, KK_INT lda, const double* b, - KK_INT ldb, const double beta, +void HostBlas::gemm(const char transa, const char transb, KK_INT m, KK_INT n, KK_INT k, const double alpha, + const double* a, KK_INT lda, const double* b, KK_INT ldb, const double beta, /* */ double* c, KK_INT ldc) { - F77_FUNC_DGEMM(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, - c, &ldc); + F77_FUNC_DGEMM(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); } template <> -void HostBlas::herk(const char transa, const char transb, KK_INT n, - KK_INT k, const 
double alpha, const double* a, - KK_INT lda, const double beta, +void HostBlas::herk(const char transa, const char transb, KK_INT n, KK_INT k, const double alpha, + const double* a, KK_INT lda, const double beta, /* */ double* c, KK_INT ldc) { F77_FUNC_DSYRK(&transa, &transb, &n, &k, &alpha, a, &lda, &beta, c, &ldc); } template <> -void HostBlas::trmm(const char side, const char uplo, const char transa, - const char diag, KK_INT m, KK_INT n, +void HostBlas::trmm(const char side, const char uplo, const char transa, const char diag, KK_INT m, KK_INT n, const double alpha, const double* a, KK_INT lda, /* */ double* b, KK_INT ldb) { - F77_FUNC_DTRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, - &ldb); + F77_FUNC_DTRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); } template <> -void HostBlas::trsm(const char side, const char uplo, const char transa, - const char diag, KK_INT m, KK_INT n, +void HostBlas::trsm(const char side, const char uplo, const char transa, const char diag, KK_INT m, KK_INT n, const double alpha, const double* a, KK_INT lda, /* */ double* b, KK_INT ldb) { - F77_FUNC_DTRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, - &ldb); + F77_FUNC_DTRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); } /// @@ -803,34 +647,25 @@ void HostBlas::trsm(const char side, const char uplo, const char transa, /// template <> -void HostBlas >::scal(KK_INT n, - const std::complex alpha, - /* */ std::complex* x, - KK_INT x_inc) { +void HostBlas >::scal(KK_INT n, const std::complex alpha, + /* */ std::complex* x, KK_INT x_inc) { F77_FUNC_CSCAL(&n, &alpha, x, &x_inc); } template <> -KK_INT HostBlas >::iamax(KK_INT n, - const std::complex* x, - KK_INT x_inc) { +KK_INT HostBlas >::iamax(KK_INT n, const std::complex* x, KK_INT x_inc) { return F77_FUNC_ICAMAX(&n, x, &x_inc); } template <> -float HostBlas >::nrm2(KK_INT n, - const std::complex* x, - KK_INT x_inc) { +float HostBlas >::nrm2(KK_INT n, const std::complex* 
x, KK_INT x_inc) { return F77_FUNC_SCNRM2(&n, x, &x_inc); } template <> -float HostBlas >::asum(KK_INT n, - const std::complex* x, - KK_INT x_inc) { +float HostBlas >::asum(KK_INT n, const std::complex* x, KK_INT x_inc) { return F77_FUNC_SCASUM(&n, x, &x_inc); } template <> -std::complex HostBlas >::dot( - KK_INT n, const std::complex* x, KK_INT x_inc, - const std::complex* y, KK_INT y_inc) { +std::complex HostBlas >::dot(KK_INT n, const std::complex* x, KK_INT x_inc, + const std::complex* y, KK_INT y_inc) { #if defined(KOKKOSKERNELS_TPL_BLAS_RETURN_COMPLEX) _kk_float2 res = F77_FUNC_CDOTC(&n, x, &x_inc, y, &y_inc); return std::complex(res.vals[0], res.vals[1]); @@ -841,131 +676,99 @@ std::complex HostBlas >::dot( #endif } template <> -void HostBlas >::axpy(KK_INT n, - const std::complex alpha, - const std::complex* x, +void HostBlas >::axpy(KK_INT n, const std::complex alpha, const std::complex* x, KK_INT x_inc, - /* */ std::complex* y, - KK_INT y_inc) { + /* */ std::complex* y, KK_INT y_inc) { F77_FUNC_CAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } template <> -void HostBlas >::rot(KK_INT const N, std::complex* X, - KK_INT const incx, - std::complex* Y, - KK_INT const incy, float* c, - float* s) { +void HostBlas >::rot(KK_INT const N, std::complex* X, KK_INT const incx, + std::complex* Y, KK_INT const incy, float* c, float* s) { F77_FUNC_CROT(&N, X, &incx, Y, &incy, c, s); } template <> -void HostBlas >::rotg(std::complex* a, - std::complex* b, float* c, +void HostBlas >::rotg(std::complex* a, std::complex* b, float* c, std::complex* s) { F77_FUNC_CROTG(a, b, c, s); } template <> -void HostBlas >::swap(KK_INT const N, - std::complex* X, - KK_INT const incx, - std::complex* Y, - KK_INT const incy) { +void HostBlas >::swap(KK_INT const N, std::complex* X, KK_INT const incx, + std::complex* Y, KK_INT const incy) { F77_FUNC_CSWAP(&N, X, &incx, Y, &incy); } template <> -void HostBlas >::gemv( - const char trans, KK_INT m, KK_INT n, const std::complex alpha, - const 
std::complex* a, KK_INT lda, const std::complex* b, - KK_INT ldb, const std::complex beta, - /* */ std::complex* c, KK_INT ldc) { - F77_FUNC_CGEMV(&trans, &m, &n, &alpha, (const std::complex*)a, &lda, - (const std::complex*)b, &ldb, &beta, - (std::complex*)c, &ldc); +void HostBlas >::gemv(const char trans, KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* a, KK_INT lda, const std::complex* b, + KK_INT ldb, const std::complex beta, + /* */ std::complex* c, KK_INT ldc) { + F77_FUNC_CGEMV(&trans, &m, &n, &alpha, (const std::complex*)a, &lda, (const std::complex*)b, &ldb, + &beta, (std::complex*)c, &ldc); } template <> -void HostBlas >::geru( - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* x, KK_INT incx, const std::complex* y, - KK_INT incy, std::complex* a, KK_INT lda) { - F77_FUNC_CGERU(&m, &n, &alpha, (const std::complex*)x, &incx, - (const std::complex*)y, &incy, (std::complex*)a, - &lda); +void HostBlas >::geru(KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* x, KK_INT incx, const std::complex* y, + KK_INT incy, std::complex* a, KK_INT lda) { + F77_FUNC_CGERU(&m, &n, &alpha, (const std::complex*)x, &incx, (const std::complex*)y, &incy, + (std::complex*)a, &lda); } template <> -void HostBlas >::gerc( - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* x, KK_INT incx, const std::complex* y, - KK_INT incy, std::complex* a, KK_INT lda) { - F77_FUNC_CGERC(&m, &n, &alpha, (const std::complex*)x, &incx, - (const std::complex*)y, &incy, (std::complex*)a, - &lda); +void HostBlas >::gerc(KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* x, KK_INT incx, const std::complex* y, + KK_INT incy, std::complex* a, KK_INT lda) { + F77_FUNC_CGERC(&m, &n, &alpha, (const std::complex*)x, &incx, (const std::complex*)y, &incy, + (std::complex*)a, &lda); } template <> template <> -void HostBlas >::her( - const char uplo, KK_INT n, const float alpha, const std::complex* x, - KK_INT incx, 
std::complex* a, KK_INT lda) { - F77_FUNC_CHER(&uplo, &n, &alpha, (const std::complex*)x, &incx, - (std::complex*)a, &lda); +void HostBlas >::her(const char uplo, KK_INT n, const float alpha, + const std::complex* x, KK_INT incx, std::complex* a, + KK_INT lda) { + F77_FUNC_CHER(&uplo, &n, &alpha, (const std::complex*)x, &incx, (std::complex*)a, &lda); } template <> -void HostBlas >::her2( - const char uplo, KK_INT n, const std::complex alpha, - const std::complex* x, KK_INT incx, const std::complex* y, - KK_INT incy, std::complex* a, KK_INT lda) { - F77_FUNC_CHER2(&uplo, &n, &alpha, (const std::complex*)x, &incx, - (const std::complex*)y, &incy, (std::complex*)a, - &lda); +void HostBlas >::her2(const char uplo, KK_INT n, const std::complex alpha, + const std::complex* x, KK_INT incx, const std::complex* y, + KK_INT incy, std::complex* a, KK_INT lda) { + F77_FUNC_CHER2(&uplo, &n, &alpha, (const std::complex*)x, &incx, (const std::complex*)y, &incy, + (std::complex*)a, &lda); } template <> -void HostBlas >::trsv(const char uplo, const char transa, - const char diag, KK_INT m, - const std::complex* a, - KK_INT lda, - /* */ std::complex* b, - KK_INT ldb) { - F77_FUNC_CTRSV(&uplo, &transa, &diag, &m, (const std::complex*)a, &lda, - (std::complex*)b, &ldb); +void HostBlas >::trsv(const char uplo, const char transa, const char diag, KK_INT m, + const std::complex* a, KK_INT lda, + /* */ std::complex* b, KK_INT ldb) { + F77_FUNC_CTRSV(&uplo, &transa, &diag, &m, (const std::complex*)a, &lda, (std::complex*)b, &ldb); } template <> -void HostBlas >::gemm( - const char transa, const char transb, KK_INT m, KK_INT n, KK_INT k, - const std::complex alpha, const std::complex* a, KK_INT lda, - const std::complex* b, KK_INT ldb, const std::complex beta, - /* */ std::complex* c, KK_INT ldc) { - F77_FUNC_CGEMM(&transa, &transb, &m, &n, &k, &alpha, - (const std::complex*)a, &lda, - (const std::complex*)b, &ldb, &beta, - (std::complex*)c, &ldc); +void HostBlas >::gemm(const char transa, 
const char transb, KK_INT m, KK_INT n, KK_INT k, + const std::complex alpha, const std::complex* a, KK_INT lda, + const std::complex* b, KK_INT ldb, const std::complex beta, + /* */ std::complex* c, KK_INT ldc) { + F77_FUNC_CGEMM(&transa, &transb, &m, &n, &k, &alpha, (const std::complex*)a, &lda, + (const std::complex*)b, &ldb, &beta, (std::complex*)c, &ldc); } template <> -void HostBlas >::herk( - const char transa, const char transb, KK_INT n, KK_INT k, - const std::complex alpha, const std::complex* a, KK_INT lda, - const std::complex beta, - /* */ std::complex* c, KK_INT ldc) { - F77_FUNC_CHERK(&transa, &transb, &n, &k, &alpha, - (const std::complex*)a, &lda, &beta, - (std::complex*)c, &ldc); +void HostBlas >::herk(const char transa, const char transb, KK_INT n, KK_INT k, + const std::complex alpha, const std::complex* a, KK_INT lda, + const std::complex beta, + /* */ std::complex* c, KK_INT ldc) { + F77_FUNC_CHERK(&transa, &transb, &n, &k, &alpha, (const std::complex*)a, &lda, &beta, (std::complex*)c, + &ldc); } template <> -void HostBlas >::trmm( - const char side, const char uplo, const char transa, const char diag, - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* a, KK_INT lda, - /* */ std::complex* b, KK_INT ldb) { - F77_FUNC_CTRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, - (const std::complex*)a, &lda, (std::complex*)b, - &ldb); +void HostBlas >::trmm(const char side, const char uplo, const char transa, const char diag, + KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* a, KK_INT lda, + /* */ std::complex* b, KK_INT ldb) { + F77_FUNC_CTRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const std::complex*)a, &lda, + (std::complex*)b, &ldb); } template <> -void HostBlas >::trsm( - const char side, const char uplo, const char transa, const char diag, - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* a, KK_INT lda, - /* */ std::complex* b, KK_INT ldb) { - F77_FUNC_CTRSM(&side, &uplo, &transa, 
&diag, &m, &n, &alpha, - (const std::complex*)a, &lda, (std::complex*)b, - &ldb); +void HostBlas >::trsm(const char side, const char uplo, const char transa, const char diag, + KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* a, KK_INT lda, + /* */ std::complex* b, KK_INT ldb) { + F77_FUNC_CTRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const std::complex*)a, &lda, + (std::complex*)b, &ldb); } /// @@ -973,34 +776,25 @@ void HostBlas >::trsm( /// template <> -void HostBlas >::scal(KK_INT n, - const std::complex alpha, - /* */ std::complex* x, - KK_INT x_inc) { +void HostBlas >::scal(KK_INT n, const std::complex alpha, + /* */ std::complex* x, KK_INT x_inc) { F77_FUNC_ZSCAL(&n, &alpha, x, &x_inc); } template <> -KK_INT HostBlas >::iamax(KK_INT n, - const std::complex* x, - KK_INT x_inc) { +KK_INT HostBlas >::iamax(KK_INT n, const std::complex* x, KK_INT x_inc) { return F77_FUNC_IZAMAX(&n, x, &x_inc); } template <> -double HostBlas >::nrm2(KK_INT n, - const std::complex* x, - KK_INT x_inc) { +double HostBlas >::nrm2(KK_INT n, const std::complex* x, KK_INT x_inc) { return F77_FUNC_DZNRM2(&n, x, &x_inc); } template <> -double HostBlas >::asum(KK_INT n, - const std::complex* x, - KK_INT x_inc) { +double HostBlas >::asum(KK_INT n, const std::complex* x, KK_INT x_inc) { return F77_FUNC_DZASUM(&n, x, &x_inc); } template <> -std::complex HostBlas >::dot( - KK_INT n, const std::complex* x, KK_INT x_inc, - const std::complex* y, KK_INT y_inc) { +std::complex HostBlas >::dot(KK_INT n, const std::complex* x, KK_INT x_inc, + const std::complex* y, KK_INT y_inc) { #if defined(KOKKOSKERNELS_TPL_BLAS_RETURN_COMPLEX) _kk_double2 res = F77_FUNC_ZDOTC(&n, x, &x_inc, y, &y_inc); return std::complex(res.vals[0], res.vals[1]); @@ -1011,133 +805,100 @@ std::complex HostBlas >::dot( #endif } template <> -void HostBlas >::axpy(KK_INT n, - const std::complex alpha, - const std::complex* x, +void HostBlas >::axpy(KK_INT n, const std::complex alpha, const std::complex* 
x, KK_INT x_inc, - /* */ std::complex* y, - KK_INT y_inc) { + /* */ std::complex* y, KK_INT y_inc) { F77_FUNC_ZAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } template <> -void HostBlas >::rot( - KK_INT const N, std::complex* X, KK_INT const incx, - std::complex* Y, KK_INT const incy, double* c, double* s) { +void HostBlas >::rot(KK_INT const N, std::complex* X, KK_INT const incx, + std::complex* Y, KK_INT const incy, double* c, double* s) { F77_FUNC_ZROT(&N, X, &incx, Y, &incy, c, s); } template <> -void HostBlas >::rotg(std::complex* a, - std::complex* b, double* c, +void HostBlas >::rotg(std::complex* a, std::complex* b, double* c, std::complex* s) { F77_FUNC_ZROTG(a, b, c, s); } template <> -void HostBlas >::swap(KK_INT const N, - std::complex* X, - KK_INT const incx, - std::complex* Y, - KK_INT const incy) { +void HostBlas >::swap(KK_INT const N, std::complex* X, KK_INT const incx, + std::complex* Y, KK_INT const incy) { F77_FUNC_ZSWAP(&N, X, &incx, Y, &incy); } template <> -void HostBlas >::gemv( - const char trans, KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* a, KK_INT lda, const std::complex* b, - KK_INT ldb, const std::complex beta, - /* */ std::complex* c, KK_INT ldc) { - F77_FUNC_ZGEMV(&trans, &m, &n, &alpha, (const std::complex*)a, &lda, - (const std::complex*)b, &ldb, &beta, - (std::complex*)c, &ldc); +void HostBlas >::gemv(const char trans, KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* a, KK_INT lda, const std::complex* b, + KK_INT ldb, const std::complex beta, + /* */ std::complex* c, KK_INT ldc) { + F77_FUNC_ZGEMV(&trans, &m, &n, &alpha, (const std::complex*)a, &lda, (const std::complex*)b, &ldb, + &beta, (std::complex*)c, &ldc); } template <> -void HostBlas >::geru( - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* x, KK_INT incx, const std::complex* y, - KK_INT incy, std::complex* a, KK_INT lda) { - F77_FUNC_ZGERU(&m, &n, &alpha, (const std::complex*)x, &incx, - (const std::complex*)y, 
&incy, +void HostBlas >::geru(KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* x, KK_INT incx, const std::complex* y, + KK_INT incy, std::complex* a, KK_INT lda) { + F77_FUNC_ZGERU(&m, &n, &alpha, (const std::complex*)x, &incx, (const std::complex*)y, &incy, (std::complex*)a, &lda); } template <> -void HostBlas >::gerc( - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* x, KK_INT incx, const std::complex* y, - KK_INT incy, std::complex* a, KK_INT lda) { - F77_FUNC_ZGERC(&m, &n, &alpha, (const std::complex*)x, &incx, - (const std::complex*)y, &incy, +void HostBlas >::gerc(KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* x, KK_INT incx, const std::complex* y, + KK_INT incy, std::complex* a, KK_INT lda) { + F77_FUNC_ZGERC(&m, &n, &alpha, (const std::complex*)x, &incx, (const std::complex*)y, &incy, (std::complex*)a, &lda); } template <> template <> -void HostBlas >::her(const char uplo, KK_INT n, - const double alpha, - const std::complex* x, - KK_INT incx, - std::complex* a, +void HostBlas >::her(const char uplo, KK_INT n, const double alpha, + const std::complex* x, KK_INT incx, std::complex* a, KK_INT lda) { - F77_FUNC_ZHER(&uplo, &n, &alpha, (const std::complex*)x, &incx, - (std::complex*)a, &lda); + F77_FUNC_ZHER(&uplo, &n, &alpha, (const std::complex*)x, &incx, (std::complex*)a, &lda); } template <> -void HostBlas >::her2( - const char uplo, KK_INT n, const std::complex alpha, - const std::complex* x, KK_INT incx, const std::complex* y, - KK_INT incy, std::complex* a, KK_INT lda) { - F77_FUNC_ZHER2(&uplo, &n, &alpha, (const std::complex*)x, &incx, - (const std::complex*)y, &incy, +void HostBlas >::her2(const char uplo, KK_INT n, const std::complex alpha, + const std::complex* x, KK_INT incx, const std::complex* y, + KK_INT incy, std::complex* a, KK_INT lda) { + F77_FUNC_ZHER2(&uplo, &n, &alpha, (const std::complex*)x, &incx, (const std::complex*)y, &incy, (std::complex*)a, &lda); } template <> -void 
HostBlas >::trsv(const char uplo, const char transa, - const char diag, KK_INT m, - const std::complex* a, - KK_INT lda, - /* */ std::complex* b, - KK_INT ldb) { - F77_FUNC_ZTRSV(&uplo, &transa, &diag, &m, (const std::complex*)a, - &lda, (std::complex*)b, &ldb); +void HostBlas >::trsv(const char uplo, const char transa, const char diag, KK_INT m, + const std::complex* a, KK_INT lda, + /* */ std::complex* b, KK_INT ldb) { + F77_FUNC_ZTRSV(&uplo, &transa, &diag, &m, (const std::complex*)a, &lda, (std::complex*)b, &ldb); } template <> -void HostBlas >::gemm( - const char transa, const char transb, KK_INT m, KK_INT n, KK_INT k, - const std::complex alpha, const std::complex* a, KK_INT lda, - const std::complex* b, KK_INT ldb, const std::complex beta, - /* */ std::complex* c, KK_INT ldc) { - F77_FUNC_ZGEMM(&transa, &transb, &m, &n, &k, &alpha, - (const std::complex*)a, &lda, - (const std::complex*)b, &ldb, &beta, - (std::complex*)c, &ldc); +void HostBlas >::gemm(const char transa, const char transb, KK_INT m, KK_INT n, KK_INT k, + const std::complex alpha, const std::complex* a, KK_INT lda, + const std::complex* b, KK_INT ldb, const std::complex beta, + /* */ std::complex* c, KK_INT ldc) { + F77_FUNC_ZGEMM(&transa, &transb, &m, &n, &k, &alpha, (const std::complex*)a, &lda, + (const std::complex*)b, &ldb, &beta, (std::complex*)c, &ldc); } template <> -void HostBlas >::herk( - const char transa, const char transb, KK_INT n, KK_INT k, - const std::complex alpha, const std::complex* a, KK_INT lda, - const std::complex beta, - /* */ std::complex* c, KK_INT ldc) { - F77_FUNC_ZHERK(&transa, &transb, &n, &k, &alpha, - (const std::complex*)a, &lda, &beta, +void HostBlas >::herk(const char transa, const char transb, KK_INT n, KK_INT k, + const std::complex alpha, const std::complex* a, KK_INT lda, + const std::complex beta, + /* */ std::complex* c, KK_INT ldc) { + F77_FUNC_ZHERK(&transa, &transb, &n, &k, &alpha, (const std::complex*)a, &lda, &beta, (std::complex*)c, &ldc); } 
template <> -void HostBlas >::trmm( - const char side, const char uplo, const char transa, const char diag, - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* a, KK_INT lda, - /* */ std::complex* b, KK_INT ldb) { - F77_FUNC_ZTRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, - (const std::complex*)a, &lda, (std::complex*)b, - &ldb); -} -template <> -void HostBlas >::trsm( - const char side, const char uplo, const char transa, const char diag, - KK_INT m, KK_INT n, const std::complex alpha, - const std::complex* a, KK_INT lda, - /* */ std::complex* b, KK_INT ldb) { - F77_FUNC_ZTRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, - (const std::complex*)a, &lda, (std::complex*)b, - &ldb); +void HostBlas >::trmm(const char side, const char uplo, const char transa, const char diag, + KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* a, KK_INT lda, + /* */ std::complex* b, KK_INT ldb) { + F77_FUNC_ZTRMM(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const std::complex*)a, &lda, + (std::complex*)b, &ldb); +} +template <> +void HostBlas >::trsm(const char side, const char uplo, const char transa, const char diag, + KK_INT m, KK_INT n, const std::complex alpha, + const std::complex* a, KK_INT lda, + /* */ std::complex* b, KK_INT ldb) { + F77_FUNC_ZTRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const std::complex*)a, &lda, + (std::complex*)b, &ldb); } } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.hpp index d28f7a21866e..3ccf2f822aac 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Host_tpl.hpp @@ -57,66 +57,57 @@ struct HostBlas { static void axpy(KK_INT n, const T alpha, const T *x, KK_INT x_inc, /* */ T *y, KK_INT y_inc); - static void rot(KK_INT const N, T *X, KK_INT const incx, T *Y, - KK_INT const incy, mag_type *c, mag_type *s); + static void rot(KK_INT const 
N, T *X, KK_INT const incx, T *Y, KK_INT const incy, mag_type *c, mag_type *s); static void rotg(T *a, T *b, mag_type *c, T *s); - static void rotm(const KK_INT n, T *X, const KK_INT incx, T *Y, - const KK_INT incy, T const *param); + static void rotm(const KK_INT n, T *X, const KK_INT incx, T *Y, const KK_INT incy, T const *param); static void rotmg(T *d1, T *d2, T *x1, const T *y1, T *param); - static void swap(KK_INT const N, T *X, KK_INT const incx, T *Y, - KK_INT const incy); + static void swap(KK_INT const N, T *X, KK_INT const incx, T *Y, KK_INT const incy); - static void gemv(const char trans, KK_INT m, KK_INT n, const T alpha, - const T *a, KK_INT lda, const T *b, KK_INT ldb, const T beta, + static void gemv(const char trans, KK_INT m, KK_INT n, const T alpha, const T *a, KK_INT lda, const T *b, KK_INT ldb, + const T beta, /* */ T *c, KK_INT ldc); - static void ger(KK_INT m, KK_INT n, const T alpha, const T *x, KK_INT incx, - const T *y, KK_INT incy, T *a, KK_INT lda); + static void ger(KK_INT m, KK_INT n, const T alpha, const T *x, KK_INT incx, const T *y, KK_INT incy, T *a, + KK_INT lda); - static void geru(KK_INT m, KK_INT n, const T alpha, const T *x, KK_INT incx, - const T *y, KK_INT incy, T *a, KK_INT lda); + static void geru(KK_INT m, KK_INT n, const T alpha, const T *x, KK_INT incx, const T *y, KK_INT incy, T *a, + KK_INT lda); - static void gerc(KK_INT m, KK_INT n, const T alpha, const T *x, KK_INT incx, - const T *y, KK_INT incy, T *a, KK_INT lda); + static void gerc(KK_INT m, KK_INT n, const T alpha, const T *x, KK_INT incx, const T *y, KK_INT incy, T *a, + KK_INT lda); - static void syr(const char uplo, KK_INT n, const T alpha, const T *x, - KK_INT incx, T *a, KK_INT lda); + static void syr(const char uplo, KK_INT n, const T alpha, const T *x, KK_INT incx, T *a, KK_INT lda); - static void syr2(const char uplo, KK_INT n, const T alpha, const T *x, - KK_INT incx, const T *y, KK_INT incy, T *a, KK_INT lda); + static void syr2(const char uplo, 
KK_INT n, const T alpha, const T *x, KK_INT incx, const T *y, KK_INT incy, T *a, + KK_INT lda); template - static void her(const char uplo, KK_INT n, const tAlpha alpha, const T *x, - KK_INT incx, T *a, KK_INT lda); + static void her(const char uplo, KK_INT n, const tAlpha alpha, const T *x, KK_INT incx, T *a, KK_INT lda); - static void her2(const char uplo, KK_INT n, const T alpha, const T *x, - KK_INT incx, const T *y, KK_INT incy, T *a, KK_INT lda); + static void her2(const char uplo, KK_INT n, const T alpha, const T *x, KK_INT incx, const T *y, KK_INT incy, T *a, + KK_INT lda); - static void trsv(const char uplo, const char transa, const char diag, - KK_INT m, const T *a, KK_INT lda, + static void trsv(const char uplo, const char transa, const char diag, KK_INT m, const T *a, KK_INT lda, /* */ T *b, KK_INT ldb); - static void gemm(const char transa, const char transb, KK_INT m, KK_INT n, - KK_INT k, const T alpha, const T *a, KK_INT lda, const T *b, - KK_INT ldb, const T beta, + static void gemm(const char transa, const char transb, KK_INT m, KK_INT n, KK_INT k, const T alpha, const T *a, + KK_INT lda, const T *b, KK_INT ldb, const T beta, /* */ T *c, KK_INT ldc); - static void herk(const char transa, const char transb, KK_INT n, KK_INT k, - const T alpha, const T *a, KK_INT lda, const T beta, + static void herk(const char transa, const char transb, KK_INT n, KK_INT k, const T alpha, const T *a, KK_INT lda, + const T beta, /* */ T *c, KK_INT ldc); - static void trmm(const char side, const char uplo, const char transa, - const char diag, KK_INT m, KK_INT n, const T alpha, - const T *a, KK_INT lda, + static void trmm(const char side, const char uplo, const char transa, const char diag, KK_INT m, KK_INT n, + const T alpha, const T *a, KK_INT lda, /* */ T *b, KK_INT ldb); - static void trsm(const char side, const char uplo, const char transa, - const char diag, KK_INT m, KK_INT n, const T alpha, - const T *a, KK_INT lda, + static void trsm(const char side, const 
char uplo, const char transa, const char diag, KK_INT m, KK_INT n, + const T alpha, const T *a, KK_INT lda, /* */ T *b, KK_INT ldb); }; } // namespace Impl diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Rocm_tpl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Rocm_tpl.hpp index 6f89d349c959..b5a7dabf6f2d 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas_Rocm_tpl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas_Rocm_tpl.hpp @@ -25,8 +25,7 @@ namespace Impl { RocBlasSingleton::RocBlasSingleton() { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_create_handle(&handle)); - Kokkos::push_finalize_hook( - [&]() { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_destroy_handle(handle)); }); + Kokkos::push_finalize_hook([&]() { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_destroy_handle(handle)); }); } RocBlasSingleton& RocBlasSingleton::singleton() { diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas_tpl_spec.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas_tpl_spec.hpp index 0151c0534fe9..7f40edf43592 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas_tpl_spec.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas_tpl_spec.hpp @@ -32,8 +32,7 @@ struct CudaBlasSingleton { static CudaBlasSingleton& singleton(); }; -inline void cublas_internal_error_throw(cublasStatus_t cublasState, - const char* name, const char* file, +inline void cublas_internal_error_throw(cublasStatus_t cublasState, const char* name, const char* file, const int line) { std::ostringstream out; // out << name << " error( " << cublasGetStatusName(cublasState) @@ -43,9 +42,7 @@ inline void cublas_internal_error_throw(cublasStatus_t cublasState, case CUBLAS_STATUS_NOT_INITIALIZED: out << "CUBLAS_STATUS_NOT_INITIALIZED): the library was not initialized."; break; - case CUBLAS_STATUS_ALLOC_FAILED: - out << "CUBLAS_STATUS_ALLOC_FAILED): the resource allocation failed."; - break; + case CUBLAS_STATUS_ALLOC_FAILED: out << "CUBLAS_STATUS_ALLOC_FAILED): the resource allocation failed."; break; case 
CUBLAS_STATUS_INVALID_VALUE: out << "CUBLAS_STATUS_INVALID_VALUE): an invalid numerical value was " "used as an argument."; @@ -62,9 +59,7 @@ inline void cublas_internal_error_throw(cublasStatus_t cublasState, out << "CUBLAS_STATUS_EXECUTION_FAILED): the GPU program failed to " "execute."; break; - case CUBLAS_STATUS_INTERNAL_ERROR: - out << "CUBLAS_STATUS_INTERNAL_ERROR): an internal operation failed."; - break; + case CUBLAS_STATUS_INTERNAL_ERROR: out << "CUBLAS_STATUS_INTERNAL_ERROR): an internal operation failed."; break; case CUBLAS_STATUS_NOT_SUPPORTED: out << "CUBLAS_STATUS_NOT_SUPPORTED): the feature required is not " "supported."; @@ -77,10 +72,8 @@ inline void cublas_internal_error_throw(cublasStatus_t cublasState, throw std::runtime_error(out.str()); } -inline void cublas_internal_safe_call(cublasStatus_t cublasState, - const char* name, - const char* file = nullptr, - const int line = 0) { +inline void cublas_internal_safe_call(cublasStatus_t cublasState, const char* name, const char* file = nullptr, + const int line = 0) { if (CUBLAS_STATUS_SUCCESS != cublasState) { cublas_internal_error_throw(cublasState, name, file, line); } @@ -89,8 +82,7 @@ inline void cublas_internal_safe_call(cublasStatus_t cublasState, // The macro below defines the interface for the safe cublas calls. // The functions themselves are protected by impl namespace and this // is not meant to be used by external application or libraries. 
-#define KOKKOS_CUBLAS_SAFE_CALL_IMPL(call) \ - KokkosBlas::Impl::cublas_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOS_CUBLAS_SAFE_CALL_IMPL(call) KokkosBlas::Impl::cublas_internal_safe_call(call, #call, __FILE__, __LINE__) /// \brief This function converts KK transpose mode to cuBLAS transpose mode inline cublasOperation_t trans_mode_kk_to_cublas(const char kkMode[]) { @@ -122,8 +114,7 @@ struct RocBlasSingleton { static RocBlasSingleton& singleton(); }; -inline void rocblas_internal_error_throw(rocblas_status rocblasState, - const char* name, const char* file, +inline void rocblas_internal_error_throw(rocblas_status rocblasState, const char* name, const char* file, const int line) { std::ostringstream out; out << name << " error( "; @@ -132,29 +123,19 @@ inline void rocblas_internal_error_throw(rocblas_status rocblasState, out << "rocblas_status_invalid_handle): handle not initialized, invalid " "or null."; break; - case rocblas_status_not_implemented: - out << "rocblas_status_not_implemented): function is not implemented."; - break; - case rocblas_status_invalid_pointer: - out << "rocblas_status_invalid_pointer): invalid pointer argument."; - break; - case rocblas_status_invalid_size: - out << "rocblas_status_invalid_size): invalid size argument."; - break; + case rocblas_status_not_implemented: out << "rocblas_status_not_implemented): function is not implemented."; break; + case rocblas_status_invalid_pointer: out << "rocblas_status_invalid_pointer): invalid pointer argument."; break; + case rocblas_status_invalid_size: out << "rocblas_status_invalid_size): invalid size argument."; break; case rocblas_status_memory_error: out << "rocblas_status_memory_error): failed internal memory allocation, " "copy or dealloc."; break; - case rocblas_status_internal_error: - out << "rocblas_status_internal_error): other internal library failure."; - break; + case rocblas_status_internal_error: out << "rocblas_status_internal_error): other internal 
library failure."; break; case rocblas_status_perf_degraded: out << "rocblas_status_perf_degraded): performance degraded due to low " "device memory."; break; - case rocblas_status_size_query_mismatch: - out << "unmatched start/stop size query): ."; - break; + case rocblas_status_size_query_mismatch: out << "unmatched start/stop size query): ."; break; case rocblas_status_size_increased: out << "rocblas_status_size_increased): queried device memory size " "increased."; @@ -163,9 +144,7 @@ inline void rocblas_internal_error_throw(rocblas_status rocblasState, out << "rocblas_status_size_unchanged): queried device memory size " "unchanged."; break; - case rocblas_status_invalid_value: - out << "rocblas_status_invalid_value): passed argument not valid."; - break; + case rocblas_status_invalid_value: out << "rocblas_status_invalid_value): passed argument not valid."; break; case rocblas_status_continue: out << "rocblas_status_continue): nothing preventing function to " "proceed."; @@ -182,10 +161,8 @@ inline void rocblas_internal_error_throw(rocblas_status rocblasState, throw std::runtime_error(out.str()); } -inline void rocblas_internal_safe_call(rocblas_status rocblasState, - const char* name, - const char* file = nullptr, - const int line = 0) { +inline void rocblas_internal_safe_call(rocblas_status rocblasState, const char* name, const char* file = nullptr, + const int line = 0) { if (rocblas_status_success != rocblasState) { rocblas_internal_error_throw(rocblasState, name, file, line); } diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_abs.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_abs.hpp index 5bf3f55388ea..eb2d290a6f81 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_abs.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_abs.hpp @@ -32,8 +32,7 @@ void impl_test_abs(int N) { view_stride_adapter y("Y", N); view_stride_adapter org_y("Org_Y", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + 
Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -55,8 +54,7 @@ void impl_test_abs(int N) { // Copy result to host (h_y is subview of h_b_y) Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(y.h_view(i), AT::abs(x.h_view(i)), - eps * AT::abs(x.h_view(i))); + EXPECT_NEAR_KK(y.h_view(i), AT::abs(x.h_view(i)), eps * AT::abs(x.h_view(i))); } // Run with const input // Reset output @@ -64,8 +62,7 @@ void impl_test_abs(int N) { KokkosBlas::abs(y.d_view, x.d_view_const); Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(y.h_view(i), AT::abs(x.h_view(i)), - eps * AT::abs(x.h_view(i))); + EXPECT_NEAR_KK(y.h_view(i), AT::abs(x.h_view(i)), eps * AT::abs(x.h_view(i))); } } @@ -79,8 +76,7 @@ void impl_test_abs_mv(int N, int K) { view_stride_adapter y("Y", N, K); view_stride_adapter org_y("Org_Y", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -104,8 +100,7 @@ void impl_test_abs_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(y.h_view(i, j), AT::abs(x.h_view(i, j)), - eps * AT::abs(x.h_view(i, j))); + EXPECT_NEAR_KK(y.h_view(i, j), AT::abs(x.h_view(i, j)), eps * AT::abs(x.h_view(i, j))); } } // Test and verify const input @@ -115,8 +110,7 @@ void impl_test_abs_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(y.h_view(i, j), AT::abs(x.h_view(i, j)), - eps * AT::abs(x.h_view(i, j))); + EXPECT_NEAR_KK(y.h_view(i, j), AT::abs(x.h_view(i, j)), eps * AT::abs(x.h_view(i, j))); } } } @@ -125,8 +119,7 @@ void impl_test_abs_mv(int N, int K) { template int test_abs() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + 
(!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_abs(0); @@ -136,8 +129,7 @@ int test_abs() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_abs(0); @@ -146,8 +138,7 @@ int test_abs() { // Test::impl_test_abs(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_abs(0); @@ -156,8 +147,7 @@ int test_abs() { // Test::impl_test_abs(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_abs(1024); Test::impl_test_abs(1024); #endif @@ -168,8 +158,7 @@ int test_abs() { template int test_abs_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_abs_mv(0, 5); @@ -179,8 +168,7 @@ int test_abs_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_abs_mv(0, 5); @@ -189,8 +177,7 @@ 
int test_abs_mv() { // Test::impl_test_abs_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_abs_mv(0, 5); @@ -199,8 +186,7 @@ int test_abs_mv() { // Test::impl_test_abs_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_abs_mv(1024, 5); Test::impl_test_abs_mv(1024, 5); #endif @@ -209,8 +195,7 @@ int test_abs_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, abs_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::abs_float"); test_abs(); @@ -224,8 +209,7 @@ TEST_F(TestCategory, abs_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, abs_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::abs_double"); test_abs(); @@ -239,8 +223,7 @@ TEST_F(TestCategory, abs_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, abs_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::abs_double"); test_abs, Kokkos::complex, TestDevice>(); @@ -253,9 +236,8 @@ TEST_F(TestCategory, abs_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - 
(!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, abs_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::abs_int"); test_abs(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_asum.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_asum.hpp index 65b5b2c06339..07cf2e699897 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_asum.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_asum.hpp @@ -28,8 +28,7 @@ void impl_test_asum(int N) { view_stride_adapter a("A", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -46,8 +45,7 @@ void impl_test_asum(int N) { // parts. // // This is safe; ArithTraits::imag is 0 if T is real. - expected_result += - MAT::abs(AT::real(a.h_view(i))) + MAT::abs(AT::imag(a.h_view(i))); + expected_result += MAT::abs(AT::real(a.h_view(i))) + MAT::abs(AT::imag(a.h_view(i))); } typename AT::mag_type nonconst_result = KokkosBlas::asum(a.d_view); @@ -62,8 +60,7 @@ void impl_test_asum(int N) { template int test_asum() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_asum(0); Test::impl_test_asum(13); @@ -72,8 +69,7 @@ int test_asum() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_asum(0); Test::impl_test_asum(13); @@ -81,8 +77,7 @@ 
int test_asum() { // Test::impl_test_asum(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_asum(0); Test::impl_test_asum(13); @@ -94,8 +89,7 @@ int test_asum() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, asum_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::asum_float"); test_asum(); @@ -104,8 +98,7 @@ TEST_F(TestCategory, asum_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, asum_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::asum_double"); test_asum(); @@ -114,8 +107,7 @@ TEST_F(TestCategory, asum_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, asum_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::asum_complex_double"); test_asum, TestDevice>(); @@ -123,9 +115,8 @@ TEST_F(TestCategory, asum_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, asum_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::asum_int"); test_asum(); diff --git 
a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby.hpp index 299e18e493b0..16d6bdc78f5f 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby.hpp @@ -34,16 +34,14 @@ void impl_test_axpby(int N) { const MagnitudeB eps = Kokkos::ArithTraits::epsilon(); const MagnitudeB max_val = 10; const MagnitudeB max_error = - (static_cast(Kokkos::ArithTraits::abs(a)) + - Kokkos::ArithTraits::abs(b)) * - max_val * eps; + (static_cast(Kokkos::ArithTraits::abs(a)) + Kokkos::ArithTraits::abs(b)) * max_val * + eps; view_stride_adapter x("X", N); view_stride_adapter y("Y", N); view_stride_adapter org_y("Org_Y", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -58,8 +56,7 @@ void impl_test_axpby(int N) { KokkosBlas::axpby(a, x.d_view, b, y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * org_y.h_view(i)), - y.h_view(i), 2 * max_error); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * org_y.h_view(i)), y.h_view(i), 2 * max_error); } // Re-randomize y @@ -68,8 +65,7 @@ void impl_test_axpby(int N) { KokkosBlas::axpby(a, x.d_view_const, b, y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * org_y.h_view(i)), - y.h_view(i), 2 * max_error); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * org_y.h_view(i)), y.h_view(i), 2 * max_error); } } @@ -88,12 +84,10 @@ void impl_test_axpby_mv(int N, int K) { const MagnitudeB eps = Kokkos::ArithTraits::epsilon(); const MagnitudeB max_val = 10; const MagnitudeB max_error = - (static_cast(Kokkos::ArithTraits::abs(a)) + - Kokkos::ArithTraits::abs(b)) * - max_val * eps; + (static_cast(Kokkos::ArithTraits::abs(a)) + Kokkos::ArithTraits::abs(b)) * max_val * + 
eps; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -114,9 +108,7 @@ void impl_test_axpby_mv(int N, int K) { for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK( - static_cast(a * x.h_view(i, j) + b * org_y.h_view(i, j)), - y.h_view(i, j), 2 * max_error); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j) + b * org_y.h_view(i, j)), y.h_view(i, j), 2 * max_error); } } @@ -126,9 +118,7 @@ void impl_test_axpby_mv(int N, int K) { for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK( - static_cast(a * x.h_view(i, j) + b * org_y.h_view(i, j)), - y.h_view(i, j), 2 * max_error); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j) + b * org_y.h_view(i, j)), y.h_view(i, j), 2 * max_error); } } } @@ -137,8 +127,7 @@ void impl_test_axpby_mv(int N, int K) { template int test_axpby() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_axpby(0); @@ -148,8 +137,7 @@ int test_axpby() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_axpby(0); @@ -158,8 +146,7 @@ int test_axpby() { Test::impl_test_axpby(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_axpby(0); @@ -168,8 +155,7 @@ int 
test_axpby() { Test::impl_test_axpby(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_axpby(1024); Test::impl_test_axpby(1024); #endif @@ -180,8 +166,7 @@ int test_axpby() { template int test_axpby_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_axpby_mv(0, 5); @@ -191,8 +176,7 @@ int test_axpby_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_axpby_mv(0, 5); @@ -201,8 +185,7 @@ int test_axpby_mv() { Test::impl_test_axpby_mv(132231, 5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_axpby_mv(0, 5); @@ -211,8 +194,7 @@ int test_axpby_mv() { Test::impl_test_axpby_mv(132231, 5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_axpby_mv(1024, 5); Test::impl_test_axpby_mv(1024, 5); #endif @@ -221,8 +203,7 @@ int test_axpby_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_float"); test_axpby(); @@ -236,8 +217,7 @@ TEST_F(TestCategory, axpby_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_double"); test_axpby(); @@ -250,8 +230,7 @@ TEST_F(TestCategory, axpby_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_complex_double"); test_axpby, Kokkos::complex, TestDevice>(); @@ -264,9 +243,8 @@ TEST_F(TestCategory, axpby_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_int"); test_axpby(); @@ -279,8 +257,7 @@ TEST_F(TestCategory, axpby_mv_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, axpby_double_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_double_int"); test_axpby(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby_unification.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby_unification.hpp index 
6ce7bad0b148..4f9b394c258f 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby_unification.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpby_unification.hpp @@ -76,22 +76,16 @@ constexpr bool isRank0() { return false; } -template -void impl_test_axpby_unification_compare( - tA const& a, tX const& x, tB const& b, tY const& y, int N, - bool testWithNanY, - typename Kokkos::ArithTraits::mag_type const max_val, - typename Kokkos::ArithTraits::mag_type const max_error, - tScalarA const inputValueA = Kokkos::ArithTraits::zero(), - tScalarB const inputValueB = Kokkos::ArithTraits::zero()) { - using ScalarTypeX = - typename std::remove_const::type; - using ScalarTypeY = - typename std::remove_const::type; - - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); +template +void impl_test_axpby_unification_compare(tA const& a, tX const& x, tB const& b, tY const& y, int N, bool testWithNanY, + typename Kokkos::ArithTraits::mag_type const max_val, + typename Kokkos::ArithTraits::mag_type const max_error, + tScalarA const inputValueA = Kokkos::ArithTraits::zero(), + tScalarB const inputValueB = Kokkos::ArithTraits::zero()) { + using ScalarTypeX = typename std::remove_const::type; + using ScalarTypeY = typename std::remove_const::type; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarTypeX randStart, randEnd; @@ -121,8 +115,7 @@ void impl_test_axpby_unification_compare( valueB = b; KokkosBlas::axpby(a, x.d_view, b, y.d_view); } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueB = inputValueB; } else { typename tB::HostMirror h_b("h_B"); @@ -136,8 +129,7 @@ void impl_test_axpby_unification_compare( KokkosBlas::axpby(a, x.d_view, b.d_view, y.d_view); } } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueA = inputValueA; } else { typename tA::HostMirror h_a("h_A"); @@ -148,8 +140,7 @@ void impl_test_axpby_unification_compare( 
valueB = b; KokkosBlas::axpby(a, x.d_view, b, y.d_view); } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueB = inputValueB; } else { typename tB::HostMirror h_b("h_B"); @@ -169,8 +160,7 @@ void impl_test_axpby_unification_compare( valueB = b; KokkosBlas::axpby(a.d_view, x.d_view, b, y.d_view); } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueB = inputValueB; } else { typename tB::HostMirror h_b("h_B"); @@ -189,9 +179,8 @@ void impl_test_axpby_unification_compare( if (testWithNanY == false) { for (int i(0); i < N; ++i) { - EXPECT_NEAR_KK(static_cast(valueA * x.h_view(i) + - valueB * org_y.h_view(i)), - y.h_view(i), 4. * max_error); + EXPECT_NEAR_KK(static_cast(valueA * x.h_view(i) + valueB * org_y.h_view(i)), y.h_view(i), + 4. * max_error); } } else { // ******************************************************** @@ -220,28 +209,22 @@ void impl_test_axpby_unification_compare( } else { EXPECT_NE(y.h_view(i), Kokkos::ArithTraits::nan()); } - EXPECT_NEAR_KK(static_cast(valueA * x.h_view(i)), - y.h_view(i), 4. * max_error); + EXPECT_NEAR_KK(static_cast(valueA * x.h_view(i)), y.h_view(i), 4. 
* max_error); } } } -template -void impl_test_axpby_mv_unification_compare( - tA const& a, tX const& x, tB const& b, tY const& y, int N, int K, - bool testWithNanY, - typename Kokkos::ArithTraits::mag_type const max_val, - typename Kokkos::ArithTraits::mag_type const max_error, - tScalarA const inputValueA = Kokkos::ArithTraits::zero(), - tScalarB const inputValueB = Kokkos::ArithTraits::zero()) { - using ScalarTypeX = - typename std::remove_const::type; - using ScalarTypeY = - typename std::remove_const::type; - - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); +template +void impl_test_axpby_mv_unification_compare(tA const& a, tX const& x, tB const& b, tY const& y, int N, int K, + bool testWithNanY, + typename Kokkos::ArithTraits::mag_type const max_val, + typename Kokkos::ArithTraits::mag_type const max_error, + tScalarA const inputValueA = Kokkos::ArithTraits::zero(), + tScalarB const inputValueB = Kokkos::ArithTraits::zero()) { + using ScalarTypeX = typename std::remove_const::type; + using ScalarTypeY = typename std::remove_const::type; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarTypeX randStart, randEnd; @@ -284,8 +267,7 @@ void impl_test_axpby_mv_unification_compare( valueB = b; KokkosBlas::axpby(a, x.d_view, b, y.d_view); } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueB = inputValueB; } else { typename tB::HostMirror h_b("h_B"); @@ -298,8 +280,7 @@ void impl_test_axpby_mv_unification_compare( KokkosBlas::axpby(a, x.d_view, b.d_view, y.d_view); } } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueA = inputValueA; } else { typename tA::HostMirror h_a("h_A"); @@ -310,8 +291,7 @@ void impl_test_axpby_mv_unification_compare( valueB = b; KokkosBlas::axpby(a, x.d_view, b, y.d_view); } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueB = inputValueB; } else { typename 
tB::HostMirror h_b("h_B"); @@ -329,8 +309,7 @@ void impl_test_axpby_mv_unification_compare( valueB = b; KokkosBlas::axpby(a.d_view, x.d_view, b, y.d_view); } else if constexpr (isRank0()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { valueB = inputValueB; } else { typename tB::HostMirror h_b("h_B"); @@ -371,22 +350,18 @@ void impl_test_axpby_mv_unification_compare( << std::endl; #endif vanillaValue = - static_cast(a.h_view(a_k) * x.h_view(i, k) + - b.h_view(b_k) * org_y.h_view(i, k)); + static_cast(a.h_view(a_k) * x.h_view(i, k) + b.h_view(b_k) * org_y.h_view(i, k)); } else { int a_k(a.h_view.extent(0) == 1 ? 0 : k); - vanillaValue = static_cast( - a.h_view(a_k) * x.h_view(i, k) + valueB * org_y.h_view(i, k)); + vanillaValue = static_cast(a.h_view(a_k) * x.h_view(i, k) + valueB * org_y.h_view(i, k)); } } else { if constexpr (bIsRank1) { (void)valueB; // Avoid "set but not used" error int b_k(b.h_view.extent(0) == 1 ? 0 : k); - vanillaValue = static_cast( - valueA * x.h_view(i, k) + b.h_view(b_k) * org_y.h_view(i, k)); + vanillaValue = static_cast(valueA * x.h_view(i, k) + b.h_view(b_k) * org_y.h_view(i, k)); } else { - vanillaValue = static_cast( - valueA * x.h_view(i, k) + valueB * org_y.h_view(i, k)); + vanillaValue = static_cast(valueA * x.h_view(i, k) + valueB * org_y.h_view(i, k)); } } #if 0 @@ -411,8 +386,7 @@ void impl_test_axpby_mv_unification_compare( if constexpr (aIsRank1) { (void)valueA; // Avoid "set but not used" error int a_k(a.h_view.extent(0) == 1 ? 
0 : k); - vanillaValue = - static_cast(a.h_view(a_k) * x.h_view(i, k)); + vanillaValue = static_cast(a.h_view(a_k) * x.h_view(i, k)); #if 0 ScalarTypeY tmp = static_cast(a.h_view(a_k) * x.h_view(i, k) + valueB * org_y.h_view(i, k)); std::cout << "i = " << i @@ -468,9 +442,8 @@ void impl_test_axpby_mv_unification_compare( } } -template +template void impl_test_axpby_unification(int const N) { using ViewTypeAr0 = Kokkos::View; using ViewTypeAr1s_1 = Kokkos::View; @@ -484,10 +457,8 @@ void impl_test_axpby_unification(int const N) { using ViewTypeY = Kokkos::View; - std::array const valuesA{ - -1, Kokkos::ArithTraits::zero(), 1, 3}; - std::array const valuesB{ - -1, Kokkos::ArithTraits::zero(), 1, 5}; + std::array const valuesA{-1, Kokkos::ArithTraits::zero(), 1, 3}; + std::array const valuesB{-1, Kokkos::ArithTraits::zero(), 1, 5}; // eps should probably be based on tScalarB since that is the type // in which the result is computed. @@ -495,9 +466,8 @@ void impl_test_axpby_unification(int const N) { MagnitudeB const eps = Kokkos::ArithTraits::epsilon(); MagnitudeB const max_val = 10; MagnitudeB const max_error = - static_cast( - Kokkos::ArithTraits::abs(valuesA[valuesA.size() - 1]) + - Kokkos::ArithTraits::abs(valuesB[valuesB.size() - 1])) * + static_cast(Kokkos::ArithTraits::abs(valuesA[valuesA.size() - 1]) + + Kokkos::ArithTraits::abs(valuesB[valuesB.size() - 1])) * max_val * eps; // ************************************************************ @@ -518,15 +488,13 @@ void impl_test_axpby_unification(int const N) { a = valueA; b = valueB; - impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( - a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, tScalarA, 
view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( - a, x, b, y, N, true, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -556,14 +524,12 @@ void impl_test_axpby_unification(int const N) { a = valueA; Kokkos::deep_copy(b, valueB); - impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( - a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( + impl_test_axpby_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>( a, x, b, y, N, true, max_val, max_error); } } @@ -589,16 +555,13 @@ void impl_test_axpby_unification(int const N) { a = valueA; Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, true, max_val, max_error); } } } @@ -622,15 +585,13 @@ void impl_test_axpby_unification(int const N) { a = valueA; Kokkos::deep_copy(b.d_base, valueB); - 
impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, Device>( + a, x, b, y, N, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, true, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, true, max_val, max_error); } } } @@ -657,15 +618,13 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a, valueA); b = valueB; - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( - a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( - a, x, b, y, N, true, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + tScalarB, view_stride_adapter, Device>(a, x, b, y, N, true, + max_val, max_error); } } } @@ -678,8 +637,7 @@ void impl_test_axpby_unification(int const N) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Starting case 06/16" << std::endl; #endif - if constexpr ((std::is_same_v) || - (std::is_same_v)) { + if constexpr ((std::is_same_v) || (std::is_same_v)) { // Avoid the test, due to compilation errors } else { for (size_t i(0); i < valuesA.size(); ++i) { @@ -694,14 +652,12 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a, valueA); 
Kokkos::deep_copy(b, valueB); - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( - a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>(a, x, b, y, N, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( + impl_test_axpby_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>( a, x, b, y, N, true, max_val, max_error); } } @@ -730,17 +686,13 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, false, - max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, true, max_val, max_error); } } } @@ -768,16 +720,13 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, false, 
max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + impl_test_axpby_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, true, max_val, max_error); } } } @@ -802,17 +751,15 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); b = valueB; - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, false, - max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -839,17 +786,15 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b, valueB); - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, false, - max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, 
max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -875,17 +820,14 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -909,17 +851,15 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, view_stride_adapter, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, true, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, true, max_val, 
+ max_error); } } } @@ -943,17 +883,14 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); b = valueB; - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, false, - max_val, max_error); + impl_test_axpby_unification_compare, view_stride_adapter, + tScalarB, tScalarB, view_stride_adapter, Device>( + a, x, b, y, N, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -980,17 +917,15 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b, valueB); - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, false, - max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + impl_test_axpby_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -1015,18 +950,15 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - 
view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, view_stride_adapter, + tScalarB, view_stride_adapter, + view_stride_adapter, Device>(a, x, b, y, N, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, true, - max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } @@ -1050,26 +982,22 @@ void impl_test_axpby_unification(int const N) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, false, max_val, max_error); + impl_test_axpby_unification_compare, view_stride_adapter, + tScalarB, view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, true, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, true, max_val, + max_error); } } } } } -template +template void impl_test_axpby_mv_unification(int const N, int const K) { // std::cout << "=========================================" << std::endl; // std::cout << "Entering impl_test_axpby_mv_unification()" @@ -1094,10 +1022,8 @@ void impl_test_axpby_mv_unification(int const N, int const K) { using ViewTypeY = Kokkos::View; - std::array const 
valuesA{ - -1, Kokkos::ArithTraits::zero(), 1, 3}; - std::array const valuesB{ - -1, Kokkos::ArithTraits::zero(), 1, 5}; + std::array const valuesA{-1, Kokkos::ArithTraits::zero(), 1, 3}; + std::array const valuesB{-1, Kokkos::ArithTraits::zero(), 1, 5}; // eps should probably be based on tScalarB since that is the type // in which the result is computed. @@ -1105,9 +1031,8 @@ void impl_test_axpby_mv_unification(int const N, int const K) { MagnitudeB const eps = Kokkos::ArithTraits::epsilon(); MagnitudeB const max_val = 10; MagnitudeB const max_error = - static_cast( - Kokkos::ArithTraits::abs(valuesA[valuesA.size() - 1]) + - Kokkos::ArithTraits::abs(valuesB[valuesB.size() - 1])) * + static_cast(Kokkos::ArithTraits::abs(valuesA[valuesA.size() - 1]) + + Kokkos::ArithTraits::abs(valuesB[valuesB.size() - 1])) * max_val * eps; // ************************************************************ @@ -1128,15 +1053,13 @@ void impl_test_axpby_mv_unification(int const N, int const K) { a = valueA; b = valueB; - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( - a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( - a, x, b, y, N, K, true, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1163,14 +1086,12 @@ void impl_test_axpby_mv_unification(int const N, int const K) { a = valueA; Kokkos::deep_copy(b, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( + 
impl_test_axpby_mv_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>( a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( + impl_test_axpby_mv_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>( a, x, b, y, N, K, true, max_val, max_error); } } @@ -1196,16 +1117,13 @@ void impl_test_axpby_mv_unification(int const N, int const K) { a = valueA; Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, true, max_val, max_error); } } } @@ -1239,10 +1157,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, b.h_base); } - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); } } } @@ -1265,15 +1182,13 @@ void impl_test_axpby_mv_unification(int const N, int const K) { a = valueA; Kokkos::deep_copy(b.d_base, valueB); - 
impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, true, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, true, max_val, max_error); } } } @@ -1307,10 +1222,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, b.h_base); } - impl_test_axpby_mv_unification_compare< - tScalarA, tScalarA, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); } } } @@ -1336,14 +1250,12 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a, valueA); b = valueB; - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( + impl_test_axpby_mv_unification_compare, tScalarB, + tScalarB, view_stride_adapter, Device>( a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - tScalarB, view_stride_adapter, Device>( + impl_test_axpby_mv_unification_compare, tScalarB, + tScalarB, view_stride_adapter, Device>( a, x, b, y, N, K, true, max_val, max_error); } } @@ -1357,8 +1269,7 @@ void 
impl_test_axpby_mv_unification(int const N, int const K) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Starting case 08/36" << std::endl; #endif - if constexpr ((std::is_same_v) || - (std::is_same_v)) { + if constexpr ((std::is_same_v) || (std::is_same_v)) { // Avoid the test, due to compilation errors } else { for (size_t i(0); i < valuesA.size(); ++i) { @@ -1373,14 +1284,12 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a, valueA); Kokkos::deep_copy(b, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( + impl_test_axpby_mv_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>( a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - ViewTypeBr0, view_stride_adapter, Device>( + impl_test_axpby_mv_unification_compare, tScalarB, + ViewTypeBr0, view_stride_adapter, Device>( a, x, b, y, N, K, true, max_val, max_error); } } @@ -1409,17 +1318,13 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, 
K, true, max_val, max_error); } } } @@ -1457,11 +1362,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, b.h_base); } - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); } } } @@ -1488,16 +1391,13 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a, valueA); Kokkos::deep_copy(b.d_base, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, true, max_val, max_error); } } } @@ -1535,10 +1435,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, b.h_base); } - impl_test_axpby_mv_unification_compare< - tScalarA, ViewTypeAr0, view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + impl_test_axpby_mv_unification_compare, tScalarB, + view_stride_adapter, view_stride_adapter, + Device>(a, x, b, y, N, K, false, max_val, max_error); } } } @@ -1562,17 +1461,15 @@ void impl_test_axpby_mv_unification(int const N, int const 
K) { Kokkos::deep_copy(a.d_base, valueA); b = valueB; - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1599,17 +1496,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1635,17 +1530,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - 
view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1680,10 +1572,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(b.d_base, b.h_base); } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); } } } @@ -1707,16 +1598,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, true, max_val, 
max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1751,10 +1640,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(b.d_base, b.h_base); } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); } } } @@ -1787,17 +1675,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, a.h_base); } b = valueB; - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1834,17 +1720,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, a.h_base); } Kokkos::deep_copy(b, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + 
view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1880,17 +1764,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -1936,10 +1817,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(b.d_base, b.h_base); } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); } } } @@ -1973,16 +1853,14 @@ void 
impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, true, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2029,10 +1907,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); } } } @@ -2055,17 +1932,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); b = valueB; - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, 
tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2092,17 +1967,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2128,17 +2001,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + 
tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2173,10 +2043,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(b.d_base, b.h_base); } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); } } } @@ -2200,16 +2069,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, valueA); Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, true, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2244,10 +2111,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(b.d_base, b.h_base); } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, 
view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); } } } @@ -2280,17 +2146,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, a.h_base); } b = valueB; - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, tScalarB, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, tScalarB, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2327,17 +2191,15 @@ void impl_test_axpby_mv_unification(int const N, int const K) { Kokkos::deep_copy(a.d_base, a.h_base); } Kokkos::deep_copy(b, valueB); - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, false, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { - impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, ViewTypeBr0, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + impl_test_axpby_mv_unification_compare, + view_stride_adapter, tScalarB, ViewTypeBr0, + view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ 
-2373,17 +2235,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, - view_stride_adapter, Device>(a, x, b, y, N, K, true, - max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2430,10 +2289,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, + max_val, max_error); } } } @@ -2467,16 +2325,14 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); if (valueB == Kokkos::ArithTraits::zero()) { impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - 
view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, true, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, true, + max_val, max_error); } } } @@ -2523,10 +2379,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } impl_test_axpby_mv_unification_compare< - tScalarA, view_stride_adapter, - view_stride_adapter, tScalarB, - view_stride_adapter, view_stride_adapter, - Device>(a, x, b, y, N, K, false, max_val, max_error); + tScalarA, view_stride_adapter, view_stride_adapter, tScalarB, + view_stride_adapter, view_stride_adapter, Device>(a, x, b, y, N, K, false, max_val, + max_error); } } } @@ -2537,130 +2392,103 @@ void impl_test_axpby_mv_unification(int const N, int const K) { } // namespace Test -template +template int test_axpby_unification() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-LLL" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutLeft, tScalarB, - Kokkos::LayoutLeft, tScalarY, Kokkos::LayoutLeft, Device>(14); + Test::impl_test_axpby_unification(14); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-RRR" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutRight, tScalarX, Kokkos::LayoutRight, tScalarB, - Kokkos::LayoutRight, tScalarY, Kokkos::LayoutRight, Device>(14); + 
Test::impl_test_axpby_unification(14); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-SSS" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutStride, tScalarB, - Kokkos::LayoutStride, tScalarY, Kokkos::LayoutStride, Device>(14); + Test::impl_test_axpby_unification(14); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-SLL" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutStride, tScalarB, - Kokkos::LayoutLeft, tScalarY, Kokkos::LayoutLeft, Device>(14); + Test::impl_test_axpby_unification(14); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-LSS" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutLeft, tScalarB, - Kokkos::LayoutStride, tScalarY, Kokkos::LayoutStride, Device>(14); + Test::impl_test_axpby_unification(14); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-SRS" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutStride, tScalarB, - Kokkos::LayoutRight, tScalarY, Kokkos::LayoutStride, Device>(14); + Test::impl_test_axpby_unification(14); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Calling impl_test_axpby_unif(), L-LSR" << std::endl; #endif - Test::impl_test_axpby_unification< - tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutLeft, tScalarB, - Kokkos::LayoutStride, tScalarY, Kokkos::LayoutRight, Device>(14); 
+ Test::impl_test_axpby_unification(14); #endif return 1; } -template +template int test_axpby_mv_unification() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutLeft, tScalarB, - Kokkos::LayoutLeft, tScalarY, Kokkos::LayoutLeft, Device>( - 14, numVecsAxpbyTest); + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + Test::impl_test_axpby_mv_unification(14, numVecsAxpbyTest); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutRight, tScalarX, Kokkos::LayoutRight, tScalarB, - Kokkos::LayoutRight, tScalarY, Kokkos::LayoutRight, Device>( - 14, numVecsAxpbyTest); -#endif - -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutStride, tScalarB, - Kokkos::LayoutStride, tScalarY, Kokkos::LayoutStride, Device>( - 14, numVecsAxpbyTest); -#endif - -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutStride, tScalarB, - Kokkos::LayoutLeft, tScalarY, Kokkos::LayoutLeft, Device>( - 14, numVecsAxpbyTest); - Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutLeft, tScalarB, - Kokkos::LayoutStride, tScalarY, Kokkos::LayoutStride, Device>( - 14, numVecsAxpbyTest); - - Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutStride, tScalarB, - Kokkos::LayoutRight, tScalarY, Kokkos::LayoutStride, Device>( - 14, numVecsAxpbyTest); - - 
Test::impl_test_axpby_mv_unification< - tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutLeft, tScalarB, - Kokkos::LayoutStride, tScalarY, Kokkos::LayoutRight, Device>( - 14, numVecsAxpbyTest); + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + Test::impl_test_axpby_mv_unification(14, + numVecsAxpbyTest); +#endif + +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + Test::impl_test_axpby_mv_unification(14, + numVecsAxpbyTest); +#endif + +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_axpby_mv_unification(14, numVecsAxpbyTest); + Test::impl_test_axpby_mv_unification(14, + numVecsAxpbyTest); + + Test::impl_test_axpby_mv_unification(14, + numVecsAxpbyTest); + + Test::impl_test_axpby_mv_unification(14, + numVecsAxpbyTest); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_unification_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_unification_float"); test_axpby_unification(); @@ -2674,44 +2502,36 @@ TEST_F(TestCategory, axpby_mv_unification_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_unification_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_unification_double"); test_axpby_unification(); } TEST_F(TestCategory, axpby_mv_unification_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::axpby_mv_unification_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_mv_unification_double"); test_axpby_mv_unification(); Kokkos::Profiling::popRegion(); } #endif #if 
defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_unification_complex_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::axpby_unification_complex_double"); - test_axpby_unification, Kokkos::complex, - Kokkos::complex, Kokkos::complex, - TestDevice>(); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_unification_complex_double"); + test_axpby_unification, Kokkos::complex, Kokkos::complex, + Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, axpby_mv_unification_complex_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::axpby_mv_unification_complex_double"); - test_axpby_mv_unification, Kokkos::complex, - Kokkos::complex, Kokkos::complex, - TestDevice>(); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_mv_unification_complex_double"); + test_axpby_mv_unification, Kokkos::complex, Kokkos::complex, + Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpby_unification_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_unification_int"); test_axpby_unification(); @@ -2724,17 +2544,14 @@ TEST_F(TestCategory, axpby_mv_unification_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, axpby_unification_double_int) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::axpby_unification_double_int"); + 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_unification_double_int"); test_axpby_unification(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, axpby_double_mv_unification_int) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::axpby_mv_unification_double_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpby_mv_unification_double_int"); test_axpby_mv_unification(); Kokkos::Profiling::popRegion(); } diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpy.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpy.hpp index 76528f4a5204..94e4260268f9 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpy.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_axpy.hpp @@ -31,16 +31,13 @@ void impl_test_axpy(int N) { const MagnitudeB max_val = 10; const MagnitudeB eps = Kokkos::ArithTraits::epsilon(); const MagnitudeB max_error = - (static_cast(Kokkos::ArithTraits::abs(a)) * max_val + - max_val) * - eps; + (static_cast(Kokkos::ArithTraits::abs(a)) * max_val + max_val) * eps; view_stride_adapter x("X", N); view_stride_adapter y("Y", N); view_stride_adapter org_y("Org_Y", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -88,12 +85,9 @@ void impl_test_axpy_mv(int N, int K) { const MagnitudeB eps = Kokkos::ArithTraits::epsilon(); const MagnitudeB max_val = 10; const MagnitudeB max_error = - (static_cast(Kokkos::ArithTraits::abs(a)) * max_val + - max_val) * - eps; + (static_cast(Kokkos::ArithTraits::abs(a)) * max_val + max_val) * eps; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -113,9 +107,7 @@ void impl_test_axpy_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK( - static_cast(a * x.h_view(i, j) + org_y.h_view(i, j)), - y.h_view(i, j), 2 * 
max_error); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j) + org_y.h_view(i, j)), y.h_view(i, j), 2 * max_error); } } @@ -125,9 +117,7 @@ void impl_test_axpy_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK( - static_cast(a * x.h_view(i, j) + org_y.h_view(i, j)), - y.h_view(i, j), 2 * max_error); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j) + org_y.h_view(i, j)), y.h_view(i, j), 2 * max_error); } } } @@ -136,8 +126,7 @@ void impl_test_axpy_mv(int N, int K) { template int test_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_axpy(0); @@ -147,8 +136,7 @@ int test_axpy() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_axpy(0); @@ -157,8 +145,7 @@ int test_axpy() { // Test::impl_test_axpy(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_axpy(0); @@ -167,8 +154,7 @@ int test_axpy() { // Test::impl_test_axpy(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_axpy(1024); Test::impl_test_axpy(1024); #endif @@ -179,8 +165,7 @@ int test_axpy() { template int 
test_axpy_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_axpy_mv(0, 5); @@ -190,8 +175,7 @@ int test_axpy_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_axpy_mv(0, 5); @@ -200,8 +184,7 @@ int test_axpy_mv() { // Test::impl_test_axpy_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_axpy_mv(0, 5); @@ -210,8 +193,7 @@ int test_axpy_mv() { // Test::impl_test_axpy_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_axpy_mv(1024, 5); Test::impl_test_axpy_mv(1024, 5); #endif @@ -220,8 +202,7 @@ int test_axpy_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpy_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpy_float"); test_axpy(); @@ -235,8 +216,7 @@ TEST_F(TestCategory, axpy_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + 
(!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpy_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpy_double"); test_axpy(); @@ -250,8 +230,7 @@ TEST_F(TestCategory, axpy_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpy_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpy_complex_double"); test_axpy, Kokkos::complex, TestDevice>(); @@ -264,9 +243,8 @@ TEST_F(TestCategory, axpy_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, axpy_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpy_int"); test_axpy(); @@ -279,8 +257,7 @@ TEST_F(TestCategory, axpy_mv_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, axpy_double_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::axpy_double_int"); test_axpy(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_dot.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_dot.hpp index 911925476aee..3de0fae12d51 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_dot.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_dot.hpp @@ -30,8 +30,7 @@ void impl_test_dot(int N) { view_stride_adapter a("a", N); view_stride_adapter b("b", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ 
-48,13 +47,11 @@ void impl_test_dot(int N) { Kokkos::deep_copy(b.h_base, b.d_base); ScalarA expected_result = 0; - for (int i = 0; i < N; i++) - expected_result += ats::conj(a.h_view(i)) * b.h_view(i); + for (int i = 0; i < N; i++) expected_result += ats::conj(a.h_view(i)) * b.h_view(i); ScalarA nonconst_nonconst_result = KokkosBlas::dot(a.d_view, b.d_view); - double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); ScalarA const_const_result = KokkosBlas::dot(a.d_view_const, b.d_view_const); EXPECT_NEAR_KK(const_const_result, expected_result, eps * expected_result); @@ -75,8 +72,7 @@ void impl_test_dot_mv(int N, int K) { view_stride_adapter a("A", N, K); view_stride_adapter b("B", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -95,8 +91,7 @@ void impl_test_dot_mv(int N, int K) { ScalarA* expected_result = new ScalarA[K]; for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); - for (int i = 0; i < N; i++) - expected_result[j] += ats::conj(a.h_view(i, j)) * b.h_view(i, j); + for (int i = 0; i < N; i++) expected_result[j] += ats::conj(a.h_view(i, j)) * b.h_view(i, j); } double eps = std::is_same::value ? 
2 * 1e-5 : 1e-7; @@ -107,32 +102,28 @@ void impl_test_dot_mv(int N, int K) { Kokkos::fence(); for (int k = 0; k < K; k++) { ScalarA nonconst_nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], eps * expected_result[k]); } KokkosBlas::dot(r, a.d_view_const, b.d_view_const); Kokkos::fence(); for (int k = 0; k < K; k++) { ScalarA const_const_result = r(k); - EXPECT_NEAR_KK(const_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(const_const_result, expected_result[k], eps * expected_result[k]); } KokkosBlas::dot(r, a.d_view, b.d_view_const); Kokkos::fence(); for (int k = 0; k < K; k++) { ScalarA non_const_const_result = r(k); - EXPECT_NEAR_KK(non_const_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(non_const_const_result, expected_result[k], eps * expected_result[k]); } KokkosBlas::dot(r, a.d_view_const, b.d_view); Kokkos::fence(); for (int k = 0; k < K; k++) { ScalarA const_non_const_result = r(k); - EXPECT_NEAR_KK(const_non_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(const_non_const_result, expected_result[k], eps * expected_result[k]); } delete[] expected_result; @@ -142,8 +133,7 @@ void impl_test_dot_mv(int N, int K) { template int test_dot() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_dot(0); @@ -153,8 +143,7 @@ int test_dot() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View 
view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_dot(0); @@ -163,8 +152,7 @@ int test_dot() { // Test::impl_test_dot(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_dot(0); @@ -173,8 +161,7 @@ int test_dot() { // Test::impl_test_dot(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_dot(1024); Test::impl_test_dot(1024); #endif @@ -185,8 +172,7 @@ int test_dot() { template int test_dot_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_dot_mv(0, 5); @@ -197,8 +183,7 @@ int test_dot_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_dot_mv(0, 5); @@ -210,8 +195,7 @@ int test_dot_mv() { // Removing the layout stride test as ViewTypeA a("a", N); // is invalid since the view constructor needs a stride object! 
-#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_dot_mv(0, 5); @@ -221,8 +205,7 @@ int test_dot_mv() { // Test::impl_test_dot_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_dot_mv(1024, 5); Test::impl_test_dot_mv(1024, 5); #endif @@ -231,8 +214,7 @@ int test_dot_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, dot_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::dot_float"); test_dot(); @@ -246,8 +228,7 @@ TEST_F(TestCategory, dot_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, dot_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::dot_double"); test_dot(); @@ -261,8 +242,7 @@ TEST_F(TestCategory, dot_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, dot_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::dot_complex_double"); test_dot, Kokkos::complex, TestDevice>(); @@ -275,9 +255,8 @@ TEST_F(TestCategory, dot_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, dot_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::dot_int"); test_dot(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_iamax.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_iamax.hpp index 49f759958a16..94ff8b3ebe4a 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_iamax.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_iamax.hpp @@ -29,8 +29,7 @@ void impl_test_iamax(int N) { view_stride_adapter a("X", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -66,11 +65,8 @@ void impl_test_iamax(int N) { { // printf("impl_test_iamax -- return result as a 0-D View on host -- N // %d\n", N); - typedef Kokkos::View - ViewType0D; - ViewType0D r("Iamax::Result 0-D View on host", - typename ViewTypeA::array_layout()); + typedef Kokkos::View ViewType0D; + ViewType0D r("Iamax::Result 0-D View on host", typename ViewTypeA::array_layout()); KokkosBlas::iamax(r, a.d_view); Kokkos::fence(); @@ -85,10 +81,8 @@ void impl_test_iamax(int N) { { // printf("impl_test_iamax -- return result as a 0-D View on device -- N // %d\n", N); - typedef Kokkos::View - ViewType0D; - ViewType0D r("Iamax::Result 0-D View on device", - typename ViewTypeA::array_layout()); + typedef Kokkos::View ViewType0D; + ViewType0D r("Iamax::Result 0-D View on device", typename ViewTypeA::array_layout()); typename ViewType0D::HostMirror h_r = Kokkos::create_mirror_view(r); size_type nonconst_max_loc, const_max_loc; @@ -118,8 +112,7 @@ void impl_test_iamax_mv(int N, int K) { view_stride_adapter a("A", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, 
randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -148,11 +141,8 @@ void impl_test_iamax_mv(int N, int K) { { // printf("impl_test_iamax_mv -- return results as a 1-D View on host -- N // %d\n", N); - Kokkos::View rcontig( - "Iamax::Result View on host", K); - Kokkos::View - r = rcontig; + Kokkos::View rcontig("Iamax::Result View on host", K); + Kokkos::View r = rcontig; KokkosBlas::iamax(r, a.d_view); Kokkos::fence(); @@ -177,10 +167,8 @@ void impl_test_iamax_mv(int N, int K) { // printf("impl_test_iamax_mv -- return results as a 1-D View on device -- N // %d\n", N); Kokkos::View rcontig("Iamax::Result View on host", K); - Kokkos::View r = - rcontig; - typename Kokkos::View::HostMirror h_r = + Kokkos::View r = rcontig; + typename Kokkos::View::HostMirror h_r = Kokkos::create_mirror_view(rcontig); KokkosBlas::iamax(r, a.d_view); @@ -210,8 +198,7 @@ void impl_test_iamax_mv(int N, int K) { template int test_iamax() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_iamax(0); Test::impl_test_iamax(13); @@ -220,8 +207,7 @@ int test_iamax() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_iamax(0); Test::impl_test_iamax(13); @@ -229,8 +215,7 @@ int test_iamax() { // Test::impl_test_iamax(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_iamax(0); Test::impl_test_iamax(13); @@ -244,8 +229,7 @@ int test_iamax() { 
template int test_iamax_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_iamax_mv(0, 5); Test::impl_test_iamax_mv(13, 5); @@ -254,8 +238,7 @@ int test_iamax_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_iamax_mv(0, 5); Test::impl_test_iamax_mv(13, 5); @@ -263,8 +246,7 @@ int test_iamax_mv() { // Test::impl_test_iamax_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_iamax_mv(0, 5); Test::impl_test_iamax_mv(13, 5); @@ -276,8 +258,7 @@ int test_iamax_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, iamax_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::iamax_float"); test_iamax(); @@ -291,8 +272,7 @@ TEST_F(TestCategory, iamax_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, iamax_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::iamax_double"); test_iamax(); @@ -306,8 +286,7 @@ TEST_F(TestCategory, iamax_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - 
(!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, iamax_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::iamax_complex_double"); test_iamax, TestDevice>(); @@ -320,9 +299,8 @@ TEST_F(TestCategory, iamax_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, iamax_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::iamax_int"); test_iamax(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_mult.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_mult.hpp index 6555280f0dd2..f5755982e794 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_mult.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_mult.hpp @@ -36,8 +36,7 @@ void impl_test_mult(int N) { view_stride_adapter z("Z", N); view_stride_adapter org_z("Org_Z", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -63,27 +62,21 @@ void impl_test_mult(int N) { KokkosBlas::mult(b, z.d_view, a, x.d_view, y.d_view); Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i) + - b * org_z.h_view(i)), - z.h_view(i), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i) + b * org_z.h_view(i)), z.h_view(i), eps); } Kokkos::deep_copy(z.d_base, org_z.h_base); KokkosBlas::mult(b, z.d_view, a, x.d_view, y.d_view_const); Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i) + - b * org_z.h_view(i)), - z.h_view(i), eps); + 
EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i) + b * org_z.h_view(i)), z.h_view(i), eps); } Kokkos::deep_copy(z.d_base, org_z.h_base); KokkosBlas::mult(b, z.d_view, a, x.d_view_const, y.d_view_const); Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i) + - b * org_z.h_view(i)), - z.h_view(i), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i) + b * org_z.h_view(i)), z.h_view(i), eps); } } @@ -99,8 +92,7 @@ void impl_test_mult_mv(int N, int K) { view_stride_adapter z("Z", N, K); view_stride_adapter org_z("Org_Z", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -131,9 +123,8 @@ void impl_test_mult_mv(int N, int K) { Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i, j) + - b * org_z.h_view(i, j)), - z.h_view(i, j), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i, j) + b * org_z.h_view(i, j)), z.h_view(i, j), + eps); } } @@ -142,9 +133,8 @@ void impl_test_mult_mv(int N, int K) { Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i, j) + - b * org_z.h_view(i, j)), - z.h_view(i, j), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) * y.h_view(i, j) + b * org_z.h_view(i, j)), z.h_view(i, j), + eps); } } } @@ -153,58 +143,43 @@ void impl_test_mult_mv(int N, int K) { template int test_mult() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_mult( - 0); - 
Test::impl_test_mult( - 13); - Test::impl_test_mult( - 1024); + Test::impl_test_mult(0); + Test::impl_test_mult(13); + Test::impl_test_mult(1024); // Test::impl_test_mult(132231); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_mult( - 0); - Test::impl_test_mult( - 13); - Test::impl_test_mult( - 1024); + Test::impl_test_mult(0); + Test::impl_test_mult(13); + Test::impl_test_mult(1024); // Test::impl_test_mult(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_mult( - 0); - Test::impl_test_mult( - 13); - Test::impl_test_mult( - 1024); + Test::impl_test_mult(0); + Test::impl_test_mult(13); + Test::impl_test_mult(1024); // Test::impl_test_mult(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_mult( - 1024); - Test::impl_test_mult( - 1024); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_mult(1024); + Test::impl_test_mult(1024); #endif return 1; @@ -213,66 +188,50 @@ int test_mult() { template int test_mult_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - 
Test::impl_test_mult_mv(0, 5); - Test::impl_test_mult_mv(13, 5); - Test::impl_test_mult_mv(1024, 5); + Test::impl_test_mult_mv(0, 5); + Test::impl_test_mult_mv(13, 5); + Test::impl_test_mult_mv(1024, 5); // Test::impl_test_mult_mv(132231,5); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_mult_mv(0, 5); - Test::impl_test_mult_mv(13, 5); - Test::impl_test_mult_mv(1024, 5); + Test::impl_test_mult_mv(0, 5); + Test::impl_test_mult_mv(13, 5); + Test::impl_test_mult_mv(1024, 5); // Test::impl_test_mult_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_mult_mv(0, 5); - Test::impl_test_mult_mv(13, 5); - Test::impl_test_mult_mv(1024, 5); + Test::impl_test_mult_mv(0, 5); + Test::impl_test_mult_mv(13, 5); + Test::impl_test_mult_mv(1024, 5); // Test::impl_test_mult_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_mult_mv(1024, 5); - Test::impl_test_mult_mv(1024, 5); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_mult_mv(1024, 5); + Test::impl_test_mult_mv(1024, 5); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, mult_float) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::mult_float"); test_mult(); @@ -286,8 +245,7 @@ TEST_F(TestCategory, mult_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, mult_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::mult_double"); test_mult(); @@ -301,25 +259,21 @@ TEST_F(TestCategory, mult_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, mult_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::mult_complex_double"); - test_mult, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_mult, Kokkos::complex, Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, mult_mv_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::mult_mv_complex_double"); - test_mult_mv, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_mult_mv, Kokkos::complex, Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, mult_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::mult_int"); test_mult(); @@ -332,8 +286,7 @@ TEST_F(TestCategory, mult_mv_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, mult_double_int) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::mult_double_int"); test_mult(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm1.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm1.hpp index 24795878d143..3942dafe9343 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm1.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm1.hpp @@ -29,8 +29,7 @@ void impl_test_nrm1(int N) { view_stride_adapter a("a", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -38,10 +37,7 @@ void impl_test_nrm1(int N) { Kokkos::deep_copy(a.h_base, a.d_base); - double eps = (std::is_same::mag_type, - float>::value - ? 1e-4 - : 1e-7); + double eps = (std::is_same::mag_type, float>::value ? 1e-4 : 1e-7); mag_type expected_result = 0; for (int i = 0; i < N; i++) { @@ -50,8 +46,7 @@ void impl_test_nrm1(int N) { // parts. See netlib, MKL, and CUBLAS documentation. // // This is safe; ArithTraits::imag is 0 if T is real. - expected_result += - MAT::abs(AT::real(a.h_view(i))) + MAT::abs(AT::imag(a.h_view(i))); + expected_result += MAT::abs(AT::real(a.h_view(i))) + MAT::abs(AT::imag(a.h_view(i))); } mag_type nonconst_result = KokkosBlas::nrm1(a.d_view); @@ -70,8 +65,7 @@ void impl_test_nrm1_mv(int N, int K) { view_stride_adapter a("A", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -79,18 +73,13 @@ void impl_test_nrm1_mv(int N, int K) { Kokkos::deep_copy(a.h_base, a.d_base); - double eps = (std::is_same::mag_type, - float>::value - ? 1e-4 - : 1e-7); + double eps = (std::is_same::mag_type, float>::value ? 
1e-4 : 1e-7); - Kokkos::View expected_result("Expected Nrm1", - K); + Kokkos::View expected_result("Expected Nrm1", K); for (int k = 0; k < K; k++) { expected_result(k) = MAT::zero(); for (int i = 0; i < N; i++) { - expected_result(k) += MAT::abs(AT::real(a.h_view(i, k))) + - MAT::abs(AT::imag(a.h_view(i, k))); + expected_result(k) += MAT::abs(AT::real(a.h_view(i, k))) + MAT::abs(AT::imag(a.h_view(i, k))); } } @@ -109,8 +98,7 @@ void impl_test_nrm1_mv(int N, int K) { template int test_nrm1() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm1(0); Test::impl_test_nrm1(13); @@ -119,8 +107,7 @@ int test_nrm1() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm1(0); Test::impl_test_nrm1(13); @@ -128,8 +115,7 @@ int test_nrm1() { Test::impl_test_nrm1(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm1(0); Test::impl_test_nrm1(13); @@ -143,8 +129,7 @@ int test_nrm1() { template int test_nrm1_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm1_mv(0, 5); Test::impl_test_nrm1_mv(13, 5); @@ -154,8 +139,7 @@ int test_nrm1_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - 
(!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm1_mv(0, 5); Test::impl_test_nrm1_mv(13, 5); @@ -164,8 +148,7 @@ int test_nrm1_mv() { Test::impl_test_nrm1_mv(132231, 5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm1_mv(0, 5); Test::impl_test_nrm1_mv(13, 5); @@ -178,8 +161,7 @@ int test_nrm1_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm1_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm1_float"); test_nrm1(); @@ -193,8 +175,7 @@ TEST_F(TestCategory, nrm1_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm1_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm1_double"); test_nrm1(); @@ -208,8 +189,7 @@ TEST_F(TestCategory, nrm1_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm1_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm1_complex_double"); test_nrm1, TestDevice>(); @@ -222,9 +202,8 @@ TEST_F(TestCategory, nrm1_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm1_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm1_int"); test_nrm1(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2.hpp index a9b3f7c10fe6..556d48f753b9 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2.hpp @@ -27,8 +27,7 @@ void impl_test_nrm2(int N) { view_stride_adapter a("a", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); @@ -42,8 +41,7 @@ void impl_test_nrm2(int N) { for (int i = 0; i < N; i++) { expected_result += AT::abs(a.h_view(i)) * AT::abs(a.h_view(i)); } - expected_result = - Kokkos::ArithTraits::sqrt(expected_result); + expected_result = Kokkos::ArithTraits::sqrt(expected_result); typename AT::mag_type nonconst_result = KokkosBlas::nrm2(a.d_view); EXPECT_NEAR_KK(nonconst_result, expected_result, eps * expected_result); @@ -59,8 +57,7 @@ void impl_test_nrm2_mv(int N, int K) { view_stride_adapter a("A", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); @@ -74,8 +71,7 @@ void impl_test_nrm2_mv(int N, int K) { for (int i = 0; i < N; i++) { expected_result[j] += AT::abs(a.h_view(i, j)) * AT::abs(a.h_view(i, j)); } - expected_result[j] = - Kokkos::ArithTraits::sqrt(expected_result[j]); + expected_result[j] = Kokkos::ArithTraits::sqrt(expected_result[j]); } double eps = std::is_same::value ? 
2 * 1e-5 : 1e-7; @@ -86,8 +82,7 @@ void impl_test_nrm2_mv(int N, int K) { Kokkos::fence(); for (int k = 0; k < K; k++) { typename AT::mag_type nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_result, expected_result[k], eps * expected_result[k]); } KokkosBlas::nrm2(r, a.d_view_const); @@ -104,8 +99,7 @@ void impl_test_nrm2_mv(int N, int K) { template int test_nrm2() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2(0); Test::impl_test_nrm2(13); @@ -114,8 +108,7 @@ int test_nrm2() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2(0); Test::impl_test_nrm2(13); @@ -123,8 +116,7 @@ int test_nrm2() { // Test::impl_test_nrm2(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2(0); Test::impl_test_nrm2(13); @@ -138,8 +130,7 @@ int test_nrm2() { template int test_nrm2_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2_mv(0, 5); Test::impl_test_nrm2_mv(13, 5); @@ -149,8 +140,7 @@ int test_nrm2_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2_mv(0, 5); Test::impl_test_nrm2_mv(13, 5); @@ -159,8 +149,7 @@ int test_nrm2_mv() { // Test::impl_test_nrm2_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2_mv(0, 5); Test::impl_test_nrm2_mv(13, 5); @@ -173,8 +162,7 @@ int test_nrm2_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_float"); test_nrm2(); @@ -188,8 +176,7 @@ TEST_F(TestCategory, nrm2_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_double"); test_nrm2(); @@ -203,8 +190,7 @@ TEST_F(TestCategory, nrm2_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_complex_double"); test_nrm2, TestDevice>(); @@ -217,9 +203,8 @@ TEST_F(TestCategory, nrm2_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_int"); test_nrm2(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2_squared.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2_squared.hpp index 09e4b3d45d44..d718626f8e15 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2_squared.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2_squared.hpp @@ -27,8 +27,7 @@ void impl_test_nrm2_squared(int N) { view_stride_adapter a("a", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); @@ -57,8 +56,7 @@ void impl_test_nrm2_squared_mv(int N, int K) { view_stride_adapter a("A", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); @@ -84,10 +82,8 @@ void impl_test_nrm2_squared_mv(int N, int K) { Kokkos::fence(); for (int k = 0; k < K; k++) { typename AT::mag_type nonconst_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(nonconst_result - expected_result[k]) / divisor; + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(nonconst_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } @@ -95,10 +91,8 @@ void impl_test_nrm2_squared_mv(int N, int K) { Kokkos::fence(); for (int k = 0; k < K; k++) { typename AT::mag_type const_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(const_result - expected_result[k]) / divisor; + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(const_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } @@ -109,8 +103,7 @@ void impl_test_nrm2_squared_mv(int N, int K) { template int test_nrm2_squared() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2_squared(0); Test::impl_test_nrm2_squared(13); @@ -119,8 +112,7 @@ int test_nrm2_squared() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2_squared(0); Test::impl_test_nrm2_squared(13); @@ -128,8 +120,7 @@ int test_nrm2_squared() { // Test::impl_test_nrm2_squared(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2_squared(0); Test::impl_test_nrm2_squared(13); @@ -143,8 +134,7 @@ int test_nrm2_squared() { template int test_nrm2_squared_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2_squared_mv(0, 5); Test::impl_test_nrm2_squared_mv(13, 5); @@ -154,8 +144,7 @@ int 
test_nrm2_squared_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2_squared_mv(0, 5); Test::impl_test_nrm2_squared_mv(13, 5); @@ -164,8 +153,7 @@ int test_nrm2_squared_mv() { // Test::impl_test_nrm2_squared_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2_squared_mv(0, 5); Test::impl_test_nrm2_squared_mv(13, 5); @@ -178,8 +166,7 @@ int test_nrm2_squared_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_squared_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_squared_float"); test_nrm2_squared(); @@ -193,8 +180,7 @@ TEST_F(TestCategory, nrm2_squared_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_squared_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_squared_double"); test_nrm2_squared(); @@ -208,25 +194,21 @@ TEST_F(TestCategory, nrm2_squared_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_squared_complex_double) { - Kokkos::Profiling::pushRegion( - 
"KokkosBlas::Test::nrm2_squared_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_squared_complex_double"); test_nrm2_squared, TestDevice>(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, nrm2_squared_mv_complex_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::nrm2_squared_mv_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_squared_mv_complex_double"); test_nrm2_squared_mv, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2_squared_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2_squared_int"); test_nrm2_squared(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w.hpp index 48d8676fe4db..6dcc01bf1760 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w.hpp @@ -31,11 +31,9 @@ void impl_test_nrm2w(int N) { constexpr MagnitudeA max_val = 10; const MagnitudeA eps = AT::epsilon(); - const MagnitudeA max_error = - max_val * std::sqrt(static_cast(N)) * eps; + const MagnitudeA max_error = max_val * std::sqrt(static_cast(N)) * eps; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(max_val, randStart, randEnd); @@ -51,8 +49,7 @@ void impl_test_nrm2w(int N) { typename AT::mag_type term = AT::abs(a.h_view(i)) / AT::abs(w.h_view(i)); expected_result += term * term; } - expected_result = - Kokkos::ArithTraits::sqrt(expected_result); + expected_result = Kokkos::ArithTraits::sqrt(expected_result); typename AT::mag_type nonconst_result = 
KokkosBlas::nrm2w(a.d_view, w.d_view); EXPECT_NEAR_KK(nonconst_result, expected_result, max_error); @@ -69,11 +66,9 @@ void impl_test_nrm2w_mv(int N, int K) { constexpr MagnitudeA max_val = 10; const MagnitudeA eps = AT::epsilon(); - const MagnitudeA max_error = - max_val * std::sqrt(static_cast(N)) * eps; + const MagnitudeA max_error = max_val * std::sqrt(static_cast(N)) * eps; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(max_val, randStart, randEnd); @@ -88,12 +83,10 @@ void impl_test_nrm2w_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = typename AT::mag_type(); for (int i = 0; i < N; i++) { - typename AT::mag_type term = - AT::abs(a.h_view(i, j)) / AT::abs(w.h_view(i, j)); + typename AT::mag_type term = AT::abs(a.h_view(i, j)) / AT::abs(w.h_view(i, j)); expected_result[j] += term * term; } - expected_result[j] = - Kokkos::ArithTraits::sqrt(expected_result[j]); + expected_result[j] = Kokkos::ArithTraits::sqrt(expected_result[j]); } Kokkos::View r("Dot::Result", K); @@ -112,8 +105,7 @@ void impl_test_nrm2w_mv(int N, int K) { template int test_nrm2w() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2w(0); Test::impl_test_nrm2w(13); @@ -122,8 +114,7 @@ int test_nrm2w() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2w(0); Test::impl_test_nrm2w(13); @@ -131,8 +122,7 @@ int test_nrm2w() { // Test::impl_test_nrm2(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2w(0); Test::impl_test_nrm2w(13); @@ -146,8 +136,7 @@ int test_nrm2w() { template int test_nrm2w_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2w_mv(0, 5); Test::impl_test_nrm2w_mv(13, 5); @@ -157,8 +146,7 @@ int test_nrm2w_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2w_mv(0, 5); Test::impl_test_nrm2w_mv(13, 5); @@ -167,8 +155,7 @@ int test_nrm2w_mv() { // Test::impl_test_nrm2w_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2w_mv(0, 5); Test::impl_test_nrm2w_mv(13, 5); @@ -181,8 +168,7 @@ int test_nrm2w_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_float"); test_nrm2w(); @@ -196,8 +182,7 @@ TEST_F(TestCategory, nrm2w_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_double"); test_nrm2w(); @@ -211,8 +196,7 @@ TEST_F(TestCategory, nrm2w_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_complex_double"); test_nrm2w, TestDevice>(); @@ -225,9 +209,8 @@ TEST_F(TestCategory, nrm2w_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_int"); test_nrm2w(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w_squared.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w_squared.hpp index 5a55d15fade0..42bcdb0848dd 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w_squared.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrm2w_squared.hpp @@ -33,8 +33,7 @@ void impl_test_nrm2w_squared(int N) { const MagnitudeA eps = AT::epsilon(); const MagnitudeA max_error = max_val * max_val * N * eps; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(max_val, randStart, randEnd); @@ -51,8 +50,7 @@ void impl_test_nrm2w_squared(int N) { expected_result += term * term; } - typename AT::mag_type nonconst_result = - KokkosBlas::nrm2w_squared(a.d_view, w.d_view); + typename AT::mag_type nonconst_result = KokkosBlas::nrm2w_squared(a.d_view, w.d_view); 
EXPECT_NEAR_KK(nonconst_result, expected_result, max_error); } @@ -69,8 +67,7 @@ void impl_test_nrm2w_squared_mv(int N, int K) { const MagnitudeA eps = AT::epsilon(); const MagnitudeA max_error = max_val * max_val * N * eps; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(max_val, randStart, randEnd); @@ -84,8 +81,7 @@ void impl_test_nrm2w_squared_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = typename AT::mag_type(); for (int i = 0; i < N; i++) { - typename AT::mag_type term = - AT::abs(a.h_view(i, j)) / AT::abs(w.h_view(i, j)); + typename AT::mag_type term = AT::abs(a.h_view(i, j)) / AT::abs(w.h_view(i, j)); expected_result[j] += term * term; } } @@ -106,8 +102,7 @@ void impl_test_nrm2w_squared_mv(int N, int K) { template int test_nrm2w_squared() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2w_squared(0); Test::impl_test_nrm2w_squared(13); @@ -116,8 +111,7 @@ int test_nrm2w_squared() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2w_squared(0); Test::impl_test_nrm2w_squared(13); @@ -125,8 +119,7 @@ int test_nrm2w_squared() { // Test::impl_test_nrm2(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2w_squared(0); Test::impl_test_nrm2w_squared(13); @@ -140,8 +133,7 @@ int 
test_nrm2w_squared() { template int test_nrm2w_squared_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrm2w_squared_mv(0, 5); Test::impl_test_nrm2w_squared_mv(13, 5); @@ -151,8 +143,7 @@ int test_nrm2w_squared_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrm2w_squared_mv(0, 5); Test::impl_test_nrm2w_squared_mv(13, 5); @@ -161,8 +152,7 @@ int test_nrm2w_squared_mv() { // Test::impl_test_nrm2w_squared_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrm2w_squared_mv(0, 5); Test::impl_test_nrm2w_squared_mv(13, 5); @@ -175,8 +165,7 @@ int test_nrm2w_squared_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_squared_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_squared_float"); test_nrm2w_squared(); @@ -190,8 +179,7 @@ TEST_F(TestCategory, nrm2w_squared_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_squared_double) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_squared_double"); test_nrm2w_squared(); @@ -205,25 +193,21 @@ TEST_F(TestCategory, nrm2w_squared_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_squared_complex_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::nrm2w_squared_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_squared_complex_double"); test_nrm2w_squared, TestDevice>(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, nrm2w_squared_mv_complex_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::nrm2w_squared_mv_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_squared_mv_complex_double"); test_nrm2w_squared_mv, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrm2w_squared_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrm2w_squared_int"); test_nrm2w_squared(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrminf.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrminf.hpp index 91cc1c7502a3..e4a9101e85c4 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrminf.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_nrminf.hpp @@ -27,8 +27,7 @@ void impl_test_nrminf(int N) { view_stride_adapter a("A", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -38,11 +37,9 @@ void 
impl_test_nrminf(int N) { double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; - typename AT::mag_type expected_result = - Kokkos::ArithTraits::min(); + typename AT::mag_type expected_result = Kokkos::ArithTraits::min(); for (int i = 0; i < N; i++) - if (AT::abs(a.h_view(i)) > expected_result) - expected_result = AT::abs(a.h_view(i)); + if (AT::abs(a.h_view(i)) > expected_result) expected_result = AT::abs(a.h_view(i)); if (N == 0) expected_result = typename AT::mag_type(0); @@ -60,8 +57,7 @@ void impl_test_nrminf_mv(int N, int K) { view_stride_adapter a("A", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -73,8 +69,7 @@ void impl_test_nrminf_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = Kokkos::ArithTraits::min(); for (int i = 0; i < N; i++) { - if (AT::abs(a.h_view(i, j)) > expected_result[j]) - expected_result[j] = AT::abs(a.h_view(i, j)); + if (AT::abs(a.h_view(i, j)) > expected_result[j]) expected_result[j] = AT::abs(a.h_view(i, j)); } if (N == 0) expected_result[j] = typename AT::mag_type(0); } @@ -103,8 +98,7 @@ void impl_test_nrminf_mv(int N, int K) { template int test_nrminf() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrminf(0); Test::impl_test_nrminf(13); @@ -113,8 +107,7 @@ int test_nrminf() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrminf(0); Test::impl_test_nrminf(13); @@ -122,8 +115,7 @@ int test_nrminf() { // 
Test::impl_test_nrminf(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrminf(0); Test::impl_test_nrminf(13); @@ -137,8 +129,7 @@ int test_nrminf() { template int test_nrminf_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_nrminf_mv(0, 5); Test::impl_test_nrminf_mv(13, 5); @@ -147,8 +138,7 @@ int test_nrminf_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_nrminf_mv(0, 5); Test::impl_test_nrminf_mv(13, 5); @@ -156,8 +146,7 @@ int test_nrminf_mv() { // Test::impl_test_nrminf_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_nrminf_mv(0, 5); Test::impl_test_nrminf_mv(13, 5); @@ -169,8 +158,7 @@ int test_nrminf_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrminf_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrminf_float"); test_nrminf(); @@ -184,8 +172,7 @@ TEST_F(TestCategory, nrminf_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrminf_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrminf_double"); test_nrminf(); @@ -199,8 +186,7 @@ TEST_F(TestCategory, nrminf_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrminf_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrminf_complex_double"); test_nrminf, TestDevice>(); @@ -213,9 +199,8 @@ TEST_F(TestCategory, nrminf_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, nrminf_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::nrminf_int"); test_nrminf(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_reciprocal.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_reciprocal.hpp index c293fa04ebb8..2b8a07a552c8 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_reciprocal.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_reciprocal.hpp @@ -36,8 +36,7 @@ void impl_test_reciprocal(int N) { view_stride_adapter x("X", N); view_stride_adapter y("Y", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -71,14 +70,12 @@ void impl_test_reciprocal_mv(int N, int K) { view_stride_adapter x("X", N, K); view_stride_adapter y("Y", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; 
Test::getRandomBounds(10, randStart, randEnd); - Kokkos::fill_random(x.d_view, rand_pool, - Kokkos::ArithTraits::one(), randEnd); + Kokkos::fill_random(x.d_view, rand_pool, Kokkos::ArithTraits::one(), randEnd); } Kokkos::deep_copy(x.h_base, x.d_base); @@ -88,10 +85,8 @@ void impl_test_reciprocal_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int j = 0; j < K; ++j) { for (int i = 0; i < N; ++i) { - EXPECT_NEAR_KK( - y.h_view(i, j), - Kokkos::ArithTraits::one() / ScalarB(x.h_view(i, j)), - 2 * Kokkos::ArithTraits::epsilon()); + EXPECT_NEAR_KK(y.h_view(i, j), Kokkos::ArithTraits::one() / ScalarB(x.h_view(i, j)), + 2 * Kokkos::ArithTraits::epsilon()); } } @@ -102,10 +97,8 @@ void impl_test_reciprocal_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int j = 0; j < K; j++) { for (int i = 0; i < N; ++i) { - EXPECT_NEAR_KK( - y.h_view(i, j), - Kokkos::ArithTraits::one() / ScalarB(x.h_view(i, j)), - 2 * Kokkos::ArithTraits::epsilon()); + EXPECT_NEAR_KK(y.h_view(i, j), Kokkos::ArithTraits::one() / ScalarB(x.h_view(i, j)), + 2 * Kokkos::ArithTraits::epsilon()); } } } @@ -114,8 +107,7 @@ void impl_test_reciprocal_mv(int N, int K) { template int test_reciprocal() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_reciprocal(0); @@ -125,8 +117,7 @@ int test_reciprocal() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_reciprocal(0); @@ -135,8 +126,7 @@ int test_reciprocal() { // Test::impl_test_reciprocal(132231); #endif 
-#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_reciprocal(0); @@ -145,8 +135,7 @@ int test_reciprocal() { // Test::impl_test_reciprocal(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_reciprocal(1024); Test::impl_test_reciprocal(1024); #endif @@ -157,57 +146,47 @@ int test_reciprocal() { template int test_reciprocal_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_reciprocal_mv(0, 5); Test::impl_test_reciprocal_mv(13, 5); - Test::impl_test_reciprocal_mv(1024, - 5); + Test::impl_test_reciprocal_mv(1024, 5); // Test::impl_test_reciprocal_mv(132231,5); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_reciprocal_mv(0, 5); Test::impl_test_reciprocal_mv(13, 5); - Test::impl_test_reciprocal_mv(1024, - 5); + Test::impl_test_reciprocal_mv(1024, 5); // Test::impl_test_reciprocal_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; 
Test::impl_test_reciprocal_mv(0, 5); Test::impl_test_reciprocal_mv(13, 5); - Test::impl_test_reciprocal_mv(1024, - 5); + Test::impl_test_reciprocal_mv(1024, 5); // Test::impl_test_reciprocal_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_reciprocal_mv(1024, - 5); - Test::impl_test_reciprocal_mv(1024, - 5); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_reciprocal_mv(1024, 5); + Test::impl_test_reciprocal_mv(1024, 5); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, reciprocal_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::reciprocal_float"); test_reciprocal(); @@ -221,8 +200,7 @@ TEST_F(TestCategory, reciprocal_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, reciprocal_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::reciprocal_double"); test_reciprocal(); @@ -236,26 +214,21 @@ TEST_F(TestCategory, reciprocal_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, reciprocal_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::reciprocal_complex_double"); - test_reciprocal, Kokkos::complex, - TestDevice>(); + test_reciprocal, Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, reciprocal_mv_complex_double) { - Kokkos::Profiling::pushRegion( - 
"KokkosBlas::Test::reciprocal_mv_complex_double"); - test_reciprocal_mv, Kokkos::complex, - TestDevice>(); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::reciprocal_mv_complex_double"); + test_reciprocal_mv, Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, reciprocal_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::reciprocal_int"); test_reciprocal(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rot.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rot.hpp index ab1f395923d9..db9367cb4290 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rot.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rot.hpp @@ -71,8 +71,7 @@ int test_rot() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rot_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rot"); test_rot(); @@ -81,8 +80,7 @@ TEST_F(TestCategory, rot_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rot_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rot"); test_rot(); @@ -91,8 +89,7 @@ TEST_F(TestCategory, rot_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) 
TEST_F(TestCategory, rot_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rot"); test_rot, TestDevice>(); @@ -101,8 +98,7 @@ TEST_F(TestCategory, rot_complex_float) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rot_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rot"); test_rot, TestDevice>(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotg.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotg.hpp index 27f9c3cf71d7..31945ba6d9fb 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotg.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotg.hpp @@ -17,8 +17,7 @@ namespace Test { template -void test_rotg_impl(typename Device::execution_space const& space, - Scalar const a_in, Scalar const b_in) { +void test_rotg_impl(typename Device::execution_space const& space, Scalar const a_in, Scalar const b_in) { using magnitude_type = typename Kokkos::ArithTraits::mag_type; using SViewType = Kokkos::View; using MViewType = Kokkos::View; @@ -59,8 +58,7 @@ int test_rotg() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rotg_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); test_rotg(); @@ -69,8 +67,7 @@ TEST_F(TestCategory, rotg_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rotg_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); test_rotg(); @@ -79,8 +76,7 @@ 
TEST_F(TestCategory, rotg_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rotg_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); test_rotg, TestDevice>(); @@ -89,8 +85,7 @@ TEST_F(TestCategory, rotg_complex_float) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rotg_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); test_rotg, TestDevice>(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotm.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotm.hpp index 1f41fd06bc10..e1a7cddb3ccf 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotm.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotm.hpp @@ -18,8 +18,7 @@ namespace Test { template -void set_rotm_inputs(const int &test_case, vector_view_type &X, - vector_view_type &Y, param_view_type ¶m, +void set_rotm_inputs(const int &test_case, vector_view_type &X, vector_view_type &Y, param_view_type ¶m, vector_ref_type &Xref, vector_ref_type &Yref) { // Initialize X and Y inputs typename vector_view_type::HostMirror X_h = Kokkos::create_mirror_view(X); @@ -37,8 +36,7 @@ void set_rotm_inputs(const int &test_case, vector_view_type &X, Kokkos::deep_copy(Y, Y_h); // Initialize Xref, Yref and param (test case dependent) - typename param_view_type::HostMirror param_h = - Kokkos::create_mirror_view(param); + typename param_view_type::HostMirror param_h = Kokkos::create_mirror_view(param); switch (test_case) { case 0: param_h(0) = -2.0; @@ -116,8 +114,7 @@ void set_rotm_inputs(const int &test_case, vector_view_type &X, } 
template -void check_results(vector_view_type &X, vector_view_type &Y, - vector_ref_type &Xref, vector_ref_type &Yref) { +void check_results(vector_view_type &X, vector_view_type &Y, vector_ref_type &Xref, vector_ref_type &Yref) { using Scalar = typename vector_view_type::value_type; typename vector_view_type::HostMirror X_h = Kokkos::create_mirror_view(X); @@ -162,8 +159,7 @@ int test_rotm() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rotm_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotm"); test_rotm(); @@ -172,8 +168,7 @@ TEST_F(TestCategory, rotm_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, rotm_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotm"); test_rotm(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotmg.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotmg.hpp index ecfc3b681588..0fb3c5f67ee8 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotmg.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_rotmg.hpp @@ -17,8 +17,7 @@ namespace Test { template -void test_rotmg_impl(View0& d1, View0& d2, View0& x1, View0& y1, PView& param, - RView& ref_vals) { +void test_rotmg_impl(View0& d1, View0& d2, View0& x1, View0& y1, PView& param, RView& ref_vals) { using scalar_type = typename View0::non_const_value_type; using YView = typename View0::const_type; @@ -28,10 +27,8 @@ void test_rotmg_impl(View0& d1, View0& d2, View0& x1, View0& y1, PView& param, const scalar_type eps = Kokkos::ArithTraits::eps(); const scalar_type tol = -#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) || \ - 
defined(KOKKOSKERNELS_ENABLE_TPL_MKL) - 100 * - eps; // Guessing MKL implements sin/cos differently so need larger tol +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + 100 * eps; // Guessing MKL implements sin/cos differently so need larger tol #else 10 * eps; #endif @@ -61,8 +58,7 @@ void test_rotmg_impl(View0& d1, View0& d2, View0& x1, View0& y1, PView& param, } template -void set_rotmg_input_ref_vals(const int test_case, View0& d1, View0& d2, - View0& x1, View0& y1, PView& param, +void set_rotmg_input_ref_vals(const int test_case, View0& d1, View0& d2, View0& x1, View0& y1, PView& param, RView& ref_vals) { constexpr double gamma = 4096; Kokkos::deep_copy(param, 0.0); @@ -211,9 +207,7 @@ void set_rotmg_input_ref_vals(const int test_case, View0& d1, View0& d2, ref_vals(7) = -0.25; ref_vals(8) = 0.0; break; - default: - throw std::runtime_error("rotmg test: test case unrecognized!"); - break; + default: throw std::runtime_error("rotmg test: test case unrecognized!"); break; } } } // namespace Test @@ -222,8 +216,7 @@ template int test_rotmg() { Kokkos::View d1("d1"), d2("d2"), x1("x1"), y1("y1"); Kokkos::View param("param"); - Kokkos::View ref_vals( - "reference values"); + Kokkos::View ref_vals("reference values"); constexpr int num_test_cases = 9; for (int test_case = 0; test_case < num_test_cases; ++test_case) { diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_scal.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_scal.hpp index a88ed646f16d..b0169095fd04 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_scal.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_scal.hpp @@ -33,8 +33,7 @@ void impl_test_scal(int N) { view_stride_adapter x("X", N); view_stride_adapter y("Y", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -68,8 +67,7 @@ void impl_test_scal_mv(int N, int K) { view_stride_adapter 
x("X", N, K); view_stride_adapter y("Y", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -89,8 +87,7 @@ void impl_test_scal_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j)), y.h_view(i, j), - eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j)), y.h_view(i, j), eps); } } @@ -100,8 +97,7 @@ void impl_test_scal_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j)), y.h_view(i, j), - eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j)), y.h_view(i, j), eps); } } @@ -113,16 +109,14 @@ void impl_test_scal_mv(int N, int K) { Kokkos::deep_copy(param_j, ScalarA(3 + j)); } - auto h_params = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), params); + auto h_params = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), params); Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); KokkosBlas::scal(y.d_view, params, x.d_view); Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(static_cast(h_params(j) * x.h_view(i, j)), - y.h_view(i, j), eps); + EXPECT_NEAR_KK(static_cast(h_params(j) * x.h_view(i, j)), y.h_view(i, j), eps); } } @@ -131,8 +125,7 @@ void impl_test_scal_mv(int N, int K) { Kokkos::deep_copy(y.h_base, y.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK(static_cast(h_params(j) * x.h_view(i, j)), - y.h_view(i, j), eps); + EXPECT_NEAR_KK(static_cast(h_params(j) * x.h_view(i, j)), y.h_view(i, j), eps); } } } @@ -141,8 +134,7 @@ void impl_test_scal_mv(int N, int K) { template int test_scal() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) 
+ (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_scal(0); @@ -152,8 +144,7 @@ int test_scal() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_scal(0); @@ -162,8 +153,7 @@ int test_scal() { // Test::impl_test_scal(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_scal(0); @@ -172,8 +162,7 @@ int test_scal() { // Test::impl_test_scal(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_scal(1024); Test::impl_test_scal(1024); #endif @@ -184,8 +173,7 @@ int test_scal() { template int test_scal_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_scal_mv(0, 5); @@ -195,8 +183,7 @@ int test_scal_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_scal_mv(0, 5); @@ 
-205,8 +192,7 @@ int test_scal_mv() { // Test::impl_test_scal_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_scal_mv(0, 5); @@ -215,8 +201,7 @@ int test_scal_mv() { // Test::impl_test_scal_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_scal_mv(1024, 5); Test::impl_test_scal_mv(1024, 5); #endif @@ -225,8 +210,7 @@ int test_scal_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, scal_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_float"); test_scal(); @@ -240,8 +224,7 @@ TEST_F(TestCategory, scal_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, scal_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_double"); test_scal(); @@ -255,8 +238,7 @@ TEST_F(TestCategory, scal_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, scal_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_complex_double"); test_scal, Kokkos::complex, TestDevice>(); @@ -269,9 +251,8 @@ TEST_F(TestCategory, scal_mv_complex_double) { } #endif 
-#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, scal_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_int"); test_scal(); @@ -284,8 +265,7 @@ TEST_F(TestCategory, scal_mv_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, scal_double_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_double_int"); test_scal(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_serial_setscal.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_serial_setscal.hpp index cfbe4d602d2f..31ad998ac4b4 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_serial_setscal.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_serial_setscal.hpp @@ -34,15 +34,13 @@ enum : int { BlasSet = 0, BlasScale = 1 }; struct KokkosKernelTag {}; struct NaiveTag {}; -template +template struct Functor_TestBlasSerialMatUtil { ScalarType _alpha; ViewType _a; KOKKOS_INLINE_FUNCTION - Functor_TestBlasSerialMatUtil(const ScalarType alpha, const ViewType &a) - : _alpha(alpha), _a(a) {} + Functor_TestBlasSerialMatUtil(const ScalarType alpha, const ViewType &a) : _alpha(alpha), _a(a) {} KOKKOS_INLINE_FUNCTION void operator()(const KokkosKernelTag &, const int i) const { @@ -76,27 +74,20 @@ struct Functor_TestBlasSerialMatUtil { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBlas::Test::SerialMatUtil"); const std::string name_value_type = Test::value_type_name(); - std::string name_work_tag = - (std::is_same::value - ? "::KokkosBlas" - : std::is_same::value ? "::Naive" - : "::UnknownWorkTag"); - std::string name_test_id = - (TestID == BlasSet ? 
"Set" - : TestID == BlasScale ? "Scale" : "UnknownTest"); - std::string name = - name_region + name_value_type + name_work_tag + name_test_id; + std::string name_work_tag = (std::is_same::value ? "::KokkosBlas" + : std::is_same::value ? "::Naive" + : "::UnknownWorkTag"); + std::string name_test_id = (TestID == BlasSet ? "Set" : TestID == BlasScale ? "Scale" : "UnknownTest"); + std::string name = name_region + name_value_type + name_work_tag + name_test_id; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::RangePolicy - policy(0, _a.extent(0)); + Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); return 0; } }; -template +template void impl_test_blas_matutil(const int N, const int BlkSize) { /// typedefs typedef typename ViewType::value_type value_type; @@ -107,8 +98,7 @@ void impl_test_blas_matutil(const int N, const int BlkSize) { ViewType a("a", N, BlkSize, BlkSize); ViewType b("b", N, BlkSize, BlkSize); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a, random, value_type(1.0)); Kokkos::fence(); @@ -116,12 +106,8 @@ void impl_test_blas_matutil(const int N, const int BlkSize) { Kokkos::deep_copy(b, a); /// test body - Functor_TestBlasSerialMatUtil(alpha, a) - .run(); - Functor_TestBlasSerialMatUtil(alpha, b) - .run(); + Functor_TestBlasSerialMatUtil(alpha, a).run(); + Functor_TestBlasSerialMatUtil(alpha, b).run(); Kokkos::fence(); @@ -133,44 +119,31 @@ void impl_test_blas_matutil(const int N, const int BlkSize) { Kokkos::deep_copy(b_host, b); /// check a = b - typename ats::mag_type eps = - 100 * std::numeric_limits::epsilon(); + typename ats::mag_type eps = 100 * std::numeric_limits::epsilon(); for (int k = 0; k < N; ++k) for (int i = 0; i < BlkSize; ++i) - for (int j = 0; j < BlkSize; ++j) - EXPECT_NEAR_KK(b_host(k, i, j), a_host(k, i, j), eps); + for (int j = 0; j < BlkSize; ++j) EXPECT_NEAR_KK(b_host(k, i, 
j), a_host(k, i, j), eps); } } // namespace Test -template +template int test_blas_matutil() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::impl_test_blas_matutil(0, - 10); - Test::impl_test_blas_matutil(10, - 15); - Test::impl_test_blas_matutil(1024, - 9); - Test::impl_test_blas_matutil( - 132231, 3); + typedef Kokkos::View ViewType; + Test::impl_test_blas_matutil(0, 10); + Test::impl_test_blas_matutil(10, 15); + Test::impl_test_blas_matutil(1024, 9); + Test::impl_test_blas_matutil(132231, 3); } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::impl_test_blas_matutil(0, - 10); - Test::impl_test_blas_matutil(10, - 15); - Test::impl_test_blas_matutil(1024, - 9); - Test::impl_test_blas_matutil( - 132231, 3); + typedef Kokkos::View ViewType; + Test::impl_test_blas_matutil(0, 10); + Test::impl_test_blas_matutil(10, 15); + Test::impl_test_blas_matutil(1024, 9); + Test::impl_test_blas_matutil(132231, 3); } #endif @@ -201,19 +174,15 @@ TEST_F(TestCategory, blas_scalar_serial_scale_double_double) { #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, blas_scalar_serial_set_dcomplex_dcomplex) { - test_blas_matutil, - Kokkos::complex, ::Test::BlasSet>(); + test_blas_matutil, Kokkos::complex, ::Test::BlasSet>(); } TEST_F(TestCategory, blas_scalar_serial_scale_dcomplex_dcomplex) { - test_blas_matutil, - Kokkos::complex, ::Test::BlasScale>(); + test_blas_matutil, Kokkos::complex, ::Test::BlasScale>(); } TEST_F(TestCategory, blas_scalar_serial_set_dcomplex_double) { - test_blas_matutil, double, - ::Test::BlasSet>(); + test_blas_matutil, double, ::Test::BlasSet>(); } TEST_F(TestCategory, blas_scalar_serial_scale_dcomplex_double) { - test_blas_matutil, double, - ::Test::BlasScale>(); + test_blas_matutil, double, ::Test::BlasScale>(); } #endif diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_sum.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_sum.hpp index 
34d52a7e4af1..6d7ae3818e7d 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_sum.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_sum.hpp @@ -26,8 +26,7 @@ void impl_test_sum(int N) { view_stride_adapter a("A", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -53,8 +52,7 @@ void impl_test_sum_mv(int N, int K) { view_stride_adapter a("A", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); @@ -76,8 +74,7 @@ void impl_test_sum_mv(int N, int K) { Kokkos::fence(); for (int k = 0; k < K; k++) { ScalarA nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_result, expected_result[k], eps * expected_result[k]); } KokkosBlas::sum(r, a.d_view_const); @@ -94,8 +91,7 @@ void impl_test_sum_mv(int N, int K) { template int test_sum() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_sum(0); Test::impl_test_sum(13); @@ -104,8 +100,7 @@ int test_sum() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_sum(0); Test::impl_test_sum(13); @@ -113,8 +108,7 @@ int test_sum() { // Test::impl_test_sum(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_sum(0); Test::impl_test_sum(13); @@ -128,8 +122,7 @@ int test_sum() { template int test_sum_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_sum_mv(0, 5); Test::impl_test_sum_mv(13, 5); @@ -139,8 +132,7 @@ int test_sum_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_sum_mv(0, 5); Test::impl_test_sum_mv(13, 5); @@ -149,8 +141,7 @@ int test_sum_mv() { // Test::impl_test_sum_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_sum_mv(0, 5); Test::impl_test_sum_mv(13, 5); @@ -163,8 +154,7 @@ int test_sum_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, sum_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_float"); test_sum(); @@ -178,8 +168,7 @@ TEST_F(TestCategory, sum_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, sum_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_double"); 
test_sum(); @@ -193,8 +182,7 @@ TEST_F(TestCategory, sum_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, sum_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_complex_double"); test_sum, TestDevice>(); @@ -207,9 +195,8 @@ TEST_F(TestCategory, sum_mv_complex_double) { } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, sum_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_int"); test_sum(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_swap.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_swap.hpp index 624552f1dca6..15a04c652c0c 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_swap.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_swap.hpp @@ -55,8 +55,7 @@ int test_swap() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, swap_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_float"); test_swap(); @@ -65,8 +64,7 @@ TEST_F(TestCategory, swap_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, swap_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_double"); test_swap(); @@ -75,8 +73,7 @@ TEST_F(TestCategory, swap_double) { #endif 
#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, swap_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_complex_float"); test_swap, TestDevice>(); @@ -85,8 +82,7 @@ TEST_F(TestCategory, swap_complex_float) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, swap_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_complex_double"); test_swap, TestDevice>(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_abs.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_abs.hpp index eca7657b5508..0f78731ab3db 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_abs.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_abs.hpp @@ -47,8 +47,7 @@ void impl_test_team_abs(int N) { view_stride_adapter x("X", N); view_stride_adapter y("Y", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(x.d_view, rand_pool, ScalarA(1)); Kokkos::fill_random(y.d_view, rand_pool, ScalarB(1)); @@ -56,52 +55,36 @@ void impl_test_team_abs(int N) { Kokkos::deep_copy(x.h_base, x.d_base); ScalarA expected_result = 0; - for (int i = 0; i < N; i++) - expected_result += AT::abs(x.h_view(i)) * AT::abs(x.h_view(i)); + for (int i = 0; i < N; i++) expected_result += AT::abs(x.h_view(i)) * AT::abs(x.h_view(i)); // KokkosBlas::abs(y,x); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAbs", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAbs", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = 
teamMember.league_rank(); KokkosBlas::Experimental::abs( teamMember, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); ScalarB nonconst_nonconst_result = KokkosBlas::dot(y.d_view, y.d_view); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); // Zero out y and run again with const input Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); // KokkosBlas::abs(y,c_x); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAbs", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAbs", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::abs( teamMember, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); }); ScalarB const_nonconst_result = KokkosBlas::dot(y.d_view, y.d_view); @@ -134,8 +117,7 @@ void impl_test_team_abs_mv(int N, int K) { ScalarA *expected_result = new ScalarA[K]; for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); - for (int i = 0; i < N; i++) - expected_result[j] += AT::abs(x.h_view(i, j)) * AT::abs(x.h_view(i, j)); + for (int i = 0; i < N; i++) expected_result[j] += AT::abs(x.h_view(i, j)) * AT::abs(x.h_view(i, j)); } // double eps = std::is_same::value?2*1e-5:1e-7; @@ -147,21 +129,17 @@ void impl_test_team_abs_mv(int N, int K) { // KokkosBlas::abs(y,x); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAbs", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAbs", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::abs( - teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), - Kokkos::subview(x.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::abs(teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), + Kokkos::subview(x.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { - ScalarA nonconst_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(nonconst_result - expected_result[k]) / divisor; + ScalarA nonconst_result = r(k); + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(nonconst_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); // EXPECT_NEAR_KK( nonconst_result, expected_result[k], // eps*expected_result[k]); @@ -172,21 +150,17 @@ void impl_test_team_abs_mv(int N, int K) { // KokkosBlas::abs(y,c_x); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAbs", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAbs", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::abs( - teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), - Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::abs(teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), + Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { - ScalarA const_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(const_result - expected_result[k]) / divisor; + ScalarA const_result = r(k); + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(const_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); // EXPECT_NEAR_KK( const_result, expected_result[k], // eps*expected_result[k]); @@ -199,8 +173,7 @@ void impl_test_team_abs_mv(int N, int K) { template int test_team_abs() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_abs(0); @@ -210,8 +183,7 @@ int test_team_abs() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_abs(0); @@ -220,8 +192,7 @@ int test_team_abs() { // Test::impl_test_team_abs(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_abs(0); @@ -230,8 +201,7 @@ int test_team_abs() { // Test::impl_test_team_abs(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_abs(124); Test::impl_test_team_abs(124); #endif @@ -242,8 +212,7 @@ int test_team_abs() { template int test_team_abs_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_abs_mv(0, 5); @@ -254,8 +223,7 @@ int test_team_abs_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_abs_mv(0, 5); @@ -265,8 +233,7 @@ int test_team_abs_mv() { // Device>(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_abs_mv(0, 5); @@ -276,8 +243,7 @@ int test_team_abs_mv() { // Device>(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_abs_mv(124, 5); Test::impl_test_team_abs_mv(124, 5); #endif @@ -286,46 +252,31 @@ int test_team_abs_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_abs_float) { - test_team_abs(); -} -TEST_F(TestCategory, team_abs_mv_float) { - test_team_abs_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_abs_float) { test_team_abs(); } +TEST_F(TestCategory, team_abs_mv_float) { test_team_abs_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_abs_double) { - test_team_abs(); -} -TEST_F(TestCategory, team_abs_mv_double) { 
- test_team_abs_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_abs_double) { test_team_abs(); } +TEST_F(TestCategory, team_abs_mv_double) { test_team_abs_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_abs_complex_double) { test_team_abs, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, team_abs_mv_complex_double) { - test_team_abs_mv, Kokkos::complex, - TestDevice>(); + test_team_abs_mv, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_abs_int) { test_team_abs(); } -TEST_F(TestCategory, team_abs_mv_int) { - test_team_abs_mv(); -} +TEST_F(TestCategory, team_abs_mv_int) { test_team_abs_mv(); } #endif /*#if !defined(KOKKOSKERNELS_ETI_ONLY) && diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpby.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpby.hpp index 5875f2bc1f8f..cadb2d0d0903 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpby.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpby.hpp @@ -60,57 +60,40 @@ void impl_test_team_axpby(int N) { ScalarA expected_result = 0; for (int i = 0; i < N; i++) - expected_result += ScalarB(a * x.h_view(i) + b * y.h_view(i)) * - ScalarB(a * x.h_view(i) + b * y.h_view(i)); + expected_result += ScalarB(a * x.h_view(i) + b * y.h_view(i)) * ScalarB(a * x.h_view(i) + b * y.h_view(i)); // KokkosBlas::axpby(a,x,b,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpby", policy, - 
KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpby", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::axpby( teamMember, a, - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), b, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); ScalarB nonconst_nonconst_result = KokkosBlas::dot(y.d_view, y.d_view); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); Kokkos::deep_copy(y.d_base, org_y.h_base); // KokkosBlas::axpby(a,c_x,b,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpby", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpby", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::axpby( teamMember, a, - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), b, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); }); - ScalarB const_nonconst_result = - KokkosBlas::dot(y.d_view_const, y.d_view_const); + ScalarB const_nonconst_result = KokkosBlas::dot(y.d_view_const, y.d_view_const); EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); } @@ -146,8 +129,8 @@ void impl_test_team_axpby_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); for (int i = 0; i < N; i++) - expected_result[j] += ScalarB(a * x.h_view(i, j) + b * y.h_view(i, j)) * - ScalarB(a * x.h_view(i, j) + b * y.h_view(i, j)); + expected_result[j] += + ScalarB(a * x.h_view(i, j) + b * y.h_view(i, j)) * ScalarB(a * x.h_view(i, j) + b * y.h_view(i, j)); } double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; @@ -158,40 +141,32 @@ void impl_test_team_axpby_mv(int N, int K) { // KokkosBlas::axpby(a,x,b,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpby", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpby", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::axpby( - teamMember, a, Kokkos::subview(x.d_view, Kokkos::ALL(), teamId), b, - Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::axpby(teamMember, a, Kokkos::subview(x.d_view, Kokkos::ALL(), teamId), b, + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { ScalarA nonconst_nonconst_result = r(k); - EXPECT_NEAR_KK(AT::abs(nonconst_nonconst_result), - AT::abs(expected_result[k]), - AT::abs(expected_result[k] * eps)); + EXPECT_NEAR_KK(AT::abs(nonconst_nonconst_result), AT::abs(expected_result[k]), AT::abs(expected_result[k] * eps)); } Kokkos::deep_copy(y.d_base, org_y.h_base); // KokkosBlas::axpby(a,c_x,b,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpby", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpby", policy, 
KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::axpby( - teamMember, a, - Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId), b, - Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::axpby(teamMember, a, Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId), b, + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { ScalarA const_non_const_result = r(k); - EXPECT_NEAR_KK(AT::abs(const_non_const_result), AT::abs(expected_result[k]), - AT::abs(eps * expected_result[k])); + EXPECT_NEAR_KK(AT::abs(const_non_const_result), AT::abs(expected_result[k]), AT::abs(eps * expected_result[k])); } delete[] expected_result; @@ -201,8 +176,7 @@ void impl_test_team_axpby_mv(int N, int K) { template int test_team_axpby() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_axpby(0); @@ -212,8 +186,7 @@ int test_team_axpby() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_axpby(0); @@ -222,8 +195,7 @@ int test_team_axpby() { // Test::impl_test_team_axpby(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_axpby(0); @@ -232,8 +204,7 @@ int 
test_team_axpby() { // Test::impl_test_team_axpby(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_axpby(124); Test::impl_test_team_axpby(124); #endif @@ -244,8 +215,7 @@ int test_team_axpby() { template int test_team_axpby_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_axpby_mv(0, 5); @@ -256,8 +226,7 @@ int test_team_axpby_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_axpby_mv(0, 5); @@ -267,8 +236,7 @@ int test_team_axpby_mv() { // Device>(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_axpby_mv(0, 5); @@ -278,8 +246,7 @@ int test_team_axpby_mv() { // Device>(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_axpby_mv(124, 5); Test::impl_test_team_axpby_mv(124, 5); #endif @@ -288,59 +255,36 @@ int test_team_axpby_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_axpby_float) { - test_team_axpby(); -} -TEST_F(TestCategory, team_axpby_mv_float) { - test_team_axpby_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_axpby_float) { test_team_axpby(); } +TEST_F(TestCategory, team_axpby_mv_float) { test_team_axpby_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_axpby_double) { - test_team_axpby(); -} -TEST_F(TestCategory, team_axpby_mv_double) { - test_team_axpby_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_axpby_double) { test_team_axpby(); } +TEST_F(TestCategory, team_axpby_mv_double) { test_team_axpby_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_axpby_complex_double) { - test_team_axpby, Kokkos::complex, - TestDevice>(); + test_team_axpby, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, team_axpby_mv_complex_double) { - test_team_axpby_mv, Kokkos::complex, - TestDevice>(); + test_team_axpby_mv, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_axpby_int) { - test_team_axpby(); -} -TEST_F(TestCategory, team_axpby_mv_int) { - test_team_axpby_mv(); -} +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_axpby_int) { test_team_axpby(); } +TEST_F(TestCategory, team_axpby_mv_int) { test_team_axpby_mv(); } #endif -#if 
!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, team_axpby_double_int) { - test_team_axpby(); -} -TEST_F(TestCategory, team_axpby_double_mv_int) { - test_team_axpby_mv(); -} +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +TEST_F(TestCategory, team_axpby_double_int) { test_team_axpby(); } +TEST_F(TestCategory, team_axpby_double_mv_int) { test_team_axpby_mv(); } #endif #endif // check for lambda availability in CUDA backend diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpy.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpy.hpp index a5ac6a9c6693..de2bf7885593 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpy.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_axpy.hpp @@ -48,8 +48,7 @@ void impl_test_team_axpy(int N) { ScalarA a = 3; double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(x.d_view, rand_pool, ScalarA(10)); Kokkos::fill_random(y.d_view, rand_pool, ScalarB(10)); @@ -60,55 +59,38 @@ void impl_test_team_axpy(int N) { ScalarA expected_result = 0; for (int i = 0; i < N; i++) - expected_result += ScalarB(a * x.h_view(i) + y.h_view(i)) * - ScalarB(a * x.h_view(i) + y.h_view(i)); + expected_result += ScalarB(a * x.h_view(i) + y.h_view(i)) * ScalarB(a * x.h_view(i) + y.h_view(i)); // KokkosBlas::axpy(a,x,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpy", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpy", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::axpy( teamMember, a, - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N)), - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); ScalarB nonconst_nonconst_result = KokkosBlas::dot(y.d_view, y.d_view); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); Kokkos::deep_copy(y.d_base, org_y.h_base); // KokkosBlas::axpy(a,c_x,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpy", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpy", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::axpy( teamMember, a, - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); }); - ScalarB const_nonconst_result = - KokkosBlas::dot(y.d_view_const, y.d_view_const); + ScalarB const_nonconst_result = KokkosBlas::dot(y.d_view_const, y.d_view_const); EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); } @@ -143,8 +125,7 @@ void impl_test_team_axpy_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); for (int i = 0; i < N; i++) - expected_result[j] += ScalarB(a * x.h_view(i, j) + y.h_view(i, j)) * - ScalarB(a * x.h_view(i, j) + y.h_view(i, j)); + expected_result[j] += ScalarB(a * x.h_view(i, j) + y.h_view(i, j)) * ScalarB(a * x.h_view(i, j) + y.h_view(i, j)); } double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; @@ -153,39 +134,32 @@ void impl_test_team_axpy_mv(int N, int K) { // KokkosBlas::axpy(a,x,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpy", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpy", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::axpy( - teamMember, a, Kokkos::subview(x.d_view, Kokkos::ALL(), teamId), - Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::axpy(teamMember, a, Kokkos::subview(x.d_view, Kokkos::ALL(), teamId), + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { ScalarA nonconst_nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], eps * expected_result[k]); } Kokkos::deep_copy(y.d_base, org_y.h_base); // KokkosBlas::axpy(a,c_x,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamAxpy", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamAxpy", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - 
KokkosBlas::Experimental::axpy( - teamMember, a, - Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId), - Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::axpy(teamMember, a, Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId), + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { ScalarA const_non_const_result = r(k); - EXPECT_NEAR_KK(const_non_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(const_non_const_result, expected_result[k], eps * expected_result[k]); } delete[] expected_result; @@ -195,8 +169,7 @@ void impl_test_team_axpy_mv(int N, int K) { template int test_team_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_axpy(0); @@ -206,8 +179,7 @@ int test_team_axpy() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_axpy(0); @@ -216,8 +188,7 @@ int test_team_axpy() { // Test::impl_test_team_axpy(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_axpy(0); @@ -226,8 +197,7 @@ int test_team_axpy() { // Test::impl_test_team_axpy(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if 
!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_axpy(124); Test::impl_test_team_axpy(124); #endif @@ -238,8 +208,7 @@ int test_team_axpy() { template int test_team_axpy_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_axpy_mv(0, 5); @@ -250,8 +219,7 @@ int test_team_axpy_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_axpy_mv(0, 5); @@ -261,8 +229,7 @@ int test_team_axpy_mv() { // Device>(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_axpy_mv(0, 5); @@ -272,8 +239,7 @@ int test_team_axpy_mv() { // Device>(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_axpy_mv(124, 5); Test::impl_test_team_axpy_mv(124, 5); #endif @@ -282,57 +248,36 @@ int test_team_axpy_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_axpy_float) { - test_team_axpy(); -} -TEST_F(TestCategory, team_axpy_mv_float) { - test_team_axpy_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) 
&& !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_axpy_float) { test_team_axpy(); } +TEST_F(TestCategory, team_axpy_mv_float) { test_team_axpy_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_axpy_double) { - test_team_axpy(); -} -TEST_F(TestCategory, team_axpy_mv_double) { - test_team_axpy_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_axpy_double) { test_team_axpy(); } +TEST_F(TestCategory, team_axpy_mv_double) { test_team_axpy_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_axpy_complex_double) { - test_team_axpy, Kokkos::complex, - TestDevice>(); + test_team_axpy, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, team_axpy_mv_complex_double) { - test_team_axpy_mv, Kokkos::complex, - TestDevice>(); + test_team_axpy_mv, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_axpy_int) { test_team_axpy(); } -TEST_F(TestCategory, team_axpy_mv_int) { - test_team_axpy_mv(); -} +TEST_F(TestCategory, team_axpy_mv_int) { test_team_axpy_mv(); } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, team_axpy_double_int) { - test_team_axpy(); -} -TEST_F(TestCategory, team_axpy_double_mv_int) { - test_team_axpy_mv(); -} +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) 
+TEST_F(TestCategory, team_axpy_double_int) { test_team_axpy(); } +TEST_F(TestCategory, team_axpy_double_mv_int) { test_team_axpy_mv(); } #endif #endif // Check for lambda availability in CUDA backend diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_dot.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_dot.hpp index 26baf261fed6..9445d5784def 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_dot.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_dot.hpp @@ -61,47 +61,32 @@ void impl_test_team_dot(int N) { ScalarA nonconst_nonconst_result = 0; Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); d_r(teamId) = KokkosBlas::Experimental::dot( teamMember, - Kokkos::subview( - a.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - b.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(a.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(b.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < M; k++) nonconst_nonconst_result += r(k); double eps = std::is_same::value ? 
2 * 1e-5 : 1e-7; - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); ScalarA const_const_result = 0; Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); d_r(teamId) = KokkosBlas::Experimental::dot( teamMember, - Kokkos::subview( - a.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - b.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(a.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(b.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < M; k++) const_const_result += r(k); @@ -112,21 +97,14 @@ void impl_test_team_dot(int N) { ScalarA nonconst_const_result = 0; Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); d_r(teamId) = KokkosBlas::Experimental::dot( teamMember, - Kokkos::subview( - a.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - b.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(a.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N)), + Kokkos::subview(b.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < M; k++) nonconst_const_result += r(k); @@ -137,21 +115,14 @@ void impl_test_team_dot(int N) { ScalarA const_nonconst_result = 0; Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); d_r(teamId) = KokkosBlas::Experimental::dot( teamMember, - Kokkos::subview( - a.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - b.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(a.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(b.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < M; k++) const_nonconst_result += r(k); @@ -185,8 +156,7 @@ void impl_test_team_dot_mv(int N, int K) { ScalarA *expected_result = new ScalarA[K]; for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); - for (int i = 0; i < N; i++) - expected_result[j] += a.h_view(i, j) * b.h_view(i, j); + for (int i = 0; i < N; i++) expected_result[j] += a.h_view(i, j) * b.h_view(i, j); } double eps = std::is_same::value ? 
2 * 1e-5 : 1e-7; @@ -196,66 +166,54 @@ void impl_test_team_dot_mv(int N, int K) { // KokkosBlas::dot(r,a,b); Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - d_r(teamId) = KokkosBlas::Experimental::dot( - teamMember, Kokkos::subview(a.d_view, Kokkos::ALL(), teamId), - Kokkos::subview(b.d_view, Kokkos::ALL(), teamId)); + d_r(teamId) = KokkosBlas::Experimental::dot(teamMember, Kokkos::subview(a.d_view, Kokkos::ALL(), teamId), + Kokkos::subview(b.d_view, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < K; k++) { ScalarA nonconst_nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], eps * expected_result[k]); } // KokkosBlas::dot(r,c_a,c_b); Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - d_r(teamId) = KokkosBlas::Experimental::dot( - teamMember, Kokkos::subview(a.d_view_const, Kokkos::ALL(), teamId), - Kokkos::subview(b.d_view_const, Kokkos::ALL(), teamId)); + d_r(teamId) = KokkosBlas::Experimental::dot(teamMember, Kokkos::subview(a.d_view_const, Kokkos::ALL(), teamId), + Kokkos::subview(b.d_view_const, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < K; k++) { ScalarA const_const_result = r(k); - EXPECT_NEAR_KK(const_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(const_const_result, expected_result[k], eps * expected_result[k]); } // KokkosBlas::dot(r,a,c_b); Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + 
"KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - d_r(teamId) = KokkosBlas::Experimental::dot( - teamMember, Kokkos::subview(a.d_view, Kokkos::ALL(), teamId), - Kokkos::subview(b.d_view_const, Kokkos::ALL(), teamId)); + d_r(teamId) = KokkosBlas::Experimental::dot(teamMember, Kokkos::subview(a.d_view, Kokkos::ALL(), teamId), + Kokkos::subview(b.d_view_const, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < K; k++) { ScalarA non_const_const_result = r(k); - EXPECT_NEAR_KK(non_const_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(non_const_const_result, expected_result[k], eps * expected_result[k]); } // KokkosBlas::dot(r,c_a,b); Kokkos::parallel_for( - "KokkosBlas::Test::TeamDot", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamDot", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - d_r(teamId) = KokkosBlas::Experimental::dot( - teamMember, Kokkos::subview(a.d_view_const, Kokkos::ALL(), teamId), - Kokkos::subview(b.d_view, Kokkos::ALL(), teamId)); + d_r(teamId) = KokkosBlas::Experimental::dot(teamMember, Kokkos::subview(a.d_view_const, Kokkos::ALL(), teamId), + Kokkos::subview(b.d_view, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < K; k++) { ScalarA const_non_const_result = r(k); - EXPECT_NEAR_KK(const_non_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(const_non_const_result, expected_result[k], eps * expected_result[k]); } delete[] expected_result; @@ -265,8 +223,7 @@ void impl_test_team_dot_mv(int N, int K) { template int test_team_dot() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View 
view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_dot(0); @@ -276,8 +233,7 @@ int test_team_dot() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_dot(0); @@ -286,8 +242,7 @@ int test_team_dot() { // Test::impl_test_team_dot(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_dot(0); @@ -296,8 +251,7 @@ int test_team_dot() { // Test::impl_test_team_dot(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_dot(124); Test::impl_test_team_dot(124); #endif @@ -308,8 +262,7 @@ int test_team_dot() { template int test_team_dot_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_dot_mv(0, 5); @@ -320,8 +273,7 @@ int test_team_dot_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_dot_mv(0, 5); @@ -331,8 +283,7 @@ int test_team_dot_mv() { // 
Device>(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_dot_mv(0, 5); @@ -342,8 +293,7 @@ int test_team_dot_mv() { // Device>(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_dot_mv(124, 5); Test::impl_test_team_dot_mv(124, 5); #endif @@ -352,46 +302,31 @@ int test_team_dot_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_dot_float) { - test_team_dot(); -} -TEST_F(TestCategory, team_dot_mv_float) { - test_team_dot_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_dot_float) { test_team_dot(); } +TEST_F(TestCategory, team_dot_mv_float) { test_team_dot_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_dot_double) { - test_team_dot(); -} -TEST_F(TestCategory, team_dot_mv_double) { - test_team_dot_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_dot_double) { test_team_dot(); } +TEST_F(TestCategory, team_dot_mv_double) { test_team_dot_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_dot_complex_double) { test_team_dot, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, 
team_dot_mv_complex_double) { - test_team_dot_mv, Kokkos::complex, - TestDevice>(); + test_team_dot_mv, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_dot_int) { test_team_dot(); } -TEST_F(TestCategory, team_dot_mv_int) { - test_team_dot_mv(); -} +TEST_F(TestCategory, team_dot_mv_int) { test_team_dot_mv(); } #endif /*#if !defined(KOKKOSKERNELS_ETI_ONLY) && diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_mult.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_mult.hpp index 488e9ccf514b..63fdbf99c15b 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_mult.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_mult.hpp @@ -65,63 +65,41 @@ void impl_test_team_mult(int N) { ScalarA expected_result = 0; for (int i = 0; i < N; i++) - expected_result += - ScalarC(b * z.h_view(i) + a * x.h_view(i) * y.h_view(i)) * - ScalarC(b * z.h_view(i) + a * x.h_view(i) * y.h_view(i)); + expected_result += ScalarC(b * z.h_view(i) + a * x.h_view(i) * y.h_view(i)) * + ScalarC(b * z.h_view(i) + a * x.h_view(i) * y.h_view(i)); // KokkosBlas::mult(b,z,a,x,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamMult", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamMult", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::mult( teamMember, b, - Kokkos::subview( - z.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(z.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N)), a, - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); ScalarC nonconst_nonconst_result = KokkosBlas::dot(z.d_view, z.d_view); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); // Reset z on device to orig and run again with const-valued y Kokkos::deep_copy(z.d_base, org_z.h_base); // KokkosBlas::mult(b,z,a,x,c_y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamMult", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamMult", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::mult( teamMember, b, - Kokkos::subview( - z.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(z.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), a, - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - y.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); ScalarC const_nonconst_result = KokkosBlas::dot(z.d_view, z.d_view); EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); @@ -130,27 +108,17 @@ void impl_test_team_mult(int N) { Kokkos::deep_copy(z.d_base, org_z.h_base); // KokkosBlas::mult(b,z,a,c_x,c_y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamMult", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamMult", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::mult( teamMember, b, - Kokkos::subview( - z.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(z.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), a, - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), - Kokkos::subview( - y.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); }); ScalarC const_const_result = KokkosBlas::dot(z.d_view, z.d_view); EXPECT_NEAR_KK(const_const_result, expected_result, eps * expected_result); @@ -195,19 +163,16 @@ void impl_test_team_mult_mv(int N, int K) { // Since b and a are known and the largest value in z, x and y // is set by the variables max_val, the error upper bound will be // max_error = a * max_val * max_val - typename Kokkos::ArithTraits::mag_type const eps = - Kokkos::ArithTraits::epsilon(); + typename Kokkos::ArithTraits::mag_type const eps = Kokkos::ArithTraits::epsilon(); typename Kokkos::ArithTraits::mag_type const max_error = Kokkos::ArithTraits::abs(a) * max_val * max_val * eps; // KokkosBlas::mult(b,z,a,x,y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamMult", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamMult", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::mult( - teamMember, b, Kokkos::subview(z.d_view, Kokkos::ALL(), teamId), a, - x.d_view, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::mult(teamMember, b, Kokkos::subview(z.d_view, Kokkos::ALL(), teamId), a, x.d_view, + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(z.h_base, z.d_base); @@ -224,12 +189,10 @@ void impl_test_team_mult_mv(int N, int K) { Kokkos::deep_copy(z.d_base, org_z.h_base); // KokkosBlas::mult(b,z,a,x,c_y); Kokkos::parallel_for( - "KokkosBlas::Test::TeamMult", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamMult", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::mult( - teamMember, b, Kokkos::subview(z.d_view, Kokkos::ALL(), teamId), a, - x.d_view, Kokkos::subview(y.d_view_const, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::mult(teamMember, b, Kokkos::subview(z.d_view, Kokkos::ALL(), 
teamId), a, x.d_view, + Kokkos::subview(y.d_view_const, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(z.h_base, z.d_base); @@ -245,58 +208,43 @@ void impl_test_team_mult_mv(int N, int K) { template int test_team_mult() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_team_mult(0); - Test::impl_test_team_mult(13); - Test::impl_test_team_mult(124); + Test::impl_test_team_mult(0); + Test::impl_test_team_mult(13); + Test::impl_test_team_mult(124); // Test::impl_test_team_mult(132231); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_team_mult(0); - Test::impl_test_team_mult(13); - Test::impl_test_team_mult(124); + Test::impl_test_team_mult(0); + Test::impl_test_team_mult(13); + Test::impl_test_team_mult(124); // Test::impl_test_team_mult(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_team_mult(0); - Test::impl_test_team_mult(13); - Test::impl_test_team_mult(124); + Test::impl_test_team_mult(0); + Test::impl_test_team_mult(13); + Test::impl_test_team_mult(124); // Test::impl_test_team_mult(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) 
- Test::impl_test_team_mult(124); - Test::impl_test_team_mult(124); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_team_mult(124); + Test::impl_test_team_mult(124); #endif return 1; @@ -305,117 +253,79 @@ int test_team_mult() { template int test_team_mult_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_team_mult_mv(0, 5); - Test::impl_test_team_mult_mv(13, 5); - Test::impl_test_team_mult_mv(124, 5); + Test::impl_test_team_mult_mv(0, 5); + Test::impl_test_team_mult_mv(13, 5); + Test::impl_test_team_mult_mv(124, 5); // Test::impl_test_team_mult_mv(132231,5); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_team_mult_mv(0, 5); - Test::impl_test_team_mult_mv(13, 5); - Test::impl_test_team_mult_mv(124, 5); + Test::impl_test_team_mult_mv(0, 5); + Test::impl_test_team_mult_mv(13, 5); + Test::impl_test_team_mult_mv(124, 5); // Test::impl_test_team_mult_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_team_mult_mv(0, 5); - Test::impl_test_team_mult_mv(13, 5); - Test::impl_test_team_mult_mv(124, 5); + Test::impl_test_team_mult_mv(0, 
5); + Test::impl_test_team_mult_mv(13, 5); + Test::impl_test_team_mult_mv(124, 5); // Test::impl_test_team_mult_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_team_mult_mv(124, 5); - Test::impl_test_team_mult_mv(124, 5); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_team_mult_mv(124, 5); + Test::impl_test_team_mult_mv(124, 5); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_mult_float) { - test_team_mult(); -} -TEST_F(TestCategory, team_mult_mv_float) { - test_team_mult_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_mult_float) { test_team_mult(); } +TEST_F(TestCategory, team_mult_mv_float) { test_team_mult_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_mult_double) { - test_team_mult(); -} -TEST_F(TestCategory, team_mult_mv_double) { - test_team_mult_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_mult_double) { test_team_mult(); } +TEST_F(TestCategory, team_mult_mv_double) { test_team_mult_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_mult_complex_double) { - test_team_mult, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_team_mult, Kokkos::complex, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, team_mult_mv_complex_double) { - test_team_mult_mv, Kokkos::complex, - Kokkos::complex, 
TestDevice>(); + test_team_mult_mv, Kokkos::complex, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_mult_int) { - test_team_mult(); -} -TEST_F(TestCategory, team_mult_mv_int) { - test_team_mult_mv(); -} +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_mult_int) { test_team_mult(); } +TEST_F(TestCategory, team_mult_mv_int) { test_team_mult_mv(); } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, team_mult_double_int) { - test_team_mult(); -} -TEST_F(TestCategory, team_mult_double_mv_int) { - test_team_mult_mv(); -} +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +TEST_F(TestCategory, team_mult_double_int) { test_team_mult(); } +TEST_F(TestCategory, team_mult_double_mv_int) { test_team_mult_mv(); } #endif #endif // Check for lambda availability in CUDA backend diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_nrm2.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_nrm2.hpp index 12192032c98b..befec6e57bc0 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_nrm2.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_nrm2.hpp @@ -49,10 +49,8 @@ void impl_test_team_nrm2(int N, int K) { typename AT::mag_type *expected_result = new typename AT::mag_type[K]; for (int j = 0; j < K; j++) { expected_result[j] = typename AT::mag_type(); - for (int i = 0; i < N; i++) - expected_result[j] += AT::abs(a.h_view(i, j)) * AT::abs(a.h_view(i, j)); - expected_result[j] = - Kokkos::ArithTraits::sqrt(expected_result[j]); + for (int i = 0; i < N; i++) expected_result[j] += AT::abs(a.h_view(i, j)) * AT::abs(a.h_view(i, j)); + expected_result[j] = 
Kokkos::ArithTraits::sqrt(expected_result[j]); } double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; @@ -62,26 +60,22 @@ void impl_test_team_nrm2(int N, int K) { // KokkosBlas::nrm2(r,a); Kokkos::parallel_for( - "KokkosBlas::Test::TeamNrm2", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamNrm2", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - d_r(teamId) = KokkosBlas::Experimental::nrm2( - teamMember, Kokkos::subview(a.d_view, Kokkos::ALL(), teamId)); + d_r(teamId) = KokkosBlas::Experimental::nrm2(teamMember, Kokkos::subview(a.d_view, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < K; k++) { typename AT::mag_type nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_result, expected_result[k], eps * expected_result[k]); } // KokkosBlas::nrm2(r,c_a); Kokkos::parallel_for( - "KokkosBlas::Test::TeamNrm2", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamNrm2", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - d_r(teamId) = KokkosBlas::Experimental::nrm2( - teamMember, Kokkos::subview(a.d_view_const, Kokkos::ALL(), teamId)); + d_r(teamId) = + KokkosBlas::Experimental::nrm2(teamMember, Kokkos::subview(a.d_view_const, Kokkos::ALL(), teamId)); }); Kokkos::deep_copy(r, d_r); for (int k = 0; k < K; k++) { @@ -96,8 +90,7 @@ void impl_test_team_nrm2(int N, int K) { template int test_team_nrm2() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; Test::impl_test_team_nrm2(0, 5); Test::impl_test_team_nrm2(13, 5); @@ -106,8 +99,7 @@ int test_team_nrm2() { #endif #if 
defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; Test::impl_test_team_nrm2(0, 5); Test::impl_test_team_nrm2(13, 5); @@ -115,8 +107,7 @@ int test_team_nrm2() { // Test::impl_test_team_nrm2(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; Test::impl_test_team_nrm2(0, 5); Test::impl_test_team_nrm2(13, 5); @@ -128,28 +119,22 @@ int test_team_nrm2() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_nrm2_float) { test_team_nrm2(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_nrm2_double) { test_team_nrm2(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_nrm2_complex_double) { - test_team_nrm2, TestDevice>(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_nrm2_complex_double) { test_team_nrm2, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, 
team_nrm2_int) { test_team_nrm2(); } #endif diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_scal.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_scal.hpp index 212b1e09e902..f3d6707ba3b5 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_scal.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_scal.hpp @@ -62,60 +62,42 @@ void impl_test_team_scal(int N) { } Kokkos::parallel_for( - "KokkosBlas::Test::TeamScal", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamScal", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::scal( teamMember, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), a, - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); { ScalarB nonconst_nonconst_result = KokkosBlas::dot(y.d_view, y.d_view); - typename AT::mag_type divisor = - AT::abs(expected_result) == zero ? one : AT::abs(expected_result); - typename AT::mag_type diff = - AT::abs(nonconst_nonconst_result - expected_result) / divisor; + typename AT::mag_type divisor = AT::abs(expected_result) == zero ? 
one : AT::abs(expected_result); + typename AT::mag_type diff = AT::abs(nonconst_nonconst_result - expected_result) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); Kokkos::parallel_for( - "KokkosBlas::Test::TeamScal", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamScal", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::scal( teamMember, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), a, - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); { ScalarB const_nonconst_result = KokkosBlas::dot(y.d_view, y.d_view); - typename AT::mag_type divisor = - AT::abs(expected_result) == zero ? one : AT::abs(expected_result); - typename AT::mag_type diff = - AT::abs(const_nonconst_result - expected_result) / divisor; + typename AT::mag_type divisor = AT::abs(expected_result) == zero ? 
one : AT::abs(expected_result); + typename AT::mag_type diff = AT::abs(const_nonconst_result - expected_result) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } } @@ -147,8 +129,7 @@ void impl_test_team_scal_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); for (int i = 0; i < N; i++) { - expected_result[j] += - ScalarB(a * x.h_view(i, j)) * ScalarB(a * x.h_view(i, j)); + expected_result[j] += ScalarB(a * x.h_view(i, j)) * ScalarB(a * x.h_view(i, j)); } } @@ -159,21 +140,17 @@ void impl_test_team_scal_mv(int N, int K) { Kokkos::View r("Dot::Result", K); Kokkos::parallel_for( - "KokkosBlas::Test::TeamScal", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamScal", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::scal( - teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), a, - Kokkos::subview(x.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::scal(teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), a, + Kokkos::subview(x.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { ScalarA nonconst_scalar_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(nonconst_scalar_result - expected_result[k]) / divisor; + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(nonconst_scalar_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } @@ -181,21 +158,17 @@ void impl_test_team_scal_mv(int N, int K) { Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); Kokkos::parallel_for( - "KokkosBlas::Test::TeamScal", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamScal", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::scal( - teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), a, - Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::scal(teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), a, + Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { - ScalarA const_scalar_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(const_scalar_result - expected_result[k]) / divisor; + ScalarA const_scalar_result = r(k); + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(const_scalar_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } @@ -211,8 +184,7 @@ void impl_test_team_scal_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = ScalarA(); for (int i = 0; i < N; i++) { - expected_result[j] += ScalarB((3.0 + j) * x.h_view(i, j)) * - ScalarB((3.0 + j) * x.h_view(i, j)); + expected_result[j] += ScalarB((3.0 + j) * x.h_view(i, j)) * ScalarB((3.0 + j) * x.h_view(i, j)); } } @@ -220,21 +192,17 @@ void impl_test_team_scal_mv(int N, int K) { Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); Kokkos::parallel_for( - "KokkosBlas::Test::TeamScal", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamScal", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::scal( - teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), - params(teamId), Kokkos::subview(x.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::scal(teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), params(teamId), + Kokkos::subview(x.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { ScalarA nonconst_vector_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(nonconst_vector_result - expected_result[k]) / divisor; + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(nonconst_vector_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } @@ -242,22 +210,17 @@ void impl_test_team_scal_mv(int N, int K) { Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); Kokkos::parallel_for( - "KokkosBlas::Test::TeamScal", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamScal", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::scal( - teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), - params(teamId), - Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::scal(teamMember, Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), params(teamId), + Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, y.d_view, y.d_view); for (int k = 0; k < K; k++) { - ScalarA const_vector_result = r(k); - typename AT::mag_type divisor = - AT::abs(expected_result[k]) == zero ? one : AT::abs(expected_result[k]); - typename AT::mag_type diff = - AT::abs(const_vector_result - expected_result[k]) / divisor; + ScalarA const_vector_result = r(k); + typename AT::mag_type divisor = AT::abs(expected_result[k]) == zero ? 
one : AT::abs(expected_result[k]); + typename AT::mag_type diff = AT::abs(const_vector_result - expected_result[k]) / divisor; EXPECT_NEAR_KK(diff, zero, eps); } @@ -268,8 +231,7 @@ void impl_test_team_scal_mv(int N, int K) { template int test_team_scal() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_scal(0); @@ -279,8 +241,7 @@ int test_team_scal() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_scal(0); @@ -289,8 +250,7 @@ int test_team_scal() { // Test::impl_test_team_scal(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_scal(0); @@ -299,8 +259,7 @@ int test_team_scal() { // Test::impl_test_team_scal(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_scal(124); Test::impl_test_team_scal(124); #endif @@ -311,8 +270,7 @@ int test_team_scal() { template int test_team_scal_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View 
view_type_a_ll; typedef Kokkos::View view_type_b_ll; Test::impl_test_team_scal_mv(0, 5); @@ -323,8 +281,7 @@ int test_team_scal_mv() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; Test::impl_test_team_scal_mv(0, 5); @@ -334,8 +291,7 @@ int test_team_scal_mv() { // Device>(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; Test::impl_test_team_scal_mv(0, 5); @@ -345,8 +301,7 @@ int test_team_scal_mv() { // Device>(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) Test::impl_test_team_scal_mv(124, 5); Test::impl_test_team_scal_mv(124, 5); #endif @@ -355,57 +310,36 @@ int test_team_scal_mv() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_scal_float) { - test_team_scal(); -} -TEST_F(TestCategory, team_scal_mv_float) { - test_team_scal_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_scal_float) { test_team_scal(); } +TEST_F(TestCategory, team_scal_mv_float) { test_team_scal_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_scal_double) { - test_team_scal(); -} -TEST_F(TestCategory, team_scal_mv_double) { - test_team_scal_mv(); -} + 
(!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_scal_double) { test_team_scal(); } +TEST_F(TestCategory, team_scal_mv_double) { test_team_scal_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_scal_complex_double) { - test_team_scal, Kokkos::complex, - TestDevice>(); + test_team_scal, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, team_scal_mv_complex_double) { - test_team_scal_mv, Kokkos::complex, - TestDevice>(); + test_team_scal_mv, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_scal_int) { test_team_scal(); } -TEST_F(TestCategory, team_scal_mv_int) { - test_team_scal_mv(); -} +TEST_F(TestCategory, team_scal_mv_int) { test_team_scal_mv(); } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, team_scal_double_int) { - test_team_scal(); -} -TEST_F(TestCategory, team_scal_double_mv_int) { - test_team_scal_mv(); -} +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +TEST_F(TestCategory, team_scal_double_int) { test_team_scal(); } +TEST_F(TestCategory, team_scal_double_mv_int) { test_team_scal_mv(); } #endif #endif // Check for lambda availability in CUDA backend diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_setscal.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_setscal.hpp index 4d2499a466e5..33b264aa795c 100644 --- 
a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_setscal.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_setscal.hpp @@ -33,35 +33,27 @@ enum : int { BlasSet = 0, BlasScale = 1 }; struct KokkosKernelTag {}; struct NaiveTag {}; -template +template struct Functor_TestBlasTeamMatUtil { using execution_space = typename DeviceType::execution_space; ScalarType _alpha; ViewType _a; KOKKOS_INLINE_FUNCTION - Functor_TestBlasTeamMatUtil(const ScalarType alpha, const ViewType &a) - : _alpha(alpha), _a(a) {} + Functor_TestBlasTeamMatUtil(const ScalarType alpha, const ViewType &a) : _alpha(alpha), _a(a) {} template - KOKKOS_INLINE_FUNCTION void operator()(const KokkosKernelTag &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const KokkosKernelTag &, const MemberType &member) const { const int i = member.league_rank(); auto A = Kokkos::subview(_a, i, Kokkos::ALL(), Kokkos::ALL()); switch (TestID) { - case BlasSet: - KokkosBlas::TeamSet::invoke(member, _alpha, A); - break; - case BlasScale: - KokkosBlas::TeamScale::invoke(member, _alpha, A); - break; + case BlasSet: KokkosBlas::TeamSet::invoke(member, _alpha, A); break; + case BlasScale: KokkosBlas::TeamScale::invoke(member, _alpha, A); break; } } template - KOKKOS_INLINE_FUNCTION void operator()(const NaiveTag &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const NaiveTag &, const MemberType &member) const { if (member.team_rank() == 0) { const int k = member.league_rank(); auto A = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); @@ -85,21 +77,15 @@ struct Functor_TestBlasTeamMatUtil { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBlas::Test::SerialMatUtil"); const std::string name_value_type = Test::value_type_name(); - std::string name_work_tag = - (std::is_same::value - ? "::KokkosBlas" - : std::is_same::value ? "::Naive" - : "::UnknownWorkTag"); - std::string name_test_id = - (TestID == BlasSet ? 
"Set" - : TestID == BlasScale ? "Scale" : "UnknownTest"); - std::string name = - name_region + name_value_type + name_work_tag + name_test_id; + std::string name_work_tag = (std::is_same::value ? "::KokkosBlas" + : std::is_same::value ? "::Naive" + : "::UnknownWorkTag"); + std::string name_test_id = (TestID == BlasSet ? "Set" : TestID == BlasScale ? "Scale" : "UnknownTest"); + std::string name = name_region + name_value_type + name_work_tag + name_test_id; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); + Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); Kokkos::parallel_for(name.c_str(), policy, *this); Kokkos::Profiling::popRegion(); @@ -107,8 +93,7 @@ struct Functor_TestBlasTeamMatUtil { } }; -template +template void impl_test_blas_matutil(const int N, const int BlkSize) { /// typedefs typedef typename ViewType::value_type value_type; @@ -119,8 +104,7 @@ void impl_test_blas_matutil(const int N, const int BlkSize) { ViewType a("a", N, BlkSize, BlkSize); ViewType b("b", N, BlkSize, BlkSize); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(a, random, value_type(1.0)); Kokkos::fence(); @@ -128,12 +112,8 @@ void impl_test_blas_matutil(const int N, const int BlkSize) { Kokkos::deep_copy(b, a); /// test body - Functor_TestBlasTeamMatUtil(alpha, a) - .run(); - Functor_TestBlasTeamMatUtil(alpha, b) - .run(); + Functor_TestBlasTeamMatUtil(alpha, a).run(); + Functor_TestBlasTeamMatUtil(alpha, b).run(); Kokkos::fence(); @@ -145,45 +125,32 @@ void impl_test_blas_matutil(const int N, const int BlkSize) { Kokkos::deep_copy(b_host, b); /// check a = b - typename ats::mag_type eps = - 100 * std::numeric_limits::epsilon(); + typename ats::mag_type eps = 100 * std::numeric_limits::epsilon(); for (int k = 0; k < N; ++k) for (int i = 0; i < BlkSize; ++i) - for (int j = 0; j < BlkSize; ++j) - EXPECT_NEAR_KK(b_host(k, 
i, j), a_host(k, i, j), eps); + for (int j = 0; j < BlkSize; ++j) EXPECT_NEAR_KK(b_host(k, i, j), a_host(k, i, j), eps); } } // namespace TeamMatUtil } // namespace Test -template +template int test_blas_team_matutil() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; - Test::TeamMatUtil::impl_test_blas_matutil(0, 10); - Test::TeamMatUtil::impl_test_blas_matutil(10, 15); - Test::TeamMatUtil::impl_test_blas_matutil(1024, 9); - Test::TeamMatUtil::impl_test_blas_matutil(132231, 3); + typedef Kokkos::View ViewType; + Test::TeamMatUtil::impl_test_blas_matutil(0, 10); + Test::TeamMatUtil::impl_test_blas_matutil(10, 15); + Test::TeamMatUtil::impl_test_blas_matutil(1024, 9); + Test::TeamMatUtil::impl_test_blas_matutil(132231, 3); } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; - Test::TeamMatUtil::impl_test_blas_matutil(0, 10); - Test::TeamMatUtil::impl_test_blas_matutil(10, 15); - Test::TeamMatUtil::impl_test_blas_matutil(1024, 9); - Test::TeamMatUtil::impl_test_blas_matutil(132231, 3); + typedef Kokkos::View ViewType; + Test::TeamMatUtil::impl_test_blas_matutil(0, 10); + Test::TeamMatUtil::impl_test_blas_matutil(10, 15); + Test::TeamMatUtil::impl_test_blas_matutil(1024, 9); + Test::TeamMatUtil::impl_test_blas_matutil(132231, 3); } #endif @@ -214,19 +181,15 @@ TEST_F(TestCategory, blas_scalar_team_scale_double_double) { #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, blas_scalar_team_set_dcomplex_dcomplex) { - test_blas_team_matutil, - Kokkos::complex, ::Test::BlasSet>(); + test_blas_team_matutil, Kokkos::complex, ::Test::BlasSet>(); } TEST_F(TestCategory, blas_scalar_team_scale_dcomplex_dcomplex) { - test_blas_team_matutil, - Kokkos::complex, ::Test::BlasScale>(); + test_blas_team_matutil, Kokkos::complex, ::Test::BlasScale>(); } TEST_F(TestCategory, blas_scalar_team_set_dcomplex_double) { - test_blas_team_matutil, double, - ::Test::BlasSet>(); + test_blas_team_matutil, double, 
::Test::BlasSet>(); } TEST_F(TestCategory, blas_scalar_team_scale_dcomplex_double) { - test_blas_team_matutil, double, - ::Test::BlasScale>(); + test_blas_team_matutil, double, ::Test::BlasScale>(); } #endif diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_update.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_update.hpp index cfc76455f3b5..27765b0936d2 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_update.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_team_update.hpp @@ -66,64 +66,42 @@ void impl_test_team_update(int N) { ScalarA expected_result = 0; for (int i = 0; i < N; i++) - expected_result += - ScalarC(c * z.h_view(i) + a * x.h_view(i) + b * y.h_view(i)) * - ScalarC(c * z.h_view(i) + a * x.h_view(i) + b * y.h_view(i)); + expected_result += ScalarC(c * z.h_view(i) + a * x.h_view(i) + b * y.h_view(i)) * + ScalarC(c * z.h_view(i) + a * x.h_view(i) + b * y.h_view(i)); // KokkosBlas::update(a,x,b,y,c,z); Kokkos::parallel_for( - "KokkosBlas::Test::TeamUpdate", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamUpdate", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::update( teamMember, a, - Kokkos::subview( - x.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), b, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), c, - Kokkos::subview( - z.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); + Kokkos::subview(z.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); }); ScalarC nonconst_nonconst_result = KokkosBlas::dot(z.d_view, z.d_view); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, eps * expected_result); Kokkos::deep_copy(z.d_base, org_z.h_base); // KokkosBlas::update(a,c_x,b,y,c,z); Kokkos::parallel_for( - "KokkosBlas::Test::TeamUpdate", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamUpdate", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::update( teamMember, a, - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), b, - Kokkos::subview( - y.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), c, - Kokkos::subview( - z.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(z.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); }); ScalarC const_nonconst_result = KokkosBlas::dot(z.d_view, z.d_view); EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); @@ -131,28 +109,18 @@ void impl_test_team_update(int N) { Kokkos::deep_copy(z.d_base, org_z.h_base); // KokkosBlas::update(a,c_x,b,c_y,c,z); Kokkos::parallel_for( - "KokkosBlas::Test::TeamUpdate", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamUpdate", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); KokkosBlas::Experimental::update( teamMember, a, - Kokkos::subview( - x.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(x.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), b, - Kokkos::subview( - y.d_view_const, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), + Kokkos::subview(y.d_view_const, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? (teamId + 1) * team_data_siz : N)), c, - Kokkos::subview( - z.d_view, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < M - 1) ? (teamId + 1) * team_data_siz : N))); + Kokkos::subview(z.d_view, Kokkos::make_pair(teamId * team_data_siz, + (teamId < M - 1) ? 
(teamId + 1) * team_data_siz : N))); }); ScalarC const_const_result = KokkosBlas::dot(z.d_view, z.d_view); EXPECT_NEAR_KK(const_const_result, expected_result, eps * expected_result); @@ -196,10 +164,8 @@ void impl_test_team_update_mv(int N, int K) { for (int j = 0; j < K; j++) { expected_result[j] = ScalarC(); for (int i = 0; i < N; i++) - expected_result[j] += - ScalarC(a * x.h_view(i, j) + b * y.h_view(i, j) + - c * z.h_view(i, j)) * - ScalarC(a * x.h_view(i, j) + b * y.h_view(i, j) + c * z.h_view(i, j)); + expected_result[j] += ScalarC(a * x.h_view(i, j) + b * y.h_view(i, j) + c * z.h_view(i, j)) * + ScalarC(a * x.h_view(i, j) + b * y.h_view(i, j) + c * z.h_view(i, j)); } double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; @@ -208,38 +174,31 @@ void impl_test_team_update_mv(int N, int K) { // KokkosBlas::update(a,x,b,y,c,z); Kokkos::parallel_for( - "KokkosBlas::Test::TeamUpdate", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { + "KokkosBlas::Test::TeamUpdate", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::update( - teamMember, a, Kokkos::subview(x.d_view, Kokkos::ALL(), teamId), b, - Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), c, - Kokkos::subview(z.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::update(teamMember, a, Kokkos::subview(x.d_view, Kokkos::ALL(), teamId), b, + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), c, + Kokkos::subview(z.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, z.d_view, z.d_view); for (int k = 0; k < K; k++) { ScalarA nonconst_nonconst_result = r(k); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result[k], eps * expected_result[k]); } Kokkos::deep_copy(z.d_base, org_z.h_base); // KokkosBlas::update(a,c_x,b,y,c,z); Kokkos::parallel_for( - "KokkosBlas::Test::TeamUpdate", policy, - KOKKOS_LAMBDA(const team_member 
&teamMember) { + "KokkosBlas::Test::TeamUpdate", policy, KOKKOS_LAMBDA(const team_member &teamMember) { const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::update( - teamMember, a, - Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId), b, - Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), c, - Kokkos::subview(z.d_view, Kokkos::ALL(), teamId)); + KokkosBlas::Experimental::update(teamMember, a, Kokkos::subview(x.d_view_const, Kokkos::ALL(), teamId), b, + Kokkos::subview(y.d_view, Kokkos::ALL(), teamId), c, + Kokkos::subview(z.d_view, Kokkos::ALL(), teamId)); }); KokkosBlas::dot(r, z.d_view, z.d_view); for (int k = 0; k < K; k++) { ScalarA const_non_const_result = r(k); - EXPECT_NEAR_KK(const_non_const_result, expected_result[k], - eps * expected_result[k]); + EXPECT_NEAR_KK(const_non_const_result, expected_result[k], eps * expected_result[k]); } delete[] expected_result; @@ -249,58 +208,43 @@ void impl_test_team_update_mv(int N, int K) { template int test_team_update() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_team_update(0); - Test::impl_test_team_update(13); - Test::impl_test_team_update(124); + Test::impl_test_team_update(0); + Test::impl_test_team_update(13); + Test::impl_test_team_update(124); // Test::impl_test_team_update(132231); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_team_update(0); - 
Test::impl_test_team_update(13); - Test::impl_test_team_update(124); + Test::impl_test_team_update(0); + Test::impl_test_team_update(13); + Test::impl_test_team_update(124); // Test::impl_test_team_update(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_team_update(0); - Test::impl_test_team_update(13); - Test::impl_test_team_update(124); + Test::impl_test_team_update(0); + Test::impl_test_team_update(13); + Test::impl_test_team_update(124); // Test::impl_test_team_update(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_team_update(124); - Test::impl_test_team_update(124); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_team_update(124); + Test::impl_test_team_update(124); #endif return 1; @@ -309,117 +253,79 @@ int test_team_update() { template int test_team_update_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_team_update_mv(0, 5); - Test::impl_test_team_update_mv(13, 5); - Test::impl_test_team_update_mv(124, 5); + Test::impl_test_team_update_mv(0, 5); + Test::impl_test_team_update_mv(13, 5); + Test::impl_test_team_update_mv(124, 5); // Test::impl_test_team_update_mv(132231,5); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_team_update_mv(0, 5); - Test::impl_test_team_update_mv(13, 5); - Test::impl_test_team_update_mv(124, 5); + Test::impl_test_team_update_mv(0, 5); + Test::impl_test_team_update_mv(13, 5); + Test::impl_test_team_update_mv(124, 5); // Test::impl_test_team_update_mv(132231,5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_team_update_mv(0, 5); - Test::impl_test_team_update_mv(13, 5); - Test::impl_test_team_update_mv(124, 5); + Test::impl_test_team_update_mv(0, 5); + Test::impl_test_team_update_mv(13, 5); + Test::impl_test_team_update_mv(124, 5); // Test::impl_test_team_update_mv(132231,5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_team_update_mv(124, 5); - Test::impl_test_team_update_mv(124, 5); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_team_update_mv(124, 5); + Test::impl_test_team_update_mv(124, 5); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_update_float) { - test_team_update(); -} -TEST_F(TestCategory, team_update_mv_float) { - test_team_update_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_update_float) { test_team_update(); } +TEST_F(TestCategory, team_update_mv_float) { test_team_update_mv(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_update_double) { - test_team_update(); -} -TEST_F(TestCategory, team_update_mv_double) { - test_team_update_mv(); -} + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_update_double) { test_team_update(); } +TEST_F(TestCategory, team_update_mv_double) { test_team_update_mv(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, team_update_complex_double) { - test_team_update, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_team_update, Kokkos::complex, Kokkos::complex, TestDevice>(); } TEST_F(TestCategory, team_update_mv_complex_double) { - test_team_update_mv, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_team_update_mv, Kokkos::complex, Kokkos::complex, TestDevice>(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_update_int) { - test_team_update(); -} -TEST_F(TestCategory, team_update_mv_int) { - test_team_update_mv(); -} +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, team_update_int) { test_team_update(); } +TEST_F(TestCategory, team_update_mv_int) { test_team_update_mv(); } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, team_update_double_int) { - test_team_update(); -} -TEST_F(TestCategory, team_update_double_mv_int) { - test_team_update_mv(); -} +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +TEST_F(TestCategory, team_update_double_int) { test_team_update(); } 
+TEST_F(TestCategory, team_update_double_mv_int) { test_team_update_mv(); } #endif #endif // Check for lambda availability in CUDA backend diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_update.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_update.hpp index cfeddb9d3d7c..6152a3493b1f 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas1_update.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas1_update.hpp @@ -37,8 +37,7 @@ void impl_test_update(int N) { view_stride_adapter z("Z", N); view_stride_adapter org_z("Org_Z", N); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -64,27 +63,21 @@ void impl_test_update(int N) { KokkosBlas::update(a, x.d_view, b, y.d_view, c, z.d_view); Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * y.h_view(i) + - c * org_z.h_view(i)), - z.h_view(i), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * y.h_view(i) + c * org_z.h_view(i)), z.h_view(i), eps); } Kokkos::deep_copy(z.d_base, org_z.h_base); KokkosBlas::update(a, x.d_view_const, b, y.d_view, c, z.d_view); Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * y.h_view(i) + - c * org_z.h_view(i)), - z.h_view(i), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * y.h_view(i) + c * org_z.h_view(i)), z.h_view(i), eps); } Kokkos::deep_copy(z.d_base, org_z.h_base); KokkosBlas::update(a, x.d_view_const, b, y.d_view_const, c, z.d_view); Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { - EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * y.h_view(i) + - c * org_z.h_view(i)), - z.h_view(i), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i) + b * y.h_view(i) + c * org_z.h_view(i)), z.h_view(i), eps); } } @@ -99,8 +92,7 @@ void impl_test_update_mv(int N, int K) { view_stride_adapter z("Z", N, K); 
view_stride_adapter org_z("Org_Z", N, K); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarA randStart, randEnd; @@ -133,10 +125,8 @@ void impl_test_update_mv(int N, int K) { Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK( - static_cast(a * x.h_view(i, j) + b * y.h_view(i, j) + - c * org_z.h_view(i, j)), - z.h_view(i, j), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j) + b * y.h_view(i, j) + c * org_z.h_view(i, j)), + z.h_view(i, j), eps); } } @@ -145,10 +135,8 @@ void impl_test_update_mv(int N, int K) { Kokkos::deep_copy(z.h_base, z.d_base); for (int i = 0; i < N; i++) { for (int j = 0; j < K; j++) { - EXPECT_NEAR_KK( - static_cast(a * x.h_view(i, j) + b * y.h_view(i, j) + - c * org_z.h_view(i, j)), - z.h_view(i, j), eps); + EXPECT_NEAR_KK(static_cast(a * x.h_view(i, j) + b * y.h_view(i, j) + c * org_z.h_view(i, j)), + z.h_view(i, j), eps); } } } @@ -157,58 +145,43 @@ void impl_test_update_mv(int N, int K) { template int test_update() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_update(0); - Test::impl_test_update(13); - Test::impl_test_update(1024); + Test::impl_test_update(0); + Test::impl_test_update(13); + Test::impl_test_update(1024); // Test::impl_test_update(132231); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - 
Test::impl_test_update(0); - Test::impl_test_update(13); - Test::impl_test_update(1024); + Test::impl_test_update(0); + Test::impl_test_update(13); + Test::impl_test_update(1024); // Test::impl_test_update(132231); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_update(0); - Test::impl_test_update(13); - Test::impl_test_update(1024); + Test::impl_test_update(0); + Test::impl_test_update(13); + Test::impl_test_update(1024); // Test::impl_test_update(132231); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_update(1024); - Test::impl_test_update(1024); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_update(1024); + Test::impl_test_update(1024); #endif return 1; @@ -217,66 +190,47 @@ int test_update() { template int test_update_mv() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; - Test::impl_test_update_mv(0, 5); - Test::impl_test_update_mv(13, 5); - Test::impl_test_update_mv(1024, 5); - Test::impl_test_update_mv(132231, 5); + Test::impl_test_update_mv(0, 5); + Test::impl_test_update_mv(13, 5); + Test::impl_test_update_mv(1024, 5); + Test::impl_test_update_mv(132231, 5); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) 
typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_update_mv(0, 5); - Test::impl_test_update_mv(13, 5); - Test::impl_test_update_mv(1024, 5); - Test::impl_test_update_mv(132231, 5); + Test::impl_test_update_mv(0, 5); + Test::impl_test_update_mv(13, 5); + Test::impl_test_update_mv(1024, 5); + Test::impl_test_update_mv(132231, 5); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_update_mv(0, 5); - Test::impl_test_update_mv(13, 5); - Test::impl_test_update_mv(1024, 5); - Test::impl_test_update_mv(132231, 5); + Test::impl_test_update_mv(0, 5); + Test::impl_test_update_mv(13, 5); + Test::impl_test_update_mv(1024, 5); + Test::impl_test_update_mv(132231, 5); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_update_mv(1024, 5); - Test::impl_test_update_mv(1024, 5); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_update_mv(1024, 5); + Test::impl_test_update_mv(1024, 5); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, update_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::update_float"); test_update(); @@ -290,8 +244,7 @@ TEST_F(TestCategory, update_mv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, 
update_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::update_double"); test_update(); @@ -304,25 +257,21 @@ TEST_F(TestCategory, update_mv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, update_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::update_complex_double"); - test_update, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_update, Kokkos::complex, Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, update_mv_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::update_mv_complex_double"); - test_update_mv, Kokkos::complex, - Kokkos::complex, TestDevice>(); + test_update_mv, Kokkos::complex, Kokkos::complex, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, update_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::update_int"); test_update(); @@ -335,8 +284,7 @@ TEST_F(TestCategory, update_mv_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, update_double_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::update_double_int"); test_update(); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv.hpp index b3f3566f83ac..d70935c2acd8 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv.hpp +++ 
b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv.hpp @@ -21,10 +21,8 @@ #include namespace Test { -template -void impl_test_gemv_streams(ExecutionSpace& space, const char* mode, int M, - int N) { +template +void impl_test_gemv_streams(ExecutionSpace& space, const char* mode, int M, int N) { typedef typename ViewTypeA::value_type ScalarA; typedef typename ViewTypeX::value_type ScalarX; typedef typename ViewTypeY::value_type ScalarY; @@ -70,10 +68,8 @@ void impl_test_gemv_streams(ExecutionSpace& space, const char* mode, int M, Kokkos::fill_random(space, A.d_view, rand_pool, randStart, randEnd); } - const typename KAT_Y::mag_type max_error = - KAT_Y::abs(alpha * max_valA * max_valX * ldx + beta * max_valY); - const typename KAT_Y::mag_type tol = - max_error * eps * 2; // adding small fudge factor of 2 + const typename KAT_Y::mag_type max_error = KAT_Y::abs(alpha * max_valA * max_valX * ldx + beta * max_valY); + const typename KAT_Y::mag_type tol = max_error * eps * 2; // adding small fudge factor of 2 Kokkos::deep_copy(org_y.h_base, y.d_base); Kokkos::deep_copy(x.h_base, x.d_base); @@ -89,39 +85,33 @@ void impl_test_gemv_streams(ExecutionSpace& space, const char* mode, int M, for (int i = 0; i < ldy; i++) { if (KAT_Y::abs(expected(i) - y.h_view(i)) > tol) { numErrors++; - std::cerr << __FILE__ << ":" << __LINE__ - << ": expected(i)=" << expected(i) << ", h_y(i)=" << y.h_view(i) + std::cerr << __FILE__ << ":" << __LINE__ << ": expected(i)=" << expected(i) << ", h_y(i)=" << y.h_view(i) << std::endl; } } - EXPECT_EQ(numErrors, 0) << "Nonconst input, " << M << 'x' << N - << ", alpha = " << alpha << ", beta = " << beta + EXPECT_EQ(numErrors, 0) << "Nonconst input, " << M << 'x' << N << ", alpha = " << alpha << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; Kokkos::deep_copy(space, y.d_base, org_y.h_base); - KokkosBlas::gemv(space, mode, alpha, A.d_view, x.d_view_const, beta, - y.d_view); + KokkosBlas::gemv(space, mode, alpha, A.d_view, 
x.d_view_const, beta, y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); numErrors = 0; Kokkos::fence(); // Wait for vanillaGEMV for (int i = 0; i < ldy; i++) { if (KAT_Y::abs(expected(i) - y.h_view(i)) > tol) numErrors++; } - EXPECT_EQ(numErrors, 0) << "Const vector input, " << M << 'x' << N - << ", alpha = " << alpha << ", beta = " << beta + EXPECT_EQ(numErrors, 0) << "Const vector input, " << M << 'x' << N << ", alpha = " << alpha << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; Kokkos::deep_copy(space, y.d_base, org_y.h_base); - KokkosBlas::gemv(space, mode, alpha, A.d_view_const, x.d_view_const, beta, - y.d_view); + KokkosBlas::gemv(space, mode, alpha, A.d_view_const, x.d_view_const, beta, y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); numErrors = 0; for (int i = 0; i < ldy; i++) { if (KAT_Y::abs(expected(i) - y.h_view(i)) > tol) numErrors++; } - EXPECT_EQ(numErrors, 0) << "Const matrix/vector input, " << M << 'x' << N - << ", alpha = " << alpha << ", beta = " << beta - << ", mode " << mode << ": gemv incorrect"; + EXPECT_EQ(numErrors, 0) << "Const matrix/vector input, " << M << 'x' << N << ", alpha = " << alpha + << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; // Test once with beta = 0, but with y initially filled with NaN. // This should overwrite the NaNs with the correct result. 
beta = KAT_Y::zero(); @@ -135,32 +125,28 @@ void impl_test_gemv_streams(ExecutionSpace& space, const char* mode, int M, numErrors = 0; for (int i = 0; i < ldy; i++) { if (KAT_Y::isNan(y.h_view(i)) || - KAT_Y::abs(expected(i) - y.h_view(i)) > - KAT_Y::abs(alpha * max_valA * max_valX * ldx * eps * 2)) { + KAT_Y::abs(expected(i) - y.h_view(i)) > KAT_Y::abs(alpha * max_valA * max_valX * ldx * eps * 2)) { numErrors++; - std::cerr << __FILE__ << ":" << __LINE__ << ": expected(" << i - << ")=" << expected(i) << ", h_y(" << i << ")=" << y.h_view(i) - << ", eps=" << eps - << ", 1024*2*eps=" << 1024 * 2 * KAT_Y::epsilon() << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << ": expected(" << i << ")=" << expected(i) << ", h_y(" << i + << ")=" << y.h_view(i) << ", eps=" << eps << ", 1024*2*eps=" << 1024 * 2 * KAT_Y::epsilon() + << std::endl; } } - EXPECT_EQ(numErrors, 0) << "beta = 0, input contains NaN, A is " << M << 'x' - << N << ", mode " << mode << ": gemv incorrect"; + EXPECT_EQ(numErrors, 0) << "beta = 0, input contains NaN, A is " << M << 'x' << N << ", mode " << mode + << ": gemv incorrect"; } template void impl_test_gemv(const char* mode, int M, int N) { using execution_space = typename Device::execution_space; execution_space space; - impl_test_gemv_streams(space, mode, M, N); + impl_test_gemv_streams(space, mode, M, N); } } // namespace Test template int test_gemv(const char* mode) { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; typedef Kokkos::View view_type_c_ll; @@ -172,85 +158,58 @@ int test_gemv(const char* mode) { Test::impl_test_gemv(mode,10,200); Test::impl_test_gemv(mode,200,10); #endif - Test::impl_test_gemv( - mode, 0, 1024); - Test::impl_test_gemv( - mode, 1024, 0); - Test::impl_test_gemv( - mode, 13, 
13); - Test::impl_test_gemv( - mode, 13, 1024); - Test::impl_test_gemv( - mode, 50, 40); - Test::impl_test_gemv( - mode, 1024, 1024); - Test::impl_test_gemv( - mode, 2131, 2131); + Test::impl_test_gemv(mode, 0, 1024); + Test::impl_test_gemv(mode, 1024, 0); + Test::impl_test_gemv(mode, 13, 13); + Test::impl_test_gemv(mode, 13, 1024); + Test::impl_test_gemv(mode, 50, 40); + Test::impl_test_gemv(mode, 1024, 1024); + Test::impl_test_gemv(mode, 2131, 2131); // Test::impl_test_gemv(mode,132231,1024); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; typedef Kokkos::View view_type_c_lr; - Test::impl_test_gemv( - mode, 0, 1024); - Test::impl_test_gemv( - mode, 1024, 0); - Test::impl_test_gemv( - mode, 13, 13); - Test::impl_test_gemv( - mode, 13, 1024); - Test::impl_test_gemv( - mode, 50, 40); - Test::impl_test_gemv( - mode, 1024, 1024); - Test::impl_test_gemv( - mode, 2131, 2131); + Test::impl_test_gemv(mode, 0, 1024); + Test::impl_test_gemv(mode, 1024, 0); + Test::impl_test_gemv(mode, 13, 13); + Test::impl_test_gemv(mode, 13, 1024); + Test::impl_test_gemv(mode, 50, 40); + Test::impl_test_gemv(mode, 1024, 1024); + Test::impl_test_gemv(mode, 2131, 2131); // Test::impl_test_gemv(mode,132231,1024); #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ls; typedef Kokkos::View view_type_b_ls; typedef Kokkos::View view_type_c_ls; - Test::impl_test_gemv( - mode, 0, 1024); - Test::impl_test_gemv( - mode, 1024, 0); - Test::impl_test_gemv( - mode, 13, 13); - Test::impl_test_gemv( - mode, 13, 1024); - Test::impl_test_gemv( - mode, 50, 40); - Test::impl_test_gemv( - mode, 
1024, 1024); - Test::impl_test_gemv( - mode, 2131, 2131); + Test::impl_test_gemv(mode, 0, 1024); + Test::impl_test_gemv(mode, 1024, 0); + Test::impl_test_gemv(mode, 13, 13); + Test::impl_test_gemv(mode, 13, 1024); + Test::impl_test_gemv(mode, 50, 40); + Test::impl_test_gemv(mode, 1024, 1024); + Test::impl_test_gemv(mode, 2131, 2131); // Test::impl_test_gemv(mode,132231,1024); #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_gemv( - mode, 1024, 1024); - Test::impl_test_gemv( - mode, 1024, 1024); +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_gemv(mode, 1024, 1024); + Test::impl_test_gemv(mode, 1024, 1024); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemv_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_float"); test_gemv("N"); @@ -263,8 +222,7 @@ TEST_F(TestCategory, gemv_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemv_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_double"); test_gemv("N"); @@ -277,29 +235,24 @@ TEST_F(TestCategory, gemv_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemv_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_complex_double"); - test_gemv, Kokkos::complex, - Kokkos::complex, TestDevice>("N"); + test_gemv, Kokkos::complex, 
Kokkos::complex, TestDevice>("N"); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_tran_complex_double"); - test_gemv, Kokkos::complex, - Kokkos::complex, TestDevice>("T"); + test_gemv, Kokkos::complex, Kokkos::complex, TestDevice>("T"); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_conj_complex_double"); - test_gemv, Kokkos::complex, - Kokkos::complex, TestDevice>("C"); + test_gemv, Kokkos::complex, Kokkos::complex, TestDevice>("C"); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemv_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_int"); test_gemv("N"); @@ -311,8 +264,7 @@ TEST_F(TestCategory, gemv_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, gemv_double_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_double_int"); test_gemv("N"); @@ -332,34 +284,33 @@ int test_gemv_streams(const char* mode) { using view_type_a_ll = Kokkos::View; using view_type_b_ll = Kokkos::View; using view_type_c_ll = Kokkos::View; - Test::impl_test_gemv_streams(space, mode, 0, 1024); - Test::impl_test_gemv_streams(space, mode, 13, 1024); - Test::impl_test_gemv_streams(space, mode, 50, 40); + Test::impl_test_gemv_streams(space, mode, 0, + 1024); + Test::impl_test_gemv_streams(space, mode, 13, + 1024); + Test::impl_test_gemv_streams(space, mode, 50, + 40); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) using view_type_a_lr = Kokkos::View; using view_type_b_lr = Kokkos::View; using view_type_c_lr = Kokkos::View; - Test::impl_test_gemv_streams(space, mode, 
0, 1024); - Test::impl_test_gemv_streams(space, mode, 13, 1024); - Test::impl_test_gemv_streams(space, mode, 50, 40); + Test::impl_test_gemv_streams(space, mode, 0, + 1024); + Test::impl_test_gemv_streams(space, mode, 13, + 1024); + Test::impl_test_gemv_streams(space, mode, 50, + 40); #endif (void)space; return 1; } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - blas##_##gemv_streams##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gemv_streams("N"); \ - test_gemv_streams("T"); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, blas##_##gemv_streams##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gemv_streams("N"); \ + test_gemv_streams("T"); \ } #define NO_TEST_COMPLEX diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv_util.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv_util.hpp index e28310c8ebfc..724a2fc0041e 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv_util.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_gemv_util.hpp @@ -23,16 +23,12 @@ namespace Test { -template ::value> -using simd_vector = - KokkosBatched::Vector, length>; +template ::value> +using simd_vector = KokkosBatched::Vector, length>; template struct GemvOpBase { - GemvOpBase(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) + GemvOpBase(char trans_, ScalarType alpha_, AType A_, XType x_, ScalarType beta_, YType y_) : trans(trans_), alpha(alpha_), beta(beta_), A(A_), x(x_), y(y_) {} protected: @@ -52,42 +48,32 @@ template struct RefGEMVOp : public GemvOpBase { using params = GemvOpBase; - RefGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) + RefGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, ScalarType beta_, YType y_) : params(trans_, alpha_, A_, x_, beta_, y_) {} template - KOKKOS_INLINE_FUNCTION void operator()( - const TeamMember & /* 
member */) const { - vanillaGEMV(params::trans, params::alpha, params::A, params::x, - params::beta, params::y); + KOKKOS_INLINE_FUNCTION void operator()(const TeamMember & /* member */) const { + vanillaGEMV(params::trans, params::alpha, params::A, params::x, params::beta, params::y); } }; // RefGEMVOp // fill regular view with random values -template -typename std::enable_if::value>::type -fill_random_view(ViewType A, PoolType &rand_pool, - const ScalarType max_val = 10.0) { +template +typename std::enable_if::value>::type fill_random_view( + ViewType A, PoolType &rand_pool, const ScalarType max_val = 10.0) { Kokkos::fill_random(A, rand_pool, max_val); Kokkos::fence(); } // fill rank-1 view of SIMD vectors with random values -template +template void fill_random_view( - Kokkos::View< - KokkosBatched::Vector, VecLength> *, - Layout, Props...> - x, + Kokkos::View, VecLength> *, Layout, Props...> x, PoolType &rand_pool, const ValueType max_val = 10.0) { // the view can be strided and have Vector values, so randoms // are generated in a plain, linear view first and then copied using device_type = typename decltype(x)::device_type; - Kokkos::View rnd("random_vals", - x.extent(0) * VecLength); + Kokkos::View rnd("random_vals", x.extent(0) * VecLength); Kokkos::fill_random(rnd, rand_pool, max_val); using size_type = decltype(x.extent(0)); for (size_type i = 0; i < x.extent(0); ++i) { @@ -96,19 +82,14 @@ void fill_random_view( } // fill rank-2 view of SIMD vectors with random values -template +template static void fill_random_view( - Kokkos::View< - KokkosBatched::Vector, VecLength> **, - Layout, Props...> - A, + Kokkos::View, VecLength> **, Layout, Props...> A, PoolType &rand_pool, const ValueType max_val = 10.0) { // the view can be strided and have Vector values, so randoms // are generated in a plain, linear view first and then copied using device_type = typename decltype(A)::device_type; - Kokkos::View rnd( - "random_vals", A.extent(0) * A.extent(1) * VecLength); + 
Kokkos::View rnd("random_vals", A.extent(0) * A.extent(1) * VecLength); Kokkos::fill_random(rnd, rand_pool, max_val); using size_type = decltype(A.extent(0)); size_type idx = 0; @@ -120,29 +101,22 @@ static void fill_random_view( } } -template +template struct GEMVTest { - static void run(const char *mode) { - run_algorithms<0, typename GemvFunc::algorithms>(mode); - } + static void run(const char *mode) { run_algorithms<0, typename GemvFunc::algorithms>(mode); } private: // ScalarCoef==void default behavior is to derive alpha/beta scalar types // from A and X scalar types - using ScalarType = typename std::conditional< - !std::is_void::value, ScalarCoef, - typename std::common_type::type>::type; + using ScalarType = typename std::conditional::value, ScalarCoef, + typename std::common_type::type>::type; template - static std::enable_if_t::value> - run_algorithms(const char * /*mode*/) {} + static std::enable_if_t::value> run_algorithms(const char * /*mode*/) {} template - static - typename std::enable_if<(Idx < - std::tuple_size::value)>::type - run_algorithms(const char *mode) { + static typename std::enable_if<(Idx < std::tuple_size::value)>::type run_algorithms( + const char *mode) { run_layouts::type>(mode); run_algorithms(mode); } @@ -156,8 +130,7 @@ struct GEMVTest { #ifdef KOKKOSKERNELS_TEST_LAYOUTRIGHT run_view_types(mode); #endif -#if defined(KOKKOSKERNELS_TEST_LAYOUTLEFT) && \ - defined(KOKKOSKERNELS_TEST_LAYOUTRIGHT) +#if defined(KOKKOSKERNELS_TEST_LAYOUTLEFT) && defined(KOKKOSKERNELS_TEST_LAYOUTRIGHT) using A_t = typename Kokkos::View; using x_t = typename Kokkos::View; using y_t = typename Kokkos::View; @@ -224,24 +197,16 @@ struct GEMVTest { auto y = Kokkos::subview(b_y, 0, Kokkos::ALL(), 0); // make sure it's actually LayoutStride there - static_assert(std::is_same::value, - ""); - static_assert(std::is_same::value, - ""); - static_assert(std::is_same::value, - ""); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + 
static_assert(std::is_same::value, ""); run_views(trans, A, x, y); } } template - static void run_views(const char trans, ViewTypeA A, ViewTypeX x, - ViewTypeY y) { - Kokkos::TeamPolicy teams( - 1, 1); // just run on device + static void run_views(const char trans, ViewTypeA A, ViewTypeX x, ViewTypeY y) { + Kokkos::TeamPolicy teams(1, 1); // just run on device fill_inputs(A, x, y); ScalarType alpha = 3; // TODO: test also with zero alpha/beta ? ScalarType beta = 5; @@ -249,8 +214,7 @@ struct GEMVTest { // get reference results Kokkos::View y_ref("Y_ref", y.extent(0)); Kokkos::deep_copy(y_ref, y); - RefGEMVOp gemv_ref( - trans, alpha, A, x, beta, y_ref); + RefGEMVOp gemv_ref(trans, alpha, A, x, beta, y_ref); Kokkos::parallel_for(teams, gemv_ref); // 1. check non-consts @@ -265,10 +229,8 @@ struct GEMVTest { run_case(trans, alpha, c_A, c_x, beta, y, y_ref); } - template - static void run_case(const char trans, ScalarType alpha, ViewTypeA A, - ViewTypeX x, ScalarType beta, ViewTypeY y, + template + static void run_case(const char trans, ScalarType alpha, ViewTypeA A, ViewTypeX x, ScalarType beta, ViewTypeY y, ViewTypeYRef y_ref) { // run on original y view (not to alter the test) // but backup it and restore, so it can be reused @@ -277,12 +239,10 @@ struct GEMVTest { // fetch GEMV functor from the factory using op_type = - typename GemvFunc::template functor_type; + typename GemvFunc::template functor_type; op_type gemv_op(trans, alpha, A, x, beta, y); - Kokkos::parallel_for( - Kokkos::TeamPolicy(1, 1), gemv_op); + Kokkos::parallel_for(Kokkos::TeamPolicy(1, 1), gemv_op); const double eps = epsilon(ScalarY{}); EXPECT_NEAR_KK_REL_1DVIEW(y, y_ref, eps); @@ -317,24 +277,15 @@ struct GEMVTest { } // namespace Test -#define TEST_CASE4(PREFIX, FACTORY, NAME, SCALAR_A, SCALAR_X, SCALAR_Y, \ - SCALAR_COEF) \ - using PREFIX##_##NAME##_gemv_test = \ - ::Test::GEMVTest<::Test::FACTORY, SCALAR_A, SCALAR_X, SCALAR_Y, \ - TestDevice, SCALAR_COEF>; \ - TEST_F(TestCategory, 
PREFIX##_gemv_nt_##NAME) { \ - PREFIX##_##NAME##_gemv_test::run("N"); \ - } \ - TEST_F(TestCategory, PREFIX##_gemv_t_##NAME) { \ - PREFIX##_##NAME##_gemv_test::run("T"); \ - } \ - TEST_F(TestCategory, PREFIX##_gemv_ct_##NAME) { \ - PREFIX##_##NAME##_gemv_test::run("C"); \ - } +#define TEST_CASE4(PREFIX, FACTORY, NAME, SCALAR_A, SCALAR_X, SCALAR_Y, SCALAR_COEF) \ + using PREFIX##_##NAME##_gemv_test = \ + ::Test::GEMVTest<::Test::FACTORY, SCALAR_A, SCALAR_X, SCALAR_Y, TestDevice, SCALAR_COEF>; \ + TEST_F(TestCategory, PREFIX##_gemv_nt_##NAME) { PREFIX##_##NAME##_gemv_test::run("N"); } \ + TEST_F(TestCategory, PREFIX##_gemv_t_##NAME) { PREFIX##_##NAME##_gemv_test::run("T"); } \ + TEST_F(TestCategory, PREFIX##_gemv_ct_##NAME) { PREFIX##_##NAME##_gemv_test::run("C"); } #define TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR_COEF) \ TEST_CASE4(PREFIX, FACTORY, NAME, SCALAR, SCALAR, SCALAR, SCALAR_COEF) -#define TEST_CASE(PREFIX, FACTORY, NAME, SCALAR) \ - TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR) +#define TEST_CASE(PREFIX, FACTORY, NAME, SCALAR) TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR) #endif // TEST_BLAS2_GEMV_UTIL_HPP diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_ger.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_ger.hpp index df3d2cb5d158..6e975532e138 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_ger.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_ger.hpp @@ -53,107 +53,85 @@ namespace Test { -template +template class GerTester { public: GerTester(); ~GerTester(); - void test(const int M, const int N, const int nonConstConstCombinations, - const bool useAnalyticalResults = false, - const bool useHermitianOption = false); + void test(const int M, const int N, const int nonConstConstCombinations, const bool useAnalyticalResults = false, + const bool useHermitianOption = false); private: using _ViewTypeX = Kokkos::View; using _ViewTypeY = Kokkos::View; using _ViewTypeA = Kokkos::View; - using 
_HostViewTypeX = typename _ViewTypeX::HostMirror; - using _HostViewTypeY = typename _ViewTypeY::HostMirror; - using _HostViewTypeA = typename _ViewTypeA::HostMirror; - using _ViewTypeExpected = - Kokkos::View; + using _HostViewTypeX = typename _ViewTypeX::HostMirror; + using _HostViewTypeY = typename _ViewTypeY::HostMirror; + using _HostViewTypeA = typename _ViewTypeA::HostMirror; + using _ViewTypeExpected = Kokkos::View; using _KAT_A = Kokkos::ArithTraits; using _AuxType = typename _KAT_A::mag_type; - void populateVariables(ScalarA& alpha, - view_stride_adapter<_ViewTypeX, false>& x, - view_stride_adapter<_ViewTypeY, false>& y, - view_stride_adapter<_ViewTypeA, false>& A, - _ViewTypeExpected& h_expected, - bool& expectedResultIsKnown); + void populateVariables(ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeY, false>& y, view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, bool& expectedResultIsKnown); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, - _HostViewTypeA& h_A, _ViewTypeExpected& h_expected); + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, + _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, - _HostViewTypeA& h_A, _ViewTypeExpected& h_expected); + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, + _ViewTypeExpected& h_expected); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - populateVanillaValues(const T& alpha, const 
_HostViewTypeX& h_x, - const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, _ViewTypeExpected& h_vanilla); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, - const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, _ViewTypeExpected& h_vanilla); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + compareVanillaAgainstExpected(const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + compareVanillaAgainstExpected(const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - compareKkGerAgainstExpected(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected); + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + compareKkGerAgainstExpected(const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - 
void>::type - compareKkGerAgainstExpected(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected); + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + compareKkGerAgainstExpected(const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_expected); template T shrinkAngleToZeroTwoPiRange(const T input); template - void callKkGerAndCompareAgainstExpected( - const ScalarA& alpha, TX& x, TY& y, - view_stride_adapter<_ViewTypeA, false>& A, - const _ViewTypeExpected& h_expected, const std::string& situation); + void callKkGerAndCompareAgainstExpected(const ScalarA& alpha, TX& x, TY& y, view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation); const bool _A_is_complex; const bool _A_is_lr; @@ -169,16 +147,13 @@ class GerTester { bool _kkGerShouldThrowException; }; -template -GerTester::GerTester() +template +GerTester::GerTester() : _A_is_complex(std::is_same>::value || std::is_same>::value), _A_is_lr(std::is_same::value), _A_is_ll(std::is_same::value), - _testIsGpu(KokkosKernels::Impl::kk_is_gpu_exec_space< - typename Device::execution_space>()) + _testIsGpu(KokkosKernels::Impl::kk_is_gpu_exec_space()) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS , _vanillaUsesDifferentOrderOfOps(_A_is_lr && _testIsGpu) @@ -195,12 +170,8 @@ GerTester::value - ? 1.0e-6 - : (std::is_same<_AuxType, double>::value ? 1.0e-9 : 0)), - _relTol(std::is_same<_AuxType, float>::value - ? 5.0e-3 - : (std::is_same<_AuxType, double>::value ? 1.0e-6 : 0)), + _absTol(std::is_same<_AuxType, float>::value ? 1.0e-6 : (std::is_same<_AuxType, double>::value ? 1.0e-9 : 0)), + _relTol(std::is_same<_AuxType, float>::value ? 5.0e-3 : (std::is_same<_AuxType, double>::value ? 
1.0e-6 : 0)), _M(-1), _N(-1), _useAnalyticalResults(false), @@ -208,31 +179,24 @@ GerTester -GerTester::~GerTester() { +template +GerTester::~GerTester() { // Nothing to do } -template -void GerTester::test(const int M, const int N, - const int nonConstConstCombinations, - const bool useAnalyticalResults, - const bool useHermitianOption) { +template +void GerTester::test( + const int M, const int N, const int nonConstConstCombinations, const bool useAnalyticalResults, + const bool useHermitianOption) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Entering GerTester::test()... - - - - - - - - - - - - - - - - " "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - " "- - - - - - - - - " << std::endl; - std::cout << "_A_is_complex = " << _A_is_complex - << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + std::cout << "_A_is_complex = " << _A_is_complex << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll << ", _testIsGpu = " << _testIsGpu - << ", _vanillaUsesDifferentOrderOfOps = " - << _vanillaUsesDifferentOrderOfOps << ", _absTol = " << _absTol + << ", _vanillaUsesDifferentOrderOfOps = " << _vanillaUsesDifferentOrderOfOps << ", _absTol = " << _absTol << ", _relTol = " << _relTol << std::endl; #endif // ******************************************************************** @@ -277,8 +241,7 @@ void GerTester y("Y", _N); view_stride_adapter<_ViewTypeA, false> A("A", _M, _N); - view_stride_adapter<_ViewTypeExpected, true> h_expected( - "expected A += alpha * x * y^{t,h}", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> h_expected("expected A += alpha * x * y^{t,h}", _M, _N); bool expectedResultIsKnown = false; ScalarA alpha(0.); @@ -286,27 +249,16 @@ void GerTesterpopulateVariables(alpha, x, y, A, h_expected.d_view, - expectedResultIsKnown); + this->populateVariables(alpha, x, y, A, h_expected.d_view, expectedResultIsKnown); // ******************************************************************** // Step 3 of 9: populate h_vanilla // 
******************************************************************** - view_stride_adapter<_ViewTypeExpected, true> h_vanilla( - "vanilla = A + alpha * x * y^{t,h}", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> h_vanilla("vanilla = A + alpha * x * y^{t,h}", _M, _N); #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "In Test_Blas2_ger.hpp, computing vanilla A with alpha type = %s\n", - typeid(alpha).name()); -#else - Kokkos::printf( - "In Test_Blas2_ger.hpp, computing vanilla A with alpha type = %s\n", - typeid(alpha).name()); -#endif + Kokkos::printf("In Test_Blas2_ger.hpp, computing vanilla A with alpha type = %s\n", typeid(alpha).name()); #endif - this->populateVanillaValues(alpha, x.h_view, y.h_view, A.h_view, - h_vanilla.d_view); + this->populateVanillaValues(alpha, x.h_view, y.h_view, A.h_view, h_vanilla.d_view); // ******************************************************************** // Step 4 of 9: use h_vanilla and h_expected as appropriate @@ -315,8 +267,7 @@ void GerTestercompareVanillaAgainstExpected(alpha, h_vanilla.d_view, - h_expected.d_view); + this->compareVanillaAgainstExpected(alpha, h_vanilla.d_view, h_expected.d_view); } else { // ****************************************************************** // Copy h_vanilla to h_expected @@ -331,8 +282,7 @@ void GerTestercallKkGerAndCompareAgainstExpected( - alpha, x.d_view, y.d_view, A, h_expected.d_view, "non const {x,y}"); + this->callKkGerAndCompareAgainstExpected(alpha, x.d_view, y.d_view, A, h_expected.d_view, "non const {x,y}"); } // ******************************************************************** @@ -341,8 +291,7 @@ void GerTestercallKkGerAndCompareAgainstExpected(alpha, x.d_view_const, y.d_view, A, - h_expected.d_view, "const x"); + this->callKkGerAndCompareAgainstExpected(alpha, x.d_view_const, y.d_view, A, h_expected.d_view, "const x"); } // ******************************************************************** @@ -351,8 +300,7 @@ void 
GerTestercallKkGerAndCompareAgainstExpected(alpha, x.d_view, y.d_view_const, A, - h_expected.d_view, "const y"); + this->callKkGerAndCompareAgainstExpected(alpha, x.d_view, y.d_view_const, A, h_expected.d_view, "const y"); } // ******************************************************************** @@ -361,9 +309,8 @@ void GerTestercallKkGerAndCompareAgainstExpected(alpha, x.d_view_const, - y.d_view_const, A, - h_expected.d_view, "const {x,y}"); + this->callKkGerAndCompareAgainstExpected(alpha, x.d_view_const, y.d_view_const, A, h_expected.d_view, + "const {x,y}"); } // ******************************************************************** @@ -382,21 +329,14 @@ void GerTester -void GerTester< - ScalarX, tLayoutX, ScalarY, tLayoutY, ScalarA, tLayoutA, - Device>::populateVariables(ScalarA& alpha, - view_stride_adapter<_ViewTypeX, false>& x, - view_stride_adapter<_ViewTypeY, false>& y, - view_stride_adapter<_ViewTypeA, false>& A, - _ViewTypeExpected& h_expected, - bool& expectedResultIsKnown) { +template +void GerTester::populateVariables( + ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, view_stride_adapter<_ViewTypeY, false>& y, + view_stride_adapter<_ViewTypeA, false>& A, _ViewTypeExpected& h_expected, bool& expectedResultIsKnown) { expectedResultIsKnown = false; if (_useAnalyticalResults) { - this->populateAnalyticalValues(alpha, x.h_view, y.h_view, A.h_view, - h_expected); + this->populateAnalyticalValues(alpha, x.h_view, y.h_view, A.h_view, h_expected); Kokkos::deep_copy(x.d_base, x.h_base); Kokkos::deep_copy(y.d_base, y.h_base); Kokkos::deep_copy(A.d_base, A.h_base); @@ -461,8 +401,7 @@ void GerTester< } else { alpha = 3; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarX randStart, randEnd; @@ -489,17 +428,12 @@ void GerTester< } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type 
-GerTester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +GerTester::populateAnalyticalValues( + T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, _ViewTypeExpected& h_expected) { _AuxType auxI(0.); _AuxType auxJ(0.); _AuxType auxIpJ(0.); @@ -524,26 +458,20 @@ GerTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i)); for (int j = 0; j < _N; ++j) { - auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); - auxIpJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); - h_A(i, j).real() = - -sin(auxIpJ) - sin(auxI) * sin(auxJ) - cos(auxI) * cos(auxJ); - h_A(i, j).imag() = - -sin(auxIpJ) - sin(auxI) * sin(auxJ) + cos(auxI) * cos(auxJ); + auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); + auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + h_A(i, j).real() = -sin(auxIpJ) - sin(auxI) * sin(auxJ) - cos(auxI) * cos(auxJ); + h_A(i, j).imag() = -sin(auxIpJ) - sin(auxI) * sin(auxJ) + cos(auxI) * cos(auxJ); } } } else { for (int i = 0; i < _M; ++i) { auxI = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i)); for (int j = 0; j < _N; ++j) { - auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); - auxImJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); - h_A(i, j).real() = - -sin(auxImJ) - sin(auxI) * sin(auxJ) + cos(auxI) * cos(auxJ); - h_A(i, j).imag() = - -sin(auxImJ) - sin(auxI) * sin(auxJ) - cos(auxI) * cos(auxJ); + auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); + auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + h_A(i, j).real() = -sin(auxImJ) - sin(auxI) * sin(auxJ) + cos(auxI) * cos(auxJ); + h_A(i, j).imag() = -sin(auxImJ) - sin(auxI) * sin(auxJ) - cos(auxI) * cos(auxJ); } } } @@ -552,9 +480,8 @@ 
GerTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i)); for (int j = 0; j < _N; ++j) { - auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); - auxIpJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); + auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_expected(i, j).real() = -2. * sin(auxI) * sin(auxJ); h_expected(i, j).imag() = 2. * (cos(auxIpJ) - sin(auxIpJ)); } @@ -563,9 +490,8 @@ GerTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i)); for (int j = 0; j < _N; ++j) { - auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); - auxImJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); + auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); h_expected(i, j).real() = 2. * cos(auxI) * cos(auxJ); h_expected(i, j).imag() = -2. * sin(auxImJ); } @@ -574,17 +500,12 @@ GerTester +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -GerTester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +GerTester::populateAnalyticalValues( + T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, _ViewTypeExpected& h_expected) { _AuxType auxI(0.); _AuxType auxJ(0.); _AuxType auxIpJ(0.); @@ -611,25 +532,20 @@ GerTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_expected(i, j) = 3 * sin(auxIpJ); } } } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -GerTester::populateVanillaValues(const T& alpha, - const _HostViewTypeX& 
h_x, - const _HostViewTypeY& h_y, - const _HostViewTypeA& h_A, - _ViewTypeExpected& h_vanilla) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +GerTester::populateVanillaValues( + const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + _ViewTypeExpected& h_vanilla) { if (_vanillaUsesDifferentOrderOfOps) { if (_useHermitianOption) { for (int i = 0; i < _M; ++i) { @@ -662,18 +578,13 @@ GerTester +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -GerTester::populateVanillaValues(const T& alpha, - const _HostViewTypeX& h_x, - const _HostViewTypeY& h_y, - const _HostViewTypeA& h_A, - _ViewTypeExpected& h_vanilla) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +GerTester::populateVanillaValues( + const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + _ViewTypeExpected& h_vanilla) { if (_vanillaUsesDifferentOrderOfOps) { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { @@ -689,11 +600,10 @@ GerTester +template template -T GerTester::shrinkAngleToZeroTwoPiRange(const T input) { +T GerTester::shrinkAngleToZeroTwoPiRange( + const T input) { T output(input); #if 0 T twoPi( 2. 
* Kokkos::numbers::pi ); @@ -708,18 +618,13 @@ T GerTester +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -GerTester:: - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, - const _ViewTypeExpected& h_expected) { - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +GerTester::compareVanillaAgainstExpected( + const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected) { + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); if (_useAnalyticalResults) { int numErrorsRealAbs(0); @@ -738,7 +643,7 @@ GerTester:: for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - diff = _KAT_A::abs(h_expected(i, j).real() - h_vanilla(i, j).real()); + diff = _KAT_A::abs(h_expected(i, j).real() - h_vanilla(i, j).real()); errorHappened = false; if (h_expected(i, j).real() == 0.) { diffThreshold = _KAT_A::abs(_absTol); @@ -762,17 +667,15 @@ GerTester:: } if (errorHappened && (numErrorsRealAbs + numErrorsRealRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " << h_expected(i, j).real() + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() << ", _KAT_A::abs(h_expected(i,j).real() - " "h_vanilla(i,j).real()) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } - diff = _KAT_A::abs(h_expected(i, j).imag() - h_vanilla(i, j).imag()); + diff = _KAT_A::abs(h_expected(i, j).imag() - h_vanilla(i, j).imag()); errorHappened = false; if (h_expected(i, j).imag() == 0.) 
{ diffThreshold = _KAT_A::abs(_absTol); @@ -796,37 +699,26 @@ GerTester:: } if (errorHappened && (numErrorsImagAbs + numErrorsImagRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() << ", _KAT_A::abs(h_expected(i,j).imag() - " "h_vanilla(i,j).imag()) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ": vanilla differs too much from analytical on real components" - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_expected(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_expected(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? 
h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", h_vanilla(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_vanilla(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsReal(numErrorsRealAbs + numErrorsRealRel); @@ -835,29 +727,19 @@ GerTester:: std::cout << "WARNING" << msg.str() << std::endl; #endif } - EXPECT_LE(numErrorsReal, maxNumErrorsAllowed) - << "Failed test" << msg.str(); + EXPECT_LE(numErrorsReal, maxNumErrorsAllowed) << "Failed test" << msg.str(); } { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ": vanilla differs too much from analytical on imag components" - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_expected(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_expected(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? 
h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", h_vanilla(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_vanilla(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsImag(numErrorsImagAbs + numErrorsImagRel); @@ -866,8 +748,7 @@ GerTester:: std::cout << "WARNING" << msg.str() << std::endl; #endif } - EXPECT_LE(numErrorsImag, maxNumErrorsAllowed) - << "Failed test" << msg.str(); + EXPECT_LE(numErrorsImag, maxNumErrorsAllowed) << "Failed test" << msg.str(); } } else { int numErrorsReal(0); @@ -878,11 +759,8 @@ GerTester:: if (h_expected(i, j).real() != h_vanilla(i, j).real()) { if (numErrorsReal == 0) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " - << h_expected(i, j).real() - << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() - << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() + << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() << std::endl; #endif } numErrorsReal++; @@ -891,49 +769,37 @@ GerTester:: if (h_expected(i, j).imag() != h_vanilla(i, j).imag()) { if (numErrorsImag == 0) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " - << h_expected(i, j).imag() - << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() - << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() << std::endl; #endif } numErrorsImag++; } } // for j } // for i - EXPECT_EQ(numErrorsReal, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha 
type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ": vanilla result is incorrect on real components" - << ", numErrorsReal = " << numErrorsReal; - EXPECT_EQ(numErrorsImag, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ": vanilla result is incorrect on imag components" - << ", numErrorsImag = " << numErrorsImag; + EXPECT_EQ(numErrorsReal, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption + << ": vanilla result is incorrect on real components" + << ", numErrorsReal = " << numErrorsReal; + EXPECT_EQ(numErrorsImag, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption + << ": vanilla result is incorrect on imag components" + << ", numErrorsImag = " << numErrorsImag; } } // Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -GerTester:: - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, - const _ViewTypeExpected& h_expected) { - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +GerTester::compareVanillaAgainstExpected( + const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected) { + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); if (_useAnalyticalResults) { int numErrorsAbs(0); @@ -971,35 +837,24 @@ GerTester:: } if (errorHappened && 
(numErrorsAbs + numErrorsRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) << ", h_vanilla(i,j) = " << h_vanilla(i, j) - << ", _KAT_A::abs(h_expected(i,j) - h_vanilla(i,j)) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << ", _KAT_A::abs(h_expected(i,j) - h_vanilla(i,j)) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ": vanilla differs too much from expected" - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << iForMaxErrorRel << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_expected(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_vanilla(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_vanilla(i,j) = " << (((_M > 0) && (_N > 0)) ? 
h_vanilla(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrors(numErrorsAbs + numErrorsRel); @@ -1018,8 +873,7 @@ GerTester:: if (h_expected(i, j) != h_vanilla(i, j)) { if (numErrors == 0) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) << ", h_vanilla(i,j) = " << h_vanilla(i, j) << std::endl; #endif } @@ -1027,29 +881,22 @@ GerTester:: } } // for j } // for i - EXPECT_EQ(numErrors, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ": vanilla result is incorrect" - << ", numErrors = " << numErrors; + EXPECT_EQ(numErrors, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption << ": vanilla result is incorrect" + << ", numErrors = " << numErrors; } } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -GerTester:: - compareKkGerAgainstExpected(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected) { - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +GerTester::compareKkGerAgainstExpected( + const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_expected) { + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); int numErrorsRealAbs(0); int numErrorsRealRel(0); @@ -1090,12 +937,10 @@ GerTester:: } if (errorHappened && (numErrorsRealAbs + 
numErrorsRealRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " << h_expected(i, j).real() - << ", h_A(i,j).real() = " << h_A(i, j).real() - << ", _KAT_A::abs(h_expected(i,j).real() - h_A(i,j).real()) = " - << diff << ", diffThreshold = " << diffThreshold << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() + << ", h_A(i,j).real() = " << h_A(i, j).real() + << ", _KAT_A::abs(h_expected(i,j).real() - h_A(i,j).real()) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } @@ -1123,90 +968,56 @@ GerTester:: } if (errorHappened && (numErrorsImagAbs + numErrorsImagRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() - << ", h_A(i,j).imag() = " << h_A(i, j).imag() - << ", _KAT_A::abs(h_expected(i,j).imag() - h_A(i,j).imag()) = " - << diff << ", diffThreshold = " << diffThreshold << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + << ", h_A(i,j).imag() = " << h_A(i, j).imag() + << ", _KAT_A::abs(h_expected(i,j).imag() - h_A(i,j).imag()) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_expected(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? 
h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) - << ", h_A(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_expected(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) - << ", h_A(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) - << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; + std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_expected(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) + << ", h_A(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_expected(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) + << ", h_A(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) + << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; if ((_M == 2131) && (_N == 2131)) { std::cout << "Information" - << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", h_expected(11, 2119) = (" << h_expected(11, 2119).real() - << ", " << h_expected(11, 2119).imag() << ")" - << ", h_A(11, 2119) = (" << h_A(11, 2119).real() << ", " - << h_A(11, 2119).imag() << ")" << std::endl; + << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", h_expected(11, 2119) = (" << h_expected(11, 2119).real() << ", " << h_expected(11, 2119).imag() + << ")" + << ", h_A(11, 2119) = (" << h_A(11, 2119).real() << ", " << h_A(11, 2119).imag() << ")" << std::endl; std::cout << "Information" - << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", h_expected(710, 1065) = (" << h_expected(710, 1065).real() - << ", " << h_expected(710, 1065).imag() << ")" - << ", h_A(710, 1065) = (" << h_A(710, 1065).real() << ", " - << h_A(710, 1065).imag() << ")" << std::endl; + << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", h_expected(710, 1065) = (" << h_expected(710, 1065).real() << ", " << h_expected(710, 1065).imag() + << ")" + << ", h_A(710, 1065) = (" << h_A(710, 1065).real() << ", " << h_A(710, 1065).imag() << ")" << std::endl; } #endif { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", 
_A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ": ger result is incorrect on real components" - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_expected(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_expected(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", h_A(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsReal(numErrorsRealAbs + numErrorsRealRel); @@ -1219,24 +1030,15 @@ GerTester:: } { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ": ger result is incorrect on imag components" - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_expected(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_expected(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", h_A(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsImag(numErrorsImagAbs + numErrorsImagRel); @@ -1250,17 +1052,13 @@ GerTester:: } // Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -GerTester:: - compareKkGerAgainstExpected(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected) { - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +GerTester::compareKkGerAgainstExpected( + const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_expected) { + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); int numErrorsAbs(0); int numErrorsRel(0); @@ -1296,52 +1094,33 @@ GerTester:: } if (errorHappened && (numErrorsAbs + numErrorsRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) - << ", h_A(i,j) = " << h_A(i, j) - << ", _KAT_A::abs(h_expected(i,j) - h_A(i,j)) = " << diff + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) + << ", h_A(i,j) = " << h_A(i, j) << ", _KAT_A::abs(h_expected(i,j) - h_A(i,j)) = " << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel - << ", jForMaxErrorRel = " << jForMaxErrorRel - << ", h_expected(i,j) = " - << (((_M > 0) && (_N > 0)) - ? 
h_expected(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_A(i,j) = " - << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << iForMaxErrorRel + << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_expected(i,j) = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_A(i,j) = " << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; #endif { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ": ger result is incorrect" - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel - << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_expected(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_A(i,j) = " - << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << iForMaxErrorRel + << ", jForMaxErrorRel = " << jForMaxErrorRel + << ", h_expected(i,j) = " << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_A(i,j) = " << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrors(numErrorsAbs + numErrorsRel); @@ -1354,27 +1133,16 @@ GerTester:: } } -template +template template -void GerTester:: - callKkGerAndCompareAgainstExpected( - const ScalarA& alpha, TX& x, TY& y, - view_stride_adapter<_ViewTypeA, false>& A, - const _ViewTypeExpected& h_expected, const std::string& situation) { +void GerTester::callKkGerAndCompareAgainstExpected( + const ScalarA& alpha, TX& x, TY& y, view_stride_adapter<_ViewTypeA, false>& A, const _ViewTypeExpected& h_expected, + const std::string& situation) { #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "In Test_Blas2_ger.hpp, right before calling KokkosBlas::ger(): " - "ViewTypeA = %s, _kkGerShouldThrowException=%d\n", - typeid(_ViewTypeA).name(), _kkGerShouldThrowException); -#else Kokkos::printf( "In Test_Blas2_ger.hpp, right before calling KokkosBlas::ger(): " "ViewTypeA = %s, _kkGerShouldThrowException=%d\n", typeid(_ViewTypeA).name(), _kkGerShouldThrowException); -#endif #endif std::string mode = _useHermitianOption ? 
"H" : "T"; bool gotStdException(false); @@ -1383,25 +1151,21 @@ void GerTester #ifdef HAVE_KOKKOSKERNELS_DEBUG int test_ger(const std::string& caseName) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+=======================================================================" - "===\n"); -#else Kokkos::printf( "+=======================================================================" "===\n"); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s, device = %s ...\n", - caseName.c_str(), typeid(Device).name()); -#else - Kokkos::printf("Starting %s, device = %s ...\n", caseName.c_str(), - typeid(Device).name()); -#endif + Kokkos::printf("Starting %s, device = %s ...\n", caseName.c_str(), typeid(Device).name()); #else int test_ger(const std::string& /*caseName*/) { #endif - bool xBool = std::is_same::value || - std::is_same::value || + bool xBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; - bool yBool = std::is_same::value || - std::is_same::value || + bool yBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; - bool aBool = std::is_same::value || - std::is_same::value || + bool aBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; bool useAnalyticalResults = xBool && yBool && aBool; #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for LAYOUTLEFT 
...\n", - caseName.c_str()); -#else Kokkos::printf("Starting %s for LAYOUTLEFT ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::GerTester + Test::GerTester tester; tester.test(0, 13, 0); tester.test(1024, 0, 0); @@ -1498,47 +1234,23 @@ int test_ger(const std::string& /*caseName*/) { } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for LAYOUTLEFT\n", - caseName.c_str()); -#else Kokkos::printf("Finished %s for LAYOUTLEFT\n", caseName.c_str()); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for LAYOUTRIGHT ...\n", - caseName.c_str()); -#else Kokkos::printf("Starting %s for LAYOUTRIGHT ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::GerTester + Test::GerTester tester; tester.test(0, 13, 0); tester.test(1024, 0, 0); @@ -1565,46 +1277,22 @@ int test_ger(const std::string& /*caseName*/) { } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for LAYOUTRIGHT\n", - caseName.c_str()); -#else Kokkos::printf("Finished %s for LAYOUTRIGHT\n", caseName.c_str()); -#endif -#if KOKKOS_VERSION < 40199 - 
KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for LAYOUTSTRIDE ...\n", - caseName.c_str()); -#else Kokkos::printf("Starting %s for LAYOUTSTRIDE ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::GerTester + Test::GerTester tester; tester.test(0, 13, 0); tester.test(1024, 0, 0); @@ -1628,46 +1316,22 @@ int test_ger(const std::string& /*caseName*/) { } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for LAYOUTSTRIDE\n", - caseName.c_str()); -#else Kokkos::printf("Finished %s for LAYOUTSTRIDE\n", caseName.c_str()); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else 
Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for MIXED LAYOUTS ...\n", - caseName.c_str()); -#else Kokkos::printf("Starting %s for MIXED LAYOUTS ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::GerTester + Test::GerTester tester; tester.test(1024, 1024, 0); if (useAnalyticalResults) { @@ -1679,53 +1343,30 @@ int test_ger(const std::string& /*caseName*/) { } if (true) { - Test::GerTester + Test::GerTester tester; tester.test(1024, 1024, 0); } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for MIXED LAYOUTS\n", - caseName.c_str()); -#else Kokkos::printf("Finished %s for MIXED LAYOUTS\n", caseName.c_str()); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s\n", caseName.c_str()); -#else Kokkos::printf("Finished %s\n", caseName.c_str()); -#endif -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+=======================================================================" - "===\n"); -#else Kokkos::printf( "+=======================================================================" "===\n"); -#endif #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, ger_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::ger_float"); test_ger("test case ger_float"); @@ -1734,19 +1375,17 @@ TEST_F(TestCategory, ger_float) { #endif 
#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, ger_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::ger_complex_float"); - test_ger, Kokkos::complex, - Kokkos::complex, TestDevice>("test case ger_complex_float"); + test_ger, Kokkos::complex, Kokkos::complex, TestDevice>( + "test case ger_complex_float"); Kokkos::Profiling::popRegion(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, ger_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::ger_double"); test_ger("test case ger_double"); @@ -1755,19 +1394,17 @@ TEST_F(TestCategory, ger_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, ger_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::ger_complex_double"); - test_ger, Kokkos::complex, - Kokkos::complex, TestDevice>("test case ger_complex_double"); + test_ger, Kokkos::complex, Kokkos::complex, TestDevice>( + "test case ger_complex_double"); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, ger_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::ger_int"); test_ger("test case ger_int"); @@ -1775,8 +1412,7 @@ TEST_F(TestCategory, ger_int) { } #endif -#if 
!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, ger_double_int_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::ger_double_int_float"); test_ger("test case ger_double_int_float"); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_serial_gemv.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_serial_gemv.hpp index 5c1aaf5a6709..805ac1d28391 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_serial_gemv.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_serial_gemv.hpp @@ -21,39 +21,31 @@ namespace Test { -template +template struct SerialGEMVOp : public GemvOpBase { using params = GemvOpBase; - SerialGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) + SerialGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, ScalarType beta_, YType y_) : params(trans_, alpha_, A_, x_, beta_, y_) {} template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, - params::beta, params::y); + member, params::trans, params::alpha, params::A, params::x, params::beta, params::y); } }; struct SerialGemvFactory { - template - using functor_type = - SerialGEMVOp; + template + using functor_type = SerialGEMVOp; - using algorithms = std::tuple; + using algorithms = std::tuple; }; #ifdef __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ struct SerialMKLGemvFactory { - template - using functor_type = - SerialGEMVOp; + template + using functor_type = SerialGEMVOp; using algorithms = std::tuple; }; @@ -61,10 +53,8 @@ struct SerialMKLGemvFactory { } // namespace Test -#define TEST_SERIAL_CASE4(N, A, X, Y, SC) \ - TEST_CASE4(serial, SerialGemvFactory, N, A, X, Y, SC) -#define TEST_SERIAL_CASE2(N, S, SC) \ - TEST_CASE2(serial, SerialGemvFactory, N, S, SC) 
+#define TEST_SERIAL_CASE4(N, A, X, Y, SC) TEST_CASE4(serial, SerialGemvFactory, N, A, X, Y, SC) +#define TEST_SERIAL_CASE2(N, S, SC) TEST_CASE2(serial, SerialGemvFactory, N, S, SC) #define TEST_SERIAL_CASE(N, S) TEST_CASE(serial, SerialGemvFactory, N, S) #ifdef KOKKOSKERNELS_TEST_FLOAT @@ -76,8 +66,7 @@ using simd_float_avx = ::Test::simd_vector; using simd_float_avx512 = ::Test::simd_vector; TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_sse, simd_float_sse, float) TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx, simd_float_avx, float) -TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx512, simd_float_avx512, - float) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx512, simd_float_avx512, float) #endif #endif @@ -88,12 +77,9 @@ TEST_SERIAL_CASE(double, double) using simd_double_sse = ::Test::simd_vector; using simd_double_avx = ::Test::simd_vector; using simd_double_avx512 = ::Test::simd_vector; -TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_sse, simd_double_sse, - double) -TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_avx, simd_double_avx, - double) -TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_avx512, simd_double_avx512, - double) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_sse, simd_double_sse, double) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_avx, simd_double_avx, double) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_avx512, simd_double_avx512, double) #endif #endif diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr.hpp index 1253a8e32924..8dc7cadf517b 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr.hpp @@ -51,110 +51,85 @@ namespace Test { -template +template class SyrTester { public: SyrTester(); ~SyrTester(); - void test(const int N, const int nonConstConstCombinations, - const bool useAnalyticalResults = false, - const bool 
useHermitianOption = false, - const bool useUpOption = false); + void test(const int N, const int nonConstConstCombinations, const bool useAnalyticalResults = false, + const bool useHermitianOption = false, const bool useUpOption = false); private: using _ViewTypeX = Kokkos::View; using _ViewTypeA = Kokkos::View; - using _HostViewTypeX = typename _ViewTypeX::HostMirror; - using _HostViewTypeA = typename _ViewTypeA::HostMirror; - using _ViewTypeExpected = - Kokkos::View; + using _HostViewTypeX = typename _ViewTypeX::HostMirror; + using _HostViewTypeA = typename _ViewTypeA::HostMirror; + using _ViewTypeExpected = Kokkos::View; using _KAT_A = Kokkos::ArithTraits; using _AuxType = typename _KAT_A::mag_type; - void populateVariables(ScalarA& alpha, - view_stride_adapter<_ViewTypeX, false>& x, - view_stride_adapter<_ViewTypeA, false>& A, - _ViewTypeExpected& h_expected, + void populateVariables(ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeA, false>& A, _ViewTypeExpected& h_expected, bool& expectedResultIsKnown); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected); + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeA& h_A, _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected); + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeA& h_A, _ViewTypeExpected& h_expected); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - populateVanillaValues(const T& alpha, const 
_HostViewTypeX& h_x, - const _HostViewTypeA& h_A, + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeA& h_A, _ViewTypeExpected& h_vanilla); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, - const _HostViewTypeA& h_A, + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeA& h_A, _ViewTypeExpected& h_vanilla); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + compareVanillaAgainstExpected(const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + compareVanillaAgainstExpected(const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - compareKkSyrAgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_reference); + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + compareKkSyrAgainstReference(const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - compareKkSyrAgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const 
_ViewTypeExpected& h_reference); + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + compareKkSyrAgainstReference(const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference); template T shrinkAngleToZeroTwoPiRange(const T input); template - void callKkSyrAndCompareAgainstExpected( - const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& A, - const _ViewTypeExpected& h_expected, const std::string& situation); + void callKkSyrAndCompareAgainstExpected(const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation); template - void callKkGerAndCompareKkSyrAgainstIt( - const ScalarA& alpha, TX& x, - view_stride_adapter<_ViewTypeA, false>& org_A, - const _HostViewTypeA& h_A_syr, const std::string& situation); + void callKkGerAndCompareKkSyrAgainstIt(const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& org_A, + const _HostViewTypeA& h_A_syr, const std::string& situation); const bool _A_is_complex; const bool _A_is_lr; @@ -172,15 +147,13 @@ class SyrTester { bool _kkGerShouldThrowException; }; -template +template SyrTester::SyrTester() : _A_is_complex(std::is_same>::value || std::is_same>::value), _A_is_lr(std::is_same::value), _A_is_ll(std::is_same::value), - _testIsGpu(KokkosKernels::Impl::kk_is_gpu_exec_space< - typename Device::execution_space>()) + _testIsGpu(KokkosKernels::Impl::kk_is_gpu_exec_space()) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS , _vanillaUsesDifferentOrderOfOps(_A_is_lr) @@ -197,12 +170,8 @@ SyrTester::SyrTester() // large enough to require 'relTol' to value 5.0e-3. The same // calculations show no discrepancies for calculations with double. // **************************************************************** - _absTol(std::is_same<_AuxType, float>::value - ? 1.0e-6 - : (std::is_same<_AuxType, double>::value ? 1.0e-9 : 0)), - _relTol(std::is_same<_AuxType, float>::value - ? 
5.0e-3 - : (std::is_same<_AuxType, double>::value ? 1.0e-6 : 0)), + _absTol(std::is_same<_AuxType, float>::value ? 1.0e-6 : (std::is_same<_AuxType, double>::value ? 1.0e-9 : 0)), + _relTol(std::is_same<_AuxType, float>::value ? 5.0e-3 : (std::is_same<_AuxType, double>::value ? 1.0e-6 : 0)), _M(-1), _N(-1), _useAnalyticalResults(false), @@ -212,33 +181,27 @@ SyrTester::SyrTester() _kkGerShouldThrowException(false) { } -template +template SyrTester::~SyrTester() { // Nothing to do } -template -void SyrTester::test( - const int N, const int nonConstConstCombinations, - const bool useAnalyticalResults, const bool useHermitianOption, - const bool useUpOption) { +template +void SyrTester::test(const int N, const int nonConstConstCombinations, + const bool useAnalyticalResults, + const bool useHermitianOption, + const bool useUpOption) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Entering SyrTester::test()... - - - - - - - - - - - - - - - - " "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - " "- - - - - - - - - " << std::endl; - std::cout << "_A_is_complex = " << _A_is_complex - << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + std::cout << "_A_is_complex = " << _A_is_complex << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll << ", _testIsGpu = " << _testIsGpu - << ", _vanillaUsesDifferentOrderOfOps = " - << _vanillaUsesDifferentOrderOfOps << ", _absTol = " << _absTol - << ", _relTol = " << _relTol - << ", nonConstConstCombinations = " << nonConstConstCombinations - << ", useAnalyticalResults = " << useAnalyticalResults - << ", useHermitianOption = " << useHermitianOption + << ", _vanillaUsesDifferentOrderOfOps = " << _vanillaUsesDifferentOrderOfOps << ", _absTol = " << _absTol + << ", _relTol = " << _relTol << ", nonConstConstCombinations = " << nonConstConstCombinations + << ", useAnalyticalResults = " << useAnalyticalResults << ", useHermitianOption = " << useHermitianOption << ", useUpOption = " << useUpOption << std::endl; 
#endif // ******************************************************************** @@ -273,8 +236,7 @@ void SyrTester::test( view_stride_adapter<_ViewTypeX, false> x("X", _M); view_stride_adapter<_ViewTypeA, false> A("A", _M, _N); - view_stride_adapter<_ViewTypeExpected, true> h_expected( - "expected A += alpha * x * x^{t,h}", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> h_expected("expected A += alpha * x * x^{t,h}", _M, _N); bool expectedResultIsKnown = false; ScalarA alpha(_KAT_A::zero()); @@ -282,24 +244,14 @@ void SyrTester::test( // ******************************************************************** // Step 2 of 7: populate alpha, h_x, h_A, h_expected, x, A // ******************************************************************** - this->populateVariables(alpha, x, A, h_expected.d_view, - expectedResultIsKnown); + this->populateVariables(alpha, x, A, h_expected.d_view, expectedResultIsKnown); // ******************************************************************** // Step 3 of 7: populate h_vanilla // ******************************************************************** - view_stride_adapter<_ViewTypeExpected, true> h_vanilla( - "vanilla = A + alpha * x * x^{t,h}", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> h_vanilla("vanilla = A + alpha * x * x^{t,h}", _M, _N); #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "In Test_Blas2_syr.hpp, computing vanilla A with alpha type = %s\n", - typeid(alpha).name()); -#else - Kokkos::printf( - "In Test_Blas2_syr.hpp, computing vanilla A with alpha type = %s\n", - typeid(alpha).name()); -#endif + Kokkos::printf("In Test_Blas2_syr.hpp, computing vanilla A with alpha type = %s\n", typeid(alpha).name()); #endif this->populateVanillaValues(alpha, x.h_view, A.h_view, h_vanilla.d_view); @@ -310,8 +262,7 @@ void SyrTester::test( // ****************************************************************** // Compare h_vanilla against h_expected // 
****************************************************************** - this->compareVanillaAgainstExpected(alpha, h_vanilla.d_view, - h_expected.d_view); + this->compareVanillaAgainstExpected(alpha, h_vanilla.d_view, h_expected.d_view); } else { // ****************************************************************** // Copy h_vanilla to h_expected @@ -327,13 +278,11 @@ void SyrTester::test( Kokkos::deep_copy(org_A.h_view, A.h_view); if (test_x) { - this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view, A, - h_expected.d_view, "non const x"); + this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view, A, h_expected.d_view, "non const x"); if ((_useAnalyticalResults == false) && // Just to save run time (_kkGerShouldThrowException == false)) { - this->callKkGerAndCompareKkSyrAgainstIt(alpha, x.d_view, org_A, A.h_view, - "non const x"); + this->callKkGerAndCompareKkSyrAgainstIt(alpha, x.d_view, org_A, A.h_view, "non const x"); } } @@ -343,8 +292,7 @@ void SyrTester::test( if (test_cx) { Kokkos::deep_copy(A.d_base, org_A.d_base); - this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view_const, A, - h_expected.d_view, "const x"); + this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view_const, A, h_expected.d_view, "const x"); } // ******************************************************************** @@ -367,12 +315,10 @@ void SyrTester::test( #endif } -template +template void SyrTester::populateVariables( - ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, - view_stride_adapter<_ViewTypeA, false>& A, _ViewTypeExpected& h_expected, - bool& expectedResultIsKnown) { + ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, bool& expectedResultIsKnown) { expectedResultIsKnown = false; if (_useAnalyticalResults) { @@ -422,8 +368,7 @@ void SyrTester::populateVariables( } else { alpha = 3; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool 
rand_pool(13718); { ScalarX randStart, randEnd; @@ -470,8 +415,7 @@ void SyrTester::populateVariables( if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_origA(" << i << "," << j << ")=" << A.h_view(i, j) - << std::endl; + std::cout << "h_origA(" << i << "," << j << ")=" << A.h_view(i, j) << std::endl; } } } @@ -479,16 +423,13 @@ void SyrTester::populateVariables( } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -SyrTester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +SyrTester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, + _HostViewTypeA& h_A, + _ViewTypeExpected& h_expected) { if (_useHermitianOption) { alpha.real() = 1.; alpha.imag() = 0.; @@ -506,10 +447,8 @@ SyrTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + _AuxType auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_A(i, j).real() = cos(auxImJ); h_A(i, j).imag() = -sin(auxImJ); } else { @@ -521,8 +460,7 @@ SyrTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_A(i, j).real() = sin(auxIpJ) + cos(auxIpJ); h_A(i, j).imag() = sin(auxIpJ) - cos(auxIpJ); } @@ -532,10 +470,8 @@ SyrTester= j))) { - _AuxType auxImJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + _AuxType auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); h_expected(i, j).real() = 2. 
* cos(auxImJ); h_expected(i, j).imag() = -2. * sin(auxImJ); } else { @@ -547,10 +483,8 @@ SyrTester= j))) { - _AuxType auxIpJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_expected(i, j).real() = 2. * sin(auxIpJ); h_expected(i, j).imag() = 2. * sin(auxIpJ); } else { @@ -563,16 +497,13 @@ SyrTester +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -SyrTester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +SyrTester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, + _HostViewTypeA& h_A, + _ViewTypeExpected& h_expected) { alpha = 2; for (int i = 0; i < _M; ++i) { @@ -583,18 +514,15 @@ SyrTestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i)); for (int j = 0; j < _N; ++j) { - _AuxType auxJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); - h_A(i, j) = 2 * cos(auxI) * cos(auxJ); + _AuxType auxJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(j)); + h_A(i, j) = 2 * cos(auxI) * cos(auxJ); } } for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { - _AuxType auxImJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + _AuxType auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); h_expected(i, j) = 2 * cos(auxImJ); } else { h_expected(i, j) = h_A(i, j); @@ -604,21 +532,19 @@ SyrTester +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -SyrTester::populateVanillaValues( - const T& alpha, 
const _HostViewTypeX& h_x, const _HostViewTypeA& h_A, - _ViewTypeExpected& h_vanilla) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +SyrTester::populateVanillaValues(const T& alpha, + const _HostViewTypeX& h_x, + const _HostViewTypeA& h_A, + _ViewTypeExpected& h_vanilla) { if (_vanillaUsesDifferentOrderOfOps) { if (_useHermitianOption) { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = h_A(i, j) + alpha * _KAT_A::conj(h_x(j)) * h_x(i); } else { h_vanilla(i, j) = h_A(i, j); @@ -631,8 +557,7 @@ SyrTester::populateVanillaValues( } else { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = h_A(i, j) + alpha * h_x(j) * h_x(i); } else { h_vanilla(i, j) = h_A(i, j); @@ -644,8 +569,7 @@ SyrTester::populateVanillaValues( if (_useHermitianOption) { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = h_A(i, j) + alpha * h_x(i) * _KAT_A::conj(h_x(j)); } else { h_vanilla(i, j) = h_A(i, j); @@ -658,8 +582,7 @@ SyrTester::populateVanillaValues( } else { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = h_A(i, j) + alpha * h_x(i) * h_x(j); } else { h_vanilla(i, j) = h_A(i, j); @@ -671,20 +594,18 @@ SyrTester::populateVanillaValues( } 
// Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -SyrTester::populateVanillaValues( - const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeA& h_A, - _ViewTypeExpected& h_vanilla) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +SyrTester::populateVanillaValues(const T& alpha, + const _HostViewTypeX& h_x, + const _HostViewTypeA& h_A, + _ViewTypeExpected& h_vanilla) { if (_vanillaUsesDifferentOrderOfOps) { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = h_A(i, j) + alpha * h_x(j) * h_x(i); } else { h_vanilla(i, j) = h_A(i, j); @@ -694,8 +615,7 @@ SyrTester::populateVanillaValues( } else { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = h_A(i, j) + alpha * h_x(i) * h_x(j); } else { h_vanilla(i, j) = h_A(i, j); @@ -705,11 +625,9 @@ SyrTester::populateVanillaValues( } } -template +template template -T SyrTester::shrinkAngleToZeroTwoPiRange(const T input) { +T SyrTester::shrinkAngleToZeroTwoPiRange(const T input) { T output(input); #if 0 T twoPi( 2. 
* Kokkos::numbers::pi ); @@ -724,29 +642,23 @@ T SyrTester +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -SyrTester:: - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, - const _ViewTypeExpected& h_expected) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +SyrTester::compareVanillaAgainstExpected( + const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_exp(" << i << "," << j << ")=" << h_expected(i, j) - << ", h_van(" << i << "," << j << ")=" << h_vanilla(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ")=" << h_expected(i, j) << ", h_van(" << i << "," << j + << ")=" << h_vanilla(i, j) << std::endl; } } } #endif - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); if (_useAnalyticalResults) { int numErrorsRealAbs(0); @@ -765,7 +677,7 @@ SyrTester:: for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - diff = _KAT_A::abs(h_expected(i, j).real() - h_vanilla(i, j).real()); + diff = _KAT_A::abs(h_expected(i, j).real() - h_vanilla(i, j).real()); errorHappened = false; if (h_expected(i, j).real() == 0.) 
{ diffThreshold = _KAT_A::abs(_absTol); @@ -789,17 +701,15 @@ SyrTester:: } if (errorHappened && (numErrorsRealAbs + numErrorsRealRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " << h_expected(i, j).real() + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() << ", _KAT_A::abs(h_expected(i,j).real() - " "h_vanilla(i,j).real()) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } - diff = _KAT_A::abs(h_expected(i, j).imag() - h_vanilla(i, j).imag()); + diff = _KAT_A::abs(h_expected(i, j).imag() - h_vanilla(i, j).imag()); errorHappened = false; if (h_expected(i, j).imag() == 0.) { diffThreshold = _KAT_A::abs(_absTol); @@ -823,13 +733,11 @@ SyrTester:: } if (errorHappened && (numErrorsImagAbs + numErrorsImagRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() << ", _KAT_A::abs(h_expected(i,j).imag() - " "h_vanilla(i,j).imag()) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j @@ -837,25 +745,15 @@ SyrTester:: { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla differs too much from analytical on real components" - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", 
numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_expected(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": vanilla differs too much from analytical on real components" + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_expected(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", h_vanilla(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_vanilla(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsReal(numErrorsRealAbs + numErrorsRealRel); @@ -864,30 +762,19 @@ SyrTester:: std::cout << "WARNING" << msg.str() << std::endl; #endif } - EXPECT_LE(numErrorsReal, maxNumErrorsAllowed) - << "Failed test" << msg.str(); + EXPECT_LE(numErrorsReal, maxNumErrorsAllowed) << "Failed test" << msg.str(); } { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla differs too much from analytical on imag components" - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_expected(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": vanilla differs too much from analytical on imag components" + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_expected(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", h_vanilla(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? 
h_vanilla(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_vanilla(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsImag(numErrorsImagAbs + numErrorsImagRel); @@ -896,8 +783,7 @@ SyrTester:: std::cout << "WARNING" << msg.str() << std::endl; #endif } - EXPECT_LE(numErrorsImag, maxNumErrorsAllowed) - << "Failed test" << msg.str(); + EXPECT_LE(numErrorsImag, maxNumErrorsAllowed) << "Failed test" << msg.str(); } } else { int numErrorsReal(0); @@ -908,11 +794,8 @@ SyrTester:: if (h_expected(i, j).real() != h_vanilla(i, j).real()) { if (numErrorsReal == 0) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " - << h_expected(i, j).real() - << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() - << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() + << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() << std::endl; #endif } numErrorsReal++; @@ -921,63 +804,50 @@ SyrTester:: if (h_expected(i, j).imag() != h_vanilla(i, j).imag()) { if (numErrorsImag == 0) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " - << h_expected(i, j).imag() - << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() - << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() << std::endl; #endif } numErrorsImag++; } } // for j } // for i - EXPECT_EQ(numErrorsReal, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": 
vanilla result is incorrect on real components" - << ", numErrorsReal = " << numErrorsReal; - EXPECT_EQ(numErrorsImag, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla result is incorrect on imag components" - << ", numErrorsImag = " << numErrorsImag; + EXPECT_EQ(numErrorsReal, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption + << ": vanilla result is incorrect on real components" + << ", numErrorsReal = " << numErrorsReal; + EXPECT_EQ(numErrorsImag, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption + << ": vanilla result is incorrect on imag components" + << ", numErrorsImag = " << numErrorsImag; } } // Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -SyrTester:: - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, - const _ViewTypeExpected& h_expected) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +SyrTester::compareVanillaAgainstExpected( + const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected) { if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "h_exp(" << i << "," << j << ")=" << h_expected(i, j) - << ", h_van(" << i << "," << j << ")=" << h_vanilla(i, 
j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ")=" << h_expected(i, j) << ", h_van(" << i << "," << j + << ")=" << h_vanilla(i, j) << std::endl; #endif } } } - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); if (_useAnalyticalResults) { int numErrorsAbs(0); @@ -1015,12 +885,10 @@ SyrTester:: } if (errorHappened && (numErrorsAbs + numErrorsRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) << ", h_vanilla(i,j) = " << h_vanilla(i, j) - << ", _KAT_A::abs(h_expected(i,j) - h_vanilla(i,j)) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << ", _KAT_A::abs(h_expected(i,j) - h_vanilla(i,j)) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j @@ -1028,24 +896,14 @@ SyrTester:: { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla differs too much from expected" - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": vanilla differs too much from expected" + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << iForMaxErrorRel << ", 
jForMaxErrorRel = " << jForMaxErrorRel << ", h_expected(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_vanilla(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_vanilla(i,j) = " << (((_M > 0) && (_N > 0)) ? h_vanilla(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrors(numErrorsAbs + numErrorsRel); @@ -1064,8 +922,7 @@ SyrTester:: if (h_expected(i, j) != h_vanilla(i, j)) { if (numErrors == 0) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) << ", h_vanilla(i,j) = " << h_vanilla(i, j) << std::endl; #endif } @@ -1073,42 +930,34 @@ SyrTester:: } } // for j } // for i - EXPECT_EQ(numErrors, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla result is incorrect" - << ", numErrors = " << numErrors; + EXPECT_EQ(numErrors, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption << ", _useUpOption = " << _useUpOption + << ": vanilla result is incorrect" + << ", numErrors = " << numErrors; } } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -SyrTester:: - compareKkSyrAgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& 
h_reference) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +SyrTester::compareKkSyrAgainstReference( + const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference) { if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "h_exp(" << i << "," << j << ")=" << h_reference(i, j) - << ", h_A(" << i << "," << j << ")=" << h_A(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ")=" << h_reference(i, j) << ", h_A(" << i << "," << j + << ")=" << h_A(i, j) << std::endl; #endif } } } - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); int numErrorsRealAbs(0); int numErrorsRealRel(0); @@ -1149,12 +998,10 @@ SyrTester:: } if (errorHappened && (numErrorsRealAbs + numErrorsRealRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "ERROR, i = " << i << ", j = " << j - << ": h_reference(i,j).real() = " << h_reference(i, j).real() - << ", h_A(i,j).real() = " << h_A(i, j).real() - << ", _KAT_A::abs(h_reference(i,j).real() - h_A(i,j).real()) = " - << diff << ", diffThreshold = " << diffThreshold << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_reference(i,j).real() = " << h_reference(i, j).real() + << ", h_A(i,j).real() = " << h_A(i, j).real() + << ", _KAT_A::abs(h_reference(i,j).real() - h_A(i,j).real()) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } @@ -1182,95 +1029,58 @@ SyrTester:: } if (errorHappened && (numErrorsImagAbs + numErrorsImagRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "ERROR, i = " << i << ", j = " << j - << ": h_reference(i,j).imag() = " << h_reference(i, j).imag() - << ", h_A(i,j).imag() = " << h_A(i, j).imag() - << ", _KAT_A::abs(h_reference(i,j).imag() - h_A(i,j).imag()) = " - << diff << ", diffThreshold = " << diffThreshold << std::endl; + std::cout << 
"ERROR, i = " << i << ", j = " << j << ": h_reference(i,j).imag() = " << h_reference(i, j).imag() + << ", h_A(i,j).imag() = " << h_A(i, j).imag() + << ", _KAT_A::abs(h_reference(i,j).imag() - h_A(i,j).imag()) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_reference(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) - << ", h_A(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_reference(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) - << ", h_A(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) - << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; + std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", numErrorsRealAbs = " << numErrorsRealAbs + << ", numErrorsRealRel = " << numErrorsRealRel << ", maxErrorRealRel = " << maxErrorRealRel + << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel + << ", h_reference(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) + << ", h_A(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_reference(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) + << ", h_A(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) + << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; if ((_M == 2131) && (_N == 2131)) { std::cout << "Information" - << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", h_reference(11, 2119) = (" << h_reference(11, 2119).real() + << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", h_reference(11, 2119) = (" << h_reference(11, 2119).real() << ", " << h_reference(11, 2119).imag() << ")" - << ", h_A(11, 2119) = (" << h_A(11, 2119).real() << ", " - << h_A(11, 2119).imag() << ")" << std::endl; + << ", h_A(11, 2119) = (" << h_A(11, 2119).real() << ", " << h_A(11, 2119).imag() << ")" << std::endl; std::cout << "Information" - << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", h_reference(710, 1065) = (" << h_reference(710, 1065).real() + << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", h_reference(710, 1065) = (" << h_reference(710, 1065).real() << ", " << h_reference(710, 1065).imag() << ")" - << ", h_A(710, 1065) = (" << h_A(710, 1065).real() << ", " - << h_A(710, 1065).imag() << ")" << std::endl; + << ", h_A(710, 1065) = (" << h_A(710, 1065).real() << ", " << h_A(710, 1065).imag() << ")" << std::endl; } #endif { 
std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": syr result is incorrect on real components" - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_reference(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": syr result is incorrect on real components" + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_reference(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", h_A(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsReal(numErrorsRealAbs + numErrorsRealRel); @@ -1283,25 +1093,15 @@ SyrTester:: } { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": syr result is incorrect on imag components" - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_reference(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": syr result is incorrect on imag components" + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_reference(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", h_A(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsImag(numErrorsImagAbs + numErrorsImagRel); @@ -1315,28 +1115,23 @@ SyrTester:: } // Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -SyrTester:: - compareKkSyrAgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_reference) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +SyrTester::compareKkSyrAgainstReference( + const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_exp(" << i << "," << j << ")=" << h_reference(i, j) - << ", h_A(" << i << "," << j << ")=" << h_A(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ")=" << h_reference(i, j) << ", h_A(" << i << "," << j + << ")=" << h_A(i, j) << std::endl; } } } #endif - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); int numErrorsAbs(0); int numErrorsRel(0); @@ -1372,53 +1167,34 @@ SyrTester:: } if (errorHappened && (numErrorsAbs + numErrorsRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_reference(i,j) = " << h_reference(i, j) - << ", h_A(i,j) = " << h_A(i, j) - << ", _KAT_A::abs(h_reference(i,j) - h_A(i,j)) = " << diff + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_reference(i,j) = " << h_reference(i, j) + << ", h_A(i,j) = " << h_A(i, j) << ", _KAT_A::abs(h_reference(i,j) - h_A(i,j)) = " << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll 
= " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel - << ", jForMaxErrorRel = " << jForMaxErrorRel + std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", numErrorsAbs = " << numErrorsAbs + << ", numErrorsRel = " << numErrorsRel << ", maxErrorRel = " << maxErrorRel + << ", iForMaxErrorRel = " << iForMaxErrorRel << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_reference(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_A(i,j) = " - << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_A(i,j) = " << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; #endif { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ", _useUpOption = " << _useUpOption << ": syr result is incorrect" - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << iForMaxErrorRel << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_reference(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_A(i,j) = " - << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_A(i,j) = " << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrors(numErrorsAbs + numErrorsRel); @@ -1431,27 +1207,17 @@ SyrTester:: } } -template +template template -void SyrTester:: - callKkSyrAndCompareAgainstExpected( - const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& A, - const _ViewTypeExpected& h_expected, const std::string& situation) { +void SyrTester::callKkSyrAndCompareAgainstExpected( + const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& A, const _ViewTypeExpected& h_expected, + const std::string& situation) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr, '" << situation << "', alpha = " << alpha - << std::endl; -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "In Test_Blas2_syr.hpp, right before calling KokkosBlas::syr(): " - "ViewTypeA = %s, _kkSyrShouldThrowException=%d\n", - typeid(_ViewTypeA).name(), _kkSyrShouldThrowException); -#else + std::cout << "In Test_Blas2_syr, '" << situation << "', alpha = " << alpha << std::endl; Kokkos::printf( "In Test_Blas2_syr.hpp, right before calling KokkosBlas::syr(): " "ViewTypeA = %s, _kkSyrShouldThrowException=%d\n", typeid(_ViewTypeA).name(), _kkSyrShouldThrowException); -#endif #endif std::string mode = _useHermitianOption ? "H" : "T"; std::string uplo = _useUpOption ? "U" : "L"; @@ -1461,25 +1227,21 @@ void SyrTester:: KokkosBlas::syr(mode.c_str(), uplo.c_str(), alpha, x, A.d_view); } catch (const std::exception& e) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr, '" << situation - << "': caught exception, e.what() = " << e.what() << std::endl; + std::cout << "In Test_Blas2_syr, '" << situation << "': caught exception, e.what() = " << e.what() << std::endl; #endif gotStdException = true; } catch (...) 
{ #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr, '" << situation - << "': caught unknown exception" << std::endl; + std::cout << "In Test_Blas2_syr, '" << situation << "': caught unknown exception" << std::endl; #endif gotUnknownException = true; } - EXPECT_EQ(gotUnknownException, false) - << "Failed test, '" << situation - << "': unknown exception should not have happened"; + EXPECT_EQ(gotUnknownException, false) << "Failed test, '" << situation + << "': unknown exception should not have happened"; EXPECT_EQ(gotStdException, _kkSyrShouldThrowException) - << "Failed test, '" << situation << "': kk syr() should" - << (_kkSyrShouldThrowException ? " " : " not ") + << "Failed test, '" << situation << "': kk syr() should" << (_kkSyrShouldThrowException ? " " : " not ") << "have thrown a std::exception"; if ((gotStdException == false) && (gotUnknownException == false)) { @@ -1488,14 +1250,11 @@ void SyrTester:: } } -template +template template -void SyrTester:: - callKkGerAndCompareKkSyrAgainstIt( - const ScalarA& alpha, TX& x, - view_stride_adapter<_ViewTypeA, false>& org_A, - const _HostViewTypeA& h_A_syr, const std::string& situation) { +void SyrTester::callKkGerAndCompareKkSyrAgainstIt( + const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& org_A, const _HostViewTypeA& h_A_syr, + const std::string& situation) { view_stride_adapter<_ViewTypeA, false> A_ger("A_ger", _M, _N); Kokkos::deep_copy(A_ger.d_base, org_A.d_base); @@ -1503,19 +1262,11 @@ void SyrTester:: // Call ger() // ******************************************************************** #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr, '" << situation << "', alpha = " << alpha - << std::endl; -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "In Test_Blas2_syr.hpp, right before calling KokkosBlas::ger(): " - "ViewTypeA = %s, _kkGerShouldThrowException=%d\n", - typeid(_ViewTypeA).name(), _kkGerShouldThrowException); -#else + std::cout << "In 
Test_Blas2_syr, '" << situation << "', alpha = " << alpha << std::endl; Kokkos::printf( "In Test_Blas2_syr.hpp, right before calling KokkosBlas::ger(): " "ViewTypeA = %s, _kkGerShouldThrowException=%d\n", typeid(_ViewTypeA).name(), _kkGerShouldThrowException); -#endif #endif std::string mode = _useHermitianOption ? "H" : "T"; bool gotStdException(false); @@ -1524,39 +1275,33 @@ void SyrTester:: KokkosBlas::ger(mode.c_str(), alpha, x, x, A_ger.d_view); } catch (const std::exception& e) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr, '" << situation - << "', ger() call: caught exception, e.what() = " << e.what() + std::cout << "In Test_Blas2_syr, '" << situation << "', ger() call: caught exception, e.what() = " << e.what() << std::endl; #endif gotStdException = true; } catch (...) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr, '" << situation - << "', ger() call: caught unknown exception" << std::endl; + std::cout << "In Test_Blas2_syr, '" << situation << "', ger() call: caught unknown exception" << std::endl; #endif gotUnknownException = true; } - EXPECT_EQ(gotUnknownException, false) - << "Failed test, '" << situation - << "': unknown exception should not have happened for ger() call"; + EXPECT_EQ(gotUnknownException, false) << "Failed test, '" << situation + << "': unknown exception should not have happened for ger() call"; - EXPECT_EQ(gotStdException, false) - << "Failed test, '" << situation - << "': kk ger() should not have thrown a std::exception"; + EXPECT_EQ(gotStdException, false) << "Failed test, '" << situation + << "': kk ger() should not have thrown a std::exception"; // ******************************************************************** // Prepare h_ger_reference to be compared against h_A_syr // ******************************************************************** - view_stride_adapter<_ViewTypeExpected, true> h_ger_reference( - "h_ger_reference", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> 
h_ger_reference("h_ger_reference", _M, _N); Kokkos::deep_copy(h_ger_reference.d_base, A_ger.d_base); std::string uplo = _useUpOption ? "U" : "L"; for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { // Keep h_ger_reference as already computed } else { h_ger_reference.d_view(i, j) = org_A.h_view(i, j); @@ -1565,9 +1310,7 @@ void SyrTester:: } if (_useHermitianOption && _A_is_complex) { for (int i(0); i < _N; ++i) { - h_ger_reference.d_view(i, i) = - 0.5 * (h_ger_reference.d_view(i, i) + - _KAT_A::conj(h_ger_reference.d_view(i, i))); + h_ger_reference.d_view(i, i) = 0.5 * (h_ger_reference.d_view(i, i) + _KAT_A::conj(h_ger_reference.d_view(i, i))); } } @@ -1582,51 +1325,31 @@ void SyrTester:: template #ifdef HAVE_KOKKOSKERNELS_DEBUG int test_syr(const std::string& caseName) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+=======================================================================" - "===\n"); - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s ...\n", caseName.c_str()); -#else Kokkos::printf( "+=======================================================================" "===\n"); Kokkos::printf("Starting %s ...\n", caseName.c_str()); -#endif #else int test_syr(const std::string& /*caseName*/) { #endif - bool xBool = std::is_same::value || - std::is_same::value || + bool xBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; - bool aBool = std::is_same::value || - std::is_same::value || + bool aBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; bool useAnalyticalResults = xBool && aBool; #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for LAYOUTLEFT ...\n", - caseName.c_str()); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); Kokkos::printf("Starting %s for LAYOUTLEFT ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::SyrTester - tester; + Test::SyrTester tester; tester.test(0, 0); tester.test(1, 0); tester.test(2, 0); @@ -1652,42 +1375,23 @@ int test_syr(const std::string& /*caseName*/) { } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for LAYOUTLEFT\n", - caseName.c_str()); - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf("Finished %s for LAYOUTLEFT\n", caseName.c_str()); Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for LAYOUTRIGHT ...\n", - caseName.c_str()); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); Kokkos::printf("Starting %s for LAYOUTRIGHT ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::SyrTester - tester; + Test::SyrTester tester; tester.test(0, 0); tester.test(1, 0); tester.test(2, 0); @@ -1713,42 
+1417,23 @@ int test_syr(const std::string& /*caseName*/) { } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for LAYOUTRIGHT\n", - caseName.c_str()); - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf("Finished %s for LAYOUTRIGHT\n", caseName.c_str()); Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif #if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for LAYOUTSTRIDE ...\n", - caseName.c_str()); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); Kokkos::printf("Starting %s for LAYOUTSTRIDE ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::SyrTester - tester; + Test::SyrTester tester; tester.test(0, 0); tester.test(1, 0); tester.test(2, 0); @@ -1774,41 +1459,22 @@ int test_syr(const std::string& /*caseName*/) { } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for LAYOUTSTRIDE\n", - caseName.c_str()); - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf("Finished %s for LAYOUTSTRIDE\n", caseName.c_str()); Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if 
!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Starting %s for MIXED LAYOUTS ...\n", - caseName.c_str()); -#else Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); Kokkos::printf("Starting %s for MIXED LAYOUTS ...\n", caseName.c_str()); -#endif #endif if (true) { - Test::SyrTester - tester; + Test::SyrTester tester; tester.test(1, 0); tester.test(2, 0); tester.test(1024, 0); @@ -1825,47 +1491,29 @@ int test_syr(const std::string& /*caseName*/) { } if (true) { - Test::SyrTester - tester; + Test::SyrTester tester; tester.test(1024, 0); } #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s for MIXED LAYOUTS\n", - caseName.c_str()); - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+-----------------------------------------------------------------------" - "---\n"); -#else Kokkos::printf("Finished %s for MIXED LAYOUTS\n", caseName.c_str()); Kokkos::printf( "+-----------------------------------------------------------------------" "---\n"); #endif #endif -#endif #ifdef HAVE_KOKKOSKERNELS_DEBUG -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("Finished %s\n", caseName.c_str()); - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "+=======================================================================" - "===\n"); -#else Kokkos::printf("Finished %s\n", caseName.c_str()); Kokkos::printf( "+=======================================================================" "===\n"); -#endif #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr_float) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr_float"); test_syr("test case syr_float"); @@ -1874,19 +1522,16 @@ TEST_F(TestCategory, syr_float) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr_complex_float"); - test_syr, Kokkos::complex, TestDevice>( - "test case syr_complex_float"); + test_syr, Kokkos::complex, TestDevice>("test case syr_complex_float"); Kokkos::Profiling::popRegion(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr_double"); test_syr("test case syr_double"); @@ -1895,19 +1540,16 @@ TEST_F(TestCategory, syr_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr_complex_double"); - test_syr, Kokkos::complex, TestDevice>( - "test case syr_complex_double"); + test_syr, Kokkos::complex, TestDevice>("test case syr_complex_double"); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr_int"); test_syr("test 
case syr_int"); @@ -1915,8 +1557,7 @@ TEST_F(TestCategory, syr_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, syr_int_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr_int_float"); test_syr("test case syr_int_float"); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr2.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr2.hpp index c49eba765b65..2d6792f8c840 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr2.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_syr2.hpp @@ -56,114 +56,91 @@ namespace Test { -template +template class Syr2Tester { public: Syr2Tester(); ~Syr2Tester(); - void test(const int N, const int nonConstConstCombinations, - const bool useAnalyticalResults = false, - const bool useHermitianOption = false, - const bool useUpOption = false); + void test(const int N, const int nonConstConstCombinations, const bool useAnalyticalResults = false, + const bool useHermitianOption = false, const bool useUpOption = false); private: using _ViewTypeX = Kokkos::View; using _ViewTypeY = Kokkos::View; using _ViewTypeA = Kokkos::View; - using _HostViewTypeX = typename _ViewTypeX::HostMirror; - using _HostViewTypeY = typename _ViewTypeY::HostMirror; - using _HostViewTypeA = typename _ViewTypeA::HostMirror; - using _ViewTypeExpected = - Kokkos::View; + using _HostViewTypeX = typename _ViewTypeX::HostMirror; + using _HostViewTypeY = typename _ViewTypeY::HostMirror; + using _HostViewTypeA = typename _ViewTypeA::HostMirror; + using _ViewTypeExpected = Kokkos::View; using _KAT_A = Kokkos::ArithTraits; using _AuxType = typename _KAT_A::mag_type; - void populateVariables(ScalarA& alpha, - view_stride_adapter<_ViewTypeX, false>& x, - view_stride_adapter<_ViewTypeY, false>& y, - view_stride_adapter<_ViewTypeA, false>& A, - _ViewTypeExpected& 
h_expected, - bool& expectedResultIsKnown); + void populateVariables(ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeY, false>& y, view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, bool& expectedResultIsKnown); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, - _HostViewTypeA& h_A, _ViewTypeExpected& h_expected); + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, + _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, - _HostViewTypeA& h_A, _ViewTypeExpected& h_expected); + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, + _ViewTypeExpected& h_expected); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, - const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, _ViewTypeExpected& h_vanilla); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, - const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + populateVanillaValues(const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, 
const _HostViewTypeA& h_A, _ViewTypeExpected& h_vanilla); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + compareVanillaAgainstExpected(const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + compareVanillaAgainstExpected(const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected); template - typename std::enable_if>::value || - std::is_same>::value, - void>::type - compareKkSyr2AgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_reference); + typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type + compareKkSyr2AgainstReference(const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference); template - typename std::enable_if>::value && - !std::is_same>::value, - void>::type - compareKkSyr2AgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_reference); + typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type + compareKkSyr2AgainstReference(const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference); template T shrinkAngleToZeroTwoPiRange(const T input); template - void callKkSyr2AndCompareAgainstExpected( - const ScalarA& alpha, TX& x, TY& y, - view_stride_adapter<_ViewTypeA, false>& A, - const _ViewTypeExpected& h_expected, const std::string& situation); + void callKkSyr2AndCompareAgainstExpected(const ScalarA& alpha, TX& x, TY& y, + view_stride_adapter<_ViewTypeA, 
false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation); template - void callKkGerAndCompareKkSyr2AgainstIt( - const ScalarA& alpha, TX& x, TY& y, - view_stride_adapter<_ViewTypeA, false>& org_A, - const _HostViewTypeA& h_A_syr2, const std::string& situation); + void callKkGerAndCompareKkSyr2AgainstIt(const ScalarA& alpha, TX& x, TY& y, + view_stride_adapter<_ViewTypeA, false>& org_A, const _HostViewTypeA& h_A_syr2, + const std::string& situation); const bool _A_is_complex; const bool _A_is_lr; @@ -181,16 +158,13 @@ class Syr2Tester { bool _kkGerShouldThrowException; }; -template -Syr2Tester::Syr2Tester() +template +Syr2Tester::Syr2Tester() : _A_is_complex(std::is_same>::value || std::is_same>::value), _A_is_lr(std::is_same::value), _A_is_ll(std::is_same::value), - _testIsGpu(KokkosKernels::Impl::kk_is_gpu_exec_space< - typename Device::execution_space>()) + _testIsGpu(KokkosKernels::Impl::kk_is_gpu_exec_space()) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS , _vanillaUsesDifferentOrderOfOps(_A_is_lr) @@ -207,12 +181,8 @@ Syr2Tester::value - ? 1.0e-6 - : (std::is_same<_AuxType, double>::value ? 1.0e-9 : 0)), - _relTol(std::is_same<_AuxType, float>::value - ? 5.0e-3 - : (std::is_same<_AuxType, double>::value ? 1.0e-6 : 0)), + _absTol(std::is_same<_AuxType, float>::value ? 1.0e-6 : (std::is_same<_AuxType, double>::value ? 1.0e-9 : 0)), + _relTol(std::is_same<_AuxType, float>::value ? 5.0e-3 : (std::is_same<_AuxType, double>::value ? 
1.0e-6 : 0)), _M(-1), _N(-1), _useAnalyticalResults(false), @@ -222,35 +192,26 @@ Syr2Tester -Syr2Tester::~Syr2Tester() { +template +Syr2Tester::~Syr2Tester() { // Nothing to do } -template -void Syr2Tester::test(const int N, const int nonConstConstCombinations, - const bool useAnalyticalResults, - const bool useHermitianOption, - const bool useUpOption) { +template +void Syr2Tester::test( + const int N, const int nonConstConstCombinations, const bool useAnalyticalResults, const bool useHermitianOption, + const bool useUpOption) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Entering Syr2Tester::test()... - - - - - - - - - - - - - - - - " "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - " "- - - - - - - - - " << std::endl; - std::cout << "_A_is_complex = " << _A_is_complex - << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + std::cout << "_A_is_complex = " << _A_is_complex << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll << ", _testIsGpu = " << _testIsGpu - << ", _vanillaUsesDifferentOrderOfOps = " - << _vanillaUsesDifferentOrderOfOps << ", _absTol = " << _absTol - << ", _relTol = " << _relTol - << ", nonConstConstCombinations = " << nonConstConstCombinations - << ", useAnalyticalResults = " << useAnalyticalResults - << ", useHermitianOption = " << useHermitianOption + << ", _vanillaUsesDifferentOrderOfOps = " << _vanillaUsesDifferentOrderOfOps << ", _absTol = " << _absTol + << ", _relTol = " << _relTol << ", nonConstConstCombinations = " << nonConstConstCombinations + << ", useAnalyticalResults = " << useAnalyticalResults << ", useHermitianOption = " << useHermitianOption << ", useUpOption = " << useUpOption << std::endl; #endif // ******************************************************************** @@ -286,8 +247,7 @@ void Syr2Tester y("Y", _N); view_stride_adapter<_ViewTypeA, false> A("A", _M, _N); - view_stride_adapter<_ViewTypeExpected, true> h_expected( - "expected A += alpha * x * x^{t,h}", _M, _N); + 
view_stride_adapter<_ViewTypeExpected, true> h_expected("expected A += alpha * x * x^{t,h}", _M, _N); bool expectedResultIsKnown = false; using AlphaCoeffType = typename _ViewTypeA::non_const_value_type; @@ -296,20 +256,16 @@ void Syr2TesterpopulateVariables(alpha, x, y, A, h_expected.d_view, - expectedResultIsKnown); + this->populateVariables(alpha, x, y, A, h_expected.d_view, expectedResultIsKnown); // ******************************************************************** // Step 3 of 7: populate h_vanilla // ******************************************************************** - view_stride_adapter<_ViewTypeExpected, true> h_vanilla( - "vanilla = A + alpha * x * x^{t,h}", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> h_vanilla("vanilla = A + alpha * x * x^{t,h}", _M, _N); #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr2.hpp, computing vanilla A with alpha type = " - << typeid(alpha).name() << std::endl; + std::cout << "In Test_Blas2_syr2.hpp, computing vanilla A with alpha type = " << typeid(alpha).name() << std::endl; #endif - this->populateVanillaValues(alpha, x.h_view, y.h_view, A.h_view, - h_vanilla.d_view); + this->populateVanillaValues(alpha, x.h_view, y.h_view, A.h_view, h_vanilla.d_view); // ******************************************************************** // Step 4 of 7: use h_vanilla and h_expected as appropriate @@ -318,8 +274,7 @@ void Syr2TestercompareVanillaAgainstExpected(alpha, h_vanilla.d_view, - h_expected.d_view); + this->compareVanillaAgainstExpected(alpha, h_vanilla.d_view, h_expected.d_view); } else { // ****************************************************************** // Copy h_vanilla to h_expected @@ -335,13 +290,11 @@ void Syr2TestercallKkSyr2AndCompareAgainstExpected(alpha, x.d_view, y.d_view, A, - h_expected.d_view, "non const x"); + this->callKkSyr2AndCompareAgainstExpected(alpha, x.d_view, y.d_view, A, h_expected.d_view, "non const x"); if ((_useAnalyticalResults == false) && // Just to save run time 
(_kkGerShouldThrowException == false)) { - this->callKkGerAndCompareKkSyr2AgainstIt(alpha, x.d_view, y.d_view, org_A, - A.h_view, "non const x"); + this->callKkGerAndCompareKkSyr2AgainstIt(alpha, x.d_view, y.d_view, org_A, A.h_view, "non const x"); } } @@ -351,24 +304,19 @@ void Syr2TestercallKkSyr2AndCompareAgainstExpected( - alpha, x.d_view_const, y.d_view_const, A, h_expected.d_view, "const x"); + this->callKkSyr2AndCompareAgainstExpected(alpha, x.d_view_const, y.d_view_const, A, h_expected.d_view, "const x"); } // ******************************************************************** // Step 7 of 7: tests with invalid values on the first input parameter // ******************************************************************** - EXPECT_ANY_THROW( - KokkosBlas::syr2(".", "U", alpha, x.d_view, y.d_view, A.d_view)) + EXPECT_ANY_THROW(KokkosBlas::syr2(".", "U", alpha, x.d_view, y.d_view, A.d_view)) << "Failed test: kk syr2 should have thrown an exception for mode '.'"; - EXPECT_ANY_THROW( - KokkosBlas::syr2("", "U", alpha, x.d_view, y.d_view, A.d_view)) + EXPECT_ANY_THROW(KokkosBlas::syr2("", "U", alpha, x.d_view, y.d_view, A.d_view)) << "Failed test: kk syr2 should have thrown an exception for mode ''"; - EXPECT_ANY_THROW( - KokkosBlas::syr2("T", ".", alpha, x.d_view, y.d_view, A.d_view)) + EXPECT_ANY_THROW(KokkosBlas::syr2("T", ".", alpha, x.d_view, y.d_view, A.d_view)) << "Failed test: kk syr2 should have thrown an exception for uplo '.'"; - EXPECT_ANY_THROW( - KokkosBlas::syr2("T", "", alpha, x.d_view, y.d_view, A.d_view)) + EXPECT_ANY_THROW(KokkosBlas::syr2("T", "", alpha, x.d_view, y.d_view, A.d_view)) << "Failed test: kk syr2 should have thrown an exception for uplo ''"; #ifdef HAVE_KOKKOSKERNELS_DEBUG @@ -379,21 +327,14 @@ void Syr2Tester -void Syr2Tester< - ScalarX, tLayoutX, ScalarY, tLayoutY, ScalarA, tLayoutA, - Device>::populateVariables(ScalarA& alpha, - view_stride_adapter<_ViewTypeX, false>& x, - view_stride_adapter<_ViewTypeY, false>& y, - 
view_stride_adapter<_ViewTypeA, false>& A, - _ViewTypeExpected& h_expected, - bool& expectedResultIsKnown) { +template +void Syr2Tester::populateVariables( + ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, view_stride_adapter<_ViewTypeY, false>& y, + view_stride_adapter<_ViewTypeA, false>& A, _ViewTypeExpected& h_expected, bool& expectedResultIsKnown) { expectedResultIsKnown = false; if (_useAnalyticalResults) { - this->populateAnalyticalValues(alpha, x.h_view, y.h_view, A.h_view, - h_expected); + this->populateAnalyticalValues(alpha, x.h_view, y.h_view, A.h_view, h_expected); Kokkos::deep_copy(x.d_base, x.h_base); Kokkos::deep_copy(y.d_base, y.h_base); Kokkos::deep_copy(A.d_base, A.h_base); @@ -447,8 +388,7 @@ void Syr2Tester< } else { alpha = 3; - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); { ScalarX randStart, randEnd; @@ -502,8 +442,7 @@ void Syr2Tester< if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_origA(" << i << "," << j << ") = " << A.h_view(i, j) - << std::endl; + std::cout << "h_origA(" << i << "," << j << ") = " << A.h_view(i, j) << std::endl; } } } @@ -511,17 +450,12 @@ void Syr2Tester< } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -Syr2Tester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +Syr2Tester::populateAnalyticalValues( + T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, _ViewTypeExpected& h_expected) { alpha.real() = 1.4; alpha.imag() = -2.3; @@ -540,12 +474,9 @@ Syr2TestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); - _AuxType auxImJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); - if (((_useUpOption == true) && 
(i <= j)) || - ((_useUpOption == false) && (i >= j))) { + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + _AuxType auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_A(i, j).real() = sin(auxIpJ); h_A(i, j).imag() = -sin(auxImJ); } else { @@ -557,8 +488,7 @@ Syr2TestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_A(i, j).real() = sin(auxIpJ); h_A(i, j).imag() = sin(auxIpJ); } @@ -568,12 +498,9 @@ Syr2Tester= j))) { - _AuxType auxIpJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); - _AuxType auxImJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + _AuxType auxImJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i - j)); h_expected(i, j).real() = 3.8 * sin(auxIpJ); h_expected(i, j).imag() = -5.6 * sin(auxImJ); } else { @@ -585,10 +512,8 @@ Syr2Tester= j))) { - _AuxType auxIpJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_expected(i, j).real() = 5.6 * sin(auxIpJ); h_expected(i, j).imag() = 3.8 * sin(auxIpJ); } else { @@ -601,17 +526,12 @@ Syr2Tester +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -Syr2Tester::populateAnalyticalValues(T& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +Syr2Tester::populateAnalyticalValues( 
+ T& alpha, _HostViewTypeX& h_x, _HostViewTypeY& h_y, _HostViewTypeA& h_A, _ViewTypeExpected& h_expected) { alpha = std::is_same<_AuxType, int>::value ? 1 : 1.1; for (int i = 0; i < _M; ++i) { @@ -626,18 +546,15 @@ Syr2TestershrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); - h_A(i, j) = .1 * sin(auxIpJ); + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + h_A(i, j) = .1 * sin(auxIpJ); } } for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { - _AuxType auxIpJ = - this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + _AuxType auxIpJ = this->shrinkAngleToZeroTwoPiRange(static_cast<_AuxType>(i + j)); h_expected(i, j) = 1.2 * sin(auxIpJ); } else { h_expected(i, j) = h_A(i, j); @@ -647,27 +564,20 @@ Syr2Tester +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -Syr2Tester::populateVanillaValues(const T& alpha, - const _HostViewTypeX& h_x, - const _HostViewTypeY& h_y, - const _HostViewTypeA& h_A, - _ViewTypeExpected& h_vanilla) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +Syr2Tester::populateVanillaValues( + const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + _ViewTypeExpected& h_vanilla) { if (_vanillaUsesDifferentOrderOfOps) { if (_useHermitianOption) { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = - h_A(i, j) + alpha * _KAT_A::conj(h_y(j)) * h_x(i) + - _KAT_A::conj(alpha) * _KAT_A::conj(h_x(j)) * h_y(i); + h_A(i, j) + alpha * _KAT_A::conj(h_y(j)) * h_x(i) + _KAT_A::conj(alpha) * 
_KAT_A::conj(h_x(j)) * h_y(i); } else { h_vanilla(i, j) = h_A(i, j); } @@ -679,10 +589,8 @@ Syr2Tester= j))) { - h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(j) * h_y(i) + alpha * h_y(j) * h_x(i); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + h_vanilla(i, j) = h_A(i, j) + alpha * h_x(j) * h_y(i) + alpha * h_y(j) * h_x(i); } else { h_vanilla(i, j) = h_A(i, j); } @@ -693,11 +601,9 @@ Syr2Tester= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(i) * _KAT_A::conj(h_y(j)) + - _KAT_A::conj(alpha) * h_y(i) * _KAT_A::conj(h_x(j)); + h_A(i, j) + alpha * h_x(i) * _KAT_A::conj(h_y(j)) + _KAT_A::conj(alpha) * h_y(i) * _KAT_A::conj(h_x(j)); } else { h_vanilla(i, j) = h_A(i, j); } @@ -709,10 +615,8 @@ Syr2Tester= j))) { - h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(i) * h_y(j) + alpha * h_y(i) * h_x(j); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + h_vanilla(i, j) = h_A(i, j) + alpha * h_x(i) * h_y(j) + alpha * h_y(i) * h_x(j); } else { h_vanilla(i, j) = h_A(i, j); } @@ -723,27 +627,20 @@ Syr2Tester +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -Syr2Tester::populateVanillaValues(const T& alpha, - const _HostViewTypeX& h_x, - const _HostViewTypeY& h_y, - const _HostViewTypeA& h_A, - _ViewTypeExpected& h_vanilla) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +Syr2Tester::populateVanillaValues( + const T& alpha, const _HostViewTypeX& h_x, const _HostViewTypeY& h_y, const _HostViewTypeA& h_A, + _ViewTypeExpected& h_vanilla) { if (_useHermitianOption) { if (_vanillaUsesDifferentOrderOfOps) { for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) 
{ h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(j) * _KAT_A::conj(h_y(i)) + - _KAT_A::conj(alpha) * h_y(j) * _KAT_A::conj(h_x(i)); + h_A(i, j) + alpha * h_x(j) * _KAT_A::conj(h_y(i)) + _KAT_A::conj(alpha) * h_y(j) * _KAT_A::conj(h_x(i)); } else { h_vanilla(i, j) = h_A(i, j); } @@ -752,11 +649,9 @@ Syr2Tester= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(i) * _KAT_A::conj(h_y(j)) + - _KAT_A::conj(alpha) * h_y(i) * _KAT_A::conj(h_x(j)); + h_A(i, j) + alpha * h_x(i) * _KAT_A::conj(h_y(j)) + _KAT_A::conj(alpha) * h_y(i) * _KAT_A::conj(h_x(j)); } else { h_vanilla(i, j) = h_A(i, j); } @@ -767,10 +662,8 @@ Syr2Tester= j))) { - h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(j) * h_y(i) + alpha * h_y(j) * h_x(i); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + h_vanilla(i, j) = h_A(i, j) + alpha * h_x(j) * h_y(i) + alpha * h_y(j) * h_x(i); } else { h_vanilla(i, j) = h_A(i, j); } @@ -779,10 +672,8 @@ Syr2Tester= j))) { - h_vanilla(i, j) = - h_A(i, j) + alpha * h_x(i) * h_y(j) + alpha * h_y(i) * h_x(j); + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { + h_vanilla(i, j) = h_A(i, j) + alpha * h_x(i) * h_y(j) + alpha * h_y(i) * h_x(j); } else { h_vanilla(i, j) = h_A(i, j); } @@ -792,11 +683,10 @@ Syr2Tester +template template -T Syr2Tester::shrinkAngleToZeroTwoPiRange(const T input) { +T Syr2Tester::shrinkAngleToZeroTwoPiRange( + const T input) { T output(input); #if 0 T twoPi( 2. 
* Kokkos::numbers::pi ); @@ -811,29 +701,23 @@ T Syr2Tester +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -Syr2Tester:: - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, - const _ViewTypeExpected& h_expected) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +Syr2Tester::compareVanillaAgainstExpected( + const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_exp(" << i << "," << j << ") = " << h_expected(i, j) - << ", h_van(" << i << "," << j << ") = " << h_vanilla(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ") = " << h_expected(i, j) << ", h_van(" << i << "," << j + << ") = " << h_vanilla(i, j) << std::endl; } } } #endif - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); if (_useAnalyticalResults) { int numErrorsRealAbs(0); @@ -852,7 +736,7 @@ Syr2Tester:: for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - diff = _KAT_A::abs(h_expected(i, j).real() - h_vanilla(i, j).real()); + diff = _KAT_A::abs(h_expected(i, j).real() - h_vanilla(i, j).real()); errorHappened = false; if (h_expected(i, j).real() == 0.) 
{ diffThreshold = _KAT_A::abs(_absTol); @@ -876,16 +760,14 @@ Syr2Tester:: } if (errorHappened && (numErrorsRealAbs + numErrorsRealRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " << h_expected(i, j).real() + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() << ", _KAT_A::abs(h_expected(i,j).real() - " "h_vanilla(i,j).real()) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } - diff = _KAT_A::abs(h_expected(i, j).imag() - h_vanilla(i, j).imag()); + diff = _KAT_A::abs(h_expected(i, j).imag() - h_vanilla(i, j).imag()); errorHappened = false; if (h_expected(i, j).imag() == 0.) { diffThreshold = _KAT_A::abs(_absTol); @@ -909,13 +791,11 @@ Syr2Tester:: } if (errorHappened && (numErrorsImagAbs + numErrorsImagRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() << ", _KAT_A::abs(h_expected(i,j).imag() - " "h_vanilla(i,j).imag()) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j @@ -923,25 +803,15 @@ Syr2Tester:: { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla differs too much from analytical on real components" - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", 
numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_expected(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": vanilla differs too much from analytical on real components" + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_expected(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", h_vanilla(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_vanilla(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsReal(numErrorsRealAbs + numErrorsRealRel); @@ -950,30 +820,19 @@ Syr2Tester:: std::cout << "WARNING" << msg.str() << std::endl; } #endif - EXPECT_LE(numErrorsReal, maxNumErrorsAllowed) - << "Failed test" << msg.str(); + EXPECT_LE(numErrorsReal, maxNumErrorsAllowed) << "Failed test" << msg.str(); } { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla differs too much from analytical on imag components" - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_expected(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": vanilla differs too much from analytical on imag components" + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_expected(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", h_vanilla(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? 
h_vanilla(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_vanilla(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsImag(numErrorsImagAbs + numErrorsImagRel); @@ -982,8 +841,7 @@ Syr2Tester:: std::cout << "WARNING" << msg.str() << std::endl; } #endif - EXPECT_LE(numErrorsImag, maxNumErrorsAllowed) - << "Failed test" << msg.str(); + EXPECT_LE(numErrorsImag, maxNumErrorsAllowed) << "Failed test" << msg.str(); } } else { int numErrorsReal(0); @@ -994,11 +852,8 @@ Syr2Tester:: if (h_expected(i, j).real() != h_vanilla(i, j).real()) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (numErrorsReal == 0) { - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).real() = " - << h_expected(i, j).real() - << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() - << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).real() = " << h_expected(i, j).real() + << ", h_vanilla(i,j).real() = " << h_vanilla(i, j).real() << std::endl; } #endif numErrorsReal++; @@ -1007,62 +862,49 @@ Syr2Tester:: if (h_expected(i, j).imag() != h_vanilla(i, j).imag()) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (numErrorsImag == 0) { - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j).imag() = " - << h_expected(i, j).imag() - << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() - << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j).imag() = " << h_expected(i, j).imag() + << ", h_vanilla(i,j).imag() = " << h_vanilla(i, j).imag() << std::endl; } #endif numErrorsImag++; } } // for j } // for i - EXPECT_EQ(numErrorsReal, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << 
": vanilla result is incorrect on real components" - << ", numErrorsReal = " << numErrorsReal; - EXPECT_EQ(numErrorsImag, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla result is incorrect on imag components" - << ", numErrorsImag = " << numErrorsImag; + EXPECT_EQ(numErrorsReal, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption + << ": vanilla result is incorrect on real components" + << ", numErrorsReal = " << numErrorsReal; + EXPECT_EQ(numErrorsImag, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption + << ": vanilla result is incorrect on imag components" + << ", numErrorsImag = " << numErrorsImag; } } // Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -Syr2Tester:: - compareVanillaAgainstExpected(const T& alpha, - const _ViewTypeExpected& h_vanilla, - const _ViewTypeExpected& h_expected) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +Syr2Tester::compareVanillaAgainstExpected( + const T& alpha, const _ViewTypeExpected& h_vanilla, const _ViewTypeExpected& h_expected) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_exp(" << i << "," << j << ") = " << h_expected(i, j) - << ", h_van(" << i << "," << j << ") = " << 
h_vanilla(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ") = " << h_expected(i, j) << ", h_van(" << i << "," << j + << ") = " << h_vanilla(i, j) << std::endl; } } } #endif - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); if (_useAnalyticalResults) { int numErrorsAbs(0); @@ -1100,12 +942,10 @@ Syr2Tester:: } if (errorHappened && (numErrorsAbs + numErrorsRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) << ", h_vanilla(i,j) = " << h_vanilla(i, j) - << ", _KAT_A::abs(h_expected(i,j) - h_vanilla(i,j)) = " - << diff << ", diffThreshold = " << diffThreshold - << std::endl; + << ", _KAT_A::abs(h_expected(i,j) - h_vanilla(i,j)) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j @@ -1113,24 +953,14 @@ Syr2Tester:: { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla differs too much from expected" - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": vanilla differs too much from expected" + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << 
iForMaxErrorRel << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_expected(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_expected(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_vanilla(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_vanilla(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_expected(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_vanilla(i,j) = " << (((_M > 0) && (_N > 0)) ? h_vanilla(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrors(numErrorsAbs + numErrorsRel); @@ -1149,8 +979,7 @@ Syr2Tester:: if (h_expected(i, j) != h_vanilla(i, j)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (numErrors == 0) { - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_expected(i,j) = " << h_expected(i, j) + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_expected(i,j) = " << h_expected(i, j) << ", h_vanilla(i,j) = " << h_vanilla(i, j) << std::endl; } #endif @@ -1158,41 +987,33 @@ Syr2Tester:: } } // for j } // for i - EXPECT_EQ(numErrors, 0) - << "Failed test" - << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": vanilla result is incorrect" - << ", numErrors = " << numErrors; + EXPECT_EQ(numErrors, 0) << "Failed test" + << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr + << ", _A_is_ll = " << _A_is_ll << ", alpha type = " << typeid(alpha).name() + << ", _useHermitianOption = " << _useHermitianOption << ", _useUpOption = " << _useUpOption + << ": vanilla result is incorrect" + << ", numErrors = " << numErrors; } } // Code for complex values -template +template template -typename std::enable_if>::value || - std::is_same>::value, - void>::type -Syr2Tester:: - compareKkSyr2AgainstReference(const T& alpha, const _HostViewTypeA& h_A, - 
const _ViewTypeExpected& h_reference) { +typename std::enable_if< + std::is_same>::value || std::is_same>::value, void>::type +Syr2Tester::compareKkSyr2AgainstReference( + const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_exp(" << i << "," << j << ") = " << h_reference(i, j) - << ", h_A(" << i << "," << j << ") = " << h_A(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ") = " << h_reference(i, j) << ", h_A(" << i << "," << j + << ") = " << h_A(i, j) << std::endl; } } } #endif - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); int numErrorsRealAbs(0); int numErrorsRealRel(0); @@ -1233,12 +1054,10 @@ Syr2Tester:: } if (errorHappened && (numErrorsRealAbs + numErrorsRealRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "ERROR, i = " << i << ", j = " << j - << ": h_reference(i,j).real() = " << h_reference(i, j).real() - << ", h_A(i,j).real() = " << h_A(i, j).real() - << ", _KAT_A::abs(h_reference(i,j).real() - h_A(i,j).real()) = " - << diff << ", diffThreshold = " << diffThreshold << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_reference(i,j).real() = " << h_reference(i, j).real() + << ", h_A(i,j).real() = " << h_A(i, j).real() + << ", _KAT_A::abs(h_reference(i,j).real() - h_A(i,j).real()) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } diff = _KAT_A::abs(h_reference(i, j).imag() - h_A(i, j).imag()); @@ -1265,95 +1084,58 @@ Syr2Tester:: } if (errorHappened && (numErrorsImagAbs + numErrorsImagRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "ERROR, i = " << i << ", j = " << j - << ": h_reference(i,j).imag() = " << h_reference(i, j).imag() - << ", h_A(i,j).imag() = " << h_A(i, j).imag() - << ", _KAT_A::abs(h_reference(i,j).imag() - 
h_A(i,j).imag()) = " - << diff << ", diffThreshold = " << diffThreshold << std::endl; + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_reference(i,j).imag() = " << h_reference(i, j).imag() + << ", h_A(i,j).imag() = " << h_A(i, j).imag() + << ", _KAT_A::abs(h_reference(i,j).imag() - h_A(i,j).imag()) = " << diff + << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout - << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_reference(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) - << ", h_A(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_reference(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) - << ", h_A(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) - << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; + std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", numErrorsRealAbs = " << numErrorsRealAbs + << ", numErrorsRealRel = " << numErrorsRealRel << ", maxErrorRealRel = " << maxErrorRealRel + << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel + << ", h_reference(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) + << ", h_A(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_reference(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) + << ", h_A(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) + << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; if ((_M == 2131) && (_N == 2131)) { std::cout << "Information" - << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", h_reference(11, 2119) = (" << h_reference(11, 2119).real() + << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", h_reference(11, 2119) = (" << h_reference(11, 2119).real() << ", " << h_reference(11, 2119).imag() << ")" - << ", h_A(11, 2119) = (" << h_A(11, 2119).real() << ", " - << h_A(11, 2119).imag() << ")" << std::endl; + << ", h_A(11, 2119) = (" << h_A(11, 2119).real() << ", " << h_A(11, 2119).imag() << ")" << std::endl; std::cout << "Information" - << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", h_reference(710, 1065) = (" << h_reference(710, 1065).real() + << ": A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", h_reference(710, 1065) = (" << h_reference(710, 1065).real() << ", " << h_reference(710, 1065).imag() << ")" - << ", h_A(710, 1065) = (" << h_A(710, 1065).real() << ", " - << h_A(710, 1065).imag() << ")" << std::endl; + << ", h_A(710, 1065) = (" << h_A(710, 1065).real() << ", " << h_A(710, 1065).imag() << ")" << std::endl; } #endif { 
std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": syr2 result is incorrect on real components" - << ", numErrorsRealAbs = " << numErrorsRealAbs - << ", numErrorsRealRel = " << numErrorsRealRel - << ", maxErrorRealRel = " << maxErrorRealRel - << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel - << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel - << ", h_reference(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": syr2 result is incorrect on real components" + << ", numErrorsRealAbs = " << numErrorsRealAbs << ", numErrorsRealRel = " << numErrorsRealRel + << ", maxErrorRealRel = " << maxErrorRealRel << ", iForMaxErrorRealRel = " << iForMaxErrorRealRel + << ", jForMaxErrorRealRel = " << jForMaxErrorRealRel << ", h_reference(i,j).real() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", h_A(i,j).real() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRealRel, jForMaxErrorRealRel).real() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsReal(numErrorsRealAbs + numErrorsRealRel); @@ -1366,25 +1148,15 @@ Syr2Tester:: } { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ": syr2 result is incorrect on imag components" - << ", numErrorsImagAbs = " << numErrorsImagAbs - << ", numErrorsImagRel = " << numErrorsImagRel - << ", maxErrorImagRel = " << maxErrorImagRel - << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel - << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel - << ", h_reference(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ": syr2 result is incorrect on imag components" + << ", numErrorsImagAbs = " << numErrorsImagAbs << ", numErrorsImagRel = " << numErrorsImagRel + << ", maxErrorImagRel = " << maxErrorImagRel << ", iForMaxErrorImagRel = " << iForMaxErrorImagRel + << ", jForMaxErrorImagRel = " << jForMaxErrorImagRel << ", h_reference(i,j).imag() = " + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", h_A(i,j).imag() = " - << (((_M > 0) && (_N > 0)) - ? h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorImagRel, jForMaxErrorImagRel).imag() : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrorsImag(numErrorsImagAbs + numErrorsImagRel); @@ -1398,28 +1170,23 @@ Syr2Tester:: } // Code for non-complex values -template +template template -typename std::enable_if>::value && - !std::is_same>::value, - void>::type -Syr2Tester:: - compareKkSyr2AgainstReference(const T& alpha, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_reference) { +typename std::enable_if< + !std::is_same>::value && !std::is_same>::value, void>::type +Syr2Tester::compareKkSyr2AgainstReference( + const T& alpha, const _HostViewTypeA& h_A, const _ViewTypeExpected& h_reference) { #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_exp(" << i << "," << j << ") = " << h_reference(i, j) - << ", h_A(" << i << "," << j << ") = " << h_A(i, j) - << std::endl; + std::cout << "h_exp(" << i << "," << j << ") = " << h_reference(i, j) << ", h_A(" << i << "," << j + << ") = " << h_A(i, j) << std::endl; } } } #endif - int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * - 1.e-3); + int maxNumErrorsAllowed(static_cast(_M) * static_cast(_N) * 1.e-3); int numErrorsAbs(0); int numErrorsRel(0); @@ -1455,53 +1222,34 @@ Syr2Tester:: } if (errorHappened && (numErrorsAbs + numErrorsRel == 1)) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "ERROR, i = " << i << ", j = " << j - << ": h_reference(i,j) = " << h_reference(i, j) - << ", h_A(i,j) = " << h_A(i, j) - << ", _KAT_A::abs(h_reference(i,j) - h_A(i,j)) = " << diff + std::cout << "ERROR, i = " << i << ", j = " << j << ": h_reference(i,j) = " << h_reference(i, j) + << ", h_A(i,j) = " << h_A(i, j) << ", _KAT_A::abs(h_reference(i,j) - h_A(i,j)) = " << diff << ", diffThreshold = " << diffThreshold << std::endl; #endif } } // for j } // for i #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - 
<< ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption - << ", _useUpOption = " << _useUpOption - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel - << ", jForMaxErrorRel = " << jForMaxErrorRel + std::cout << "A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption + << ", _useUpOption = " << _useUpOption << ", numErrorsAbs = " << numErrorsAbs + << ", numErrorsRel = " << numErrorsRel << ", maxErrorRel = " << maxErrorRel + << ", iForMaxErrorRel = " << iForMaxErrorRel << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_reference(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_A(i,j) = " - << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_A(i,j) = " << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed << std::endl; #endif { std::ostringstream msg; - msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr - << ", _A_is_ll = " << _A_is_ll - << ", alpha type = " << typeid(alpha).name() - << ", _useHermitianOption = " << _useHermitianOption + msg << ", A is " << _M << " by " << _N << ", _A_is_lr = " << _A_is_lr << ", _A_is_ll = " << _A_is_ll + << ", alpha type = " << typeid(alpha).name() << ", _useHermitianOption = " << _useHermitianOption << ", _useUpOption = " << _useUpOption << ": syr2 result is incorrect" - << ", numErrorsAbs = " << numErrorsAbs - << ", numErrorsRel = " << numErrorsRel - << ", maxErrorRel = " << maxErrorRel - << ", iForMaxErrorRel = " << iForMaxErrorRel + << ", numErrorsAbs = " << numErrorsAbs << ", numErrorsRel = " << numErrorsRel + << ", maxErrorRel = " << maxErrorRel << ", iForMaxErrorRel = " << iForMaxErrorRel << ", jForMaxErrorRel = " << jForMaxErrorRel << ", h_reference(i,j) = " - << (((_M > 0) && (_N > 0)) - ? h_reference(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) - << ", h_A(i,j) = " - << (((_M > 0) && (_N > 0)) ? h_A(iForMaxErrorRel, jForMaxErrorRel) - : 9.999e+99) + << (((_M > 0) && (_N > 0)) ? h_reference(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) + << ", h_A(i,j) = " << (((_M > 0) && (_N > 0)) ? 
h_A(iForMaxErrorRel, jForMaxErrorRel) : 9.999e+99) << ", maxNumErrorsAllowed = " << maxNumErrorsAllowed; int numErrors(numErrorsAbs + numErrorsRel); @@ -1514,22 +1262,16 @@ Syr2Tester:: } } -template +template template -void Syr2Tester:: - callKkSyr2AndCompareAgainstExpected( - const ScalarA& alpha, TX& x, TY& y, - view_stride_adapter<_ViewTypeA, false>& A, - const _ViewTypeExpected& h_expected, const std::string& situation) { +void Syr2Tester::callKkSyr2AndCompareAgainstExpected( + const ScalarA& alpha, TX& x, TY& y, view_stride_adapter<_ViewTypeA, false>& A, const _ViewTypeExpected& h_expected, + const std::string& situation) { #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "In Test_Blas2_syr2, '" << situation << "', alpha = " << alpha - << std::endl; + std::cout << "In Test_Blas2_syr2, '" << situation << "', alpha = " << alpha << std::endl; std::cout << "In Test_Blas2_syr2.hpp, right before calling KokkosBlas::syr2()" << ": ViewTypeA = " << typeid(_ViewTypeA).name() - << ", _kkSyr2ShouldThrowException = " << _kkSyr2ShouldThrowException - << std::endl; + << ", _kkSyr2ShouldThrowException = " << _kkSyr2ShouldThrowException << std::endl; #endif std::string mode = _useHermitianOption ? "H" : "T"; std::string uplo = _useUpOption ? 
"U" : "L"; @@ -1540,25 +1282,21 @@ void Syr2Tester +template template -void Syr2Tester:: - callKkGerAndCompareKkSyr2AgainstIt( - const ScalarA& alpha, TX& x, TY& y, - view_stride_adapter<_ViewTypeA, false>& org_A, - const _HostViewTypeA& h_A_syr2, const std::string& situation) { +void Syr2Tester::callKkGerAndCompareKkSyr2AgainstIt( + const ScalarA& alpha, TX& x, TY& y, view_stride_adapter<_ViewTypeA, false>& org_A, const _HostViewTypeA& h_A_syr2, + const std::string& situation) { view_stride_adapter<_ViewTypeA, false> A_ger("A_ger", _M, _N); Kokkos::deep_copy(A_ger.d_base, org_A.d_base); @@ -1583,12 +1317,10 @@ void Syr2Tester h_ger_reference( - "h_ger_reference", _M, _N); + view_stride_adapter<_ViewTypeExpected, true> h_ger_reference("h_ger_reference", _M, _N); Kokkos::deep_copy(h_ger_reference.d_base, A_ger.d_base); Kokkos::deep_copy(h_ger_reference.h_base, h_ger_reference.d_base); std::string uplo = _useUpOption ? "U" : "L"; for (int i = 0; i < _M; ++i) { for (int j = 0; j < _N; ++j) { - if (((_useUpOption == true) && (i <= j)) || - ((_useUpOption == false) && (i >= j))) { + if (((_useUpOption == true) && (i <= j)) || ((_useUpOption == false) && (i >= j))) { // Keep h_ger_reference as already computed } else { h_ger_reference.h_view(i, j) = org_A.h_view(i, j); @@ -1677,9 +1398,7 @@ void Syr2Tester::value || - std::is_same::value || + bool xBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; - bool yBool = std::is_same::value || - std::is_same::value || + bool yBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; - bool aBool = std::is_same::value || - std::is_same::value || + bool aBool = std::is_same::value || std::is_same::value || std::is_same>::value || std::is_same>::value; bool useAnalyticalResults = xBool && yBool && aBool; #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + 
(!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "+--------------------------------------------------------------" "------------" @@ -1725,8 +1440,7 @@ int test_syr2(const std::string& /*caseName*/) { std::cout << "Starting " << caseName << " for LAYOUTLEFT ..." << std::endl; #endif if (true) { - Test::Syr2Tester + Test::Syr2Tester tester; tester.test(0, 0); tester.test(1, 0); @@ -1761,8 +1475,7 @@ int test_syr2(const std::string& /*caseName*/) { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "+--------------------------------------------------------------" "------------" @@ -1770,8 +1483,7 @@ int test_syr2(const std::string& /*caseName*/) { std::cout << "Starting " << caseName << " for LAYOUTRIGHT ..." << std::endl; #endif if (true) { - Test::Syr2Tester + Test::Syr2Tester tester; tester.test(0, 0); tester.test(1, 0); @@ -1806,8 +1518,7 @@ int test_syr2(const std::string& /*caseName*/) { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "+--------------------------------------------------------------" "------------" @@ -1815,8 +1526,7 @@ int test_syr2(const std::string& /*caseName*/) { std::cout << "Starting " << caseName << " for LAYOUTSTRIDE ..." 
<< std::endl; #endif if (true) { - Test::Syr2Tester tester; tester.test(0, 0); @@ -1851,8 +1561,7 @@ int test_syr2(const std::string& /*caseName*/) { #endif #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "+--------------------------------------------------------------" "------------" @@ -1860,8 +1569,7 @@ int test_syr2(const std::string& /*caseName*/) { std::cout << "Starting " << caseName << " for MIXED LAYOUTS ..." << std::endl; #endif if (true) { - Test::Syr2Tester + Test::Syr2Tester tester; tester.test(1, 0); tester.test(2, 0); @@ -1879,8 +1587,7 @@ int test_syr2(const std::string& /*caseName*/) { } if (true) { - Test::Syr2Tester + Test::Syr2Tester tester; tester.test(1024, 0); } @@ -1903,8 +1610,7 @@ int test_syr2(const std::string& /*caseName*/) { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr2_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr2_float"); test_syr2("test case syr2_float"); @@ -1913,19 +1619,17 @@ TEST_F(TestCategory, syr2_float) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr2_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr2_complex_float"); - test_syr2, Kokkos::complex, - Kokkos::complex, TestDevice>("test case syr2_complex_float"); + test_syr2, Kokkos::complex, Kokkos::complex, TestDevice>( + "test case syr2_complex_float"); Kokkos::Profiling::popRegion(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - 
(!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr2_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr2_double"); test_syr2("test case syr2_double"); @@ -1934,20 +1638,17 @@ TEST_F(TestCategory, syr2_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr2_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr2_complex_double"); - test_syr2, Kokkos::complex, - Kokkos::complex, TestDevice>( + test_syr2, Kokkos::complex, Kokkos::complex, TestDevice>( "test case syr2_complex_double"); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, syr2_int) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr2_int"); test_syr2("test case syr2_int"); @@ -1955,8 +1656,7 @@ TEST_F(TestCategory, syr2_int) { } #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F(TestCategory, syr2_int_float_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::syr2_int_float_double"); test_syr2("test case syr2_mixed_types"); diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_team_gemv.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_team_gemv.hpp index 808532a98ead..851410fdb709 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_team_gemv.hpp +++ 
b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_team_gemv.hpp @@ -27,37 +27,30 @@ namespace Test { -template +template struct TeamGEMVOp : public GemvOpBase { using params = GemvOpBase; - TeamGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) + TeamGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, ScalarType beta_, YType y_) : params(trans_, alpha_, A_, x_, beta_, y_) {} template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, - params::beta, params::y); + member, params::trans, params::alpha, params::A, params::x, params::beta, params::y); } }; struct TeamGemvFactory { - template - using functor_type = - TeamGEMVOp; + template + using functor_type = TeamGEMVOp; - using algorithms = std::tuple; + using algorithms = std::tuple; }; } // namespace Test -#define TEST_TEAM_CASE4(N, A, X, Y, SC) \ - TEST_CASE4(team, TeamGemvFactory, N, A, X, Y, SC) +#define TEST_TEAM_CASE4(N, A, X, Y, SC) TEST_CASE4(team, TeamGemvFactory, N, A, X, Y, SC) #define TEST_TEAM_CASE2(N, S, SC) TEST_CASE2(team, TeamGemvFactory, N, S, SC) #define TEST_TEAM_CASE(N, S) TEST_CASE(team, TeamGemvFactory, N, S) diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_teamvector_gemv.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_teamvector_gemv.hpp index 655a5e2f12c6..74cdebf06256 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas2_teamvector_gemv.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas2_teamvector_gemv.hpp @@ -27,30 +27,23 @@ namespace Test { -template +template struct TeamVectorGEMVOp : public GemvOpBase { using params = GemvOpBase; - TeamVectorGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) + TeamVectorGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, ScalarType beta_, YType y_) : params(trans_, alpha_, A_, x_, beta_, y_) {} 
template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::Gemv::invoke(member, params::trans, - params::alpha, params::A, - params::x, params::beta, - params::y); + KokkosBlas::Experimental::Gemv::invoke( + member, params::trans, params::alpha, params::A, params::x, params::beta, params::y); } }; struct TeamVectorGemvFactory { - template - using functor_type = - TeamVectorGEMVOp; + template + using functor_type = TeamVectorGEMVOp; // no Blocked implementation using algorithms = std::tuple; @@ -58,12 +51,9 @@ struct TeamVectorGemvFactory { } // namespace Test -#define TEST_TEAMVECTOR_CASE4(N, A, X, Y, SC) \ - TEST_CASE4(teamvector, TeamVectorGemvFactory, N, A, X, Y, SC) -#define TEST_TEAMVECTOR_CASE2(N, S, SC) \ - TEST_CASE2(teamvector, TeamVectorGemvFactory, N, S, SC) -#define TEST_TEAMVECTOR_CASE(N, S) \ - TEST_CASE(teamvector, TeamVectorGemvFactory, N, S) +#define TEST_TEAMVECTOR_CASE4(N, A, X, Y, SC) TEST_CASE4(teamvector, TeamVectorGemvFactory, N, A, X, Y, SC) +#define TEST_TEAMVECTOR_CASE2(N, S, SC) TEST_CASE2(teamvector, TeamVectorGemvFactory, N, S, SC) +#define TEST_TEAMVECTOR_CASE(N, S) TEST_CASE(teamvector, TeamVectorGemvFactory, N, S) #ifdef KOKKOSKERNELS_TEST_FLOAT TEST_TEAMVECTOR_CASE(float, float) diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas3_gemm.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas3_gemm.hpp index cd91bc6d959b..d56886cf13f3 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas3_gemm.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas3_gemm.hpp @@ -23,8 +23,7 @@ namespace Test { -template +template struct gemm_VanillaGEMM { bool A_t, B_t, A_c, B_c; int N, K; @@ -41,12 +40,9 @@ struct gemm_VanillaGEMM { ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { // GNU COMPILER BUG WORKAROUND -#if 
defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) +#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int i = team.league_rank(); #else const int i = team.league_rank(); @@ -77,10 +73,8 @@ struct gemm_VanillaGEMM { }; template -void build_matrices(const int M, const int N, const int K, - const typename ViewTypeA::value_type alpha, ViewTypeA& A, - ViewTypeB& B, const typename ViewTypeA::value_type beta, - ViewTypeC& C, ViewTypeC& Cref) { +void build_matrices(const int M, const int N, const int K, const typename ViewTypeA::value_type alpha, ViewTypeA& A, + ViewTypeB& B, const typename ViewTypeA::value_type beta, ViewTypeC& C, ViewTypeC& Cref) { using execution_space = typename TestDevice::execution_space; using ScalarA = typename ViewTypeA::non_const_value_type; using ScalarB = typename ViewTypeB::non_const_value_type; @@ -93,28 +87,22 @@ void build_matrices(const int M, const int N, const int K, // (SA 11 Dec 2019) Max (previously: 10) increased to detect the bug in // Trilinos issue #6418 - const uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); + const uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); Kokkos::Random_XorShift64_Pool rand_pool(seed); - Kokkos::fill_random(A, rand_pool, - Kokkos::rand::generator_type, - ScalarA>::max()); - Kokkos::fill_random(B, rand_pool, - Kokkos::rand::generator_type, - ScalarB>::max()); - Kokkos::fill_random(C, rand_pool, - Kokkos::rand::generator_type, - ScalarC>::max()); + Kokkos::fill_random( + A, rand_pool, + Kokkos::rand::generator_type, ScalarA>::max()); + Kokkos::fill_random( + B, rand_pool, + Kokkos::rand::generator_type, ScalarB>::max()); + Kokkos::fill_random( + C, rand_pool, + Kokkos::rand::generator_type, ScalarC>::max()); Kokkos::deep_copy(Cref, C); Kokkos::fence(); - struct Test::gemm_VanillaGEMM - vgemm; + struct Test::gemm_VanillaGEMM vgemm; vgemm.A_t 
= false; vgemm.B_t = false; vgemm.A_c = false; @@ -127,12 +115,10 @@ void build_matrices(const int M, const int N, const int K, vgemm.alpha = alpha; vgemm.beta = beta; - Kokkos::parallel_for( - "KokkosBlas::Test::gemm_VanillaGEMM", - Kokkos::TeamPolicy( - M, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - vgemm); + Kokkos::parallel_for("KokkosBlas::Test::gemm_VanillaGEMM", + Kokkos::TeamPolicy( + M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + vgemm); Kokkos::fence(); } @@ -146,9 +132,7 @@ struct DiffGEMM { typedef typename APT::mag_type mag_type; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team, - mag_type& diff) const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team, mag_type& diff) const { const int i = team.league_rank(); mag_type diff_row = 0; Kokkos::parallel_reduce( @@ -166,8 +150,7 @@ struct DiffGEMM { }; template -void impl_test_gemm(const char* TA, const char* TB, int M, int N, int K, - typename ViewTypeA::value_type alpha, +void impl_test_gemm(const char* TA, const char* TB, int M, int N, int K, typename ViewTypeA::value_type alpha, typename ViewTypeC::value_type beta) { bool A_t = (TA[0] != 'N') && (TA[0] != 'n'); bool B_t = (TB[0] != 'N') && (TB[0] != 'n'); @@ -187,30 +170,25 @@ void impl_test_gemm(const char* TA, const char* TB, int M, int N, int K, ViewTypeC C("C", M, N); ViewTypeC C2("C", M, N); - const uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); + const uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); Kokkos::Random_XorShift64_Pool rand_pool(seed); // (SA 11 Dec 2019) Max (previously: 10) increased to detect the bug in // Trilinos issue #6418 - Kokkos::fill_random(A, rand_pool, - Kokkos::rand::generator_type, - ScalarA>::max()); - Kokkos::fill_random(B, rand_pool, - Kokkos::rand::generator_type, - ScalarB>::max()); - Kokkos::fill_random(C, rand_pool, - 
Kokkos::rand::generator_type, - ScalarC>::max()); + Kokkos::fill_random( + A, rand_pool, + Kokkos::rand::generator_type, ScalarA>::max()); + Kokkos::fill_random( + B, rand_pool, + Kokkos::rand::generator_type, ScalarB>::max()); + Kokkos::fill_random( + C, rand_pool, + Kokkos::rand::generator_type, ScalarC>::max()); Kokkos::deep_copy(C2, C); Kokkos::fence(); - struct gemm_VanillaGEMM - vgemm; + struct gemm_VanillaGEMM vgemm; vgemm.A_t = A_t; vgemm.B_t = B_t; vgemm.A_c = A_c; @@ -223,12 +201,10 @@ void impl_test_gemm(const char* TA, const char* TB, int M, int N, int K, vgemm.alpha = alpha; vgemm.beta = beta; - Kokkos::parallel_for( - "KokkosBlas::Test::gemm_VanillaGEMM", - Kokkos::TeamPolicy( - M, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - vgemm); + Kokkos::parallel_for("KokkosBlas::Test::gemm_VanillaGEMM", + Kokkos::TeamPolicy( + M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + vgemm); KokkosBlas::gemm(TA, TB, alpha, A, B, beta, C); @@ -238,9 +214,8 @@ void impl_test_gemm(const char* TA, const char* TB, int M, int N, int K, diffgemm.C = C; diffgemm.C2 = C2; - Kokkos::parallel_reduce("KokkosBlas::Test::DiffGEMM", - Kokkos::TeamPolicy(M, Kokkos::AUTO), - diffgemm, diff_C); + Kokkos::parallel_reduce("KokkosBlas::Test::DiffGEMM", Kokkos::TeamPolicy(M, Kokkos::AUTO), diffgemm, + diff_C); if (N != 0 && M != 0) { int K_eff = (K == 0) ? 
1 : K; @@ -258,8 +233,7 @@ void impl_test_gemm(const char* TA, const char* TB, int M, int N, int K, } template -void impl_test_stream_gemm_psge2(const int M, const int N, const int K, - const Scalar alpha, const Scalar beta) { +void impl_test_stream_gemm_psge2(const int M, const int N, const int K, const Scalar alpha, const Scalar beta) { using execution_space = typename Device::execution_space; using ViewTypeA = Kokkos::View; using ViewTypeB = Kokkos::View; @@ -279,8 +253,7 @@ void impl_test_stream_gemm_psge2(const int M, const int N, const int K, Test::build_matrices(M, N, K, alpha, A1, B1, beta, C1, C1ref); Test::build_matrices(N, M, K, alpha, A2, B2, beta, C2, C2ref); - auto instances = - Kokkos::Experimental::partition_space(execution_space(), 1, 1); + auto instances = Kokkos::Experimental::partition_space(execution_space(), 1, 1); KokkosBlas::gemm(instances[0], tA, tB, alpha, A1, B1, beta, C1); KokkosBlas::gemm(instances[1], tA, tB, alpha, A2, B2, beta, C2); Kokkos::fence(); @@ -291,12 +264,10 @@ void impl_test_stream_gemm_psge2(const int M, const int N, const int K, diffgemm1.C = C1; diffgemm1.C2 = C1ref; - Kokkos::parallel_reduce( - "KokkosBlas::Test::DiffGEMM1", - Kokkos::TeamPolicy( - M, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - diffgemm1, diff_C1); + Kokkos::parallel_reduce("KokkosBlas::Test::DiffGEMM1", + Kokkos::TeamPolicy( + M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + diffgemm1, diff_C1); mag_type diff_C2 = 0; struct Test::DiffGEMM diffgemm2; @@ -304,12 +275,10 @@ void impl_test_stream_gemm_psge2(const int M, const int N, const int K, diffgemm2.C = C2; diffgemm2.C2 = C2ref; - Kokkos::parallel_reduce( - "KokkosBlas::Test::DiffGEMM2", - Kokkos::TeamPolicy( - N, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - diffgemm2, diff_C2); + Kokkos::parallel_reduce("KokkosBlas::Test::DiffGEMM2", + Kokkos::TeamPolicy( + N, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + diffgemm2, 
diff_C2); Kokkos::fence(); if (N != 0 && M != 0) { @@ -317,8 +286,7 @@ void impl_test_stream_gemm_psge2(const int M, const int N, const int K, // Expected Result: Random Walk in the least significant bit (i.e. ~ // sqrt(K)*eps eps scales with the total sum and has a factor in it for the // accuracy of the operations -> eps = K * 75 * machine_eps * 7 - const double diff_C_expected = - 1.0 * sqrt(K_eff) * K_eff * 75 * machine_eps * 7; + const double diff_C_expected = 1.0 * sqrt(K_eff) * K_eff * 75 * machine_eps * 7; const double diff_C1_average = diff_C1 / (N * M); if ((diff_C1_average >= 1.05 * diff_C_expected)) { @@ -342,55 +310,45 @@ void test_gemm() { typedef Kokkos::View view_type_b; typedef Kokkos::View view_type_c; std::vector modes = {"N", "T"}; - if (std::is_same>::value || - std::is_same>::value) + if (std::is_same>::value || std::is_same>::value) modes.push_back("C"); Scalar alpha = 4.5; std::vector betas = {0.0, 3.0}; for (Scalar beta : betas) { for (auto amode : modes) { for (auto bmode : modes) { - Test::impl_test_gemm( - amode, bmode, 0, 0, 0, alpha, beta); + Test::impl_test_gemm(amode, bmode, 0, 0, 0, alpha, beta); // BMK: N = 1 exercises the special GEMV code path in GEMM (currently, // only for modes N/N) - Test::impl_test_gemm( - amode, bmode, 50, 1, 40, alpha, beta); + Test::impl_test_gemm(amode, bmode, 50, 1, 40, alpha, beta); // LBV: K = 0 exercise the quick return code path in GEMM - Test::impl_test_gemm( - amode, bmode, 20, 14, 0, alpha, beta); - Test::impl_test_gemm( - amode, bmode, 13, 15, 17, alpha, beta); - Test::impl_test_gemm( - amode, bmode, 179, 15, 211, alpha, beta); - Test::impl_test_gemm( - amode, bmode, 12, 3071, 517, alpha, beta); + Test::impl_test_gemm(amode, bmode, 20, 14, 0, alpha, beta); + Test::impl_test_gemm(amode, bmode, 13, 15, 17, alpha, beta); + Test::impl_test_gemm(amode, bmode, 179, 15, 211, alpha, + beta); + Test::impl_test_gemm(amode, bmode, 12, 3071, 517, alpha, + beta); } } } auto pool_size = 
execution_space().concurrency(); if (pool_size >= 2) { - Test::impl_test_stream_gemm_psge2( - 53, 42, 17, 4.5, - 3.0); // General code path - Test::impl_test_stream_gemm_psge2( - 13, 1, 17, 4.5, 3.0); // gemv based gemm code path - Test::impl_test_stream_gemm_psge2( - 7, 13, 17, 4.5, - 3.0); // dot based gemm code path + Test::impl_test_stream_gemm_psge2(53, 42, 17, 4.5, + 3.0); // General code path + Test::impl_test_stream_gemm_psge2(13, 1, 17, 4.5, 3.0); // gemv based gemm code path + Test::impl_test_stream_gemm_psge2(7, 13, 17, 4.5, + 3.0); // dot based gemm code path } } template void test_gemm_enabled_layouts() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) test_gemm(); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) test_gemm(); #endif } @@ -416,8 +374,7 @@ void test_gemm_mixed_scalars() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemm_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemm_float"); test_gemm_enabled_layouts(); @@ -426,8 +383,7 @@ TEST_F(TestCategory, gemm_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemm_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemm_double"); test_gemm_enabled_layouts(); @@ -436,8 +392,7 @@ TEST_F(TestCategory, gemm_double) { #endif #if 
defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemm_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemm_complex_double"); test_gemm_enabled_layouts>(); @@ -446,8 +401,7 @@ TEST_F(TestCategory, gemm_complex_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gemm_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemm_complex_float"); test_gemm_enabled_layouts>(); @@ -455,21 +409,17 @@ TEST_F(TestCategory, gemm_complex_float) { } #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) && \ - !defined(KOKKOSKERNELS_ETI_ONLY) +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) && !defined(KOKKOSKERNELS_ETI_ONLY) TEST_F(TestCategory, gemm_mixed_scalars_complex_double_double) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::gemm_mixed_complex_double_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemm_mixed_complex_double_double"); test_gemm_mixed_scalars, double>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) && \ - !defined(KOKKOSKERNELS_ETI_ONLY) +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) && !defined(KOKKOSKERNELS_ETI_ONLY) TEST_F(TestCategory, gemm_mixed_scalar_complex_float_float) { - Kokkos::Profiling::pushRegion( - "KokkosBlas::Test::gemm_mixed_complex_float_float"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemm_mixed_complex_float_float"); test_gemm_mixed_scalars, float>(); Kokkos::Profiling::popRegion(); } diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trmm.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trmm.hpp index 
a186835aaa20..d5ba622969fd 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trmm.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trmm.hpp @@ -44,8 +44,7 @@ struct NonUnitDiagTRMM { void operator()(const int& i) const { A_(i, i) = A_(i, i) + 10; } }; -template +template struct trmm_VanillaGEMM { bool A_t, B_t, A_c, B_c; int N, K; @@ -62,12 +61,9 @@ struct trmm_VanillaGEMM { ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { // GNU COMPILER BUG WORKAROUND -#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) +#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int i = team.league_rank(); #else const int i = team.league_rank(); @@ -98,8 +94,8 @@ struct trmm_VanillaGEMM { }; template -void impl_test_trmm(const char* side, const char* uplo, const char* trans, - const char* diag, int M, int N, Scalar alpha) { +void impl_test_trmm(const char* side, const char* uplo, const char* trans, const char* diag, int M, int N, + Scalar alpha) { using execution_space = typename ViewTypeA::device_type::execution_space; using ScalarA = typename ViewTypeA::value_type; using APT = Kokkos::ArithTraits; @@ -112,45 +108,35 @@ void impl_test_trmm(const char* side, const char* uplo, const char* trans, ViewTypeA A("A", K, K); ViewTypeB B("B", M, N); ViewTypeB B_expected("B_expected", M, N); - uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - ScalarA beta = ScalarA(0); + uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + ScalarA beta = ScalarA(0); // printf("KokkosBlas::trmm test for alpha %g, %c %c %c %c, M %d, N %d, eps // %g, ViewType: %s\n", // Kokkos::ArithTraits::real(alpha),side[0],uplo[0],trans[0],diag[0],M,N,eps,typeid(ViewTypeA).name()); - 
typename ViewTypeA::HostMirror host_A = Kokkos::create_mirror_view(A); - typename ViewTypeB::HostMirror host_B_actual = Kokkos::create_mirror_view(B); - typename ViewTypeB::HostMirror host_B_expected = - Kokkos::create_mirror_view(B_expected); + typename ViewTypeA::HostMirror host_A = Kokkos::create_mirror_view(A); + typename ViewTypeB::HostMirror host_B_actual = Kokkos::create_mirror_view(B); + typename ViewTypeB::HostMirror host_B_expected = Kokkos::create_mirror_view(B_expected); Kokkos::Random_XorShift64_Pool rand_pool(seed); if ((diag[0] == 'U') || (diag[0] == 'u')) { // Initialize A with deterministic random numbers - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarA>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); using functor_type = UnitDiagTRMM; functor_type udtrmm(A); // Initialize As diag with 1s - Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRMM", - Kokkos::RangePolicy(0, K), udtrmm); + Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRMM", Kokkos::RangePolicy(0, K), udtrmm); } else { //(diag[0]=='N')||(diag[0]=='n') // Initialize A with random numbers - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarA>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); using functor_type = NonUnitDiagTRMM; functor_type nudtrmm(A); // Initialize As diag with A(i,i)+10 - Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRMM", - Kokkos::RangePolicy(0, K), nudtrmm); + Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRMM", Kokkos::RangePolicy(0, K), nudtrmm); } - Kokkos::fill_random( - B, rand_pool, - Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(B, rand_pool, Kokkos::rand, ScalarA>::max()); Kokkos::deep_copy(host_A, A); // Make host_A a lower triangle @@ -164,8 +150,7 @@ void impl_test_trmm(const char* side, const char* uplo, const char* trans, } Kokkos::deep_copy(A, host_A); - struct trmm_VanillaGEMM - vgemm; + struct trmm_VanillaGEMM vgemm; if (A_l) { // B_expected = alpha * 
op(A) * B + beta * C = 1 * op(A) * B + 0 * C vgemm.A_t = (trans[0] != 'N') && (trans[0] != 'n'); @@ -188,12 +173,10 @@ void impl_test_trmm(const char* side, const char* uplo, const char* trans, vgemm.C = B_expected; // out vgemm.alpha = alpha; vgemm.beta = beta; - Kokkos::parallel_for( - "KokkosBlas::Test::trmm_VanillaGEMM", - Kokkos::TeamPolicy( - M, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - vgemm); + Kokkos::parallel_for("KokkosBlas::Test::trmm_VanillaGEMM", + Kokkos::TeamPolicy( + M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + vgemm); Kokkos::fence(); Kokkos::deep_copy(host_B_expected, B_expected); @@ -221,41 +204,38 @@ void impl_test_trmm(const char* side, const char* uplo, const char* trans, template int test_trmm(const char* mode, ScalarA alpha) { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using view_type_a_ll = Kokkos::View; using view_type_b_ll = Kokkos::View; - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 0, 0, alpha); - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 101, 19, alpha); - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 19, 101, alpha); - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 12, 731, alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 0, 0, + alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 101, 19, + alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 19, 101, + alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 12, 731, + alpha); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using 
view_type_a_lr = Kokkos::View; using view_type_b_lr = Kokkos::View; - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 0, 0, alpha); - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 101, 19, alpha); - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 19, 101, alpha); - Test::impl_test_trmm( - &mode[0], &mode[1], &mode[2], &mode[3], 12, 731, alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 0, 0, + alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 101, 19, + alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 19, 101, + alpha); + Test::impl_test_trmm(&mode[0], &mode[1], &mode[2], &mode[3], 12, 731, + alpha); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trmm_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_float"); float alpha = 1.0f; @@ -300,8 +280,7 @@ TEST_F(TestCategory, trmm_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trmm_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_double"); double alpha = 1.0; @@ -346,399 +325,333 @@ TEST_F(TestCategory, trmm_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) ///////////////// alpha 1.0 ///////////////// TEST_F(TestCategory, trmm_complex_double_LLNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLNN"); - test_trmm, Kokkos::complex, TestDevice>( - "LLNN", 
1.0); + test_trmm, Kokkos::complex, TestDevice>("LLNN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LLNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLNU"); - test_trmm, Kokkos::complex, TestDevice>( - "LLNU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("LLNU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LLCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLCN"); - test_trmm, Kokkos::complex, TestDevice>( - "LLCN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("LLCN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LLCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLCU"); - test_trmm, Kokkos::complex, TestDevice>( - "LLCU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("LLCU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUNN"); - test_trmm, Kokkos::complex, TestDevice>( - "LUNN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("LUNN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUNU"); - test_trmm, Kokkos::complex, TestDevice>( - "LUNU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("LUNU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUCN"); - test_trmm, Kokkos::complex, TestDevice>( - "LUCN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("LUCN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUCU"); - test_trmm, Kokkos::complex, TestDevice>( - "LUCU", 1.0); + test_trmm, Kokkos::complex, 
TestDevice>("LUCU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLNN"); - test_trmm, Kokkos::complex, TestDevice>( - "RLNN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RLNN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLNU"); - test_trmm, Kokkos::complex, TestDevice>( - "RLNU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RLNU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLCN"); - test_trmm, Kokkos::complex, TestDevice>( - "RLCN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RLCN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLCU"); - test_trmm, Kokkos::complex, TestDevice>( - "RLCU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RLCU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUNN"); - test_trmm, Kokkos::complex, TestDevice>( - "RUNN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RUNN", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUNU"); - test_trmm, Kokkos::complex, TestDevice>( - "RUNU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RUNU", 1.0); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUCN"); - test_trmm, Kokkos::complex, TestDevice>( - "RUCN", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RUCN", 1.0); 
Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUCU"); - test_trmm, Kokkos::complex, TestDevice>( - "RUCU", 1.0); + test_trmm, Kokkos::complex, TestDevice>("RUCU", 1.0); Kokkos::Profiling::popRegion(); } ///////////////// alpha 4.5 ///////////////// TEST_F(TestCategory, trmm_complex_double_LLNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLNN"); - test_trmm, Kokkos::complex, TestDevice>( - "LLNN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LLNN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LLNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLNU"); - test_trmm, Kokkos::complex, TestDevice>( - "LLNU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LLNU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LLCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLCN"); - test_trmm, Kokkos::complex, TestDevice>( - "LLCN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LLCN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LLCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LLCU"); - test_trmm, Kokkos::complex, TestDevice>( - "LLCU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LLCU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUNN"); - test_trmm, Kokkos::complex, TestDevice>( - "LUNN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LUNN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } 
TEST_F(TestCategory, trmm_complex_double_LUNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUNU"); - test_trmm, Kokkos::complex, TestDevice>( - "LUNU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LUNU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUCN"); - test_trmm, Kokkos::complex, TestDevice>( - "LUCN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LUCN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_LUCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_LUCU"); - test_trmm, Kokkos::complex, TestDevice>( - "LUCU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("LUCU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLNN"); - test_trmm, Kokkos::complex, TestDevice>( - "RLNN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RLNN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLNU"); - test_trmm, Kokkos::complex, TestDevice>( - "RLNU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RLNU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RLCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLCN"); - test_trmm, Kokkos::complex, TestDevice>( - "RLCN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RLCN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, 
trmm_complex_double_RLCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RLCU"); - test_trmm, Kokkos::complex, TestDevice>( - "RLCU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RLCU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUNN"); - test_trmm, Kokkos::complex, TestDevice>( - "RUNN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RUNN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUNU"); - test_trmm, Kokkos::complex, TestDevice>( - "RUNU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RUNU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUCN"); - test_trmm, Kokkos::complex, TestDevice>( - "RUCN", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RUCN", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_double_RUCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_double_RUCU"); - test_trmm, Kokkos::complex, TestDevice>( - "RUCU", Kokkos::complex(4.5, 0.0)); + test_trmm, Kokkos::complex, TestDevice>("RUCU", Kokkos::complex(4.5, 0.0)); Kokkos::Profiling::popRegion(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) ///////////////// alpha 1.0 ///////////////// TEST_F(TestCategory, trmm_complex_float_LLNN_one) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLNN"); - test_trmm, Kokkos::complex, TestDevice>("LLNN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LLNN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LLNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLNU"); - test_trmm, Kokkos::complex, TestDevice>("LLNU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LLNU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LLCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLCN"); - test_trmm, Kokkos::complex, TestDevice>("LLCN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LLCN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LLCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLCU"); - test_trmm, Kokkos::complex, TestDevice>("LLCU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LLCU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUNN"); - test_trmm, Kokkos::complex, TestDevice>("LUNN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LUNN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUNU"); - test_trmm, Kokkos::complex, TestDevice>("LUNU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LUNU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUCN"); - test_trmm, Kokkos::complex, TestDevice>("LUCN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LUCN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUCU_one) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUCU"); - test_trmm, Kokkos::complex, TestDevice>("LUCU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("LUCU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLNN"); - test_trmm, Kokkos::complex, TestDevice>("RLNN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RLNN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLNU"); - test_trmm, Kokkos::complex, TestDevice>("RLNU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RLNU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLCN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLCN"); - test_trmm, Kokkos::complex, TestDevice>("RLCN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RLCN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLCU"); - test_trmm, Kokkos::complex, TestDevice>("RLCU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RLCU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUNN_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUNN"); - test_trmm, Kokkos::complex, TestDevice>("RUNN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RUNN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUNU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUNU"); - test_trmm, Kokkos::complex, TestDevice>("RUNU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RUNU", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUCN_one) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUCN"); - test_trmm, Kokkos::complex, TestDevice>("RUCN", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RUCN", 1.0f); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUCU_one) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUCU"); - test_trmm, Kokkos::complex, TestDevice>("RUCU", - 1.0f); + test_trmm, Kokkos::complex, TestDevice>("RUCU", 1.0f); Kokkos::Profiling::popRegion(); } ///////////////// alpha 4.5 ///////////////// TEST_F(TestCategory, trmm_complex_float_LLNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLNN"); - test_trmm, Kokkos::complex, TestDevice>( - "LLNN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LLNN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LLNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLNU"); - test_trmm, Kokkos::complex, TestDevice>( - "LLNU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LLNU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LLCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLCN"); - test_trmm, Kokkos::complex, TestDevice>( - "LLCN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LLCN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LLCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LLCU"); - test_trmm, Kokkos::complex, TestDevice>( - "LLCU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LLCU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUNN_fourfive) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUNN"); - test_trmm, Kokkos::complex, TestDevice>( - "LUNN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LUNN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUNU"); - test_trmm, Kokkos::complex, TestDevice>( - "LUNU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LUNU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUCN"); - test_trmm, Kokkos::complex, TestDevice>( - "LUCN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LUCN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_LUCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_LUCU"); - test_trmm, Kokkos::complex, TestDevice>( - "LUCU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("LUCU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLNN"); - test_trmm, Kokkos::complex, TestDevice>( - "RLNN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RLNN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLNU"); - test_trmm, Kokkos::complex, TestDevice>( - "RLNU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RLNU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLCN_fourfive) { 
Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLCN"); - test_trmm, Kokkos::complex, TestDevice>( - "RLCN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RLCN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RLCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RLCU"); - test_trmm, Kokkos::complex, TestDevice>( - "RLCU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RLCU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUNN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUNN"); - test_trmm, Kokkos::complex, TestDevice>( - "RUNN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RUNN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUNU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUNU"); - test_trmm, Kokkos::complex, TestDevice>( - "RUNU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RUNU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUCN_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUCN"); - test_trmm, Kokkos::complex, TestDevice>( - "RUCN", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RUCN", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, trmm_complex_float_RUCU_fourfive) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trmm_complex_float_RUCU"); - test_trmm, Kokkos::complex, TestDevice>( - "RUCU", Kokkos::complex(4.5f, 0.0f)); + test_trmm, Kokkos::complex, TestDevice>("RUCU", Kokkos::complex(4.5f, 0.0f)); Kokkos::Profiling::popRegion(); } #endif diff --git 
a/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trsm.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trsm.hpp index 9a00f22263c3..81fdad892993 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trsm.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas3_trsm.hpp @@ -44,8 +44,7 @@ struct NonUnitDiagTRSM { void operator()(const int& i) const { A_(i, i) = A_(i, i) + 10; } }; -template +template struct trsm_VanillaGEMM { bool A_t, B_t, A_c, B_c; int N, K; @@ -62,12 +61,9 @@ struct trsm_VanillaGEMM { ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { // GNU COMPILER BUG WORKAROUND -#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) +#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int i = team.league_rank(); #else @@ -99,8 +95,7 @@ struct trsm_VanillaGEMM { }; template -void impl_test_trsm(const char* side, const char* uplo, const char* trans, - const char* diag, int M, int N, +void impl_test_trsm(const char* side, const char* uplo, const char* trans, const char* diag, int M, int N, typename ViewTypeA::value_type alpha) { using execution_space = typename ViewTypeA::device_type::execution_space; using ScalarA = typename ViewTypeA::value_type; @@ -123,31 +118,21 @@ void impl_test_trsm(const char* side, const char* uplo, const char* trans, typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror_view(B); typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0); - uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); + uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); Kokkos::Random_XorShift64_Pool rand_pool(seed); if ((diag[0] == 'U') || (diag[0] == 'u')) { - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - 
ScalarA>::max() * - 0.1); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max() * 0.1); using functor_type = UnitDiagTRSM; functor_type udtrsm(A); - Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRSM", - Kokkos::RangePolicy(0, K), udtrsm); + Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRSM", Kokkos::RangePolicy(0, K), udtrsm); } else { //(diag[0]=='N')||(diag[0]=='n') - Kokkos::fill_random(A, rand_pool, - Kokkos::rand, - ScalarA>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); using functor_type = NonUnitDiagTRSM; functor_type nudtrsm(A); - Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRSM", - Kokkos::RangePolicy(0, K), nudtrsm); + Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRSM", Kokkos::RangePolicy(0, K), nudtrsm); } - Kokkos::fill_random( - X0, rand_pool, - Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(X0, rand_pool, Kokkos::rand, ScalarA>::max()); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_X0, X0); @@ -165,8 +150,7 @@ void impl_test_trsm(const char* side, const char* uplo, const char* trans, Kokkos::deep_copy(A, h_A); - struct trsm_VanillaGEMM - vgemm; + struct trsm_VanillaGEMM vgemm; if (A_l) { vgemm.A_t = (trans[0] != 'N') && (trans[0] != 'n'); vgemm.B_t = false; @@ -187,12 +171,10 @@ void impl_test_trsm(const char* side, const char* uplo, const char* trans, vgemm.C = B; vgemm.alpha = alpha_trmm; vgemm.beta = beta; - Kokkos::parallel_for( - "KokkosBlas::Test::trsm_VanillaGEMM", - Kokkos::TeamPolicy( - M, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - vgemm); + Kokkos::parallel_for("KokkosBlas::Test::trsm_VanillaGEMM", + Kokkos::TeamPolicy( + M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + vgemm); Kokkos::fence(); KokkosBlas::trsm(side, uplo, trans, diag, alpha, A, B); @@ -223,41 +205,30 @@ void impl_test_trsm(const char* side, const char* uplo, const char* trans, template int test_trsm(const char* mode, ScalarA alpha) { #if 
defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using view_type_a_ll = Kokkos::View; using view_type_b_ll = Kokkos::View; - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 0, 0, alpha); - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 101, 19, alpha); - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 19, 101, alpha); - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 343, 201, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 0, 0, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 101, 19, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 19, 101, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 343, 201, alpha); #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using view_type_a_lr = Kokkos::View; using view_type_b_lr = Kokkos::View; - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 0, 0, alpha); - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 101, 19, alpha); - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 19, 101, alpha); - Test::impl_test_trsm( - &mode[0], &mode[1], &mode[2], &mode[3], 343, 201, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 0, 0, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 101, 19, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 19, 101, alpha); + Test::impl_test_trsm(&mode[0], &mode[1], &mode[2], &mode[3], 343, 201, alpha); #endif return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trsm_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trsm_float"); float alpha = 1.0f; @@ -302,8 +273,7 @@ TEST_F(TestCategory, trsm_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trsm_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trsm_double"); double alpha = 1.0; @@ -348,157 +318,91 @@ TEST_F(TestCategory, trsm_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trsm_complex_double) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trsm_complex_double"); Kokkos::complex alpha = 1.0; - test_trsm, Kokkos::complex, TestDevice>( - "LLNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LLNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LLCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LLCU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUCU", alpha); - - test_trsm, Kokkos::complex, TestDevice>( - "RLNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RLNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RLCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RLCU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUCN", 
alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCU", alpha); + + test_trsm, Kokkos::complex, TestDevice>("RLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCU", alpha); alpha = Kokkos::complex(4.5, 0.0); - test_trsm, Kokkos::complex, TestDevice>( - "LLNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LLNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LLCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LLCU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "LUCU", alpha); - - test_trsm, Kokkos::complex, TestDevice>( - "RLNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RLNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RLCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RLCU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUNN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUNU", alpha); - test_trsm, Kokkos::complex, TestDevice>( - "RUCN", alpha); - test_trsm, Kokkos::complex, TestDevice>( - 
"RUCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCU", alpha); + + test_trsm, Kokkos::complex, TestDevice>("RLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCU", alpha); Kokkos::Profiling::popRegion(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trsm_complex_float) { Kokkos::Profiling::pushRegion("KokkosBlas::Test::trsm_complex_float"); Kokkos::complex alpha = 1.0f; - test_trsm, Kokkos::complex, TestDevice>("LLNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LLNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LLCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LLCU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUCU", - alpha); - - test_trsm, Kokkos::complex, TestDevice>("RLNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RLNU", - alpha); - test_trsm, Kokkos::complex, 
TestDevice>("RLCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RLCU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUCU", - alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCU", alpha); + + test_trsm, Kokkos::complex, TestDevice>("RLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCU", alpha); alpha = Kokkos::complex(4.5f, 0.0f); - test_trsm, Kokkos::complex, TestDevice>("LLNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LLNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LLCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LLCU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("LUCU", - alpha); - - test_trsm, Kokkos::complex, TestDevice>("RLNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RLNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RLCN", - alpha); - test_trsm, Kokkos::complex, 
TestDevice>("RLCU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUNN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUNU", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUCN", - alpha); - test_trsm, Kokkos::complex, TestDevice>("RUCU", - alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("LUCU", alpha); + + test_trsm, Kokkos::complex, TestDevice>("RLNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RLCU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUNU", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCN", alpha); + test_trsm, Kokkos::complex, TestDevice>("RUCU", alpha); Kokkos::Profiling::popRegion(); } #endif diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas_Newton.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas_Newton.hpp index 5bb6946e999f..7b6d4a904900 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas_Newton.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas_Newton.hpp @@ -40,16 +40,13 @@ struct LogisticEquation { scalar_type dt; vec_type state; - LogisticEquation(const scalar_type dt_, vec_type initial_state) - : dt(dt_), state(initial_state) {} + LogisticEquation(const scalar_type dt_, vec_type initial_state) : dt(dt_), state(initial_state) {} KOKKOS_FUNCTION void residual(const vec_type& y, const vec_type& dydt) const { dydt(0) = y(0) - state(0) - dt * y(0) * (1 - y(0)); } - KOKKOS_FUNCTION void 
jacobian(const vec_type& y, const mat_type& jac) const { - jac(0, 0) = 1 - dt + 2 * dt * y(0); - } + KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { jac(0, 0) = 1 - dt + 2 * dt * y(0); } KOKKOS_FUNCTION scalar_type expected_val(const scalar_type t) const { using Kokkos::exp; @@ -112,9 +109,7 @@ int test_logistic() { using norm_type = typename Kokkos::View; using handle_type = KokkosBlas::Impl::NewtonHandle; using system_type = LogisticEquation; - using newton_type = - KokkosBlas::Impl::NewtonFunctor; + using newton_type = KokkosBlas::Impl::NewtonFunctor; // Create the non-linear system and initialize data vec_type state("state", 1); @@ -150,9 +145,7 @@ int test_intersection() { using norm_type = typename Kokkos::View; using handle_type = KokkosBlas::Impl::NewtonHandle; using system_type = Intersection; - using newton_type = - KokkosBlas::Impl::NewtonFunctor; + using newton_type = KokkosBlas::Impl::NewtonFunctor; // Create the non-linear system and initialize data system_type intersection; diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas_rocblas.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas_rocblas.hpp index ed68b7a8b6f8..091fac725921 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas_rocblas.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas_rocblas.hpp @@ -58,8 +58,7 @@ void test_rocblas_safe_call() { // fails it throws an error with the // KOKKOS_ROCBLAS_SAFE_CALL_IMPL macro void test_rocblas_singleton() { - KokkosBlas::Impl::RocBlasSingleton& s = - KokkosBlas::Impl::RocBlasSingleton::singleton(); + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); (void)s; } diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_axpy.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_axpy.hpp index 427925a3dc32..cd58eba92012 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_axpy.hpp +++ 
b/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_axpy.hpp @@ -29,8 +29,7 @@ namespace Test { struct KokkosKernelAxpyTag {}; struct NaiveAxpyTag {}; -template +template struct Functor_TestBlasSerialAxpy { using execution_space = typename DeviceType::execution_space; ScalarType _alpha; @@ -38,8 +37,7 @@ struct Functor_TestBlasSerialAxpy { ViewType _y; KOKKOS_INLINE_FUNCTION - Functor_TestBlasSerialAxpy(const ScalarType alpha, const ViewType &x, - const ViewType &y) + Functor_TestBlasSerialAxpy(const ScalarType alpha, const ViewType &x, const ViewType &y) : _alpha(alpha), _x(x), _y(y) {} KOKKOS_INLINE_FUNCTION @@ -62,15 +60,11 @@ struct Functor_TestBlasSerialAxpy { using value_type = typename ViewType::value_type; std::string name_region("KokkosBlas::Test::SerialAxpy"); const std::string name_value_type = Test::value_type_name(); - std::string name_work_tag = - (std::is_same::value - ? "::KokkosBlas" - : std::is_same::value - ? "::Naive" - : "::UnknownWorkTag"); - std::string name_test_id = "Axpy"; - std::string name = - name_region + name_value_type + name_work_tag + name_test_id; + std::string name_work_tag = (std::is_same::value ? "::KokkosBlas" + : std::is_same::value ? 
"::Naive" + : "::UnknownWorkTag"); + std::string name_test_id = "Axpy"; + std::string name = name_region + name_value_type + name_work_tag + name_test_id; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _x.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); @@ -91,20 +85,15 @@ void impl_test_blas_serial_axpy(const int N, const int BlkSize) { ViewType Y("Y", N, BlkSize, BlkSize); ViewType Yref("Yref", N, BlkSize, BlkSize); - Kokkos::Random_XorShift64_Pool random( - 13718); + Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(X, random, ats::one()); Kokkos::fill_random(Y, random, ats::one()); Kokkos::fence(); Kokkos::deep_copy(Yref, Y); /// test body - Functor_TestBlasSerialAxpy( - alpha, X, Yref) - .run(); - Functor_TestBlasSerialAxpy(alpha, X, Y) - .run(); + Functor_TestBlasSerialAxpy(alpha, X, Yref).run(); + Functor_TestBlasSerialAxpy(alpha, X, Y).run(); Kokkos::fence(); @@ -116,12 +105,10 @@ void impl_test_blas_serial_axpy(const int N, const int BlkSize) { Kokkos::deep_copy(Yref_host, Yref); /// check a = b - typename ats::mag_type eps = - 100 * std::numeric_limits::epsilon(); + typename ats::mag_type eps = 100 * std::numeric_limits::epsilon(); for (int k = 0; k < N; ++k) for (int i = 0; i < BlkSize; ++i) - for (int j = 0; j < BlkSize; ++j) - EXPECT_NEAR_KK(Y_host(k, i, j), Yref_host(k, i, j), eps); + for (int j = 0; j < BlkSize; ++j) EXPECT_NEAR_KK(Y_host(k, i, j), Yref_host(k, i, j), eps); } } // namespace Test @@ -130,24 +117,20 @@ template int test_blas_serial_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { - typedef Kokkos::View - ViewType; + typedef Kokkos::View ViewType; Test::impl_test_blas_serial_axpy(0, 10); Test::impl_test_blas_serial_axpy(10, 15); Test::impl_test_blas_serial_axpy(1024, 9); - Test::impl_test_blas_serial_axpy(132231, - 3); + Test::impl_test_blas_serial_axpy(132231, 3); } #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - typedef Kokkos::View - ViewType; + typedef 
Kokkos::View ViewType; Test::impl_test_blas_serial_axpy(0, 10); Test::impl_test_blas_serial_axpy(10, 15); Test::impl_test_blas_serial_axpy(1024, 9); - Test::impl_test_blas_serial_axpy(132231, - 3); + Test::impl_test_blas_serial_axpy(132231, 3); } #endif @@ -155,21 +138,16 @@ int test_blas_serial_axpy() { } #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, serial_axpy_float_float) { - test_blas_serial_axpy(); -} +TEST_F(TestCategory, serial_axpy_float_float) { test_blas_serial_axpy(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, serial_axpy_double_double) { - test_blas_serial_axpy(); -} +TEST_F(TestCategory, serial_axpy_double_double) { test_blas_serial_axpy(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) TEST_F(TestCategory, serial_axpy_dcomplex_dcomplex) { - test_blas_serial_axpy, - Kokkos::complex >(); + test_blas_serial_axpy, Kokkos::complex >(); } TEST_F(TestCategory, serial_axpy_dcomplex_double) { @@ -179,13 +157,10 @@ TEST_F(TestCategory, serial_axpy_dcomplex_double) { #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) TEST_F(TestCategory, serial_axpy_fcomplex_fcomplex) { - test_blas_serial_axpy, - Kokkos::complex >(); + test_blas_serial_axpy, Kokkos::complex >(); } -TEST_F(TestCategory, serial_axpy_fcomplex_float) { - test_blas_serial_axpy, float>(); -} +TEST_F(TestCategory, serial_axpy_fcomplex_float) { test_blas_serial_axpy, float>(); } #endif #endif // TEST_BLAS_SERIAL_AXPY_HPP_ diff --git a/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_nrm2.hpp b/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_nrm2.hpp index 147df523533d..bca8afa1f307 100644 --- a/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_nrm2.hpp +++ b/packages/kokkos-kernels/blas/unit_test/Test_Blas_serial_nrm2.hpp @@ -38,8 +38,7 @@ struct Functor_TestBlasSerialNrm2 { norm_view_type _nrm; KOKKOS_INLINE_FUNCTION - Functor_TestBlasSerialNrm2(const ViewType &x, const norm_view_type &nrm) - : _x(x), _nrm(nrm) {} + 
Functor_TestBlasSerialNrm2(const ViewType &x, const norm_view_type &nrm) : _x(x), _nrm(nrm) {} KOKKOS_INLINE_FUNCTION void operator()(const KokkosKernelTag &, const int i) const { @@ -61,14 +60,11 @@ struct Functor_TestBlasSerialNrm2 { inline void run() { std::string name_region("KokkosBlas::Test::SerialNrm2"); const std::string name_value_type = Test::value_type_name(); - std::string name_work_tag = - (std::is_same::value - ? "::KokkosBlas" - : std::is_same::value ? "::Naive" - : "::UnknownWorkTag"); - std::string name_test_id = "Nrm2"; - std::string name = - name_region + name_value_type + name_work_tag + name_test_id; + std::string name_work_tag = (std::is_same::value ? "::KokkosBlas" + : std::is_same::value ? "::Naive" + : "::UnknownWorkTag"); + std::string name_test_id = "Nrm2"; + std::string name = name_region + name_value_type + name_work_tag + name_test_id; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _x.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); @@ -89,8 +85,7 @@ struct Functor_TestBlasSerialNrm2MV { norm_view_type _nrm; KOKKOS_INLINE_FUNCTION - Functor_TestBlasSerialNrm2MV(const ViewType &x, const norm_view_type &nrm) - : _x(x), _nrm(nrm) {} + Functor_TestBlasSerialNrm2MV(const ViewType &x, const norm_view_type &nrm) : _x(x), _nrm(nrm) {} KOKKOS_INLINE_FUNCTION void operator()(const KokkosKernelTag &, const int i) const { @@ -116,14 +111,11 @@ struct Functor_TestBlasSerialNrm2MV { inline void run() { std::string name_region("KokkosBlas::Test::SerialNrm2MV"); const std::string name_value_type = Test::value_type_name(); - std::string name_work_tag = - (std::is_same::value - ? "::KokkosBlas" - : std::is_same::value ? "::Naive" - : "::UnknownWorkTag"); - std::string name_test_id = "Nrm2"; - std::string name = - name_region + name_value_type + name_work_tag + name_test_id; + std::string name_work_tag = (std::is_same::value ? "::KokkosBlas" + : std::is_same::value ? 
"::Naive" + : "::UnknownWorkTag"); + std::string name_test_id = "Nrm2"; + std::string name = name_region + name_value_type + name_work_tag + name_test_id; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _x.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); @@ -153,31 +145,24 @@ void impl_test_blas_serial_nrm2(const int N, const int BlkSize) { /// test body Functor_TestBlasSerialNrm2(X, norms).run(); - Functor_TestBlasSerialNrm2(X, - norms_ref) - .run(); + Functor_TestBlasSerialNrm2(X, norms_ref).run(); Kokkos::fence(); /// for comparison send it to host - typename norm_view_type::HostMirror norms_host = - Kokkos::create_mirror_view(norms); - typename norm_view_type::HostMirror norms_ref_host = - Kokkos::create_mirror_view(norms_ref); + typename norm_view_type::HostMirror norms_host = Kokkos::create_mirror_view(norms); + typename norm_view_type::HostMirror norms_ref_host = Kokkos::create_mirror_view(norms_ref); Kokkos::deep_copy(norms_host, norms); Kokkos::deep_copy(norms_ref_host, norms_ref); /// check a = b - typename ats::mag_type eps = - 100 * std::numeric_limits::epsilon(); - for (int k = 0; k < N; ++k) - EXPECT_NEAR_KK(norms_host(k), norms_ref_host(k), eps); + typename ats::mag_type eps = 100 * std::numeric_limits::epsilon(); + for (int k = 0; k < N; ++k) EXPECT_NEAR_KK(norms_host(k), norms_ref_host(k), eps); } template -void impl_test_blas_serial_nrm2mv(const int N, const int vecLength, - const int numVecs) { +void impl_test_blas_serial_nrm2mv(const int N, const int vecLength, const int numVecs) { /// typedefs using execution_space = typename DeviceType::execution_space; using value_type = typename ViewType::non_const_value_type; @@ -197,24 +182,19 @@ void impl_test_blas_serial_nrm2mv(const int N, const int vecLength, /// test body Functor_TestBlasSerialNrm2MV(X, norms).run(); - Functor_TestBlasSerialNrm2MV(X, - norms_ref) - .run(); + Functor_TestBlasSerialNrm2MV(X, norms_ref).run(); Kokkos::fence(); /// for comparison 
send it to host - typename norm_view_type::HostMirror norms_host = - Kokkos::create_mirror_view(norms); - typename norm_view_type::HostMirror norms_ref_host = - Kokkos::create_mirror_view(norms_ref); + typename norm_view_type::HostMirror norms_host = Kokkos::create_mirror_view(norms); + typename norm_view_type::HostMirror norms_ref_host = Kokkos::create_mirror_view(norms_ref); Kokkos::deep_copy(norms_host, norms); Kokkos::deep_copy(norms_ref_host, norms_ref); /// check a = b - typename ats::mag_type eps = - 100 * std::numeric_limits::epsilon(); + typename ats::mag_type eps = 100 * std::numeric_limits::epsilon(); for (int k = 0; k < N; ++k) for (int vecIdx = 0; vecIdx < numVecs; ++vecIdx) EXPECT_NEAR_KK(norms_host(k, vecIdx), norms_ref_host(k, vecIdx), eps); @@ -232,8 +212,7 @@ int test_blas_serial_nrm2() { Test::impl_test_blas_serial_nrm2(1024, 9); Test::impl_test_blas_serial_nrm2(132231, 3); - using MVViewType = - Kokkos::View; + using MVViewType = Kokkos::View; Test::impl_test_blas_serial_nrm2mv(0, 10, 5); Test::impl_test_blas_serial_nrm2mv(10, 15, 7); Test::impl_test_blas_serial_nrm2mv(1024, 9, 5); @@ -242,15 +221,13 @@ int test_blas_serial_nrm2() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) { - using ViewType = - Kokkos::View; + using ViewType = Kokkos::View; Test::impl_test_blas_serial_nrm2(0, 10); Test::impl_test_blas_serial_nrm2(10, 15); Test::impl_test_blas_serial_nrm2(1024, 9); Test::impl_test_blas_serial_nrm2(132231, 3); - using MVViewType = - Kokkos::View; + using MVViewType = Kokkos::View; Test::impl_test_blas_serial_nrm2mv(0, 10, 5); Test::impl_test_blas_serial_nrm2mv(10, 15, 5); Test::impl_test_blas_serial_nrm2mv(1024, 9, 5); @@ -262,27 +239,19 @@ int test_blas_serial_nrm2() { } #if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, serial_nrm2_float_float) { - test_blas_serial_nrm2(); -} +TEST_F(TestCategory, serial_nrm2_float_float) { test_blas_serial_nrm2(); } #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, 
serial_nrm2_double_double) { - test_blas_serial_nrm2(); -} +TEST_F(TestCategory, serial_nrm2_double_double) { test_blas_serial_nrm2(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) -TEST_F(TestCategory, serial_nrm2_fcomplex_float) { - test_blas_serial_nrm2 >(); -} +TEST_F(TestCategory, serial_nrm2_fcomplex_float) { test_blas_serial_nrm2 >(); } #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) -TEST_F(TestCategory, serial_nrm2_dcomplex_dcomplex) { - test_blas_serial_nrm2 >(); -} +TEST_F(TestCategory, serial_nrm2_dcomplex_dcomplex) { test_blas_serial_nrm2 >(); } #endif #endif // TEST_BLAS_SERIAL_NRM2_HPP_ diff --git a/packages/kokkos-kernels/cmake/Modules/FindTPLROCBLAS.cmake b/packages/kokkos-kernels/cmake/Modules/FindTPLROCBLAS.cmake index c0a9de3b500b..4edcd829445f 100644 --- a/packages/kokkos-kernels/cmake/Modules/FindTPLROCBLAS.cmake +++ b/packages/kokkos-kernels/cmake/Modules/FindTPLROCBLAS.cmake @@ -1,13 +1,47 @@ -# MPL: 12/29/2022: CMake regular way to find a package -FIND_PACKAGE(ROCBLAS) -if(TARGET roc::rocblas) -## MPL: 12/29/2022: Variable TPL_ROCBLAS_IMPORTED_NAME follows the requested convention -## of KokkosKernel (method kokkoskernels_import_tpl of kokkoskernels_tpls.cmake) - SET(TPL_ROCBLAS_IMPORTED_NAME roc::rocblas) - SET(TPL_IMPORTED_NAME roc::rocblas) -## MPL: 12/29/2022: A target comming from a TPL must follows the requested convention -## of KokkosKernel (method kokkoskernels_link_tpl of kokkoskernels_tpls.cmake) - ADD_LIBRARY(KokkosKernels::ROCBLAS ALIAS roc::rocblas) -ELSE() - MESSAGE(FATAL_ERROR "Package ROCBLAS requested but not found") +IF(ROCBLAS_LIBRARIES AND ROCBLAS_LIBRARY_DIRS AND ROCBLAS_INCLUDE_DIRS) + kokkoskernels_find_imported(ROCBLAS INTERFACE + LIBRARIES ${ROCBLAS_LIBRARIES} + LIBRARY_PATHS ${ROCBLAS_LIBRARY_DIRS} + HEADER_PATHS ${ROCBLAS_INCLUDE_DIRS} + ) +ELSEIF(ROCBLAS_LIBRARIES AND ROCBLAS_LIBRARY_DIRS) + kokkoskernels_find_imported(ROCBLAS INTERFACE + LIBRARIES ${ROCBLAS_LIBRARIES} + LIBRARY_PATHS 
${ROCBLAS_LIBRARY_DIRS} + HEADER rocblas.h + ) +ELSEIF(ROCBLAS_LIBRARIES) + kokkoskernels_find_imported(ROCBLAS INTERFACE + LIBRARIES ${ROCBLAS_LIBRARIES} + HEADER rocblas.h + ) +ELSEIF(ROCBLAS_LIBRARY_DIRS) + kokkoskernels_find_imported(ROCBLAS INTERFACE + LIBRARIES rocblas + LIBRARY_PATHS ${ROCBLAS_LIBRARY_DIRS} + HEADER rocblas.h + ) +ELSEIF(ROCBLAS_ROOT OR KokkosKernels_ROCBLAS_ROOT) # nothing specific provided, just ROOT + kokkoskernels_find_imported(ROCBLAS INTERFACE + LIBRARIES rocblas + HEADER rocblas.h + ) +ELSE() # backwards-compatible way + FIND_PACKAGE(ROCBLAS) + INCLUDE(FindPackageHandleStandardArgs) + IF (NOT ROCBLAS_FOUND) + #Important note here: this find Module is named TPLROCBLAS + #The eventual target is named ROCBLAS. To avoid naming conflicts + #the find module is called TPLROCBLAS. This call will cause + #the find_package call to fail in a "standard" CMake way + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLROCBLAS REQUIRED_VARS ROCBLAS_FOUND) + ELSE() + #The libraries might be empty - OR they might explicitly be not found + IF("${ROCBLAS_LIBRARIES}" MATCHES "NOTFOUND") + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLROCBLAS REQUIRED_VARS ROCBLAS_LIBRARIES) + ELSE() + KOKKOSKERNELS_CREATE_IMPORTED_TPL(ROCBLAS INTERFACE + LINK_LIBRARIES "${ROCBLAS_LIBRARIES}") + ENDIF() + ENDIF() ENDIF() diff --git a/packages/kokkos-kernels/cmake/Modules/FindTPLROCSOLVER.cmake b/packages/kokkos-kernels/cmake/Modules/FindTPLROCSOLVER.cmake index 8f2a92cfdae2..58eae9f8f5ae 100644 --- a/packages/kokkos-kernels/cmake/Modules/FindTPLROCSOLVER.cmake +++ b/packages/kokkos-kernels/cmake/Modules/FindTPLROCSOLVER.cmake @@ -1,9 +1,48 @@ -# LBV: 11/08/2023: This file follows the partern of FindTPLROCBLAS.cmake/FindTPLROCSPARSE.cmake -FIND_PACKAGE(ROCSOLVER) -if(TARGET roc::rocsolver) - SET(TPL_ROCSOLVER_IMPORTED_NAME roc::rocsolver) - SET(TPL_IMPORTED_NAME roc::rocsolver) - ADD_LIBRARY(KokkosKernels::ROCSOLVER ALIAS roc::rocsolver) -ELSE() - MESSAGE(FATAL_ERROR "Package ROCSOLVER requested 
but not found") +IF(ROCSOLVER_LIBRARIES AND ROCSOLVER_LIBRARY_DIRS AND ROCSOLVER_INCLUDE_DIRS) + kokkoskernels_find_imported(ROCSOLVER INTERFACE + LIBRARIES ${ROCSOLVER_LIBRARIES} + LIBRARY_PATHS ${ROCSOLVER_LIBRARY_DIRS} + HEADER_PATHS ${ROCSOLVER_INCLUDE_DIRS} + ) +ELSEIF(ROCSOLVER_LIBRARIES AND ROCSOLVER_LIBRARY_DIRS) + kokkoskernels_find_imported(ROCSOLVER INTERFACE + LIBRARIES ${ROCSOLVER_LIBRARIES} + LIBRARY_PATHS ${ROCSOLVER_LIBRARY_DIRS} + HEADER rocsolver.h + ) +ELSEIF(ROCSOLVER_LIBRARIES) + kokkoskernels_find_imported(ROCSOLVER INTERFACE + LIBRARIES ${ROCSOLVER_LIBRARIES} + HEADER rocsolver.h + ) +ELSEIF(ROCSOLVER_LIBRARY_DIRS) + kokkoskernels_find_imported(ROCSOLVER INTERFACE + LIBRARIES rocsolver + LIBRARY_PATHS ${ROCSOLVER_LIBRARY_DIRS} + HEADER rocsolver.h + ) +ELSEIF(ROCSOLVER_ROOT OR KokkosKernels_ROCSOLVER_ROOT) # nothing specific provided, just ROOT + kokkoskernels_find_imported(ROCSOLVER INTERFACE + LIBRARIES rocsolver + HEADER rocsolver.h + ) +ELSE() # backwards-compatible way + FIND_PACKAGE(ROCSOLVER) + INCLUDE(FindPackageHandleStandardArgs) + IF (NOT ROCSOLVER_FOUND) + #Important note here: this find Module is named TPLROCSOLVER + #The eventual target is named ROCSOLVER. To avoid naming conflicts + #the find module is called TPLROCSOLVER. 
This call will cause + #the find_package call to fail in a "standard" CMake way + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLROCSOLVER REQUIRED_VARS ROCSOLVER_FOUND) + ELSE() + #The libraries might be empty - OR they might explicitly be not found + IF("${ROCSOLVER_LIBRARIES}" MATCHES "NOTFOUND") + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLROCSOLVER REQUIRED_VARS ROCSOLVER_LIBRARIES) + ELSE() + KOKKOSKERNELS_CREATE_IMPORTED_TPL(ROCSOLVER INTERFACE + LINK_LIBRARIES "${ROCSOLVER_LIBRARIES}") + ENDIF() + ENDIF() ENDIF() + diff --git a/packages/kokkos-kernels/cmake/Modules/FindTPLROCSPARSE.cmake b/packages/kokkos-kernels/cmake/Modules/FindTPLROCSPARSE.cmake index 5f985ff3a8ae..3b45ba5e82ef 100644 --- a/packages/kokkos-kernels/cmake/Modules/FindTPLROCSPARSE.cmake +++ b/packages/kokkos-kernels/cmake/Modules/FindTPLROCSPARSE.cmake @@ -1,9 +1,47 @@ -# MPL: 05/01/2023: This file follows the partern of FindTPLROCBLAS.cmake -FIND_PACKAGE(ROCSPARSE) -if(TARGET roc::rocsparse) - SET(TPL_ROCSPARSE_IMPORTED_NAME roc::rocsparse) - SET(TPL_IMPORTED_NAME roc::rocsparse) - ADD_LIBRARY(KokkosKernels::ROCSPARSE ALIAS roc::rocsparse) -ELSE() - MESSAGE(FATAL_ERROR "Package ROCSPARSE requested but not found") +IF(ROCSPARSE_LIBRARIES AND ROCSPARSE_LIBRARY_DIRS AND ROCSPARSE_INCLUDE_DIRS) + kokkoskernels_find_imported(ROCSPARSE INTERFACE + LIBRARIES ${ROCSPARSE_LIBRARIES} + LIBRARY_PATHS ${ROCSPARSE_LIBRARY_DIRS} + HEADER_PATHS ${ROCSPARSE_INCLUDE_DIRS} + ) +ELSEIF(ROCSPARSE_LIBRARIES AND ROCSPARSE_LIBRARY_DIRS) + kokkoskernels_find_imported(ROCSPARSE INTERFACE + LIBRARIES ${ROCSPARSE_LIBRARIES} + LIBRARY_PATHS ${ROCSPARSE_LIBRARY_DIRS} + HEADER rocsparse.h + ) +ELSEIF(ROCSPARSE_LIBRARIES) + kokkoskernels_find_imported(ROCSPARSE INTERFACE + LIBRARIES ${ROCSPARSE_LIBRARIES} + HEADER rocsparse.h + ) +ELSEIF(ROCSPARSE_LIBRARY_DIRS) + kokkoskernels_find_imported(ROCSPARSE INTERFACE + LIBRARIES rocsparse + LIBRARY_PATHS ${ROCSPARSE_LIBRARY_DIRS} + HEADER rocsparse.h + ) +ELSEIF(ROCSPARSE_ROOT OR 
KokkosKernels_ROCSPARSE_ROOT) # nothing specific provided, just ROOT + kokkoskernels_find_imported(ROCSPARSE INTERFACE + LIBRARIES rocsparse + HEADER rocsparse.h + ) +ELSE() # backwards-compatible way + FIND_PACKAGE(ROCSPARSE) + INCLUDE(FindPackageHandleStandardArgs) + IF (NOT ROCSPARSE_FOUND) + #Important note here: this find Module is named TPLROCSPARSE + #The eventual target is named ROCSPARSE. To avoid naming conflicts + #the find module is called TPLROCSPARSE. This call will cause + #the find_package call to fail in a "standard" CMake way + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLROCSPARSE REQUIRED_VARS ROCSPARSE_FOUND) + ELSE() + #The libraries might be empty - OR they might explicitly be not found + IF("${ROCSPARSE_LIBRARIES}" MATCHES "NOTFOUND") + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLROCSPARSE REQUIRED_VARS ROCSPARSE_LIBRARIES) + ELSE() + KOKKOSKERNELS_CREATE_IMPORTED_TPL(ROCSPARSE INTERFACE + LINK_LIBRARIES "${ROCSPARSE_LIBRARIES}") + ENDIF() + ENDIF() ENDIF() diff --git a/packages/kokkos-kernels/cmake/kokkoskernels_eti_offsets.cmake b/packages/kokkos-kernels/cmake/kokkoskernels_eti_offsets.cmake index 484175a976ff..1cf02f1327b5 100644 --- a/packages/kokkos-kernels/cmake/kokkoskernels_eti_offsets.cmake +++ b/packages/kokkos-kernels/cmake/kokkoskernels_eti_offsets.cmake @@ -1,5 +1,5 @@ -SET(KOKKOSKERNELS_INST_OFFSET_SIZE_T_DEFAULT ${KOKKOSKERNELS_ADD_DEFAULT_ETI}) -SET(KOKKOSKERNELS_INST_OFFSET_INT_DEFAULT OFF) +SET(KOKKOSKERNELS_INST_OFFSET_SIZE_T_DEFAULT OFF) +SET(KOKKOSKERNELS_INST_OFFSET_INT_DEFAULT ${KOKKOSKERNELS_ADD_DEFAULT_ETI}) SET(OFFSETS OFFSET_INT OFFSET_SIZE_T diff --git a/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake b/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake index d1a44721e6d7..b8267c4955e4 100644 --- a/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake +++ b/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake @@ -334,6 +334,9 @@ MACRO(kokkoskernels_export_imported_tpl NAME) GET_TARGET_PROPERTY(TPL_INCLUDES 
${TPL_IMPORTED_NAME} INTERFACE_INCLUDE_DIRECTORIES) IF(TPL_INCLUDES) + # remove duplicates to prevent incorrect number of arguments to INTERFACE_INCLUDE_DIRECTORIES + # see issue #2238 + LIST(REMOVE_DUPLICATES TPL_INCLUDES) KOKKOSKERNELS_APPEND_CONFIG_LINE("INTERFACE_INCLUDE_DIRECTORIES ${TPL_INCLUDES}") ENDIF() diff --git a/packages/kokkos-kernels/cmake/kokkoskernels_tribits.cmake b/packages/kokkos-kernels/cmake/kokkoskernels_tribits.cmake index 2d70f656ad2d..f7d11c9ef13c 100644 --- a/packages/kokkos-kernels/cmake/kokkoskernels_tribits.cmake +++ b/packages/kokkos-kernels/cmake/kokkoskernels_tribits.cmake @@ -85,6 +85,11 @@ ENDIF() IF(PARSE_SOURCES) LIST(REMOVE_DUPLICATES PARSE_SOURCES) ENDIF() +IF(Kokkos_COMPILE_LANGUAGE) + FOREACH(source ${PARSE_SOURCES}) + SET_SOURCE_FILES_PROPERTIES(${source} PROPERTIES LANGUAGE ${Kokkos_COMPILE_LANGUAGE}) + ENDFOREACH() +ENDIF() ADD_LIBRARY( ${LIBRARY_NAME} @@ -151,6 +156,12 @@ IF (IS_ENABLED) SOURCES ${PARSE_SOURCES} TESTONLYLIBS ${PARSE_TESTONLYLIBS}) ELSE() + # Set the correct CMake language on all source files for this exe + IF(Kokkos_COMPILE_LANGUAGE) + FOREACH(source ${PARSE_SOURCES}) + SET_SOURCE_FILES_PROPERTIES(${source} PROPERTIES LANGUAGE ${Kokkos_COMPILE_LANGUAGE}) + ENDFOREACH() + ENDIF() ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES}) #AJP, BMK altered: IF(KOKKOSKERNELS_ENABLE_TESTS_AND_PERFSUITE) diff --git a/packages/kokkos-kernels/common/impl/KokkosKernels_Iota.hpp b/packages/kokkos-kernels/common/impl/KokkosKernels_Iota.hpp index 04851e81c961..770a0201ef00 100644 --- a/packages/kokkos-kernels/common/impl/KokkosKernels_Iota.hpp +++ b/packages/kokkos-kernels/common/impl/KokkosKernels_Iota.hpp @@ -67,8 +67,7 @@ class Iota { Constructing with size < 0 yeilds a 0-size Iota */ KOKKOS_INLINE_FUNCTION - constexpr Iota(const size_type &size, const value_type offset) - : size_(size), offset_(offset) { + constexpr Iota(const size_type &size, const value_type offset) : size_(size), offset_(offset) { if constexpr 
(std::is_signed_v) { if (size_ < size_type(0)) { size_ = 0; @@ -102,8 +101,7 @@ class Iota { Creating a subview outside of the base Iota yeilds undefined behavior */ template - KOKKOS_INLINE_FUNCTION constexpr Iota(const Iota &base, - const Kokkos::pair &range) + KOKKOS_INLINE_FUNCTION constexpr Iota(const Iota &base, const Kokkos::pair &range) : Iota(range.second - range.first, base.offset_ + range.first) {} /*! \brief Construct Iota subview @@ -111,9 +109,7 @@ class Iota { i >= size() or i < 0 yields undefined behavior. */ KOKKOS_INLINE_FUNCTION - constexpr T operator()(size_type i) const noexcept { - return value_type(i + offset_); - }; + constexpr T operator()(size_type i) const noexcept { return value_type(i + offset_); }; /// \brief return the size of the iota KOKKOS_INLINE_FUNCTION diff --git a/packages/kokkos-kernels/common/impl/KokkosKernels_NaN.hpp b/packages/kokkos-kernels/common/impl/KokkosKernels_NaN.hpp new file mode 100644 index 000000000000..75d6a3ac8c94 --- /dev/null +++ b/packages/kokkos-kernels/common/impl/KokkosKernels_NaN.hpp @@ -0,0 +1,43 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSKERNELS_NAN_HPP +#define KOKKOSKERNELS_NAN_HPP + +#include +#include + +namespace KokkosKernels::Impl { + +// This could be constexpr if Kokkos::complex ctor was +template +KOKKOS_INLINE_FUNCTION T quiet_NaN() { + if constexpr (std::is_same_v) { + return double(Kokkos::Experimental::quiet_NaN_v); // Kokkos::Experimetnal::quiet_NaN_v + // is undefined in + // device code + } else if constexpr (Kokkos::ArithTraits::is_complex) { + using value_type = typename T::value_type; + return T(quiet_NaN(), + quiet_NaN()); // Kokkos::complex ctor is not constexpr + } else { + return Kokkos::Experimental::quiet_NaN_v; + } +} + +} // namespace KokkosKernels::Impl + +#endif // KOKKOSKERNELS_NAN_HPP diff --git a/packages/kokkos-kernels/common/impl/KokkosKernels_SafeCompare.hpp b/packages/kokkos-kernels/common/impl/KokkosKernels_SafeCompare.hpp index 494ef45ada30..1bd43c046ae9 100644 --- a/packages/kokkos-kernels/common/impl/KokkosKernels_SafeCompare.hpp +++ b/packages/kokkos-kernels/common/impl/KokkosKernels_SafeCompare.hpp @@ -47,8 +47,7 @@ KOKKOS_INLINE_FUNCTION constexpr bool safe_gt(const T &t, const U &u) { using KU = Kokkos::ArithTraits; // both are integer, but only one is signed - if constexpr (KT::is_integer && KU::is_integer && - (KT::is_signed != KU::is_signed)) { + if constexpr (KT::is_integer && KU::is_integer && (KT::is_signed != KU::is_signed)) { // how wide the signed type would need to be to hold T and U constexpr size_t t_width = KT::is_signed ? sizeof(T) : 2 * sizeof(T); constexpr size_t u_width = KU::is_signed ? 
sizeof(U) : 2 * sizeof(U); diff --git a/packages/kokkos-kernels/common/impl/KokkosKernels_ViewUtils.hpp b/packages/kokkos-kernels/common/impl/KokkosKernels_ViewUtils.hpp index 2ae8fb609d72..4769f1744a10 100644 --- a/packages/kokkos-kernels/common/impl/KokkosKernels_ViewUtils.hpp +++ b/packages/kokkos-kernels/common/impl/KokkosKernels_ViewUtils.hpp @@ -29,13 +29,11 @@ class with_unmanaged { using layout_type = typename View::array_layout; using memory_space = typename View::memory_space; - using orig_traits = typename View::memory_traits; - static constexpr unsigned new_traits = - orig_traits::impl_value | Kokkos::Unmanaged; + using orig_traits = typename View::memory_traits; + static constexpr unsigned new_traits = orig_traits::impl_value | Kokkos::Unmanaged; public: - using type = Kokkos::View >; + using type = Kokkos::View >; }; /*! \brief A type that is View with Kokkos::Unmanaged added to the memory traits diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_BitUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_BitUtils.hpp index 5be56c388c33..9dcf8a38aec3 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_BitUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_BitUtils.hpp @@ -222,8 +222,7 @@ int least_set_bit( long long i ){ } */ -#elif defined(__INTEL_COMPILER) || defined(KOKKOS_COMPILER_IBM) || \ - defined(__GNUC__) || defined(__GNUG__) +#elif defined(__INTEL_COMPILER) || defined(KOKKOS_COMPILER_IBM) || defined(__GNUC__) || defined(__GNUG__) KOKKOS_FORCEINLINE_FUNCTION int least_set_bit(unsigned i) { return __builtin_ffs(i); } KOKKOS_FORCEINLINE_FUNCTION diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_BlockHashmapAccumulator.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_BlockHashmapAccumulator.hpp index 3ca160164cf7..2b64c38ce961 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_BlockHashmapAccumulator.hpp +++ 
b/packages/kokkos-kernels/common/src/KokkosKernels_BlockHashmapAccumulator.hpp @@ -20,14 +20,13 @@ #include "KokkosKernels_BlockUtils.hpp" #include "KokkosKernels_HashmapAccumulator.hpp" -//#define HASHMAPACCUMULATOR_ASSERT_ENABLED +// #define HASHMAPACCUMULATOR_ASSERT_ENABLED namespace KokkosKernels { namespace Experimental { -template +template /** * \brief BlockHashmapAccumulator class * The use of this is described in the paper: @@ -89,13 +88,7 @@ struct BlockHashmapAccumulator { * Assumption: hash_begins_ are all initialized to -1. */ KOKKOS_INLINE_FUNCTION - BlockHashmapAccumulator() - : hash_begins(), - hash_nexts(), - keys(), - values(), - __max_value_size(), - __hashOpRHS(0) {} + BlockHashmapAccumulator() : hash_begins(), hash_nexts(), keys(), values(), __max_value_size(), __hashOpRHS(0) {} /** * \brief parameterized constructor BlockHashmapAccumulator @@ -113,10 +106,8 @@ struct BlockHashmapAccumulator { * Assumption: hash_begins_ are all initialized to -1. */ KOKKOS_INLINE_FUNCTION - BlockHashmapAccumulator(size_type block_dim_, const size_type max_value_size_, - const size_type hashOpRHS, size_type *hash_begins_, - size_type *hash_nexts_, key_type *keys_, - value_type *values_) + BlockHashmapAccumulator(size_type block_dim_, const size_type max_value_size_, const size_type hashOpRHS, + size_type *hash_begins_, size_type *hash_nexts_, key_type *keys_, value_type *values_) : hash_begins(hash_begins_), hash_nexts(hash_nexts_), keys(keys_), @@ -136,10 +127,9 @@ struct BlockHashmapAccumulator { // Insertion is sequential, no race condition for the insertion. // the mergeadd used in the numeric of KKMEM. 
KOKKOS_INLINE_FUNCTION - void sequential_insert_into_hash_mergeAdd_TrackHashes( - key_type key, const value_type *valueA, const value_type *valueB, - size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + void sequential_insert_into_hash_mergeAdd_TrackHashes(key_type key, const value_type *valueA, + const value_type *valueB, size_type *used_size_, + size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_index; if (key == -1) return; @@ -149,8 +139,7 @@ struct BlockHashmapAccumulator { hash = __compute_hash(key, __hashOpRHS); for (i = hash_begins[hash]; i != -1; i = hash_nexts[i]) { if (keys[i] == key) { - KokkosSparse::Impl::kk_block_add_mul(block_dim, values + i * block_size, - valueA, valueB); + KokkosSparse::Impl::kk_block_add_mul(block_dim, values + i * block_size, valueA, valueB); return; } } @@ -164,8 +153,7 @@ struct BlockHashmapAccumulator { hash_begins[hash] = my_index; keys[my_index] = key; - KokkosSparse::Impl::kk_block_set_mul( - block_dim, values + my_index * block_size, valueA, valueB); + KokkosSparse::Impl::kk_block_set_mul(block_dim, values + my_index * block_size, valueA, valueB); } // Performs C[hash] += A * B (for existing entry) @@ -173,37 +161,28 @@ struct BlockHashmapAccumulator { // Insertion is sequential, no race condition for the insertion. // the mergeadd used in the numeric of KKMEM. 
KOKKOS_INLINE_FUNCTION - void sequential_insert_into_hash_simple(key_type key, const value_type *a_val, - const value_type *b_val, - size_type &used_size, - size_type *used_hashes) { - for (size_type hash = (key * HASHSCALAR) & __hashOpRHS;; - hash = (hash + 1) & __hashOpRHS) { + void sequential_insert_into_hash_simple(key_type key, const value_type *a_val, const value_type *b_val, + size_type &used_size, size_type *used_hashes) { + for (size_type hash = (key * HASHSCALAR) & __hashOpRHS;; hash = (hash + 1) & __hashOpRHS) { if (keys[hash] == -1) { used_hashes[used_size++] = hash; keys[hash] = key; - KokkosSparse::Impl::kk_block_set_mul( - block_dim, values + hash * block_size, a_val, b_val); + KokkosSparse::Impl::kk_block_set_mul(block_dim, values + hash * block_size, a_val, b_val); break; } else if (keys[hash] == key) { - KokkosSparse::Impl::kk_block_add_mul( - block_dim, values + hash * block_size, a_val, b_val); + KokkosSparse::Impl::kk_block_add_mul(block_dim, values + hash * block_size, a_val, b_val); break; } } } KOKKOS_INLINE_FUNCTION - void sequential_export_values_simple(const size_type used_size, - const size_type *used_hashes, - key_type *out_keys, - value_type *out_values, - const bool clear = true) { + void sequential_export_values_simple(const size_type used_size, const size_type *used_hashes, key_type *out_keys, + value_type *out_values, const bool clear = true) { for (size_type i = 0; i < used_size; ++i) { const auto hash = used_hashes[i]; out_keys[i] = keys[hash]; - KokkosSparse::Impl::kk_block_set(block_dim, out_values + i * block_size, - values + hash * block_size); + KokkosSparse::Impl::kk_block_set(block_dim, out_values + i * block_size, values + hash * block_size); if (clear) { keys[hash] = -1; } @@ -218,10 +197,9 @@ struct BlockHashmapAccumulator { // Insertion is simulteanous for the vector lanes of a thread. 
// used_size should be a shared pointer among the thread vectors KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeAdd_TrackHashes( - const key_type key, const value_type *valA, const value_type *valB, - volatile size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + int vector_atomic_insert_into_hash_mergeAdd_TrackHashes(const key_type key, const value_type *valA, + const value_type *valB, volatile size_type *used_size_, + size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_write_index, hashbeginning; if (key == -1) return __insert_success; @@ -232,8 +210,7 @@ struct BlockHashmapAccumulator { for (; i != -1; i = hash_nexts[i]) { if (keys[i] == key) { - KokkosSparse::Impl::kk_block_add_mul( - block_dim, values + i * block_size, valA, valB); + KokkosSparse::Impl::kk_block_add_mul(block_dim, values + i * block_size, valA, valB); return __insert_success; } } @@ -247,8 +224,7 @@ struct BlockHashmapAccumulator { return __insert_full; } else { keys[my_write_index] = key; - KokkosSparse::Impl::kk_block_set_mul( - block_dim, values + my_write_index * block_size, valA, valB); + KokkosSparse::Impl::kk_block_set_mul(block_dim, values + my_write_index * block_size, valA, valB); #ifdef KOKKOSKERNELS_CUDA_INDEPENDENT_THREADS // this is an issue on VOLTA+ and up because warps do not go in SIMD @@ -276,11 +252,9 @@ struct BlockHashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); if (hashbeginning == -1) { - used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = - hash; + used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = hash; } hash_nexts[my_write_index] = hashbeginning; return __insert_success; @@ -288,12 +262,9 @@ struct BlockHashmapAccumulator { } template - KOKKOS_INLINE_FUNCTION int - 
vector_atomic_insert_into_hash_mergeAdd_with_team_level_list_length( - const team_member_t & /* teamMember */, const int /* vector_size */, - size_type hash, const key_type key, const value_type *valA, - const value_type *valB, volatile size_type *used_size_, - const size_type max_value_size_) { + KOKKOS_INLINE_FUNCTION int vector_atomic_insert_into_hash_mergeAdd_with_team_level_list_length( + const team_member_t & /* teamMember */, const int /* vector_size */, size_type hash, const key_type key, + const value_type *valA, const value_type *valB, volatile size_type *used_size_, const size_type max_value_size_) { // Cannot compute hash here due to impl_speed use-case // hash = __compute_hash(key, __hashOpRHS); if (key == -1) return __insert_success; @@ -302,8 +273,7 @@ struct BlockHashmapAccumulator { size_type i = hash_begins[hash]; for (; i != -1; i = hash_nexts[i]) { if (keys[i] == key) { - KokkosSparse::Impl::kk_block_add_mul( - block_dim, values + i * block_size, valA, valB); + KokkosSparse::Impl::kk_block_add_mul(block_dim, values + i * block_size, valA, valB); return __insert_success; } } @@ -316,15 +286,13 @@ struct BlockHashmapAccumulator { if (used_size_[0] >= max_value_size_) { return __insert_full; } - size_type my_write_index = - Kokkos::atomic_fetch_add(used_size_, size_type(1)); + size_type my_write_index = Kokkos::atomic_fetch_add(used_size_, size_type(1)); if (my_write_index >= max_value_size_) { return __insert_full; } else { keys[my_write_index] = key; - KokkosSparse::Impl::kk_block_set_mul( - block_dim, values + my_write_index * block_size, valA, valB); + KokkosSparse::Impl::kk_block_set_mul(block_dim, values + my_write_index * block_size, valA, valB); #ifdef KOKKOSKERNELS_CUDA_INDEPENDENT_THREADS // this is an issue on VOLTA+ and up because warps do not go in SIMD @@ -356,8 +324,7 @@ struct BlockHashmapAccumulator { // hashbeginning = hash_begins[hash] // hash_begins[hash] = my_write_index // hash_nexts[my_write_index] = hash_begins[hash] - 
size_type hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + size_type hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); hash_nexts[my_write_index] = hashbeginning; return __insert_success; } @@ -371,15 +338,12 @@ struct BlockHashmapAccumulator { // Insertion is simulteanous for the vector lanes of a thread. // used_size should be a shared pointer among the thread vectors KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeAdd(const key_type key, - const value_type *valA, - const value_type *valB, + int vector_atomic_insert_into_hash_mergeAdd(const key_type key, const value_type *valA, const value_type *valB, volatile size_type *used_size_) { if (key == -1) return __insert_success; return vector_atomic_insert_into_hash_mergeAdd_with_team_level_list_length( - nullptr, 0, __compute_hash(key, __hashOpRHS), key, valA, valB, - used_size_, __max_value_size); + nullptr, 0, __compute_hash(key, __hashOpRHS), key, valA, valB, used_size_, __max_value_size); } #if 0 @@ -592,11 +556,9 @@ struct BlockHashmapAccumulator { static constexpr int __insert_success = 0; static constexpr int __insert_full = 1; - template ::value || - std::is_same::value, - std::size_t>::type = 0> + template ::value || + std::is_same::value, + std::size_t>::type = 0> KOKKOS_INLINE_FUNCTION int __compute_hash(size_type key, size_type bitmask) { size_type hash = key & bitmask; #ifdef HASHMAPACCUMULATOR_ASSERT_ENABLED @@ -606,9 +568,8 @@ struct BlockHashmapAccumulator { return hash; } - template ::value, - std::size_t>::type = 0> + template ::value, std::size_t>::type = 0> KOKKOS_INLINE_FUNCTION int __compute_hash(size_type key, size_type divisor) { size_type hash = key % divisor; #ifdef HASHMAPACCUMULATOR_ASSERT_ENABLED diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_BlockUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_BlockUtils.hpp index 006a38a6e4e6..64309372acbf 100644 --- 
a/packages/kokkos-kernels/common/src/KokkosKernels_BlockUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_BlockUtils.hpp @@ -25,10 +25,9 @@ namespace Impl { // Initializes block: A = [val, val, val, ....] template -KOKKOS_INLINE_FUNCTION void kk_block_init( - const size_type block_dim, value_type *dst, - const value_type val = static_cast( - 0)) { // Note: replaces __host__ std::fill() not to be called from GPU +KOKKOS_INLINE_FUNCTION void kk_block_init(const size_type block_dim, value_type *dst, + const value_type val = static_cast( + 0)) { // Note: replaces __host__ std::fill() not to be called from GPU for (auto end = dst + (block_dim * block_dim); dst < end; ++dst) { *dst = val; } @@ -36,17 +35,13 @@ KOKKOS_INLINE_FUNCTION void kk_block_init( // Initializes block: A = B template -KOKKOS_INLINE_FUNCTION void kk_block_set(const size_type block_dim, - value_type *dst, - const value_type *val) { - memcpy(dst, val, block_dim * block_dim * sizeof(value_type)); +KOKKOS_INLINE_FUNCTION void kk_block_set(const size_type block_dim, value_type *dst, const value_type *val) { + memcpy((void *)dst, val, block_dim * block_dim * sizeof(value_type)); } // Performs A += B on blocks template -KOKKOS_INLINE_FUNCTION void kk_block_add(const size_type block_dim, - value_type *dst, - const value_type *val) { +KOKKOS_INLINE_FUNCTION void kk_block_add(const size_type block_dim, value_type *dst, const value_type *val) { const auto end = dst + block_dim * block_dim; while (dst < end) { *(dst++) += *(val++); @@ -57,33 +52,25 @@ KOKKOS_INLINE_FUNCTION void kk_block_add(const size_type block_dim, // Note: block is assumed to be row-major, dense matrix (no extra padding) // Note: set clear=true to set C = 0 before increment template > -KOKKOS_INLINE_FUNCTION void kk_block_dgemm(const size_type block_dim, - value_type *dst, - const value_type *valA, - const value_type *valB, - const bool clear = false) { + typename DGEMM = KokkosBatched::SerialGemmInternal> 
+KOKKOS_INLINE_FUNCTION void kk_block_dgemm(const size_type block_dim, value_type *dst, const value_type *valA, + const value_type *valB, const bool clear = false) { const auto ZERO = static_cast(0); const auto ONE = static_cast(1); - DGEMM::invoke(block_dim, block_dim, block_dim, ONE, valA, block_dim, 1, valB, - block_dim, 1, clear ? ZERO : ONE, dst, block_dim, 1); + DGEMM::invoke(block_dim, block_dim, block_dim, ONE, valA, block_dim, 1, valB, block_dim, 1, clear ? ZERO : ONE, dst, + block_dim, 1); } // dgemm: C = A * B template -KOKKOS_INLINE_FUNCTION void kk_block_set_mul(const size_type block_dim, - value_type *c_val, - const value_type *a_val, +KOKKOS_INLINE_FUNCTION void kk_block_set_mul(const size_type block_dim, value_type *c_val, const value_type *a_val, const value_type *b_val) { kk_block_dgemm(block_dim, c_val, a_val, b_val, true); } // dgemm: C += A * B template -KOKKOS_INLINE_FUNCTION void kk_block_add_mul(const size_type block_dim, - value_type *c_val, - const value_type *a_val, +KOKKOS_INLINE_FUNCTION void kk_block_add_mul(const size_type block_dim, value_type *c_val, const value_type *a_val, const value_type *b_val) { kk_block_dgemm(block_dim, c_val, a_val, b_val, false); } @@ -91,9 +78,7 @@ KOKKOS_INLINE_FUNCTION void kk_block_add_mul(const size_type block_dim, // Performs C += A * B (dense GEMM) on blocks // Note: all pointers reference dense row-major blocks (no extra padding) template -KOKKOS_INLINE_FUNCTION void kk_vector_block_add_mul(const size_type block_dim, - value_type *dst, - const value_type *valA, +KOKKOS_INLINE_FUNCTION void kk_vector_block_add_mul(const size_type block_dim, value_type *dst, const value_type *valA, const value_type *valB) { // NOTE: this should be replaced by batched DGEMM // once atomic increment is supported there @@ -102,8 +87,7 @@ KOKKOS_INLINE_FUNCTION void kk_vector_block_add_mul(const size_type block_dim, for (size_type col = 0; col < block_dim; ++col) { auto v = &dst[row_offset + col]; auto vb = valB + col; - 
for (const value_type *va = valA + row_offset, *end = va + block_dim; - va < end; ++va) { + for (const value_type *va = valA + row_offset, *end = va + block_dim; va < end; ++va) { Kokkos::atomic_add(v, (*va) * (*vb)); vb += block_dim; } diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Error.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Error.hpp index df8b21b8dfc4..05ce523ecfb4 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Error.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Error.hpp @@ -23,32 +23,25 @@ namespace KokkosKernels { namespace Impl { -inline void throw_runtime_exception(const std::string &msg) { - throw std::runtime_error(msg); -} +inline void throw_runtime_exception(const std::string &msg) { throw std::runtime_error(msg); } #if defined(KOKKOS_ENABLE_HIP) -inline void hip_internal_error_throw(hipError_t e, const char *name, - const char *file, const int line) { +inline void hip_internal_error_throw(hipError_t e, const char *name, const char *file, const int line) { std::ostringstream out; - out << name << " error( " << hipGetErrorName(e) - << "): " << hipGetErrorString(e); + out << name << " error( " << hipGetErrorName(e) << "): " << hipGetErrorString(e); if (file) { out << " " << file << ":" << line; } throw_runtime_exception(out.str()); } -inline void hip_internal_safe_call(hipError_t e, const char *name, - const char *file = nullptr, - const int line = 0) { +inline void hip_internal_safe_call(hipError_t e, const char *name, const char *file = nullptr, const int line = 0) { if (hipSuccess != e) { hip_internal_error_throw(e, name, file, line); } } -#define KOKKOSKERNELS_IMPL_HIP_SAFE_CALL(call) \ - hip_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOSKERNELS_IMPL_HIP_SAFE_CALL(call) hip_internal_safe_call(call, #call, __FILE__, __LINE__) #endif } // namespace Impl @@ -80,16 +73,6 @@ inline void hip_internal_safe_call(hipError_t e, const char *name, } while (0) // SYCL cannot printf 
like the other backends quite yet -#if KOKKOS_VERSION < 40199 -#define IMPL_KERNEL_THROW(condition, msg) \ - do { \ - if (!(condition)) { \ - KOKKOS_IMPL_DO_NOT_USE_PRINTF("KERNEL CHECK FAILED:\n %s\n %s\n", \ - #condition, msg); \ - Kokkos::abort(""); \ - } \ - } while (0) -#else #define IMPL_KERNEL_THROW(condition, msg) \ do { \ if (!(condition)) { \ @@ -97,12 +80,10 @@ inline void hip_internal_safe_call(hipError_t e, const char *name, Kokkos::abort(""); \ } \ } while (0) -#endif #ifndef NDEBUG #define KK_ASSERT(condition) IMPL_THROW(condition, "", std::logic_error) -#define KK_ASSERT_MSG(condition, msg) \ - IMPL_THROW(condition, msg, std::logic_error) +#define KK_ASSERT_MSG(condition, msg) IMPL_THROW(condition, msg, std::logic_error) #define KK_KERNEL_ASSERT(condition) IMPL_KERNEL_THROW(condition, "") #define KK_KERNEL_ASSERT_MSG(condition, msg) IMPL_KERNEL_THROW(condition, msg) #else @@ -113,12 +94,10 @@ inline void hip_internal_safe_call(hipError_t e, const char *name, #endif #define KK_REQUIRE(condition) IMPL_THROW(condition, "", std::logic_error) -#define KK_REQUIRE_MSG(condition, msg) \ - IMPL_THROW(condition, msg, std::logic_error) +#define KK_REQUIRE_MSG(condition, msg) IMPL_THROW(condition, msg, std::logic_error) #define KK_USER_REQUIRE(condition) IMPL_THROW(condition, "", std::runtime_error) -#define KK_USER_REQUIRE_MSG(condition, msg) \ - IMPL_THROW(condition, msg, std::runtime_error) +#define KK_USER_REQUIRE_MSG(condition, msg) IMPL_THROW(condition, msg, std::runtime_error) #define KK_KERNEL_REQUIRE(condition) IMPL_KERNEL_THROW(condition, "") #define KK_KERNEL_REQUIRE_MSG(condition, msg) IMPL_KERNEL_THROW(condition, msg) diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_ExecSpaceUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_ExecSpaceUtils.hpp index 4d3a3002b45b..2d167f5c7333 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_ExecSpaceUtils.hpp +++ 
b/packages/kokkos-kernels/common/src/KokkosKernels_ExecSpaceUtils.hpp @@ -29,14 +29,7 @@ namespace KokkosKernels { namespace Impl { -enum ExecSpaceType { - Exec_SERIAL, - Exec_OMP, - Exec_THREADS, - Exec_CUDA, - Exec_HIP, - Exec_SYCL -}; +enum ExecSpaceType { Exec_SERIAL, Exec_OMP, Exec_THREADS, Exec_CUDA, Exec_HIP, Exec_SYCL }; template KOKKOS_FORCEINLINE_FUNCTION ExecSpaceType kk_get_exec_space_type() { @@ -105,8 +98,7 @@ constexpr KOKKOS_INLINE_FUNCTION bool kk_is_gpu_exec_space() { #ifdef KOKKOS_ENABLE_SYCL template <> -constexpr KOKKOS_INLINE_FUNCTION bool -kk_is_gpu_exec_space() { +constexpr KOKKOS_INLINE_FUNCTION bool kk_is_gpu_exec_space() { return true; } #endif @@ -122,8 +114,7 @@ constexpr KOKKOS_INLINE_FUNCTION bool kk_is_x86_64_mem_space() { #if __x86_64__ template <> -constexpr KOKKOS_INLINE_FUNCTION bool -kk_is_x86_64_mem_space() { +constexpr KOKKOS_INLINE_FUNCTION bool kk_is_x86_64_mem_space() { return true; } #endif // x86_64 architectures @@ -139,8 +130,7 @@ constexpr KOKKOS_INLINE_FUNCTION bool kk_is_a64fx_mem_space() { #if defined(__ARM_ARCH_ISA_A64) template <> -constexpr KOKKOS_INLINE_FUNCTION bool -kk_is_a64fx_mem_space() { +constexpr KOKKOS_INLINE_FUNCTION bool kk_is_a64fx_mem_space() { return true; } #endif // a64fx architectures @@ -148,86 +138,67 @@ kk_is_a64fx_mem_space() { // Host function to determine free and total device memory. // Will throw if execution space doesn't support this. template -inline void kk_get_free_total_memory(size_t& /* free_mem */, - size_t& /* total_mem */) { +inline void kk_get_free_total_memory(size_t& /* free_mem */, size_t& /* total_mem */) { std::ostringstream oss; - oss << "Error: memory space " << MemorySpace::name() - << " does not support querying free/total memory."; + oss << "Error: memory space " << MemorySpace::name() << " does not support querying free/total memory."; throw std::runtime_error(oss.str()); } // Host function to determine free and total device memory. 
// Will throw if execution space doesn't support this. template -inline void kk_get_free_total_memory(size_t& /* free_mem */, - size_t& /* total_mem */, - int /* n_streams */) { +inline void kk_get_free_total_memory(size_t& /* free_mem */, size_t& /* total_mem */, int /* n_streams */) { std::ostringstream oss; - oss << "Error: memory space " << MemorySpace::name() - << " does not support querying free/total memory."; + oss << "Error: memory space " << MemorySpace::name() << " does not support querying free/total memory."; throw std::runtime_error(oss.str()); } #ifdef KOKKOS_ENABLE_CUDA template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem, - int n_streams) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, int n_streams) { cudaMemGetInfo(&free_mem, &total_mem); free_mem /= n_streams; total_mem /= n_streams; } template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { kk_get_free_total_memory(free_mem, total_mem, 1); } template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem, - int n_streams) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, int n_streams) { kk_get_free_total_memory(free_mem, total_mem, n_streams); } template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { kk_get_free_total_memory(free_mem, total_mem, 1); } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem, int n_streams) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, int n_streams) { kk_get_free_total_memory(free_mem, total_mem, n_streams); } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& 
total_mem) { kk_get_free_total_memory(free_mem, total_mem, 1); } #endif #ifdef KOKKOS_ENABLE_HIP template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem, - int n_streams) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, int n_streams) { KOKKOSKERNELS_IMPL_HIP_SAFE_CALL(hipMemGetInfo(&free_mem, &total_mem)); free_mem /= n_streams; total_mem /= n_streams; } template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem, - int n_streams) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, int n_streams) { kk_get_free_total_memory(free_mem, total_mem, n_streams); } template <> -inline void kk_get_free_total_memory(size_t& free_mem, - size_t& total_mem) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { kk_get_free_total_memory(free_mem, total_mem, 1); } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { kk_get_free_total_memory(free_mem, total_mem, 1); } #endif @@ -236,12 +207,11 @@ inline void kk_get_free_total_memory( // available. Also, we assume to query memory associated with the default queue. 
#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_ARCH_INTEL_GPU) template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem, int n_streams) { +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, + int n_streams) { sycl::queue queue; - sycl::device device = queue.get_device(); - auto level_zero_handle = - sycl::get_native(device); + sycl::device device = queue.get_device(); + auto level_zero_handle = sycl::get_native(device); uint32_t n_memory_modules = 0; zesDeviceEnumMemoryModules(level_zero_handle, &n_memory_modules, nullptr); @@ -255,8 +225,7 @@ inline void kk_get_free_total_memory( total_mem = 0; free_mem = 0; std::vector mem_handles(n_memory_modules); - zesDeviceEnumMemoryModules(level_zero_handle, &n_memory_modules, - mem_handles.data()); + zesDeviceEnumMemoryModules(level_zero_handle, &n_memory_modules, mem_handles.data()); for (auto& mem_handle : mem_handles) { zes_mem_properties_t memory_properties{ZES_STRUCTURE_TYPE_MEM_PROPERTIES}; @@ -274,38 +243,30 @@ inline void kk_get_free_total_memory( } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem) { - kk_get_free_total_memory( - free_mem, total_mem, 1); +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { + kk_get_free_total_memory(free_mem, total_mem, 1); } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem, int n_streams) { - kk_get_free_total_memory( - free_mem, total_mem, n_streams); +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, + int n_streams) { + kk_get_free_total_memory(free_mem, total_mem, n_streams); } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem) { - kk_get_free_total_memory( - free_mem, total_mem, 1); +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { + kk_get_free_total_memory(free_mem, total_mem, 1); } template <> -inline void 
kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem, int n_streams) { - kk_get_free_total_memory( - free_mem, total_mem, n_streams); +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem, + int n_streams) { + kk_get_free_total_memory(free_mem, total_mem, n_streams); } template <> -inline void kk_get_free_total_memory( - size_t& free_mem, size_t& total_mem) { - kk_get_free_total_memory( - free_mem, total_mem, 1); +inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { + kk_get_free_total_memory(free_mem, total_mem, 1); } #endif @@ -325,8 +286,7 @@ inline int kk_get_max_vector_size() { } #endif -inline int kk_get_suggested_vector_size(const size_t nr, const size_t nnz, - const ExecSpaceType exec_space) { +inline int kk_get_suggested_vector_size(const size_t nr, const size_t nnz, const ExecSpaceType exec_space) { int suggested_vector_size_ = 1; int max_vector_size = 1; switch (exec_space) { @@ -360,17 +320,14 @@ inline int kk_get_suggested_vector_size(const size_t nr, const size_t nnz, } else { suggested_vector_size_ = 64; } - if (suggested_vector_size_ > max_vector_size) - suggested_vector_size_ = max_vector_size; + if (suggested_vector_size_ > max_vector_size) suggested_vector_size_ = max_vector_size; break; } return suggested_vector_size_; } -inline int kk_get_suggested_team_size(const int vector_size, - const ExecSpaceType exec_space) { - if (exec_space == Exec_CUDA || exec_space == Exec_HIP || - exec_space == Exec_SYCL) { +inline int kk_get_suggested_team_size(const int vector_size, const ExecSpaceType exec_space) { + if (exec_space == Exec_CUDA || exec_space == Exec_HIP || exec_space == Exec_SYCL) { // TODO: where this is used, tune the target value for // threads per block (but 256 is probably OK for CUDA and HIP) return 256 / vector_size; diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Half.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Half.hpp deleted file mode 100644 index 
c22646b5aac8..000000000000 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Half.hpp +++ /dev/null @@ -1,65 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#if KOKKOS_VERSION < 40199 -#ifndef KOKKOSKERNELS_HALF_HPP -#define KOKKOSKERNELS_HALF_HPP - -#include "Kokkos_Core.hpp" - -namespace KokkosKernels { -namespace Experimental { -////////////// BEGIN FP16/binary16 limits ////////////// -#define KOKKOSKERNELS_IMPL_FP16_MAX 65504.0F // Maximum normalized number -#define KOKKOSKERNELS_IMPL_FP16_MIN \ - 0.000000059604645F // Minimum normalized positive half precision number -#define KOKKOSKERNELS_IMPL_FP16_RADIX \ - 2 // Value of the base of the exponent representation. TODO: on all archs? -#define KOKKOSKERNELS_IMPL_FP16_MANT_DIG \ - 15 // Number of digits in the matissa that can be represented without losing - // precision. TODO: Confirm this -#define KOKKOSKERNELS_IMPL_FP16_MIN_EXP \ - -14 // This is the smallest possible exponent value -#define KOKKOSKERNELS_IMPL_FP16_MAX_EXP \ - 15 // This is the largest possible exponent value -#define KOKKOSKERNELS_IMPL_FP16_SIGNIFICAND_BITS 10 -#define KOKKOSKERNELS_IMPL_FP16_EPSILON 0.0009765625F // 1/2^10 -#define KOKKOSKERNELS_IMPL_HUGE_VALH 0x7c00 // bits [10,14] set. 
-////////////// END FP16/binary16 limits ////////////// - -////////////// BEGIN BF16/float16 limits ////////////// -#define KOKKOSKERNELS_IMPL_BF16_MAX 3.38953139e38 // Maximum normalized number -#define KOKKOSKERNELS_IMPL_BF16_MIN \ - 1.1754494351e-38 // Minimum normalized positive bhalf number -#define KOKKOSKERNELS_IMPL_BF16_RADIX \ - 2 // Value of the base of the exponent representation. TODO: on all archs? -#define KOKKOSKERNELS_IMPL_BF16_MANT_DIG_MIN 2 -#define KOKKOSKERNELS_IMPL_BF16_MANT_DIG_MAX 3 -#define KOKKOSKERNELS_IMPL_BF16_MANT_DIG \ - KOKKOSKERNELS_IMPL_BF16_MANT_DIG_MIN // Number of digits in the matissa that - // can be represented without losing - // precision. -#define KOKKOSKERNELS_IMPL_BF16_MIN_EXP \ - -126 // This is the smallest possible exponent value -#define KOKKOSKERNELS_IMPL_BF16_MAX_EXP \ - 127 // This is the largest possible exponent value -#define KOKKOSKERNELS_IMPL_BF16_EPSILON 0.0078125F // 1/2^7 -////////////// END BF16/bfloat16 limits ////////////// - -} // namespace Experimental -} // namespace KokkosKernels -#endif // KOKKOSKERNELS_HALF_HPP -#endif // KOKKOS_VERSION < 40199 diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_HashmapAccumulator.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_HashmapAccumulator.hpp index 1085cec4af42..c57dfa83fd53 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_HashmapAccumulator.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_HashmapAccumulator.hpp @@ -36,8 +36,7 @@ struct HashOpType { struct pow2Modulo {}; }; -template +template /** * \brief HashmapAccumulator class * The use of this is described in the paper: @@ -96,13 +95,7 @@ struct HashmapAccumulator { * Assumption: hash_begins_ are all initialized to -1. 
*/ KOKKOS_INLINE_FUNCTION - HashmapAccumulator() - : hash_begins(), - hash_nexts(), - keys(), - values(), - __max_value_size(), - __hashOpRHS(0) {} + HashmapAccumulator() : hash_begins(), hash_nexts(), keys(), values(), __max_value_size(), __hashOpRHS(0) {} /** * \brief parameterized constructor HashmapAccumulator @@ -120,9 +113,8 @@ struct HashmapAccumulator { * Assumption: hash_begins_ are all initialized to -1. */ KOKKOS_INLINE_FUNCTION - HashmapAccumulator(const size_type max_value_size_, const size_type hashOpRHS, - size_type *hash_begins_, size_type *hash_nexts_, - key_type *keys_, value_type *values_) + HashmapAccumulator(const size_type max_value_size_, const size_type hashOpRHS, size_type *hash_begins_, + size_type *hash_nexts_, key_type *keys_, value_type *values_) : hash_begins(hash_begins_), hash_nexts(hash_nexts_), keys(keys_), @@ -139,11 +131,8 @@ struct HashmapAccumulator { // Accumulation is OR operation. // Insertion is sequential, no race condition for the insertion. KOKKOS_INLINE_FUNCTION - int sequential_insert_into_hash_mergeOr_TrackHashes(key_type key, - value_type value, - size_type *used_size_, - size_type *used_hash_size, - size_type *used_hashes) { + int sequential_insert_into_hash_mergeOr_TrackHashes(key_type key, value_type value, size_type *used_size_, + size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_index; if (key == -1) return __insert_success; @@ -175,10 +164,9 @@ struct HashmapAccumulator { // TODO: This function is for triangle counting. // Assume that there are 2 values for triangle count. 
KOKKOS_INLINE_FUNCTION - int sequential_insert_into_hash_mergeOr_TriangleCount_TrackHashes( - key_type key, value_type value, value_type *values2, - size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + int sequential_insert_into_hash_mergeOr_TriangleCount_TrackHashes(key_type key, value_type value, value_type *values2, + size_type *used_size_, size_type *used_hash_size, + size_type *used_hashes) { size_type hash, i, my_index; if (key == -1) return __insert_success; @@ -210,10 +198,10 @@ struct HashmapAccumulator { // this is used in slow triangle counting method. // L x Incidence KOKKOS_INLINE_FUNCTION - int sequential_insert_into_hash_mergeAnd_TriangleCount_TrackHashes( - key_type key, value_type value, value_type *values2, - size_type * /*used_size_*/, size_type * /*used_hash_size*/, - size_type * /*used_hashes*/) { + int sequential_insert_into_hash_mergeAnd_TriangleCount_TrackHashes(key_type key, value_type value, + value_type *values2, size_type * /*used_size_*/, + size_type * /*used_hash_size*/, + size_type * /*used_hashes*/) { size_type hash, i; if (key == -1) return __insert_success; @@ -234,8 +222,7 @@ struct HashmapAccumulator { // this is used in LxL or Incidence^T x L KOKKOS_INLINE_FUNCTION - value_type sequential_insert_into_hash_mergeAnd_TriangleCount_TrackHashes( - key_type key, value_type value) { + value_type sequential_insert_into_hash_mergeAnd_TriangleCount_TrackHashes(key_type key, value_type value) { size_type hash, i; if (key == -1) return __insert_success; @@ -254,10 +241,9 @@ struct HashmapAccumulator { // this is used in slow triangle counting method. 
// L x Incidence KOKKOS_INLINE_FUNCTION - int sequential_insert_into_hash_TriangleCount_TrackHashes( - key_type key, value_type value, value_type *values2, - size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + int sequential_insert_into_hash_TriangleCount_TrackHashes(key_type key, value_type value, value_type *values2, + size_type *used_size_, size_type *used_hash_size, + size_type *used_hashes) { size_type hash, my_index; if (key == -1) return __insert_success; @@ -283,11 +269,10 @@ struct HashmapAccumulator { // this is used in LxL or Incidence^T x L KOKKOS_INLINE_FUNCTION - int sequential_insert_into_hash_TriangleCount_TrackHashes( - key_type key, value_type value, size_type *used_size_, - size_type *used_hash_size, - size_type *used_hashes) // issue-508, TODO figure out what this - // "used_hashes" is for + int sequential_insert_into_hash_TriangleCount_TrackHashes(key_type key, value_type value, size_type *used_size_, + size_type *used_hash_size, + size_type *used_hashes) // issue-508, TODO figure out what + // this "used_hashes" is for { size_type hash, my_index; @@ -315,9 +300,8 @@ struct HashmapAccumulator { // Insertion is sequential, no race condition for the insertion. // the mergeadd used in the numeric of KKMEM. KOKKOS_INLINE_FUNCTION - void sequential_insert_into_hash_mergeAdd_TrackHashes( - key_type key, value_type value, size_type *used_size_, - size_type *used_hash_size, size_type *used_hashes) { + void sequential_insert_into_hash_mergeAdd_TrackHashes(key_type key, value_type value, size_type *used_size_, + size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_index; if (key == -1) return; @@ -348,9 +332,7 @@ struct HashmapAccumulator { // used in the compression to count the sets. // also used in the symbolic of spgemm if no compression is applied. 
KOKKOS_INLINE_FUNCTION - int sequential_insert_into_hash_TrackHashes(key_type key, - size_type *used_size_, - size_type *used_hash_size, + int sequential_insert_into_hash_TrackHashes(key_type key, size_type *used_size_, size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_index; @@ -383,10 +365,9 @@ struct HashmapAccumulator { // Insertion is simulteanous for the vector lanes of a thread. // used_size should be a shared pointer among the thread vectors KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeAdd_TrackHashes( - const key_type key, const value_type value, - volatile size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + int vector_atomic_insert_into_hash_mergeAdd_TrackHashes(const key_type key, const value_type value, + volatile size_type *used_size_, size_type *used_hash_size, + size_type *used_hashes) { size_type hash, i, my_write_index, hashbeginning; if (key == -1) return __insert_success; @@ -438,11 +419,9 @@ struct HashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); if (hashbeginning == -1) { - used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = - hash; + used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = hash; } hash_nexts[my_write_index] = hashbeginning; return __insert_success; @@ -453,10 +432,9 @@ struct HashmapAccumulator { // except uses atomic addition on updating the value // necessary if duplicate key insertions happen simultaneously KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeAtomicAdd_TrackHashes( - const key_type key, const value_type value, - volatile size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + int vector_atomic_insert_into_hash_mergeAtomicAdd_TrackHashes(const key_type key, const value_type value, + 
volatile size_type *used_size_, + size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_write_index, hashbeginning; if (key == -1) return __insert_success; @@ -509,11 +487,9 @@ struct HashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); if (hashbeginning == -1) { - used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = - hash; + used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = hash; } hash_nexts[my_write_index] = hashbeginning; return __insert_success; @@ -521,9 +497,8 @@ struct HashmapAccumulator { } KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeAdd_TrackHashes_no_list( - const key_type key, const value_type value, size_type *used_hash_size, - size_type *used_hashes) { + int vector_atomic_insert_into_hash_mergeAdd_TrackHashes_no_list(const key_type key, const value_type value, + size_type *used_hash_size, size_type *used_hashes) { size_type hash; if (key == -1) return __insert_success; @@ -541,11 +516,9 @@ struct HashmapAccumulator { Kokkos::atomic_add(values + hash, value); return __insert_success; } else if (keys[hash] == -1) { - if (Kokkos::atomic_compare_exchange_strong(keys + hash, -1, - key)) { + if (Kokkos::atomic_compare_exchange_strong(keys + hash, -1, key)) { // should only be here if we used a new hash - used_hashes[Kokkos::atomic_fetch_add(used_hash_size, - size_type(1))] = hash; + used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = hash; Kokkos::atomic_add(values + hash, value); return __insert_success; } @@ -565,11 +538,9 @@ struct HashmapAccumulator { // NOTE: this is an exact copy of vector_atmoic_insert_into_hash_mergeAdd from // https://github.com/kokkos/kokkos-kernels/blob/750fe24508a69ed4dba92bb4a9e17a6094b1a083/src/common/KokkosKernels_HashmapAccumulator.hpp#L442-L502 
template - KOKKOS_INLINE_FUNCTION int - vector_atomic_insert_into_hash_mergeAdd_with_team_level_list_length( - const team_member_t & /* teamMember */, const int /* vector_size */, - size_type hash, const key_type key, const value_type value, - volatile size_type *used_size_, const size_type max_value_size_) { + KOKKOS_INLINE_FUNCTION int vector_atomic_insert_into_hash_mergeAdd_with_team_level_list_length( + const team_member_t & /* teamMember */, const int /* vector_size */, size_type hash, const key_type key, + const value_type value, volatile size_type *used_size_, const size_type max_value_size_) { // Cannot compute hash here due to impl_speed use-case // hash = __compute_hash(key, __hashOpRHS); if (key == -1) return __insert_success; @@ -591,8 +562,7 @@ struct HashmapAccumulator { if (used_size_[0] >= max_value_size_) { return __insert_full; } - size_type my_write_index = - Kokkos::atomic_fetch_add(used_size_, size_type(1)); + size_type my_write_index = Kokkos::atomic_fetch_add(used_size_, size_type(1)); if (my_write_index >= max_value_size_) { return __insert_full; @@ -630,8 +600,7 @@ struct HashmapAccumulator { // hashbeginning = hash_begins[hash] // hash_begins[hash] = my_write_index // hash_nexts[my_write_index] = hash_begins[hash] - size_type hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + size_type hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); hash_nexts[my_write_index] = hashbeginning; return __insert_success; } @@ -645,20 +614,17 @@ struct HashmapAccumulator { // Insertion is simulteanous for the vector lanes of a thread. 
// used_size should be a shared pointer among the thread vectors KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeAdd(const key_type key, - const value_type value, + int vector_atomic_insert_into_hash_mergeAdd(const key_type key, const value_type value, volatile size_type *used_size_) { if (key == -1) return __insert_success; return vector_atomic_insert_into_hash_mergeAdd_with_team_level_list_length( - nullptr, 0, __compute_hash(key, __hashOpRHS), key, value, used_size_, - __max_value_size); + nullptr, 0, __compute_hash(key, __hashOpRHS), key, value, used_size_, __max_value_size); } // used in symbolic of kkmem if the compression is not applied. KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash(const key_type &key, - volatile size_type *used_size_) { + int vector_atomic_insert_into_hash(const key_type &key, volatile size_type *used_size_) { size_type hash, i, my_write_index, hashbeginning; if (key == -1) return __insert_success; @@ -692,8 +658,7 @@ struct HashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); hash_nexts[my_write_index] = hashbeginning; return __insert_success; } @@ -706,8 +671,7 @@ struct HashmapAccumulator { // Insertion is simulteanous for the vector lanes of a thread. 
// used_size should be a shared pointer among the thread vectors KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeOr(const key_type &key, - const value_type &value, + int vector_atomic_insert_into_hash_mergeOr(const key_type &key, const value_type &value, volatile size_type *used_size_) { size_type hash, i, my_write_index, hashbeginning; @@ -744,8 +708,7 @@ struct HashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); hash_nexts[my_write_index] = hashbeginning; return __insert_success; } @@ -758,10 +721,9 @@ struct HashmapAccumulator { // Insertion is simulteanous for the vector lanes of a thread. // used_size should be a shared pointer among the thread vectors KOKKOS_INLINE_FUNCTION - int vector_atomic_insert_into_hash_mergeOr_TrackHashes( - const key_type &key, const value_type &value, - volatile size_type *used_size_, size_type *used_hash_size, - size_type *used_hashes) { + int vector_atomic_insert_into_hash_mergeOr_TrackHashes(const key_type &key, const value_type &value, + volatile size_type *used_size_, size_type *used_hash_size, + size_type *used_hashes) { size_type hash, i, my_write_index, hashbeginning; if (key == -1) return __insert_success; @@ -797,11 +759,9 @@ struct HashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); if (hashbeginning == -1) { - used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = - hash; + used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = hash; } hash_nexts[my_write_index] = hashbeginning; return __insert_success; @@ -809,10 +769,8 @@ struct HashmapAccumulator { } KOKKOS_INLINE_FUNCTION - int 
vector_atomic_insert_into_hash_TrackHashes(const key_type &key, - volatile size_type *used_size_, - size_type *used_hash_size, - size_type *used_hashes) { + int vector_atomic_insert_into_hash_TrackHashes(const key_type &key, volatile size_type *used_size_, + size_type *used_hash_size, size_type *used_hashes) { size_type hash, i, my_write_index, hashbeginning; if (key == -1) return __insert_success; @@ -846,11 +804,9 @@ struct HashmapAccumulator { hash_nexts[my_write_index] = hash_begins[hash]; #endif - hashbeginning = - Kokkos::atomic_exchange(hash_begins + hash, my_write_index); + hashbeginning = Kokkos::atomic_exchange(hash_begins + hash, my_write_index); if (hashbeginning == -1) { - used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = - hash; + used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] = hash; } hash_nexts[my_write_index] = hashbeginning; return __insert_success; @@ -863,11 +819,9 @@ struct HashmapAccumulator { static constexpr int __insert_success = 0; static constexpr int __insert_full = 1; - template ::value || - std::is_same::value, - std::size_t>::type = 0> + template ::value || + std::is_same::value, + std::size_t>::type = 0> KOKKOS_INLINE_FUNCTION int __compute_hash(size_type key, size_type bitmask) { size_type hash = key & bitmask; #ifdef HASHMAPACCUMULATOR_ASSERT_ENABLED @@ -877,9 +831,8 @@ struct HashmapAccumulator { return hash; } - template ::value, - std::size_t>::type = 0> + template ::value, std::size_t>::type = 0> KOKKOS_INLINE_FUNCTION int __compute_hash(size_type key, size_type divisor) { size_type hash = key % divisor; #ifdef HASHMAPACCUMULATOR_ASSERT_ENABLED diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_IOUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_IOUtils.hpp index fd3e44db0991..eb44082a7495 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_IOUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_IOUtils.hpp @@ -47,15 +47,13 @@ inline void 
getRandomBounds(double mag, Scalar &start, Scalar &end) { } template <> -inline void getRandomBounds(double mag, Kokkos::complex &start, - Kokkos::complex &end) { +inline void getRandomBounds(double mag, Kokkos::complex &start, Kokkos::complex &end) { start = Kokkos::complex(-mag, -mag); end = Kokkos::complex(mag, mag); } template <> -inline void getRandomBounds(double mag, Kokkos::complex &start, - Kokkos::complex &end) { +inline void getRandomBounds(double mag, Kokkos::complex &start, Kokkos::complex &end) { start = Kokkos::complex(-mag, -mag); end = Kokkos::complex(mag, mag); } @@ -98,9 +96,7 @@ inline size_t kk_get_file_size(const char *file) { } template -void buildEdgeListFromBinSrcTarg_undirected(const char *fnameSrc, - const char *fnameTarg, - size_t &numEdges, lno_t **srcs, +void buildEdgeListFromBinSrcTarg_undirected(const char *fnameSrc, const char *fnameTarg, size_t &numEdges, lno_t **srcs, lno_t **dst) { size_t srcFileSize = kk_get_file_size(fnameSrc); size_t trgFileSize = kk_get_file_size(fnameTarg); @@ -150,8 +146,7 @@ inline void kk_write_1Dview_to_file(idx_array_type view, const char *filename) { } template -inline void kk_read_1Dview_from_file(idx_array_type &view, - const char *filename) { +inline void kk_read_1Dview_from_file(idx_array_type &view, const char *filename) { typedef typename idx_array_type::HostMirror host_type; // typedef typename idx_array_type::size_type idx; host_type host_view = Kokkos::create_mirror_view(view); @@ -183,8 +178,7 @@ inline void kk_write_2Dview_to_file(idx_array_type view, const char *filename) { } template -inline void kk_read_2Dview_from_file(idx_array_type &view, - const char *filename) { +inline void kk_read_2Dview_from_file(idx_array_type &view, const char *filename) { typedef typename idx_array_type::HostMirror host_type; // typedef typename idx_array_type::size_type idx; host_type host_view = Kokkos::create_mirror_view(view); @@ -221,8 +215,7 @@ inline void kk_write_3Dview_to_file(idx_array_type view, 
const char *filename) { } template -inline void kk_read_3Dview_from_file(idx_array_type &view, - const char *filename) { +inline void kk_read_3Dview_from_file(idx_array_type &view, const char *filename) { typedef typename idx_array_type::HostMirror host_type; // typedef typename idx_array_type::size_type idx; host_type host_view = Kokkos::create_mirror_view(view); @@ -241,8 +234,7 @@ inline void kk_read_3Dview_from_file(idx_array_type &view, } template -[[deprecated]] void write_edgelist_bin(size_t ne, const idx *edge_begins, - const idx *edge_ends, const wt *ew, +[[deprecated]] void write_edgelist_bin(size_t ne, const idx *edge_begins, const idx *edge_ends, const wt *ew, const char *filename) { std::ofstream myFile(filename, std::ios::out | std::ios::binary); myFile.write((char *)&ne, sizeof(idx)); @@ -253,8 +245,7 @@ template } template -void read_edgelist_bin(idx *ne, idx **edge_begins, idx **edge_ends, wt **ew, - const char *filename) { +void read_edgelist_bin(idx *ne, idx **edge_begins, idx **edge_ends, wt **ew, const char *filename) { std::ifstream myFile(filename, std::ios::in | std::ios::binary); myFile.read((char *)ne, sizeof(idx)); @@ -269,8 +260,7 @@ void read_edgelist_bin(idx *ne, idx **edge_begins, idx **edge_ends, wt **ew, inline bool endswith(std::string const &fullString, std::string const &ending) { if (fullString.length() >= ending.length()) { - return (0 == fullString.compare(fullString.length() - ending.length(), - ending.length(), ending)); + return (0 == fullString.compare(fullString.length() - ending.length(), ending.length(), ending)); } else { return false; } diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_LowerBound.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_LowerBound.hpp index e091932453ff..f7a5ccef967c 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_LowerBound.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_LowerBound.hpp @@ -87,15 +87,11 @@ namespace Impl { At most view.size() 
predicate function calls */ -template > -KOKKOS_INLINE_FUNCTION typename ViewLike::size_type -lower_bound_sequential_thread( - const ViewLike &view, const typename ViewLike::non_const_value_type &value, - Pred pred = Pred()) { +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_thread( + const ViewLike &view, const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { using size_type = typename ViewLike::size_type; - static_assert(1 == ViewLike::rank, - "lower_bound_sequential_thread requires rank-1 views"); + static_assert(1 == ViewLike::rank, "lower_bound_sequential_thread requires rank-1 views"); size_type i = 0; while (i < view.size() && pred(view(i), value)) { @@ -116,14 +112,11 @@ lower_bound_sequential_thread( At most log2(view.size()) + 1 predicate function calls */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_binary_thread( - const ViewLike &view, const typename ViewLike::non_const_value_type &value, - Pred pred = Pred()) { + const ViewLike &view, const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { using size_type = typename ViewLike::size_type; - static_assert(1 == ViewLike::rank, - "lower_bound_binary_thread requires rank-1 views"); + static_assert(1 == ViewLike::rank, "lower_bound_binary_thread requires rank-1 views"); size_type lo = 0; size_type hi = view.size(); @@ -155,13 +148,10 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_binary_thread( This minimizes the calls to predicate: for view.size() >= 8, this does a binary search, otherwise, a linear search */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_thread( - const ViewLike &view, const typename ViewLike::non_const_value_type &value, - Pred pred = Pred()) { - static_assert(1 == ViewLike::rank, - "lower_bound_thread requires rank-1 views"); + const ViewLike &view, const typename ViewLike::non_const_value_type &value, Pred 
pred = Pred()) { + static_assert(1 == ViewLike::rank, "lower_bound_thread requires rank-1 views"); /* sequential search makes on average 0.5 * view.size memory accesses binary search makes log2(view.size)+1 accesses @@ -196,18 +186,14 @@ namespace Impl { Uses a single thread to call \c lower_bound_thread, and broadcasts that to all team members. */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_single_team( - const TeamMember &handle, const ViewLike &view, - const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + const TeamMember &handle, const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { typename ViewLike::size_type idx; Kokkos::single( Kokkos::PerTeam(handle), - [&](typename ViewLike::size_type &lidx) { - lidx = KokkosKernels::lower_bound_thread(view, value, pred); - }, - idx); + [&](typename ViewLike::size_type &lidx) { lidx = KokkosKernels::lower_bound_thread(view, value, pred); }, idx); return idx; } @@ -229,16 +215,12 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_single_team( Apply pred(view(i), value) for i in [lo, hi) */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_team( - const TeamMember &handle, const ViewLike &view, - const typename ViewLike::non_const_value_type &value, - typename ViewLike::size_type lo, typename ViewLike::size_type hi, - Pred pred = Pred()) { + const TeamMember &handle, const ViewLike &view, const typename ViewLike::non_const_value_type &value, + typename ViewLike::size_type lo, typename ViewLike::size_type hi, Pred pred = Pred()) { using size_type = typename ViewLike::size_type; - static_assert(1 == ViewLike::rank, - "lower_bound_sequential_team requires rank-1 views"); + static_assert(1 == ViewLike::rank, "lower_bound_sequential_team requires rank-1 views"); static_assert(is_iota_v || Kokkos::is_view::value, "lower_bound_sequential_team requires a " 
"KokkosKernels::Impl::Iota or a Kokkos::View"); @@ -251,7 +233,7 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_team( Kokkos::TeamThreadRange(handle, lo, hi), [&](const size_type &i, size_type &li) { li = KOKKOSKERNELS_MACRO_MIN(li, hi); - if (i < li) { // no need to search higher than the smallest so far + if (i < li) { // no need to search higher than the smallest so far if (!pred(view(i), value)) { // look for the smallest index that does // not satisfy li = i; @@ -276,11 +258,10 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_team( \returns To all team members, the smallest i for which pred(view(i), value) is false or view.size() if no such value */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_team( - const TeamMember &handle, const ViewLike &view, - const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + const TeamMember &handle, const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { return lower_bound_sequential_team(handle, view, value, 0, view.size(), pred); } @@ -310,10 +291,9 @@ struct Range { /// \brief maximizes the lower bound, and minimizes the upper bound of a Range template struct RangeReducer { - using reducer = RangeReducer; - using value_type = Range; - using result_view_type = - Kokkos::View *, Space, Kokkos::MemoryUnmanaged>; + using reducer = RangeReducer; + using value_type = Range; + using result_view_type = Kokkos::View *, Space, Kokkos::MemoryUnmanaged>; private: value_type &value; @@ -356,13 +336,11 @@ struct RangeReducer { false Once there are fewer values left than threads in the team, switch to team sequential search */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_kary_team( - const TeamMember &handle, const ViewLike &view, - const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { - 
static_assert(1 == ViewLike::rank, - "lower_bound_kary_team requires rank-1 views"); + const TeamMember &handle, const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { + static_assert(1 == ViewLike::rank, "lower_bound_kary_team requires rank-1 views"); static_assert(is_iota_v || Kokkos::is_view::value, "lower_bound_kary_team requires a " "KokkosKernels::Impl::Iota or a Kokkos::View"); @@ -378,9 +356,8 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_kary_team( } // otherwise, split the region up among threads - size_type mid = - lo + (hi - lo) * (handle.team_rank() + 1) / (handle.team_size() + 1); - auto ve = view(mid); + size_type mid = lo + (hi - lo) * (handle.team_rank() + 1) / (handle.team_size() + 1); + auto ve = view(mid); // reduce across threads to figure out where the new search bounds are // if a thread satisfies the predicate, the first element that does not @@ -433,14 +410,12 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_kary_team( Pred should be a binary function comparing two `typename View::non_const_value_type` */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_team( - const TeamMember &handle, const ViewLike &view, - const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + const TeamMember &handle, const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { static_assert(1 == ViewLike::rank, "lower_bound_team requires rank-1 views"); - static_assert(KokkosKernels::Impl::is_iota_v || - Kokkos::is_view::value, + static_assert(KokkosKernels::Impl::is_iota_v || Kokkos::is_view::value, "lower_bound_team requires a " "KokkosKernels::Impl::Iota or a Kokkos::View"); diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Macros.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Macros.hpp index 04234a5ce270..6c4093ca10ef 100644 --- 
a/packages/kokkos-kernels/common/src/KokkosKernels_Macros.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Macros.hpp @@ -34,15 +34,13 @@ // is enabled, since in that case, Kokkos::ThreadVectorRange should be used // instead for SIMD parallel loops. -#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ - defined(KOKKOS_ENABLE_OPENMP) +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && defined(KOKKOS_ENABLE_OPENMP) // For clang OpenMP support, see // https://clang.llvm.org/docs/OpenMPSupport.html#id1 #if defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) // GCC 4.8.5 and older do not support #pragma omp simd // Do not enable when using GCC 7.2.0 or 7.3.0 + C++17 due to a bug in gcc -#if (KOKKOS_COMPILER_GNU > 485) && \ - !(KOKKOS_COMPILER_GNU == 720 && defined(KOKKOS_ENABLE_CXX17)) && \ +#if (KOKKOS_COMPILER_GNU > 485) && !(KOKKOS_COMPILER_GNU == 720 && defined(KOKKOS_ENABLE_CXX17)) && \ !(KOKKOS_COMPILER_GNU == 730 && defined(KOKKOS_ENABLE_CXX17)) #define KOKKOSKERNELS_ENABLE_OMP_SIMD #endif @@ -99,9 +97,8 @@ // define KOKKOSKERNELS_CUDA_INDEPENDENT_THREADS if we are targeting a CUDA // architecture with "independent thread scheduling" (Volta70 and up). This // requires some extra logic in HashmapAccumulator to avoid data races. 
-#if defined(KOKKOS_ARCH_VOLTA) || defined(KOKKOS_ARCH_TURING75) || \ - defined(KOKKOS_ARCH_AMPERE) || defined(KOKKOS_ARCH_ADA89) || \ - defined(KOKKOS_ARCH_HOPPER) +#if defined(KOKKOS_ARCH_VOLTA) || defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ + defined(KOKKOS_ARCH_ADA89) || defined(KOKKOS_ARCH_HOPPER) #define KOKKOSKERNELS_CUDA_INDEPENDENT_THREADS #endif diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Predicates.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Predicates.hpp index a741d1353a23..f3bc6f2b2c28 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Predicates.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Predicates.hpp @@ -32,17 +32,14 @@ namespace KokkosKernels { template struct GT { using value_type = T; - static_assert(!Kokkos::ArithTraits::is_complex, - "Please define custom predicates for ordering complex types"); + static_assert(!Kokkos::ArithTraits::is_complex, "Please define custom predicates for ordering complex types"); /** * @brief Return true if a is greater than b * @param a First value to be compared * @param b Second value to be compared */ - KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const - noexcept { + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const noexcept { return a > b; } }; @@ -53,13 +50,10 @@ struct GT { template struct GTE { using value_type = T; - static_assert(!Kokkos::ArithTraits::is_complex, - "Please define custom predicates for ordering complex types"); + static_assert(!Kokkos::ArithTraits::is_complex, "Please define custom predicates for ordering complex types"); /// \brief return a >= b - KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const - noexcept { + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const noexcept { return a >= b; } }; @@ -70,13 +64,10 @@ struct GTE { 
template struct LT { using value_type = T; - static_assert(!Kokkos::ArithTraits::is_complex, - "Please define custom predicates for ordering complex types"); + static_assert(!Kokkos::ArithTraits::is_complex, "Please define custom predicates for ordering complex types"); /// \brief return a < b - KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const - noexcept { + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const noexcept { return a < b; } }; @@ -87,13 +78,10 @@ struct LT { template struct LTE { using value_type = T; - static_assert(!Kokkos::ArithTraits::is_complex, - "Please define custom predicates for ordering complex types"); + static_assert(!Kokkos::ArithTraits::is_complex, "Please define custom predicates for ordering complex types"); /// \brief return a <= b - KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const - noexcept { + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const noexcept { return a <= b; } }; @@ -106,10 +94,7 @@ struct Equal { using value_type = T; /// \brief return a == b - KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const { - return a == b; - } + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const { return a == b; } }; /** @@ -133,8 +118,7 @@ struct Neg { * @param b Second value to be compared by the predicate * @return Boolean inverse of the result of the predicate applied to a and b */ - KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const { + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const { return !pred_(a, b); } @@ -153,8 +137,7 @@ struct Refl { constexpr Refl(const Pred &pred) : pred_(pred) {} /// \brief return the underlying binary predicate with reversed arguments - 
KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, - const value_type &b) const { + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, const value_type &b) const { return pred_(b, a); } diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_PrintConfiguration.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_PrintConfiguration.hpp index c2e3a5187f3d..587021091281 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_PrintConfiguration.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_PrintConfiguration.hpp @@ -37,8 +37,7 @@ inline void print_cublas_version_if_enabled(std::ostream& os) { inline void print_cusparse_version_if_enabled(std::ostream& os) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE os << " " - << "KOKKOSKERNELS_ENABLE_TPL_CUSPARSE: " << cusparse_version_string() - << "\n"; + << "KOKKOSKERNELS_ENABLE_TPL_CUSPARSE: " << cusparse_version_string() << "\n"; #else os << " " << "KOKKOSKERNELS_ENABLE_TPL_CUSPARSE: no\n"; @@ -48,8 +47,7 @@ inline void print_cusparse_version_if_enabled(std::ostream& os) { inline void print_cusolver_version_if_enabled(std::ostream& os) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER os << " " - << "KOKKOSKERNELS_ENABLE_TPL_CUSOLVER: " << cusolver_version_string() - << "\n"; + << "KOKKOSKERNELS_ENABLE_TPL_CUSOLVER: " << cusolver_version_string() << "\n"; #else os << " " << "KOKKOSKERNELS_ENABLE_TPL_CUSOLVER: no\n"; @@ -156,9 +154,8 @@ inline void print_version(std::ostream& os) { // KOKKOSKERNELS_VERSION is used because MAJOR, MINOR and PATCH macros // are not available in Kernels os << " " - << "KokkosKernels Version: " << KOKKOSKERNELS_VERSION_MAJOR << "." - << KOKKOSKERNELS_VERSION_MINOR << "." << KOKKOSKERNELS_VERSION_PATCH - << '\n'; + << "KokkosKernels Version: " << KOKKOSKERNELS_VERSION_MAJOR << "." << KOKKOSKERNELS_VERSION_MINOR << "." 
+ << KOKKOSKERNELS_VERSION_PATCH << '\n'; } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_PrintUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_PrintUtils.hpp index 74b32c793a66..b4817022fc55 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_PrintUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_PrintUtils.hpp @@ -27,13 +27,11 @@ template struct Histogram { in_lno_view_t inview; out_lno_view_t outview; - Histogram(in_lno_view_t inview_, out_lno_view_t outview_) - : inview(inview_), outview(outview_) {} + Histogram(in_lno_view_t inview_, out_lno_view_t outview_) : inview(inview_), outview(outview_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t& ii) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; Kokkos::atomic_fetch_add(&(outview(inview(ii))), atomic_incr_type(1)); } }; @@ -47,13 +45,11 @@ struct Histogram { * them with 0, and size must be big enough to hold all values in input view. */ template -inline void kk_get_histogram( - typename in_lno_view_t::size_type in_elements, in_lno_view_t in_view, - out_lno_view_t histogram /*must be initialized with 0s*/) { +inline void kk_get_histogram(typename in_lno_view_t::size_type in_elements, in_lno_view_t in_view, + out_lno_view_t histogram /*must be initialized with 0s*/) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for( - "KokkosKernels::Common::GetHistogram", my_exec_space(0, in_elements), - Histogram(in_view, histogram)); + Kokkos::parallel_for("KokkosKernels::Common::GetHistogram", my_exec_space(0, in_elements), + Histogram(in_view, histogram)); MyExecSpace().fence(); } @@ -68,9 +64,9 @@ inline void kk_get_histogram( * pritned. This parameter is not used if print_all is set to true. 
*/ template -inline std::enable_if_t kk_print_1Dview( - std::ostream& os, idx_array_type view, bool print_all = false, - const char* sep = " ", size_t print_size = 40) { +inline std::enable_if_t kk_print_1Dview(std::ostream& os, idx_array_type view, + bool print_all = false, const char* sep = " ", + size_t print_size = 40) { typedef typename idx_array_type::HostMirror host_type; typedef typename idx_array_type::size_type idx; host_type host_view = Kokkos::create_mirror_view(view); @@ -95,12 +91,11 @@ inline std::enable_if_t kk_print_1Dview( * rank-2 vectors same like rank-1 vectors and prints multi-vector dimensions. */ template -inline std::enable_if_t= 2> kk_print_1Dview( - std::ostream& os, idx_array_type view, bool print_all = false, - const char* sep = " ", size_t print_size = 40) { +inline std::enable_if_t= 2> kk_print_1Dview(std::ostream& os, idx_array_type view, + bool print_all = false, const char* sep = " ", + size_t print_size = 40) { if (idx_array_type::rank == 2 && view.extent(1) == 1) { - kk_print_1Dview(os, subview(view, Kokkos::ALL, 0), print_all, sep, - print_size); + kk_print_1Dview(os, subview(view, Kokkos::ALL, 0), print_all, sep, print_size); return; } os << "[" << view.extent(0); @@ -120,8 +115,7 @@ inline std::enable_if_t= 2> kk_print_1Dview( * This interface is provided for backwards compatiblity. 
*/ template -inline void kk_print_1Dview(idx_array_type view, bool print_all = false, - size_t print_size = 40) { +inline void kk_print_1Dview(idx_array_type view, bool print_all = false, size_t print_size = 40) { kk_print_1Dview(std::cout, view, print_all, " ", print_size); } diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp index 60bdd097e17a..0ae29a2f50e0 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp @@ -21,8 +21,7 @@ #define KOKKOSKERNELS_MACRO_MIN(x, y) ((x) < (y) ? (x) : (y)) #define KOKKOSKERNELS_MACRO_MAX(x, y) ((x) < (y) ? (y) : (x)) -#define KOKKOSKERNELS_MACRO_ABS(x) \ - Kokkos::ArithTraits::type>::abs(x) +#define KOKKOSKERNELS_MACRO_ABS(x) Kokkos::ArithTraits::type>::abs(x) namespace KokkosKernels { @@ -53,8 +52,7 @@ struct ExclusiveParallelPrefixSum { KOKKOS_INLINE_FUNCTION void operator()(const size_t ii, value_type &update, const bool final) const { - value_type val = - (ii == array_sum.extent(0) - 1) ? value_type(0) : array_sum(ii); + value_type val = (ii == array_sum.extent(0) - 1) ? value_type(0) : array_sum(ii); if (final) { array_sum(ii) = value_type(update); } @@ -85,12 +83,10 @@ struct InclusiveParallelPrefixSum { * \param arr: the array for which the prefix sum will be performed. 
*/ template -inline void kk_exclusive_parallel_prefix_sum( - const MyExecSpace &exec, typename view_t::value_type num_elements, - view_t arr) { +inline void kk_exclusive_parallel_prefix_sum(const MyExecSpace &exec, typename view_t::value_type num_elements, + view_t arr) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", - my_exec_space(exec, 0, num_elements), + Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", my_exec_space(exec, 0, num_elements), ExclusiveParallelPrefixSum(arr)); } @@ -101,8 +97,7 @@ inline void kk_exclusive_parallel_prefix_sum( * \param arr: the array for which the prefix sum will be performed. */ template -inline void kk_exclusive_parallel_prefix_sum( - typename view_t::value_type num_elements, view_t arr) { +inline void kk_exclusive_parallel_prefix_sum(typename view_t::value_type num_elements, view_t arr) { kk_exclusive_parallel_prefix_sum(MyExecSpace(), num_elements, arr); } @@ -117,12 +112,10 @@ inline void kk_exclusive_parallel_prefix_sum( * prefix sum. */ template -inline void kk_exclusive_parallel_prefix_sum( - const MyExecSpace &exec, typename view_t::value_type num_elements, - view_t arr, typename view_t::non_const_value_type &finalSum) { +inline void kk_exclusive_parallel_prefix_sum(const MyExecSpace &exec, typename view_t::value_type num_elements, + view_t arr, typename view_t::non_const_value_type &finalSum) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", - my_exec_space(exec, 0, num_elements), + Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", my_exec_space(exec, 0, num_elements), ExclusiveParallelPrefixSum(arr), finalSum); } @@ -136,9 +129,8 @@ inline void kk_exclusive_parallel_prefix_sum( * prefix sum. 
*/ template -inline void kk_exclusive_parallel_prefix_sum( - typename view_t::value_type num_elements, view_t arr, - typename view_t::non_const_value_type &finalSum) { +inline void kk_exclusive_parallel_prefix_sum(typename view_t::value_type num_elements, view_t arr, + typename view_t::non_const_value_type &finalSum) { kk_exclusive_parallel_prefix_sum(MyExecSpace(), num_elements, arr, finalSum); } @@ -150,13 +142,10 @@ inline void kk_exclusive_parallel_prefix_sum( /// \param arr: the array for which the prefix sum will be performed. /// template -void kk_inclusive_parallel_prefix_sum( - MyExecSpace my_exec_space, - typename forward_array_type::value_type num_elements, - forward_array_type arr) { +void kk_inclusive_parallel_prefix_sum(MyExecSpace my_exec_space, typename forward_array_type::value_type num_elements, + forward_array_type arr) { typedef Kokkos::RangePolicy range_policy_t; - Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", - range_policy_t(my_exec_space, 0, num_elements), + Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", range_policy_t(my_exec_space, 0, num_elements), InclusiveParallelPrefixSum(arr)); } @@ -167,9 +156,7 @@ void kk_inclusive_parallel_prefix_sum( /// \param arr: the array for which the prefix sum will be performed. 
/// template -void kk_inclusive_parallel_prefix_sum( - typename forward_array_type::value_type num_elements, - forward_array_type arr) { +void kk_inclusive_parallel_prefix_sum(typename forward_array_type::value_type num_elements, forward_array_type arr) { MyExecSpace my_exec_space; return kk_inclusive_parallel_prefix_sum(my_exec_space, num_elements, arr); } @@ -180,9 +167,7 @@ struct ReductionFunctor { ReductionFunctor(view_t arr_) : array_sum(arr_) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t ii, typename view_t::value_type &update) const { - update += array_sum(ii); - } + void operator()(const size_t ii, typename view_t::value_type &update) const { update += array_sum(ii); } }; template @@ -191,55 +176,44 @@ struct ReductionFunctor2 { ReductionFunctor2(view_t arr_) : array_sum(arr_) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t ii, size_t &update) const { - update += array_sum(ii); - } + void operator()(const size_t ii, size_t &update) const { update += array_sum(ii); } }; template struct DiffReductionFunctor { view_t array_begins; view2_t array_ends; - DiffReductionFunctor(view_t begins, view2_t ends) - : array_begins(begins), array_ends(ends) {} + DiffReductionFunctor(view_t begins, view2_t ends) : array_begins(begins), array_ends(ends) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t ii, - typename view_t::non_const_value_type &update) const { + void operator()(const size_t ii, typename view_t::non_const_value_type &update) const { update += (array_ends(ii) - array_begins(ii)); } }; template -inline void kk_reduce_diff_view( - size_t num_elements, view_t smaller, view2_t bigger, - typename view_t::non_const_value_type &reduction) { +inline void kk_reduce_diff_view(size_t num_elements, view_t smaller, view2_t bigger, + typename view_t::non_const_value_type &reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ReduceDiffView", my_exec_space(0, num_elements), - 
DiffReductionFunctor(smaller, bigger), reduction); + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceDiffView", my_exec_space(0, num_elements), + DiffReductionFunctor(smaller, bigger), reduction); } template struct DiffReductionFunctorP { const it *array_begins; const it *array_ends; - DiffReductionFunctorP(const it *begins, const it *ends) - : array_begins(begins), array_ends(ends) {} + DiffReductionFunctorP(const it *begins, const it *ends) : array_begins(begins), array_ends(ends) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t ii, it &update) const { - update += (array_ends[ii] - array_begins[ii]); - } + void operator()(const size_t ii, it &update) const { update += (array_ends[ii] - array_begins[ii]); } }; template -inline void kkp_reduce_diff_view(const size_t num_elements, const it *smaller, - const it *bigger, it &reduction) { +inline void kkp_reduce_diff_view(const size_t num_elements, const it *smaller, const it *bigger, it &reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ReduceDiffView", my_exec_space(0, num_elements), - DiffReductionFunctorP(smaller, bigger), reduction); + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceDiffView", my_exec_space(0, num_elements), + DiffReductionFunctorP(smaller, bigger), reduction); } /*** @@ -249,33 +223,27 @@ inline void kkp_reduce_diff_view(const size_t num_elements, const it *smaller, * \param arr: the array for which the prefix sum will be performed. 
*/ template -inline void kk_reduce_view(size_t num_elements, view_t arr, - typename view_t::value_type &reduction) { +inline void kk_reduce_view(size_t num_elements, view_t arr, typename view_t::value_type &reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce("KokkosKernels::Common::ReduceView", - my_exec_space(0, num_elements), + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceView", my_exec_space(0, num_elements), ReductionFunctor(arr), reduction); } template -inline void kk_reduce_view2(size_t num_elements, view_t arr, - size_t &reduction) { +inline void kk_reduce_view2(size_t num_elements, view_t arr, size_t &reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce("KokkosKernels::Common::ReduceView2", - my_exec_space(0, num_elements), + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceView2", my_exec_space(0, num_elements), ReductionFunctor2(arr), reduction); } template ::mag_type> + typename eps_type = typename Kokkos::ArithTraits::mag_type> struct IsIdenticalFunctor { view_type1 view1; view_type2 view2; eps_type eps; - IsIdenticalFunctor(view_type1 view1_, view_type2 view2_, eps_type eps_) - : view1(view1_), view2(view2_), eps(eps_) {} + IsIdenticalFunctor(view_type1 view1_, view_type2 view2_, eps_type eps_) : view1(view1_), view2(view2_), eps(eps_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &i, size_t &is_equal) const { @@ -290,8 +258,7 @@ struct IsIdenticalFunctor { } }; -template +template bool kk_is_identical_view(view_type1 view1, view_type2 view2, eps_type eps) { if (view1.extent(0) != view2.extent(0)) { return false; @@ -301,10 +268,8 @@ bool kk_is_identical_view(view_type1 view1, view_type2 view2, eps_type eps) { typedef Kokkos::RangePolicy my_exec_space; size_t issame = 0; - Kokkos::parallel_reduce( - "KokkosKernels::Common::IsIdenticalView", my_exec_space(0, num_elements), - IsIdenticalFunctor(view1, view2, eps), - issame); + 
Kokkos::parallel_reduce("KokkosKernels::Common::IsIdenticalView", my_exec_space(0, num_elements), + IsIdenticalFunctor(view1, view2, eps), issame); MyExecSpace().fence(); if (issame > 0) { return false; @@ -314,15 +279,13 @@ bool kk_is_identical_view(view_type1 view1, view_type2 view2, eps_type eps) { } template ::mag_type> + typename eps_type = typename Kokkos::ArithTraits::mag_type> struct IsRelativelyIdenticalFunctor { view_type1 view1; view_type2 view2; eps_type eps; - IsRelativelyIdenticalFunctor(view_type1 view1_, view_type2 view2_, - eps_type eps_) + IsRelativelyIdenticalFunctor(view_type1 view1_, view_type2 view2_, eps_type eps_) : view1(view1_), view2(view2_), eps(eps_) {} KOKKOS_INLINE_FUNCTION @@ -333,35 +296,22 @@ struct IsRelativelyIdenticalFunctor { typedef Kokkos::ArithTraits KATM; mag_type val_diff = KATM::zero(); - if (KAT::abs(view1(i)) > mag_type(eps) || - KAT::abs(view2(i)) > mag_type(eps)) { - val_diff = KAT::abs(view1(i) - view2(i)) / - (KAT::abs(view1(i)) + KAT::abs(view2(i))); + if (KAT::abs(view1(i)) > mag_type(eps) || KAT::abs(view2(i)) > mag_type(eps)) { + val_diff = KAT::abs(view1(i) - view2(i)) / (KAT::abs(view1(i)) + KAT::abs(view2(i))); } if (val_diff > mag_type(eps)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "Values at index %d, %.6f + %.6fi and %.6f + %.6fi, differ too much " - "(eps = %e)\n", - (int)i, KAT::real(view1(i)), KAT::imag(view1(i)), KAT::real(view2(i)), - KAT::imag(view2(i)), eps); -#else Kokkos::printf( "Values at index %d, %.6f + %.6fi and %.6f + %.6fi, differ too much " - "(eps = %e)\n", - (int)i, KAT::real(view1(i)), KAT::imag(view1(i)), KAT::real(view2(i)), - KAT::imag(view2(i)), eps); -#endif + "(eps = %e, rel err = %e)\n", + (int)i, KAT::real(view1(i)), KAT::imag(view1(i)), KAT::real(view2(i)), KAT::imag(view2(i)), eps, val_diff); num_diffs++; } } }; -template -bool kk_is_relatively_identical_view(view_type1 view1, view_type2 view2, - eps_type eps) { +template +bool 
kk_is_relatively_identical_view(view_type1 view1, view_type2 view2, eps_type eps) { if (view1.extent(0) != view2.extent(0)) { return false; } @@ -370,12 +320,9 @@ bool kk_is_relatively_identical_view(view_type1 view1, view_type2 view2, typedef Kokkos::RangePolicy my_exec_space; size_t numDifferences = 0; - Kokkos::parallel_reduce( - "KokkosKernels::Common::IsRelativelyIdenticalView", - my_exec_space(0, num_elements), - IsRelativelyIdenticalFunctor( - view1, view2, eps), - numDifferences); + Kokkos::parallel_reduce("KokkosKernels::Common::IsRelativelyIdenticalView", my_exec_space(0, num_elements), + IsRelativelyIdenticalFunctor(view1, view2, eps), + numDifferences); return numDifferences == 0; } @@ -385,8 +332,7 @@ struct ReduceMaxFunctor { typedef typename view_type::non_const_value_type value_type; const value_type min_val; ReduceMaxFunctor(view_type view_to_reduce_) - : view_to_reduce(view_to_reduce_), - min_val((std::numeric_limits::lowest())) {} + : view_to_reduce(view_to_reduce_), min_val((std::numeric_limits::lowest())) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &i, value_type &max_reduction) const { value_type val = view_to_reduce(i); @@ -412,28 +358,24 @@ struct ReduceMaxFunctor { }; template -void kk_view_reduce_max( - size_t num_elements, view_type view_to_reduce, - typename view_type::non_const_value_type &max_reduction) { +void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce, + typename view_type::non_const_value_type &max_reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ReduceMax", my_exec_space(0, num_elements), - ReduceMaxFunctor(view_to_reduce), max_reduction); + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", my_exec_space(0, num_elements), + ReduceMaxFunctor(view_to_reduce), max_reduction); } // xorshift hash/pseudorandom function (supported for 32- and 64-bit integer // types only) template KOKKOS_FORCEINLINE_FUNCTION Value xorshiftHash(Value v) { 
- static_assert(std::is_unsigned::value, - "xorshiftHash: value must be an unsigned integer type"); + static_assert(std::is_unsigned::value, "xorshiftHash: value must be an unsigned integer type"); uint64_t x = v; x ^= x >> 12; x ^= x << 25; x ^= x >> 27; - return std::is_same::value - ? static_cast((x * 2685821657736338717ULL - 1) >> 16) - : static_cast(x * 2685821657736338717ULL - 1); + return std::is_same::value ? static_cast((x * 2685821657736338717ULL - 1) >> 16) + : static_cast(x * 2685821657736338717ULL - 1); } struct ViewHashFunctor { @@ -466,16 +408,14 @@ uint32_t hashView(const View &v) { // but it's not defined on Intel 19 (with GCC 7.2.0 standard library). // So just check if it's available before using. #ifdef __cpp_lib_has_unique_object_representations - static_assert(std::has_unique_object_representations< - typename View::non_const_value_type>::value, + static_assert(std::has_unique_object_representations::value, "KokkosKernels::Impl::hashView: the view's element type must " "not have any padding bytes."); #endif size_t nbytes = v.span() * sizeof(typename View::value_type); uint32_t h; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, nbytes), - ViewHashFunctor(reinterpret_cast(v.data())), h); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, nbytes), + ViewHashFunctor(reinterpret_cast(v.data())), h); return h; } @@ -484,18 +424,15 @@ struct SequentialFillFunctor { using size_type = typename V::size_type; using val_type = typename V::non_const_value_type; SequentialFillFunctor(const V &v_, val_type start_) : v(v_), start(start_) {} - KOKKOS_INLINE_FUNCTION void operator()(size_type i) const { - v(i) = start + (val_type)i; - } + KOKKOS_INLINE_FUNCTION void operator()(size_type i) const { v(i) = start + (val_type)i; } V v; val_type start; }; template void sequential_fill(const V &v, typename V::non_const_value_type start = 0) { - Kokkos::parallel_for( - Kokkos::RangePolicy(0, v.extent(0)), - SequentialFillFunctor(v, start)); + 
Kokkos::parallel_for(Kokkos::RangePolicy(0, v.extent(0)), + SequentialFillFunctor(v, start)); } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Sorting.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Sorting.hpp index 20ce6deaa22d..f91f11c16498 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Sorting.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Sorting.hpp @@ -17,7 +17,7 @@ #define _KOKKOSKERNELS_SORTING_HPP #include "Kokkos_Core.hpp" -#include "KokkosKernels_SimpleUtils.hpp" //for kk_exclusive_parallel_prefix_sum +#include "KokkosKernels_SimpleUtils.hpp" //for kk_exclusive_parallel_prefix_sum #include "KokkosKernels_ExecSpaceUtils.hpp" //for kk_is_gpu_exec_space #include @@ -26,10 +26,7 @@ namespace KokkosKernels { namespace Impl { template struct DefaultComparator { - KOKKOS_INLINE_FUNCTION bool operator()(const Value lhs, - const Value rhs) const { - return lhs < rhs; - } + KOKKOS_INLINE_FUNCTION bool operator()(const Value lhs, const Value rhs) const { return lhs < rhs; } }; } // namespace Impl @@ -39,9 +36,8 @@ struct DefaultComparator { // Bitonic sort: sorts v according to the comparator object's operator(). // Default comparator is just operator< for v's element type. -template < - typename View, typename ExecSpace, typename Ordinal, - typename Comparator = Impl::DefaultComparator> +template > void bitonicSort(View v, const Comparator& comp = Comparator()); // -------------------------------------------------------- @@ -51,15 +47,12 @@ void bitonicSort(View v, const Comparator& comp = Comparator()); // Radix sort. Not in-place: requires scratch array 'valuesAux' to be the same // size as values. ValueType must be an unsigned integer type. 
template -KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, - ValueType* valuesAux, Ordinal n); +KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, ValueType* valuesAux, Ordinal n); // Same as SerialRadixSort, but also permutes perm[0...n] as it sorts // values[0...n]. template -KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, - ValueType* valuesAux, - PermType* perm, PermType* permAux, +KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, ValueType* valuesAux, PermType* perm, PermType* permAux, Ordinal n); // ------------------------------------------------------------------- @@ -70,39 +63,32 @@ KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, // raw array according to the comparator. template > -KOKKOS_INLINE_FUNCTION void TeamBitonicSort( - ValueType* values, Ordinal n, const TeamMember mem, - const Comparator& comp = Comparator()); +KOKKOS_INLINE_FUNCTION void TeamBitonicSort(ValueType* values, Ordinal n, const TeamMember mem, + const Comparator& comp = Comparator()); // Same as SerialRadixSort, but also permutes perm[0...n] as it sorts // values[0...n]. 
-template > -KOKKOS_INLINE_FUNCTION void TeamBitonicSort2( - ValueType* values, PermType* perm, Ordinal n, const TeamMember mem, - const Comparator& comp = Comparator()); +KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, Ordinal n, const TeamMember mem, + const Comparator& comp = Comparator()); namespace Impl { // Functor that sorts a view on one team -template +template struct BitonicSingleTeamFunctor { - BitonicSingleTeamFunctor(View& v_, const Comparator& comp_) - : v(v_), comp(comp_) {} + BitonicSingleTeamFunctor(View& v_, const Comparator& comp_) : v(v_), comp(comp_) {} KOKKOS_INLINE_FUNCTION void operator()(const TeamMember t) const { - KokkosKernels::TeamBitonicSort( - v.data(), v.extent(0), t, comp); + KokkosKernels::TeamBitonicSort(v.data(), v.extent(0), t, + comp); }; View v; Comparator comp; }; // Functor that sorts equally sized chunks on each team -template +template struct BitonicChunkFunctor { BitonicChunkFunctor(View& v_, const Comparator& comp_, Ordinal chunkSize_) : v(v_), comp(comp_), chunkSize(chunkSize_) {} @@ -111,9 +97,8 @@ struct BitonicChunkFunctor { Ordinal chunkStart = chunk * chunkSize; Ordinal n = chunkSize; if (chunkStart + n > Ordinal(v.extent(0))) n = v.extent(0) - chunkStart; - KokkosKernels::TeamBitonicSort( - v.data() + chunkStart, n, t, comp); + KokkosKernels::TeamBitonicSort(v.data() + chunkStart, n, + t, comp); }; View v; Comparator comp; @@ -122,12 +107,10 @@ struct BitonicChunkFunctor { // Functor that does just the first phase (brown) of bitonic sort on // equally-sized chunks -template +template struct BitonicPhase1Functor { typedef typename View::value_type Value; - BitonicPhase1Functor(View& v_, const Comparator& comp_, Ordinal boxSize_, - Ordinal teamsPerBox_) + BitonicPhase1Functor(View& v_, const Comparator& comp_, Ordinal boxSize_, Ordinal teamsPerBox_) : v(v_), comp(comp_), boxSize(boxSize_), teamsPerBox(teamsPerBox_) {} KOKKOS_INLINE_FUNCTION void operator()(const TeamMember t) const { 
Ordinal box = t.league_rank() / teamsPerBox; @@ -135,18 +118,17 @@ struct BitonicPhase1Functor { Ordinal work = boxSize / teamsPerBox / 2; Ordinal workStart = work * (t.league_rank() % teamsPerBox); Ordinal workReflect = boxSize - workStart - 1; - Kokkos::parallel_for(Kokkos::TeamThreadRange(t, work), - [&](const Ordinal i) { - Ordinal elem1 = boxStart + workStart + i; - Ordinal elem2 = boxStart + workReflect - i; - if (elem2 < Ordinal(v.extent(0))) { - if (comp(v(elem2), v(elem1))) { - Value temp = v(elem1); - v(elem1) = v(elem2); - v(elem2) = temp; - } - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(t, work), [&](const Ordinal i) { + Ordinal elem1 = boxStart + workStart + i; + Ordinal elem2 = boxStart + workReflect - i; + if (elem2 < Ordinal(v.extent(0))) { + if (comp(v(elem2), v(elem1))) { + Value temp = v(elem1); + v(elem1) = v(elem2); + v(elem2) = temp; + } + } + }); }; View v; Comparator comp; @@ -155,12 +137,10 @@ struct BitonicPhase1Functor { }; // Functor that does the second phase (red) of bitonic sort -template +template struct BitonicPhase2Functor { typedef typename View::value_type Value; - BitonicPhase2Functor(View& v_, const Comparator& comp_, Ordinal boxSize_, - Ordinal teamsPerBox_) + BitonicPhase2Functor(View& v_, const Comparator& comp_, Ordinal boxSize_, Ordinal teamsPerBox_) : v(v_), comp(comp_), boxSize(boxSize_), teamsPerBox(teamsPerBox_) {} KOKKOS_INLINE_FUNCTION void operator()(const TeamMember t) const { Ordinal logBoxSize = 1; @@ -170,18 +150,17 @@ struct BitonicPhase2Functor { Ordinal work = boxSize / teamsPerBox / 2; Ordinal workStart = boxStart + work * (t.league_rank() % teamsPerBox); Ordinal jump = boxSize / 2; - Kokkos::parallel_for(Kokkos::TeamThreadRange(t, work), - [&](const Ordinal i) { - Ordinal elem1 = workStart + i; - Ordinal elem2 = workStart + jump + i; - if (elem2 < Ordinal(v.extent(0))) { - if (comp(v(elem2), v(elem1))) { - Value temp = v(elem1); - v(elem1) = v(elem2); - v(elem2) = temp; - } - } - }); + 
Kokkos::parallel_for(Kokkos::TeamThreadRange(t, work), [&](const Ordinal i) { + Ordinal elem1 = workStart + i; + Ordinal elem2 = workStart + jump + i; + if (elem2 < Ordinal(v.extent(0))) { + if (comp(v(elem2), v(elem1))) { + Value temp = v(elem1); + v(elem1) = v(elem2); + v(elem2) = temp; + } + } + }); if (teamsPerBox == 1) { // This team can finish phase 2 for all the smaller red boxes that follow, // since there are no longer cross-team data dependencies @@ -189,26 +168,23 @@ struct BitonicPhase2Functor { t.team_barrier(); Ordinal logSubBoxSize = logBoxSize - subLevel; Ordinal subBoxSize = Ordinal(1) << logSubBoxSize; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(t, work), [&](const Ordinal i) { - Ordinal globalThread = i + t.league_rank() * work; - Ordinal subBox = globalThread >> (logSubBoxSize - 1); - Ordinal subBoxStart = subBox << logSubBoxSize; - Ordinal subBoxOffset = - globalThread & ((Ordinal(1) << (logSubBoxSize - 1)) - - 1); // i % (subBoxSize / 2) - Ordinal elem1 = subBoxStart + subBoxOffset; - // later phases (pink box): within a block, compare with fixed - // distance (boxSize / 2) apart - Ordinal elem2 = elem1 + subBoxSize / 2; - if (elem2 < Ordinal(v.extent(0))) { - if (comp(v(elem2), v(elem1))) { - Value temp = v(elem1); - v(elem1) = v(elem2); - v(elem2) = temp; - } - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(t, work), [&](const Ordinal i) { + Ordinal globalThread = i + t.league_rank() * work; + Ordinal subBox = globalThread >> (logSubBoxSize - 1); + Ordinal subBoxStart = subBox << logSubBoxSize; + Ordinal subBoxOffset = globalThread & ((Ordinal(1) << (logSubBoxSize - 1)) - 1); // i % (subBoxSize / 2) + Ordinal elem1 = subBoxStart + subBoxOffset; + // later phases (pink box): within a block, compare with fixed + // distance (boxSize / 2) apart + Ordinal elem2 = elem1 + subBoxSize / 2; + if (elem2 < Ordinal(v.extent(0))) { + if (comp(v(elem2), v(elem1))) { + Value temp = v(elem1); + v(elem1) = v(elem2); + v(elem2) = temp; + } + } 
+ }); } } }; @@ -228,18 +204,15 @@ struct BitonicPhase2Functor { // type and an arbitrary device-compatible comparison operator (provided through // operator() of Comparator) If comparator is void, use operator< (which should // only be used for primitives) -template +template void bitonicSort(View v, const Comparator& comp) { typedef Kokkos::TeamPolicy team_policy; typedef typename team_policy::member_type team_member; Ordinal n = v.extent(0); // If n is small, just sort on a single team if (n <= Ordinal(1) << 12) { - Kokkos::parallel_for( - team_policy(1, Kokkos::AUTO()), - Impl::BitonicSingleTeamFunctor( - v, comp)); + Kokkos::parallel_for(team_policy(1, Kokkos::AUTO()), + Impl::BitonicSingleTeamFunctor(v, comp)); } else { Ordinal npot = 1; while (npot < n) npot <<= 1; @@ -247,22 +220,17 @@ void bitonicSort(View v, const Comparator& comp) { Ordinal chunkSize = 512; Ordinal numTeams = npot / chunkSize; // First, sort within teams - Kokkos::parallel_for( - team_policy(numTeams, Kokkos::AUTO()), - Impl::BitonicChunkFunctor( - v, comp, chunkSize)); - for (int teamsPerBox = 2; teamsPerBox <= npot / chunkSize; - teamsPerBox *= 2) { + Kokkos::parallel_for(team_policy(numTeams, Kokkos::AUTO()), + Impl::BitonicChunkFunctor(v, comp, chunkSize)); + for (int teamsPerBox = 2; teamsPerBox <= npot / chunkSize; teamsPerBox *= 2) { Ordinal boxSize = teamsPerBox * chunkSize; Kokkos::parallel_for( team_policy(numTeams, Kokkos::AUTO()), - Impl::BitonicPhase1Functor( - v, comp, boxSize, teamsPerBox)); + Impl::BitonicPhase1Functor(v, comp, boxSize, teamsPerBox)); for (int boxDiv = 1; teamsPerBox >> boxDiv; boxDiv++) { - Kokkos::parallel_for( - team_policy(numTeams, Kokkos::AUTO()), - Impl::BitonicPhase2Functor( - v, comp, boxSize >> boxDiv, teamsPerBox >> boxDiv)); + Kokkos::parallel_for(team_policy(numTeams, Kokkos::AUTO()), + Impl::BitonicPhase2Functor( + v, comp, boxSize >> boxDiv, teamsPerBox >> boxDiv)); } } } @@ -273,11 +241,9 @@ void bitonicSort(View v, const Comparator& comp) 
{ // Better on CPU cores. Con: requires auxiliary storage, and this version only // works for integers template -KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, - ValueType* valuesAux, Ordinal n) { - static_assert( - std::is_integral::value && std::is_unsigned::value, - "radixSort can only be run on unsigned integers."); +KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, ValueType* valuesAux, Ordinal n) { + static_assert(std::is_integral::value && std::is_unsigned::value, + "radixSort can only be run on unsigned integers."); if (n <= 1) return; ValueType maxVal = 0; for (Ordinal i = 0; i < n; i++) { @@ -318,13 +284,13 @@ KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, // threads if (!inAux) { for (Ordinal i = 0; i < n; i++) { - Ordinal bucket = (values[i] & mask) >> maskPos; + Ordinal bucket = (values[i] & mask) >> maskPos; valuesAux[offset[bucket + 1] - count[bucket]] = values[i]; count[bucket]--; } } else { for (Ordinal i = 0; i < n; i++) { - Ordinal bucket = (valuesAux[i] & mask) >> maskPos; + Ordinal bucket = (valuesAux[i] & mask) >> maskPos; values[offset[bucket + 1] - count[bucket]] = valuesAux[i]; count[bucket]--; } @@ -348,13 +314,10 @@ KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, // lane. 
Con: requires auxiliary storage, this version only works for integers // (although float/double is possible) template -KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, - ValueType* valuesAux, - PermType* perm, PermType* permAux, +KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, ValueType* valuesAux, PermType* perm, PermType* permAux, Ordinal n) { - static_assert( - std::is_integral::value && std::is_unsigned::value, - "radixSort can only be run on unsigned integers."); + static_assert(std::is_integral::value && std::is_unsigned::value, + "radixSort can only be run on unsigned integers."); if (n <= 1) return; ValueType maxVal = 0; for (Ordinal i = 0; i < n; i++) { @@ -394,14 +357,14 @@ KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, // threads if (!inAux) { for (Ordinal i = 0; i < n; i++) { - Ordinal bucket = (values[i] & mask) >> maskPos; + Ordinal bucket = (values[i] & mask) >> maskPos; valuesAux[offset[bucket + 1] - count[bucket]] = values[i]; permAux[offset[bucket + 1] - count[bucket]] = perm[i]; count[bucket]--; } } else { for (Ordinal i = 0; i < n; i++) { - Ordinal bucket = (valuesAux[i] & mask) >> maskPos; + Ordinal bucket = (valuesAux[i] & mask) >> maskPos; values[offset[bucket + 1] - count[bucket]] = valuesAux[i]; perm[offset[bucket + 1] - count[bucket]] = permAux[i]; count[bucket]--; @@ -425,10 +388,8 @@ KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, // trivially-copyable) Pros: In-place, plenty of parallelism for GPUs, and // memory references are coalesced Con: O(n log^2(n)) serial time is bad on CPUs // Good diagram of the algorithm at https://en.wikipedia.org/wiki/Bitonic_sorter -template -KOKKOS_INLINE_FUNCTION void TeamBitonicSort(ValueType* values, Ordinal n, - const TeamMember mem, +template +KOKKOS_INLINE_FUNCTION void TeamBitonicSort(ValueType* values, Ordinal n, const TeamMember mem, const Comparator& comp) { // Algorithm only works on power-of-two input size only. 
// If n is not a power-of-two, will implicitly pretend @@ -443,52 +404,49 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort(ValueType* values, Ordinal n, for (Ordinal i = 0; i < levels; i++) { for (Ordinal j = 0; j <= i; j++) { // n/2 pairs of items are compared in parallel - Kokkos::parallel_for( - Kokkos::TeamVectorRange(mem, npot / 2), [=](const Ordinal t) { - // How big are the brown/pink boxes? - Ordinal boxSize = Ordinal(2) << (i - j); - // Which box contains this thread? - Ordinal boxID = t >> (i - j); // t * 2 / boxSize; - Ordinal boxStart = boxID << (1 + i - j); // boxID * boxSize - Ordinal boxOffset = t - (boxStart >> 1); // t - boxID * boxSize / - // 2; - Ordinal elem1 = boxStart + boxOffset; - if (j == 0) { - // first phase (brown box): within a block, compare with the - // opposite value in the box - Ordinal elem2 = boxStart + boxSize - 1 - boxOffset; - if (elem2 < n) { - // both elements in bounds, so compare them and swap if out of - // order - if (comp(values[elem2], values[elem1])) { - ValueType temp = values[elem1]; - values[elem1] = values[elem2]; - values[elem2] = temp; - } - } - } else { - // later phases (pink box): within a block, compare with fixed - // distance (boxSize / 2) apart - Ordinal elem2 = elem1 + boxSize / 2; - if (elem2 < n) { - if (comp(values[elem2], values[elem1])) { - ValueType temp = values[elem1]; - values[elem1] = values[elem2]; - values[elem2] = temp; - } - } + Kokkos::parallel_for(Kokkos::TeamVectorRange(mem, npot / 2), [=](const Ordinal t) { + // How big are the brown/pink boxes? + Ordinal boxSize = Ordinal(2) << (i - j); + // Which box contains this thread? 
+ Ordinal boxID = t >> (i - j); // t * 2 / boxSize; + Ordinal boxStart = boxID << (1 + i - j); // boxID * boxSize + Ordinal boxOffset = t - (boxStart >> 1); // t - boxID * boxSize / + // 2; + Ordinal elem1 = boxStart + boxOffset; + if (j == 0) { + // first phase (brown box): within a block, compare with the + // opposite value in the box + Ordinal elem2 = boxStart + boxSize - 1 - boxOffset; + if (elem2 < n) { + // both elements in bounds, so compare them and swap if out of + // order + if (comp(values[elem2], values[elem1])) { + ValueType temp = values[elem1]; + values[elem1] = values[elem2]; + values[elem2] = temp; + } + } + } else { + // later phases (pink box): within a block, compare with fixed + // distance (boxSize / 2) apart + Ordinal elem2 = elem1 + boxSize / 2; + if (elem2 < n) { + if (comp(values[elem2], values[elem1])) { + ValueType temp = values[elem1]; + values[elem1] = values[elem2]; + values[elem2] = temp; } - }); + } + } + }); mem.team_barrier(); } } } // Sort "values", while applying the same swaps to "perm" -template -KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, - Ordinal n, const TeamMember mem, +template +KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, Ordinal n, const TeamMember mem, const Comparator& comp) { // Algorithm only works on power-of-two input size only. // If n is not a power-of-two, will implicitly pretend @@ -503,48 +461,47 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, for (Ordinal i = 0; i < levels; i++) { for (Ordinal j = 0; j <= i; j++) { // n/2 pairs of items are compared in parallel - Kokkos::parallel_for( - Kokkos::TeamVectorRange(mem, npot / 2), [=](const Ordinal t) { - // How big are the brown/pink boxes? - Ordinal boxSize = Ordinal(2) << (i - j); - // Which box contains this thread? 
- Ordinal boxID = t >> (i - j); // t * 2 / boxSize; - Ordinal boxStart = boxID << (1 + i - j); // boxID * boxSize - Ordinal boxOffset = t - (boxStart >> 1); // t - boxID * boxSize / - // 2; - Ordinal elem1 = boxStart + boxOffset; - if (j == 0) { - // first phase (brown box): within a block, compare with the - // opposite value in the box - Ordinal elem2 = boxStart + boxSize - 1 - boxOffset; - if (elem2 < n) { - // both elements in bounds, so compare them and swap if out of - // order - if (comp(values[elem2], values[elem1])) { - ValueType temp1 = values[elem1]; - values[elem1] = values[elem2]; - values[elem2] = temp1; - PermType temp2 = perm[elem1]; - perm[elem1] = perm[elem2]; - perm[elem2] = temp2; - } - } - } else { - // later phases (pink box): within a block, compare with fixed - // distance (boxSize / 2) apart - Ordinal elem2 = elem1 + boxSize / 2; - if (elem2 < n) { - if (comp(values[elem2], values[elem1])) { - ValueType temp1 = values[elem1]; - values[elem1] = values[elem2]; - values[elem2] = temp1; - PermType temp2 = perm[elem1]; - perm[elem1] = perm[elem2]; - perm[elem2] = temp2; - } - } + Kokkos::parallel_for(Kokkos::TeamVectorRange(mem, npot / 2), [=](const Ordinal t) { + // How big are the brown/pink boxes? + Ordinal boxSize = Ordinal(2) << (i - j); + // Which box contains this thread? 
+ Ordinal boxID = t >> (i - j); // t * 2 / boxSize; + Ordinal boxStart = boxID << (1 + i - j); // boxID * boxSize + Ordinal boxOffset = t - (boxStart >> 1); // t - boxID * boxSize / + // 2; + Ordinal elem1 = boxStart + boxOffset; + if (j == 0) { + // first phase (brown box): within a block, compare with the + // opposite value in the box + Ordinal elem2 = boxStart + boxSize - 1 - boxOffset; + if (elem2 < n) { + // both elements in bounds, so compare them and swap if out of + // order + if (comp(values[elem2], values[elem1])) { + ValueType temp1 = values[elem1]; + values[elem1] = values[elem2]; + values[elem2] = temp1; + PermType temp2 = perm[elem1]; + perm[elem1] = perm[elem2]; + perm[elem2] = temp2; + } + } + } else { + // later phases (pink box): within a block, compare with fixed + // distance (boxSize / 2) apart + Ordinal elem2 = elem1 + boxSize / 2; + if (elem2 < n) { + if (comp(values[elem2], values[elem1])) { + ValueType temp1 = values[elem1]; + values[elem1] = values[elem2]; + values[elem2] = temp1; + PermType temp2 = perm[elem1]; + perm[elem1] = perm[elem2]; + perm[elem2] = temp2; } - }); + } + } + }); mem.team_barrier(); } } @@ -554,49 +511,40 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, // KokkosKernels::Impl:: namespace Impl { -template < - typename View, typename ExecSpace, typename Ordinal, - typename Comparator = Impl::DefaultComparator> +template > [[deprecated]] void bitonicSort(View v, const Comparator& comp = Comparator()) { KokkosKernels::bitonicSort(v, comp); } template -[[deprecated]] KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, - ValueType* valuesAux, - Ordinal n) { +[[deprecated]] KOKKOS_INLINE_FUNCTION void SerialRadixSort(ValueType* values, ValueType* valuesAux, Ordinal n) { KokkosKernels::SerialRadixSort(values, valuesAux, n); } // Same as SerialRadixSort, but also permutes perm[0...n] as it sorts // values[0...n]. 
template -[[deprecated]] KOKKOS_INLINE_FUNCTION void SerialRadixSort2( - ValueType* values, ValueType* valuesAux, PermType* perm, PermType* permAux, - Ordinal n) { - KokkosKernels::SerialRadixSort2( - values, valuesAux, perm, permAux, n); +[[deprecated]] KOKKOS_INLINE_FUNCTION void SerialRadixSort2(ValueType* values, ValueType* valuesAux, PermType* perm, + PermType* permAux, Ordinal n) { + KokkosKernels::SerialRadixSort2(values, valuesAux, perm, permAux, n); } template > -[[deprecated]] KOKKOS_INLINE_FUNCTION void TeamBitonicSort( - ValueType* values, Ordinal n, const TeamMember mem, - const Comparator& comp = Comparator()) { - KokkosKernels::TeamBitonicSort( - values, n, mem, comp); +[[deprecated]] KOKKOS_INLINE_FUNCTION void TeamBitonicSort(ValueType* values, Ordinal n, const TeamMember mem, + const Comparator& comp = Comparator()) { + KokkosKernels::TeamBitonicSort(values, n, mem, comp); } // Same as SerialRadixSort, but also permutes perm[0...n] as it sorts // values[0...n]. -template > -[[deprecated]] KOKKOS_INLINE_FUNCTION void TeamBitonicSort2( - ValueType* values, PermType* perm, Ordinal n, const TeamMember mem, - const Comparator& comp = Comparator()) { - KokkosKernels::TeamBitonicSort2(values, perm, n, mem, comp); +[[deprecated]] KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, Ordinal n, + const TeamMember mem, + const Comparator& comp = Comparator()) { + KokkosKernels::TeamBitonicSort2(values, perm, n, mem, comp); } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_TplsVersion.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_TplsVersion.hpp index 3e00d72457a8..692f0fd350cd 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_TplsVersion.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_TplsVersion.hpp @@ -50,8 +50,7 @@ inline std::string cusparse_version_string() { // Print version std::stringstream ss; - ss << CUSPARSE_VER_MAJOR << "." << CUSPARSE_VER_MINOR << "." 
- << CUSPARSE_VER_PATCH << "." << CUSPARSE_VER_BUILD; + ss << CUSPARSE_VER_MAJOR << "." << CUSPARSE_VER_MINOR << "." << CUSPARSE_VER_PATCH << "." << CUSPARSE_VER_BUILD; return ss.str(); } @@ -61,8 +60,7 @@ inline std::string cusparse_version_string() { inline std::string cusolver_version_string() { std::stringstream ss; - ss << CUSOLVER_VER_MAJOR << "." << CUSOLVER_VER_MINOR << "." - << CUSOLVER_VER_PATCH << "." << CUSOLVER_VER_BUILD; + ss << CUSOLVER_VER_MAJOR << "." << CUSOLVER_VER_MINOR << "." << CUSOLVER_VER_PATCH << "." << CUSOLVER_VER_BUILD; return ss.str(); } diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp index e40b81a762f7..aa477815d617 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp @@ -176,10 +176,8 @@ class UniformMemoryPool { * initialized_value: the value to initialize \param pool_type_: whether * ManyThread2OneChunk or OneThread2OneChunk */ - UniformMemoryPool(const size_t num_chunks_, const size_t set_chunk_size_, - const data_type initialized_value = 0, - const PoolType pool_type_ = OneThread2OneChunk, - bool initialize = true) + UniformMemoryPool(const size_t num_chunks_, const size_t set_chunk_size_, const data_type initialized_value = 0, + const PoolType pool_type_ = OneThread2OneChunk, bool initialize = true) : num_chunks(1), num_set_chunks(num_chunks_), modular_num_chunks(0), @@ -200,9 +198,7 @@ class UniformMemoryPool { modular_num_chunks = num_chunks - 1; overall_size = num_chunks * chunk_size; if (num_set_chunks > 0) { - data_view = data_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "pool data"), - overall_size); + data_view = data_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "pool data"), overall_size); } data = (data_view.data()); @@ -233,9 +229,9 
@@ class UniformMemoryPool { ~UniformMemoryPool() = default; - UniformMemoryPool(UniformMemoryPool &&) = default; - UniformMemoryPool(const UniformMemoryPool &) = default; - UniformMemoryPool &operator=(UniformMemoryPool &&) = default; + UniformMemoryPool(UniformMemoryPool &&) = default; + UniformMemoryPool(const UniformMemoryPool &) = default; + UniformMemoryPool &operator=(UniformMemoryPool &&) = default; UniformMemoryPool &operator=(const UniformMemoryPool &) = default; /** @@ -295,12 +291,10 @@ class UniformMemoryPool { } KOKKOS_INLINE_FUNCTION - data_type *get_arbitrary_free_chunk(const size_t &thread_index, - const size_t max_tries) const { + data_type *get_arbitrary_free_chunk(const size_t &thread_index, const size_t max_tries) const { size_t chunk_index = thread_index & modular_num_chunks; size_t num_try = 0; - while (!Kokkos::atomic_compare_exchange_strong(pchunk_locks + chunk_index, - 0, 1)) { + while (!Kokkos::atomic_compare_exchange_strong(pchunk_locks + chunk_index, 0, 1)) { chunk_index = (chunk_index + 1) & modular_num_chunks; ++num_try; if (num_try > max_tries) { @@ -344,9 +338,7 @@ class UniformMemoryPool { * \brief Returns the chunk index of the pointer. */ KOKKOS_INLINE_FUNCTION - size_t get_chunk_index(const data_type *chunk_ptr) const { - return (chunk_ptr - data) / chunk_size; - } + size_t get_chunk_index(const data_type *chunk_ptr) const { return (chunk_ptr - data) / chunk_size; } /** * \brief Releases the memory that has been allocated. 
diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_UpperBound.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_UpperBound.hpp index 901c86574344..97efd7559ccd 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_UpperBound.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_UpperBound.hpp @@ -70,11 +70,9 @@ namespace KokkosKernels { \returns index of first element in view where pred(value,element) is true, or view.size if no such element exists */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type upper_bound_thread( - const ViewLike &view, const typename ViewLike::non_const_value_type &value, - Pred pred = Pred()) { + const ViewLike &view, const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { return lower_bound_thread(view, value, Neg(Refl(pred))); } @@ -88,11 +86,10 @@ KOKKOS_INLINE_FUNCTION typename ViewLike::size_type upper_bound_thread( \returns index of first element in view where pred(value,element) is true, or view.size if no such element exists */ -template > +template > KOKKOS_INLINE_FUNCTION typename ViewLike::size_type upper_bound_team( - const TeamMember &handle, const ViewLike &view, - const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + const TeamMember &handle, const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { return lower_bound_team(handle, view, value, Neg(Refl(pred))); } diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp index ba8049cecfa4..a087002d3142 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp @@ -36,31 +36,26 @@ ExecSpaceType get_exec_space_type() { return kk_get_exec_space_type(); } -inline int get_suggested_vector__size(size_t nr, size_t nnz, - ExecSpaceType exec_space) { +inline int get_suggested_vector__size(size_t 
nr, size_t nnz, ExecSpaceType exec_space) { return kk_get_suggested_vector_size(nr, nnz, exec_space); } template -void get_histogram(typename in_lno_view_t::size_type in_elements, - in_lno_view_t in_view, +void get_histogram(typename in_lno_view_t::size_type in_elements, in_lno_view_t in_view, out_lno_view_t histogram /*must be initialized with 0s*/) { - kk_get_histogram( - in_elements, in_view, histogram); + kk_get_histogram(in_elements, in_view, histogram); } template void get_suggested_vector_size(int &suggested_vector_size_, idx nr, idx nnz) { - suggested_vector_size_ = kk_get_suggested_vector_size( - nr, nnz, get_exec_space_type()); + suggested_vector_size_ = kk_get_suggested_vector_size(nr, nnz, get_exec_space_type()); } // Get the best team size for the given functor. // If it uses shared memory, the amount used must be available through // f.team_shmem_size(n), not through the TeamPolicy. If this is how dynamic // shared is set, just use AUTO for the team size. -template +template int get_suggested_team_size(Functor &f, int vector_size) { using execution_space = typename team_policy_t::traits::execution_space; if (kk_is_gpu_exec_space()) { @@ -70,23 +65,18 @@ int get_suggested_team_size(Functor &f, int vector_size) { return 1; } -template -int get_suggested_team_size(Functor &f, int vector_size, size_t sharedPerTeam, - size_t sharedPerThread) { +template +int get_suggested_team_size(Functor &f, int vector_size, size_t sharedPerTeam, size_t sharedPerThread) { using execution_space = typename team_policy_t::traits::execution_space; if (kk_is_gpu_exec_space()) { - team_policy_t temp = - team_policy_t(1, 1, vector_size) - .set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam), - Kokkos::PerThread(sharedPerThread)); + team_policy_t temp = team_policy_t(1, 1, vector_size) + .set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam), Kokkos::PerThread(sharedPerThread)); return temp.team_size_recommended(f, ParallelTag()); } else return 1; } -template +template struct 
FillSymmetricEdges { typedef typename idx_array_type::value_type idx; idx num_rows; @@ -97,44 +87,35 @@ struct FillSymmetricEdges { idx_out_edge_array_type srcs; idx_out_edge_array_type dsts; - FillSymmetricEdges(typename idx_array_type::value_type num_rows_, - idx_array_type xadj_, idx_edge_array_type adj_, + FillSymmetricEdges(typename idx_array_type::value_type num_rows_, idx_array_type xadj_, idx_edge_array_type adj_, - idx_out_edge_array_type srcs_, - idx_out_edge_array_type dsts_) - : num_rows(num_rows_), - nnz(adj_.extent(0)), - xadj(xadj_), - adj(adj_), - srcs(srcs_), - dsts(dsts_) {} + idx_out_edge_array_type srcs_, idx_out_edge_array_type dsts_) + : num_rows(num_rows_), nnz(adj_.extent(0)), xadj(xadj_), adj(adj_), srcs(srcs_), dsts(dsts_) {} KOKKOS_INLINE_FUNCTION void operator()(const team_member &teamMember) const { - idx ii = teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + idx ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= num_rows) return; idx row_begin = xadj[ii]; idx row_end = xadj[ii + 1]; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { - idx adjind = i + row_begin; - idx colIndex = adj[adjind]; - if (colIndex < num_rows) { - srcs[adjind] = ii + 1; - dsts[adjind] = colIndex + 1; - if (colIndex != ii) { - srcs[adjind + nnz] = colIndex + 1; - dsts[adjind + nnz] = ii + 1; - } - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { + idx adjind = i + row_begin; + idx colIndex = adj[adjind]; + if (colIndex < num_rows) { + srcs[adjind] = ii + 1; + dsts[adjind] = colIndex + 1; + if (colIndex != ii) { + srcs[adjind + nnz] = colIndex + 1; + dsts[adjind + nnz] = ii + 1; + } + } + }); } }; -template +template struct FillSymmetricEdgesHashMap { typedef typename in_lno_row_view_t::value_type idx; idx num_rows; @@ -145,60 +126,47 @@ struct FillSymmetricEdgesHashMap { out_lno_row_view_t 
pre_pps; bool lower_only; - FillSymmetricEdgesHashMap(idx num_rows_, in_lno_row_view_t xadj_, - in_lno_nnz_view_t adj_, hashmap_t hashmap_, + FillSymmetricEdgesHashMap(idx num_rows_, in_lno_row_view_t xadj_, in_lno_nnz_view_t adj_, hashmap_t hashmap_, out_lno_row_view_t pre_pps_) - : num_rows(num_rows_), - nnz(adj_.extent(0)), - xadj(xadj_), - adj(adj_), - umap(hashmap_), - pre_pps(pre_pps_) {} + : num_rows(num_rows_), nnz(adj_.extent(0)), xadj(xadj_), adj(adj_), umap(hashmap_), pre_pps(pre_pps_) {} KOKKOS_INLINE_FUNCTION void operator()(const team_member &teamMember /*, idx &nnz*/) const { - typedef typename std::remove_reference::type - atomic_incr_type; - idx ii = teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + typedef typename std::remove_reference::type atomic_incr_type; + idx ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= num_rows) { return; } idx row_begin = xadj[ii]; idx row_end = xadj[ii + 1]; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { - idx adjind = i + row_begin; - idx colIndex = adj[adjind]; - if (colIndex < num_rows) { - if (colIndex < ii) { - Kokkos::UnorderedMapInsertResult r = - umap.insert(Kokkos::pair(colIndex, ii)); - if (r.success()) { - Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); - - Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), - atomic_incr_type(1)); - } - } else if (colIndex > ii) { - Kokkos::UnorderedMapInsertResult r = - umap.insert(Kokkos::pair(ii, colIndex)); - if (r.success()) { - Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), - atomic_incr_type(1)); - - Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); - } - } else { - Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { + idx adjind = i + row_begin; + idx colIndex = adj[adjind]; + if (colIndex < num_rows) { + if 
(colIndex < ii) { + Kokkos::UnorderedMapInsertResult r = umap.insert(Kokkos::pair(colIndex, ii)); + if (r.success()) { + Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); + + Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), atomic_incr_type(1)); } - }); + } else if (colIndex > ii) { + Kokkos::UnorderedMapInsertResult r = umap.insert(Kokkos::pair(ii, colIndex)); + if (r.success()) { + Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), atomic_incr_type(1)); + + Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); + } + } else { + Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); + } + } + }); } }; -template +template struct FillSymmetricLowerEdgesHashMap { typedef typename in_lno_row_view_t::value_type idx; idx num_rows; @@ -208,55 +176,41 @@ struct FillSymmetricLowerEdgesHashMap { hashmap_t umap; out_lno_row_view_t pre_pps; - FillSymmetricLowerEdgesHashMap(idx num_rows_, in_lno_row_view_t xadj_, - in_lno_nnz_view_t adj_, hashmap_t hashmap_, - out_lno_row_view_t pre_pps_, - bool /* lower_only_ */ = false) - : num_rows(num_rows_), - nnz(adj_.extent(0)), - xadj(xadj_), - adj(adj_), - umap(hashmap_), - pre_pps(pre_pps_) {} + FillSymmetricLowerEdgesHashMap(idx num_rows_, in_lno_row_view_t xadj_, in_lno_nnz_view_t adj_, hashmap_t hashmap_, + out_lno_row_view_t pre_pps_, bool /* lower_only_ */ = false) + : num_rows(num_rows_), nnz(adj_.extent(0)), xadj(xadj_), adj(adj_), umap(hashmap_), pre_pps(pre_pps_) {} KOKKOS_INLINE_FUNCTION void operator()(const team_member &teamMember /*, idx &nnz*/) const { - typedef typename std::remove_reference::type - atomic_incr_type; - idx ii = teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + typedef typename std::remove_reference::type atomic_incr_type; + idx ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= num_rows) { return; } idx row_begin = xadj[ii]; idx row_end = xadj[ii + 1]; - Kokkos::parallel_for( - 
Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { - idx adjind = i + row_begin; - idx colIndex = adj[adjind]; - if (colIndex < num_rows) { - if (colIndex < ii) { - Kokkos::UnorderedMapInsertResult r = - umap.insert(Kokkos::pair(colIndex, ii)); - if (r.success()) { - Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), - atomic_incr_type(1)); - } - } else if (colIndex > ii) { - Kokkos::UnorderedMapInsertResult r = - umap.insert(Kokkos::pair(ii, colIndex)); - if (r.success()) { - Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); - } - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { + idx adjind = i + row_begin; + idx colIndex = adj[adjind]; + if (colIndex < num_rows) { + if (colIndex < ii) { + Kokkos::UnorderedMapInsertResult r = umap.insert(Kokkos::pair(colIndex, ii)); + if (r.success()) { + Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), atomic_incr_type(1)); + } + } else if (colIndex > ii) { + Kokkos::UnorderedMapInsertResult r = umap.insert(Kokkos::pair(ii, colIndex)); + if (r.success()) { + Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); } - }); + } + } + }); } }; -template struct FillSymmetricCRS_HashMap { typedef typename in_lno_row_view_t::value_type idx; @@ -268,10 +222,8 @@ struct FillSymmetricCRS_HashMap { out_lno_row_view_t pre_pps; out_lno_nnz_view_t sym_adj; - FillSymmetricCRS_HashMap(idx num_rows_, in_lno_row_view_t xadj_, - in_lno_nnz_view_t adj_, hashmap_t hashmap_, - out_lno_row_view_t pre_pps_, - out_lno_nnz_view_t sym_adj_) + FillSymmetricCRS_HashMap(idx num_rows_, in_lno_row_view_t xadj_, in_lno_nnz_view_t adj_, hashmap_t hashmap_, + out_lno_row_view_t pre_pps_, out_lno_nnz_view_t sym_adj_) : num_rows(num_rows_), nnz(adj_.extent(0)), xadj(xadj_), @@ -282,51 +234,42 @@ struct FillSymmetricCRS_HashMap { KOKKOS_INLINE_FUNCTION void operator()(const team_member_t &teamMember) const { - typedef typename std::remove_reference::type - atomic_incr_type; - idx ii 
= teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + typedef typename std::remove_reference::type atomic_incr_type; + idx ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= num_rows) { return; } idx row_begin = xadj[ii]; idx row_end = xadj[ii + 1]; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { - idx adjind = i + row_begin; - idx colIndex = adj[adjind]; - if (colIndex < num_rows) { - if (colIndex < ii) { - if (umap.insert(Kokkos::pair(colIndex, ii)).success()) { - idx cAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), - atomic_incr_type(1)); - idx iAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(ii)), - atomic_incr_type(1)); - sym_adj[cAdjInd] = ii; - sym_adj[iAdjInd] = colIndex; - } - } else if (colIndex > ii) { - if (umap.insert(Kokkos::pair(ii, colIndex)).success()) { - idx cAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), - atomic_incr_type(1)); - idx iAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(ii)), - atomic_incr_type(1)); - sym_adj[cAdjInd] = ii; - sym_adj[iAdjInd] = colIndex; - } - } else { - idx cAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), - atomic_incr_type(1)); - sym_adj[cAdjInd] = ii; - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { + idx adjind = i + row_begin; + idx colIndex = adj[adjind]; + if (colIndex < num_rows) { + if (colIndex < ii) { + if (umap.insert(Kokkos::pair(colIndex, ii)).success()) { + idx cAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), atomic_incr_type(1)); + idx iAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(ii)), atomic_incr_type(1)); + sym_adj[cAdjInd] = ii; + sym_adj[iAdjInd] = colIndex; } - }); + } else if (colIndex > ii) { + if (umap.insert(Kokkos::pair(ii, colIndex)).success()) { + idx cAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), atomic_incr_type(1)); + idx iAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(ii)), 
atomic_incr_type(1)); + sym_adj[cAdjInd] = ii; + sym_adj[iAdjInd] = colIndex; + } + } else { + idx cAdjInd = Kokkos::atomic_fetch_add(&(pre_pps(colIndex)), atomic_incr_type(1)); + sym_adj[cAdjInd] = ii; + } + } + }); } }; -template struct FillSymmetricEdgeList_HashMap { typedef typename in_lno_row_view_t::value_type idx; @@ -339,11 +282,8 @@ struct FillSymmetricEdgeList_HashMap { out_lno_nnz_view_t sym_dst; out_lno_row_view_t pps; - FillSymmetricEdgeList_HashMap(idx num_rows_, in_lno_row_view_t xadj_, - in_lno_nnz_view_t adj_, hashmap_t hashmap_, - out_lno_nnz_view_t sym_src_, - out_lno_nnz_view_t sym_dst_, - out_lno_row_view_t pps_) + FillSymmetricEdgeList_HashMap(idx num_rows_, in_lno_row_view_t xadj_, in_lno_nnz_view_t adj_, hashmap_t hashmap_, + out_lno_nnz_view_t sym_src_, out_lno_nnz_view_t sym_dst_, out_lno_row_view_t pps_) : num_rows(num_rows_), nnz(adj_.extent(0)), xadj(xadj_), @@ -355,44 +295,38 @@ struct FillSymmetricEdgeList_HashMap { KOKKOS_INLINE_FUNCTION void operator()(const team_member_t &teamMember) const { - typedef - typename std::remove_reference::type atomic_incr_type; - idx ii = teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + typedef typename std::remove_reference::type atomic_incr_type; + idx ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= num_rows) { return; } idx row_begin = xadj[ii]; idx row_end = xadj[ii + 1]; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { - idx adjind = i + row_begin; - idx colIndex = adj[adjind]; - if (colIndex < num_rows) { - if (colIndex < ii) { - if (umap.insert(Kokkos::pair(colIndex, ii)).success()) { - idx cAdjInd = Kokkos::atomic_fetch_add(&(pps(colIndex)), - atomic_incr_type(1)); - sym_src[cAdjInd] = colIndex; - sym_dst[cAdjInd] = ii; - } - } else if (colIndex > ii) { - if (umap.insert(Kokkos::pair(ii, colIndex)).success()) { - idx cAdjInd = - Kokkos::atomic_fetch_add(&(pps(ii)), 
atomic_incr_type(1)); - sym_src[cAdjInd] = ii; - sym_dst[cAdjInd] = colIndex; - } - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, row_end - row_begin), [&](idx i) { + idx adjind = i + row_begin; + idx colIndex = adj[adjind]; + if (colIndex < num_rows) { + if (colIndex < ii) { + if (umap.insert(Kokkos::pair(colIndex, ii)).success()) { + idx cAdjInd = Kokkos::atomic_fetch_add(&(pps(colIndex)), atomic_incr_type(1)); + sym_src[cAdjInd] = colIndex; + sym_dst[cAdjInd] = ii; } - }); + } else if (colIndex > ii) { + if (umap.insert(Kokkos::pair(ii, colIndex)).success()) { + idx cAdjInd = Kokkos::atomic_fetch_add(&(pps(ii)), atomic_incr_type(1)); + sym_src[cAdjInd] = ii; + sym_dst[cAdjInd] = colIndex; + } + } + } + }); } }; template -void print_1Dview(std::ostream &os, idx_array_type view, bool print_all = false, - const char *sep = " ") { +void print_1Dview(std::ostream &os, idx_array_type view, bool print_all = false, const char *sep = " ") { kk_print_1Dview(os, view, print_all, sep); } @@ -403,8 +337,7 @@ void print_1Dview(idx_array_type view, bool print_all = false) { template void print_1Dpointer(const lno_t *pview, size_t size, bool print_all = false) { - typedef Kokkos::View - um_array_type; + typedef Kokkos::View um_array_type; um_array_type view(pview, size); kk_print_1Dview(view, print_all); } @@ -415,14 +348,12 @@ struct Reverse_Map_Init { typedef typename reverse_map_type::value_type reverse_type; forward_map_type forward_map; reverse_map_type reverse_map_xadj; - Reverse_Map_Init(forward_map_type forward_map_, - reverse_map_type reverse_xadj_) + Reverse_Map_Init(forward_map_type forward_map_, reverse_map_type reverse_xadj_) : forward_map(forward_map_), reverse_map_xadj(reverse_xadj_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &ii) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; forward_type fm = forward_map[ii]; 
Kokkos::atomic_fetch_add(&(reverse_map_xadj(fm)), atomic_incr_type(1)); } @@ -436,44 +367,32 @@ struct Fill_Reverse_Map { reverse_map_type reverse_map_xadj; reverse_map_type reverse_map_adj; - Fill_Reverse_Map(forward_map_type forward_map_, - reverse_map_type reverse_map_xadj_, - reverse_map_type reverse_map_adj_) - : forward_map(forward_map_), - reverse_map_xadj(reverse_map_xadj_), - reverse_map_adj(reverse_map_adj_) {} + Fill_Reverse_Map(forward_map_type forward_map_, reverse_map_type reverse_map_xadj_, reverse_map_type reverse_map_adj_) + : forward_map(forward_map_), reverse_map_xadj(reverse_map_xadj_), reverse_map_adj(reverse_map_adj_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &ii) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; forward_type c = forward_map[ii]; - const reverse_type future_index = Kokkos::atomic_fetch_add( - &(reverse_map_xadj(c - 1)), atomic_incr_type(1)); - reverse_map_adj(future_index) = ii; + const reverse_type future_index = Kokkos::atomic_fetch_add(&(reverse_map_xadj(c - 1)), atomic_incr_type(1)); + reverse_map_adj(future_index) = ii; } }; template -void inclusive_parallel_prefix_sum( - MyExecSpace my_exec_space, - typename forward_array_type::value_type num_elements, - forward_array_type arr) { +void inclusive_parallel_prefix_sum(MyExecSpace my_exec_space, typename forward_array_type::value_type num_elements, + forward_array_type arr) { return kk_inclusive_parallel_prefix_sum(my_exec_space, num_elements, arr); } template -void inclusive_parallel_prefix_sum( - typename forward_array_type::value_type num_elements, - forward_array_type arr) { +void inclusive_parallel_prefix_sum(typename forward_array_type::value_type num_elements, forward_array_type arr) { MyExecSpace my_exec_space; return inclusive_parallel_prefix_sum(my_exec_space, num_elements, arr); } template -void exclusive_parallel_prefix_sum( - typename 
forward_array_type::value_type num_elements, - forward_array_type arr) { +void exclusive_parallel_prefix_sum(typename forward_array_type::value_type num_elements, forward_array_type arr) { kk_exclusive_parallel_prefix_sum(num_elements, arr); } @@ -499,21 +418,16 @@ struct PropogataMaxValstoZeros { } }; -template -void a_times_x_plus_b(typename in_array_t::value_type num_elements, - in_array_t out_arr, in_array_t in_arr, scalar_1 a, +template +void a_times_x_plus_b(typename in_array_t::value_type num_elements, in_array_t out_arr, in_array_t in_arr, scalar_1 a, scalar_2 b) { - kk_a_times_x_plus_b( - num_elements, out_arr, in_arr, a, b); + kk_a_times_x_plus_b(num_elements, out_arr, in_arr, a, b); } template -void modular_view(typename in_array_type::value_type num_elements, - out_array_type out_arr, in_array_type in_arr, +void modular_view(typename in_array_type::value_type num_elements, out_array_type out_arr, in_array_type in_arr, int mod_factor_) { - kk_modular_view( - num_elements, out_arr, in_arr, mod_factor_); + kk_modular_view(num_elements, out_arr, in_arr, mod_factor_); } template @@ -528,18 +442,14 @@ struct LinearInitialization { template void linear_init(typename array_type::value_type num_elements, array_type arr) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for("KokkosKernels::Common::LinearInit", - my_exec_space(0, num_elements), + Kokkos::parallel_for("KokkosKernels::Common::LinearInit", my_exec_space(0, num_elements), LinearInitialization(arr)); } template -void remove_zeros_in_xadj_vector( - typename forward_array_type::value_type num_elements, - forward_array_type arr) { +void remove_zeros_in_xadj_vector(typename forward_array_type::value_type num_elements, forward_array_type arr) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_scan("KokkosKernels::Common::RemoveZerosInXadjVector", - my_exec_space(0, num_elements), + Kokkos::parallel_scan("KokkosKernels::Common::RemoveZerosInXadjVector", my_exec_space(0, 
num_elements), PropogataMaxValstoZeros(arr)); } @@ -548,10 +458,9 @@ struct FillReverseBegins { const forward_array_type &forward_map; // vertex to colors reverse_array_type &reverse_map_xadj; // colors to vertex xadj - FillReverseBegins( - const forward_array_type &forward_map_, // vertex to colors - reverse_array_type &reverse_map_xadj_ // colors to vertex xadj - ) + FillReverseBegins(const forward_array_type &forward_map_, // vertex to colors + reverse_array_type &reverse_map_xadj_ // colors to vertex xadj + ) : forward_map(forward_map_), reverse_map_xadj(reverse_map_xadj_) {} KOKKOS_INLINE_FUNCTION @@ -575,10 +484,8 @@ struct Reverse_Map_Scale_Init { const reverse_type multiply_shift_for_scale; const reverse_type division_shift_for_bucket; - Reverse_Map_Scale_Init(forward_map_type forward_map_, - reverse_map_type reverse_xadj_, - reverse_type multiply_shift_for_scale_, - reverse_type division_shift_for_bucket_) + Reverse_Map_Scale_Init(forward_map_type forward_map_, reverse_map_type reverse_xadj_, + reverse_type multiply_shift_for_scale_, reverse_type division_shift_for_bucket_) : forward_map(forward_map_), reverse_map_xadj(reverse_xadj_), multiply_shift_for_scale(multiply_shift_for_scale_), @@ -586,8 +493,7 @@ struct Reverse_Map_Scale_Init { KOKKOS_INLINE_FUNCTION void operator()(const size_t &ii) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; forward_type fm = forward_map[ii]; fm = fm << multiply_shift_for_scale; fm += ii >> division_shift_for_bucket; @@ -606,10 +512,8 @@ struct Fill_Reverse_Scale_Map { const reverse_type multiply_shift_for_scale; const reverse_type division_shift_for_bucket; - Fill_Reverse_Scale_Map(forward_map_type forward_map_, - reverse_map_type reverse_map_xadj_, - reverse_map_type reverse_map_adj_, - reverse_type multiply_shift_for_scale_, + Fill_Reverse_Scale_Map(forward_map_type forward_map_, reverse_map_type reverse_map_xadj_, + 
reverse_map_type reverse_map_adj_, reverse_type multiply_shift_for_scale_, reverse_type division_shift_for_bucket_) : forward_map(forward_map_), reverse_map_xadj(reverse_map_xadj_), @@ -619,15 +523,13 @@ struct Fill_Reverse_Scale_Map { KOKKOS_INLINE_FUNCTION void operator()(const size_t &ii) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; forward_type fm = forward_map[ii]; fm = fm << multiply_shift_for_scale; fm += ii >> division_shift_for_bucket; - const reverse_type future_index = Kokkos::atomic_fetch_add( - &(reverse_map_xadj(fm - 1)), atomic_incr_type(1)); - reverse_map_adj(future_index) = ii; + const reverse_type future_index = Kokkos::atomic_fetch_add(&(reverse_map_xadj(fm - 1)), atomic_incr_type(1)); + reverse_map_adj(future_index) = ii; } }; @@ -636,8 +538,7 @@ struct StridedCopy { const from_view_t from; to_view_t to; const size_t stride; - StridedCopy(const from_view_t from_, to_view_t to_, size_t stride_) - : from(from_), to(to_), stride(stride_) {} + StridedCopy(const from_view_t from_, to_view_t to_, size_t stride_) : from(from_), to(to_), stride(stride_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &ii) const { @@ -665,18 +566,14 @@ struct StridedCopy { * values of reverse maps. Its size will be num_forward_elements. 
* */ -template -void create_reverse_map( - MyExecSpace my_exec_space, - const typename reverse_array_type::value_type - &num_forward_elements, // num_vertices - const typename forward_array_type::value_type - &num_reverse_elements, // num_colors +template +void create_reverse_map(MyExecSpace my_exec_space, + const typename reverse_array_type::value_type &num_forward_elements, // num_vertices + const typename forward_array_type::value_type &num_reverse_elements, // num_colors - const forward_array_type &forward_map, // vertex to colors - reverse_array_type &reverse_map_xadj, // colors to vertex xadj - reverse_array_type &reverse_map_adj) { // colros to vertex adj + const forward_array_type &forward_map, // vertex to colors + reverse_array_type &reverse_map_xadj, // colors to vertex xadj + reverse_array_type &reverse_map_adj) { // colros to vertex adj typedef typename reverse_array_type::value_type lno_t; typedef typename forward_array_type::value_type reverse_lno_t; @@ -685,110 +582,84 @@ void create_reverse_map( typedef Kokkos::RangePolicy range_policy_t; reverse_map_xadj = - reverse_array_type(Kokkos::view_alloc(my_exec_space, "Reverse Map Xadj"), - num_reverse_elements + 1); - reverse_map_adj = reverse_array_type( - Kokkos::view_alloc(my_exec_space, Kokkos::WithoutInitializing, - "REVERSE_ADJ"), - num_forward_elements); + reverse_array_type(Kokkos::view_alloc(my_exec_space, "Reverse Map Xadj"), num_reverse_elements + 1); + reverse_map_adj = reverse_array_type(Kokkos::view_alloc(my_exec_space, Kokkos::WithoutInitializing, "REVERSE_ADJ"), + num_forward_elements); if (num_reverse_elements < MINIMUM_TO_ATOMIC) { - const lno_t scale_size = 1024; - const lno_t multiply_shift_for_scale = 10; - const lno_t division_shift_for_bucket = - lno_t(ceil(log(double(num_forward_elements) / scale_size) / log(2))); + const lno_t scale_size = 1024; + const lno_t multiply_shift_for_scale = 10; + const lno_t division_shift_for_bucket = lno_t(ceil(log(double(num_forward_elements) / 
scale_size) / log(2))); // const lno_t bucket_range_size = pow(2, division_shift_for_bucket); // coloring indices are base-1. we end up using not using element 1. - const reverse_lno_t tmp_reverse_size = (num_reverse_elements + 1) - << multiply_shift_for_scale; + const reverse_lno_t tmp_reverse_size = (num_reverse_elements + 1) << multiply_shift_for_scale; - reverse_array_type tmp_color_xadj( - Kokkos::view_alloc(my_exec_space, "TMP_REVERSE_XADJ"), - tmp_reverse_size + 1); + reverse_array_type tmp_color_xadj(Kokkos::view_alloc(my_exec_space, "TMP_REVERSE_XADJ"), tmp_reverse_size + 1); Reverse_Map_Scale_Init rmi( - forward_map, tmp_color_xadj, multiply_shift_for_scale, - division_shift_for_bucket); + forward_map, tmp_color_xadj, multiply_shift_for_scale, division_shift_for_bucket); Kokkos::parallel_for("KokkosKernels::Common::ReverseMapScaleInit", - range_policy_t(my_exec_space, 0, num_forward_elements), - rmi); + range_policy_t(my_exec_space, 0, num_forward_elements), rmi); my_exec_space.fence(); - inclusive_parallel_prefix_sum( - my_exec_space, tmp_reverse_size + 1, tmp_color_xadj); + inclusive_parallel_prefix_sum(my_exec_space, tmp_reverse_size + 1, tmp_color_xadj); my_exec_space.fence(); Kokkos::parallel_for( - "KokkosKernels::Common::StridedCopy", - range_policy_t(my_exec_space, 0, num_reverse_elements + 1), - StridedCopy( - tmp_color_xadj, reverse_map_xadj, scale_size)); + "KokkosKernels::Common::StridedCopy", range_policy_t(my_exec_space, 0, num_reverse_elements + 1), + StridedCopy(tmp_color_xadj, reverse_map_xadj, scale_size)); my_exec_space.fence(); Fill_Reverse_Scale_Map frm( - forward_map, tmp_color_xadj, reverse_map_adj, multiply_shift_for_scale, - division_shift_for_bucket); + forward_map, tmp_color_xadj, reverse_map_adj, multiply_shift_for_scale, division_shift_for_bucket); Kokkos::parallel_for("KokkosKernels::Common::FillReverseMap", - range_policy_t(my_exec_space, 0, num_forward_elements), - frm); + range_policy_t(my_exec_space, 0, 
num_forward_elements), frm); my_exec_space.fence(); } else // atomic implementation. { reverse_array_type tmp_color_xadj( - Kokkos::view_alloc(my_exec_space, Kokkos::WithoutInitializing, - "TMP_REVERSE_XADJ"), - num_reverse_elements + 1); + Kokkos::view_alloc(my_exec_space, Kokkos::WithoutInitializing, "TMP_REVERSE_XADJ"), num_reverse_elements + 1); - Reverse_Map_Init rmi( - forward_map, reverse_map_xadj); + Reverse_Map_Init rmi(forward_map, reverse_map_xadj); Kokkos::parallel_for("KokkosKernels::Common::ReverseMapInit", - range_policy_t(my_exec_space, 0, num_forward_elements), - rmi); + range_policy_t(my_exec_space, 0, num_forward_elements), rmi); my_exec_space.fence(); // print_1Dview(reverse_map_xadj); - inclusive_parallel_prefix_sum( - my_exec_space, num_reverse_elements + 1, reverse_map_xadj); + inclusive_parallel_prefix_sum(my_exec_space, num_reverse_elements + 1, + reverse_map_xadj); Kokkos::deep_copy(my_exec_space, tmp_color_xadj, reverse_map_xadj); my_exec_space.fence(); - Fill_Reverse_Map frm( - forward_map, tmp_color_xadj, reverse_map_adj); + Fill_Reverse_Map frm(forward_map, tmp_color_xadj, reverse_map_adj); Kokkos::parallel_for("KokkosKernels::Common::FillReverseMap", - range_policy_t(my_exec_space, 0, num_forward_elements), - frm); + range_policy_t(my_exec_space, 0, num_forward_elements), frm); my_exec_space.fence(); } } template -void create_reverse_map( - const typename reverse_array_type::value_type - &num_forward_elements, // num_vertices - const typename forward_array_type::value_type - &num_reverse_elements, // num_colors - - const forward_array_type &forward_map, // vertex to colors - reverse_array_type &reverse_map_xadj, // colors to vertex xadj - reverse_array_type &reverse_map_adj) { +void create_reverse_map(const typename reverse_array_type::value_type &num_forward_elements, // num_vertices + const typename forward_array_type::value_type &num_reverse_elements, // num_colors + + const forward_array_type &forward_map, // vertex to colors + 
reverse_array_type &reverse_map_xadj, // colors to vertex xadj + reverse_array_type &reverse_map_adj) { MyExecSpace my_exec_space; - return create_reverse_map(my_exec_space, num_forward_elements, - num_reverse_elements, forward_map, reverse_map_xadj, + return create_reverse_map(my_exec_space, num_forward_elements, num_reverse_elements, forward_map, reverse_map_xadj, reverse_map_adj); } -template +template struct PermuteVector { typedef typename idx_array_type::value_type idx; value_array_type old_vector; out_value_array_type new_vector; idx_array_type old_to_new_mapping; idx mapping_size; - PermuteVector(value_array_type old_vector_, out_value_array_type new_vector_, - idx_array_type old_to_new_mapping_) + PermuteVector(value_array_type old_vector_, out_value_array_type new_vector_, idx_array_type old_to_new_mapping_) : old_vector(old_vector_), new_vector(new_vector_), old_to_new_mapping(old_to_new_mapping_), @@ -804,34 +675,24 @@ struct PermuteVector { } }; -template -void permute_vector(MyExecSpace my_exec_space, - typename idx_array_type::value_type num_elements, - idx_array_type &old_to_new_index_map, - value_array_type &old_vector, +template +void permute_vector(MyExecSpace my_exec_space, typename idx_array_type::value_type num_elements, + idx_array_type &old_to_new_index_map, value_array_type &old_vector, out_value_array_type &new_vector) { using range_policy_t = Kokkos::RangePolicy; - Kokkos::parallel_for( - "KokkosKernels::Common::PermuteVector", - range_policy_t(my_exec_space, 0, num_elements), - PermuteVector( - old_vector, new_vector, old_to_new_index_map)); + Kokkos::parallel_for("KokkosKernels::Common::PermuteVector", range_policy_t(my_exec_space, 0, num_elements), + PermuteVector(old_vector, new_vector, + old_to_new_index_map)); } -template -void permute_vector(typename idx_array_type::value_type num_elements, - idx_array_type &old_to_new_index_map, - value_array_type &old_vector, - out_value_array_type &new_vector) { - permute_vector(MyExecSpace(), 
num_elements, old_to_new_index_map, old_vector, - new_vector); +template +void permute_vector(typename idx_array_type::value_type num_elements, idx_array_type &old_to_new_index_map, + value_array_type &old_vector, out_value_array_type &new_vector) { + permute_vector(MyExecSpace(), num_elements, old_to_new_index_map, old_vector, new_vector); } -template +template struct PermuteBlockVector { typedef typename idx_array_type::value_type idx; int block_size; @@ -839,8 +700,7 @@ struct PermuteBlockVector { out_value_array_type new_vector; idx_array_type old_to_new_mapping; idx mapping_size; - PermuteBlockVector(int block_size_, value_array_type old_vector_, - out_value_array_type new_vector_, + PermuteBlockVector(int block_size_, value_array_type old_vector_, out_value_array_type new_vector_, idx_array_type old_to_new_mapping_) : block_size(block_size_), old_vector(old_vector_), @@ -854,55 +714,42 @@ struct PermuteBlockVector { if (ii < mapping_size) mapping = old_to_new_mapping[ii]; for (idx j = 0; j < static_cast(new_vector.extent(1)); j++) { for (int i = 0; i < block_size; ++i) { - new_vector.access(mapping * block_size + i, j) = - old_vector.access(ii * block_size + i, j); + new_vector.access(mapping * block_size + i, j) = old_vector.access(ii * block_size + i, j); } } } }; -template -void permute_block_vector(MyExecSpace my_exec_space, - typename idx_array_type::value_type num_elements, - int block_size, idx_array_type &old_to_new_index_map, - value_array_type &old_vector, +template +void permute_block_vector(MyExecSpace my_exec_space, typename idx_array_type::value_type num_elements, int block_size, + idx_array_type &old_to_new_index_map, value_array_type &old_vector, out_value_array_type &new_vector) { using range_policy_t = Kokkos::RangePolicy; - Kokkos::parallel_for( - "KokkosKernels::Common::PermuteVector", - range_policy_t(my_exec_space, 0, num_elements), - PermuteBlockVector(block_size, old_vector, new_vector, - old_to_new_index_map)); + 
Kokkos::parallel_for("KokkosKernels::Common::PermuteVector", range_policy_t(my_exec_space, 0, num_elements), + PermuteBlockVector( + block_size, old_vector, new_vector, old_to_new_index_map)); } -template -void permute_block_vector(typename idx_array_type::value_type num_elements, - int block_size, idx_array_type &old_to_new_index_map, - value_array_type &old_vector, +template +void permute_block_vector(typename idx_array_type::value_type num_elements, int block_size, + idx_array_type &old_to_new_index_map, value_array_type &old_vector, out_value_array_type &new_vector) { - permute_block_vector(MyExecSpace(), num_elements, block_size, - old_to_new_index_map, old_vector, new_vector); + permute_block_vector(MyExecSpace(), num_elements, block_size, old_to_new_index_map, old_vector, new_vector); } // TODO BMK: clean this up by removing 1st argument. It is unused but // its name gives the impression that only num_elements of the vector are // zeroed, when really it's always the whole thing. template -void zero_vector(ExecSpaceIn &exec_space_in, - typename value_array_type::value_type /* num_elements */, +void zero_vector(ExecSpaceIn &exec_space_in, typename value_array_type::value_type /* num_elements */, value_array_type &vector) { typedef typename value_array_type::non_const_value_type val_type; - Kokkos::deep_copy(exec_space_in, vector, - Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(exec_space_in, vector, Kokkos::ArithTraits::zero()); exec_space_in.fence(); } template -void zero_vector(typename value_array_type::value_type /* num_elements */, - value_array_type &vector) { +void zero_vector(typename value_array_type::value_type /* num_elements */, value_array_type &vector) { using ne_tmp_t = typename value_array_type::value_type; ne_tmp_t ne_tmp = ne_tmp_t(0); MyExecSpace my_exec_space; @@ -915,21 +762,15 @@ struct MarkDuplicateSortedKeyValuePairs { v2 vals; v3 prefix_sum; typename v1::size_type overall_size; - MarkDuplicateSortedKeyValuePairs(v1 keys_, v2 vals_, 
v3 prefix_sum_, - typename v1::size_type overall_size_) - : keys(keys_), - vals(vals_), - prefix_sum(prefix_sum_), - overall_size(overall_size_) {} + MarkDuplicateSortedKeyValuePairs(v1 keys_, v2 vals_, v3 prefix_sum_, typename v1::size_type overall_size_) + : keys(keys_), vals(vals_), prefix_sum(prefix_sum_), overall_size(overall_size_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &i, typename v3::value_type &num_result) const { typename v1::value_type my_key = keys(i); typename v2::value_type my_val = vals(i); - if ((my_key != 0 && my_val != 0) && - ((i + 1 >= overall_size) || - (my_key != keys(i + 1) || my_val != vals(i + 1)))) { + if ((my_key != 0 && my_val != 0) && ((i + 1 >= overall_size) || (my_key != keys(i + 1) || my_val != vals(i + 1)))) { prefix_sum(i) = 1; num_result += 1; } @@ -944,9 +785,7 @@ struct FillSymmetricCSR { typename v3::size_type array_size; v4 out_xadj; v5 out_adj; - FillSymmetricCSR(v1 keys_, v2 vals_, v3 prefix_sum_, - typename v3::size_type array_size_, v4 out_xadj_, - v5 out_adj_) + FillSymmetricCSR(v1 keys_, v2 vals_, v3 prefix_sum_, typename v3::size_type array_size_, v4 out_xadj_, v5 out_adj_) : keys(keys_), vals(vals_), prefix_sum(prefix_sum_), @@ -978,12 +817,10 @@ struct FillSymmetricCSR { } }; -template -void symmetrize_and_get_lower_diagonal_edge_list( - typename in_lno_nnz_view_t::value_type num_rows_to_symmetrize, - in_lno_row_view_t xadj, in_lno_nnz_view_t adj, out_lno_nnz_view_t &sym_srcs, - out_lno_nnz_view_t &sym_dsts_) { +template +void symmetrize_and_get_lower_diagonal_edge_list(typename in_lno_nnz_view_t::value_type num_rows_to_symmetrize, + in_lno_row_view_t xadj, in_lno_nnz_view_t adj, + out_lno_nnz_view_t &sym_srcs, out_lno_nnz_view_t &sym_dsts_) { typedef typename in_lno_row_view_t::non_const_value_type idx; idx nnz = adj.extent(0); @@ -997,8 +834,7 @@ void symmetrize_and_get_lower_diagonal_edge_list( // typedef Kokkos::RangePolicy my_exec_space; // TODO: Should change this to temporary memory space? 
- typedef Kokkos::UnorderedMap, void, MyExecSpace> - hashmap_t; + typedef Kokkos::UnorderedMap, void, MyExecSpace> hashmap_t; out_lno_nnz_view_t pre_pps_("pre_pps", num_rows_to_symmetrize + 1); @@ -1007,31 +843,26 @@ void symmetrize_and_get_lower_diagonal_edge_list( hashmap_t umap(nnz); umap.clear(); umap.end_erase(); - FillSymmetricLowerEdgesHashMap + FillSymmetricLowerEdgesHashMap fse(num_rows_to_symmetrize, xadj, adj, umap, pre_pps_); int teamSizeMax = 0; int vector_size = 0; - get_suggested_vector_size(vector_size, xadj.extent(0) - 1, - nnz); + get_suggested_vector_size(vector_size, xadj.extent(0) - 1, nnz); teamSizeMax = get_suggested_team_size(fse, vector_size); // std::cout << "max_allowed_team_size:" << max_allowed_team_size << " vs:" // << vector_size << " tsm:" << teamSizeMax<< std::endl; - team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, - teamSizeMax, vector_size); - Kokkos::parallel_for( - "KokkosKernels::Common::SymmetrizeAndGetLowerDiagonalEdgeList::S0", pol, - fse /*, num_symmetric_edges*/); + team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, teamSizeMax, vector_size); + Kokkos::parallel_for("KokkosKernels::Common::SymmetrizeAndGetLowerDiagonalEdgeList::S0", pol, + fse /*, num_symmetric_edges*/); MyExecSpace().fence(); } if (num_rows_to_symmetrize > 0) - exclusive_parallel_prefix_sum( - num_rows_to_symmetrize + 1, pre_pps_); + exclusive_parallel_prefix_sum(num_rows_to_symmetrize + 1, pre_pps_); MyExecSpace().fence(); auto d_sym_edge_size = Kokkos::subview(pre_pps_, num_rows_to_symmetrize); @@ -1046,45 +877,33 @@ void symmetrize_and_get_lower_diagonal_edge_list( num_symmetric_edges = h_sym_edge_size(h_sym_edge_size.extent(0) - 1); */ - sym_srcs = out_lno_nnz_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_srcs"), - num_symmetric_edges); - sym_dsts_ = out_lno_nnz_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_dsts_"), - num_symmetric_edges); + sym_srcs = 
out_lno_nnz_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_srcs"), num_symmetric_edges); + sym_dsts_ = out_lno_nnz_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_dsts_"), num_symmetric_edges); MyExecSpace().fence(); { hashmap_t umap(nnz); - FillSymmetricEdgeList_HashMap - FSCH(num_rows_to_symmetrize, xadj, adj, umap, sym_srcs, sym_dsts_, - pre_pps_); + FSCH(num_rows_to_symmetrize, xadj, adj, umap, sym_srcs, sym_dsts_, pre_pps_); int teamSizeMax = 0; int vector_size = 0; - get_suggested_vector_size(vector_size, xadj.extent(0) - 1, - nnz); + get_suggested_vector_size(vector_size, xadj.extent(0) - 1, nnz); teamSizeMax = get_suggested_team_size(FSCH, vector_size); - team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, - teamSizeMax, vector_size); - Kokkos::parallel_for( - "KokkosKernels::Common::SymmetrizeAndGetLowerDiagonalEdgeList::S1", pol, - FSCH); + team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, teamSizeMax, vector_size); + Kokkos::parallel_for("KokkosKernels::Common::SymmetrizeAndGetLowerDiagonalEdgeList::S1", pol, FSCH); MyExecSpace().fence(); } } -template -void symmetrize_graph_symbolic_hashmap( - typename in_lno_row_view_t::value_type num_rows_to_symmetrize, - in_lno_row_view_t xadj, in_lno_nnz_view_t adj, out_lno_row_view_t &sym_xadj, - out_lno_nnz_view_t &sym_adj) { +template +void symmetrize_graph_symbolic_hashmap(typename in_lno_row_view_t::value_type num_rows_to_symmetrize, + in_lno_row_view_t xadj, in_lno_nnz_view_t adj, out_lno_row_view_t &sym_xadj, + out_lno_nnz_view_t &sym_adj) { typedef typename in_lno_row_view_t::non_const_value_type idx; idx nnz = adj.extent(0); @@ -1098,8 +917,7 @@ void symmetrize_graph_symbolic_hashmap( // typedef Kokkos::RangePolicy my_exec_space; // TODO: Should change this to temporary memory space? 
- typedef Kokkos::UnorderedMap, void, MyExecSpace> - hashmap_t; + typedef Kokkos::UnorderedMap, void, MyExecSpace> hashmap_t; out_lno_row_view_t pre_pps_("pre_pps", num_rows_to_symmetrize + 1); @@ -1108,66 +926,53 @@ void symmetrize_graph_symbolic_hashmap( hashmap_t umap(nnz); umap.clear(); umap.end_erase(); - FillSymmetricEdgesHashMap - fse(num_rows_to_symmetrize, xadj, adj, umap, pre_pps_); + FillSymmetricEdgesHashMap fse( + num_rows_to_symmetrize, xadj, adj, umap, pre_pps_); int teamSizeMax = 0; int vector_size = 0; - get_suggested_vector_size(vector_size, xadj.extent(0) - 1, - nnz); + get_suggested_vector_size(vector_size, xadj.extent(0) - 1, nnz); teamSizeMax = get_suggested_team_size(fse, vector_size); - team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, - teamSizeMax, vector_size); - Kokkos::parallel_for( - "KokkosKernels::Common::SymmetrizeGraphSymbolicHashMap::S0", pol, - fse /*, num_symmetric_edges*/); + team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, teamSizeMax, vector_size); + Kokkos::parallel_for("KokkosKernels::Common::SymmetrizeGraphSymbolicHashMap::S0", pol, + fse /*, num_symmetric_edges*/); MyExecSpace().fence(); } if (num_rows_to_symmetrize > 0) - exclusive_parallel_prefix_sum( - num_rows_to_symmetrize + 1, pre_pps_); + exclusive_parallel_prefix_sum(num_rows_to_symmetrize + 1, pre_pps_); MyExecSpace().fence(); // out_lno_row_view_t d_sym_edge_size = Kokkos::subview(pre_pps_, // num_rows_to_symmetrize, num_rows_to_symmetrize ); - typename out_lno_row_view_t::HostMirror h_sym_edge_size = - Kokkos::create_mirror_view(pre_pps_); + typename out_lno_row_view_t::HostMirror h_sym_edge_size = Kokkos::create_mirror_view(pre_pps_); Kokkos::deep_copy(h_sym_edge_size, pre_pps_); num_symmetric_edges = h_sym_edge_size(h_sym_edge_size.extent(0) - 1); - sym_adj = out_lno_nnz_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_adj"), - num_symmetric_edges); + sym_adj = 
out_lno_nnz_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_adj"), num_symmetric_edges); MyExecSpace().fence(); - sym_xadj = out_lno_row_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_xadj"), - num_rows_to_symmetrize + 1); + sym_xadj = + out_lno_row_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "sym_xadj"), num_rows_to_symmetrize + 1); Kokkos::deep_copy(sym_xadj, pre_pps_); { hashmap_t umap(nnz); - FillSymmetricCRS_HashMap FSCH(num_rows_to_symmetrize, xadj, adj, umap, pre_pps_, sym_adj); int teamSizeMax = 0; int vector_size = 0; - get_suggested_vector_size(vector_size, xadj.extent(0) - 1, - nnz); + get_suggested_vector_size(vector_size, xadj.extent(0) - 1, nnz); teamSizeMax = get_suggested_team_size(FSCH, vector_size); - team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, - teamSizeMax, vector_size); - Kokkos::parallel_for( - "KokkosKernels::Common::SymmetrizeGraphSymbolicHashMap::S1", pol, FSCH); + team_policy pol((num_rows_to_symmetrize + teamSizeMax - 1) / teamSizeMax, teamSizeMax, vector_size); + Kokkos::parallel_for("KokkosKernels::Common::SymmetrizeGraphSymbolicHashMap::S1", pol, FSCH); MyExecSpace().fence(); } @@ -1192,44 +997,36 @@ struct CopyView { template void copy_view(size_t num_elements, from_vector from, to_vector to) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for("KokkosKernels::Common::CopyView", - my_exec_space(0, num_elements), + Kokkos::parallel_for("KokkosKernels::Common::CopyView", my_exec_space(0, num_elements), CopyView(from, to)); } template -void safe_device_to_host_deep_copy(size_t num_elements, from_view from, - typename from_view::HostMirror to) { +void safe_device_to_host_deep_copy(size_t num_elements, from_view from, typename from_view::HostMirror to) { typedef typename from_view::value_type scalar_t; typedef typename from_view::device_type device_t; typedef Kokkos::View unstrided_from_view_t; unstrided_from_view_t unstrided_from("unstrided", 
num_elements); - copy_view(num_elements, from, - unstrided_from); + copy_view(num_elements, from, unstrided_from); Kokkos::fence(); typedef typename unstrided_from_view_t::HostMirror host_unstrided_from_view_t; - host_unstrided_from_view_t h_unstrided_from = - Kokkos::create_mirror_view(unstrided_from); + host_unstrided_from_view_t h_unstrided_from = Kokkos::create_mirror_view(unstrided_from); Kokkos::deep_copy(h_unstrided_from, unstrided_from); Kokkos::fence(); copy_view( - num_elements, h_unstrided_from, to); + typename host_unstrided_from_view_t::device_type::execution_space>(num_elements, h_unstrided_from, to); Kokkos::fence(); } template -void safe_host_to_device_deep_copy(size_t num_elements, - typename to_view::HostMirror from, - to_view to) { +void safe_host_to_device_deep_copy(size_t num_elements, typename to_view::HostMirror from, to_view to) { typedef typename to_view::value_type scalar_t; typedef typename to_view::device_type device_t; @@ -1241,17 +1038,15 @@ void safe_host_to_device_deep_copy(size_t num_elements, host_unstrided_view_t host_unstrided_from("unstrided", num_elements); device_unstrided_view_t device_unstrided_to("unstrided", num_elements); - copy_view(num_elements, from, - host_unstrided_from); + copy_view( + num_elements, from, host_unstrided_from); Kokkos::fence(); Kokkos::deep_copy(device_unstrided_to, host_unstrided_from); Kokkos::fence(); - copy_view(num_elements, - device_unstrided_to, to); + copy_view(num_elements, device_unstrided_to, + to); Kokkos::fence(); } @@ -1260,12 +1055,9 @@ template struct ReduceSumFunctor { view_type view_to_reduce; - ReduceSumFunctor(view_type view_to_reduce_) - : view_to_reduce(view_to_reduce_) {} + ReduceSumFunctor(view_type view_to_reduce_) : view_to_reduce(view_to_reduce_) {} - void operator()( - const size_t &i, - typename view_type::non_const_value_type &sum_reduction) const { + void operator()(const size_t &i, typename view_type::non_const_value_type &sum_reduction) const { sum_reduction += 
view_to_reduce(i); } }; @@ -1274,16 +1066,14 @@ template void view_reduce_sum(size_t num_elements, view_type view_to_reduce, typename view_type::non_const_value_type &sum_reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ViewReduceSum", my_exec_space(0, num_elements), - ReduceSumFunctor(view_to_reduce), sum_reduction); + Kokkos::parallel_reduce("KokkosKernels::Common::ViewReduceSum", my_exec_space(0, num_elements), + ReduceSumFunctor(view_to_reduce), sum_reduction); } template void view_reduce_max(size_t num_elements, view_type view_to_reduce, typename view_type::non_const_value_type &max_reduction) { - kk_view_reduce_max(num_elements, view_to_reduce, - max_reduction); + kk_view_reduce_max(num_elements, view_to_reduce, max_reduction); } template @@ -1319,28 +1109,18 @@ struct ReduceRowSizeFunctor { // view has num_rows+1 elements. template -void kk_view_reduce_max_row_size(MyExecSpace my_exec_space, - const size_t num_rows, - const size_type *rowmap_view_begins, - const size_type *rowmap_view_ends, - size_type &max_row_size) { +void kk_view_reduce_max_row_size(MyExecSpace my_exec_space, const size_t num_rows, const size_type *rowmap_view_begins, + const size_type *rowmap_view_ends, size_type &max_row_size) { typedef Kokkos::RangePolicy range_policy_t; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ViewReduceMaxRowSize", - range_policy_t(my_exec_space, 0, num_rows), - ReduceRowSizeFunctor(rowmap_view_begins, rowmap_view_ends), - max_row_size); + Kokkos::parallel_reduce("KokkosKernels::Common::ViewReduceMaxRowSize", range_policy_t(my_exec_space, 0, num_rows), + ReduceRowSizeFunctor(rowmap_view_begins, rowmap_view_ends), max_row_size); } // view has num_rows+1 elements. 
template -void kk_view_reduce_max_row_size(const size_t num_rows, - const size_type *rowmap_view_begins, - const size_type *rowmap_view_ends, - size_type &max_row_size) { - return kk_view_reduce_max_row_size(MyExecSpace(), num_rows, - rowmap_view_begins, rowmap_view_ends, - max_row_size); +void kk_view_reduce_max_row_size(const size_t num_rows, const size_type *rowmap_view_begins, + const size_type *rowmap_view_ends, size_type &max_row_size) { + return kk_view_reduce_max_row_size(MyExecSpace(), num_rows, rowmap_view_begins, rowmap_view_ends, max_row_size); } template @@ -1348,8 +1128,7 @@ struct ReduceMaxRowFunctor { view_type rowmap_view; typedef typename view_type::non_const_value_type value_type; const value_type min_val; - ReduceMaxRowFunctor(view_type rowmap_view_) - : rowmap_view(rowmap_view_), min_val(0) {} + ReduceMaxRowFunctor(view_type rowmap_view_) : rowmap_view(rowmap_view_), min_val(0) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &i, value_type &max_reduction) const { @@ -1377,13 +1156,11 @@ struct ReduceMaxRowFunctor { // view has num_rows+1 elements. 
template -void view_reduce_maxsizerow( - size_t num_rows, view_type rowmap_view, - typename view_type::non_const_value_type &max_reduction) { +void view_reduce_maxsizerow(size_t num_rows, view_type rowmap_view, + typename view_type::non_const_value_type &max_reduction) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ViewReduceMaxSizeRow", my_exec_space(0, num_rows), - ReduceMaxRowFunctor(rowmap_view), max_reduction); + Kokkos::parallel_reduce("KokkosKernels::Common::ViewReduceMaxSizeRow", my_exec_space(0, num_rows), + ReduceMaxRowFunctor(rowmap_view), max_reduction); } template @@ -1391,8 +1168,7 @@ struct IsEqualFunctor { view_type1 view1; view_type2 view2; - IsEqualFunctor(view_type1 view1_, view_type2 view2_) - : view1(view1_), view2(view2_) {} + IsEqualFunctor(view_type1 view1_, view_type2 view2_) : view1(view1_), view2(view2_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &i, int &is_equal) const { @@ -1412,9 +1188,8 @@ template bool isSame(size_t num_elements, view_type1 view1, view_type2 view2) { typedef Kokkos::RangePolicy my_exec_space; int issame = 1; - Kokkos::parallel_reduce( - "KokkosKernels::Common::isSame", my_exec_space(0, num_elements), - IsEqualFunctor(view1, view2), issame); + Kokkos::parallel_reduce("KokkosKernels::Common::isSame", my_exec_space(0, num_elements), + IsEqualFunctor(view1, view2), issame); MyExecSpace().fence(); return issame; } @@ -1427,14 +1202,10 @@ struct MaxHeap { size_type current_size; MaxHeap(a_view_t heap_keys_, b_view_t heap_values_, size_type max_size_) - : heap_keys(heap_keys_), - heap_values(heap_values_), - max_size(max_size_), - current_size(0) {} + : heap_keys(heap_keys_), heap_values(heap_values_), max_size(max_size_), current_size(0) {} KOKKOS_INLINE_FUNCTION - void insert(typename a_view_t::value_type &key, - typename b_view_t::value_type &val) { + void insert(typename a_view_t::value_type &key, typename b_view_t::value_type &val) { for (size_type i = 0; 
i < current_size; ++i) { if (key == heap_keys(i)) { heap_values(i) = heap_values(i) & val; @@ -1459,8 +1230,7 @@ struct InitScalar { size_type team_row_chunk_size; nnz_lno_t init_val; - InitScalar(size_type num_elements_, in_view_t view_to_init_, - size_type chunk_size_, nnz_lno_t init_val_) + InitScalar(size_type num_elements_, in_view_t view_to_init_, size_type chunk_size_, nnz_lno_t init_val_) : num_elements(num_elements_), view_to_init(view_to_init_), team_row_chunk_size(chunk_size_), @@ -1471,20 +1241,16 @@ struct InitScalar { // const nnz_lno_t row_index = teamMember.league_rank() * // team_row_chunk_size; - const nnz_lno_t team_row_begin = - teamMember.league_rank() * team_row_chunk_size; - const nnz_lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN( - team_row_begin + team_row_chunk_size, num_elements); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), - [&](const nnz_lno_t &row_ind) { view_to_init[row_ind] = init_val; }); + const nnz_lno_t team_row_begin = teamMember.league_rank() * team_row_chunk_size; + const nnz_lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_row_chunk_size, num_elements); + Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), + [&](const nnz_lno_t &row_ind) { view_to_init[row_ind] = init_val; }); } }; template -void init_view_withscalar( - typename in_row_view_t::size_type num_elements, in_row_view_t arr, - typename in_row_view_t::size_type team_size, - typename in_row_view_t::non_const_value_type init_val) { +void init_view_withscalar(typename in_row_view_t::size_type num_elements, in_row_view_t arr, + typename in_row_view_t::size_type team_size, + typename in_row_view_t::non_const_value_type init_val) { typename in_row_view_t::size_type chunk_size = num_elements / team_size; typedef InitScalar InitScalar_t; InitScalar_t tm(num_elements, arr, chunk_size, init_val); @@ -1492,9 +1258,8 @@ void init_view_withscalar( int vector_size = 1; 
Kokkos::Timer timer1; - Kokkos::parallel_for( - "KokkosKernels::Common::InitViewWithScalar", - tcp_t(num_elements / chunk_size + 1, team_size, vector_size), tm); + Kokkos::parallel_for("KokkosKernels::Common::InitViewWithScalar", + tcp_t(num_elements / chunk_size + 1, team_size, vector_size), tm); MyExecSpace().fence(); } @@ -1504,8 +1269,7 @@ struct array_sum_reduce { using ValueType = array_sum_reduce; // Workaround for https://github.com/kokkos/kokkos/issues/5860 static constexpr int N_internal = - ((N == 3 || N == 5 || N == 7) && - std::is_same::value && + ((N == 3 || N == 5 || N == 7) && std::is_same::value && sizeof(Kokkos::Experimental::half_t) == 2) ? (N + 1) : N; @@ -1527,13 +1291,15 @@ struct array_sum_reduce { } }; -template -KOKKOS_INLINE_FUNCTION T *alignPtr(InPtr p) { +template +KOKKOS_INLINE_FUNCTION T *alignPtrTo(InPtr *p) { // ugly but computationally free and the "right" way to do this in C++ - std::uintptr_t ptrVal = reinterpret_cast(p); + const std::uintptr_t ptrVal = reinterpret_cast(p); // ptrVal + (align - 1) lands inside the next valid aligned scalar_t, // and the mask produces the start of that scalar_t. 
- return reinterpret_cast((ptrVal + alignof(T) - 1) & (~(alignof(T) - 1))); + const std::uintptr_t ptrValNew = (ptrVal + alignof(T) - 1) & (~(alignof(T) - 1)); + return reinterpret_cast(reinterpret_cast(const_cast *>(p)) + + (ptrValNew - ptrVal)); } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_VectorUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_VectorUtils.hpp index f0c09a7e9ff2..d20d29895651 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_VectorUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_VectorUtils.hpp @@ -22,15 +22,13 @@ namespace KokkosKernels { namespace Impl { -template +template struct A_times_X_plus_B { out_array_t out_view; in_array_t in_view; const scalar_1 a; const scalar_2 b; - A_times_X_plus_B(out_array_t out_view_, in_array_t in_view_, scalar_1 a_, - scalar_2 b_) + A_times_X_plus_B(out_array_t out_view_, in_array_t in_view_, scalar_1 a_, scalar_2 b_) : out_view(out_view_), in_view(in_view_), a(a_), b(b_) {} KOKKOS_INLINE_FUNCTION @@ -47,9 +45,7 @@ struct ModularView { : out_view(out_view_), in_view(in_view_), modular_constant(mod_factor_) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t ii) const { - out_view(ii) = in_view(ii) % modular_constant; - } + void operator()(const size_t ii) const { out_view(ii) = in_view(ii) % modular_constant; } }; template @@ -72,16 +68,12 @@ struct CopyVectorFunctor { * \param a: scalar for multiplication * \param b: scalar for addition */ -template -inline void kk_a_times_x_plus_b(typename in_array_t::value_type num_elements, - out_array_t out_arr, in_array_t in_arr, +template +inline void kk_a_times_x_plus_b(typename in_array_t::value_type num_elements, out_array_t out_arr, in_array_t in_arr, scalar_1 a, scalar_2 b) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for( - "KokkosKernels::Common::ATimesXPlusB", my_exec_space(0, num_elements), - A_times_X_plus_B( - out_arr, in_arr, a, b)); + 
Kokkos::parallel_for("KokkosKernels::Common::ATimesXPlusB", my_exec_space(0, num_elements), + A_times_X_plus_B(out_arr, in_arr, a, b)); } /** @@ -92,20 +84,17 @@ inline void kk_a_times_x_plus_b(typename in_array_t::value_type num_elements, * applied. */ template -inline void kk_modular_view(typename in_array_type::value_type num_elements, - out_array_type out_arr, in_array_type in_arr, - int mod_factor_) { +inline void kk_modular_view(typename in_array_type::value_type num_elements, out_array_type out_arr, + in_array_type in_arr, int mod_factor_) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for( - "KokkosKernels::Common::ModularView", my_exec_space(0, num_elements), - ModularView(out_arr, in_arr, mod_factor_)); + Kokkos::parallel_for("KokkosKernels::Common::ModularView", my_exec_space(0, num_elements), + ModularView(out_arr, in_arr, mod_factor_)); } template void kk_copy_vector(size_t num_elements, from_vector from, to_vector to) { typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for("KokkosKernels::Common::CopyVector", - my_exec_space(0, num_elements), + Kokkos::parallel_for("KokkosKernels::Common::CopyVector", my_exec_space(0, num_elements), CopyVectorFunctor(from, to)); } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_helpers.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_helpers.hpp index 1b725f2f5c6a..cea3a8a0611f 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_helpers.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_helpers.hpp @@ -16,7 +16,7 @@ #ifndef KOKKOSKERNELS_HELPERS_HPP_ #define KOKKOSKERNELS_HELPERS_HPP_ -#include "KokkosKernels_config.h" // KOKKOSKERNELS_INST_LAYOUTLEFT, KOKKOSKERNELS_INST_LAYOUTRIGHT +#include "KokkosKernels_config.h" // KOKKOSKERNELS_INST_LAYOUTLEFT, KOKKOSKERNELS_INST_LAYOUTRIGHT #include "KokkosKernels_default_types.hpp" // default_layout #include @@ -29,49 +29,43 @@ namespace Impl { // Used to reduce number of code 
instantiations. template struct GetUnifiedLayoutPreferring { - using array_layout = typename std::conditional< - ((ViewType::rank == 1) && !std::is_same_v) || - (ViewType::rank == 0), - PreferredLayoutType, typename ViewType::array_layout>::type; + using array_layout = + typename std::conditional<((ViewType::rank == 1) && + !std::is_same_v) || + (ViewType::rank == 0), + PreferredLayoutType, typename ViewType::array_layout>::type; }; template struct GetUnifiedLayout { - using array_layout = - typename GetUnifiedLayoutPreferring::array_layout; + using array_layout = typename GetUnifiedLayoutPreferring::array_layout; }; -template ::value> +template ::value> struct GetUnifiedScalarViewType { typedef typename TX::non_const_value_type type; }; template struct GetUnifiedScalarViewType { - typedef Kokkos::View::array_layout, - typename T::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View< + typename T::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename T::device_type, Kokkos::MemoryTraits > type; }; template struct GetUnifiedScalarViewType { - typedef Kokkos::View::array_layout, - typename T::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View< + typename T::const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, + typename T::device_type, Kokkos::MemoryTraits > type; }; template -struct are_integral : std::bool_constant<((std::is_integral_v || - std::is_enum_v)&&...)> {}; +struct are_integral : std::bool_constant<((std::is_integral_v || std::is_enum_v)&&...)> {}; template inline constexpr bool are_integral_v = are_integral::value; diff --git a/packages/kokkos-kernels/common/src/Kokkos_ArithTraits.hpp b/packages/kokkos-kernels/common/src/Kokkos_ArithTraits.hpp index 75c0951e1047..25089613d4ba 100644 --- a/packages/kokkos-kernels/common/src/Kokkos_ArithTraits.hpp +++ b/packages/kokkos-kernels/common/src/Kokkos_ArithTraits.hpp @@ -25,9 +25,6 @@ #include #include 
#include -#if KOKKOS_VERSION < 40199 -#include -#endif #include @@ -51,8 +48,7 @@ namespace { // anonymous /// /// Use intPowSigned or intPowUnsigned for general y. template -KOKKOS_FORCEINLINE_FUNCTION IntType intPowImpl(const IntType x, - const IntType y) { +KOKKOS_FORCEINLINE_FUNCTION IntType intPowImpl(const IntType x, const IntType y) { // Recursion (unrolled into while loop): pow(x, 2y) = (x^y)^2 IntType prod = x; IntType y_cur = 1; @@ -123,10 +119,8 @@ struct integer_abs { /// result of this function is undefined. However, this function will /// not throw an exception in that case. template -KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if::is_signed, - IntType>::type - intPowSigned(const IntType x, const IntType y) { +KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if::is_signed, IntType>::type +intPowSigned(const IntType x, const IntType y) { // It's not entirely clear what to return if x and y are both zero. // In the case of floating-point numbers, 0^0 is NaN. Here, though, // I think it's safe to return 0. @@ -146,10 +140,8 @@ KOKKOS_FORCEINLINE_FUNCTION return intPowImpl(x, y); } template -KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if::is_signed, - IntType>::type - intPowSigned(const IntType x, const IntType y) { +KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if::is_signed, IntType>::type +intPowSigned(const IntType x, const IntType y) { // It's not entirely clear what to return if x and y are both zero. // In the case of floating-point numbers, 0^0 is NaN. Here, though, // I think it's safe to return 0. @@ -169,8 +161,7 @@ KOKKOS_FORCEINLINE_FUNCTION /// result of this function is undefined. However, this function will /// not throw an exception in that case. template -KOKKOS_FORCEINLINE_FUNCTION IntType intPowUnsigned(const IntType x, - const IntType y) { +KOKKOS_FORCEINLINE_FUNCTION IntType intPowUnsigned(const IntType x, const IntType y) { // It's not entirely clear what to return if x and y are both zero. 
// In the case of floating-point numbers, 0^0 is NaN. Here, though, // I think it's safe to return 0. @@ -199,370 +190,229 @@ KOKKOS_FORCEINLINE_FUNCTION IntType intPowUnsigned(const IntType x, namespace Kokkos { // Macro to automate the wrapping of Kokkos Mathematical Functions -#define KOKKOSKERNELS_ARITHTRAITS_REAL_FP(FUNC_QUAL) \ - static FUNC_QUAL val_type zero() { return static_cast(0); } \ - static FUNC_QUAL val_type one() { return static_cast(1); } \ - static FUNC_QUAL val_type min() { \ - return Kokkos::Experimental::finite_min::value; \ - } \ - static FUNC_QUAL val_type max() { \ - return Kokkos::Experimental::finite_max::value; \ - } \ - static FUNC_QUAL val_type infinity() { \ - return Kokkos::Experimental::infinity::value; \ - } \ - static FUNC_QUAL val_type nan() { \ - return Kokkos::Experimental::quiet_NaN::value; \ - } \ - static FUNC_QUAL mag_type epsilon() { \ - return Kokkos::Experimental::epsilon::value; \ - } \ - static FUNC_QUAL mag_type sfmin() { \ - return Kokkos::Experimental::norm_min::value; \ - } \ - static FUNC_QUAL int base() { \ - return Kokkos::Experimental::radix::value; \ - } \ - static FUNC_QUAL mag_type prec() { \ - return epsilon() * static_cast(base()); \ - } \ - static FUNC_QUAL int t() { \ - return Kokkos::Experimental::digits::value; \ - } \ - static FUNC_QUAL mag_type rnd() { return one(); } \ - static FUNC_QUAL int emin() { \ - return Kokkos::Experimental::min_exponent::value; \ - } \ - static FUNC_QUAL mag_type rmin() { \ - return Kokkos::Experimental::norm_min::value; \ - } \ - static FUNC_QUAL int emax() { \ - return Kokkos::Experimental::max_exponent::value; \ - } \ - static FUNC_QUAL mag_type rmax() { \ - return Kokkos::Experimental::finite_max::value; \ - } \ - \ - static FUNC_QUAL bool isInf(const val_type x) { return Kokkos::isinf(x); } \ - static FUNC_QUAL bool isNan(const val_type x) { return Kokkos::isnan(x); } \ - static FUNC_QUAL mag_type abs(const val_type x) { return Kokkos::abs(x); } \ - static FUNC_QUAL 
mag_type real(const val_type x) { return Kokkos::real(x); } \ - static FUNC_QUAL mag_type imag(const val_type x) { return Kokkos::imag(x); } \ - static FUNC_QUAL val_type conj(const val_type x) { return x; } \ - static FUNC_QUAL val_type pow(const val_type x, const val_type y) { \ - return Kokkos::pow(x, y); \ - } \ - static FUNC_QUAL val_type sqrt(const val_type x) { return Kokkos::sqrt(x); } \ - static FUNC_QUAL val_type cbrt(const val_type x) { return Kokkos::cbrt(x); } \ - static FUNC_QUAL val_type exp(const val_type x) { return Kokkos::exp(x); } \ - static FUNC_QUAL val_type log(const val_type x) { return Kokkos::log(x); } \ - static FUNC_QUAL val_type log10(const val_type x) { \ - return Kokkos::log10(x); \ - } \ - static FUNC_QUAL val_type sin(const val_type x) { return Kokkos::sin(x); } \ - static FUNC_QUAL val_type cos(const val_type x) { return Kokkos::cos(x); } \ - static FUNC_QUAL val_type tan(const val_type x) { return Kokkos::tan(x); } \ - static FUNC_QUAL val_type sinh(const val_type x) { return Kokkos::sinh(x); } \ - static FUNC_QUAL val_type cosh(const val_type x) { return Kokkos::cosh(x); } \ - static FUNC_QUAL val_type tanh(const val_type x) { return Kokkos::tanh(x); } \ - static FUNC_QUAL val_type asin(const val_type x) { return Kokkos::asin(x); } \ - static FUNC_QUAL val_type acos(const val_type x) { return Kokkos::acos(x); } \ - static FUNC_QUAL val_type atan(const val_type x) { return Kokkos::atan(x); } \ - \ - static FUNC_QUAL bool isnaninf(const val_type x) { \ - return isNan(x) || isInf(x); \ - } \ - static FUNC_QUAL magnitudeType magnitude(const val_type x) { \ - return abs(x); \ - } \ - static FUNC_QUAL val_type conjugate(const val_type x) { return conj(x); } \ - static FUNC_QUAL val_type squareroot(const val_type x) { return sqrt(x); } \ +#define KOKKOSKERNELS_ARITHTRAITS_REAL_FP(FUNC_QUAL) \ + static FUNC_QUAL val_type zero() { return static_cast(0); } \ + static FUNC_QUAL val_type one() { return static_cast(1); } \ + static FUNC_QUAL 
val_type min() { return Kokkos::Experimental::finite_min::value; } \ + static FUNC_QUAL val_type max() { return Kokkos::Experimental::finite_max::value; } \ + static FUNC_QUAL val_type infinity() { return Kokkos::Experimental::infinity::value; } \ + static FUNC_QUAL val_type nan() { return Kokkos::Experimental::quiet_NaN::value; } \ + static FUNC_QUAL mag_type epsilon() { return Kokkos::Experimental::epsilon::value; } \ + static FUNC_QUAL mag_type sfmin() { return Kokkos::Experimental::norm_min::value; } \ + static FUNC_QUAL int base() { return Kokkos::Experimental::radix::value; } \ + static FUNC_QUAL mag_type prec() { return epsilon() * static_cast(base()); } \ + static FUNC_QUAL int t() { return Kokkos::Experimental::digits::value; } \ + static FUNC_QUAL mag_type rnd() { return one(); } \ + static FUNC_QUAL int emin() { return Kokkos::Experimental::min_exponent::value; } \ + static FUNC_QUAL mag_type rmin() { return Kokkos::Experimental::norm_min::value; } \ + static FUNC_QUAL int emax() { return Kokkos::Experimental::max_exponent::value; } \ + static FUNC_QUAL mag_type rmax() { return Kokkos::Experimental::finite_max::value; } \ + \ + static FUNC_QUAL bool isInf(const val_type x) { return Kokkos::isinf(x); } \ + static FUNC_QUAL bool isNan(const val_type x) { return Kokkos::isnan(x); } \ + static FUNC_QUAL mag_type abs(const val_type x) { return Kokkos::abs(x); } \ + static FUNC_QUAL mag_type real(const val_type x) { return Kokkos::real(x); } \ + static FUNC_QUAL mag_type imag(const val_type x) { return Kokkos::imag(x); } \ + static FUNC_QUAL val_type conj(const val_type x) { return x; } \ + static FUNC_QUAL val_type pow(const val_type x, const val_type y) { return Kokkos::pow(x, y); } \ + static FUNC_QUAL val_type sqrt(const val_type x) { return Kokkos::sqrt(x); } \ + static FUNC_QUAL val_type cbrt(const val_type x) { return Kokkos::cbrt(x); } \ + static FUNC_QUAL val_type exp(const val_type x) { return Kokkos::exp(x); } \ + static FUNC_QUAL val_type log(const 
val_type x) { return Kokkos::log(x); } \ + static FUNC_QUAL val_type log10(const val_type x) { return Kokkos::log10(x); } \ + static FUNC_QUAL val_type sin(const val_type x) { return Kokkos::sin(x); } \ + static FUNC_QUAL val_type cos(const val_type x) { return Kokkos::cos(x); } \ + static FUNC_QUAL val_type tan(const val_type x) { return Kokkos::tan(x); } \ + static FUNC_QUAL val_type sinh(const val_type x) { return Kokkos::sinh(x); } \ + static FUNC_QUAL val_type cosh(const val_type x) { return Kokkos::cosh(x); } \ + static FUNC_QUAL val_type tanh(const val_type x) { return Kokkos::tanh(x); } \ + static FUNC_QUAL val_type asin(const val_type x) { return Kokkos::asin(x); } \ + static FUNC_QUAL val_type acos(const val_type x) { return Kokkos::acos(x); } \ + static FUNC_QUAL val_type atan(const val_type x) { return Kokkos::atan(x); } \ + \ + static FUNC_QUAL bool isnaninf(const val_type x) { return isNan(x) || isInf(x); } \ + static FUNC_QUAL magnitudeType magnitude(const val_type x) { return abs(x); } \ + static FUNC_QUAL val_type conjugate(const val_type x) { return conj(x); } \ + static FUNC_QUAL val_type squareroot(const val_type x) { return sqrt(x); } \ static FUNC_QUAL mag_type eps() { return epsilon(); } // Macro to automate the wrapping of Kokkos Mathematical Functions -#define KOKKOSKERNELS_ARITHTRAITS_HALF_FP(FUNC_QUAL) \ - static FUNC_QUAL val_type zero() { return static_cast(0); } \ - static FUNC_QUAL val_type one() { return static_cast(1); } \ - static FUNC_QUAL val_type min() { \ - return Kokkos::Experimental::finite_min::value; \ - } \ - static FUNC_QUAL val_type max() { \ - return Kokkos::Experimental::finite_max::value; \ - } \ - static FUNC_QUAL val_type infinity() { \ - return Kokkos::Experimental::infinity::value; \ - } \ - static FUNC_QUAL val_type nan() { \ - return Kokkos::Experimental::quiet_NaN::value; \ - } \ - static FUNC_QUAL mag_type epsilon() { \ - return Kokkos::Experimental::epsilon::value; \ - } \ - static FUNC_QUAL mag_type sfmin() 
{ \ - return Kokkos::Experimental::norm_min::value; \ - } \ - static FUNC_QUAL int base() { \ - return Kokkos::Experimental::radix::value; \ - } \ - static FUNC_QUAL mag_type prec() { \ - return epsilon() * static_cast(base()); \ - } \ - static FUNC_QUAL int t() { \ - return Kokkos::Experimental::digits::value; \ - } \ - static FUNC_QUAL mag_type rnd() { return one(); } \ - static FUNC_QUAL int emin() { \ - return Kokkos::Experimental::min_exponent::value; \ - } \ - static FUNC_QUAL mag_type rmin() { \ - return Kokkos::Experimental::norm_min::value; \ - } \ - static FUNC_QUAL int emax() { \ - return Kokkos::Experimental::max_exponent::value; \ - } \ - static FUNC_QUAL mag_type rmax() { \ - return Kokkos::Experimental::finite_max::value; \ - } \ - \ - static FUNC_QUAL bool isInf(const val_type x) { return Kokkos::isinf(x); } \ - static FUNC_QUAL mag_type abs(const val_type x) { return Kokkos::abs(x); } \ - static FUNC_QUAL mag_type real(const val_type x) { return Kokkos::real(x); } \ - static FUNC_QUAL mag_type imag(const val_type x) { return Kokkos::imag(x); } \ - static FUNC_QUAL val_type conj(const val_type x) { return x; } \ - static FUNC_QUAL val_type pow(const val_type x, const val_type y) { \ - return Kokkos::pow(x, y); \ - } \ - static FUNC_QUAL val_type sqrt(const val_type x) { return Kokkos::sqrt(x); } \ - static FUNC_QUAL val_type cbrt(const val_type x) { return Kokkos::cbrt(x); } \ - static FUNC_QUAL val_type exp(const val_type x) { return Kokkos::exp(x); } \ - static FUNC_QUAL val_type log(const val_type x) { return Kokkos::log(x); } \ - static FUNC_QUAL val_type log10(const val_type x) { \ - return Kokkos::log10(x); \ - } \ - static FUNC_QUAL val_type sin(const val_type x) { return Kokkos::sin(x); } \ - static FUNC_QUAL val_type cos(const val_type x) { return Kokkos::cos(x); } \ - static FUNC_QUAL val_type tan(const val_type x) { return Kokkos::tan(x); } \ - static FUNC_QUAL val_type sinh(const val_type x) { return Kokkos::sinh(x); } \ - static 
FUNC_QUAL val_type cosh(const val_type x) { return Kokkos::cosh(x); } \ - static FUNC_QUAL val_type tanh(const val_type x) { return Kokkos::tanh(x); } \ - static FUNC_QUAL val_type asin(const val_type x) { return Kokkos::asin(x); } \ - static FUNC_QUAL val_type acos(const val_type x) { return Kokkos::acos(x); } \ - static FUNC_QUAL val_type atan(const val_type x) { return Kokkos::atan(x); } \ - \ - static FUNC_QUAL magnitudeType magnitude(const val_type x) { \ - return abs(x); \ - } \ - static FUNC_QUAL val_type conjugate(const val_type x) { return conj(x); } \ - static FUNC_QUAL val_type squareroot(const val_type x) { return sqrt(x); } \ +#define KOKKOSKERNELS_ARITHTRAITS_HALF_FP(FUNC_QUAL) \ + static FUNC_QUAL val_type zero() { return static_cast(0); } \ + static FUNC_QUAL val_type one() { return static_cast(1); } \ + static FUNC_QUAL val_type min() { return Kokkos::Experimental::finite_min::value; } \ + static FUNC_QUAL val_type max() { return Kokkos::Experimental::finite_max::value; } \ + static FUNC_QUAL val_type infinity() { return Kokkos::Experimental::infinity::value; } \ + static FUNC_QUAL val_type nan() { return Kokkos::Experimental::quiet_NaN::value; } \ + static FUNC_QUAL mag_type epsilon() { return Kokkos::Experimental::epsilon::value; } \ + static FUNC_QUAL mag_type sfmin() { return Kokkos::Experimental::norm_min::value; } \ + static FUNC_QUAL int base() { return Kokkos::Experimental::radix::value; } \ + static FUNC_QUAL mag_type prec() { return epsilon() * static_cast(base()); } \ + static FUNC_QUAL int t() { return Kokkos::Experimental::digits::value; } \ + static FUNC_QUAL mag_type rnd() { return one(); } \ + static FUNC_QUAL int emin() { return Kokkos::Experimental::min_exponent::value; } \ + static FUNC_QUAL mag_type rmin() { return Kokkos::Experimental::norm_min::value; } \ + static FUNC_QUAL int emax() { return Kokkos::Experimental::max_exponent::value; } \ + static FUNC_QUAL mag_type rmax() { return Kokkos::Experimental::finite_max::value; } \ 
+ \ + static FUNC_QUAL bool isInf(const val_type x) { return Kokkos::isinf(x); } \ + static FUNC_QUAL mag_type abs(const val_type x) { return Kokkos::abs(x); } \ + static FUNC_QUAL mag_type real(const val_type x) { return Kokkos::real(x); } \ + static FUNC_QUAL mag_type imag(const val_type x) { return Kokkos::imag(x); } \ + static FUNC_QUAL val_type conj(const val_type x) { return x; } \ + static FUNC_QUAL val_type pow(const val_type x, const val_type y) { return Kokkos::pow(x, y); } \ + static FUNC_QUAL val_type sqrt(const val_type x) { return Kokkos::sqrt(x); } \ + static FUNC_QUAL val_type cbrt(const val_type x) { return Kokkos::cbrt(x); } \ + static FUNC_QUAL val_type exp(const val_type x) { return Kokkos::exp(x); } \ + static FUNC_QUAL val_type log(const val_type x) { return Kokkos::log(x); } \ + static FUNC_QUAL val_type log10(const val_type x) { return Kokkos::log10(x); } \ + static FUNC_QUAL val_type sin(const val_type x) { return Kokkos::sin(x); } \ + static FUNC_QUAL val_type cos(const val_type x) { return Kokkos::cos(x); } \ + static FUNC_QUAL val_type tan(const val_type x) { return Kokkos::tan(x); } \ + static FUNC_QUAL val_type sinh(const val_type x) { return Kokkos::sinh(x); } \ + static FUNC_QUAL val_type cosh(const val_type x) { return Kokkos::cosh(x); } \ + static FUNC_QUAL val_type tanh(const val_type x) { return Kokkos::tanh(x); } \ + static FUNC_QUAL val_type asin(const val_type x) { return Kokkos::asin(x); } \ + static FUNC_QUAL val_type acos(const val_type x) { return Kokkos::acos(x); } \ + static FUNC_QUAL val_type atan(const val_type x) { return Kokkos::atan(x); } \ + \ + static FUNC_QUAL magnitudeType magnitude(const val_type x) { return abs(x); } \ + static FUNC_QUAL val_type conjugate(const val_type x) { return conj(x); } \ + static FUNC_QUAL val_type squareroot(const val_type x) { return sqrt(x); } \ static FUNC_QUAL mag_type eps() { return epsilon(); } -#define KOKKOSKERNELS_ARITHTRAITS_CMPLX_FP(FUNC_QUAL) \ - \ - static constexpr bool 
is_specialized = true; \ - static constexpr bool is_signed = true; \ - static constexpr bool is_integer = false; \ - static constexpr bool is_exact = false; \ - static constexpr bool is_complex = true; \ - static constexpr bool has_infinity = true; \ - \ - using magnitudeType = mag_type; \ - using halfPrecision = \ - ::Kokkos::complex::halfPrecision>; \ - using doublePrecision = \ - ::Kokkos::complex::doublePrecision>; \ - \ - static constexpr bool isComplex = true; \ - static constexpr bool isOrdinal = false; \ - static constexpr bool isComparable = false; \ - static constexpr bool hasMachineParameters = \ - ArithTraits::hasMachineParameters; \ - \ - static FUNC_QUAL val_type zero() { \ - return val_type(ArithTraits::zero(), \ - ArithTraits::zero()); \ - } \ - static FUNC_QUAL val_type one() { \ - return val_type(ArithTraits::one(), \ - ArithTraits::zero()); \ - } \ - static FUNC_QUAL val_type min() { \ - return val_type(ArithTraits::min(), \ - ArithTraits::min()); \ - } \ - static FUNC_QUAL val_type max() { \ - return val_type(ArithTraits::max(), \ - ArithTraits::max()); \ - } \ - static FUNC_QUAL val_type infinity() { \ - return val_type(ArithTraits::infinity(), \ - ArithTraits::infinity()); \ - } \ - static FUNC_QUAL val_type nan() { \ - return val_type(ArithTraits::nan(), \ - ArithTraits::nan()); \ - } \ - static FUNC_QUAL mag_type epsilon() { \ - return ArithTraits::epsilon(); \ - } \ - static FUNC_QUAL mag_type sfmin() { return ArithTraits::sfmin(); } \ - static FUNC_QUAL int base() { return ArithTraits::base(); } \ - static FUNC_QUAL mag_type prec() { return ArithTraits::prec(); } \ - static FUNC_QUAL int t() { return ArithTraits::t(); } \ - static FUNC_QUAL mag_type rnd() { return ArithTraits::rnd(); } \ - static FUNC_QUAL int emin() { return ArithTraits::emin(); } \ - static FUNC_QUAL mag_type rmin() { return ArithTraits::rmin(); } \ - static FUNC_QUAL int emax() { return ArithTraits::emax(); } \ - static FUNC_QUAL mag_type rmax() { return 
ArithTraits::rmax(); } \ - static FUNC_QUAL bool isInf(const val_type x) { \ - return ArithTraits::isInf(x.real()) || \ - ArithTraits::isInf(x.imag()); \ - } \ - static FUNC_QUAL bool isNan(const val_type x) { \ - return ArithTraits::isNan(x.real()) || \ - ArithTraits::isNan(x.imag()); \ - } \ - static FUNC_QUAL mag_type abs(const val_type x) { return ::Kokkos::abs(x); } \ - static FUNC_QUAL mag_type real(const val_type x) { return x.real(); } \ - static FUNC_QUAL mag_type imag(const val_type x) { return x.imag(); } \ - static FUNC_QUAL val_type conj(const val_type x) { \ - return ::Kokkos::conj(x); \ - } \ - static FUNC_QUAL val_type pow(const val_type x, const val_type y) { \ - return Kokkos::pow(x, y); \ - } \ - static FUNC_QUAL val_type pow(const val_type x, const mag_type y) { \ - return Kokkos::pow(x, y); \ - } \ - static FUNC_QUAL val_type pow(const mag_type x, const val_type y) { \ - return Kokkos::pow(x, y); \ - } \ - static FUNC_QUAL val_type sqrt(const val_type x) { \ - return ::Kokkos::sqrt(x); \ - } \ - static FUNC_QUAL val_type exp(const val_type x) { return Kokkos::exp(x); } \ - static FUNC_QUAL val_type log(const val_type x) { return Kokkos::log(x); } \ - static FUNC_QUAL val_type log10(const val_type x) { \ - return Kokkos::log10(x); \ - } \ - static FUNC_QUAL val_type sin(const val_type x) { return Kokkos::sin(x); } \ - static FUNC_QUAL val_type cos(const val_type x) { return Kokkos::cos(x); } \ - static FUNC_QUAL val_type tan(const val_type x) { return Kokkos::tan(x); } \ - static FUNC_QUAL val_type sinh(const val_type x) { return Kokkos::sinh(x); } \ - static FUNC_QUAL val_type cosh(const val_type x) { return Kokkos::cosh(x); } \ - static FUNC_QUAL val_type tanh(const val_type x) { return Kokkos::tanh(x); } \ - static FUNC_QUAL val_type asin(const val_type x) { return Kokkos::asin(x); } \ - static FUNC_QUAL val_type acos(const val_type x) { return Kokkos::acos(x); } \ - static FUNC_QUAL val_type atan(const val_type x) { return Kokkos::atan(x); } 
\ - static FUNC_QUAL bool isnaninf(const val_type& x) { \ - return isNan(x) || isInf(x); \ - } \ - static FUNC_QUAL mag_type magnitude(const val_type x) { return abs(x); } \ - static FUNC_QUAL val_type conjugate(const val_type x) { return conj(x); } \ - static FUNC_QUAL val_type squareroot(const val_type x) { return sqrt(x); } \ +#define KOKKOSKERNELS_ARITHTRAITS_CMPLX_FP(FUNC_QUAL) \ + \ + static constexpr bool is_specialized = true; \ + static constexpr bool is_signed = true; \ + static constexpr bool is_integer = false; \ + static constexpr bool is_exact = false; \ + static constexpr bool is_complex = true; \ + static constexpr bool has_infinity = true; \ + \ + using magnitudeType = mag_type; \ + using halfPrecision = ::Kokkos::complex::halfPrecision>; \ + using doublePrecision = ::Kokkos::complex::doublePrecision>; \ + \ + static constexpr bool isComplex = true; \ + static constexpr bool isOrdinal = false; \ + static constexpr bool isComparable = false; \ + static constexpr bool hasMachineParameters = ArithTraits::hasMachineParameters; \ + \ + static FUNC_QUAL val_type zero() { return val_type(ArithTraits::zero(), ArithTraits::zero()); } \ + static FUNC_QUAL val_type one() { return val_type(ArithTraits::one(), ArithTraits::zero()); } \ + static FUNC_QUAL val_type min() { return val_type(ArithTraits::min(), ArithTraits::min()); } \ + static FUNC_QUAL val_type max() { return val_type(ArithTraits::max(), ArithTraits::max()); } \ + static FUNC_QUAL val_type infinity() { \ + return val_type(ArithTraits::infinity(), ArithTraits::infinity()); \ + } \ + static FUNC_QUAL val_type nan() { return val_type(ArithTraits::nan(), ArithTraits::nan()); } \ + static FUNC_QUAL mag_type epsilon() { return ArithTraits::epsilon(); } \ + static FUNC_QUAL mag_type sfmin() { return ArithTraits::sfmin(); } \ + static FUNC_QUAL int base() { return ArithTraits::base(); } \ + static FUNC_QUAL mag_type prec() { return ArithTraits::prec(); } \ + static FUNC_QUAL int t() { return 
ArithTraits::t(); } \ + static FUNC_QUAL mag_type rnd() { return ArithTraits::rnd(); } \ + static FUNC_QUAL int emin() { return ArithTraits::emin(); } \ + static FUNC_QUAL mag_type rmin() { return ArithTraits::rmin(); } \ + static FUNC_QUAL int emax() { return ArithTraits::emax(); } \ + static FUNC_QUAL mag_type rmax() { return ArithTraits::rmax(); } \ + static FUNC_QUAL bool isInf(const val_type x) { \ + return ArithTraits::isInf(x.real()) || ArithTraits::isInf(x.imag()); \ + } \ + static FUNC_QUAL bool isNan(const val_type x) { \ + return ArithTraits::isNan(x.real()) || ArithTraits::isNan(x.imag()); \ + } \ + static FUNC_QUAL mag_type abs(const val_type x) { return ::Kokkos::abs(x); } \ + static FUNC_QUAL mag_type real(const val_type x) { return x.real(); } \ + static FUNC_QUAL mag_type imag(const val_type x) { return x.imag(); } \ + static FUNC_QUAL val_type conj(const val_type x) { return ::Kokkos::conj(x); } \ + static FUNC_QUAL val_type pow(const val_type x, const val_type y) { return Kokkos::pow(x, y); } \ + static FUNC_QUAL val_type pow(const val_type x, const mag_type y) { return Kokkos::pow(x, y); } \ + static FUNC_QUAL val_type pow(const mag_type x, const val_type y) { return Kokkos::pow(x, y); } \ + static FUNC_QUAL val_type sqrt(const val_type x) { return ::Kokkos::sqrt(x); } \ + static FUNC_QUAL val_type exp(const val_type x) { return Kokkos::exp(x); } \ + static FUNC_QUAL val_type log(const val_type x) { return Kokkos::log(x); } \ + static FUNC_QUAL val_type log10(const val_type x) { return Kokkos::log10(x); } \ + static FUNC_QUAL val_type sin(const val_type x) { return Kokkos::sin(x); } \ + static FUNC_QUAL val_type cos(const val_type x) { return Kokkos::cos(x); } \ + static FUNC_QUAL val_type tan(const val_type x) { return Kokkos::tan(x); } \ + static FUNC_QUAL val_type sinh(const val_type x) { return Kokkos::sinh(x); } \ + static FUNC_QUAL val_type cosh(const val_type x) { return Kokkos::cosh(x); } \ + static FUNC_QUAL val_type tanh(const val_type 
x) { return Kokkos::tanh(x); } \ + static FUNC_QUAL val_type asin(const val_type x) { return Kokkos::asin(x); } \ + static FUNC_QUAL val_type acos(const val_type x) { return Kokkos::acos(x); } \ + static FUNC_QUAL val_type atan(const val_type x) { return Kokkos::atan(x); } \ + static FUNC_QUAL bool isnaninf(const val_type& x) { return isNan(x) || isInf(x); } \ + static FUNC_QUAL mag_type magnitude(const val_type x) { return abs(x); } \ + static FUNC_QUAL val_type conjugate(const val_type x) { return conj(x); } \ + static FUNC_QUAL val_type squareroot(const val_type x) { return sqrt(x); } \ static FUNC_QUAL mag_type eps() { return epsilon(); } template -static KOKKOS_FUNCTION - typename std::enable_if::is_signed, - val_type>::type - KokkosKernelsAbs(const val_type x) { +static KOKKOS_FUNCTION typename std::enable_if::is_signed, val_type>::type +KokkosKernelsAbs(const val_type x) { return Kokkos::abs(x); } template -static KOKKOS_FUNCTION - typename std::enable_if::is_signed, - val_type>::type - KokkosKernelsAbs(const val_type x) { +static KOKKOS_FUNCTION typename std::enable_if::is_signed, val_type>::type +KokkosKernelsAbs(const val_type x) { return x; } template -static KOKKOS_FUNCTION - typename std::enable_if::is_signed, - val_type>::type - KokkosKernelsNan() { +static KOKKOS_FUNCTION typename std::enable_if::is_signed, val_type>::type +KokkosKernelsNan() { return -1; } template -static KOKKOS_FUNCTION - typename std::enable_if::is_signed, - val_type>::type - KokkosKernelsNan() { +static KOKKOS_FUNCTION typename std::enable_if::is_signed, val_type>::type +KokkosKernelsNan() { return Kokkos::Experimental::finite_max::value; } -#define KOKKOSKERNELS_ARITHTRAITS_INTEGRAL() \ - \ - static constexpr bool is_specialized = true; \ - static constexpr bool is_integer = true; \ - static constexpr bool is_exact = true; \ - static constexpr bool is_complex = false; \ - static constexpr bool has_infinity = false; \ - \ - using magnitudeType = mag_type; \ - using halfPrecision 
= val_type; \ - using doublePrecision = val_type; \ - \ - static constexpr bool isComplex = false; \ - static constexpr bool isOrdinal = true; \ - static constexpr bool isComparable = true; \ - static constexpr bool hasMachineParameters = false; \ - \ - static KOKKOS_FUNCTION val_type zero() { return static_cast(0); } \ - static KOKKOS_FUNCTION val_type one() { return static_cast(1); } \ - static KOKKOS_FUNCTION val_type min() { \ - return Kokkos::Experimental::finite_min::value; \ - } \ - static KOKKOS_FUNCTION val_type max() { \ - return Kokkos::Experimental::finite_max::value; \ - } \ - static KOKKOS_FUNCTION val_type infinity() { \ - return static_cast(0); \ - } \ - static KOKKOS_FUNCTION val_type nan() { \ - return KokkosKernelsNan(); \ - } \ - static KOKKOS_FUNCTION bool isInf(const val_type) { return false; } \ - static KOKKOS_FUNCTION bool isNan(const val_type) { return false; } \ - static KOKKOS_FUNCTION mag_type abs(const val_type x) { \ - return KokkosKernelsAbs(x); \ - } \ - static KOKKOS_FUNCTION mag_type real(const val_type x) { \ - return Kokkos::real(x); \ - } \ - static KOKKOS_FUNCTION mag_type imag(const val_type) { return zero(); } \ - static KOKKOS_FUNCTION val_type conj(const val_type x) { return x; } \ - static KOKKOS_FUNCTION val_type pow(const val_type x, const val_type y) { \ - return Kokkos::pow(x, y); \ - } \ - static KOKKOS_FUNCTION val_type sqrt(const val_type x) { \ - return static_cast(Kokkos::sqrt(abs(x))); \ - } \ - static KOKKOS_FUNCTION val_type cbrt(const val_type x) { \ - return static_cast(Kokkos::cbrt(abs(x))); \ - } \ - static KOKKOS_FUNCTION val_type exp(const val_type x) { \ - return static_cast(Kokkos::exp(abs(x))); \ - } \ - static KOKKOS_FUNCTION val_type log(const val_type x) { \ - return static_cast(Kokkos::log(abs(x))); \ - } \ - static KOKKOS_FUNCTION val_type log10(const val_type x) { \ - return static_cast(Kokkos::log10(abs(x))); \ - } \ - static KOKKOS_FUNCTION mag_type epsilon() { return zero(); } \ - static 
KOKKOS_FUNCTION magnitudeType magnitude(const val_type x) { \ - return abs(x); \ - } \ - static KOKKOS_FUNCTION val_type conjugate(const val_type x) { \ - return conj(x); \ - } \ - static KOKKOS_FUNCTION bool isnaninf(const val_type) { return false; } \ - static KOKKOS_FUNCTION val_type squareroot(const val_type x) { \ - return sqrt(x); \ - } +#define KOKKOSKERNELS_ARITHTRAITS_INTEGRAL() \ + \ + static constexpr bool is_specialized = true; \ + static constexpr bool is_integer = true; \ + static constexpr bool is_exact = true; \ + static constexpr bool is_complex = false; \ + static constexpr bool has_infinity = false; \ + \ + using magnitudeType = mag_type; \ + using halfPrecision = val_type; \ + using doublePrecision = val_type; \ + \ + static constexpr bool isComplex = false; \ + static constexpr bool isOrdinal = true; \ + static constexpr bool isComparable = true; \ + static constexpr bool hasMachineParameters = false; \ + \ + static KOKKOS_FUNCTION val_type zero() { return static_cast(0); } \ + static KOKKOS_FUNCTION val_type one() { return static_cast(1); } \ + static KOKKOS_FUNCTION val_type min() { return Kokkos::Experimental::finite_min::value; } \ + static KOKKOS_FUNCTION val_type max() { return Kokkos::Experimental::finite_max::value; } \ + static KOKKOS_FUNCTION val_type infinity() { return static_cast(0); } \ + static KOKKOS_FUNCTION val_type nan() { return KokkosKernelsNan(); } \ + static KOKKOS_FUNCTION bool isInf(const val_type) { return false; } \ + static KOKKOS_FUNCTION bool isNan(const val_type) { return false; } \ + static KOKKOS_FUNCTION mag_type abs(const val_type x) { return KokkosKernelsAbs(x); } \ + static KOKKOS_FUNCTION mag_type real(const val_type x) { return Kokkos::real(x); } \ + static KOKKOS_FUNCTION mag_type imag(const val_type) { return zero(); } \ + static KOKKOS_FUNCTION val_type conj(const val_type x) { return x; } \ + static KOKKOS_FUNCTION val_type pow(const val_type x, const val_type y) { return Kokkos::pow(x, y); } \ + 
static KOKKOS_FUNCTION val_type sqrt(const val_type x) { return static_cast(Kokkos::sqrt(abs(x))); } \ + static KOKKOS_FUNCTION val_type cbrt(const val_type x) { return static_cast(Kokkos::cbrt(abs(x))); } \ + static KOKKOS_FUNCTION val_type exp(const val_type x) { return static_cast(Kokkos::exp(abs(x))); } \ + static KOKKOS_FUNCTION val_type log(const val_type x) { return static_cast(Kokkos::log(abs(x))); } \ + static KOKKOS_FUNCTION val_type log10(const val_type x) { return static_cast(Kokkos::log10(abs(x))); } \ + static KOKKOS_FUNCTION mag_type epsilon() { return zero(); } \ + static KOKKOS_FUNCTION magnitudeType magnitude(const val_type x) { return abs(x); } \ + static KOKKOS_FUNCTION val_type conjugate(const val_type x) { return conj(x); } \ + static KOKKOS_FUNCTION bool isnaninf(const val_type) { return false; } \ + static KOKKOS_FUNCTION val_type squareroot(const val_type x) { return sqrt(x); } /// \class ArithTraits /// \brief Traits class for arithmetic on type T. @@ -1003,109 +853,6 @@ class ArithTraits { static constexpr bool is_complex = false; static constexpr bool has_infinity = true; -#if KOKKOS_VERSION < 40199 - static KOKKOS_FUNCTION val_type infinity() { - return Kokkos::Experimental::cast_to_half( - Kokkos::Experimental::infinity::value); - } - - static KOKKOS_FUNCTION bool isInf(const val_type x) { -#ifndef __CUDA_ARCH__ - using std::isinf; -#endif - return isinf(Kokkos::Experimental::cast_from_half(x)); - } - static KOKKOS_FUNCTION bool isNan(const val_type x) { -#ifndef __CUDA_ARCH__ - using std::isnan; -#endif - return isnan(Kokkos::Experimental::cast_from_half(x)); - } - static KOKKOS_FUNCTION mag_type abs(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::abs(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type zero() { - return Kokkos::Experimental::cast_to_half(0.0); - } - static KOKKOS_FUNCTION val_type one() { - return Kokkos::Experimental::cast_to_half(1.0); - } - static 
KOKKOS_FUNCTION val_type min() { - return Kokkos::Experimental::cast_to_half(-KOKKOSKERNELS_IMPL_FP16_MAX); - } - static KOKKOS_FUNCTION val_type max() { - return Kokkos::Experimental::cast_to_half(KOKKOSKERNELS_IMPL_FP16_MAX); - } - static KOKKOS_FUNCTION mag_type real(const val_type x) { return x; } - static KOKKOS_FUNCTION mag_type imag(const val_type) { return zero(); } - static KOKKOS_FUNCTION val_type conj(const val_type x) { return x; } - static KOKKOS_FUNCTION val_type pow(const val_type x, const val_type y) { - return Kokkos::Experimental::cast_to_half( - Kokkos::pow(Kokkos::Experimental::cast_from_half(x), - Kokkos::Experimental::cast_from_half(y))); - } - static KOKKOS_FUNCTION val_type sqrt(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::sqrt(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type cbrt(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::cbrt(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type exp(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::exp(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type log(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::log(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type log10(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::log10(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type sin(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::sin(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type cos(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::cos(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type tan(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::tan(Kokkos::Experimental::cast_from_half(x))); - } - 
static KOKKOS_FUNCTION val_type sinh(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::sinh(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type cosh(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::cosh(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type tanh(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::tanh(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type asin(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::asin(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type acos(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::acos(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION val_type atan(const val_type x) { - return Kokkos::Experimental::cast_to_half( - Kokkos::atan(Kokkos::Experimental::cast_from_half(x))); - } - static KOKKOS_FUNCTION mag_type epsilon() { - return Kokkos::Experimental::cast_to_half(KOKKOSKERNELS_IMPL_FP16_EPSILON); - } -#endif - // Backwards compatibility with Teuchos::ScalarTraits. 
using magnitudeType = mag_type; using halfPrecision = Kokkos::Experimental::half_t; @@ -1118,52 +865,11 @@ class ArithTraits { static constexpr bool isComparable = true; static constexpr bool hasMachineParameters = true; -#if KOKKOS_VERSION < 40199 - static KOKKOS_FUNCTION bool isnaninf(const val_type x) { - return isNan(x) || isInf(x); - } - static KOKKOS_FUNCTION magnitudeType magnitude(const val_type x) { - return abs(x); - } - static KOKKOS_FUNCTION val_type conjugate(const val_type x) { - return conj(x); - } - static KOKKOS_FUNCTION val_type squareroot(const val_type x) { - return sqrt(x); - } - static KOKKOS_FUNCTION val_type nan() { - return Kokkos::Experimental::cast_to_half( - Kokkos::Experimental::quiet_NaN::value); - } - static KOKKOS_FUNCTION mag_type eps() { return epsilon(); } - static KOKKOS_FUNCTION mag_type sfmin() { - return Kokkos::Experimental::cast_to_half(KOKKOSKERNELS_IMPL_FP16_MIN); - } - static KOKKOS_FUNCTION int base() { return KOKKOSKERNELS_IMPL_FP16_RADIX; } - // Use float to allow running on both host and device - static KOKKOS_FUNCTION float prec() { - float e = KOKKOSKERNELS_IMPL_FP16_EPSILON; - float b = (float)base(); - float r = e * b; - return r; - } - static KOKKOS_FUNCTION int t() { return KOKKOSKERNELS_IMPL_FP16_MANT_DIG; } - static KOKKOS_FUNCTION mag_type rnd() { return one(); } - static KOKKOS_FUNCTION int emin() { return KOKKOSKERNELS_IMPL_FP16_MIN_EXP; } - static KOKKOS_FUNCTION mag_type rmin() { - return Kokkos::Experimental::cast_to_half(KOKKOSKERNELS_IMPL_FP16_MIN); - } - static KOKKOS_FUNCTION int emax() { return KOKKOSKERNELS_IMPL_FP16_MAX_EXP; } - static KOKKOS_FUNCTION mag_type rmax() { - return Kokkos::Experimental::cast_to_half(KOKKOSKERNELS_IMPL_FP16_MAX); - } -#else #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_HIP) KOKKOSKERNELS_ARITHTRAITS_HALF_FP(KOKKOS_FUNCTION) #else KOKKOSKERNELS_ARITHTRAITS_REAL_FP(KOKKOS_FUNCTION) #endif -#endif }; #endif // #if defined(KOKKOS_HALF_T_IS_FLOAT) && 
!KOKKOS_HALF_T_IS_FLOAT @@ -1183,106 +889,6 @@ class ArithTraits { static constexpr bool is_complex = false; static constexpr bool has_infinity = true; -#if KOKKOS_VERSION < 40199 - static KOKKOS_FUNCTION val_type infinity() { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::Experimental::infinity::value); - } - - static KOKKOS_FUNCTION bool isInf(const val_type x) { - return Kokkos::isinf(Kokkos::Experimental::cast_from_bhalf(x)); - } - static KOKKOS_FUNCTION bool isNan(const val_type x) { - return Kokkos::isnan(Kokkos::Experimental::cast_from_bhalf(x)); - } - static KOKKOS_FUNCTION mag_type abs(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::abs(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type zero() { - return Kokkos::Experimental::cast_to_bhalf(0.0F); - } - static KOKKOS_FUNCTION val_type one() { - return Kokkos::Experimental::cast_to_bhalf(1.0F); - } - static KOKKOS_FUNCTION val_type min() { - return Kokkos::Experimental::cast_to_bhalf(-KOKKOSKERNELS_IMPL_BF16_MAX); - } - static KOKKOS_FUNCTION val_type max() { - return Kokkos::Experimental::cast_to_bhalf(KOKKOSKERNELS_IMPL_BF16_MAX); - } - static KOKKOS_FUNCTION mag_type real(const val_type x) { return x; } - static KOKKOS_FUNCTION mag_type imag(const val_type) { - return Kokkos::Experimental::cast_to_bhalf(0.0F); - } - static KOKKOS_FUNCTION val_type conj(const val_type x) { return x; } - static KOKKOS_FUNCTION val_type pow(const val_type x, const val_type y) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::pow(Kokkos::Experimental::cast_from_bhalf(x), - Kokkos::Experimental::cast_from_bhalf(y))); - } - static KOKKOS_FUNCTION val_type sqrt(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::sqrt(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type cbrt(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::cbrt(Kokkos::Experimental::cast_from_bhalf(x))); - } - 
static KOKKOS_FUNCTION val_type exp(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::exp(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type log(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::log(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type log10(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::log10(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type sin(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::sin(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type cos(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::cos(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type tan(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::tan(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type sinh(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::sinh(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type cosh(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::cosh(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type tanh(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::tanh(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type asin(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::asin(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type acos(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::acos(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION val_type atan(const val_type x) { - return Kokkos::Experimental::cast_to_bhalf( - 
Kokkos::atan(Kokkos::Experimental::cast_from_bhalf(x))); - } - static KOKKOS_FUNCTION mag_type epsilon() { - // return ::pow(2, -KOKKOSKERNELS_IMPL_BF16_SIGNIFICAND_BITS); - return Kokkos::Experimental::cast_to_bhalf(KOKKOSKERNELS_IMPL_BF16_EPSILON); - } -#endif - // Backwards compatibility with Teuchos::ScalarTraits. using magnitudeType = mag_type; using bhalfPrecision = Kokkos::Experimental::bhalf_t; @@ -1297,52 +903,11 @@ class ArithTraits { static std::string name() { return "bhalf_t"; } -#if KOKKOS_VERSION < 40199 - static KOKKOS_FUNCTION bool isnaninf(const val_type x) { - return isNan(x) || isInf(x); - } - static KOKKOS_FUNCTION magnitudeType magnitude(const val_type x) { - return abs(x); - } - static KOKKOS_FUNCTION val_type conjugate(const val_type x) { - return conj(x); - } - static KOKKOS_FUNCTION val_type squareroot(const val_type x) { - return sqrt(x); - } - static KOKKOS_FUNCTION val_type nan() { - return Kokkos::Experimental::cast_to_bhalf( - Kokkos::Experimental::quiet_NaN::value); - } - static KOKKOS_FUNCTION mag_type eps() { return epsilon(); } - static KOKKOS_FUNCTION mag_type sfmin() { - return Kokkos::Experimental::cast_to_bhalf(KOKKOSKERNELS_IMPL_BF16_MIN); - } - static KOKKOS_FUNCTION int base() { return KOKKOSKERNELS_IMPL_BF16_RADIX; } - // Use float to allow running on both host and device - static KOKKOS_FUNCTION float prec() { - float e = KOKKOSKERNELS_IMPL_BF16_EPSILON; - float b = (float)base(); - float r = e * b; - return r; - } - static KOKKOS_FUNCTION int t() { return KOKKOSKERNELS_IMPL_BF16_MANT_DIG; } - static KOKKOS_FUNCTION mag_type rnd() { return one(); } - static KOKKOS_FUNCTION int emin() { return KOKKOSKERNELS_IMPL_BF16_MIN_EXP; } - static KOKKOS_FUNCTION mag_type rmin() { - return Kokkos::Experimental::cast_to_bhalf(KOKKOSKERNELS_IMPL_BF16_MIN); - } - static KOKKOS_FUNCTION int emax() { return KOKKOSKERNELS_IMPL_BF16_MAX_EXP; } - static KOKKOS_FUNCTION mag_type rmax() { - return 
Kokkos::Experimental::cast_to_bhalf(KOKKOSKERNELS_IMPL_BF16_MAX); - } -#else #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_HIP) KOKKOSKERNELS_ARITHTRAITS_HALF_FP(KOKKOS_FUNCTION) #else KOKKOSKERNELS_ARITHTRAITS_REAL_FP(KOKKOS_FUNCTION) #endif -#endif }; #endif // #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT @@ -1391,11 +956,9 @@ class ArithTraits { using magnitudeType = mag_type; using halfPrecision = float; #if defined(__CUDA_ARCH__) - using doublePrecision = - double; // CUDA doesn't support long double, unfortunately + using doublePrecision = double; // CUDA doesn't support long double, unfortunately #elif defined(__HIP_DEVICE_COMPILE__) - using doublePrecision = - double; // HIP does not support long double unfortunately + using doublePrecision = double; // HIP does not support long double unfortunately #else using doublePrecision = long double; #endif // __CUDA_ARCH__ @@ -1518,8 +1081,7 @@ class ArithTraits > { static constexpr bool has_infinity = true; static std::complex infinity() { - return std::complex(ArithTraits::infinity(), - ArithTraits::infinity()); + return std::complex(ArithTraits::infinity(), ArithTraits::infinity()); } #ifdef KOKKOS_ENABLE_SYCL @@ -1568,37 +1130,23 @@ class ArithTraits > { return isnan(real(x)) || isnan(imag(x)); } #endif - static mag_type abs(const std::complex& x) { - return std::abs(x); - } + static mag_type abs(const std::complex& x) { return std::abs(x); } static std::complex zero() { - return std::complex(ArithTraits::zero(), - ArithTraits::zero()); + return std::complex(ArithTraits::zero(), ArithTraits::zero()); } static std::complex one() { - return std::complex(ArithTraits::one(), - ArithTraits::zero()); + return std::complex(ArithTraits::one(), ArithTraits::zero()); } static std::complex min() { - return std::complex(ArithTraits::min(), - ArithTraits::zero()); + return std::complex(ArithTraits::min(), ArithTraits::zero()); } static std::complex max() { - return 
std::complex(ArithTraits::max(), - ArithTraits::zero()); - } - static mag_type real(const std::complex& x) { - return std::real(x); - } - static mag_type imag(const std::complex& x) { - return std::imag(x); - } - static std::complex conj( - const std::complex& x) { - return std::conj(x); + return std::complex(ArithTraits::max(), ArithTraits::zero()); } - static std::complex pow(const std::complex& x, - const std::complex& y) { + static mag_type real(const std::complex& x) { return std::real(x); } + static mag_type imag(const std::complex& x) { return std::imag(x); } + static std::complex conj(const std::complex& x) { return std::conj(x); } + static std::complex pow(const std::complex& x, const std::complex& y) { // Fix for some weird gcc 4.2.1 inaccuracy. if (y == one()) { return x; @@ -1608,46 +1156,29 @@ class ArithTraits > { return std::pow(x, y); } } - static std::complex pow(const std::complex& x, - const RealFloatType& y) { + static std::complex pow(const std::complex& x, const RealFloatType& y) { // Fix for some weird gcc 4.2.1 inaccuracy. 
if (y == ArithTraits::one()) { return x; - } else if (y == ArithTraits::one() + - ArithTraits::one()) { + } else if (y == ArithTraits::one() + ArithTraits::one()) { return x * x; } else { return std::pow(x, y); } } - static std::complex sqrt( - const std::complex& x) { - return std::sqrt(x); - } - static std::complex cbrt( - const std::complex& x) { + static std::complex sqrt(const std::complex& x) { return std::sqrt(x); } + static std::complex cbrt(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL return sycl::cbrt(x); #else return ::cbrt(x); #endif } - static std::complex exp(const std::complex& x) { - return std::exp(x); - } - static std::complex log(const std::complex& x) { - return std::log(x); - } - static std::complex log10( - const std::complex& x) { - return std::log10(x); - } - static std::complex sin(const std::complex& x) { - return std::sin(x); - } - static std::complex cos(const std::complex& x) { - return std::cos(x); - } + static std::complex exp(const std::complex& x) { return std::exp(x); } + static std::complex log(const std::complex& x) { return std::log(x); } + static std::complex log10(const std::complex& x) { return std::log10(x); } + static std::complex sin(const std::complex& x) { return std::sin(x); } + static std::complex cos(const std::complex& x) { return std::cos(x); } static std::complex tan(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL return sycl::tan(x); @@ -1655,36 +1186,24 @@ class ArithTraits > { return std::tan(x); #endif } - static std::complex sinh( - const std::complex& x) { - return std::sinh(x); - } - static std::complex cosh( - const std::complex& x) { - return std::cosh(x); - } - static std::complex tanh( - const std::complex& x) { - return std::tanh(x); - } - static std::complex asin( - const std::complex& x) { + static std::complex sinh(const std::complex& x) { return std::sinh(x); } + static std::complex cosh(const std::complex& x) { return std::cosh(x); } + static 
std::complex tanh(const std::complex& x) { return std::tanh(x); } + static std::complex asin(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL return sycl::asin(x); #else return ::asin(x); #endif } - static std::complex acos( - const std::complex& x) { + static std::complex acos(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL return sycl::acos(x); #else return ::acos(x); #endif } - static std::complex atan( - const std::complex& x) { + static std::complex atan(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL using sycl::atan; #else @@ -1699,33 +1218,19 @@ class ArithTraits > { static mag_type epsilon() { return ArithTraits::epsilon(); } // Backwards compatibility with Teuchos::ScalarTraits. - using magnitudeType = mag_type; - using halfPrecision = - std::complex::halfPrecision>; - using doublePrecision = - std::complex::doublePrecision>; + using magnitudeType = mag_type; + using halfPrecision = std::complex::halfPrecision>; + using doublePrecision = std::complex::doublePrecision>; static constexpr bool isComplex = true; static constexpr bool isOrdinal = false; static constexpr bool isComparable = false; static constexpr bool hasMachineParameters = true; - static bool isnaninf(const std::complex& x) { - return isNan(x) || isInf(x); - } - static mag_type magnitude(const std::complex& x) { - return abs(x); - } - static std::complex conjugate( - const std::complex& x) { - return conj(x); - } - static std::string name() { - return std::string("std::complex<") + ArithTraits::name() + ">"; - } - static std::complex squareroot( - const std::complex& x) { - return sqrt(x); - } + static bool isnaninf(const std::complex& x) { return isNan(x) || isInf(x); } + static mag_type magnitude(const std::complex& x) { return abs(x); } + static std::complex conjugate(const std::complex& x) { return conj(x); } + static std::string name() { return std::string("std::complex<") + ArithTraits::name() + ">"; } + static 
std::complex squareroot(const std::complex& x) { return sqrt(x); } static mag_type eps() { return epsilon(); } static mag_type sfmin() { return ArithTraits::sfmin(); } static int base() { return ArithTraits::base(); } @@ -1925,9 +1430,7 @@ struct [[deprecated]] ArithTraits { static inline mag_type real(const val_type& x) { return x; } static inline mag_type imag(const val_type&) { return zero(); } static inline val_type conj(const val_type& x) { return x; } - static inline val_type pow(const val_type& x, const val_type& y) { - return ::pow(x, y); - } + static inline val_type pow(const val_type& x, const val_type& y) { return ::pow(x, y); } static inline val_type sqrt(const val_type& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL return sycl::sqrt(x); @@ -1998,11 +1501,7 @@ struct [[deprecated]] ArithTraits { static int base() { return std::numeric_limits::radix; } static mag_type prec() { return eps() * base(); } static int t() { return std::numeric_limits::digits; } - static mag_type rnd() { - return std::numeric_limits::round_style == std::round_to_nearest - ? one() - : zero(); - } + static mag_type rnd() { return std::numeric_limits::round_style == std::round_to_nearest ? 
one() : zero(); } static int emin() { return std::numeric_limits::min_exponent; } static mag_type rmin() { return std::numeric_limits::min(); } static int emax() { return std::numeric_limits::max_exponent; } @@ -2041,9 +1540,7 @@ struct [[deprecated]] ArithTraits { static inline mag_type real(const val_type& x) { return x; } static inline mag_type imag(const val_type&) { return zero(); } static inline val_type conj(const val_type& x) { return x; } - static inline val_type pow(const val_type& x, const val_type& y) { - return ::pow(x, y); - } + static inline val_type pow(const val_type& x, const val_type& y) { return ::pow(x, y); } static inline val_type sqrt(const val_type& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL return sycl::sqrt(x); @@ -2098,9 +1595,7 @@ struct [[deprecated]] ArithTraits { #endif } static inline val_type nan() { return val_type::_nan; } - static inline val_type epsilon() { - return std::numeric_limits::epsilon(); - } + static inline val_type epsilon() { return std::numeric_limits::epsilon(); } typedef qd_real magnitudeType; typedef dd_real halfPrecision; @@ -2120,11 +1615,7 @@ struct [[deprecated]] ArithTraits { static int base() { return std::numeric_limits::radix; } static mag_type prec() { return eps() * base(); } static int t() { return std::numeric_limits::digits; } - static mag_type rnd() { - return std::numeric_limits::round_style == std::round_to_nearest - ? one() - : zero(); - } + static mag_type rnd() { return std::numeric_limits::round_style == std::round_to_nearest ? 
one() : zero(); } static int emin() { return std::numeric_limits::min_exponent; } static mag_type rmin() { return std::numeric_limits::min(); } static int emax() { return std::numeric_limits::max_exponent; } @@ -2145,8 +1636,7 @@ struct [[deprecated]] ArithTraits { namespace Details { template -using ArithTraits [[deprecated("Use Kokkos::ArithTraits instead")]] = - ::Kokkos::ArithTraits; +using ArithTraits [[deprecated("Use Kokkos::ArithTraits instead")]] = ::Kokkos::ArithTraits; } // namespace Details } // namespace Kokkos diff --git a/packages/kokkos-kernels/common/src/Kokkos_InnerProductSpaceTraits.hpp b/packages/kokkos-kernels/common/src/Kokkos_InnerProductSpaceTraits.hpp index c2bc475c4590..25337c925f13 100644 --- a/packages/kokkos-kernels/common/src/Kokkos_InnerProductSpaceTraits.hpp +++ b/packages/kokkos-kernels/common/src/Kokkos_InnerProductSpaceTraits.hpp @@ -125,19 +125,14 @@ class InnerProductSpaceTraits { typedef val_type dot_type; //! The "norm" (absolute value or magnitude) of a value x of type val_type. - static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } + static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } /// \brief The "dot product" of two values x and y of type val_type. /// /// This default implementation should suffice unless val_type is /// complex. In that case, see the partial specialization for /// Kokkos::complex below to see our convention for which input gets /// conjugated. - static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const val_type& x, - const val_type& y) { - return x * y; - } + static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; /// \brief Partial specialization for long double. 
@@ -149,9 +144,7 @@ struct InnerProductSpaceTraits { typedef Kokkos::ArithTraits::mag_type mag_type; typedef val_type dot_type; - static mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } + static mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } static dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; @@ -163,13 +156,8 @@ class InnerProductSpaceTraits> { typedef typename Kokkos::ArithTraits::mag_type mag_type; typedef val_type dot_type; - static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } - static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const val_type& x, - const val_type& y) { - return Kokkos::conj(x) * y; - } + static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } + static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const val_type& x, const val_type& y) { return Kokkos::conj(x) * y; } }; /// \brief Partial specialization for std::complex. 
@@ -182,12 +170,8 @@ struct InnerProductSpaceTraits> { typedef typename Kokkos::ArithTraits::mag_type mag_type; typedef val_type dot_type; - static mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } - static dot_type dot(const val_type& x, const val_type& y) { - return std::conj(x) * y; - } + static mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } + static dot_type dot(const val_type& x, const val_type& y) { return std::conj(x) * y; } }; #ifdef HAVE_KOKKOSKERNELS_QUADMATH @@ -203,9 +187,7 @@ struct InnerProductSpaceTraits<__float128> { typedef typename Kokkos::ArithTraits::mag_type mag_type; typedef val_type dot_type; - static mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } + static mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } static dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; @@ -232,9 +214,7 @@ struct InnerProductSpaceTraits { typedef Kokkos::ArithTraits::mag_type mag_type; typedef val_type dot_type; - static mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } + static mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } static dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; @@ -244,34 +224,24 @@ struct InnerProductSpaceTraits { typedef Kokkos::ArithTraits::mag_type mag_type; typedef val_type dot_type; - static mag_type norm(const val_type& x) { - return Kokkos::ArithTraits::abs(x); - } + static mag_type norm(const val_type& x) { return Kokkos::ArithTraits::abs(x); } static dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; #endif // HAVE_KOKKOS_QD template -KOKKOS_INLINE_FUNCTION void updateDot(ResultType& sum, const InputType1& x, - const InputType2& y) { +KOKKOS_INLINE_FUNCTION void updateDot(ResultType& sum, const InputType1& x, const InputType2& y) { // FIXME (mfh 22 Jan 2020) We should actually pick the type with the // greater 
precision. sum += InnerProductSpaceTraits::dot(x, y); } -KOKKOS_INLINE_FUNCTION void updateDot(double& sum, const double x, - const double y) { - sum += x * y; -} +KOKKOS_INLINE_FUNCTION void updateDot(double& sum, const double x, const double y) { sum += x * y; } -KOKKOS_INLINE_FUNCTION void updateDot(double& sum, const float x, - const float y) { - sum += x * y; -} +KOKKOS_INLINE_FUNCTION void updateDot(double& sum, const float x, const float y) { sum += x * y; } // This exists because complex += complex is not defined. -KOKKOS_INLINE_FUNCTION void updateDot(Kokkos::complex& sum, - const Kokkos::complex x, +KOKKOS_INLINE_FUNCTION void updateDot(Kokkos::complex& sum, const Kokkos::complex x, const Kokkos::complex y) { const auto tmp = Kokkos::conj(x) * y; sum += Kokkos::complex(tmp.real(), tmp.imag()); @@ -280,8 +250,7 @@ KOKKOS_INLINE_FUNCTION void updateDot(Kokkos::complex& sum, // This exists in case people call the overload of KokkosBlas::dot // that takes an output View, and the output View has element type // Kokkos::complex. 
-KOKKOS_INLINE_FUNCTION void updateDot(Kokkos::complex& sum, - const Kokkos::complex x, +KOKKOS_INLINE_FUNCTION void updateDot(Kokkos::complex& sum, const Kokkos::complex x, const Kokkos::complex y) { sum += Kokkos::conj(x) * y; } diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common.hpp index 2ccf9c2103bc..fb93a494d637 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common.hpp @@ -16,6 +16,7 @@ #ifndef TEST_COMMON_HPP #define TEST_COMMON_HPP +#include #include // #include #include diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_AlignPtrTo.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_AlignPtrTo.hpp new file mode 100644 index 000000000000..33e7ed542c00 --- /dev/null +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_AlignPtrTo.hpp @@ -0,0 +1,159 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/*! \file + +This test file was motivated by an observation in the SpGEMM on SYCL that +strange values were coming out of the pointer alignment functions, causing +Kokkos::atomic_add to be a no-op or write 0. The Kokkos Kernels alignPtrTo +function was updated with the one of four implementations that was observed to +work on SYCL (even though all four in here should be okay.) + +TEST_FN 0-3 are various implemetations, and TEST_FN 4 is testing Kokkos Kernels +implementation. 
The tests are written to PASS for the observed SYCL behavor - +i.e., that TEST_FN 1,4 produce aligned pointers, and the others do not (even +though they should). If the other functions start working on SYCL, then this +test will "fail", and the Kokkos Kernels implementation should be updated with +one of the now-working (and faster?) implementations. +*/ + +#ifndef TEST_COMMON_ALIGNPTRTO_HPP +#define TEST_COMMON_ALIGNPTRTO_HPP + +#include +#include +#include + +namespace { + +// the original Kokkos Kernels implementation +template +KOKKOS_INLINE_FUNCTION T *f0(InPtr p) { + std::uintptr_t ptrVal = reinterpret_cast(p); + return reinterpret_cast((ptrVal + alignof(T) - 1) & (~(alignof(T) - 1))); +} + +// an implementation that works for SYCL +template +KOKKOS_INLINE_FUNCTION T *f1(InPtr p) { + std::uintptr_t ptrVal = reinterpret_cast(p); + while (ptrVal % alignof(T)) { + ++ptrVal; + } + return reinterpret_cast(ptrVal); +} + +// another valid implementation +template +KOKKOS_INLINE_FUNCTION T *f2(InPtr p) { + std::uintptr_t ptrVal = reinterpret_cast(p); + return reinterpret_cast((ptrVal + alignof(T) - 1) / alignof(T) * alignof(T)); +} + +// the way GCC does it (roughly) +template +KOKKOS_INLINE_FUNCTION T *f3(InPtr p) { + std::uintptr_t ptrVal = reinterpret_cast(p); + return reinterpret_cast((ptrVal - uint64_t(1) + alignof(T)) & -alignof(T)); +} + +// Function to be executed by each team +template +struct TeamFunction { + TeamFunction() = default; + TeamFunction(const Results &results) : results_(results) {} + + template + KOKKOS_INLINE_FUNCTION void operator()(const Team &team) const { + // get an "aligned" pointer to scratch memory + char *shmem = (char *)(team.team_shmem().get_shmem(team.team_size() * sizeof(double))); + double *vals; + if constexpr (0 == TEST_FN) { + vals = f0(shmem); + } else if constexpr (1 == TEST_FN) { + vals = f1(shmem); + } else if constexpr (2 == TEST_FN) { + vals = f2(shmem); + } else if constexpr (3 == TEST_FN) { + vals = f3(shmem); + } 
else if constexpr (4 == TEST_FN) { + vals = KokkosKernels::Impl::alignPtrTo(shmem); + } else { + static_assert(std::is_void_v, "Unexpected test function"); + } + + const size_t i = team.team_rank(); + double val = team.team_rank(); + vals[i] = 0; // zero shared memory + Kokkos::atomic_add(&vals[i], val); +#if 0 // debugging + Kokkos::printf("%s:%i result(%lu) += %f yielded %f\n", __FILE__, __LINE__, i, val, vals[i]); +#endif + + results_(i) = vals[i]; + } + + size_t team_shmem_size(int team_size) const { return team_size * sizeof(double); } + + Results results_; +}; + +// use atomic add to set result(i) = i +template +void test_alignPtrTo() { + using MemorySpace = typename Device::memory_space; + using ExecSpace = typename Device::execution_space; + using TestView = Kokkos::View; + using TestPolicy = Kokkos::TeamPolicy; + const int teamSize = + TestPolicy(1, Kokkos::AUTO).team_size_max(TeamFunction(), Kokkos::ParallelForTag{}); + + ExecSpace space; + + TestView results("TestView", teamSize); + TestPolicy policy(space, 1, teamSize); + Kokkos::parallel_for("test alignment", policy, TeamFunction(results)); + + int errs; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(space, 0, teamSize), + KOKKOS_LAMBDA(int i, int &lerr) { lerr += (results(i) != i); }, errs); + +// if SYCL is enabled, only TEST_FN 1 and 4 should work +#if defined(KOKKOS_ENABLE_SYCL) + if constexpr (std::is_same_v) { + if constexpr ((1 == TEST_FN) || (4 == TEST_FN)) { + EXPECT_EQ(0, errs); + } else { + EXPECT_NE(0, errs); + } + } else { + EXPECT_EQ(0, errs); + } +#else + EXPECT_EQ(0, errs); +#endif +} + +TEST_F(TestCategory, common_AlignPtrTo_0) { test_alignPtrTo<0, TestDevice>(); } +TEST_F(TestCategory, common_AlignPtrTo_1) { test_alignPtrTo<1, TestDevice>(); } +TEST_F(TestCategory, common_AlignPtrTo_2) { test_alignPtrTo<2, TestDevice>(); } +TEST_F(TestCategory, common_AlignPtrTo_3) { test_alignPtrTo<3, TestDevice>(); } +TEST_F(TestCategory, common_AlignPtrTo_kk) { test_alignPtrTo<4, TestDevice>(); } 
+ +} // anonymous namespace + +#endif // TEST_COMMON_ALIGNPTRTO diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_ArithTraits.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_ArithTraits.hpp index 1d9a4c6480d9..73a4ebfefee3 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_ArithTraits.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_ArithTraits.hpp @@ -35,30 +35,14 @@ #include // typeid (T) #include -#if KOKKOS_VERSION < 40199 -#define FAILURE() \ - { \ - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%s:%d: Failure\n", __FILE__, __func__, \ - __LINE__); \ - success = 0; \ - } -#else #define FAILURE() \ { \ Kokkos::printf("%s:%s:%d: Failure\n", __FILE__, __func__, __LINE__); \ success = 0; \ } -#endif #if 0 -#if KOKKOS_VERSION < 40199 -#define TRACE() \ - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%s:%d: Trace\n", __FILE__, __func__, \ - __LINE__); -#else -#define TRACE() \ - Kokkos::printf("%s:%s:%d: Trace\n", __FILE__, __func__, __LINE__); -#endif +#define TRACE() Kokkos::printf("%s:%s:%d: Trace\n", __FILE__, __func__, __LINE__); #else #define TRACE() #endif @@ -148,8 +132,7 @@ class ArithTraitsTesterBase { /// \brief Combine two intermediate reduction results into \c dst. /// /// Subclasses need not and must not override this method. - KOKKOS_INLINE_FUNCTION void join(value_type& dst, - const value_type& src) const { + KOKKOS_INLINE_FUNCTION void join(value_type& dst, const value_type& src) const { dst = dst && src; // dst = 1; } @@ -172,8 +155,7 @@ class ArithTraitsTesterBase { /// far. On output: The result of the tests run in this method. /// The result of more than one test is the logical AND of each /// test's result. 
- KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); typedef Kokkos::ArithTraits AT; (void)iwork; // not using this argument @@ -194,11 +176,7 @@ class ArithTraitsTesterBase { // T, but we check for this int constant for compatibility with // std::numeric_limits. if (!AT::is_specialized) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("! AT::is_specialized\n"); -#else Kokkos::printf("! AT::is_specialized\n"); -#endif FAILURE(); } @@ -206,21 +184,11 @@ class ArithTraitsTesterBase { // function, just not to its class methods (which are not marked // as device functions). if (AT::is_integer != std::numeric_limits::is_integer) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT::is_integer not same as numeric_limits\n"); -#else Kokkos::printf("AT::is_integer not same as numeric_limits\n"); -#endif FAILURE(); } if (AT::is_exact != std::numeric_limits::is_exact) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT::is_exact not same as numeric_limits\n"); -#else Kokkos::printf("AT::is_exact not same as numeric_limits\n"); -#endif FAILURE(); } @@ -229,62 +197,34 @@ class ArithTraitsTesterBase { // Test properties of the arithmetic and multiplicative identities. if (zero + zero != zero) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("0 + 0 != 0\n"); -#else Kokkos::printf("0 + 0 != 0\n"); -#endif FAILURE(); } if (zero + one != one) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("0 + 1 != 1\n"); -#else Kokkos::printf("0 + 1 != 1\n"); -#endif FAILURE(); } if (one - one != zero) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("1 - 1 != 0\n"); -#else Kokkos::printf("1 - 1 != 0\n"); -#endif FAILURE(); } // This is technically 1 even of Z_2, since in that field, one // is its own inverse (so -one == one). 
if ((one + one) - one != one) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("(1 + 1) - 1 != 1\n"); -#else Kokkos::printf("(1 + 1) - 1 != 1\n"); -#endif FAILURE(); } if (AT::abs(zero) != zero) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::abs(0) != 0\n"); -#else Kokkos::printf("AT::abs(0) != 0\n"); -#endif FAILURE(); } if (AT::abs(one) != one) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::abs(1) != 1\n"); -#else Kokkos::printf("AT::abs(1) != 1\n"); -#endif FAILURE(); } if (AT::is_signed && AT::abs(-one) != one) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::is_signed and AT::abs(-1) != 1\n"); -#else Kokkos::printf("AT::is_signed and AT::abs(-1) != 1\n"); -#endif FAILURE(); } // Need enable_if to test whether T can be compared using <=. @@ -293,11 +233,7 @@ class ArithTraitsTesterBase { // These are very mild ordering properties. // They should work even for a set only containing zero. if (AT::abs(zero) > AT::abs(AT::max())) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::abs(0) > AT::abs (AT::max ())\n"); -#else Kokkos::printf("AT::abs(0) > AT::abs (AT::max ())\n"); -#endif FAILURE(); } @@ -354,14 +290,12 @@ class ArithTraitsTesterBase { } if (AT::is_integer != std::numeric_limits::is_integer) { - out << "AT::is_integer != std::numeric_limits::is_integer" - << endl; + out << "AT::is_integer != std::numeric_limits::is_integer" << endl; FAILURE(); } if (AT::is_exact != std::numeric_limits::is_exact) { - out << "AT::is_exact != std::numeric_limits::is_exact" - << endl; + out << "AT::is_exact != std::numeric_limits::is_exact" << endl; FAILURE(); } @@ -415,11 +349,9 @@ class ArithTraitsTesterBase { if (AT::has_infinity) { // Compiler intrinsic casts from inf of type half_t / bhalf_t to inf // of type float in CUDA, SYCL and HIP do not work yet. 
-#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_SYCL) || \ - defined(KOKKOS_ENABLE_HIP) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_HIP) namespace KE = Kokkos::Experimental; - if constexpr (!std::is_same::value && - !std::is_same::value) { + if constexpr (!std::is_same::value && !std::is_same::value) { #else { #endif // KOKKOS_ENABLE_CUDA || KOKKOS_ENABLE_SYCL || KOKKOS_ENABLE_HIP @@ -457,10 +389,8 @@ class ArithTraitsTesterBase { /// implements transcendental functions, but the specific tests that /// are run will depend on \c ScalarType. template ::value ? 1 : 0)> -class ArithTraitsTesterTranscendentalBase - : public ArithTraitsTesterBase { + const int has_transcendentals = (HasTranscendentals::value ? 1 : 0)> +class ArithTraitsTesterTranscendentalBase : public ArithTraitsTesterBase { private: //! The base class of this class. typedef ArithTraitsTesterBase base_type; @@ -474,8 +404,7 @@ class ArithTraitsTesterTranscendentalBase /// \brief The "parallel for" part of the reduction. /// /// See comments of ArithTraitsTesterBase's operator(). - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const; + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const; //! Constructor (does nothing, but marked as device function). KOKKOS_INLINE_FUNCTION ArithTraitsTesterTranscendentalBase(); @@ -506,8 +435,7 @@ class ArithTraitsTesterTranscendentalBase //! Constructor (does nothing, but marked as device function). KOKKOS_INLINE_FUNCTION ArithTraitsTesterTranscendentalBase() {} - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); // typedef Kokkos::ArithTraits AT; (void)iwork; // forestall compiler warning for unused variable @@ -585,8 +513,7 @@ class ArithTraitsTesterTranscendentalBase //! Constructor (does nothing, but marked as device function). 
KOKKOS_INLINE_FUNCTION ArithTraitsTesterTranscendentalBase() {} - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); typedef Kokkos::ArithTraits AT; (void)iwork; // forestall compiler warning for unused variable @@ -621,36 +548,20 @@ class ArithTraitsTesterTranscendentalBase if (!AT::is_complex) { result = AT::pow(two, three); if (!equal(result, eight)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(2,3) != 8\n"); -#else Kokkos::printf("AT::pow(2,3) != 8\n"); -#endif FAILURE(); } } if (!equal(AT::pow(three, zero), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(3,0) != 1\n"); -#else Kokkos::printf("AT::pow(3,0) != 1\n"); -#endif FAILURE(); } if (!equal(AT::pow(three, one), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(3,1) != 3\n"); -#else Kokkos::printf("AT::pow(3,1) != 3\n"); -#endif FAILURE(); } if (!equal(AT::pow(three, two), nine)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(3,2) != 9\n"); -#else Kokkos::printf("AT::pow(3,2) != 9\n"); -#endif FAILURE(); } @@ -658,11 +569,7 @@ class ArithTraitsTesterTranscendentalBase if (!AT::is_complex) { result = AT::pow(three, three); if (!equal(result, twentySeven)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(3,3) != 27\n"); -#else Kokkos::printf("AT::pow(3,3) != 27\n"); -#endif FAILURE(); } } @@ -671,170 +578,92 @@ class ArithTraitsTesterTranscendentalBase if (AT::is_signed && !AT::is_complex) { result = AT::pow(-three, one); if (!equal(result, -three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(-3,1) != -3\n"); -#else Kokkos::printf("AT::pow(-3,1) != -3\n"); -#endif FAILURE(); } result = AT::pow(-three, two); if (!equal(result, nine)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(-3,2) != 9\n"); -#else 
Kokkos::printf("AT::pow(-3,2) != 9\n"); -#endif FAILURE(); } result = AT::pow(-three, three); if (!equal(result, -twentySeven)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::pow(-3,3) != 27\n"); -#else Kokkos::printf("AT::pow(-3,3) != 27\n"); -#endif FAILURE(); } } if (!equal(AT::sqrt(zero), zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::sqrt(0) != 0\n"); -#else Kokkos::printf("AT::sqrt(0) != 0\n"); -#endif FAILURE(); } if (!equal(AT::sqrt(one), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::sqrt(1) != 1\n"); -#else Kokkos::printf("AT::sqrt(1) != 1\n"); -#endif FAILURE(); } if (!equal(AT::sqrt(thirtySix), six)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::sqrt(36) != 6\n"); -#else Kokkos::printf("AT::sqrt(36) != 6\n"); -#endif FAILURE(); } if (!equal(AT::sqrt(sixtyFour), eight)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::sqrt(64) != 8\n"); -#else Kokkos::printf("AT::sqrt(64) != 8\n"); -#endif FAILURE(); } if (AT::is_integer) { if (!equal(AT::sqrt(fortyTwo), six)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT:sqrt(42) != 6\n"); -#else Kokkos::printf("AT:sqrt(42) != 6\n"); -#endif FAILURE(); } if (!equal(AT::sqrt(oneTwentySeven), eleven)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::sqrt(127) != 11\n"); -#else Kokkos::printf("AT::sqrt(127) != 11\n"); -#endif FAILURE(); } } if (!equal(AT::cbrt(zero), zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(0) != 0\n"); -#else Kokkos::printf("AT::cbrt(0) != 0\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(one), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(1) != 1\n"); -#else Kokkos::printf("AT::cbrt(1) != 1\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(twentySeven), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(27) != 3\n"); -#else Kokkos::printf("AT::cbrt(27) != 3\n"); 
-#endif FAILURE(); } if (!equal(AT::cbrt(sixtyFour), four)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(64) != 4\n"); -#else Kokkos::printf("AT::cbrt(64) != 4\n"); -#endif FAILURE(); } if (AT::is_integer) { if (!equal(AT::cbrt(fortyTwo), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT:cbrt(42) != 3\n"); -#else Kokkos::printf("AT:cbrt(42) != 3\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(oneTwentySeven), five)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(127) != 5\n"); -#else Kokkos::printf("AT::cbrt(127) != 5\n"); -#endif FAILURE(); } } if (!equal(AT::exp(zero), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(0) != 1\n"); -#else Kokkos::printf("AT::cbrt(0) != 1\n"); -#endif FAILURE(); } if (AT::is_complex) { const ScalarType val = two; //(two.real(), two.real()); if (!equal(AT::conj(AT::exp(val)), AT::exp(AT::conj(val)))) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT::conj(exp(complex(2,2))) != AT::exp(conj(complex(2,2)))\n"); -#else - Kokkos::printf( - "AT::conj(exp(complex(2,2))) != AT::exp(conj(complex(2,2)))\n"); -#endif + Kokkos::printf("AT::conj(exp(complex(2,2))) != AT::exp(conj(complex(2,2)))\n"); FAILURE(); } } if (!equal(AT::log(one), zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::log(1) != 0\n"); -#else Kokkos::printf("AT::log(1) != 0\n"); -#endif FAILURE(); } if (!equal(AT::log10(one), zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::log10(1) != 0\n"); -#else Kokkos::printf("AT::log10(1) != 0\n"); -#endif FAILURE(); } @@ -843,23 +672,11 @@ class ArithTraitsTesterTranscendentalBase const auto val_sin = AT::sin(val); const auto val_cos = AT::cos(val); if (!equal(val_sin * val_sin + val_cos * val_cos, one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(complex):: sin(val)*sin(val) + cos(val)*cos(val) != 1\n"); -#else - Kokkos::printf( - 
"AT(complex):: sin(val)*sin(val) + cos(val)*cos(val) != 1\n"); -#endif + Kokkos::printf("AT(complex):: sin(val)*sin(val) + cos(val)*cos(val) != 1\n"); FAILURE(); } if (!equal(val_sin / val_cos, AT::tan(val))) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(complex):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#else - Kokkos::printf( - "AT(complex):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#endif + Kokkos::printf("AT(complex):: sin(val)/cos(val) != AT(real)::tan(val)\n"); FAILURE(); } } else { @@ -867,47 +684,25 @@ class ArithTraitsTesterTranscendentalBase const auto val_sin = AT::sin(val); const auto val_cos = AT::cos(val); if (!equal(val_sin * val_sin + val_cos * val_cos, one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(real):: sin(val)*sin(val) + cos(a)*cos(a) != 1\n"); -#else Kokkos::printf("AT(real):: sin(val)*sin(val) + cos(a)*cos(a) != 1\n"); -#endif FAILURE(); } if (!equal(val_sin / val_cos, AT::tan(val))) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(real):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#else Kokkos::printf("AT(real):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#endif FAILURE(); } } if (!equal(AT::asin(AT::sin(one)), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::asin(sin(1)) != 1\n"); -#else Kokkos::printf("AT::asin(sin(1)) != 1\n"); -#endif FAILURE(); } if (!equal(AT::acos(AT::cos(one)), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::acos(cos(1)) != 1\n"); -#else Kokkos::printf("AT::acos(cos(1)) != 1\n"); -#endif FAILURE(); } if (!equal(AT::atan(AT::tan(one)), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::atan(tan(1)) != 1\n"); -#else Kokkos::printf("AT::atan(tan(1)) != 1\n"); -#endif FAILURE(); } @@ -978,8 +773,7 @@ class ArithTraitsTesterTranscendentalBase if (!AT::is_complex) { result = AT::pow(three, three); if (result != twentySeven) { - out << "AT::pow (three, three) = " << 
result - << " != twentySeven = " << twentySeven << endl; + out << "AT::pow (three, three) = " << result << " != twentySeven = " << twentySeven << endl; FAILURE(); } } @@ -988,20 +782,17 @@ class ArithTraitsTesterTranscendentalBase if (AT::is_signed && !AT::is_complex) { result = AT::pow(-three, one); if (result != -three) { - out << "AT::pow (-three, one) = " << result << " != -three = " << -three - << endl; + out << "AT::pow (-three, one) = " << result << " != -three = " << -three << endl; FAILURE(); } result = AT::pow(-three, two); if (result != nine) { - out << "AT::pow (-three, two) = " << result << " != nine = " << nine - << endl; + out << "AT::pow (-three, two) = " << result << " != nine = " << nine << endl; FAILURE(); } result = AT::pow(-three, three); if (result != -twentySeven) { - out << "AT::pow (-three, three) = " << result - << " != -twentySeven = " << twentySeven << endl; + out << "AT::pow (-three, three) = " << result << " != -twentySeven = " << twentySeven << endl; FAILURE(); } } @@ -1034,74 +825,40 @@ class ArithTraitsTesterTranscendentalBase } if (!equal(AT::cbrt(zero), zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(0) != 0\n"); -#else Kokkos::printf("AT::cbrt(0) != 0\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(one), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(1) != 1\n"); -#else Kokkos::printf("AT::cbrt(1) != 1\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(twentySeven), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(27) != 3\n"); -#else Kokkos::printf("AT::cbrt(27) != 3\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(sixtyFour), four)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(64) != 4\n"); -#else Kokkos::printf("AT::cbrt(64) != 4\n"); -#endif FAILURE(); } if (AT::is_integer) { if (!equal(AT::cbrt(fortyTwo), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT:cbrt(42) != 3\n"); -#else 
Kokkos::printf("AT:cbrt(42) != 3\n"); -#endif FAILURE(); } if (!equal(AT::cbrt(oneTwentySeven), five)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(127) != 5\n"); -#else Kokkos::printf("AT::cbrt(127) != 5\n"); -#endif FAILURE(); } } if (!equal(AT::exp(zero), one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::cbrt(0) != 1\n"); -#else Kokkos::printf("AT::cbrt(0) != 1\n"); -#endif FAILURE(); } if (AT::is_complex) { const ScalarType val = two; //(two.real(), two.real()); if (!equal(AT::conj(AT::exp(val)), AT::exp(AT::conj(val)))) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT::conj(exp(complex(2,0))) != AT::exp(conj(complex(2,0)))\n"); -#else - Kokkos::printf( - "AT::conj(exp(complex(2,0))) != AT::exp(conj(complex(2,0)))\n"); -#endif + Kokkos::printf("AT::conj(exp(complex(2,0))) != AT::exp(conj(complex(2,0)))\n"); FAILURE(); } } @@ -1119,23 +876,11 @@ class ArithTraitsTesterTranscendentalBase const auto val_sin = AT::sin(val); const auto val_cos = AT::cos(val); if (!equal(val_sin * val_sin + val_cos * val_cos, one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(complex):: sin(val)*sin(val) + cos(val)*cos(val) != 1\n"); -#else - Kokkos::printf( - "AT(complex):: sin(val)*sin(val) + cos(val)*cos(val) != 1\n"); -#endif + Kokkos::printf("AT(complex):: sin(val)*sin(val) + cos(val)*cos(val) != 1\n"); FAILURE(); } if (!equal(val_sin / val_cos, AT::tan(val))) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(complex):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#else - Kokkos::printf( - "AT(complex):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#endif + Kokkos::printf("AT(complex):: sin(val)/cos(val) != AT(real)::tan(val)\n"); FAILURE(); } } else { @@ -1143,47 +888,25 @@ class ArithTraitsTesterTranscendentalBase const auto val_sin = AT::sin(val); const auto val_cos = AT::cos(val); if (!equal(val_sin * val_sin + val_cos * val_cos, one)) { -#if KOKKOS_VERSION < 
40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(real):: sin(val)*sin(val) + cos(a)*cos(a) != 1\n"); -#else Kokkos::printf("AT(real):: sin(val)*sin(val) + cos(a)*cos(a) != 1\n"); -#endif FAILURE(); } if (!equal(val_sin / val_cos, AT::tan(val))) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT(real):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#else Kokkos::printf("AT(real):: sin(val)/cos(val) != AT(real)::tan(val)\n"); -#endif FAILURE(); } } if (!equal(AT::asin(AT::sin(three)), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::asin(sin(3)) != 3\n"); -#else Kokkos::printf("AT::asin(sin(3)) != 3\n"); -#endif FAILURE(); } if (!equal(AT::acos(AT::cos(three)), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::acos(cos(3)) != 3\n"); -#else Kokkos::printf("AT::acos(cos(3)) != 3\n"); -#endif FAILURE(); } if (!equal(AT::atan(AT::tan(three)), three)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::atan(tan(3)) != 3\n"); -#else Kokkos::printf("AT::atan(tan(3)) != 3\n"); -#endif FAILURE(); } @@ -1211,10 +934,8 @@ class ArithTraitsTesterTranscendentalBase /// Some tests will be executed whether or not ScalarType is /// complex, but the specific tests that are run will depend on /// ScalarType. -template ::is_complex> -class ArithTraitsTesterComplexBase - : public ArithTraitsTesterTranscendentalBase { +template ::is_complex> +class ArithTraitsTesterComplexBase : public ArithTraitsTesterTranscendentalBase { private: //! The base class of this class. typedef ArithTraitsTesterTranscendentalBase base_type; @@ -1228,8 +949,7 @@ class ArithTraitsTesterComplexBase /// \brief The "parallel for" part of the reduction. /// /// See comments of ArithTraitsTesterBase's operator(). - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const; + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const; //! Constructor (does nothing, but marked as device function). 
KOKKOS_INLINE_FUNCTION ArithTraitsTesterComplexBase(); @@ -1259,8 +979,7 @@ class ArithTraitsTesterComplexBase //! Constructor (does nothing, but marked as device function). KOKKOS_INLINE_FUNCTION ArithTraitsTesterComplexBase() {} - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); typedef Kokkos::ArithTraits AT; (void)iwork; // forestall compiler warning for unused variable @@ -1275,25 +994,18 @@ class ArithTraitsTesterComplexBase #else { if (AT::is_signed != std::numeric_limits::is_signed) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "AT::is_signed = 0x%x, std::numeric_limits::is_signed " - "= 0x%x\n", - AT::is_signed, std::numeric_limits::is_signed); -#else Kokkos::printf( "AT::is_signed = 0x%x, std::numeric_limits::is_signed " "= 0x%x\n", AT::is_signed, std::numeric_limits::is_signed); -#endif FAILURE(); } } #endif // KOKKOS_HALF_T_IS_FLOAT - if (AT::is_complex) { - FAILURE(); - } + if (AT::is_complex) { + FAILURE(); + } // Call the base class' implementation. Every subclass' // implementation of operator() must do this, in order to include @@ -1352,8 +1064,7 @@ class ArithTraitsTesterComplexBase //! Constructor (does nothing, but marked as device function). KOKKOS_INLINE_FUNCTION ArithTraitsTesterComplexBase() {} - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); typedef Kokkos::ArithTraits AT; (void)iwork; // forestall compiler warning for unused variable @@ -1371,8 +1082,7 @@ class ArithTraitsTesterComplexBase const ScalarType onePlusOne(one, one); // Test conjugation. 
- if (AT::conj(oneMinusOne) != onePlusOne || - AT::conj(onePlusOne) != oneMinusOne) { + if (AT::conj(oneMinusOne) != onePlusOne || AT::conj(onePlusOne) != oneMinusOne) { FAILURE(); } @@ -1440,16 +1150,12 @@ class ArithTraitsTesterComplexBase /// (testHost()). The device-based test is a reduction over redundant /// executions of the test. All redundant executions must return /// '1' (passed). -template ::is_exact> +template ::is_exact> class ArithTraitsTesterFloatingPointBase - : public ArithTraitsTesterComplexBase< - ScalarType, DeviceType, Kokkos::ArithTraits::is_complex> { + : public ArithTraitsTesterComplexBase::is_complex> { private: //! The base class of this class. - typedef ArithTraitsTesterComplexBase< - ScalarType, DeviceType, Kokkos::ArithTraits::is_complex> - base_type; + typedef ArithTraitsTesterComplexBase::is_complex> base_type; public: typedef DeviceType execution_space; @@ -1460,8 +1166,7 @@ class ArithTraitsTesterFloatingPointBase /// \brief The "parallel for" part of the reduction. /// /// See comments of ArithTraitsTesterBase's operator(). - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const; + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const; protected: virtual int testHostImpl(std::ostream& out) const; @@ -1473,13 +1178,10 @@ class ArithTraitsTesterFloatingPointBase // template class ArithTraitsTesterFloatingPointBase - : public ArithTraitsTesterComplexBase< - ScalarType, DeviceType, Kokkos::ArithTraits::is_complex> { + : public ArithTraitsTesterComplexBase::is_complex> { private: //! The base class of this class. - typedef ArithTraitsTesterComplexBase< - ScalarType, DeviceType, Kokkos::ArithTraits::is_complex> - base_type; + typedef ArithTraitsTesterComplexBase::is_complex> base_type; public: typedef typename DeviceType::execution_space execution_space; @@ -1490,38 +1192,27 @@ class ArithTraitsTesterFloatingPointBase //! Constructor (does nothing, but marked as device function). 
KOKKOS_INLINE_FUNCTION ArithTraitsTesterFloatingPointBase() {} - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); typedef Kokkos::ArithTraits AT; (void)iwork; // forestall compiler warning for unused variable int success = 1; if (AT::is_exact) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("AT::is_exact is 1\n"); -#else Kokkos::printf("AT::is_exact is 1\n"); -#endif FAILURE(); } // Compiler intrinsic casts from nan of type half_t / bhalf_t to nan // of type float in CUDA, SYCL and HIP do not work yet. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_SYCL) || \ - defined(KOKKOS_ENABLE_HIP) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_HIP) namespace KE = Kokkos::Experimental; - if constexpr (!std::is_same::value && - !std::is_same::value) { + if constexpr (!std::is_same::value && !std::is_same::value) { #else { #endif // KOKKOS_ENABLE_CUDA || KOKKOS_ENABLE_SYCL || KOKKOS_ENABLE_HIP if (!AT::isNan(AT::nan())) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("NaN is not NaN\n"); -#else Kokkos::printf("NaN is not NaN\n"); -#endif FAILURE(); } } @@ -1530,56 +1221,31 @@ class ArithTraitsTesterFloatingPointBase const ScalarType one = AT::one(); if (AT::isInf(zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("0 is Inf\n"); -#else Kokkos::printf("0 is Inf\n"); -#endif FAILURE(); } if (AT::isInf(one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("1 is Inf\n"); -#else Kokkos::printf("1 is Inf\n"); -#endif FAILURE(); } -#if defined(KOKKOS_ENABLE_SYCL) || \ - defined(KOKKOS_ENABLE_HIP) // FIXME_SYCL, FIXME_HIP +#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_HIP) // FIXME_SYCL, FIXME_HIP if constexpr (!std::is_same_v) { if (AT::isNan(zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("0 is NaN\n"); -#else 
Kokkos::printf("0 is NaN\n"); -#endif FAILURE(); } if (AT::isNan(one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("1 is NaN\n"); -#else Kokkos::printf("1 is NaN\n"); -#endif FAILURE(); } } #else if (AT::isNan(zero)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("0 is NaN\n"); -#else Kokkos::printf("0 is NaN\n"); -#endif FAILURE(); } if (AT::isNan(one)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("1 is NaN\n"); -#else Kokkos::printf("1 is NaN\n"); -#endif FAILURE(); } #endif @@ -1671,13 +1337,10 @@ class ArithTraitsTesterFloatingPointBase // template class ArithTraitsTesterFloatingPointBase - : public ArithTraitsTesterComplexBase< - ScalarType, DeviceType, Kokkos::ArithTraits::is_complex> { + : public ArithTraitsTesterComplexBase::is_complex> { private: //! The base class of this class. - typedef ArithTraitsTesterComplexBase< - ScalarType, DeviceType, Kokkos::ArithTraits::is_complex> - base_type; + typedef ArithTraitsTesterComplexBase::is_complex> base_type; public: typedef typename DeviceType::execution_space execution_space; @@ -1688,19 +1351,14 @@ class ArithTraitsTesterFloatingPointBase //! Constructor (does nothing, but marked as device function). KOKKOS_INLINE_FUNCTION ArithTraitsTesterFloatingPointBase() {} - KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, - value_type& dst) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type iwork, value_type& dst) const { TRACE(); typedef Kokkos::ArithTraits AT; (void)iwork; // forestall compiler warning for unused variable int success = 1; if (!AT::is_exact) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("! AT:is_exact\n"); -#else Kokkos::printf("! AT:is_exact\n"); -#endif FAILURE(); } @@ -1762,8 +1420,7 @@ class ArithTraitsTesterFloatingPointBase /// executions of the test. All redundant executions must return /// '1' (passed). 
template -class ArithTraitsTester - : public ArithTraitsTesterFloatingPointBase { +class ArithTraitsTester : public ArithTraitsTesterFloatingPointBase { public: typedef typename DeviceType::execution_space execution_space; typedef typename execution_space::size_type size_type; @@ -1789,11 +1446,9 @@ int testArithTraitsOnDevice(std::ostream& out, const int verbose) { using std::endl; typedef ArithTraitsTester functor_type; int success = 1; // output argument of parallel_reduce - Kokkos::parallel_reduce("KokkosKernels::Common::Test::ArithTraitsOnDevice", 1, - functor_type(), success); + Kokkos::parallel_reduce("KokkosKernels::Common::Test::ArithTraitsOnDevice", 1, functor_type(), success); if (success) { - if (verbose) - out << Kokkos::ArithTraits::name() << " passed" << endl; + if (verbose) out << Kokkos::ArithTraits::name() << " passed" << endl; } else { out << Kokkos::ArithTraits::name() << " FAILED" << endl; } @@ -1815,8 +1470,7 @@ int testArithTraitsOnHost(std::ostream& out, const int verbose) { const int localSuccess = f.testHost(out); if (localSuccess) { - if (verbose) - out << Kokkos::ArithTraits::name() << " passed" << endl; + if (verbose) out << Kokkos::ArithTraits::name() << " passed" << endl; } else { out << Kokkos::ArithTraits::name() << " FAILED" << endl; } @@ -1856,8 +1510,7 @@ int runAllArithTraitsDeviceTests(std::ostream& out, const int verbose) { success = success && curSuccess; curSuccess = testArithTraitsOnDevice(out, verbose); success = success && curSuccess; - curSuccess = - testArithTraitsOnDevice(out, verbose); + curSuccess = testArithTraitsOnDevice(out, verbose); success = success && curSuccess; curSuccess = testArithTraitsOnDevice(out, verbose); success = success && curSuccess; @@ -1885,8 +1538,7 @@ int runAllArithTraitsDeviceTests(std::ostream& out, const int verbose) { success = success && curSuccess; curSuccess = testArithTraitsOnDevice(out, verbose); success = success && curSuccess; - curSuccess = - testArithTraitsOnDevice(out, verbose); 
+ curSuccess = testArithTraitsOnDevice(out, verbose); // // Built-in real floating-point types @@ -1894,10 +1546,8 @@ int runAllArithTraitsDeviceTests(std::ostream& out, const int verbose) { #if defined(KOKKOS_HALF_T_IS_FLOAT) TRACE(); - success = success && curSuccess; - curSuccess = - testArithTraitsOnDevice( - out, verbose); + success = success && curSuccess; + curSuccess = testArithTraitsOnDevice(out, verbose); #endif // KOKKOS_HALF_T_IS_FLOAT success = success && curSuccess; curSuccess = testArithTraitsOnDevice(out, verbose); @@ -1908,12 +1558,10 @@ int runAllArithTraitsDeviceTests(std::ostream& out, const int verbose) { // Kokkos' complex floating-point types // - success = success && curSuccess; - curSuccess = - testArithTraitsOnDevice, DeviceType>(out, verbose); success = success && curSuccess; - curSuccess = testArithTraitsOnDevice, DeviceType>( - out, verbose); + curSuccess = testArithTraitsOnDevice, DeviceType>(out, verbose); + success = success && curSuccess; + curSuccess = testArithTraitsOnDevice, DeviceType>(out, verbose); return success && curSuccess; } @@ -1980,8 +1628,7 @@ int runAllArithTraitsHostTests(std::ostream& out, const int verbose) { success = success && curSuccess; curSuccess = testArithTraitsOnHost(out, verbose); success = success && curSuccess; - curSuccess = - testArithTraitsOnHost(out, verbose); + curSuccess = testArithTraitsOnHost(out, verbose); // // Built-in real and complex floating-point types @@ -1991,20 +1638,16 @@ int runAllArithTraitsHostTests(std::ostream& out, const int verbose) { curSuccess = testArithTraitsOnHost(out, verbose); success = success && curSuccess; curSuccess = testArithTraitsOnHost(out, verbose); -#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ - !defined(KOKKOS_ENABLE_SYCL) +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_SYCL) // This would spill tons of warnings about host device stuff otherwise success = success && curSuccess; curSuccess = 
testArithTraitsOnHost(out, verbose); success = success && curSuccess; - curSuccess = - testArithTraitsOnHost, DeviceType>(out, verbose); - success = success && curSuccess; - curSuccess = - testArithTraitsOnHost, DeviceType>(out, verbose); + curSuccess = testArithTraitsOnHost, DeviceType>(out, verbose); success = success && curSuccess; - curSuccess = testArithTraitsOnHost, DeviceType>( - out, verbose); + curSuccess = testArithTraitsOnHost, DeviceType>(out, verbose); + success = success && curSuccess; + curSuccess = testArithTraitsOnHost, DeviceType>(out, verbose); #endif // // Kokkos' complex floating-point types @@ -2013,15 +1656,12 @@ int runAllArithTraitsHostTests(std::ostream& out, const int verbose) { #if defined(KOKKOS_HALF_T_IS_FLOAT) success = success && curSuccess; TRACE(); - curSuccess = testArithTraitsOnHost( - out, verbose); + curSuccess = testArithTraitsOnHost(out, verbose); #endif // KOKKOS_HALF_T_IS_FLOAT - success = success && curSuccess; - curSuccess = - testArithTraitsOnHost, DeviceType>(out, verbose); - success = success && curSuccess; - curSuccess = - testArithTraitsOnHost, DeviceType>(out, verbose); + success = success && curSuccess; + curSuccess = testArithTraitsOnHost, DeviceType>(out, verbose); + success = success && curSuccess; + curSuccess = testArithTraitsOnHost, DeviceType>(out, verbose); // success = success && curSuccess; curSuccess = // testArithTraitsOnHost, DeviceType> (out, // verbose); diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_Error.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_Error.hpp index 375f75b5ffec..139231d63fd5 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_Error.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_Error.hpp @@ -20,8 +20,7 @@ #include "KokkosKernels_Error.hpp" void test_kokkoskernels_throw() { - const std::string my_throw_msg = - "Testing Kokkos Kernels' throw_runtime_exception."; + const std::string my_throw_msg = "Testing Kokkos Kernels' 
throw_runtime_exception."; try { KokkosKernels::Impl::throw_runtime_exception(my_throw_msg); } catch (const std::runtime_error& e) { diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_Iota.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_Iota.hpp index af3b6502bf04..ee1e33fda866 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_Iota.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_Iota.hpp @@ -76,13 +76,11 @@ void test_iota_rank() { template void test_iota_non_const_value_type() { - static_assert( - std::is_same_v::non_const_value_type, T>, - "Iota's non-const value type should be same as non-const type provided"); - static_assert( - std::is_same_v::non_const_value_type, T>, - "Iota's non-const value type should be same as non-const version of " - "const type provided"); + static_assert(std::is_same_v::non_const_value_type, T>, + "Iota's non-const value type should be same as non-const type provided"); + static_assert(std::is_same_v::non_const_value_type, T>, + "Iota's non-const value type should be same as non-const version of " + "const type provided"); } template @@ -98,10 +96,8 @@ void test_iota_subview() { template void test_is_iota() { - static_assert(KokkosKernels::Impl::is_iota_v>, - "Iota should be an Iota"); - static_assert(!KokkosKernels::Impl::is_iota_v, - "int should not be an Iota"); + static_assert(KokkosKernels::Impl::is_iota_v>, "Iota should be an Iota"); + static_assert(!KokkosKernels::Impl::is_iota_v, "int should not be an Iota"); } template diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_LowerBound.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_LowerBound.hpp index 6ca28b8be1e9..d471801a3078 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_LowerBound.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_LowerBound.hpp @@ -21,8 +21,7 @@ #include template -size_t std_lower_bound(const std::vector &haystack, - const Ordinal needle) { 
+size_t std_lower_bound(const std::vector &haystack, const Ordinal needle) { const auto it = std::lower_bound(haystack.begin(), haystack.end(), needle); return it - haystack.begin(); } @@ -33,9 +32,7 @@ struct ThreadLowerBoundFunctor { using hv_value_type = typename HaystackView::non_const_value_type; using hv_size_type = typename HaystackView::size_type; - ThreadLowerBoundFunctor(const hv_size_type &expected, - const HaystackView &haystack, - const hv_value_type &needle) + ThreadLowerBoundFunctor(const hv_size_type &expected, const HaystackView &haystack, const hv_value_type &needle) : expected_(expected), haystack_(haystack), needle_(needle) {} KOKKOS_INLINE_FUNCTION @@ -43,14 +40,7 @@ struct ThreadLowerBoundFunctor { if (0 == i) { hv_size_type idx = KokkosKernels::lower_bound_thread(haystack_, needle_); if (idx != expected_) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%d thread %d expected %d got %d\n", - __FILE__, __LINE__, int(i), - int(expected_), int(idx)); -#else - Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, - __LINE__, int(i), int(expected_), int(idx)); -#endif + Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, int(i), int(expected_), int(idx)); ++lerrCount; } } @@ -62,13 +52,11 @@ struct ThreadLowerBoundFunctor { }; template -void test_lower_bound_thread(const std::vector &_haystack, - const T &_needle) { +void test_lower_bound_thread(const std::vector &_haystack, const T &_needle) { using execution_space = typename Device::execution_space; using Policy = Kokkos::RangePolicy; using view_t = Kokkos::View; - using u_const_view_t = Kokkos::View>; + using u_const_view_t = Kokkos::View>; using size_type = typename u_const_view_t::size_type; // get expected value @@ -82,9 +70,7 @@ void test_lower_bound_thread(const std::vector &_haystack, // test lower_bound search int errCount; // run a single thread - Kokkos::parallel_reduce(Policy(0, 1), - ThreadLowerBoundFunctor(expected, haystack, 
_needle), - errCount); + Kokkos::parallel_reduce(Policy(0, 1), ThreadLowerBoundFunctor(expected, haystack, _needle), errCount); EXPECT_EQ(0, errCount); } @@ -95,24 +81,14 @@ struct TeamLowerBoundFunctor { using hv_value_type = typename HaystackView::non_const_value_type; using hv_size_type = typename HaystackView::size_type; - TeamLowerBoundFunctor(const hv_size_type &expected, - const HaystackView &haystack, - const hv_value_type &needle) + TeamLowerBoundFunctor(const hv_size_type &expected, const HaystackView &haystack, const hv_value_type &needle) : expected_(expected), haystack_(haystack), needle_(needle) {} - KOKKOS_INLINE_FUNCTION void operator()(const Member &handle, - int &lerrCount) const { - hv_size_type idx = - KokkosKernels::lower_bound_team(handle, haystack_, needle_); + KOKKOS_INLINE_FUNCTION void operator()(const Member &handle, int &lerrCount) const { + hv_size_type idx = KokkosKernels::lower_bound_team(handle, haystack_, needle_); if (idx != expected_) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%d thread %d expected %d got %d\n", - __FILE__, __LINE__, int(handle.team_rank()), - int(expected_), int(idx)); -#else - Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, - int(handle.team_rank()), int(expected_), int(idx)); -#endif + Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, int(handle.team_rank()), + int(expected_), int(idx)); ++lerrCount; } } @@ -128,8 +104,7 @@ void test_lower_bound_team(const std::vector &_haystack, const T _needle) { using Policy = Kokkos::TeamPolicy; using Member = typename Policy::member_type; using view_t = Kokkos::View; - using u_const_view_t = Kokkos::View>; + using u_const_view_t = Kokkos::View>; using size_type = typename u_const_view_t::size_type; // get expected value @@ -142,13 +117,10 @@ void test_lower_bound_team(const std::vector &_haystack, const T _needle) { // test lower_bound search const int leagueSize = 1; - const int teamSize = - 
KokkosKernels::Impl::kk_is_gpu_exec_space() ? 64 : 1; + const int teamSize = KokkosKernels::Impl::kk_is_gpu_exec_space() ? 64 : 1; int errCount; - Kokkos::parallel_reduce( - Policy(leagueSize, teamSize), - TeamLowerBoundFunctor(expected, haystack, _needle), - errCount); + Kokkos::parallel_reduce(Policy(leagueSize, teamSize), + TeamLowerBoundFunctor(expected, haystack, _needle), errCount); EXPECT_EQ(0, errCount); } @@ -230,38 +202,31 @@ void test_lower_bound() { } } -#define EXECUTE_TEST(T, DEVICE) \ - TEST_F(TestCategory, common##_##lower_bound##_##T##_##DEVICE) { \ - test_lower_bound(); \ - } +#define EXECUTE_TEST(T, DEVICE) \ + TEST_F(TestCategory, common##_##lower_bound##_##T##_##DEVICE) { test_lower_bound(); } #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(int, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(int64_t, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(size_t, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_FLOAT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(float, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_DOUBLE)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) 
EXECUTE_TEST(double, TestDevice) #endif diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_PrintConfiguration.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_PrintConfiguration.hpp index 6638c6e3985c..4f59a8857b69 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_PrintConfiguration.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_PrintConfiguration.hpp @@ -56,8 +56,6 @@ void testPrintConfiguration() { check_print_configuration(out); } -TEST_F(TestCategory, common_print_configuration) { - testPrintConfiguration(); -} +TEST_F(TestCategory, common_print_configuration) { testPrintConfiguration(); } #endif // KOKKOSKERNELS_PRINTCONFIGURATIONTEST_HPP diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_Sorting.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_Sorting.hpp index e93a9d093983..30623a8691e9 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_Sorting.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_Sorting.hpp @@ -33,8 +33,7 @@ // Then prefix-sum into randomOffsets. 
// This simulates a CRS rowmap or other batched sorting scenario template -size_t generateRandomOffsets(OrdView randomCounts, OrdView randomOffsets, - size_t n, size_t avg) { +size_t generateRandomOffsets(OrdView randomCounts, OrdView randomOffsets, size_t n, size_t avg) { srand(54321); auto countsHost = Kokkos::create_mirror_view(randomCounts); size_t total = 0; @@ -47,8 +46,7 @@ size_t generateRandomOffsets(OrdView randomCounts, OrdView randomOffsets, } Kokkos::deep_copy(randomCounts, countsHost); Kokkos::deep_copy(randomOffsets, randomCounts); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - n, randomOffsets); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(n, randomOffsets); return total; } @@ -87,8 +85,7 @@ double getRandom() { template <> Coordinates getRandom() { - return Coordinates(getRandom(), getRandom(), - getRandom()); + return Coordinates(getRandom(), getRandom(), getRandom()); } // Specialize for Kokkos::complex, with the real and imaginary parts different @@ -99,9 +96,7 @@ struct kvHash { template struct kvHash> { - Kokkos::complex operator()(const Key& k) { - return Kokkos::complex(3 * k + 4, k - 10.4); - } + Kokkos::complex operator()(const Key& k) { return Kokkos::complex(3 * k + 4, k - 10.4); } }; template @@ -133,14 +128,12 @@ struct TestSerialRadixFunctor { using Key = typename KeyView::value_type; using UnsignedKey = typename std::make_unsigned::type; - TestSerialRadixFunctor(KeyView& keys_, KeyView& keysAux_, OrdView& counts_, - OrdView& offsets_) + TestSerialRadixFunctor(KeyView& keys_, KeyView& keysAux_, OrdView& counts_, OrdView& offsets_) : keys(keys_), keysAux(keysAux_), counts(counts_), offsets(offsets_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { int off = offsets(i); - KokkosKernels::SerialRadixSort( - (UnsignedKey*)keys.data() + off, (UnsignedKey*)keysAux.data() + off, - counts(i)); + KokkosKernels::SerialRadixSort((UnsignedKey*)keys.data() + off, + (UnsignedKey*)keysAux.data() + off, counts(i)); 
} KeyView keys; KeyView keysAux; @@ -155,20 +148,14 @@ struct TestSerialRadix2Functor { using UnsignedKey = typename std::make_unsigned::type; using Value = typename ValView::value_type; - TestSerialRadix2Functor(KeyView& keys_, KeyView& keysAux_, ValView& values_, - ValView& valuesAux_, OrdView& counts_, + TestSerialRadix2Functor(KeyView& keys_, KeyView& keysAux_, ValView& values_, ValView& valuesAux_, OrdView& counts_, OrdView& offsets_) - : keys(keys_), - keysAux(keysAux_), - values(values_), - valuesAux(valuesAux_), - counts(counts_), - offsets(offsets_) {} + : keys(keys_), keysAux(keysAux_), values(values_), valuesAux(valuesAux_), counts(counts_), offsets(offsets_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { int off = offsets(i); - KokkosKernels::SerialRadixSort2( - (UnsignedKey*)keys.data() + off, (UnsignedKey*)keysAux.data() + off, - values.data() + off, valuesAux.data() + off, counts(i)); + KokkosKernels::SerialRadixSort2((UnsignedKey*)keys.data() + off, + (UnsignedKey*)keysAux.data() + off, values.data() + off, + valuesAux.data() + off, counts(i)); } KeyView keys; KeyView keysAux; @@ -188,8 +175,7 @@ void testSerialRadixSort(size_t k, size_t subArraySize) { OrdView counts("Subarray Sizes", k); OrdView offsets("Subarray Offsets", k); // Generate k sub-array sizes, each with size about 20 - size_t n = generateRandomOffsets(counts, offsets, k, - subArraySize); + size_t n = generateRandomOffsets(counts, offsets, k, subArraySize); KeyView keys("Radix sort testing data", n); fillRandom(keys); // Sort using std::sort on host to do correctness test @@ -198,22 +184,17 @@ void testSerialRadixSort(size_t k, size_t subArraySize) { KeyView keysAux("Radix sort aux data", n); // Run the sorting on device in all sub-arrays in parallel typedef Kokkos::RangePolicy range_policy; - Kokkos::parallel_for( - range_policy(0, k), - TestSerialRadixFunctor(keys, keysAux, counts, offsets)); + Kokkos::parallel_for(range_policy(0, k), TestSerialRadixFunctor(keys, 
keysAux, counts, offsets)); exec_space().fence(); - auto countsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); - auto offsetsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); + auto countsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); + auto offsetsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); for (size_t i = 0; i < k; i++) { Key* begin = gold.data() + offsetsHost(i); Key* end = begin + countsHost(i); std::sort(begin, end); } // Copy actual result to host and compare - auto keysHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), keys); + auto keysHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), keys); for (size_t i = 0; i < n; i++) { ASSERT_EQ(keysHost(i), gold(i)); } @@ -230,8 +211,7 @@ void testSerialRadixSort2(size_t k, size_t subArraySize) { OrdView counts("Subarray Sizes", k); OrdView offsets("Subarray Offsets", k); // Generate k sub-array sizes, each with size about 20 - size_t n = generateRandomOffsets(counts, offsets, k, - subArraySize); + size_t n = generateRandomOffsets(counts, offsets, k, subArraySize); KeyView keys("Radix test keys", n); ValView data("Radix test data", n); // The keys are randomized @@ -243,25 +223,20 @@ void testSerialRadixSort2(size_t k, size_t subArraySize) { // Run the sorting on device in all sub-arrays in parallel typedef Kokkos::RangePolicy range_policy; // Deliberately using a weird number for vector length - Kokkos::parallel_for(range_policy(0, k), - TestSerialRadix2Functor( - keys, keysAux, data, dataAux, counts, offsets)); + Kokkos::parallel_for(range_policy(0, k), TestSerialRadix2Functor( + keys, keysAux, data, dataAux, counts, offsets)); exec_space().fence(); // Sort using std::sort on host to do correctness test - auto countsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); - auto offsetsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); + 
auto countsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); + auto offsetsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); for (size_t i = 0; i < k; i++) { Key* begin = gold.data() + offsetsHost(i); Key* end = begin + countsHost(i); std::sort(begin, end); } // Copy results to host - auto keysHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), keys); - auto dataHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), data); + auto keysHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), keys); + auto dataHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), data); // Make sure keys are sorted exactly (stability of sort doesn't matter) for (size_t i = 0; i < n; i++) { ASSERT_EQ(keysHost(i), gold(i)); @@ -283,8 +258,7 @@ struct TestTeamBitonicFunctor { template KOKKOS_INLINE_FUNCTION void operator()(const TeamMem t) const { int i = t.league_rank(); - KokkosKernels::TeamBitonicSort( - values.data() + offsets(i), counts(i), t); + KokkosKernels::TeamBitonicSort(values.data() + offsets(i), counts(i), t); } ValView values; @@ -297,15 +271,14 @@ struct TestTeamBitonic2Functor { typedef typename KeyView::value_type Key; typedef typename ValView::value_type Value; - TestTeamBitonic2Functor(KeyView& keys_, ValView& values_, OrdView& counts_, - OrdView& offsets_) + TestTeamBitonic2Functor(KeyView& keys_, ValView& values_, OrdView& counts_, OrdView& offsets_) : keys(keys_), values(values_), counts(counts_), offsets(offsets_) {} template KOKKOS_INLINE_FUNCTION void operator()(const TeamMem t) const { int i = t.league_rank(); - KokkosKernels::TeamBitonicSort2( - keys.data() + offsets(i), values.data() + offsets(i), counts(i), t); + KokkosKernels::TeamBitonicSort2(keys.data() + offsets(i), values.data() + offsets(i), + counts(i), t); } KeyView keys; @@ -324,25 +297,21 @@ void testTeamBitonicSort(size_t k, size_t subArraySize) { OrdView counts("Subarray Sizes", k); OrdView offsets("Subarray 
Offsets", k); // Generate k sub-array sizes, each with size about 20 - size_t n = generateRandomOffsets(counts, offsets, k, - subArraySize); + size_t n = generateRandomOffsets(counts, offsets, k, subArraySize); ValView data("Bitonic sort testing data", n); fillRandom(data); Kokkos::View gold("Host sorted", n); Kokkos::deep_copy(gold, data); // Run the sorting on device in all sub-arrays in parallel - Kokkos::parallel_for( - Kokkos::TeamPolicy(k, Kokkos::AUTO()), - TestTeamBitonicFunctor(data, counts, offsets)); + Kokkos::parallel_for(Kokkos::TeamPolicy(k, Kokkos::AUTO()), + TestTeamBitonicFunctor(data, counts, offsets)); // Copy result to host auto dataHost = Kokkos::create_mirror_view(data); Kokkos::deep_copy(dataHost, data); // Sort using std::sort on host to do correctness test exec_space().fence(); - auto countsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); - auto offsetsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); + auto countsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); + auto offsetsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); for (size_t i = 0; i < k; i++) { Scalar* begin = gold.data() + offsetsHost(i); Scalar* end = begin + countsHost(i); @@ -364,8 +333,7 @@ void testTeamBitonicSort2(size_t k, size_t subArraySize) { OrdView counts("Subarray Sizes", k); OrdView offsets("Subarray Offsets", k); // Generate k sub-array sizes, each with size about 20 - size_t n = generateRandomOffsets(counts, offsets, k, - subArraySize); + size_t n = generateRandomOffsets(counts, offsets, k, subArraySize); KeyView keys("Bitonic test keys", n); ValView data("Bitonic test data", n); // The keys are randomized @@ -375,13 +343,10 @@ void testTeamBitonicSort2(size_t k, size_t subArraySize) { // Run the sorting on device in all sub-arrays in parallel, just using vector // loops Deliberately using a weird number for vector length 
Kokkos::parallel_for(Kokkos::TeamPolicy(k, Kokkos::AUTO()), - TestTeamBitonic2Functor( - keys, data, counts, offsets)); + TestTeamBitonic2Functor(keys, data, counts, offsets)); exec_space().fence(); - auto countsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); - auto offsetsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); + auto countsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), counts); + auto offsetsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), offsets); // Sort using std::sort on host to do correctness test for (size_t i = 0; i < k; i++) { Key* begin = gold.data() + offsetsHost(i); @@ -389,10 +354,8 @@ void testTeamBitonicSort2(size_t k, size_t subArraySize) { std::sort(begin, end); } // Copy results to host - auto keysHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), keys); - auto dataHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), data); + auto keysHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), keys); + auto dataHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), data); // Make sure keys are sorted exactly (stability of sort doesn't matter) for (size_t i = 0; i < n; i++) { ASSERT_EQ(keysHost(i), gold(i)); @@ -423,8 +386,7 @@ void testBitonicSort(size_t n) { fillRandom(data); KokkosKernels::bitonicSort(data); int ordered = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0, n - 1), - CheckSortedFunctor(data), + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, n - 1), CheckSortedFunctor(data), Kokkos::Min(ordered)); ASSERT_TRUE(ordered); } @@ -443,10 +405,7 @@ struct CheckOrderedFunctor { template struct CompareDescending { - KOKKOS_INLINE_FUNCTION bool operator()(const Scalar lhs, - const Scalar rhs) const { - return lhs > rhs; - } + KOKKOS_INLINE_FUNCTION bool operator()(const Scalar lhs, const Scalar rhs) const { return lhs > rhs; } }; template @@ -462,15 +421,13 @@ void testBitonicSortDescending() { 
fillRandom(data); KokkosKernels::bitonicSort(data); int ordered = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0, n - 1), - CheckOrderedFunctor(data), + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, n - 1), CheckOrderedFunctor(data), Kokkos::Min(ordered)); ASSERT_TRUE(ordered); } struct LexCompare { - KOKKOS_INLINE_FUNCTION bool operator()(const Coordinates lhs, - const Coordinates rhs) const { + KOKKOS_INLINE_FUNCTION bool operator()(const Coordinates lhs, const Coordinates rhs) const { if (lhs.x < rhs.x) return true; else if (lhs.x > rhs.x) @@ -497,8 +454,7 @@ void testBitonicSortLexicographic() { fillRandom(data); KokkosKernels::bitonicSort(data); int ordered = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0, n - 1), - CheckOrderedFunctor(data), + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, n - 1), CheckOrderedFunctor(data), Kokkos::Min(ordered)); ASSERT_TRUE(ordered); } @@ -520,8 +476,7 @@ TEST_F(TestCategory, common_serial_radix2) { for (size_t arrayMax = 0; arrayMax < 1000; arrayMax = 1 + 4 * arrayMax) { testSerialRadixSort2(numArrays, arrayMax); testSerialRadixSort2(numArrays, arrayMax); - testSerialRadixSort2>(numArrays, - arrayMax); + testSerialRadixSort2>(numArrays, arrayMax); } } @@ -542,8 +497,7 @@ TEST_F(TestCategory, common_team_bitonic2) { for (size_t arrayMax = 0; arrayMax < 10000; arrayMax = 1 + 4 * arrayMax) { testTeamBitonicSort2(numArrays, arrayMax); testTeamBitonicSort2(numArrays, arrayMax); - testTeamBitonicSort2>(numArrays, - arrayMax); + testTeamBitonicSort2>(numArrays, arrayMax); } } diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_UpperBound.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_UpperBound.hpp index 113b76c3adb1..abd4cf655ad4 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_UpperBound.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_UpperBound.hpp @@ -21,8 +21,7 @@ #include template -size_t std_upper_bound(const std::vector &haystack, - const Ordinal needle) { 
+size_t std_upper_bound(const std::vector &haystack, const Ordinal needle) { const auto it = std::upper_bound(haystack.begin(), haystack.end(), needle); return it - haystack.begin(); } @@ -33,9 +32,7 @@ struct ThreadUpperBoundFunctor { using hv_value_type = typename HaystackView::non_const_value_type; using hv_size_type = typename HaystackView::size_type; - ThreadUpperBoundFunctor(const hv_size_type &expected, - const HaystackView &haystack, - const hv_value_type &needle) + ThreadUpperBoundFunctor(const hv_size_type &expected, const HaystackView &haystack, const hv_value_type &needle) : expected_(expected), haystack_(haystack), needle_(needle) {} KOKKOS_INLINE_FUNCTION @@ -43,14 +40,7 @@ struct ThreadUpperBoundFunctor { if (0 == i) { hv_size_type idx = KokkosKernels::upper_bound_thread(haystack_, needle_); if (idx != expected_) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%d thread %d expected %d got %d\n", - __FILE__, __LINE__, int(i), - int(expected_), int(idx)); -#else - Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, - __LINE__, int(i), int(expected_), int(idx)); -#endif + Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, int(i), int(expected_), int(idx)); ++lerrCount; } } @@ -62,13 +52,11 @@ struct ThreadUpperBoundFunctor { }; template -void test_upper_bound_thread(const std::vector &_haystack, - const T &_needle) { +void test_upper_bound_thread(const std::vector &_haystack, const T &_needle) { using execution_space = typename Device::execution_space; using Policy = Kokkos::RangePolicy; using view_t = Kokkos::View; - using u_const_view_t = Kokkos::View>; + using u_const_view_t = Kokkos::View>; using hv_size_type = typename u_const_view_t::size_type; // get expected value @@ -82,9 +70,7 @@ void test_upper_bound_thread(const std::vector &_haystack, // test upper_bound search int errCount; // run a single thread - Kokkos::parallel_reduce(Policy(0, 1), - ThreadUpperBoundFunctor(expected, haystack, 
_needle), - errCount); + Kokkos::parallel_reduce(Policy(0, 1), ThreadUpperBoundFunctor(expected, haystack, _needle), errCount); EXPECT_EQ(0, errCount); } @@ -95,24 +81,14 @@ struct TeamUpperBoundFunctor { using hv_value_type = typename HaystackView::non_const_value_type; using hv_size_type = typename HaystackView::size_type; - TeamUpperBoundFunctor(const hv_size_type &expected, - const HaystackView &haystack, - const hv_value_type &needle) + TeamUpperBoundFunctor(const hv_size_type &expected, const HaystackView &haystack, const hv_value_type &needle) : expected_(expected), haystack_(haystack), needle_(needle) {} - KOKKOS_INLINE_FUNCTION void operator()(const Member &handle, - int &lerrCount) const { - hv_size_type idx = - KokkosKernels::upper_bound_team(handle, haystack_, needle_); + KOKKOS_INLINE_FUNCTION void operator()(const Member &handle, int &lerrCount) const { + hv_size_type idx = KokkosKernels::upper_bound_team(handle, haystack_, needle_); if (idx != expected_) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%d thread %d expected %d got %d\n", - __FILE__, __LINE__, int(handle.team_rank()), - int(expected_), int(idx)); -#else - Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, - int(handle.team_rank()), int(expected_), int(idx)); -#endif + Kokkos::printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, int(handle.team_rank()), + int(expected_), int(idx)); ++lerrCount; } } @@ -128,8 +104,7 @@ void test_upper_bound_team(const std::vector &_haystack, const T _needle) { using Policy = Kokkos::TeamPolicy; using Member = typename Policy::member_type; using view_t = Kokkos::View; - using u_const_view_t = Kokkos::View>; + using u_const_view_t = Kokkos::View>; using hv_size_type = typename u_const_view_t::size_type; // get expected value @@ -142,13 +117,10 @@ void test_upper_bound_team(const std::vector &_haystack, const T _needle) { // test upper_bound search const int leagueSize = 1; - const int teamSize = - 
KokkosKernels::Impl::kk_is_gpu_exec_space() ? 64 : 1; + const int teamSize = KokkosKernels::Impl::kk_is_gpu_exec_space() ? 64 : 1; int errCount; - Kokkos::parallel_reduce( - Policy(leagueSize, teamSize), - TeamUpperBoundFunctor(expected, haystack, _needle), - errCount); + Kokkos::parallel_reduce(Policy(leagueSize, teamSize), + TeamUpperBoundFunctor(expected, haystack, _needle), errCount); EXPECT_EQ(0, errCount); } @@ -221,38 +193,31 @@ void test_upper_bound() { } } -#define EXECUTE_TEST(T, DEVICE) \ - TEST_F(TestCategory, common##_##upper_bound##_##T##_##DEVICE) { \ - test_upper_bound(); \ - } +#define EXECUTE_TEST(T, DEVICE) \ + TEST_F(TestCategory, common##_##upper_bound##_##T##_##DEVICE) { test_upper_bound(); } #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(int, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(int64_t, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(size_t, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_FLOAT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(float, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_DOUBLE)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) 
EXECUTE_TEST(double, TestDevice) #endif diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_Version.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_Version.hpp index cb5265cfef61..e2a5faeee241 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_Version.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_Version.hpp @@ -42,8 +42,7 @@ void test_version_info() { static_assert(false, "KOKKOSKERNELS_VERSION_PATCH macro is not defined!"); #endif - static_assert(KOKKOSKERNELS_VERSION == (KOKKOSKERNELS_VERSION_MAJOR * 10000 + - KOKKOSKERNELS_VERSION_MINOR * 100 + + static_assert(KOKKOSKERNELS_VERSION == (KOKKOSKERNELS_VERSION_MAJOR * 10000 + KOKKOSKERNELS_VERSION_MINOR * 100 + KOKKOSKERNELS_VERSION_PATCH)); } diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_float128.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_float128.hpp index 846a5ef8791b..063fd06d80a9 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_float128.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_float128.hpp @@ -32,7 +32,7 @@ #include #include -//#include +// #include #include #include @@ -55,9 +55,8 @@ std::ostream& operator<<(std::ostream& out, const __float128& x) { const int numCharPrinted = quadmath_snprintf(buf, bufSize, "%.30Qe", x); if (static_cast(numCharPrinted) >= bufSize) { std::ostringstream os; - os << "Failed to print __float128 value: buffer has " << bufSize - << " characters, but quadmath_snprintf wanted " << numCharPrinted - << " characters!"; + os << "Failed to print __float128 value: buffer has " << bufSize << " characters, but quadmath_snprintf wanted " + << numCharPrinted << " characters!"; throw std::runtime_error(os.str()); } out << buf; @@ -79,8 +78,7 @@ void testfloat128() { << "y = " << y << endl << "z = " << z << endl << "(double) z = " << static_cast(z) << endl - << "z - (double) z = " - << (z - static_cast<__float128>(static_cast(z))) << endl; + << "z - (double) z = " 
<< (z - static_cast<__float128>(static_cast(z))) << endl; // FIXME (mfh 04 Sep 2015) The results of printing could depend on // the locale. This works fine for the default locale on my system. @@ -89,8 +87,7 @@ void testfloat128() { os << x; if (os.str() != "1.000000000000000000000000000000e+00") { success = false; - cout << "'_float128 x = 1.0' does not print correctly! It prints as " - << os.str() << "." << endl; + cout << "'_float128 x = 1.0' does not print correctly! It prints as " << os.str() << "." << endl; } } { diff --git a/packages/kokkos-kernels/common/unit_test/Test_Common_set_bit_count.hpp b/packages/kokkos-kernels/common/unit_test/Test_Common_set_bit_count.hpp index 6e2c6e80b676..7b6c996390f3 100644 --- a/packages/kokkos-kernels/common/unit_test/Test_Common_set_bit_count.hpp +++ b/packages/kokkos-kernels/common/unit_test/Test_Common_set_bit_count.hpp @@ -37,21 +37,17 @@ template struct ppctest { view_type view; typename view_type::non_const_type out_view; - ppctest(view_type view_, typename view_type::non_const_type out_view_) - : view(view_), out_view(out_view_) {} + ppctest(view_type view_, typename view_type::non_const_type out_view_) : view(view_), out_view(out_view_) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t row) const { - out_view(row) = pop_count(view(row)); - } + void operator()(const size_t row) const { out_view(row) = pop_count(view(row)); } }; template struct ppccheck { view_type view; typename view_type::non_const_type out_view; - ppccheck(view_type view_, typename view_type::non_const_type out_view_) - : view(view_), out_view(out_view_) {} + ppccheck(view_type view_, typename view_type::non_const_type out_view_) : view(view_), out_view(out_view_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t row) const { @@ -69,8 +65,7 @@ view_type get_array_bit_count(view_type view) { typename view_type::non_const_type out_view("out", view.extent(0)); typedef Kokkos::RangePolicy my_exec_space; - 
Kokkos::parallel_for("KokkosKernels::Common::Test::GetArrayBitCount", - my_exec_space(0, view.extent(0)), + Kokkos::parallel_for("KokkosKernels::Common::Test::GetArrayBitCount", my_exec_space(0, view.extent(0)), ppctest(view, out_view)); Kokkos::fence(); return out_view; @@ -81,8 +76,7 @@ view_type check_array_bit_count(view_type view) { typename view_type::non_const_type out_view("out", view.extent(0)); typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for("KokkosKernels::Common::Test::CheckArrayBitCount", - my_exec_space(0, view.extent(0)), + Kokkos::parallel_for("KokkosKernels::Common::Test::CheckArrayBitCount", my_exec_space(0, view.extent(0)), ppccheck(view, out_view)); Kokkos::fence(); return out_view; @@ -92,8 +86,7 @@ template struct ffstest { view_type view; typename view_type::non_const_type out_view; - ffstest(view_type view_, typename view_type::non_const_type out_view_) - : view(view_), out_view(out_view_) {} + ffstest(view_type view_, typename view_type::non_const_type out_view_) : view(view_), out_view(out_view_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t row) const { @@ -108,8 +101,7 @@ template struct ffscheck { view_type view; typename view_type::non_const_type out_view; - ffscheck(view_type view_, typename view_type::non_const_type out_view_) - : view(view_), out_view(out_view_) {} + ffscheck(view_type view_, typename view_type::non_const_type out_view_) : view(view_), out_view(out_view_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t row) const { @@ -130,8 +122,7 @@ view_type get_ffs(view_type view) { typename view_type::non_const_type out_view("out", view.extent(0)); typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for("KokkosKernels::Common::Test::GetFFS", - my_exec_space(0, view.extent(0)), + Kokkos::parallel_for("KokkosKernels::Common::Test::GetFFS", my_exec_space(0, view.extent(0)), ffstest(view, out_view)); Kokkos::fence(); return out_view; @@ -142,8 +133,7 @@ view_type check_ffs(view_type view) 
{ typename view_type::non_const_type out_view("out", view.extent(0)); typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_for("KokkosKernels::Common::Test::CheckFFS", - my_exec_space(0, view.extent(0)), + Kokkos::parallel_for("KokkosKernels::Common::Test::CheckFFS", my_exec_space(0, view.extent(0)), ffscheck(view, out_view)); Kokkos::fence(); return out_view; @@ -159,8 +149,7 @@ void test_set_bit_count() { nonconstview count_bit_view("count_bit_view", array_size); - typename nonconstview::HostMirror hview = - Kokkos::create_mirror_view(count_bit_view); + typename nonconstview::HostMirror hview = Kokkos::create_mirror_view(count_bit_view); for (int i = 0; i < array_size; ++i) { hview(i) = lno_t(rand()) * lno_t(rand()); @@ -170,18 +159,13 @@ void test_set_bit_count() { // KokkosKernels::Impl::kk_print_1Dview(count_bit_view); - myview out1 = - Test::get_array_bit_count( - count_bit_view); - myview out2 = - Test::check_array_bit_count( - count_bit_view); + myview out1 = Test::get_array_bit_count(count_bit_view); + myview out2 = Test::check_array_bit_count(count_bit_view); // KokkosKernels::Impl::kk_print_1Dview(out1); // KokkosKernels::Impl::kk_print_1Dview(out2); - bool is_identical = KokkosKernels::Impl::kk_is_identical_view< - myview, myview, typename myview::value_type, - typename device::execution_space>(out1, out2, 0); + bool is_identical = KokkosKernels::Impl::kk_is_identical_view(out1, out2, 0); EXPECT_TRUE(is_identical); } @@ -193,8 +177,7 @@ void test_ffs() { nonconstview count_bit_view("count_bit_view", array_size); - typename nonconstview::HostMirror hview = - Kokkos::create_mirror_view(count_bit_view); + typename nonconstview::HostMirror hview = Kokkos::create_mirror_view(count_bit_view); for (int i = 0; i < array_size; ++i) { hview(i) = lno_t(rand()) * lno_t(rand()); @@ -204,16 +187,13 @@ void test_ffs() { // KokkosKernels::Impl::kk_print_1Dview(count_bit_view); - myview out1 = - Test::get_ffs(count_bit_view); - myview out2 = - 
Test::check_ffs(count_bit_view); + myview out1 = Test::get_ffs(count_bit_view); + myview out2 = Test::check_ffs(count_bit_view); // KokkosKernels::Impl::kk_print_1Dview(out1); // KokkosKernels::Impl::kk_print_1Dview(out2); - bool is_identical = KokkosKernels::Impl::kk_is_identical_view< - myview, myview, typename myview::value_type, - typename device::execution_space>(out1, out2, 0); + bool is_identical = KokkosKernels::Impl::kk_is_identical_view(out1, out2, 0); EXPECT_TRUE(is_identical); } diff --git a/packages/kokkos-kernels/example/batched_solve/examples_helper.hpp b/packages/kokkos-kernels/example/batched_solve/examples_helper.hpp index 3010f66ba89b..2bbe93fdfbfd 100644 --- a/packages/kokkos-kernels/example/batched_solve/examples_helper.hpp +++ b/packages/kokkos-kernels/example/batched_solve/examples_helper.hpp @@ -62,12 +62,8 @@ /// template -void create_saddle_point_matrices(const MatrixViewType &A, - const VectorViewType &Y, - const int n_dim = 3) { - Kokkos::Random_XorShift64_Pool< - typename MatrixViewType::device_type::execution_space> - random(13718); +void create_saddle_point_matrices(const MatrixViewType &A, const VectorViewType &Y, const int n_dim = 3) { + Kokkos::Random_XorShift64_Pool random(13718); const int N = A.extent(0); const int n = A.extent(1); const int n_2 = n_dim + 1; @@ -76,12 +72,8 @@ void create_saddle_point_matrices(const MatrixViewType &A, MatrixViewType xs("xs", N, n_1, n_dim); VectorViewType ys("ys", N, n_1); - Kokkos::fill_random( - xs, random, - Kokkos::reduction_identity::prod()); - Kokkos::fill_random( - ys, random, - Kokkos::reduction_identity::prod()); + Kokkos::fill_random(xs, random, Kokkos::reduction_identity::prod()); + Kokkos::fill_random(ys, random, Kokkos::reduction_identity::prod()); auto xs_host = Kokkos::create_mirror_view(xs); auto ys_host = Kokkos::create_mirror_view(ys); @@ -94,8 +86,8 @@ void create_saddle_point_matrices(const MatrixViewType &A, for (int i = 0; i < n_1; ++i) { for (int j = 0; j < n_1; ++j) { for 
(int l = 0; l < N; ++l) { - auto xs_i = Kokkos::subview(xs_host, l, i, Kokkos::ALL); - auto xs_j = Kokkos::subview(xs_host, l, j, Kokkos::ALL); + auto xs_i = Kokkos::subview(xs_host, l, i, Kokkos::ALL); + auto xs_j = Kokkos::subview(xs_host, l, j, Kokkos::ALL); typename MatrixViewType::value_type d = 0; for (int k = 0; k < n_dim; ++k) d += Kokkos::pow(xs_i(k) - xs_j(k), 2); d = Kokkos::sqrt(d); @@ -125,21 +117,12 @@ void create_saddle_point_matrices(const MatrixViewType &A, } template -void create_tridiagonal_batched_matrices(const int nnz, const int BlkSize, - const int N, const IntView &r, - const IntView &c, - const VectorViewType &D, - const VectorViewType &X, +void create_tridiagonal_batched_matrices(const int nnz, const int BlkSize, const int N, const IntView &r, + const IntView &c, const VectorViewType &D, const VectorViewType &X, const VectorViewType &B) { - Kokkos::Random_XorShift64_Pool< - typename VectorViewType::device_type::execution_space> - random(13718); - Kokkos::fill_random( - X, random, - Kokkos::reduction_identity::prod()); - Kokkos::fill_random( - B, random, - Kokkos::reduction_identity::prod()); + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(X, random, Kokkos::reduction_identity::prod()); + Kokkos::fill_random(B, random, Kokkos::reduction_identity::prod()); auto D_host = Kokkos::create_mirror_view(D); auto r_host = Kokkos::create_mirror_view(r); @@ -181,8 +164,7 @@ void create_tridiagonal_batched_matrices(const int nnz, const int BlkSize, } template -void getInvDiagFromCRS(const VType &V, const IntType &r, const IntType &c, - const VType &diag) { +void getInvDiagFromCRS(const VType &V, const IntType &r, const IntType &c, const VType &diag) { auto diag_values_host = Kokkos::create_mirror_view(diag); auto values_host = Kokkos::create_mirror_view(V); auto row_ptr_host = Kokkos::create_mirror_view(r); @@ -197,8 +179,7 @@ void getInvDiagFromCRS(const VType &V, const IntType &r, const IntType &c, int BlkSize = diag.extent(1); 
for (int i = 0; i < BlkSize; ++i) { - for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); - ++current_index) { + for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); ++current_index) { if (colIndices_host(current_index) == i) break; } for (int j = 0; j < N; ++j) { diff --git a/packages/kokkos-kernels/example/batched_solve/static_pivoting.cpp b/packages/kokkos-kernels/example/batched_solve/static_pivoting.cpp index e8a25778fcbb..f8eabdee2275 100644 --- a/packages/kokkos-kernels/example/batched_solve/static_pivoting.cpp +++ b/packages/kokkos-kernels/example/batched_solve/static_pivoting.cpp @@ -49,9 +49,7 @@ struct Functor_TeamTestStaticPivoting { const XYViewType _Y; KOKKOS_INLINE_FUNCTION - Functor_TeamTestStaticPivoting(const AViewType &A, const XYViewType &X, - const XYViewType &Y) - : _A(A), _X(X), _Y(Y) {} + Functor_TeamTestStaticPivoting(const AViewType &A, const XYViewType &X, const XYViewType &Y) : _A(A), _X(X), _Y(Y) {} template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { @@ -61,22 +59,16 @@ struct Functor_TeamTestStaticPivoting { auto X = Kokkos::subview(_X, matrix_id, Kokkos::ALL); auto Y = Kokkos::subview(_Y, matrix_id, Kokkos::ALL); member.team_barrier(); - KokkosBatched::TeamGesv::invoke(member, - A, X, - Y); + KokkosBatched::TeamGesv::invoke(member, A, X, Y); member.team_barrier(); } inline void run() { std::string name("KokkosBatched::Test::StaticPivoting"); - Kokkos::TeamPolicy policy(_A.extent(0), Kokkos::AUTO(), - Kokkos::AUTO()); + Kokkos::TeamPolicy policy(_A.extent(0), Kokkos::AUTO(), Kokkos::AUTO()); - using MatrixViewType = - Kokkos::View; + using MatrixViewType = Kokkos::View; const int n = _A.extent(1); size_t bytes_0 = MatrixViewType::shmem_size(n, n + 4); @@ -95,8 +87,7 @@ struct Functor_SerialTestStaticPivoting { const XYViewType _Y; KOKKOS_INLINE_FUNCTION - Functor_SerialTestStaticPivoting(const AViewType &A, const AViewType &tmp, - const XYViewType &X, const 
XYViewType &Y) + Functor_SerialTestStaticPivoting(const AViewType &A, const AViewType &tmp, const XYViewType &X, const XYViewType &Y) : _A(A), _tmp(tmp), _X(X), _Y(Y) {} KOKKOS_INLINE_FUNCTION void operator()(const int &matrix_id) const { @@ -104,8 +95,7 @@ struct Functor_SerialTestStaticPivoting { auto tmp = Kokkos::subview(_tmp, matrix_id, Kokkos::ALL, Kokkos::ALL); auto X = Kokkos::subview(_X, matrix_id, Kokkos::ALL); auto Y = Kokkos::subview(_Y, matrix_id, Kokkos::ALL); - KokkosBatched::SerialGesv::invoke( - A, X, Y, tmp); + KokkosBatched::SerialGesv::invoke(A, X, Y, tmp); } inline void run() { @@ -144,12 +134,9 @@ int main(int /*argc*/, char ** /*argv[]*/) { KokkosKernels::Impl::kk_write_3Dview_to_file(A, "A.txt"); KokkosKernels::Impl::kk_write_2Dview_to_file(Y, "Y.txt"); - Functor_SerialTestStaticPivoting(A, tmp, - X, Y) - .run(); + Functor_SerialTestStaticPivoting(A, tmp, X, Y).run(); KokkosKernels::Impl::kk_write_2Dview_to_file(X, "X_serial.txt"); - Functor_TeamTestStaticPivoting(A2, X, Y2) - .run(); + Functor_TeamTestStaticPivoting(A2, X, Y2).run(); KokkosKernels::Impl::kk_write_2Dview_to_file(X, "X_team.txt"); } Kokkos::finalize(); diff --git a/packages/kokkos-kernels/example/batched_solve/team_GMRES.cpp b/packages/kokkos-kernels/example/batched_solve/team_GMRES.cpp index b543ddaad6bd..ab14b4b07ae5 100644 --- a/packages/kokkos-kernels/example/batched_solve/team_GMRES.cpp +++ b/packages/kokkos-kernels/example/batched_solve/team_GMRES.cpp @@ -40,8 +40,8 @@ typedef Kokkos::DefaultExecutionSpace exec_space; -template +template struct Functor_TestBatchedTeamVectorGMRES { const ValuesViewType _values; const ValuesViewType _diag; @@ -53,10 +53,9 @@ struct Functor_TestBatchedTeamVectorGMRES { KrylovHandleType _handle; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES( - const ValuesViewType &values, const IntView &r, const IntView &c, - const VectorViewType &X, const VectorViewType &B, const int team_size, - const int vector_length, KrylovHandleType 
&handle) + Functor_TestBatchedTeamVectorGMRES(const ValuesViewType &values, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int team_size, + const int vector_length, KrylovHandleType &handle) : _values(values), _r(r), _c(c), @@ -67,11 +66,9 @@ struct Functor_TestBatchedTeamVectorGMRES { _handle(handle) {} KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES( - const ValuesViewType &values, const ValuesViewType &diag, - const IntView &r, const IntView &c, const VectorViewType &X, - const VectorViewType &B, const int team_size, const int vector_length, - KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES(const ValuesViewType &values, const ValuesViewType &diag, const IntView &r, + const IntView &c, const VectorViewType &X, const VectorViewType &B, + const int team_size, const int vector_length, KrylovHandleType &handle) : _values(values), _diag(diag), _r(r), @@ -86,61 +83,42 @@ struct Functor_TestBatchedTeamVectorGMRES { KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - using TeamVectorCopy1D = - KokkosBatched::TeamVectorCopy; - - auto d = Kokkos::subview( - _values, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - - using ScratchPadIntViewType = - Kokkos::View; - using ScratchPadValuesViewType = Kokkos::View< - typename ValuesViewType::non_const_value_type **, - typename ValuesViewType::array_layout, - typename ValuesViewType::execution_space::scratch_memory_space>; - - using Operator = - KokkosBatched::CrsMatrix; - - ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), - _r.extent(0) + _c.extent(0)); - - auto r = - Kokkos::subview(tmp_1D_int, 
Kokkos::make_pair(0, (int)_r.extent(0))); - auto c = Kokkos::subview( - tmp_1D_int, - Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); + using TeamVectorCopy1D = KokkosBatched::TeamVectorCopy; + + auto d = Kokkos::subview(_values, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + + using ScratchPadIntViewType = Kokkos::View; + using ScratchPadValuesViewType = + Kokkos::View; + + using Operator = KokkosBatched::CrsMatrix; + + ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), _r.extent(0) + _c.extent(0)); + + auto r = Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); + auto c = Kokkos::subview(tmp_1D_int, Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); TeamVectorCopy1D::invoke(member, _r, r); TeamVectorCopy1D::invoke(member, _c, c); Operator A(d, r, c); if (UsePrec) { - ScratchPadValuesViewType diag( - member.team_scratch(0), last_matrix - first_matrix, _diag.extent(1)); + ScratchPadValuesViewType diag(member.team_scratch(0), last_matrix - first_matrix, _diag.extent(1)); using PrecOperator = KokkosBatched::JacobiPrec; KokkosBatched::TeamVectorCopy::invoke( - member, - Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL), - diag); + member, Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL), diag); PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType, PrecOperator, KrylovHandleType>( - member, A, b, x, P, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, P, _handle); } else { - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType>(member, A, b, x, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, _handle); } } 
@@ -149,10 +127,8 @@ struct Functor_TestBatchedTeamVectorGMRES { Kokkos::Timer timer; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) @@ -168,21 +144,17 @@ struct Functor_TestBatchedTeamVectorGMRES { using ViewType2D = Kokkos::View; - size_t bytes_1D = - ViewType2D::shmem_size(_handle.get_number_of_systems_per_team(), 1); + size_t bytes_1D = ViewType2D::shmem_size(_handle.get_number_of_systems_per_team(), 1); size_t bytes_row_ptr = IntView::shmem_size(_r.extent(0)); size_t bytes_col_idc = IntView::shmem_size(_c.extent(0)); - size_t bytes_2D_1 = ViewType2D::shmem_size( - _handle.get_number_of_systems_per_team(), _X.extent(1)); - size_t bytes_2D_2 = ViewType2D::shmem_size( - _handle.get_number_of_systems_per_team(), maximum_iteration + 1); + size_t bytes_2D_1 = ViewType2D::shmem_size(_handle.get_number_of_systems_per_team(), _X.extent(1)); + size_t bytes_2D_2 = ViewType2D::shmem_size(_handle.get_number_of_systems_per_team(), maximum_iteration + 1); size_t bytes_int = bytes_row_ptr + bytes_col_idc; size_t bytes_diag = bytes_2D_1; size_t bytes_tmp = 2 * bytes_2D_1 + 2 * bytes_1D + bytes_2D_2; - policy.set_scratch_size( - 0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); + policy.set_scratch_size(0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); exec_space().fence(); timer.reset(); @@ -221,8 +193,7 @@ int main(int /*argc*/, char ** /*argv*/) { printf("N = %d, Blk = %d, nnz = %d\n", N, Blk, nnz); - create_tridiagonal_batched_matrices(nnz, Blk, N, rowOffsets, colIndices, - values, x, y); + create_tridiagonal_batched_matrices(nnz, Blk, 
N, rowOffsets, colIndices, values, x, y); // Replace y by ones: Kokkos::deep_copy(y, 1.); @@ -242,9 +213,7 @@ int main(int /*argc*/, char ** /*argv*/) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KokkosBatched::KrylovHandle; + using KrylovHandleType = KokkosBatched::KrylovHandle; const int N_team = 2; const int n_iterations = 150; @@ -255,8 +224,7 @@ int main(int /*argc*/, char ** /*argv*/) { const int ortho_strategy = 0; KrylovHandleType handle(N, N_team, n_iterations, true); - handle.Arnoldi_view = - Scalar3DViewType("", N, n_iterations, Blk + n_iterations + 3); + handle.Arnoldi_view = Scalar3DViewType("", N, n_iterations, Blk + n_iterations + 3); handle.set_max_iteration(n_iterations); handle.set_tolerance(tol); @@ -265,37 +233,27 @@ int main(int /*argc*/, char ** /*argv*/) { handle.set_compute_last_residual(true); double time = - Functor_TestBatchedTeamVectorGMRES(values, diag, rowOffsets, - colIndices, x, y, team_size, - vector_length, handle) + Functor_TestBatchedTeamVectorGMRES( + values, diag, rowOffsets, colIndices, x, y, team_size, vector_length, handle) .run(); printf("times = %f secondes\n", time); for (int i = 0; i < N; ++i) { if (handle.is_converged_host(i)) { - std::cout - << "System " << i << " converged in " - << handle.get_iteration_host(i) - << " iterations, the initial absolute norm of the residual was " - << handle.get_norm_host(i, 0) << " and is now " - << handle.get_last_norm_host(i) << std::endl; + std::cout << "System " << i << " converged in " << handle.get_iteration_host(i) + << " iterations, the initial absolute norm of the residual was " << handle.get_norm_host(i, 0) + << " and is now " << handle.get_last_norm_host(i) << std::endl; } else { - std::cout - << "System " << i << " did not converge in " - << handle.get_max_iteration() - << " iterations, the initial absolute norm of the residual was " - << handle.get_norm_host(i, 0) << " and is now " - << handle.get_last_norm_host(i) 
<< std::endl; + std::cout << "System " << i << " did not converge in " << handle.get_max_iteration() + << " iterations, the initial absolute norm of the residual was " << handle.get_norm_host(i, 0) + << " and is now " << handle.get_last_norm_host(i) << std::endl; } } if (handle.is_converged_host()) std::cout << "All the systems have converged." << std::endl; else - std::cout << "There is at least one system that did not converge." - << std::endl; + std::cout << "There is at least one system that did not converge." << std::endl; } Kokkos::finalize(); } diff --git a/packages/kokkos-kernels/example/gmres/ex_real_A.cpp b/packages/kokkos-kernels/example/gmres/ex_real_A.cpp index 14c4eaeb1514..f18ccfd2782b 100644 --- a/packages/kokkos-kernels/example/gmres/ex_real_A.cpp +++ b/packages/kokkos-kernels/example/gmres/ex_real_A.cpp @@ -31,16 +31,14 @@ int main(int argc, char* argv[]) { using CRS = KokkosSparse::CrsMatrix; using ViewVectorType = Kokkos::View; - using KernelHandle = - KokkosKernels::Experimental::KokkosKernelsHandle; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; std::string filename("bcsstk09.mtx"); // example matrix std::string ortho("CGS2"); // orthog type int m = 50; // Max subspace size before restarting. - double convTol = 1e-10; // Relative residual convergence tolerance. - int cycLim = 50; // Maximum number of times to restart the solver. - bool rand_rhs = false; // Generate random right-hand side. + double convTol = 1e-10; // Relative residual convergence tolerance. + int cycLim = 50; // Maximum number of times to restart the solver. + bool rand_rhs = false; // Generate random right-hand side. 
for (int i = 1; i < argc; ++i) { const std::string& token = argv[i]; @@ -51,29 +49,26 @@ int main(int argc, char* argv[]) { if (token == std::string("--ortho")) ortho = argv[++i]; if (token == std::string("--rand_rhs")) rand_rhs = true; if (token == std::string("--help") || token == std::string("-h")) { - std::cout - << "Kokkos GMRES solver options:" << std::endl - << "--filename : The name of a matrix market (.mtx) file for " - "matrix A (Default bcsstk09.mtx)." - << std::endl - << "--max-subsp : The maximum size of the Kyrlov subspace before " - "restarting (Default 50)." - << std::endl - << "--max-restarts: Maximum number of GMRES restarts (Default 50)." - << std::endl - << "--tol : Convergence tolerance. (Default 1e-10)." - << std::endl - << "--ortho : Type of orthogonalization. Use 'CGS2' or 'MGS'. " - "(Default 'CGS2')" - << std::endl - << "--rand_rhs : Generate a random right-hand side b. (Else, " - "default uses b = vector of ones.)" - << std::endl - << "--help -h : Display this help message." << std::endl - << "Example Call : ./Gmres.exe --filename Laplace3D100.mtx --tol " - "1e-5 --max-subsp 100 " - << std::endl - << std::endl; + std::cout << "Kokkos GMRES solver options:" << std::endl + << "--filename : The name of a matrix market (.mtx) file for " + "matrix A (Default bcsstk09.mtx)." + << std::endl + << "--max-subsp : The maximum size of the Kyrlov subspace before " + "restarting (Default 50)." + << std::endl + << "--max-restarts: Maximum number of GMRES restarts (Default 50)." << std::endl + << "--tol : Convergence tolerance. (Default 1e-10)." << std::endl + << "--ortho : Type of orthogonalization. Use 'CGS2' or 'MGS'. " + "(Default 'CGS2')" + << std::endl + << "--rand_rhs : Generate a random right-hand side b. (Else, " + "default uses b = vector of ones.)" + << std::endl + << "--help -h : Display this help message." 
<< std::endl + << "Example Call : ./Gmres.exe --filename Laplace3D100.mtx --tol " + "1e-5 --max-subsp 100 " + << std::endl + << std::endl; return 0; } } @@ -98,10 +93,8 @@ int main(int argc, char* argv[]) { auto gmres_handle = kh.get_gmres_handle(); // Get full gmres handle type using decltype. Deferencing a pointer gives a // reference, so we need to strip that too. - using GMRESHandle = - typename std::remove_reference::type; - gmres_handle->set_ortho(ortho == "CGS2" ? GMRESHandle::Ortho::CGS2 - : GMRESHandle::Ortho::MGS); + using GMRESHandle = typename std::remove_reference::type; + gmres_handle->set_ortho(ortho == "CGS2" ? GMRESHandle::Ortho::CGS2 : GMRESHandle::Ortho::MGS); if (rand_rhs) { // Make rhs random. @@ -128,8 +121,7 @@ int main(int argc, char* argv[]) { std::cout << "=========================================" << std::endl; std::cout << "Verify from main: Ending residual is " << endRes << std::endl; std::cout << "Number of iterations is: " << numIters << std::endl; - std::cout << "Diff of residual from main - residual from solver: " - << endRelRes - endRes << std::endl; + std::cout << "Diff of residual from main - residual from solver: " << endRelRes - endRes << std::endl; std::cout << "Convergence flag is : " << convFlag << std::endl; } Kokkos::finalize(); diff --git a/packages/kokkos-kernels/example/gmres/test_prec.cpp b/packages/kokkos-kernels/example/gmres/test_prec.cpp index 8d1ff74b87d3..942dc176b64f 100644 --- a/packages/kokkos-kernels/example/gmres/test_prec.cpp +++ b/packages/kokkos-kernels/example/gmres/test_prec.cpp @@ -27,14 +27,10 @@ int main(int argc, char* argv[]) { using OT = int; using EXSP = Kokkos::DefaultExecutionSpace; using MESP = typename EXSP::memory_space; - using CRS = - KokkosSparse::CrsMatrix, void, OT>; + using CRS = KokkosSparse::CrsMatrix, void, OT>; - using ViewVectorType = - Kokkos::View>; - using KernelHandle = - KokkosKernels::Experimental::KokkosKernelsHandle; + using ViewVectorType = Kokkos::View>; + using 
KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; std::string ortho("CGS2"); // orthog type int n = 1000; // Matrix size @@ -53,29 +49,26 @@ int main(int argc, char* argv[]) { if (token == std::string("--ortho")) ortho = argv[++i]; if (token == std::string("--rand_rhs")) rand_rhs = true; if (token == std::string("--help") || token == std::string("-h")) { - std::cout - << "Kokkos GMRES solver options:" << std::endl - << "--mat-size : The size of the nxn test matrix. (Default: " - "n=1000.)" - << std::endl - << "--max-subsp : The maximum size of the Kyrlov subspace before " - "restarting (Default 50)." - << std::endl - << "--max-restarts: Maximum number of GMRES restarts (Default 50)." - << std::endl - << "--tol : Convergence tolerance. (Default 1e-10)." - << std::endl - << "--ortho : Type of orthogonalization. Use 'CGS2' or 'MGS'. " - "(Default 'CGS2')" - << std::endl - << "--rand_rhs : Generate a random right-hand side b. (Else, " - "default uses b = vector of ones.)" - << std::endl - << "--help -h : Display this help message." << std::endl - << "Example Call : ./Gmres.exe --filename Laplace3D100.mtx --tol " - "1e-5 --max-subsp 100 " - << std::endl - << std::endl; + std::cout << "Kokkos GMRES solver options:" << std::endl + << "--mat-size : The size of the nxn test matrix. (Default: " + "n=1000.)" + << std::endl + << "--max-subsp : The maximum size of the Kyrlov subspace before " + "restarting (Default 50)." + << std::endl + << "--max-restarts: Maximum number of GMRES restarts (Default 50)." << std::endl + << "--tol : Convergence tolerance. (Default 1e-10)." << std::endl + << "--ortho : Type of orthogonalization. Use 'CGS2' or 'MGS'. " + "(Default 'CGS2')" + << std::endl + << "--rand_rhs : Generate a random right-hand side b. (Else, " + "default uses b = vector of ones.)" + << std::endl + << "--help -h : Display this help message." 
<< std::endl + << "Example Call : ./Gmres.exe --filename Laplace3D100.mtx --tol " + "1e-5 --max-subsp 100 " + << std::endl + << std::endl; return 0; } } @@ -87,18 +80,16 @@ int main(int argc, char* argv[]) { auto gmres_handle = kh.get_gmres_handle(); // Get full gmres handle type using decltype. Deferencing a pointer gives a // reference, so we need to strip that too. - using GMRESHandle = - typename std::remove_reference::type; - gmres_handle->set_ortho(ortho == "CGS2" ? GMRESHandle::Ortho::CGS2 - : GMRESHandle::Ortho::MGS); + using GMRESHandle = typename std::remove_reference::type; + gmres_handle->set_ortho(ortho == "CGS2" ? GMRESHandle::Ortho::CGS2 : GMRESHandle::Ortho::MGS); // Initialize Kokkos AFTER parsing parameters: Kokkos::initialize(); { // Generate a diagonal matrix with entries 1, 2, ...., 1000 and its inverse. - CRS A = KokkosSparse::Impl::kk_generate_diag_matrix(n); - auto myPrec = new KokkosSparse::Experimental::MatrixPrec( - KokkosSparse::Impl::kk_generate_diag_matrix(n, true)); + CRS A = KokkosSparse::Impl::kk_generate_diag_matrix(n); + auto myPrec = + new KokkosSparse::Experimental::MatrixPrec(KokkosSparse::Impl::kk_generate_diag_matrix(n, true)); ViewVectorType X(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), n); // Solution and initial guess @@ -107,9 +98,8 @@ int main(int argc, char* argv[]) { n); // right-hand side vec int rand_seed = 123; Kokkos::Random_XorShift64_Pool<> pool(rand_seed); - Kokkos::fill_random( - X, pool, -1, - 1); // Use non-zero initial guess to test GMRES properties. + Kokkos::fill_random(X, pool, -1, + 1); // Use non-zero initial guess to test GMRES properties. 
if (rand_rhs) { Kokkos::fill_random(B, pool, -1, 1); } else { @@ -131,8 +121,7 @@ int main(int argc, char* argv[]) { std::cout << "=========================================" << std::endl; std::cout << "Verify from main: Ending residual is " << endRes << std::endl; std::cout << "Number of iterations is: " << numIters << std::endl; - std::cout << "Diff of residual from main - residual from solver: " - << endRelRes - endRes << std::endl; + std::cout << "Diff of residual from main - residual from solver: " << endRelRes - endRes << std::endl; std::cout << "Convergence flag is : " << convFlag << std::endl; if (endRes < convTol && numIters == 1) { pass = true; diff --git a/packages/kokkos-kernels/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp b/packages/kokkos-kernels/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp index 9a5537ee5b90..5506ce68d8bd 100644 --- a/packages/kokkos-kernels/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp +++ b/packages/kokkos-kernels/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp @@ -100,8 +100,7 @@ struct Parameters { } }; -void print_options(std::ostream& os, const char* app_name, - unsigned int indent = 0) { +void print_options(std::ostream& os, const char* app_name, unsigned int indent = 0) { std::string spaces(indent, ' '); os << "Usage:" << std::endl << spaces << " " << app_name << " [parameters]" << std::endl @@ -110,14 +109,11 @@ void print_options(std::ostream& os, const char* app_name, << spaces << " Parallelism (select one of the following):" << std::endl << spaces << " --serial Execute serially." << std::endl << spaces << " --threads Use N posix threads." << std::endl - << spaces << " --openmp Use OpenMP with N threads." - << std::endl + << spaces << " --openmp Use OpenMP with N threads." << std::endl << spaces << " --cuda Use CUDA" << std::endl << std::endl << spaces << " Required Parameters:" << std::endl - << spaces - << " --amtx Input file in Matrix Market format (.mtx)." 
- << std::endl + << spaces << " --amtx Input file in Matrix Market format (.mtx)." << std::endl << std::endl << spaces << " --algorithm Set the algorithm to use. " @@ -173,16 +169,12 @@ void print_options(std::ostream& os, const char* app_name, << " --verbose-level Set verbosity level [0..5] " "where N > 0 means print verbose messags." << std::endl - << spaces << " Default: 0" - << std::endl - << spaces - << " --help Print out command line help." - << std::endl + << spaces << " Default: 0" << std::endl + << spaces << " --help Print out command line help." << std::endl << spaces << " " << std::endl; } -int parse_inputs(KokkosKernels::Example::Parameters& params, int argc, - char** argv) { +int parse_inputs(KokkosKernels::Example::Parameters& params, int argc, char** argv) { bool got_required_param_amtx = false; bool got_required_param_algorithm = false; @@ -208,40 +200,32 @@ int parse_inputs(KokkosKernels::Example::Parameters& params, int argc, params.verbose_level = atoi(argv[++i]); params.verbose_level = std::min(5, params.verbose_level); params.verbose_level = std::max(0, params.verbose_level); - } else if (0 == - Test::string_compare_no_case(argv[i], "--output-histogram")) { + } else if (0 == Test::string_compare_no_case(argv[i], "--output-histogram")) { params.output_histogram = 1; - } else if (0 == - Test::string_compare_no_case(argv[i], "--output-graphviz")) { + } else if (0 == Test::string_compare_no_case(argv[i], "--output-graphviz")) { params.output_graphviz = 1; - } else if (0 == Test::string_compare_no_case( - argv[i], "--output-graphviz-vert-max")) { + } else if (0 == Test::string_compare_no_case(argv[i], "--output-graphviz-vert-max")) { params.output_graphviz_vert_max = atoi(argv[++i]); } else if (0 == Test::string_compare_no_case(argv[i], "--algorithm")) { ++i; - if (0 == - Test::string_compare_no_case(argv[i], "COLORING_D2_MATRIX_SQUARED")) { + if (0 == Test::string_compare_no_case(argv[i], "COLORING_D2_MATRIX_SQUARED")) { params.algorithm = 1; 
got_required_param_algorithm = true; - } else if (0 == - Test::string_compare_no_case(argv[i], "COLORING_D2_SERIAL")) { + } else if (0 == Test::string_compare_no_case(argv[i], "COLORING_D2_SERIAL")) { params.algorithm = 2; got_required_param_algorithm = true; } else if (0 == Test::string_compare_no_case(argv[i], "COLORING_D2_VB") || 0 == Test::string_compare_no_case(argv[i], "COLORING_D2")) { params.algorithm = 3; got_required_param_algorithm = true; - } else if (0 == - Test::string_compare_no_case(argv[i], "COLORING_D2_VB_BIT")) { + } else if (0 == Test::string_compare_no_case(argv[i], "COLORING_D2_VB_BIT")) { params.algorithm = 4; got_required_param_algorithm = true; - } else if (0 == Test::string_compare_no_case(argv[i], - "COLORING_D2_VB_BIT_EF")) { + } else if (0 == Test::string_compare_no_case(argv[i], "COLORING_D2_VB_BIT_EF")) { params.algorithm = 5; got_required_param_algorithm = true; } else { - std::cerr << "2-Unrecognized command line argument #" << i << ": " - << argv[i] << std::endl; + std::cerr << "2-Unrecognized command line argument #" << i << ": " << argv[i] << std::endl; print_options(std::cout, argv[0]); return 1; } @@ -250,8 +234,7 @@ int parse_inputs(KokkosKernels::Example::Parameters& params, int argc, print_options(std::cout, argv[0]); return 1; } else { - std::cerr << "3-Unrecognized command line argument #" << i << ": " - << argv[i] << std::endl; + std::cerr << "3-Unrecognized command line argument #" << i << ": " << argv[i] << std::endl; print_options(std::cout, argv[0]); return 1; } @@ -263,21 +246,19 @@ int parse_inputs(KokkosKernels::Example::Parameters& params, int argc, return 1; } if (!got_required_param_algorithm) { - std::cout << "Missing required parameter algorithm" << std::endl - << std::endl; + std::cout << "Missing required parameter algorithm" << std::endl << std::endl; print_options(std::cout, argv[0]); return 1; } - if (!params.use_serial && !params.use_threads && !params.use_openmp && - !params.use_cuda) { + if 
(!params.use_serial && !params.use_threads && !params.use_openmp && !params.use_cuda) { print_options(std::cout, argv[0]); return 1; } return 0; } -template +template void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { using namespace KokkosGraph; using namespace KokkosGraph::Experimental; @@ -285,14 +266,13 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { int algorithm = params.algorithm; int shmemsize = params.shmemsize; - using lno_view_type = typename CrsGraph_type::row_map_type::non_const_type; - using lno_nnz_view_type = - typename CrsGraph_type::entries_type::non_const_type; + using lno_view_type = typename CrsGraph_type::row_map_type::non_const_type; + using lno_nnz_view_type = typename CrsGraph_type::entries_type::non_const_type; using size_type = typename lno_view_type::non_const_value_type; using lno_type = typename lno_nnz_view_type::non_const_value_type; - using KernelHandle_type = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_type, kk_scalar_type, ExecSpace, TempMemSpace, - PersistentMemSpace>; + using KernelHandle_type = + KokkosKernels::Experimental::KokkosKernelsHandle; // Create a kernel handle KernelHandle_type kh; @@ -333,52 +313,39 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { break; } - std::cout << std::endl - << "Run Graph Color D2 (" << label_algorithm << ")" << std::endl; + std::cout << std::endl << "Run Graph Color D2 (" << label_algorithm << ")" << std::endl; // ------------------------------------------ // Call the distance-2 graph coloring routine // ------------------------------------------ - graph_compute_distance2_color(&kh, crsGraph.numRows(), num_cols, - crsGraph.row_map, crsGraph.entries, - crsGraph.row_map, crsGraph.entries); + graph_compute_distance2_color(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, + crsGraph.entries); // 
------------------------------------------ // Get the results // ------------------------------------------ - size_t num_colors = - kh.get_distance2_graph_coloring_handle()->get_num_colors(); - size_t num_phases = - kh.get_distance2_graph_coloring_handle()->get_num_phases(); + size_t num_colors = kh.get_distance2_graph_coloring_handle()->get_num_colors(); + size_t num_phases = kh.get_distance2_graph_coloring_handle()->get_num_phases(); if (params.verbose_level > 0) { - std::cout - << "Total Time: " - << kh.get_distance2_graph_coloring_handle()->get_overall_coloring_time() - << std::endl - << "Num colors: " - << kh.get_distance2_graph_coloring_handle()->get_num_colors() - << std::endl - << "Num Phases: " - << kh.get_distance2_graph_coloring_handle()->get_num_phases() - << std::endl - << "Colors:\n\t"; - KokkosKernels::Impl::print_1Dview( - kh.get_distance2_graph_coloring_handle()->get_vertex_colors()); + std::cout << "Total Time: " << kh.get_distance2_graph_coloring_handle()->get_overall_coloring_time() << std::endl + << "Num colors: " << kh.get_distance2_graph_coloring_handle()->get_num_colors() << std::endl + << "Num Phases: " << kh.get_distance2_graph_coloring_handle()->get_num_phases() << std::endl + << "Colors:\n\t"; + KokkosKernels::Impl::print_1Dview(kh.get_distance2_graph_coloring_handle()->get_vertex_colors()); std::cout << std::endl; } // ------------------------------------------ // Save coloring to a GraphViz file // ------------------------------------------ - if (params.output_graphviz && - crsGraph.numRows() <= params.output_graphviz_vert_max) { + if (params.output_graphviz && crsGraph.numRows() <= params.output_graphviz_vert_max) { auto colors = kh.get_distance2_graph_coloring_handle()->get_vertex_colors(); std::ofstream os("G.dot", std::ofstream::out); - kh.get_distance2_graph_coloring_handle()->dump_graphviz( - os, crsGraph.numRows(), crsGraph.row_map, crsGraph.entries, colors); + kh.get_distance2_graph_coloring_handle()->dump_graphviz(os, 
crsGraph.numRows(), crsGraph.row_map, crsGraph.entries, + colors); } // ------------------------------------------ @@ -394,29 +361,22 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { d2_coloring_is_valid = KokkosGraph::Impl::graph_verify_distance2_color( &kh, crsGraph.numRows(), // crsGraph.numCols(), - num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, - crsGraph.entries, d2_coloring_validation_flags); + num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, d2_coloring_validation_flags); // Print out messages based on coloring validation check. if (d2_coloring_is_valid) { - std::cout << std::endl - << "Distance-2 Graph Coloring is VALID" << std::endl - << std::endl; + std::cout << std::endl << "Distance-2 Graph Coloring is VALID" << std::endl << std::endl; } else { str_color_is_valid = "INVALID"; std::cout << std::endl << "Distance-2 Graph Coloring is NOT VALID" << std::endl - << " - Vert(s) left uncolored : " - << d2_coloring_validation_flags[1] << std::endl - << " - Invalid D2 Coloring : " - << d2_coloring_validation_flags[2] << std::endl + << " - Vert(s) left uncolored : " << d2_coloring_validation_flags[1] << std::endl + << " - Invalid D2 Coloring : " << d2_coloring_validation_flags[2] << std::endl << std::endl; } if (d2_coloring_validation_flags[3]) { - std::cout << "Distance-2 Graph Coloring may have poor quality." - << std::endl - << " - Vert(s) have high color value : " - << d2_coloring_validation_flags[3] << std::endl + std::cout << "Distance-2 Graph Coloring may have poor quality." 
<< std::endl + << " - Vert(s) have high color value : " << d2_coloring_validation_flags[3] << std::endl << std::endl; } } @@ -425,27 +385,24 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { // Print out a histogram of the colors // ------------------------------------------ if (0 != params.output_histogram) { - KokkosGraph::Impl::graph_print_distance2_color_histogram( - &kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, - crsGraph.row_map, crsGraph.entries, false); + KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, + crsGraph.entries, crsGraph.row_map, crsGraph.entries, + false); } // ------------------------------------------ // Print out a summary // ------------------------------------------ std::string mtx_bin_file = params.mtx_bin_file; - mtx_bin_file = mtx_bin_file.substr(mtx_bin_file.find_last_of("/\\") + 1); + mtx_bin_file = mtx_bin_file.substr(mtx_bin_file.find_last_of("/\\") + 1); std::cout << "Summary" << std::endl << "-------" << std::endl - << " KExecSName : " << Kokkos::DefaultExecutionSpace::name() - << std::endl + << " KExecSName : " << Kokkos::DefaultExecutionSpace::name() << std::endl << " Filename : " << mtx_bin_file << std::endl << " Num Verts : " << crsGraph.numRows() << std::endl - << " Num Edges : " << crsGraph.entries.extent(0) - << std::endl - << " Concurrency : " - << Kokkos::DefaultExecutionSpace().concurrency() << std::endl + << " Num Edges : " << crsGraph.entries.extent(0) << std::endl + << " Concurrency : " << Kokkos::DefaultExecutionSpace().concurrency() << std::endl << " Algorithm : " << label_algorithm << std::endl << "Coloring Stats" << std::endl << " Num colors : " << num_colors << std::endl @@ -455,26 +412,21 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { } // run_example() -template +template void driver(Parameters params) { using myExecSpace = exec_space; using myFastDevice = 
Kokkos::Device; - using crstmat_type = - typename KokkosSparse::CrsMatrix; - using graph_type = typename crstmat_type::StaticCrsGraphType; - using data_type = typename graph_type::data_type; + using crstmat_type = typename KokkosSparse::CrsMatrix; + using graph_type = typename crstmat_type::StaticCrsGraphType; + using data_type = typename graph_type::data_type; char* mat_file = params.mtx_bin_file; - crstmat_type crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix(mat_file); + crstmat_type crsmat = KokkosKernels::Impl::read_kokkos_crst_matrix(mat_file); graph_type crsgraph = crsmat.graph; data_type num_cols = crsmat.numCols(); - KokkosKernels::Example::run_example( + KokkosKernels::Example::run_example( crsgraph, num_cols, params); } // driver() @@ -494,13 +446,10 @@ int main(int argc, char* argv[]) { return 0; } - const int num_threads = - params.use_openmp; // Assumption is that use_openmp variable is provided - // as number of threads + const int num_threads = params.use_openmp; // Assumption is that use_openmp variable is provided + // as number of threads const int device_id = 0; - Kokkos::initialize(Kokkos::InitializationSettings() - .set_num_threads(num_threads) - .set_device_id(device_id)); + Kokkos::initialize(Kokkos::InitializationSettings().set_num_threads(num_threads).set_device_id(device_id)); // Print out information about the configuration of the run if verbose_level // >= 5 @@ -510,22 +459,19 @@ int main(int argc, char* argv[]) { #if defined(KOKKOS_ENABLE_OPENMP) if (params.use_openmp) { - KokkosKernels::Example::driver(params); + KokkosKernels::Example::driver(params); } #endif #if defined(KOKKOS_ENABLE_CUDA) if (params.use_cuda) { - KokkosKernels::Example::driver(params); + KokkosKernels::Example::driver(params); } #endif #if defined(KOKKOS_ENABLE_SERIAL) if (params.use_serial) { - KokkosKernels::Example::driver(params); + KokkosKernels::Example::driver(params); } #endif diff --git a/packages/kokkos-kernels/example/graph/PartitioningExample 
b/packages/kokkos-kernels/example/graph/PartitioningExample deleted file mode 100755 index 88619a8d127f7c5acc2015424b160883008f33aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 21536 zcmeHPeQ;FQb-(%ofw3eowlPq9jRAuVYY8wGIF46Bi=T`D86--G@nfZ3!4iFSwE}@7 z2O@CU2qhYd6Vjv|Ch?@64AW3gN{K^3e1xTr$5j&IFsaih4mc|$J4!ehs;M@ko8R zX6ceg>VpgG!;P(N3)_m9E?m0A+tTD+B*#tqiT9~B)v_m2L-u)OO9+~O;>pKv5`N?H z(?$0|GBiHor9$%LUz-03_}zxzMEq{YFAu-5N(qQlQZT%k27fjUeoh+vkJI33jH&wl zZW{bA)8Hd%@Cz7U$mrn$ADsnKkS1|m_@1Tt7(xr4B6ZDw;Q($drzs1HYXnL7$WTOTcY%rv9h zB27EZnn2JD)YmuFf_u}N_0i&RQwxwSp(W*w;b>WrKP7$z;bBOFvH`WNwY6powTgsU z-MBN{7&O;~T3Q=I#8#`=>YAEEjWDXVEno(sp|)^TG=v&jLQzpyTi?_Y5_LNx;V3|J zB-|LS11HJypc(?#=;&>J^`WeL2O ziMY;^|8*@Jkxj8q*bMv~MxElvgJ0#`T?qFa@geF|i2hH2n%+7+j3tpJaxx?tRMMg7 zg!$38K80nIEhYm=GST^mu#_^zOy(bcWDBv$HAd@6muZDtWR=z@v&y!|f#(z<@$(#b zy>5w6=)mioMfhR|-kH~Z4!kq384kQNXH+`yItP>fjSf8J3|+Q3@RUPz+2+7ge$l1b zfv0?>OPd4l%t3n`c*=3Q7#)MTMrTg@JdZH;_e66Ne*(qmxR85Zag00~0cm7T6#w(@ z^PxuVZA9$v8$p?~o!T@N{k^hXLv5OZ{vO%hL~U|?|8dz~M{RO>|54d~n%d;*{%+YW zr8c>^e~)Z0r#88^zge~yQJY-azeTnmqV@!8SIYJ*YLg55eX>2B+T^k$WJ|+%*5H=k^F#pliq^ZUjo7us3oztj&RSqHHY-jy|G?*oalNs3 zkmfA*uV4N1$cPd9=Wc+w|Bd@#Q`~=vb~&SK|C^E)Py8pmeR2Q4qXlf2#CE+zP4FBA zI895~h{Y4{fzcs%4$ZiKkjB~3K6r2b7Y-o|;{Jp}B!FlGqSK!^+m{nN*XbY3=#BX= z0CrVoC@dj*Q0?f+aPpk8coc!yPImfx&h||tJr^>1joMx|&WOE5rX|R<*#2HQq?fKJ z4#l^y$(jx9>ZHl>4FmX6EFi*}r*vcsozG%Fo#N(Z$p z9o#l- z7Bwx25mL0weHls20-HGruMBI*{Z3%Vg5t_RAyi{%8KG z(|$pE1S=v$syjGB0PVG0EYgfFzyu?2N+K*RL0HhT;1DQeVKYbeUlP-EH3R# zbp3%G)0;8yl@9;qjDZ)mZM2^$+j6JFwi94eqX)W1ts8n)w}L)Wn@1Ivxrd0=%O zeHCLcVpqtHYEmGHlt2mdu(VbZi;1}R0ww5AZaz=>8oP8sto>%Jb-1JbW=89Sd)tSR zj796cXTFSmCGjh?Uh78fG&RrAF>s;YB6`0ifWr$? zUdd1x6JAF*19L0}TjTG05eFK7Ke186p`YxHRSzl_%iPZDL1nBS7mRQ115={Ymm52W zN}qxFh>;n7j8%2mWjqP{jeO%QHr#Q>**Fv%zbE4cG%~bD^*^NQlEt2VgXiEi zlE8*bkrYo{Metyf)wnwbJ(A1z5gcQ5uEwmsNzDJEm^q$^m*$;(^cuwmJpZG2)TnZ? 
zT(r~=K9r#jv#W>go(gnNYG5=Ka+`ggq8a0{Iep+nRJZm5eMN-QzKgfel+_eTA^N&DCE`?BL9TsSNN?+H3QQr zH^@+oC(4LXWsUoO2M2XrnCHj}a)fNvbca?H ztYfmjO_MF>KAyOYy2>BWRR~>}JT0D%_Bqv@VQPYLU}=1AB~)2ShQF zie9I7UC$MdgnH1v*Z$=o+6*&f)cl%#TR=T``O&;+#D1Q5!fM77caj=*m|i1E^H@sE z+-DTgr~AZ+D>34eh}+{vNBeORZ3f#(^n9FqZ6isQQGkjkj>vB91+CUH6jXY22c1%c z+}$HrF^8ZJ?qTIUNIjj^mykXN7E4sz=aEaj%io*4@Nct)dZo}62ubtEOchDb6OXmh zr1gEU2R$G`O|w^thvI}X_f_lR2L;H5JK_9&nO5VriF2CPy1C79seD8Yh)nj zJJLLOP^Fmnf0N`xpA|J-l?9H7*sQF_I>ZXMEyVJuPUuMK7__B3WkPZ~?bm;jPWuQA z1?V3gaQekCDw9`;qVxRwQqiO=vYX_Q(L9Gd!6Hv85qCx<7viL^N;~f;oJ2!2I;YS^ zLZ^4uUgSe~zzJvH$ufTbQ=@BAffSN{kxBaw)t#J#rD-jsWEoFPrG8?x$}wnZY&9f! z4eLF;qX#FLiE+t0`{;ef=!9h$#B`~R8XbvT>g0Qrq&s?Y`N+=JKIXa}?Z;~{){eoX znaHK%9~wkQ5AgBCFb-v8o0o$v=CLMN?Iv(BWp^6C^d6nYJJvL@CsZ896JJwWAws(s zEsJ3+3kZV)(JGh1L?f1qJwvW7;79eZ%$9>w{W@dxF?6n@nd8v%kz=ps<1g3{PrUhl z()w`?kA@Xb%%WLd<{!%c5*?K1B(_5a*5c9uj$lClCffF_7$<3=qU6&B0iP)O%aYhg z$%gXS2PM_zu^*RIRdmhSHq|4ljjn~wfa@#fUn}|#4MQGHYRjg#7%$;0CxaZ z0KNiP4>%3G_CY`c@EG88fNucS0}cXy1CY*_Uj6r&C`K!PWfbLJC`z(4k;=15Qdi!RBei!Hqobp@k{^>|@7U)kn>1{SW2)YmS z`A&M!rtbs&Y0%d?>6uSk_Pq@H?lJnm0Q!rd7dquXW$Pz&#_q}r84=^N=oV3yiVHQ* zvq7)HJRe7#XU}{9$%^OD=*Y_YdX8ns0<70H=x2trpAB|D6`<2NQqif8gZQlkeK%ls zk)t1fM^-ZG97&^x;^Et%9|pYwb<5Vwrz|_^YEKX7!8c)`g*J@ z?f$1C&%8ATeF^C2Ku(D#9!F1{~-ei(GRcWK%8jIIAF z=$`{!-j~t-E4S&{i2F9smpkQKZTf7`4}qR8Udlj!VGKI$FWW(%>(n2x_3s0{8FV^u zvFwl7^p`>ZYtX$;df2940R4-gKjWllR!9c&|5ecApv%0k$M49`+1Q_c2)evKrRggD z$oZWO`Z>@aaO!`~?!OH5i=dY}>FqW>2zml^xz}j@Yi;^I&<|l>`nXemmrZ{e^y8r4 z>7-XGdKqvRKtBroA)=S&6?`GnpEvFES^hjvSGGTI-oYFrukgijMqY8p_|*;8x?72X1-bmIrQm;Fbq&dEk}@Zh7FA2mYTuuw{ZzT2Ps!>%?xb4X>90-|}mPuBD^gO~47?R%K6pRbnZh`<#vemcXUgcz_DP6Ce_47neG*QT|Am;xhRbs{;HEKNo31C$$^5xO$5%3bg3xi8jGt(&$7K9%qMPR_ z89zzvN#Xx2JW=1B5}zmZ`cBr9FZy`hC*upO_ZO1!lf|VOJ|{}nv+$%jU50d*EOCeR zzJ`;Qg;lEaM>76SysxpviIVj!ahK4~IXj720;`+zKr()sIFJ&5kD%vA-6c!ttY%Xv zon(vO;DMmzLisZje(rJknXXk4e#nKtpYb|>Yi)GxC_~uxaQYG}C4RJj8d#6s7qp&T zl7F#6yCSwH6q{^ zfPwIOKht@g51`6CoB8{#!tsaMtVMbm%__y 
zub{1Ed^h`Z4)cGC@vm~fdf)s4@CDdC-Esa!=Ktv(O26iRjq!Ed?+n%-XZ&{7Pw`FV zmyDl2#V6a^{%mZBWWRg7vl&0cdMNIxY-Ie$**{$hE#_eZ_P~Dk`u#NcC*!Rnov-w9 zMknie*EOzh0Y8OxI>C>b|A*6j5-qQxqV^%<^QJ2NDoGXMeso0R`Y+b6{Zq+!_qgcV zIPn+o{Ob979P>T}czO<=3f;S*@^{QXoBgBrz)Osu=j!(tz^57)J;X`r@INilIK_3Q-^?=g15EY`$$;xbNtAA4$!lQ z@!5VZ!^Az{i(@G-x^d44MM*8@-SA zcIB%+#=G<3pBV3s6MCkf#_P_L&oKUw%m2?YeiO&ZgFLPm82=CK9~~!jZ=UqK^JFjM z-Shi1#(%%SCpG(7{|Mu&c)apn7ie=450szX`E3XCLjjHlj=I+AV(^na53-$J)?X*% ziW?5_Jo9g0{%4qWLiTGPZ)lNo%>O#uIhP4PWV}0%4FI2N-U|>{WT$&yD3bAZyP(&A zsD$Ky+%{u=_x=@P{5a+>W&Xcm{7P4z`6lDP&3Jio2AQ7#Pj(jO`dEVk!_4o_{~qKw zvfmw_Wisz2sgR9lCH{6nZ#hxHwkS(I>v8Ap{fu|}`6b4?_nq%Z{jAght}uTY=gA7D z{RViCgU_nsfR)DOj(-p1U*~+XmIanG-km=;Fy6hsYJpD`4?CIPy>EVl`I}wyB5EVi zmS}5Towrs5Ly^$da7#24F{2G8-r;NvwcxGIV3WDEzNscqZw8}Hkrp%1+9qn78k*}v z(NNI)*y6&+QZt!#;l{8Th(rRr%ur)AvP;xO0u3QE*xJyr3nGq|3C?H|YbD-%4M)RG zjp4?v{L}~E)8#O zY>I?bqu4aVP1GNnONvZ-A2k}Tl@^5YR_$)I@ak;Gh7)x+AA zE7$w0%&L;o3O~L6yJi($@?C6}?g-V=!03hE^7Vo88o%Fx4Z}%AMdoTa8!rI|gAx6* zZ&9&X3isk&+=%oGiQ|>u%7*QvT4{FB-K3A~6awp3!JG!aHg$>YFcPXmL^aliYIoEq zTbAJ!=EhdMG2B+PxG}U-y{b$$L3YI=;t2%<(EzDQ&sxK*FrE73mzpsQOePIG)Y8&a z2SR0)xo&--bRUSzq)9SbEKi_I+X<hnRj1`OYHdh^)GO)iAAZLi%vew^ z?!{O(6oF>2rODie^P?4O7wt6;^{>u8#3Y0Sea@MYL??dZ{w?f^nJh zXbtjkqh7u)B4(yx-KrXBP_r|7BuiStyF*HPv>`#Jj2b`uOp%GCOIgtJseANt&CnML z?AQKBId`nJ%gdEMXfP^hN87Y4$tH70puRQ4-ly~jU+mzo4Y-ZKXB1NCcSTuq1ua9J zNYnxwBl9%)iwc7wE9xp6WZbPvKimd?aYLZZ)bn9htzN-9h5UGe+hz*3lp!S-Y^snN ztLxBq~#peEDWBGyM6!g4jH$i(Z%&Va?{db4q7QzS^)MJ9Ij=?Y%@>5NW_ z5VD)1gg=3iYQAkpI6Oe}H^97#QS#?yZIBh@NN6NGZJ zRA2F^ZE2Osg1nJ_QsiDEdmf9&p672%HpFxmE!X`zAf@m zW=m(P02#}46~#$kNP$23vnKq(6LJ+D3ah|zEjVi#sJ(%fkz)!+FFdu-)xh=j~pQ6&LNsp>N@Da=cq7#vtiEtZ2)e9I;| zbcMHNS3@*V0~n1cxJ}pa0h&;xS$G?pq9HH7FS9Tj*ebl+0xjEwH@K@2f(k|>!iytR z?+$!q2Z#Mh4HL9Ts6IdfTx+h63a>nu@S^J7inm3)_!18O%L(^Jn&jcEH?)n_hg7Ccgp>|lN1uU)a4kfdqSP16r2}Oz@__K-Y^(uiSk}4c z|DrMjvbZE-omcDoe)_v(`o6R7f_R;Uu6N-Qk|kf2=|ju@u9ru)?0>j?npBF=!X-va zUf)Ng_Zg{FDS&$ys^D^uEV@nCxp7Kv$@gaZ&=R=sD1q3+22&qY)$-F6Sv(1dOVXCS 
zzAs7FovG;i<*H@hf6?uyP@#L(`nrU^f7wPIqIl$g`+mLV(~!nT_r7&o-{(BS^35zy zO|nzl&uCGL3Yn_q_5ILXT*suM$Ep3LYhZd5y(e`RN4c`V2#jcju_r>-147Fo;Ia=|r z?b-`j8j~ge$QHEle%vDxSp2GG+oSux27*)Gy3byyBo1gnhFVU;_g(V(diMdA*LG?( zT28lbph7rpzrO!)gyr4*?)m!zWC_RYsHvjm^?hlL*YZxNV^8xCyuu2HZ&z5_hZfA&{u^Y&_cH(h diff --git a/packages/kokkos-kernels/example/graph/PartitioningExample.cpp b/packages/kokkos-kernels/example/graph/PartitioningExample.cpp index 1bef46cd28c7..7f06b216d321 100644 --- a/packages/kokkos-kernels/example/graph/PartitioningExample.cpp +++ b/packages/kokkos-kernels/example/graph/PartitioningExample.cpp @@ -28,7 +28,7 @@ using std::cout; using std::vector; -//#include "../../src/sparse/impl/KokkosSparse_partitioning_impl.hpp" +// #include "../../src/sparse/impl/KokkosSparse_partitioning_impl.hpp" int main(int argc, char* argv[]) { /* diff --git a/packages/kokkos-kernels/example/half/xpy.cpp b/packages/kokkos-kernels/example/half/xpy.cpp index 238fdef18798..cf3b5767f710 100644 --- a/packages/kokkos-kernels/example/half/xpy.cpp +++ b/packages/kokkos-kernels/example/half/xpy.cpp @@ -40,18 +40,15 @@ void do_xpy(size_t n, bool time_only = false) { View y_rand("y_rand", n); View expected("expected", n); - View relative_error( - "relative_error", n); + View relative_error("relative_error", n); typename ViewType::HostMirror x_host = create_mirror_view(x); typename ViewType::HostMirror y_host = create_mirror_view(y); // TODO: Report segfault in random_pool creation with: // typename ViewType::HostMirror y_host = create_mirror_view(y_host); Random_XorShift64_Pool random_pool(12345); - fill_random(x_rand, random_pool, ReferenceScalarType(1.0), - ReferenceScalarType(2.0)); - fill_random(y_rand, random_pool, ReferenceScalarType(1.0), - ReferenceScalarType(2.0)); + fill_random(x_rand, random_pool, ReferenceScalarType(1.0), ReferenceScalarType(2.0)); + fill_random(y_rand, random_pool, ReferenceScalarType(1.0), ReferenceScalarType(2.0)); ExecutionSpace().fence(); deep_copy(x, x_rand); @@ -72,22 
+69,18 @@ void do_xpy(size_t n, bool time_only = false) { if (!time_only) { for (size_t i = 0; i < n; i++) - expected(i) = static_cast(y_host(i)) + - static_cast(x_host(i)); + expected(i) = static_cast(y_host(i)) + static_cast(x_host(i)); } deep_copy(x_host, x); ExecutionSpace().fence(); - std::cout << "n: " << n << ", " << typeid(ScalarType).name() - << " Runtime(s): " << s << std::endl; + std::cout << "n: " << n << ", " << typeid(ScalarType).name() << " Runtime(s): " << s << std::endl; if (!time_only) { - std::cout << "n: " << n << ", " << typeid(ScalarType).name() - << " Relative Errors:" << std::endl; + std::cout << "n: " << n << ", " << typeid(ScalarType).name() << " Relative Errors:" << std::endl; for (size_t i = 0; i < n; i++) { - std::cout << ", " << std::abs(expected(i) - x_host(i)) / expected(i) - << std::endl; + std::cout << ", " << std::abs(expected(i) - x_host(i)) / expected(i) << std::endl; } std::cout << std::endl << std::endl; } diff --git a/packages/kokkos-kernels/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp b/packages/kokkos-kernels/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp index 065c103cefbe..52de73fe298a 100644 --- a/packages/kokkos-kernels/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp +++ b/packages/kokkos-kernels/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp @@ -52,8 +52,7 @@ typedef struct params { namespace KokkosKernels { namespace Experiment { -template +template struct functorTestHashmapAccumulator { typedef ExecutionSpace execution_space; typedef typename Kokkos::View data_view_t; @@ -65,17 +64,12 @@ struct functorTestHashmapAccumulator { const size_t _max_hash_entries; const parameters_t& _params; - typedef Kokkos::Experimental::UniqueToken< - execution_space, Kokkos::Experimental::UniqueTokenScope::Global> + typedef Kokkos::Experimental::UniqueToken unique_token_t; unique_token_t tokens; - functorTestHashmapAccumulator(const 
size_t num_entries, - const data_view_t& data, - uniform_memory_pool_t memory_pool, - const size_t hash_size, - const size_t max_hash_entries, - const parameters_t& params) + functorTestHashmapAccumulator(const size_t num_entries, const data_view_t& data, uniform_memory_pool_t memory_pool, + const size_t hash_size, const size_t max_hash_entries, const parameters_t& params) : _num_entries(num_entries), _data(data), _memory_pool(memory_pool), @@ -104,9 +98,7 @@ struct functorTestHashmapAccumulator { } scalar_t* ptr_memory_pool_chunk = (scalar_t*)(ptr_temp); - KokkosKernels::Experimental::HashmapAccumulator< - hash_size_type, hash_key_type, hash_value_type> - hash_map; + KokkosKernels::Experimental::HashmapAccumulator hash_map; // Set pointer to hash indices scalar_t* used_hash_indices = (scalar_t*)(ptr_temp); @@ -145,9 +137,8 @@ struct functorTestHashmapAccumulator { // Compute the hash index using & instead of % (modulus is slower). scalar_t hash = key & hash_func_pow2; - int r = hash_map.sequential_insert_into_hash_TrackHashes( - hash, key, &used_hash_size, hash_map.max_value_size, &used_hash_count, - used_hash_indices); + int r = hash_map.sequential_insert_into_hash_TrackHashes(hash, key, &used_hash_size, hash_map.max_value_size, + &used_hash_count, used_hash_indices); // Check return code if (r) { @@ -180,9 +171,7 @@ struct functorTestHashmapAccumulator { template void experiment(const parameters_t& params) { - typedef - typename KokkosKernels::Impl::UniformMemoryPool - uniform_memory_pool_t; + typedef typename KokkosKernels::Impl::UniformMemoryPool uniform_memory_pool_t; typedef typename Kokkos::View data_view_t; typedef typename data_view_t::HostMirror data_view_hostmirror_t; @@ -224,9 +213,8 @@ void experiment(const parameters_t& params) { // Set Hash Table Parameters size_t max_hash_entries = max_value; // Max number of entries that can be // inserted (values allowed are 1..100) - size_t hash_size_hint = - max_value; // How many hash keys are allowed. 
The actual hash size will - // be set to the next power of 2 bigger than hash_size_hint. + size_t hash_size_hint = max_value; // How many hash keys are allowed. The actual hash size will + // be set to the next power of 2 bigger than hash_size_hint. // Set the hash_size as the next power of 2 bigger than hash_size_hint. // - hash_size must be a power of two since we use & rather than % (which is @@ -237,8 +225,7 @@ void experiment(const parameters_t& params) { } // Create Uniform Initialized Memory Pool - KokkosKernels::Impl::PoolType pool_type = - KokkosKernels::Impl::OneThread2OneChunk; + KokkosKernels::Impl::PoolType pool_type = KokkosKernels::Impl::OneThread2OneChunk; // Determine memory chunk size for UniformMemoryPool size_t mem_chunk_size = hash_size; // for hash indices @@ -254,16 +241,12 @@ void experiment(const parameters_t& params) { // KokkosKernels::Impl::UniformMemoryPool m_space(mem_chunk_count, mem_chunk_size, -1, pool_type); - uniform_memory_pool_t memory_pool(mem_chunk_count, mem_chunk_size, -1, - pool_type); + uniform_memory_pool_t memory_pool(mem_chunk_count, mem_chunk_size, -1, pool_type); - functorTestHashmapAccumulator - testHashmapAccumulator(num_entries, d_data, memory_pool, hash_size, - max_hash_entries, params); + functorTestHashmapAccumulator testHashmapAccumulator( + num_entries, d_data, memory_pool, hash_size, max_hash_entries, params); - Kokkos::parallel_for("testHashmapAccumulator", num_entries, - testHashmapAccumulator); + Kokkos::parallel_for("testHashmapAccumulator", num_entries, testHashmapAccumulator); if (params.verbose) { double t = timer.seconds(); @@ -275,8 +258,7 @@ void experiment(const parameters_t& params) { } // namespace Experiment } // namespace KokkosKernels -void print_options(std::ostream& os, const char* app_name, - unsigned int indent = 0) { +void print_options(std::ostream& os, const char* app_name, unsigned int indent = 0) { std::string spaces(indent, ' '); os << "Usage:" << std::endl << spaces << " " << 
app_name << " [parameters]" << std::endl @@ -285,15 +267,12 @@ void print_options(std::ostream& os, const char* app_name, << spaces << " Parallelism (select one of the following):" << std::endl << spaces << " --serial Execute serially." << std::endl << spaces << " --threads Use N posix threads." << std::endl - << spaces << " --openmp Use OpenMP with N threads." - << std::endl + << spaces << " --openmp Use OpenMP with N threads." << std::endl << spaces << " --cuda Use CUDA" << std::endl << spaces << " Optional Parameters:" << std::endl - << spaces << " --problem-size Problem Size (Default: 20)" - << std::endl + << spaces << " --problem-size Problem Size (Default: 20)" << std::endl << spaces << " --verbose Verbose output" << std::endl - << spaces << " --help Print out command line help." - << std::endl + << spaces << " --help Print out command line help." << std::endl << spaces << " " << std::endl; } // print_options @@ -321,19 +300,16 @@ int parse_inputs(parameters_t& params, int argc, char** argv) { } else if (0 == Test::string_compare_no_case(argv[i], "--verbose") || 0 == Test::string_compare_no_case(argv[i], "-V")) { params.verbose = true; - } else if (0 == Test::string_compare_no_case(argv[i], "help") || - 0 == Test::string_compare_no_case(argv[i], "-h")) { + } else if (0 == Test::string_compare_no_case(argv[i], "help") || 0 == Test::string_compare_no_case(argv[i], "-h")) { print_options(std::cout, argv[0]); return 1; } else { - std::cerr << "3-Unrecognized command line argument #" << i << ": " - << argv[i] << std::endl; + std::cerr << "3-Unrecognized command line argument #" << i << ": " << argv[i] << std::endl; print_options(std::cout, argv[0]); return 1; } } - if (!params.use_serial && !params.use_threads && !params.use_openmp && - !params.use_cuda) { + if (!params.use_serial && !params.use_threads && !params.use_openmp && !params.use_cuda) { print_options(std::cout, argv[0]); return 1; } @@ -351,14 +327,11 @@ int main(int argc, char* argv[]) { return 1; } - 
const int device_id = 0; - const int num_threads = - params.use_openmp; // Assumption is that use_openmp variable is provided - // as number of threads + const int device_id = 0; + const int num_threads = params.use_openmp; // Assumption is that use_openmp variable is provided + // as number of threads - Kokkos::initialize(Kokkos::InitializationSettings() - .set_num_threads(num_threads) - .set_device_id(device_id)); + Kokkos::initialize(Kokkos::InitializationSettings().set_num_threads(num_threads).set_device_id(device_id)); if (params.verbose) { Kokkos::print_configuration(std::cout); diff --git a/packages/kokkos-kernels/example/wiki/blas/abs/abs.cpp b/packages/kokkos-kernels/example/wiki/blas/abs/abs.cpp index c5a1d39e15e7..a74d4e3555a2 100644 --- a/packages/kokkos-kernels/example/wiki/blas/abs/abs.cpp +++ b/packages/kokkos-kernels/example/wiki/blas/abs/abs.cpp @@ -29,8 +29,7 @@ int main(int argc, char* argv[]) { double sum = 0.0; Kokkos::parallel_reduce( - "CheckValue", N, - KOKKOS_LAMBDA(const int& i, double& lsum) { lsum += y(i); }, sum); + "CheckValue", N, KOKKOS_LAMBDA(const int& i, double& lsum) { lsum += y(i); }, sum); printf("Sum: %lf Expected: %lf Diff: %e\n", sum, 1.0 * N, sum - 1.0 * N); diff --git a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_9pt_stencil.hpp b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_9pt_stencil.hpp index 57f109f652b9..2137bf09e5ab 100644 --- a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_9pt_stencil.hpp +++ b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_9pt_stencil.hpp @@ -33,8 +33,8 @@ using DeviceSpace = typename ExecSpace::memory_space; using Kokkos::HostSpace; using RowmapType = Kokkos::View; using ColindsType = Kokkos::View; -using Handle = KokkosKernels::Experimental::KokkosKernelsHandle< - Offset, Ordinal, default_scalar, ExecSpace, DeviceSpace, DeviceSpace>; +using Handle = KokkosKernels::Experimental::KokkosKernelsHandle; namespace GraphDemo { Ordinal gridX = 
15; @@ -124,10 +124,8 @@ void generate9pt(RowmapType& rowmapDevice, ColindsType& colindsDevice) { Offset numEdges = colinds.size(); // Now that the graph is formed, copy rowmap and colinds to Kokkos::Views in // device memory The nonowning host views just alias the std::vectors. - Kokkos::View> - rowmapHost(rowmap.data(), numVertices + 1); - Kokkos::View> - colindsHost(colinds.data(), numEdges); + Kokkos::View> rowmapHost(rowmap.data(), numVertices + 1); + Kokkos::View> colindsHost(colinds.data(), numEdges); // Allocate owning views on device with the correct size. rowmapDevice = RowmapType("Rowmap", numVertices + 1); colindsDevice = ColindsType("Colinds", numEdges); diff --git a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coarsening.cpp b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coarsening.cpp index 027ee0a057d1..409564a33451 100644 --- a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coarsening.cpp +++ b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coarsening.cpp @@ -29,9 +29,8 @@ int main() { { std::cout << "Coarsened vertex labels:\n"; Ordinal numClusters = 0; - auto labels = - KokkosGraph::graph_mis2_aggregate( - rowmapDevice, colindsDevice, numClusters); + auto labels = KokkosGraph::graph_mis2_aggregate(rowmapDevice, colindsDevice, + numClusters); // coarsening labels can be printed in the same way as colors GraphDemo::printColoring(labels, numClusters); putchar('\n'); diff --git a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coloring.cpp b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coloring.cpp index ac62861e12ec..8ff0f6941d76 100644 --- a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coloring.cpp +++ b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_coloring.cpp @@ -42,10 +42,9 @@ int main() { // Use the default algorithm (chosen based on ExecSpace) handle.create_graph_coloring_handle(KokkosGraph::COLORING_DEFAULT); // Run coloring (graph 
is square and symmetric) - KokkosGraph::Experimental::graph_color(&handle, numVertices, numVertices, - rowmapDevice, colindsDevice); + KokkosGraph::Experimental::graph_color(&handle, numVertices, numVertices, rowmapDevice, colindsDevice); // Get the colors array, and the number of colors used from the handle. - auto colors = handle.get_graph_coloring_handle()->get_vertex_colors(); + auto colors = handle.get_graph_coloring_handle()->get_vertex_colors(); Ordinal numColors = handle.get_graph_coloring_handle()->get_num_colors(); printf("9-pt stencil: Distance-1 Colors (used %d):\n", (int)numColors); GraphDemo::printColoring(colors, numColors); @@ -57,16 +56,12 @@ int main() { { Handle handle; // Use the default algorithm (chosen based on ExecSpace) - handle.create_distance2_graph_coloring_handle( - KokkosGraph::COLORING_D2_DEFAULT); + handle.create_distance2_graph_coloring_handle(KokkosGraph::COLORING_D2_DEFAULT); // Run coloring - KokkosGraph::Experimental::graph_color_distance2( - &handle, numVertices, rowmapDevice, colindsDevice); + KokkosGraph::Experimental::graph_color_distance2(&handle, numVertices, rowmapDevice, colindsDevice); // Get the colors array, and the number of colors used from the handle. 
- auto colors = - handle.get_distance2_graph_coloring_handle()->get_vertex_colors(); - Ordinal numColors = - handle.get_distance2_graph_coloring_handle()->get_num_colors(); + auto colors = handle.get_distance2_graph_coloring_handle()->get_vertex_colors(); + Ordinal numColors = handle.get_distance2_graph_coloring_handle()->get_num_colors(); printf("9-pt stencil: Distance-2 Colors (used %d):\n", (int)numColors); GraphDemo::printColoring(colors, numColors); putchar('\n'); diff --git a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_mis2.cpp b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_mis2.cpp index 773930682fd8..2ee304d249c5 100644 --- a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_mis2.cpp +++ b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_mis2.cpp @@ -29,19 +29,16 @@ int main() { // algorithms { // Run coloring - auto misDevice = - KokkosGraph::graph_d2_mis( - rowmapDevice, colindsDevice, KokkosGraph::MIS2_FAST); - std::cout << "Distance-2 MIS, FAST algorithm: contains " - << misDevice.extent(0) << " out of " << GraphDemo::numVertices - << " vertices.\n"; + auto misDevice = KokkosGraph::graph_d2_mis(rowmapDevice, colindsDevice, + KokkosGraph::MIS2_FAST); + std::cout << "Distance-2 MIS, FAST algorithm: contains " << misDevice.extent(0) << " out of " + << GraphDemo::numVertices << " vertices.\n"; GraphDemo::printMIS(misDevice); putchar('\n'); - misDevice = KokkosGraph::graph_d2_mis( - rowmapDevice, colindsDevice, KokkosGraph::MIS2_QUALITY); - std::cout << "Distance-2 MIS, QUALITY algorithm: contains " - << misDevice.extent(0) << " out of " << GraphDemo::numVertices - << " vertices.\n"; + misDevice = KokkosGraph::graph_d2_mis(rowmapDevice, colindsDevice, + KokkosGraph::MIS2_QUALITY); + std::cout << "Distance-2 MIS, QUALITY algorithm: contains " << misDevice.extent(0) << " out of " + << GraphDemo::numVertices << " vertices.\n"; GraphDemo::printMIS(misDevice); putchar('\n'); } diff --git 
a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_rcm.cpp b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_rcm.cpp index d23a7de233b9..29fdf613120b 100644 --- a/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_rcm.cpp +++ b/packages/kokkos-kernels/example/wiki/graph/KokkosGraph_wiki_rcm.cpp @@ -17,19 +17,14 @@ #include "KokkosGraph_RCM.hpp" template -void printReorderedMatrix(const rowmap_t& rowmapIn, const entries_t& entriesIn, - const labels_t& invPermIn) { +void printReorderedMatrix(const rowmap_t& rowmapIn, const entries_t& entriesIn, const labels_t& invPermIn) { using size_type = typename rowmap_t::non_const_value_type; using lno_t = typename entries_t::non_const_value_type; - auto rowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmapIn); - auto entries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entriesIn); - auto invPerm = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), invPermIn); - lno_t numVerts = rowmap.extent(0) - 1; - decltype(invPerm) perm( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Perm"), numVerts); + auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmapIn); + auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entriesIn); + auto invPerm = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), invPermIn); + lno_t numVerts = rowmap.extent(0) - 1; + decltype(invPerm) perm(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Perm"), numVerts); for (lno_t i = 0; i < numVerts; i++) perm(invPerm(i)) = i; std::vector neighbors; for (lno_t i = 0; i < numVerts; i++) { @@ -68,9 +63,7 @@ int main() { // Step 2: Run RCM and print the reordered matrix { auto rcmDevice = - KokkosGraph::Experimental::graph_rcm(rowmapDevice, - colindsDevice); + KokkosGraph::Experimental::graph_rcm(rowmapDevice, colindsDevice); std::cout << "Graph reordered by reverse Cuthill-McKee:\n"; printReorderedMatrix(rowmapDevice, colindsDevice, rcmDevice); } 
diff --git a/packages/kokkos-kernels/example/wiki/sparse/CMakeLists.txt b/packages/kokkos-kernels/example/wiki/sparse/CMakeLists.txt index 16d6a3a89df6..8d061c24f844 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/CMakeLists.txt +++ b/packages/kokkos-kernels/example/wiki/sparse/CMakeLists.txt @@ -10,6 +10,11 @@ if (KOKKOSKERNELS_ENABLE_EXPERIMENTAL) ) endif() +KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( + wiki_bsrmatrix_2 + SOURCES KokkosSparse_wiki_bsrmatrix_2.cpp + ) + KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( wiki_crsmatrix SOURCES KokkosSparse_wiki_crsmatrix.cpp diff --git a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix.cpp b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix.cpp index eacf134f89da..49721e595ed4 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix.cpp +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix.cpp @@ -31,19 +31,14 @@ using Layout = default_layout; int main() { Kokkos::initialize(); - using device_type = typename Kokkos::Device< - Kokkos::DefaultExecutionSpace, - typename Kokkos::DefaultExecutionSpace::memory_space>; - using matrix_type = - typename KokkosSparse::CrsMatrix; - using b_matrix_type = - typename KokkosSparse::Experimental::BsrMatrix; - using graph_type = typename matrix_type::staticcrsgraph_type; - using row_map_type = typename graph_type::row_map_type; - using entries_type = typename graph_type::entries_type; - using values_type = typename matrix_type::values_type; + using device_type = + typename Kokkos::Device; + using matrix_type = typename KokkosSparse::CrsMatrix; + using b_matrix_type = typename KokkosSparse::Experimental::BsrMatrix; + using graph_type = typename matrix_type::staticcrsgraph_type; + using row_map_type = typename graph_type::row_map_type; + using entries_type = typename graph_type::entries_type; + using values_type = typename matrix_type::values_type; const Scalar SC_ONE = 
Kokkos::ArithTraits::one(); @@ -70,8 +65,7 @@ int main() { { // Build the row pointers and store numNNZ - typename row_map_type::HostMirror row_map_h = - Kokkos::create_mirror_view(row_map); + typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map); for (Ordinal rowIdx = 1; rowIdx < numRows + 1; ++rowIdx) { if ((rowIdx == 1) || (rowIdx == numRows)) { row_map_h(rowIdx) = row_map_h(rowIdx - 1) + 2; @@ -82,15 +76,13 @@ int main() { Kokkos::deep_copy(row_map, row_map_h); if (row_map_h(numRows) != numNNZ) { std::ostringstream error_msg; - error_msg << "error: row_map(numRows) != numNNZ, row_map_h(numRows)=" - << row_map_h(numRows) << ", numNNZ=" << numNNZ; + error_msg << "error: row_map(numRows) != numNNZ, row_map_h(numRows)=" << row_map_h(numRows) + << ", numNNZ=" << numNNZ; throw std::runtime_error(error_msg.str()); } - typename entries_type::HostMirror entries_h = - Kokkos::create_mirror_view(entries); - typename values_type::HostMirror values_h = - Kokkos::create_mirror_view(values); + typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries); + typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); for (Ordinal rowIdx = 0; rowIdx < numRows; ++rowIdx) { if (rowIdx == 0) { entries_h(row_map_h(rowIdx)) = rowIdx; diff --git a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix_2.cpp b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix_2.cpp new file mode 100644 index 000000000000..527b0d56c4e7 --- /dev/null +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_bsrmatrix_2.cpp @@ -0,0 +1,228 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include + +#include "Kokkos_Core.hpp" + +#include "KokkosKernels_default_types.hpp" +#include "KokkosSparse_BsrMatrix.hpp" + +using Scalar = default_scalar; +using Ordinal = default_lno_t; +using Offset = default_size_type; +using Layout = default_layout; + +template +struct bsr_fill { + bsrmatrix_type bsr_mat; + + bsr_fill(bsrmatrix_type bsr_mat_) : bsr_mat(bsr_mat_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& rowIdx) const { + if (rowIdx == 0) { // Left boundary condition + auto block_tmp = bsr_mat.unmanaged_block(0); + block_tmp(0, 0) = 1.0; + block_tmp(0, 1) = 0.0; + block_tmp(1, 0) = 0.0; + block_tmp(1, 1) = 1.0; + } else if (rowIdx == bsr_mat.numRows() - 1) { // Right boundary condition + auto block_tmp = bsr_mat.unmanaged_block(bsr_mat.graph.row_map(rowIdx) + 1); + block_tmp(0, 0) = 1.0; + block_tmp(1, 1) = 1.0; + } else { + auto block_tmp = bsr_mat.unmanaged_block(bsr_mat.graph.row_map(rowIdx)); + block_tmp(0, 0) = -1.0; + block_tmp(0, 1) = -1.0 / 2.0; + block_tmp(1, 0) = 0.0; + block_tmp(1, 1) = -1.0; + + block_tmp = bsr_mat.unmanaged_block(bsr_mat.graph.row_map(rowIdx) + 1); + block_tmp(0, 0) = 2.0; + block_tmp(0, 1) = 0.0; + block_tmp(1, 0) = 0.0; + block_tmp(1, 1) = 2.0; + + block_tmp = bsr_mat.unmanaged_block(bsr_mat.graph.row_map(rowIdx) + 2); + block_tmp(0, 0) = -1.0; + block_tmp(0, 1) = 1.0 / 2.0; + block_tmp(1, 0) = 0.0; + block_tmp(1, 1) = -1.0; + } + } +}; + +template +struct diagonal_extractor { + using graph_type = typename bsrmatrix_type::staticcrsgraph_type; + using row_map_type = typename graph_type::row_map_type; + using entries_type = typename graph_type::entries_type; + using bsr_block_type = typename bsrmatrix_type::block_type; + + bsrmatrix_type bsr_mat; + row_map_type row_map; + 
entries_type entries; + diag_blocks_type diag_blocks; + + diagonal_extractor(bsrmatrix_type bsr_mat_, diag_blocks_type diag_blocks_) + : bsr_mat(bsr_mat_), + row_map(bsr_mat_.graph.row_map), + entries(bsr_mat_.graph.entries), + diag_blocks(diag_blocks_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& rowIdx) const { + for (Offset entryIdx = row_map(rowIdx); entryIdx < row_map(rowIdx + 1); ++entryIdx) { + if (entries(entryIdx) == rowIdx) { + bsr_block_type bsr_diag_block = bsr_mat.unmanaged_block(entryIdx); + for (int i = 0; i < bsr_mat.blockDim(); ++i) { + for (int j = 0; j < bsr_mat.blockDim(); ++j) { + diag_blocks(rowIdx, i, j) = bsr_diag_block(i, j); + } + } + } + } + } +}; + +int main(int argc, char* argv[]) { + using device_type = + typename Kokkos::Device; + using bsrmatrix_type = typename KokkosSparse::Experimental::BsrMatrix; + using graph_type = typename bsrmatrix_type::staticcrsgraph_type; + using row_map_type = typename graph_type::row_map_type; + using entries_type = typename graph_type::entries_type; + + Kokkos::initialize(argc, argv); + { + // + // We will create a 1D discretization for the coupled thermo-elastic + // diffusion + // + // -\div(EA \grad_s(u) - \alpha(T-T0)I) = f_u + // -\kappa\Delta(T) = f_T + // + // The problem is discretized using finite differences as follows: + // \frac{d^2 u}{dx^2}\approx \frac{u_{i+1}-2u_i+u_{i-1}}{h_x^2} + // \frac{dT}{dx}\approx\frac{T_{i+1}-T_{i-1}}{2h_x} + // \frac{d^2T}{dx^2}\approx\frac{T_{i+1}-2T_i+T_{i-1}}{h_x^2} + // + // This leads to the combined stencil (assuming all unit coefficients): + // + // [-1 1/2] [2 0] [-1 -1/2] + // [ 0 -1] [0 2] [ 0 -1] + // + // First the graph for the mesh will be constructed. + // Second a BsrMatrix will be constructed from the graph + // Third the values of the BsrMatrix will be filled. 
+ + constexpr Ordinal blockSize = 2; + constexpr Ordinal numRows = 10; + constexpr Offset numNNZ = 3 * numRows - 2; + bsrmatrix_type bsr_mat; + + { + typename row_map_type::non_const_type row_map(Kokkos::view_alloc(Kokkos::WithoutInitializing, "row pointers"), + numRows + 1); + typename entries_type::non_const_type entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "column indices"), + numNNZ); + typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map); + typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries); + + // First Step: build the CrsGraph + { + // Build the row pointers and store numNNZ + + row_map_h(0) = 0; + for (Ordinal rowIdx = 0; rowIdx < numRows; ++rowIdx) { + if (rowIdx == 0) { + row_map_h(rowIdx + 1) = row_map_h(rowIdx) + 2; + + entries_h(row_map_h(rowIdx)) = rowIdx; + entries_h(row_map_h(rowIdx) + 1) = rowIdx + 1; + } else if (rowIdx == numRows - 1) { + row_map_h(rowIdx + 1) = row_map_h(rowIdx) + 2; + + entries_h(row_map_h(rowIdx)) = rowIdx - 1; + entries_h(row_map_h(rowIdx) + 1) = rowIdx; + } else { + row_map_h(rowIdx + 1) = row_map_h(rowIdx) + 3; + + entries_h(row_map_h(rowIdx)) = rowIdx - 1; + entries_h(row_map_h(rowIdx) + 1) = rowIdx; + entries_h(row_map_h(rowIdx) + 2) = rowIdx + 1; + } + } + + if (row_map_h(numRows) != numNNZ) { + std::ostringstream error_msg; + error_msg << "error: row_map(numRows) != numNNZ, row_map_h(numRows)=" << row_map_h(numRows) + << ", numNNZ=" << numNNZ; + throw std::runtime_error(error_msg.str()); + } + Kokkos::deep_copy(row_map, row_map_h); + Kokkos::deep_copy(entries, entries_h); + } + + graph_type myGraph(entries, row_map); + + // Second Step: build the BsrMatrix from graph and block size + bsr_mat = bsrmatrix_type("block matrix", myGraph, blockSize); + + bsr_fill fillFunctor(bsr_mat); + Kokkos::parallel_for(Kokkos::RangePolicy(0, numRows), fillFunctor); + + std::cout << "BsrMatrix graph: " << std::endl; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { 
+ std::cout << " ["; + for (int colIdx = 0; colIdx < entries_h(row_map_h(rowIdx)); ++colIdx) { + std::cout << " "; + } + std::cout << "*"; + for (Offset entryIdx = row_map_h(rowIdx); entryIdx < row_map_h(rowIdx + 1) - 1; ++entryIdx) { + for (int colIdx = entries_h(entryIdx) + 1; colIdx < entries_h(entryIdx + 1); ++colIdx) { + std::cout << " "; + } + std::cout << "*"; + } + for (int colIdx = entries_h(row_map_h(rowIdx + 1) - 1) + 1; colIdx < numRows; ++colIdx) { + std::cout << " "; + } + std::cout << "]" << std::endl; + } + } + + // Extract diagonal block and store them in a rank-3 view + using diag_blocks_type = Kokkos::View; + diag_blocks_type diag_blocks("diagonal blocks", numRows, blockSize, blockSize); + diagonal_extractor myFunc(bsr_mat, diag_blocks); + Kokkos::parallel_for(Kokkos::RangePolicy(0, numRows), myFunc); + + auto diag_blocks_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, diag_blocks); + + std::cout << "\nBsrMatrix diagonal blocks: " << std::endl; + for (int blockId = 0; blockId < diag_blocks_h.extent_int(0); ++blockId) { + std::cout << " [" << diag_blocks_h(blockId, 0, 0) << ", " << diag_blocks_h(blockId, 0, 1) << "]" << std::endl; + std::cout << " [" << diag_blocks_h(blockId, 1, 0) << ", " << diag_blocks_h(blockId, 1, 1) << "]\n" << std::endl; + } + } + Kokkos::finalize(); + + return 0; +} diff --git a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_crsmatrix.cpp b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_crsmatrix.cpp index c8d6c805c1d3..21257d803481 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_crsmatrix.cpp +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_crsmatrix.cpp @@ -29,12 +29,9 @@ using Layout = default_layout; int main() { Kokkos::initialize(); - using device_type = typename Kokkos::Device< - Kokkos::DefaultExecutionSpace, - typename Kokkos::DefaultExecutionSpace::memory_space>; - using matrix_type = - typename KokkosSparse::CrsMatrix; + 
using device_type = + typename Kokkos::Device; + using matrix_type = typename KokkosSparse::CrsMatrix; using graph_type = typename matrix_type::staticcrsgraph_type; using row_map_type = typename graph_type::row_map_type; using entries_type = typename graph_type::entries_type; @@ -52,8 +49,7 @@ int main() { { // Build the row pointers and store numNNZ - typename row_map_type::HostMirror row_map_h = - Kokkos::create_mirror_view(row_map); + typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map); for (Ordinal rowIdx = 1; rowIdx < numRows + 1; ++rowIdx) { if ((rowIdx == 1) || (rowIdx == numRows)) { row_map_h(rowIdx) = row_map_h(rowIdx - 1) + 2; @@ -64,15 +60,13 @@ int main() { Kokkos::deep_copy(row_map, row_map_h); if (row_map_h(numRows) != numNNZ) { std::ostringstream error_msg; - error_msg << "error: row_map(numRows) != numNNZ, row_map_h(numRows)=" - << row_map_h(numRows) << ", numNNZ=" << numNNZ; + error_msg << "error: row_map(numRows) != numNNZ, row_map_h(numRows)=" << row_map_h(numRows) + << ", numNNZ=" << numNNZ; throw std::runtime_error(error_msg.str()); } - typename entries_type::HostMirror entries_h = - Kokkos::create_mirror_view(entries); - typename values_type::HostMirror values_h = - Kokkos::create_mirror_view(values); + typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries); + typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); for (Ordinal rowIdx = 0; rowIdx < numRows; ++rowIdx) { if (rowIdx == 0) { entries_h(row_map_h(rowIdx)) = rowIdx; diff --git a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp index 3dd8bfd5e5de..31ccea3b0a0e 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp @@ -37,10 +37,10 @@ int main() { using ExecSpace = 
Kokkos::DefaultExecutionSpace; using MemSpace = typename ExecSpace::memory_space; using Device = Kokkos::Device; - using Handle = KokkosKernels::Experimental::KokkosKernelsHandle< - Offset, Ordinal, default_scalar, ExecSpace, MemSpace, MemSpace>; - using Matrix = KokkosSparse::CrsMatrix; - using Vector = typename Matrix::values_type; + using Handle = + KokkosKernels::Experimental::KokkosKernelsHandle; + using Matrix = KokkosSparse::CrsMatrix; + using Vector = typename Matrix::values_type; constexpr Ordinal numRows = 10000; const Scalar one = Kokkos::ArithTraits::one(); const Mag magOne = Kokkos::ArithTraits::one(); @@ -52,32 +52,28 @@ int main() { // on which Gauss-Seidel should converge. Get approx. 20 entries per row // Diagonals are 2x the absolute sum of all other entries. Offset nnz = numRows * 20; - Matrix A = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - Matrix>(numRows, numRows, nnz, 2, 100, 1.05 * one); - std::cout << "Generated a matrix with " << numRows << " rows/cols, and " - << nnz << " entries.\n"; + Matrix A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix(numRows, numRows, nnz, 2, 100, + 1.05 * one); + std::cout << "Generated a matrix with " << numRows << " rows/cols, and " << nnz << " entries.\n"; // Create a kernel handle, then a Gauss-Seidel handle with the default // algorithm Handle handle; handle.create_gs_handle(KokkosSparse::GS_DEFAULT); // Set up Gauss-Seidel for the graph (matrix sparsity pattern) - KokkosSparse::Experimental::gauss_seidel_symbolic( - &handle, numRows, numRows, A.graph.row_map, A.graph.entries, false); + KokkosSparse::Experimental::gauss_seidel_symbolic(&handle, numRows, numRows, A.graph.row_map, A.graph.entries, + false); // Set up Gauss-Seidel for the matrix values (numeric) // Another matrix with the same sparsity pattern could re-use the handle and // symbolic phase, and only call numeric. 
- KokkosSparse::Experimental::gauss_seidel_numeric( - &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, - false); + KokkosSparse::Experimental::gauss_seidel_numeric(&handle, numRows, numRows, A.graph.row_map, A.graph.entries, + A.values, false); // Now, preconditioner is ready to use. Set up an unknown vector // (uninitialized) and randomized right-hand-side vector. Vector x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x"), numRows); Vector b(Kokkos::view_alloc(Kokkos::WithoutInitializing, "b"), numRows); Vector res(Kokkos::view_alloc(Kokkos::WithoutInitializing, "res"), numRows); auto bHost = Kokkos::create_mirror_view(b); - for (Ordinal i = 0; i < numRows; i++) - bHost(i) = 3 * ((one * rand()) / RAND_MAX); + for (Ordinal i = 0; i < numRows; i++) bHost(i) = 3 * ((one * rand()) / RAND_MAX); Kokkos::deep_copy(b, bHost); // Measure initial residual norm ||Ax - b||, where x is 0 Mag initialRes = KokkosBlas::nrm2(b); @@ -92,8 +88,7 @@ int main() { // * that b has changed since the previous apply (since there was no // previous apply) KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply( - &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, - x, b, firstIter, firstIter, one, 1); + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, x, b, firstIter, firstIter, one, 1); firstIter = false; // Now, compute the new residual norm using SPMV Kokkos::deep_copy(res, b); @@ -102,8 +97,7 @@ int main() { // Recompute the scaled norm scaledResNorm = KokkosBlas::nrm2(res) / initialRes; numIters++; - std::cout << "Iteration " << numIters - << " scaled residual norm: " << scaledResNorm << '\n'; + std::cout << "Iteration " << numIters << " scaled residual norm: " << scaledResNorm << '\n'; } std::cout << "SUCCESS: converged in " << numIters << " iterations.\n"; } diff --git a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spadd.cpp 
b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spadd.cpp index 841e3b9eb371..c9edd7bc0c99 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spadd.cpp +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spadd.cpp @@ -28,14 +28,11 @@ using Layout = default_layout; int main() { Kokkos::initialize(); - using device_type = typename Kokkos::Device< - Kokkos::DefaultExecutionSpace, - typename Kokkos::DefaultExecutionSpace::memory_space>; + using device_type = + typename Kokkos::Device; using execution_space = typename device_type::execution_space; using memory_space = typename device_type::memory_space; - using matrix_type = - typename KokkosSparse::CrsMatrix; + using matrix_type = typename KokkosSparse::CrsMatrix; int return_value = 0; @@ -47,8 +44,7 @@ int main() { // In each row the first entry is the number of grid point in // that direction, the second and third entries are used to apply // BCs in that direction. - Kokkos::View mat_structure( - "Matrix Structure", 2); + Kokkos::View mat_structure("Matrix Structure", 2); mat_structure(0, 0) = 10; // Request 10 grid point in 'x' direction mat_structure(0, 1) = 1; // Add BC to the left mat_structure(0, 2) = 1; // Add BC to the right @@ -56,15 +52,13 @@ int main() { mat_structure(1, 1) = 1; // Add BC to the bottom mat_structure(1, 2) = 1; // Add BC to the top - matrix_type A = - Test::generate_structured_matrix2D("FD", mat_structure); - matrix_type B = - Test::generate_structured_matrix2D("FE", mat_structure); + matrix_type A = Test::generate_structured_matrix2D("FD", mat_structure); + matrix_type B = Test::generate_structured_matrix2D("FE", mat_structure); matrix_type C; // Create KokkosKernelHandle - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - Offset, Ordinal, Scalar, execution_space, memory_space, memory_space>; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; KernelHandle kh; kh.create_spadd_handle(false); 
diff --git a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spgemm.cpp b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spgemm.cpp index 56a628ffd587..2b3ccd13d2b2 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spgemm.cpp +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spgemm.cpp @@ -28,12 +28,9 @@ using Layout = default_layout; int main() { Kokkos::initialize(); - using device_type = typename Kokkos::Device< - Kokkos::DefaultExecutionSpace, - typename Kokkos::DefaultExecutionSpace::memory_space>; - using matrix_type = - typename KokkosSparse::CrsMatrix; + using device_type = + typename Kokkos::Device; + using matrix_type = typename KokkosSparse::CrsMatrix; int return_value = 0; @@ -45,8 +42,7 @@ int main() { // In each row the first entry is the number of grid point in // that direction, the second and third entries are used to apply // BCs in that direction. - Kokkos::View mat_structure( - "Matrix Structure", 2); + Kokkos::View mat_structure("Matrix Structure", 2); mat_structure(0, 0) = 10; // Request 10 grid point in 'x' direction mat_structure(0, 1) = 1; // Add BC to the left mat_structure(0, 2) = 1; // Add BC to the right @@ -54,15 +50,13 @@ int main() { mat_structure(1, 1) = 1; // Add BC to the bottom mat_structure(1, 2) = 1; // Add BC to the top - matrix_type A = - Test::generate_structured_matrix2D("FD", mat_structure); - matrix_type B = - Test::generate_structured_matrix2D("FE", mat_structure); + matrix_type A = Test::generate_structured_matrix2D("FD", mat_structure); + matrix_type B = Test::generate_structured_matrix2D("FE", mat_structure); matrix_type C = KokkosSparse::spgemm(A, false, B, false); - std::cout << "Ran spgemm: product C is " << C.numRows() << 'x' - << C.numCols() << " and has " << C.nnz() << " nonzeros.\n"; + std::cout << "Ran spgemm: product C is " << C.numRows() << 'x' << C.numCols() << " and has " << C.nnz() + << " nonzeros.\n"; } Kokkos::finalize(); diff --git 
a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spmv.cpp b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spmv.cpp index 8b876e5bfc03..5778684a8a16 100644 --- a/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spmv.cpp +++ b/packages/kokkos-kernels/example/wiki/sparse/KokkosSparse_wiki_spmv.cpp @@ -44,12 +44,9 @@ struct check_spmv_functor { int main() { Kokkos::initialize(); - using device_type = typename Kokkos::Device< - Kokkos::DefaultExecutionSpace, - typename Kokkos::DefaultExecutionSpace::memory_space>; - using matrix_type = - typename KokkosSparse::CrsMatrix; + using device_type = + typename Kokkos::Device; + using matrix_type = typename KokkosSparse::CrsMatrix; using values_type = typename matrix_type::values_type; int return_value = 0; @@ -66,8 +63,7 @@ int main() { // BCs in that direction, BC=0 means Neumann BC is applied, // BC=1 means Dirichlet BC is applied by zeroing out the row and putting // one on the diagonal. - Kokkos::View mat_structure( - "Matrix Structure", 2); + Kokkos::View mat_structure("Matrix Structure", 2); mat_structure(0, 0) = 10; // Request 10 grid point in 'x' direction mat_structure(0, 1) = 0; // Add BC to the left mat_structure(0, 2) = 0; // Add BC to the right @@ -75,8 +71,7 @@ int main() { mat_structure(1, 1) = 0; // Add BC to the bottom mat_structure(1, 2) = 0; // Add BC to the top - matrix_type myMatrix = - Test::generate_structured_matrix2D("FD", mat_structure); + matrix_type myMatrix = Test::generate_structured_matrix2D("FD", mat_structure); const Ordinal numRows = myMatrix.numRows(); @@ -92,15 +87,12 @@ int main() { Ordinal count_errors = 0; check_spmv_functor check_spmv(y); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numRows), - check_spmv, count_errors); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numRows), check_spmv, count_errors); if (count_errors > 0) { return_value = 1; - std::cout << "Found " << count_errors << " errors in y vector!" 
- << std::endl; + std::cout << "Found " << count_errors << " errors in y vector!" << std::endl; } else { - std::cout << "spmv was performed correctly: y = beta*y + alpha*A*x" - << std::endl; + std::cout << "spmv was performed correctly: y = beta*y + alpha*A*x" << std::endl; } } diff --git a/packages/kokkos-kernels/graph/impl/KokkosGraph_BFS_impl.hpp b/packages/kokkos-kernels/graph/impl/KokkosGraph_BFS_impl.hpp index e73c1cb48968..34cb3c917968 100644 --- a/packages/kokkos-kernels/graph/impl/KokkosGraph_BFS_impl.hpp +++ b/packages/kokkos-kernels/graph/impl/KokkosGraph_BFS_impl.hpp @@ -38,44 +38,39 @@ struct SerialRCM { host_lno_view_t entries; SerialRCM(const rowmap_t& rowmap_, const entries_t& entries_) - : numVerts(rowmap_.extent(0) - 1), - rowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HostRowmap"), - rowmap_.extent(0)), - entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HostEntries"), - entries_.extent(0)) { + : numVerts(std::max(rowmap_.extent_int(0), 1) - 1), + rowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HostRowmap"), rowmap_.extent(0)), + entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HostEntries"), entries_.extent(0)) { Kokkos::deep_copy(rowmap, rowmap_); Kokkos::deep_copy(entries, entries_); } - lno_t findPseudoPeripheral() { - // Choose vertex with smallest degree - lno_t periph = -1; - lno_t periphDeg = numVerts; - for (lno_t i = 0; i < numVerts; i++) { - lno_t deg = rowmap(i + 1) - rowmap(i); - if (deg < periphDeg) { - periph = i; - periphDeg = deg; - if (deg == 0) break; - } - } - return periph; - } - lno_view_t rcm() { - lno_t start = findPseudoPeripheral(); - host_lno_view_t q(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Queue"), - numVerts); - host_lno_view_t label( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Permutation"), - numVerts); + // Given a label L, labelReverse - L gives the reversed label (as in reverse + // Cuthill McKee) + lno_t labelReverse = numVerts - 1; + host_lno_view_t 
q(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Queue"), numVerts); + host_lno_view_t label(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Permutation"), numVerts); for (lno_t i = 0; i < numVerts; i++) label(i) = -1; - lno_t qhead = 0; - lno_t qtail = 0; - label(start) = qtail; + lno_t qhead = 0; + lno_t qtail = 0; + // List of all vertices, in order from lowest to highest degree + // (heuristic for best to worst starting vertex for RCM). + // If the graph has multiple connected components, restart at the first + // unlabeled vertex in this list. + host_lno_view_t allVertices(Kokkos::view_alloc(Kokkos::WithoutInitializing, "allVertices"), numVerts); + for (lno_t i = 0; i < numVerts; i++) allVertices(i) = i; + std::sort(allVertices.data(), allVertices.data() + numVerts, [&](lno_t n1, lno_t n2) -> bool { + // return true if n1 has a lower degree than n2 + return (rowmap(n1 + 1) - rowmap(n1)) < (rowmap(n2 + 1) - rowmap(n2)); + }); + lno_t allVerticesIter = 0; + // Start RCM with the first vertex in allVertices + lno_t start = allVertices(allVerticesIter++); + label(start) = labelReverse - qtail; q(qtail++) = start; + // Reuse this neighbor list for all levels without deallocating std::vector neighbors; - lno_t outerQueue = 0; while (true) { lno_t v = q(qhead++); neighbors.clear(); @@ -86,15 +81,13 @@ struct SerialRCM { neighbors.push_back(nei); } } - std::sort(neighbors.begin(), neighbors.end(), - [&](lno_t n1, lno_t n2) -> bool { - // return true if n1 has a lower degree than n2 - return (rowmap(n1 + 1) - rowmap(n1)) < - (rowmap(n2 + 1) - rowmap(n2)); - }); + std::sort(neighbors.begin(), neighbors.end(), [&](lno_t n1, lno_t n2) -> bool { + // return true if n1 has a lower degree than n2 + return (rowmap(n1 + 1) - rowmap(n1)) < (rowmap(n2 + 1) - rowmap(n2)); + }); // label and enqueue all unlabeled neighbors for (lno_t nei : neighbors) { - label(nei) = qtail; + label(nei) = labelReverse - qtail; q(qtail++) = nei; } if (qtail == numVerts) { @@ -102,16 +95,13 @@ 
struct SerialRCM { break; } else if (qhead == qtail) { // have exhausted this connected component, but others remain unlabeled - while (label(outerQueue) != -1) outerQueue++; - label(outerQueue) = qtail; - q(qtail++) = outerQueue; + while (label(allVertices(allVerticesIter)) != -1) allVerticesIter++; + lno_t restart = allVertices(allVerticesIter); + label(restart) = labelReverse - qtail; + q(qtail++) = restart; } } - lno_view_t labelOut( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "RCM Permutation"), - numVerts); - // reverse the labels - for (lno_t i = 0; i < numVerts; i++) label(i) = numVerts - label(i) - 1; + lno_view_t labelOut(Kokkos::view_alloc(Kokkos::WithoutInitializing, "RCM Permutation"), numVerts); Kokkos::deep_copy(labelOut, label); return labelOut; } diff --git a/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance1Color_impl.hpp b/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance1Color_impl.hpp index 6bd1c022ae9b..2abc5c76e4c1 100644 --- a/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance1Color_impl.hpp +++ b/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance1Color_impl.hpp @@ -36,8 +36,7 @@ namespace Impl { * General aim is to find the minimum number of colors, minimum number of * independent sets. 
*/ -template +template class GraphColor { public: typedef lno_row_view_t_ in_lno_row_view_t; @@ -49,19 +48,15 @@ class GraphColor { typedef typename HandleType::size_type size_type; typedef typename HandleType::nnz_lno_t nnz_lno_t; - typedef typename in_lno_row_view_t::HostMirror - row_lno_host_view_t; // Host view type + typedef typename in_lno_row_view_t::HostMirror row_lno_host_view_t; // Host view type - typedef typename in_lno_nnz_view_t::HostMirror - nnz_lno_host_view_t; // Host view type + typedef typename in_lno_nnz_view_t::HostMirror nnz_lno_host_view_t; // Host view type - typedef typename HandleType::color_host_view_t - color_host_view_t; // Host view type + typedef typename HandleType::color_host_view_t color_host_view_t; // Host view type typedef typename HandleType::HandleExecSpace MyExecSpace; typedef typename HandleType::HandleTempMemorySpace MyTempMemorySpace; - typedef - typename HandleType::HandlePersistentMemorySpace MyPersistentMemorySpace; + typedef typename HandleType::HandlePersistentMemorySpace MyPersistentMemorySpace; typedef typename HandleType::const_size_type const_size_type; typedef typename lno_row_view_t_::const_type const_lno_row_view_t; @@ -70,8 +65,8 @@ class GraphColor { typedef typename lno_nnz_view_t_::non_const_type non_const_lno_nnz_view_t; protected: - nnz_lno_t nv; //# vertices - size_type ne; //# edges + nnz_lno_t nv; // # vertices + size_type ne; // # edges const_lno_row_view_t xadj; // rowmap const_lno_nnz_view_t adj; // entries const_lno_nnz_view_t kok_src, kok_dst; // Edge list storage of the graph @@ -87,25 +82,13 @@ class GraphColor { * \param coloring_handle: GraphColoringHandle object that holds the * specification about the graph coloring, including parameters. 
*/ - GraphColor(nnz_lno_t nv_, size_type ne_, const_lno_row_view_t row_map, - const_lno_nnz_view_t entries, HandleType *coloring_handle) - : nv(nv_), - ne(ne_), - xadj(row_map), - adj(entries), - kok_src(), - kok_dst(), - cp(coloring_handle) { - static_assert( - std::is_same< - size_type, - typename const_lno_row_view_t::non_const_value_type>::value, - "Row map element type does not match handle's size_type."); - static_assert( - std::is_same< - nnz_lno_t, - typename const_lno_nnz_view_t::non_const_value_type>::value, - "Entries element type does not match handle's nnz_lno_t."); + GraphColor(nnz_lno_t nv_, size_type ne_, const_lno_row_view_t row_map, const_lno_nnz_view_t entries, + HandleType *coloring_handle) + : nv(nv_), ne(ne_), xadj(row_map), adj(entries), kok_src(), kok_dst(), cp(coloring_handle) { + static_assert(std::is_same::value, + "Row map element type does not match handle's size_type."); + static_assert(std::is_same::value, + "Entries element type does not match handle's nnz_lno_t."); } /** \brief GraphColor destructor. @@ -125,11 +108,9 @@ class GraphColor { virtual void color_graph(color_view_t d_colors, int &num_phases) { num_phases = 1; - color_host_view_t colors = Kokkos::create_mirror_view(d_colors); - typename const_lno_row_view_t::HostMirror h_xadj = - Kokkos::create_mirror_view(this->xadj); - typename const_lno_nnz_view_t::HostMirror h_adj = - Kokkos::create_mirror_view(this->adj); + color_host_view_t colors = Kokkos::create_mirror_view(d_colors); + typename const_lno_row_view_t::HostMirror h_xadj = Kokkos::create_mirror_view(this->xadj); + typename const_lno_nnz_view_t::HostMirror h_adj = Kokkos::create_mirror_view(this->adj); // typename nnz_lno_host_view_t::HostMirror::HostMirror::HostMirror h_adj = // tmp; @@ -185,10 +166,8 @@ class GraphColor { * based algorithms. VBCS: Speculative parallel vertex based using color set * implementation. 
*/ -template -class GraphColor_VB - : public GraphColor { +template +class GraphColor_VB : public GraphColor { public: typedef long long int ban_type; @@ -202,32 +181,24 @@ class GraphColor_VB typedef typename HandleType::nnz_lno_t nnz_lno_t; typedef typename HandleType::color_t color_t; - typedef typename HandleType::color_host_view_t - color_host_view_t; // Host view type + typedef typename HandleType::color_host_view_t color_host_view_t; // Host view type typedef typename HandleType::HandleExecSpace MyExecSpace; typedef typename HandleType::HandleTempMemorySpace MyTempMemorySpace; - typedef - typename HandleType::HandlePersistentMemorySpace MyPersistentMemorySpace; + typedef typename HandleType::HandlePersistentMemorySpace MyPersistentMemorySpace; - typedef typename Kokkos::View - single_dim_index_view_type; + typedef typename Kokkos::View single_dim_index_view_type; // typedef typename Kokkos::View // um_array_type; - typedef typename single_dim_index_view_type::HostMirror - single_dim_index_host_view_type; // Host view type + typedef typename single_dim_index_view_type::HostMirror single_dim_index_host_view_type; // Host view type typedef Kokkos::RangePolicy my_exec_space; - typedef typename HandleType::size_type_temp_work_view_t - size_type_temp_work_view_t; - typedef typename HandleType::size_type_persistent_work_view_t - size_type_persistent_work_view_t; + typedef typename HandleType::size_type_temp_work_view_t size_type_temp_work_view_t; + typedef typename HandleType::size_type_persistent_work_view_t size_type_persistent_work_view_t; - typedef - typename HandleType::nnz_lno_temp_work_view_t nnz_lno_temp_work_view_t; - typedef typename HandleType::nnz_lno_persistent_work_view_t - nnz_lno_persistent_work_view_t; + typedef typename HandleType::nnz_lno_temp_work_view_t nnz_lno_temp_work_view_t; + typedef typename HandleType::nnz_lno_persistent_work_view_t nnz_lno_persistent_work_view_t; typedef typename in_lno_row_view_t::const_type const_lno_row_view_t; @@ 
-240,21 +211,21 @@ class GraphColor_VB bool _serialConflictResolution; // if true use serial conflict resolution bool _ticToc; // if true print info in each step - ConflictList _conflict_scheme; // Enum: COLORING_NOCONFLICT, COLORING_ATOMIC, - // COLORING_PPS + ConflictList _conflict_scheme; // Enum: COLORING_NOCONFLICT, COLORING_ATOMIC, + // COLORING_PPS - double _pps_ratio; // the minimum number of reduction on the size of the - // conflictlist to create a new conflictlist + double _pps_ratio; // the minimum number of reduction on the size of the + // conflictlist to create a new conflictlist nnz_lno_t _min_vertex_cut_off; // minimum number of vertices to reduce the // conflictlist further. - bool _edge_filtering; // if true, edge-filtering is applied by swaps on - // adjacency array. - int _chunkSize; // the size of the minimum work unit assigned to threads. - // Changes the convergence on GPUs - char _use_color_set; // the VB algorithm type. - // 0 for VB: - // 1: for VBCS - // 2: for VBBIT + bool _edge_filtering; // if true, edge-filtering is applied by swaps on + // adjacency array. + int _chunkSize; // the size of the minimum work unit assigned to threads. + // Changes the convergence on GPUs + char _use_color_set; // the VB algorithm type. + // 0 for VB: + // 1: for VBCS + // 2: for VBBIT int _max_num_iterations; @@ -268,17 +239,14 @@ class GraphColor_VB * \param coloring_handle: GraphColoringHandle object that holds the * specification about the graph coloring, including parameters. 
*/ - GraphColor_VB(nnz_lno_t nv_, size_type ne_, const_lno_row_view_t row_map, - const_lno_nnz_view_t entries, HandleType *coloring_handle) - : GraphColor( - nv_, ne_, row_map, entries, coloring_handle), - _serialConflictResolution( - coloring_handle->get_serial_conflict_resolution()), + GraphColor_VB(nnz_lno_t nv_, size_type ne_, const_lno_row_view_t row_map, const_lno_nnz_view_t entries, + HandleType *coloring_handle) + : GraphColor(nv_, ne_, row_map, entries, coloring_handle), + _serialConflictResolution(coloring_handle->get_serial_conflict_resolution()), _ticToc(coloring_handle->get_tictoc()), _conflict_scheme(coloring_handle->get_conflict_list_type()), _pps_ratio(coloring_handle->get_min_reduction_for_conflictlist()), - _min_vertex_cut_off( - coloring_handle->get_min_elements_for_conflictlist()), + _min_vertex_cut_off(coloring_handle->get_min_elements_for_conflictlist()), _edge_filtering(coloring_handle->get_vb_edge_filtering()), _chunkSize(coloring_handle->get_vb_chunk_size()), _use_color_set(), @@ -309,20 +277,15 @@ class GraphColor_VB virtual void color_graph(color_view_type colors, int &num_loops) { if (this->_ticToc) { std::cout << "\tVB params:" << std::endl - << "\tuseConflictList:" << int(this->_conflict_scheme) - << std::endl + << "\tuseConflictList:" << int(this->_conflict_scheme) << std::endl << "\talgorithm:" << (int)this->_use_color_set << std::endl - << "\tserialConflictResolution:" - << (int)this->_serialConflictResolution << std::endl + << "\tserialConflictResolution:" << (int)this->_serialConflictResolution << std::endl << "\tticToc:" << (int)this->_ticToc << std::endl << "\tuse_color_set:" << (int)this->_use_color_set << std::endl << "\tpps_ratio:" << this->_pps_ratio << std::endl - << "\tmin_vertex_cut_off:" << this->_min_vertex_cut_off - << std::endl - << "\tedge_filtering:" << (int)this->_edge_filtering - << std::endl - << "\tmax_num_iterations:" << this->_max_num_iterations - << std::endl + << "\tmin_vertex_cut_off:" << 
this->_min_vertex_cut_off << std::endl + << "\tedge_filtering:" << (int)this->_edge_filtering << std::endl + << "\tmax_num_iterations:" << this->_max_num_iterations << std::endl << "\tchunkSize:" << this->_chunkSize << std::endl; } @@ -334,9 +297,7 @@ class GraphColor_VB // We need to copy the adjacency array so that we dont harm the original // one. if (this->_edge_filtering) { - adj_copy = nnz_lno_temp_work_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "adj copy"), - this->ne); + adj_copy = nnz_lno_temp_work_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "adj copy"), this->ne); Kokkos::deep_copy(adj_copy, this->adj); } @@ -348,9 +309,8 @@ class GraphColor_VB } // the conflictlist - nnz_lno_temp_work_view_t current_vertexList = nnz_lno_temp_work_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "vertexList"), - this->nv); + nnz_lno_temp_work_view_t current_vertexList = + nnz_lno_temp_work_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "vertexList"), this->nv); nnz_lno_t current_vertexListLength = this->nv; if (this->cp->get_use_vtx_list()) { @@ -359,9 +319,8 @@ class GraphColor_VB current_vertexListLength = this->cp->get_vertex_list_size(); } else { // init vertexList sequentially. - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::InitList", my_exec_space(0, this->nv), - functorInitList(current_vertexList)); + Kokkos::parallel_for("KokkosGraph::GraphColoring::InitList", my_exec_space(0, this->nv), + functorInitList(current_vertexList)); } // the next iteration's conflict list @@ -374,11 +333,9 @@ class GraphColor_VB // if a conflictlist is used if (this->_conflict_scheme != COLORING_NOCONFLICT) { // Vertices to recolor. Will swap with vertexList. 
- next_iteration_recolorList = nnz_lno_temp_work_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "recolorList"), - this->nv); - next_iteration_recolorListLength = - single_dim_index_view_type("recolorListLength"); + next_iteration_recolorList = + nnz_lno_temp_work_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "recolorList"), this->nv); + next_iteration_recolorListLength = single_dim_index_view_type("recolorListLength"); } nnz_lno_t numUncolored = this->nv; @@ -394,13 +351,13 @@ class GraphColor_VB if (this->_edge_filtering) { // First color greedy speculatively, // some conflicts expected - this->colorGreedyEF(this->xadj, adj_copy, colors, vertex_color_set, - current_vertexList, current_vertexListLength); + this->colorGreedyEF(this->xadj, adj_copy, colors, vertex_color_set, current_vertexList, + current_vertexListLength); } else { // First color greedy speculatively, // some conflicts expected - this->colorGreedy(this->xadj, this->adj, colors, vertex_color_set, - current_vertexList, current_vertexListLength); + this->colorGreedy(this->xadj, this->adj, colors, vertex_color_set, current_vertexList, + current_vertexListLength); } MyExecSpace().fence(); @@ -408,22 +365,19 @@ class GraphColor_VB if (this->_ticToc) { double t = timer.seconds(); total_time_greedy_phase += t; - std::cout << "\tTime speculative greedy phase " << iter << " : " << t - << std::endl; + std::cout << "\tTime speculative greedy phase " << iter << " : " << t << std::endl; timer.reset(); } bool swap_work_arrays = true; if (this->_edge_filtering) { - numUncolored = this->findConflicts( - swap_work_arrays, this->xadj, adj_copy, colors, vertex_color_set, - current_vertexList, current_vertexListLength, - next_iteration_recolorList, next_iteration_recolorListLength); + numUncolored = + this->findConflicts(swap_work_arrays, this->xadj, adj_copy, colors, vertex_color_set, current_vertexList, + current_vertexListLength, next_iteration_recolorList, next_iteration_recolorListLength); } else 
{ - numUncolored = this->findConflicts( - swap_work_arrays, this->xadj, this->adj, colors, vertex_color_set, - current_vertexList, current_vertexListLength, - next_iteration_recolorList, next_iteration_recolorListLength); + numUncolored = + this->findConflicts(swap_work_arrays, this->xadj, this->adj, colors, vertex_color_set, current_vertexList, + current_vertexListLength, next_iteration_recolorList, next_iteration_recolorListLength); } MyExecSpace().fence(); @@ -431,41 +385,34 @@ class GraphColor_VB if (_ticToc) { double t = timer.seconds(); total_time_find_conflicts += t; - std::cout << "\tTime conflict detection " << iter << " : " << t - << std::endl; + std::cout << "\tTime conflict detection " << iter << " : " << t << std::endl; timer.reset(); } - if (this->_serialConflictResolution) - break; // Break after first iteration. - if (this->_conflict_scheme != COLORING_NOCONFLICT && swap_work_arrays && - (iter + 1 < this->_max_num_iterations)) { + if (this->_serialConflictResolution) break; // Break after first iteration. + if (this->_conflict_scheme != COLORING_NOCONFLICT && swap_work_arrays && (iter + 1 < this->_max_num_iterations)) { // Swap recolorList and vertexList - nnz_lno_temp_work_view_t temp = current_vertexList; - current_vertexList = next_iteration_recolorList; - next_iteration_recolorList = temp; - current_vertexListLength = numUncolored; - next_iteration_recolorListLength = - single_dim_index_view_type("recolorListLength"); + nnz_lno_temp_work_view_t temp = current_vertexList; + current_vertexList = next_iteration_recolorList; + next_iteration_recolorList = temp; + current_vertexListLength = numUncolored; + next_iteration_recolorListLength = single_dim_index_view_type("recolorListLength"); } } // if VBCS algorithm is used, the colors are converted back to original // form. 
if (this->_use_color_set == 1) { - Kokkos::parallel_for("KokkosGraph::GraphColoring::SetFinalColors", - my_exec_space(0, this->nv), + Kokkos::parallel_for("KokkosGraph::GraphColoring::SetFinalColors", my_exec_space(0, this->nv), set_final_colors(colors, vertex_color_set)); } if (numUncolored > 0) { if (this->_edge_filtering) { // Resolve conflicts by recoloring in serial - this->resolveConflicts(this->nv, this->xadj, adj_copy, colors, - current_vertexList, current_vertexListLength); + this->resolveConflicts(this->nv, this->xadj, adj_copy, colors, current_vertexList, current_vertexListLength); } else { // Resolve conflicts by recoloring in serial - this->resolveConflicts(this->nv, this->xadj, this->adj, colors, - current_vertexList, current_vertexListLength); + this->resolveConflicts(this->nv, this->xadj, this->adj, colors, current_vertexList, current_vertexListLength); } MyExecSpace().fence(); if (_ticToc) { @@ -478,8 +425,7 @@ class GraphColor_VB this->cp->add_to_overall_coloring_time_phase1(total_time_greedy_phase); this->cp->add_to_overall_coloring_time_phase2(total_time_find_conflicts); - this->cp->add_to_overall_coloring_time_phase3( - total_time_serial_conflict_resolution); + this->cp->add_to_overall_coloring_time_phase3(total_time_serial_conflict_resolution); } // color_graph (end) private: @@ -491,13 +437,10 @@ class GraphColor_VB * \param current_vertexList_: current conflictlist * \param current_vertexListLength_: size of current conflictlist */ - void colorGreedy(const_lno_row_view_t xadj_, const_lno_nnz_view_t adj_, - color_view_type vertex_colors_, - nnz_lno_temp_work_view_t vertex_color_set, - nnz_lno_temp_work_view_t current_vertexList_, + void colorGreedy(const_lno_row_view_t xadj_, const_lno_nnz_view_t adj_, color_view_type vertex_colors_, + nnz_lno_temp_work_view_t vertex_color_set, nnz_lno_temp_work_view_t current_vertexList_, nnz_lno_t current_vertexListLength_) { - nnz_lno_t chunkSize_ = - this->_chunkSize; // Process chunkSize vertices in one 
chunk + nnz_lno_t chunkSize_ = this->_chunkSize; // Process chunkSize vertices in one chunk if (current_vertexListLength_ < 100 * chunkSize_) chunkSize_ = 1; @@ -505,34 +448,28 @@ class GraphColor_VB if (this->_use_color_set == 2) { // std::cout << ">>> functorGreedyColor_IMPLOG" << std::endl; // // WCMCLEN - functorGreedyColor_IMPLOG gc(this->nv, xadj_, adj_, vertex_colors_, - current_vertexList_, + functorGreedyColor_IMPLOG gc(this->nv, xadj_, adj_, vertex_colors_, current_vertexList_, current_vertexListLength_, chunkSize_); - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::GreedyColor_IMPLOG", - my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); + Kokkos::parallel_for("KokkosGraph::GraphColoring::GreedyColor_IMPLOG", + my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); } // VBCS algorithm else if (this->_use_color_set == 1) { // std::cout << ">>> functorGreedyColor_IMP" << std::endl; // WCMCLEN - functorGreedyColor_IMP gc(this->nv, xadj_, adj_, vertex_colors_, - vertex_color_set, current_vertexList_, + functorGreedyColor_IMP gc(this->nv, xadj_, adj_, vertex_colors_, vertex_color_set, current_vertexList_, current_vertexListLength_, chunkSize_); - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::GreedyColor_IMP", - my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); + Kokkos::parallel_for("KokkosGraph::GraphColoring::GreedyColor_IMP", + my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); } // VB algorithm else if (this->_use_color_set == 0) { // std::cout << ">>> functorGreedyColor" << std::endl; // WCMCLEN - functorGreedyColor gc(this->nv, xadj_, adj_, vertex_colors_, - current_vertexList_, current_vertexListLength_, + functorGreedyColor gc(this->nv, xadj_, adj_, vertex_colors_, current_vertexList_, current_vertexListLength_, chunkSize_); - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::GreedyColor", - my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); + 
Kokkos::parallel_for("KokkosGraph::GraphColoring::GreedyColor", + my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); } } // colorGreedy (end) @@ -544,13 +481,10 @@ class GraphColor_VB * \param current_vertexList_: current conflictlist * \param current_vertexListLength_: size of current conflictlist */ - void colorGreedyEF(const_lno_row_view_t xadj_, nnz_lno_temp_work_view_t adj_, - color_view_type vertex_colors_, - nnz_lno_temp_work_view_t vertex_color_set, - nnz_lno_temp_work_view_t current_vertexList_, + void colorGreedyEF(const_lno_row_view_t xadj_, nnz_lno_temp_work_view_t adj_, color_view_type vertex_colors_, + nnz_lno_temp_work_view_t vertex_color_set, nnz_lno_temp_work_view_t current_vertexList_, nnz_lno_t current_vertexListLength_) { - nnz_lno_t chunkSize_ = - this->_chunkSize; // Process chunkSize vertices in one chunk + nnz_lno_t chunkSize_ = this->_chunkSize; // Process chunkSize vertices in one chunk if (current_vertexListLength_ < 100 * chunkSize_) chunkSize_ = 1; @@ -559,34 +493,28 @@ class GraphColor_VB // If edge filtering is applied // std::cout << ">>> functorGreedyColor_IMPLOG_EF" << std::endl; // // WCMCLEN - functorGreedyColor_IMPLOG_EF gc(this->nv, xadj_, adj_, vertex_colors_, - current_vertexList_, + functorGreedyColor_IMPLOG_EF gc(this->nv, xadj_, adj_, vertex_colors_, current_vertexList_, current_vertexListLength_, chunkSize_); - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::GreedyColor_IMPLOG_EF", - my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); + Kokkos::parallel_for("KokkosGraph::GraphColoring::GreedyColor_IMPLOG_EF", + my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); } // VBCS algorithm else if (this->_use_color_set == 1) { // std::cout << ">>> functorGreedyColor_IMP_EF" << std::endl; // // WCMCLEN - functorGreedyColor_IMP_EF gc(this->nv, xadj_, adj_, vertex_colors_, - vertex_color_set, current_vertexList_, + functorGreedyColor_IMP_EF gc(this->nv, xadj_, adj_, vertex_colors_, 
vertex_color_set, current_vertexList_, current_vertexListLength_, chunkSize_); - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::GreedyColor_IMP_EF", - my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); + Kokkos::parallel_for("KokkosGraph::GraphColoring::GreedyColor_IMP_EF", + my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); } // VB algorithm else if (this->_use_color_set == 0) { // std::cout << ">>> functorGreedyColor_EF" << std::endl; // WCMCLEN - functorGreedyColor_EF gc(this->nv, xadj_, adj_, vertex_colors_, - current_vertexList_, current_vertexListLength_, + functorGreedyColor_EF gc(this->nv, xadj_, adj_, vertex_colors_, current_vertexList_, current_vertexListLength_, chunkSize_); - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::GreedyColor_EF", - my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); + Kokkos::parallel_for("KokkosGraph::GraphColoring::GreedyColor_EF", + my_exec_space(0, current_vertexListLength_ / chunkSize_ + 1), gc); } } @@ -601,85 +529,63 @@ class GraphColor_VB * \param next_iteration_recolorListLength_: size of next conflictlist */ template - nnz_lno_t findConflicts( - bool &swap_work_arrays, const_lno_row_view_t xadj_, adj_view_t adj_, - color_view_type vertex_colors_, - nnz_lno_temp_work_view_t vertex_color_set_, - nnz_lno_temp_work_view_t current_vertexList_, - nnz_lno_t current_vertexListLength_, - nnz_lno_temp_work_view_t next_iteration_recolorList_, - single_dim_index_view_type next_iteration_recolorListLength_) { + nnz_lno_t findConflicts(bool &swap_work_arrays, const_lno_row_view_t xadj_, adj_view_t adj_, + color_view_type vertex_colors_, nnz_lno_temp_work_view_t vertex_color_set_, + nnz_lno_temp_work_view_t current_vertexList_, nnz_lno_t current_vertexListLength_, + nnz_lno_temp_work_view_t next_iteration_recolorList_, + single_dim_index_view_type next_iteration_recolorListLength_) { swap_work_arrays = true; nnz_lno_t numUncolored = 0; if (this->_conflict_scheme == 
COLORING_NOCONFLICT) { if (this->_use_color_set == 0 || this->_use_color_set == 2) { - functorFindConflicts_No_Conflist conf(this->nv, xadj_, adj_, - vertex_colors_); - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::FindConflicts::CaseA", - my_exec_space(0, current_vertexListLength_), conf, numUncolored); + functorFindConflicts_No_Conflist conf(this->nv, xadj_, adj_, vertex_colors_); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::FindConflicts::CaseA", + my_exec_space(0, current_vertexListLength_), conf, numUncolored); } else { - functorFindConflicts_No_Conflist_IMP conf( - this->nv, xadj_, adj_, vertex_colors_, vertex_color_set_); - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::FindConflicts::CaseB", - my_exec_space(0, current_vertexListLength_), conf, numUncolored); + functorFindConflicts_No_Conflist_IMP conf(this->nv, xadj_, adj_, vertex_colors_, vertex_color_set_); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::FindConflicts::CaseB", + my_exec_space(0, current_vertexListLength_), conf, numUncolored); } } else if (this->_conflict_scheme == COLORING_PPS) { if (this->_use_color_set == 0 || this->_use_color_set == 2) { // Check for conflicts. Compute numUncolored == numConflicts. 
- functorFindConflicts_PPS conf( - this->nv, xadj_, adj_, vertex_colors_, current_vertexList_); - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::FindConflicts::CaseC", - my_exec_space(0, current_vertexListLength_), conf, numUncolored); + functorFindConflicts_PPS conf(this->nv, xadj_, adj_, vertex_colors_, current_vertexList_); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::FindConflicts::CaseC", + my_exec_space(0, current_vertexListLength_), conf, numUncolored); } else { - functorFindConflicts_PPS_IMP conf( - this->nv, xadj_, adj_, vertex_colors_, vertex_color_set_, - current_vertexList_); - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::FindConflicts::CaseD", - my_exec_space(0, current_vertexListLength_), conf, numUncolored); + functorFindConflicts_PPS_IMP conf(this->nv, xadj_, adj_, vertex_colors_, vertex_color_set_, + current_vertexList_); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::FindConflicts::CaseD", + my_exec_space(0, current_vertexListLength_), conf, numUncolored); } - if (numUncolored && - (current_vertexListLength_ >= this->_min_vertex_cut_off) && - (double(numUncolored) / current_vertexListLength_ < - (1.0 - this->_pps_ratio))) { + if (numUncolored && (current_vertexListLength_ >= this->_min_vertex_cut_off) && + (double(numUncolored) / current_vertexListLength_ < (1.0 - this->_pps_ratio))) { if (this->_ticToc) { - std::cout - << "\tcreating work array with pps current_vertexListLength_:" - << current_vertexListLength_ - << " params->min_vertex_cut_off:" << this->_min_vertex_cut_off - << std::endl; + std::cout << "\tcreating work array with pps current_vertexListLength_:" << current_vertexListLength_ + << " params->min_vertex_cut_off:" << this->_min_vertex_cut_off << std::endl; } single_dim_index_host_view_type h_numUncolored(&numUncolored); Kokkos::deep_copy(next_iteration_recolorListLength_, h_numUncolored); Kokkos::parallel_scan( - "KokkosGraph::GraphColoring::PrefixSum", - my_exec_space(0, 
current_vertexListLength_), - ppsWorklistFunctorVB( - this->nv, current_vertexList_, next_iteration_recolorList_)); + "KokkosGraph::GraphColoring::PrefixSum", my_exec_space(0, current_vertexListLength_), + ppsWorklistFunctorVB(this->nv, current_vertexList_, next_iteration_recolorList_)); } else { swap_work_arrays = false; } } else { // worklist scheme COLORING_ATOMIC if (this->_use_color_set == 0 || this->_use_color_set == 2) { // Check for conflicts. Compute numUncolored == numConflicts. - functorFindConflicts_Atomic conf( - this->nv, xadj_, adj_, vertex_colors_, current_vertexList_, - next_iteration_recolorList_, next_iteration_recolorListLength_); - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::FindConflictsAtomic", - my_exec_space(0, current_vertexListLength_), conf, numUncolored); + functorFindConflicts_Atomic conf(this->nv, xadj_, adj_, vertex_colors_, current_vertexList_, + next_iteration_recolorList_, next_iteration_recolorListLength_); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::FindConflictsAtomic", + my_exec_space(0, current_vertexListLength_), conf, numUncolored); } else { - functorFindConflicts_Atomic_IMP conf( - this->nv, xadj_, adj_, vertex_colors_, vertex_color_set_, - current_vertexList_, next_iteration_recolorList_, - next_iteration_recolorListLength_); - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::FindConflictsAtomic_IMP", - my_exec_space(0, current_vertexListLength_), conf, numUncolored); + functorFindConflicts_Atomic_IMP conf(this->nv, xadj_, adj_, vertex_colors_, vertex_color_set_, + current_vertexList_, next_iteration_recolorList_, + next_iteration_recolorListLength_); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::FindConflictsAtomic_IMP", + my_exec_space(0, current_vertexListLength_), conf, numUncolored); } } if (this->_ticToc) { @@ -697,10 +603,8 @@ class GraphColor_VB * \param current_vertexListLength_: size of current conflictlist */ template - void resolveConflicts(nnz_lno_t _nv, 
const_lno_row_view_t xadj_, - adj_view_t adj_, color_view_type vertex_colors_, - nnz_lno_temp_work_view_t current_vertexList_, - size_type current_vertexListLength_) { + void resolveConflicts(nnz_lno_t _nv, const_lno_row_view_t xadj_, adj_view_t adj_, color_view_type vertex_colors_, + nnz_lno_temp_work_view_t current_vertexList_, size_type current_vertexListLength_) { color_t *forbidden = new color_t[_nv]; nnz_lno_t i = 0; nnz_lno_t end = _nv; @@ -711,10 +615,9 @@ class GraphColor_VB h_recolor_list = Kokkos::create_mirror_view(current_vertexList_); Kokkos::deep_copy(h_recolor_list, current_vertexList_); } - color_host_view_t h_colors = Kokkos::create_mirror_view(vertex_colors_); - typename const_lno_row_view_t::HostMirror h_idx = - Kokkos::create_mirror_view(xadj_); - typename adj_view_t::HostMirror h_adj = Kokkos::create_mirror_view(adj_); + color_host_view_t h_colors = Kokkos::create_mirror_view(vertex_colors_); + typename const_lno_row_view_t::HostMirror h_idx = Kokkos::create_mirror_view(xadj_); + typename adj_view_t::HostMirror h_adj = Kokkos::create_mirror_view(adj_); Kokkos::deep_copy(h_colors, vertex_colors_); Kokkos::deep_copy(h_idx, xadj_); @@ -756,12 +659,9 @@ class GraphColor_VB nnz_lno_t _vertexListLength; nnz_lno_t _chunkSize; - functorGreedyColor_IMPLOG_EF(nnz_lno_t nv_, const_lno_row_view_t xadj_, - nnz_lno_temp_work_view_t adj_, - color_view_type colors, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_t vertexListLength, - nnz_lno_t chunkSize) + functorGreedyColor_IMPLOG_EF(nnz_lno_t nv_, const_lno_row_view_t xadj_, nnz_lno_temp_work_view_t adj_, + color_view_type colors, nnz_lno_temp_work_view_t vertexList, + nnz_lno_t vertexListLength, nnz_lno_t chunkSize) : nv(nv_), _idx(xadj_), _adj(adj_), @@ -794,8 +694,7 @@ class GraphColor_VB // we parse the neigborlist multiple times, // each time we look for a certain range of colors. 
- for (; (offset <= degree + VBBIT_COLORING_FORBIDDEN_SIZE); - offset += VBBIT_COLORING_FORBIDDEN_SIZE) { + for (; (offset <= degree + VBBIT_COLORING_FORBIDDEN_SIZE); offset += VBBIT_COLORING_FORBIDDEN_SIZE) { // Forbidden colors // we use a single (long) int for forbidden colors ban_type forbidden = 0; @@ -867,10 +766,9 @@ class GraphColor_VB nnz_lno_t _vertexListLength; nnz_lno_t _chunkSize; - functorGreedyColor_IMPLOG(nnz_lno_t nv_, const_lno_row_view_t xadj_, - const_lno_nnz_view_t adj_, color_view_type colors, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_t vertexListLength, nnz_lno_t chunkSize) + functorGreedyColor_IMPLOG(nnz_lno_t nv_, const_lno_row_view_t xadj_, const_lno_nnz_view_t adj_, + color_view_type colors, nnz_lno_temp_work_view_t vertexList, nnz_lno_t vertexListLength, + nnz_lno_t chunkSize) : nv(nv_), _idx(xadj_), _adj(adj_), @@ -896,8 +794,7 @@ class GraphColor_VB color_t degree = my_xadj_end - xadjbegin; // My degree color_t offset = 0; - for (; (offset <= degree + VBBIT_COLORING_FORBIDDEN_SIZE); - offset += VBBIT_COLORING_FORBIDDEN_SIZE) { + for (; (offset <= degree + VBBIT_COLORING_FORBIDDEN_SIZE); offset += VBBIT_COLORING_FORBIDDEN_SIZE) { ban_type forbidden = 0; // Forbidden colors // Check nbors, fill forbidden array. 
@@ -950,12 +847,9 @@ class GraphColor_VB nnz_lno_t _vertexListLength; nnz_lno_t _chunkSize; - functorGreedyColor_IMP_EF(nnz_lno_t nv_, const_lno_row_view_t xadj_, - nnz_lno_temp_work_view_t adj_, - color_view_type colors, - nnz_lno_temp_work_view_t color_set, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_t vertexListLength, nnz_lno_t chunkSize) + functorGreedyColor_IMP_EF(nnz_lno_t nv_, const_lno_row_view_t xadj_, nnz_lno_temp_work_view_t adj_, + color_view_type colors, nnz_lno_temp_work_view_t color_set, + nnz_lno_temp_work_view_t vertexList, nnz_lno_t vertexListLength, nnz_lno_t chunkSize) : nv(nv_), _xadj(xadj_), _adj(adj_), @@ -1033,10 +927,8 @@ class GraphColor_VB nnz_lno_t _vertexListLength; nnz_lno_t _chunkSize; - functorGreedyColor_IMP(nnz_lno_t nv_, const_lno_row_view_t xadj_, - const_lno_nnz_view_t adj_, color_view_type colors, - nnz_lno_temp_work_view_t color_set, - nnz_lno_temp_work_view_t vertexList, + functorGreedyColor_IMP(nnz_lno_t nv_, const_lno_row_view_t xadj_, const_lno_nnz_view_t adj_, color_view_type colors, + nnz_lno_temp_work_view_t color_set, nnz_lno_temp_work_view_t vertexList, nnz_lno_t vertexListLength, nnz_lno_t chunkSize) : nv(nv_), _xadj(xadj_), @@ -1105,10 +997,9 @@ class GraphColor_VB nnz_lno_t _vertexListLength; nnz_lno_t _chunkSize; - functorGreedyColor_EF(nnz_lno_t nv_, const_lno_row_view_t xadj_, - nnz_lno_temp_work_view_t adj_, color_view_type colors, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_t vertexListLength, nnz_lno_t chunkSize) + functorGreedyColor_EF(nnz_lno_t nv_, const_lno_row_view_t xadj_, nnz_lno_temp_work_view_t adj_, + color_view_type colors, nnz_lno_temp_work_view_t vertexList, nnz_lno_t vertexListLength, + nnz_lno_t chunkSize) : nv(nv_), _idx(xadj_), _adj(adj_), @@ -1150,8 +1041,7 @@ class GraphColor_VB color_t offset = 0; size_type xadjbegin = _idx(i); - for (; (offset <= degree + VB_COLORING_FORBIDDEN_SIZE) && (!foundColor); - offset += VB_COLORING_FORBIDDEN_SIZE) { + for (; (offset <= degree + 
VB_COLORING_FORBIDDEN_SIZE) && (!foundColor); offset += VB_COLORING_FORBIDDEN_SIZE) { // initialize for (int j = 0; j < VB_COLORING_FORBIDDEN_SIZE; j++) { forbidden[j] = false; @@ -1211,10 +1101,8 @@ class GraphColor_VB nnz_lno_t _vertexListLength; nnz_lno_t _chunkSize; - functorGreedyColor(nnz_lno_t nv_, const_lno_row_view_t xadj_, - const_lno_nnz_view_t adj_, color_view_type colors, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_t vertexListLength, nnz_lno_t chunkSize) + functorGreedyColor(nnz_lno_t nv_, const_lno_row_view_t xadj_, const_lno_nnz_view_t adj_, color_view_type colors, + nnz_lno_temp_work_view_t vertexList, nnz_lno_t vertexListLength, nnz_lno_t chunkSize) : nv(nv_), _idx(xadj_), _adj(adj_), @@ -1253,8 +1141,7 @@ class GraphColor_VB // Do multiple passes if array is too small. color_t degree = _idx(i + 1) - _idx(i); // My degree color_t offset = 1; - for (; (offset <= degree + VB_COLORING_FORBIDDEN_SIZE) && (!foundColor); - offset += VB_COLORING_FORBIDDEN_SIZE) { + for (; (offset <= degree + VB_COLORING_FORBIDDEN_SIZE) && (!foundColor); offset += VB_COLORING_FORBIDDEN_SIZE) { // initialize for (int j = 0; j < VB_COLORING_FORBIDDEN_SIZE; j++) { forbidden[j] = false; @@ -1271,8 +1158,7 @@ class GraphColor_VB // foundColor = true; // return; //} - if ((c >= offset) && (c - offset < VB_COLORING_FORBIDDEN_SIZE)) - forbidden[c - offset] = true; + if ((c >= offset) && (c - offset < VB_COLORING_FORBIDDEN_SIZE)) forbidden[c - offset] = true; } // color vertex i with smallest available color (FirstFit) @@ -1302,8 +1188,7 @@ class GraphColor_VB adj_view_t _adj; color_view_type _colors; - functorFindConflicts_No_Conflist(nnz_lno_t nv_, const_lno_row_view_t xadj_, - adj_view_t adj_, color_view_type colors) + functorFindConflicts_No_Conflist(nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, color_view_type colors) : nv(nv_), _idx(xadj_), _adj(adj_), _colors(colors) {} KOKKOS_INLINE_FUNCTION @@ -1323,9 +1208,8 @@ class GraphColor_VB #endif 
_colors(neighbor) == my_color #ifdef DEGREECOMP - && - (myDegree < _idx(neighbor + 1) - _idx(neighbor) || - (myDegree == _idx(neighbor + 1) - _idx(neighbor) && ii < neighbor)) + && (myDegree < _idx(neighbor + 1) - _idx(neighbor) || + (myDegree == _idx(neighbor + 1) - _idx(neighbor) && ii < neighbor)) #endif ) { // std::cout << "me:" << ii << " n:" << neighbor << " color:" << @@ -1350,14 +1234,9 @@ class GraphColor_VB color_view_type _colors; nnz_lno_temp_work_view_t _vertexList; - functorFindConflicts_PPS(nnz_lno_t nv_, const_lno_row_view_t xadj_, - adj_view_t adj_, color_view_type colors, + functorFindConflicts_PPS(nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, color_view_type colors, nnz_lno_temp_work_view_t vertexList) - : nv(nv_), - _idx(xadj_), - _adj(adj_), - _colors(colors), - _vertexList(vertexList) {} + : nv(nv_), _idx(xadj_), _adj(adj_), _colors(colors), _vertexList(vertexList) {} KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t ii, nnz_lno_t &numConflicts) const { @@ -1378,9 +1257,8 @@ class GraphColor_VB #endif _colors(neighbor) == my_color #ifdef DEGREECOMP - && - (myDegree < _idx(neighbor + 1) - _idx(neighbor) || - (myDegree == _idx(neighbor + 1) - _idx(neighbor) && i < neighbor)) + && (myDegree < _idx(neighbor + 1) - _idx(neighbor) || + (myDegree == _idx(neighbor + 1) - _idx(neighbor) && i < neighbor)) #endif ) { _colors(i) = 0; // Uncolor vertex i @@ -1405,10 +1283,8 @@ class GraphColor_VB nnz_lno_temp_work_view_t _recolorList; single_dim_index_view_type _recolorListLength; - functorFindConflicts_Atomic(nnz_lno_t nv_, const_lno_row_view_t xadj_, - adj_view_t adj_, color_view_type colors, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_temp_work_view_t recolorList, + functorFindConflicts_Atomic(nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, color_view_type colors, + nnz_lno_temp_work_view_t vertexList, nnz_lno_temp_work_view_t recolorList, single_dim_index_view_type recolorListLength) : nv(nv_), _idx(xadj_), @@ 
-1420,9 +1296,7 @@ class GraphColor_VB KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t ii, nnz_lno_t &numConflicts) const { - typedef - typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; nnz_lno_t i = _vertexList(ii); color_t my_color = _colors(i); @@ -1441,15 +1315,13 @@ class GraphColor_VB #endif _colors(neighbor) == my_color #ifdef DEGREECOMP - && - (myDegree < _idx(neighbor + 1) - _idx(neighbor) || - (myDegree == _idx(neighbor + 1) - _idx(neighbor) && i < neighbor)) + && (myDegree < _idx(neighbor + 1) - _idx(neighbor) || + (myDegree == _idx(neighbor + 1) - _idx(neighbor) && i < neighbor)) #endif ) { _colors(i) = 0; // Uncolor vertex i // Atomically add vertex i to recolorList - const nnz_lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), - atomic_incr_type(1)); + const nnz_lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), atomic_incr_type(1)); _recolorList(k) = i; numConflicts += 1; break; // Once i is uncolored and marked conflict @@ -1470,16 +1342,9 @@ class GraphColor_VB color_view_type _colors; nnz_lno_temp_work_view_t _color_sets; - functorFindConflicts_No_Conflist_IMP(nnz_lno_t nv_, - const_lno_row_view_t xadj_, - adj_view_t adj_, - color_view_type colors, - nnz_lno_temp_work_view_t color_sets) - : nv(nv_), - _xadj(xadj_), - _adj(adj_), - _colors(colors), - _color_sets(color_sets) {} + functorFindConflicts_No_Conflist_IMP(nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, + color_view_type colors, nnz_lno_temp_work_view_t color_sets) + : nv(nv_), _xadj(xadj_), _adj(adj_), _colors(colors), _color_sets(color_sets) {} KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t ii, nnz_lno_t &numConflicts) const { @@ -1504,12 +1369,10 @@ class GraphColor_VB #ifndef DEGREECOMP ii < neighbor && neighbor < nv && #endif - _colors(neighbor) == my_color && - my_color_set == _color_sets(neighbor) + _colors(neighbor) == my_color && my_color_set == _color_sets(neighbor) 
#ifdef DEGREECOMP && (myDegree < _xadj(neighbor + 1) - _xadj(neighbor) || - (myDegree == _xadj(neighbor + 1) - _xadj(neighbor) && - ii < neighbor)) + (myDegree == _xadj(neighbor + 1) - _xadj(neighbor) && ii < neighbor)) #endif ) { _colors(ii) = 0; // Uncolor vertex i @@ -1535,16 +1398,9 @@ class GraphColor_VB nnz_lno_temp_work_view_t _color_sets; nnz_lno_temp_work_view_t _vertexList; - functorFindConflicts_PPS_IMP(nnz_lno_t nv_, const_lno_row_view_t xadj_, - adj_view_t adj_, color_view_type colors, - nnz_lno_temp_work_view_t color_sets, - nnz_lno_temp_work_view_t vertexList) - : nv(nv_), - _xadj(xadj_), - _adj(adj_), - _colors(colors), - _color_sets(color_sets), - _vertexList(vertexList) {} + functorFindConflicts_PPS_IMP(nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, color_view_type colors, + nnz_lno_temp_work_view_t color_sets, nnz_lno_temp_work_view_t vertexList) + : nv(nv_), _xadj(xadj_), _adj(adj_), _colors(colors), _color_sets(color_sets), _vertexList(vertexList) {} KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t ii, nnz_lno_t &numConflicts) const { @@ -1570,12 +1426,10 @@ class GraphColor_VB #ifndef DEGREECOMP i < neighbor && neighbor < nv && #endif - _colors(neighbor) == my_color && - my_color_set == _color_sets(neighbor) + _colors(neighbor) == my_color && my_color_set == _color_sets(neighbor) #ifdef DEGREECOMP && (myDegree < _xadj(neighbor + 1) - _xadj(neighbor) || - (myDegree == _xadj(neighbor + 1) - _xadj(neighbor) && - i < neighbor)) + (myDegree == _xadj(neighbor + 1) - _xadj(neighbor) && i < neighbor)) #endif ) { _colors(i) = 0; // Uncolor vertex i @@ -1603,12 +1457,9 @@ class GraphColor_VB nnz_lno_temp_work_view_t _recolorList; single_dim_index_view_type _recolorListLength; - functorFindConflicts_Atomic_IMP( - nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, - color_view_type colors, nnz_lno_temp_work_view_t color_sets, - nnz_lno_temp_work_view_t vertexList, - nnz_lno_temp_work_view_t recolorList, - 
single_dim_index_view_type recolorListLength) + functorFindConflicts_Atomic_IMP(nnz_lno_t nv_, const_lno_row_view_t xadj_, adj_view_t adj_, color_view_type colors, + nnz_lno_temp_work_view_t color_sets, nnz_lno_temp_work_view_t vertexList, + nnz_lno_temp_work_view_t recolorList, single_dim_index_view_type recolorListLength) : nv(nv_), _xadj(xadj_), _adj(adj_), @@ -1620,16 +1471,13 @@ class GraphColor_VB KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t ii, nnz_lno_t &numConflicts) const { - typedef - typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; nnz_lno_t i = _vertexList(ii); color_t my_color = _colors(i); if (my_color == 0) { // this should only happen when one_color_set_per_iteration is set to // true. - const nnz_lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), - atomic_incr_type(1)); + const nnz_lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), atomic_incr_type(1)); _recolorList(k) = i; numConflicts++; } else { @@ -1647,19 +1495,16 @@ class GraphColor_VB #ifndef DEGREECOMP i < neighbor && neighbor < nv && #endif - _colors(neighbor) == my_color && - my_color_set == _color_sets(neighbor) + _colors(neighbor) == my_color && my_color_set == _color_sets(neighbor) #ifdef DEGREECOMP && (myDegree < _xadj(neighbor + 1) - _xadj(neighbor) || - (myDegree == _xadj(neighbor + 1) - _xadj(neighbor) && - i < neighbor)) + (myDegree == _xadj(neighbor + 1) - _xadj(neighbor) && i < neighbor)) #endif ) { _colors(i) = 0; // Uncolor vertex i _color_sets(i) = 0; // Atomically add vertex i to recolorList - const nnz_lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), - atomic_incr_type(1)); + const nnz_lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), atomic_incr_type(1)); _recolorList(k) = i; numConflicts++; break; // Once i is uncolored and marked conflict @@ -1690,8 +1535,7 @@ class GraphColor_VB view_type _vertexList; view_type _recolorList; - ppsWorklistFunctorVB(nnz_lno_t 
nv_, const view_type &vertexList, - const view_type &recolorList) + ppsWorklistFunctorVB(nnz_lno_t nv_, const view_type &vertexList, const view_type &recolorList) : _nv(nv_), _vertexList(vertexList), _recolorList(recolorList) {} KOKKOS_INLINE_FUNCTION @@ -1709,9 +1553,8 @@ class GraphColor_VB */ struct set_final_colors { color_view_type kokcol; - nnz_lno_temp_work_view_t - kokcolset; // the colors that are represented with bits, and the colors - // set that the color is in. + nnz_lno_temp_work_view_t kokcolset; // the colors that are represented with bits, and the colors + // set that the color is in. color_t color_size; /** \brief functor constructor. @@ -1720,11 +1563,8 @@ class GraphColor_VB * color_set_ together is used to represent the colors e.g. color_set_(v) * * (numbits_in_idx-1) + set_bit_position_in_kokcolors_(v) */ - set_final_colors(color_view_type kokcol_, - nnz_lno_temp_work_view_t kokcolset_) - : kokcol(kokcol_), - kokcolset(kokcolset_), - color_size(sizeof(color_t) * 8) {} + set_final_colors(color_view_type kokcol_, nnz_lno_temp_work_view_t kokcolset_) + : kokcol(kokcol_), kokcolset(kokcolset_), color_size(sizeof(color_t) * 8) {} KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t &ii) const { @@ -1747,10 +1587,8 @@ class GraphColor_VB /*! \brief Class for the deterministic vertex based graph coloring algorithms. 
*/ -template -class GraphColor_VBD - : public GraphColor { +template +class GraphColor_VBD : public GraphColor { public: typedef long long int ban_type; @@ -1764,30 +1602,22 @@ class GraphColor_VBD typedef typename HandleType::nnz_lno_t nnz_lno_t; typedef typename HandleType::color_t color_t; - typedef typename HandleType::color_host_view_t - color_host_view_t; // Host view type + typedef typename HandleType::color_host_view_t color_host_view_t; // Host view type typedef typename HandleType::HandleExecSpace MyExecSpace; typedef typename HandleType::HandleTempMemorySpace MyTempMemorySpace; - typedef - typename HandleType::HandlePersistentMemorySpace MyPersistentMemorySpace; + typedef typename HandleType::HandlePersistentMemorySpace MyPersistentMemorySpace; - typedef typename Kokkos::View - single_dim_index_view_type; - typedef typename single_dim_index_view_type::HostMirror - single_dim_index_host_view_type; // Host view type + typedef typename Kokkos::View single_dim_index_view_type; + typedef typename single_dim_index_view_type::HostMirror single_dim_index_host_view_type; // Host view type typedef Kokkos::RangePolicy my_exec_space; - typedef typename HandleType::size_type_temp_work_view_t - size_type_temp_work_view_t; - typedef typename HandleType::size_type_persistent_work_view_t - size_type_persistent_work_view_t; + typedef typename HandleType::size_type_temp_work_view_t size_type_temp_work_view_t; + typedef typename HandleType::size_type_persistent_work_view_t size_type_persistent_work_view_t; - typedef - typename HandleType::nnz_lno_temp_work_view_t nnz_lno_temp_work_view_t; - typedef typename HandleType::nnz_lno_persistent_work_view_t - nnz_lno_persistent_work_view_t; + typedef typename HandleType::nnz_lno_temp_work_view_t nnz_lno_temp_work_view_t; + typedef typename HandleType::nnz_lno_persistent_work_view_t nnz_lno_persistent_work_view_t; typedef typename in_lno_row_view_t::const_type const_lno_row_view_t; @@ -1795,9 +1625,9 @@ class GraphColor_VBD typedef 
typename lno_nnz_view_t_::non_const_type non_const_lno_nnz_view_t; protected: - bool _ticToc; // if true print info in each step - int _chunkSize; // the size of the minimum work unit assigned to threads. - // Changes the convergence on GPUs + bool _ticToc; // if true print info in each step + int _chunkSize; // the size of the minimum work unit assigned to threads. + // Changes the convergence on GPUs char _use_color_set; // the VBD algorithm type. // 0 for VBD: @@ -1811,10 +1641,9 @@ class GraphColor_VBD * \param coloring_handle: GraphColoringHandle object that holds the * specification about the graph coloring, including parameters. */ - GraphColor_VBD(nnz_lno_t nv_, size_type ne_, const_lno_row_view_t row_map, - const_lno_nnz_view_t entries, HandleType *coloring_handle) - : GraphColor( - nv_, ne_, row_map, entries, coloring_handle), + GraphColor_VBD(nnz_lno_t nv_, size_type ne_, const_lno_row_view_t row_map, const_lno_nnz_view_t entries, + HandleType *coloring_handle) + : GraphColor(nv_, ne_, row_map, entries, coloring_handle), _ticToc(coloring_handle->get_tictoc()), _chunkSize(coloring_handle->get_vb_chunk_size()), _use_color_set() { @@ -1850,15 +1679,13 @@ class GraphColor_VBD nnz_lno_t numVertices = this->nv; - size_type maxColors = 0; - nnz_lno_persistent_work_view_t score = nnz_lno_persistent_work_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "score"), this->nv); - functorScoreCalculation scoreCalculation( - score, this->xadj); + size_type maxColors = 0; + nnz_lno_persistent_work_view_t score = + nnz_lno_persistent_work_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "score"), this->nv); + functorScoreCalculation scoreCalculation(score, this->xadj); - Kokkos::parallel_reduce("Deterministic Coloring: compute initial scores", - my_exec_space(0, this->nv), scoreCalculation, - Kokkos::Max(maxColors)); + Kokkos::parallel_reduce("Deterministic Coloring: compute initial scores", my_exec_space(0, this->nv), + scoreCalculation, 
Kokkos::Max(maxColors)); if (this->_ticToc) { std::cout << "maxColors: " << maxColors << std::endl; @@ -1867,18 +1694,17 @@ class GraphColor_VBD // Create the dependency list of the graph nnz_lno_persistent_work_view_t dependency("dependency", numVertices); Kokkos::View frontierSize("frontierSize"); - typename Kokkos::View::HostMirror - host_frontierSize = Kokkos::create_mirror_view(frontierSize); - Kokkos::View newFrontierSize( - "newFrontierSize"); - typename Kokkos::View::HostMirror - host_newFrontierSize = Kokkos::create_mirror_view(newFrontierSize); + typename Kokkos::View::HostMirror host_frontierSize = + Kokkos::create_mirror_view(frontierSize); + Kokkos::View newFrontierSize("newFrontierSize"); + typename Kokkos::View::HostMirror host_newFrontierSize = + Kokkos::create_mirror_view(newFrontierSize); nnz_lno_temp_work_view_t frontier("frontier", numVertices); nnz_lno_temp_work_view_t newFrontier("newFrontier", numVertices); - functorInitialDependency myInitialDependency( - this->xadj, this->adj, score, dependency, newFrontier, newFrontierSize); - Kokkos::parallel_for("Deterministic Coloring: compute dependency list", - my_exec_space(0, numVertices), myInitialDependency); + functorInitialDependency myInitialDependency(this->xadj, this->adj, score, dependency, newFrontier, + newFrontierSize); + Kokkos::parallel_for("Deterministic Coloring: compute dependency list", my_exec_space(0, numVertices), + myInitialDependency); Kokkos::deep_copy(host_newFrontierSize, newFrontierSize); while (host_newFrontierSize() > 0) { @@ -1886,8 +1712,7 @@ class GraphColor_VBD // First swap fontier with newFrontier and fontierSize with // newFrontierSize reset newFrontierSize functorSwapOnDevice mySwapOnDevice(frontierSize, newFrontierSize); - Kokkos::parallel_for("Swap frontier sizes", my_exec_space(0, 1), - mySwapOnDevice); + Kokkos::parallel_for("Swap frontier sizes", my_exec_space(0, 1), mySwapOnDevice); Kokkos::deep_copy(host_frontierSize, frontierSize); { auto swap_tmp = 
frontier; @@ -1898,11 +1723,9 @@ class GraphColor_VBD // Loop over nodes in the frontier // First variant without bit array, easier to understand/program if (this->_use_color_set == 0) { - functorDeterministicColoring myDeterministicColoring( - this->xadj, this->adj, dependency, frontier, frontierSize, - newFrontier, newFrontierSize, maxColors, colors); - Kokkos::parallel_for("Deterministic Coloring: color nodes in frontier", - my_exec_space(0, host_frontierSize()), + functorDeterministicColoring myDeterministicColoring(this->xadj, this->adj, dependency, frontier, frontierSize, + newFrontier, newFrontierSize, maxColors, colors); + Kokkos::parallel_for("Deterministic Coloring: color nodes in frontier", my_exec_space(0, host_frontierSize()), myDeterministicColoring); } else if (this->_use_color_set == 1) { @@ -1911,12 +1734,9 @@ class GraphColor_VBD // we need to use successive color ranges of width 64 // to represent all the possible colors on the graph. functorDeterministicColoringBitArray myDeterministicColoringBitArray( - this->xadj, this->adj, dependency, frontier, frontierSize, - newFrontier, newFrontierSize, maxColors, colors); - Kokkos::parallel_for( - "Deterministic Coloring: color nodes in frontier", - my_exec_space(0, host_frontierSize()), - myDeterministicColoringBitArray); // Loop over current frontier + this->xadj, this->adj, dependency, frontier, frontierSize, newFrontier, newFrontierSize, maxColors, colors); + Kokkos::parallel_for("Deterministic Coloring: color nodes in frontier", my_exec_space(0, host_frontierSize()), + myDeterministicColoringBitArray); // Loop over current frontier } Kokkos::deep_copy(host_newFrontierSize, newFrontierSize); } // while newFrontierSize @@ -1928,14 +1748,13 @@ class GraphColor_VBD nnz_lno_persistent_work_view_t score_; const_lno_row_view_t numNeighbors_; - functorScoreCalculation(nnz_lno_persistent_work_view_t &score, - const_lno_row_view_t &numNeighbors) + functorScoreCalculation(nnz_lno_persistent_work_view_t &score, 
const_lno_row_view_t &numNeighbors) : score_(score), numNeighbors_(numNeighbors) {} KOKKOS_INLINE_FUNCTION void operator()(const int i, size_type &update) const { score_(i) = numNeighbors_(i + 1) - numNeighbors_(i); - update = ((size_type)score_(i) < update ? update : (size_type)score_(i)); + update = ((size_type)score_(i) < update ? update : (size_type)score_(i)); } }; // functorScoreCalculation() @@ -1943,9 +1762,8 @@ class GraphColor_VBD Kokkos::View frontierSize_; Kokkos::View newFrontierSize_; - functorSwapOnDevice( - Kokkos::View frontierSize, - Kokkos::View newFrontierSize) + functorSwapOnDevice(Kokkos::View frontierSize, + Kokkos::View newFrontierSize) : frontierSize_(frontierSize), newFrontierSize_(newFrontierSize) {} KOKKOS_INLINE_FUNCTION @@ -1964,12 +1782,10 @@ class GraphColor_VBD nnz_lno_temp_work_view_t newFrontier_; Kokkos::View newFrontierSize_; - functorInitialDependency( - const_lno_row_view_t rowPtr, const_lno_nnz_view_t colInd, - nnz_lno_persistent_work_view_t score, - nnz_lno_persistent_work_view_t dependency, - nnz_lno_temp_work_view_t newFrontier, - Kokkos::View newFrontierSize) + functorInitialDependency(const_lno_row_view_t rowPtr, const_lno_nnz_view_t colInd, + nnz_lno_persistent_work_view_t score, nnz_lno_persistent_work_view_t dependency, + nnz_lno_temp_work_view_t newFrontier, + Kokkos::View newFrontierSize) : xadj_(rowPtr), adj_(colInd), score_(score), @@ -1979,8 +1795,7 @@ class GraphColor_VBD KOKKOS_INLINE_FUNCTION void operator()(const int node) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; int myScore = score_(node); int numNeighs = xadj_(node + 1) - xadj_(node); nnz_lno_t numVerts = xadj_.extent(0) - 1; @@ -1996,9 +1811,8 @@ class GraphColor_VBD } } if (dependency_(node) == 0) { - const size_type newFrontierIdx = - Kokkos::atomic_fetch_add(&newFrontierSize_(), atomic_incr_type(1)); - newFrontier_(newFrontierIdx) = node; + const size_type 
newFrontierIdx = Kokkos::atomic_fetch_add(&newFrontierSize_(), atomic_incr_type(1)); + newFrontier_(newFrontierIdx) = node; } } @@ -2016,14 +1830,12 @@ class GraphColor_VBD color_view_type colors_; Kokkos::View bannedColors_; - functorDeterministicColoring( - const_lno_row_view_t rowPtr, const_lno_nnz_view_t colInd, - nnz_lno_persistent_work_view_t dependency, - nnz_lno_temp_work_view_t frontier, - Kokkos::View frontierSize, - nnz_lno_temp_work_view_t newFrontier, - Kokkos::View newFrontierSize, - size_type maxColors, color_view_type colors) + functorDeterministicColoring(const_lno_row_view_t rowPtr, const_lno_nnz_view_t colInd, + nnz_lno_persistent_work_view_t dependency, nnz_lno_temp_work_view_t frontier, + Kokkos::View frontierSize, + nnz_lno_temp_work_view_t newFrontier, + Kokkos::View newFrontierSize, size_type maxColors, + color_view_type colors) : xadj_(rowPtr), adj_(colInd), dependency_(dependency), @@ -2033,14 +1845,12 @@ class GraphColor_VBD newFrontierSize_(newFrontierSize), maxColors_(maxColors), colors_(colors), - bannedColors_("KokkosKernels::bannedColors", frontier.size(), - maxColors_) {} + bannedColors_("KokkosKernels::bannedColors", frontier.size(), maxColors_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type frontierIdx) const { nnz_lno_t numVerts = xadj_.extent(0) - 1; - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; size_type frontierNode = frontier_(frontierIdx); for (size_type colorIdx = 0; colorIdx < maxColors_; ++colorIdx) { bannedColors_(frontierIdx, colorIdx) = 0; @@ -2048,8 +1858,7 @@ class GraphColor_VBD // Loop over neighbors, find banned colors, decrement dependency and // update newFrontier - for (size_type i = xadj_(frontierNode); i < xadj_(frontierNode + 1); - ++i) { + for (size_type i = xadj_(frontierNode); i < xadj_(frontierNode + 1); ++i) { nnz_lno_t neigh = adj_(i); // Skip remote edges (in case this is part of a distributed graph) if 
(neigh >= numVerts) continue; @@ -2059,13 +1868,11 @@ class GraphColor_VBD // so let's check that the node is not already colored, i.e. // its dependency is not -1. if (dependency_(neigh) >= 0) { - nnz_lno_t myDependency = - Kokkos::atomic_fetch_add(&dependency_(neigh), -1); + nnz_lno_t myDependency = Kokkos::atomic_fetch_add(&dependency_(neigh), -1); // dependency(neigh) = dependency(neigh) - 1; if (myDependency - 1 == 0) { - const size_type newFrontierIdx = Kokkos::atomic_fetch_add( - &newFrontierSize_(), atomic_incr_type(1)); - newFrontier_(newFrontierIdx) = neigh; + const size_type newFrontierIdx = Kokkos::atomic_fetch_add(&newFrontierSize_(), atomic_incr_type(1)); + newFrontier_(newFrontierIdx) = neigh; } } } // Loop over neighbors @@ -2090,14 +1897,12 @@ class GraphColor_VBD size_type maxColors_; color_view_type colors_; - functorDeterministicColoringBitArray( - const_lno_row_view_t rowPtr, const_lno_nnz_view_t colInd, - nnz_lno_persistent_work_view_t dependency, - nnz_lno_temp_work_view_t frontier, - Kokkos::View frontierSize, - nnz_lno_temp_work_view_t newFrontier, - Kokkos::View newFrontierSize, - size_type maxColors, color_view_type colors) + functorDeterministicColoringBitArray(const_lno_row_view_t rowPtr, const_lno_nnz_view_t colInd, + nnz_lno_persistent_work_view_t dependency, nnz_lno_temp_work_view_t frontier, + Kokkos::View frontierSize, + nnz_lno_temp_work_view_t newFrontier, + Kokkos::View newFrontierSize, + size_type maxColors, color_view_type colors) : xadj_(rowPtr), adj_(colInd), dependency_(dependency), @@ -2110,8 +1915,7 @@ class GraphColor_VBD KOKKOS_INLINE_FUNCTION void operator()(const size_type frontierIdx) const { - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; nnz_lno_t numVerts = xadj_.extent(0) - 1; size_type frontierNode = frontier_(frontierIdx); // Initialize bit array to all bits = 0 @@ -2121,8 +1925,7 @@ class GraphColor_VBD while (myColor == 0) { 
// Loop over neighbors, find banned colors in the range: // [colorOffset + 1, colorOffset + 64] - for (size_type i = xadj_(frontierNode); i < xadj_(frontierNode + 1); - ++i) { + for (size_type i = xadj_(frontierNode); i < xadj_(frontierNode + 1); ++i) { nnz_lno_t neigh = adj_(i); if (neigh >= numVerts) continue; color_t neighColor = colors_(neigh); @@ -2136,12 +1939,10 @@ class GraphColor_VBD // so let's check that the node is not already colored, i.e. // its dependency is not -1. if (colorOffset == 0 && dependency_(neigh) >= 0) { - nnz_lno_t myDependency = - Kokkos::atomic_fetch_add(&dependency_(neigh), -1); + nnz_lno_t myDependency = Kokkos::atomic_fetch_add(&dependency_(neigh), -1); if (myDependency - 1 == 0) { - const size_type newFrontierIdx = Kokkos::atomic_fetch_add( - &newFrontierSize_(), atomic_incr_type(1)); - newFrontier_(newFrontierIdx) = neigh; + const size_type newFrontierIdx = Kokkos::atomic_fetch_add(&newFrontierSize_(), atomic_incr_type(1)); + newFrontier_(newFrontierIdx) = neigh; } } } // Loop over neighbors @@ -2169,10 +1970,8 @@ class GraphColor_VBD * Performs a edge_base coloring, with the hope of better load balance * as well as better memory accesses on GPUs. */ -template -class GraphColor_EB : public GraphColor { +template +class GraphColor_EB : public GraphColor { // FIXME SYCL: This does not work, returns colors with conflicts. 
public: typedef long long int ban_type; @@ -2187,41 +1986,30 @@ class GraphColor_EB : public GraphColor - single_dim_index_view_type; + typedef typename Kokkos::View single_dim_index_view_type; - typedef typename single_dim_index_view_type::HostMirror - single_dim_index_host_view_type; // Host view type + typedef typename single_dim_index_view_type::HostMirror single_dim_index_host_view_type; // Host view type typedef Kokkos::RangePolicy my_exec_space; - typedef typename HandleType::size_type_temp_work_view_t - size_type_temp_work_view_t; - typedef typename HandleType::size_type_persistent_work_view_t - size_type_persistent_work_view_t; + typedef typename HandleType::size_type_temp_work_view_t size_type_temp_work_view_t; + typedef typename HandleType::size_type_persistent_work_view_t size_type_persistent_work_view_t; - typedef - typename HandleType::nnz_lno_temp_work_view_t nnz_lno_temp_work_view_t; - typedef typename HandleType::nnz_lno_persistent_work_view_t - nnz_lno_persistent_work_view_t; + typedef typename HandleType::nnz_lno_temp_work_view_t nnz_lno_temp_work_view_t; + typedef typename HandleType::nnz_lno_persistent_work_view_t nnz_lno_persistent_work_view_t; - typedef typename Kokkos::View - color_temp_work_view_type; + typedef typename Kokkos::View color_temp_work_view_type; typedef Kokkos::View char_temp_work_view_type; - typedef typename char_temp_work_view_type::HostMirror - char_temp_work_host_view_type; // Host view type + typedef typename char_temp_work_view_type::HostMirror char_temp_work_host_view_type; // Host view type typedef typename in_row_index_view_type::const_type const_lno_row_view_t; - typedef typename in_nonzero_index_view_type::const_type - const_nonzero_index_view_type; + typedef typename in_nonzero_index_view_type::const_type const_nonzero_index_view_type; public: /** @@ -2231,12 +2019,10 @@ class GraphColor_EB : public GraphColor(nv_, ne_, row_map, entries, - coloring_handle) {} + : GraphColor(nv_, ne_, row_map, entries, + 
coloring_handle) {} /** * \brief Class Destructor. @@ -2256,7 +2042,7 @@ class GraphColor_EB : public GraphColorcp->get_eb_num_initial_colors(); double pps_cutoff = this->cp->get_min_reduction_for_conflictlist(); size_type ps_min = this->cp->get_min_elements_for_conflictlist(); - bool use_pps = (this->cp->get_conflict_list_type() == COLORING_PPS); + bool use_pps = (this->cp->get_conflict_list_type() == COLORING_PPS); bool tictoc = this->cp->get_tictoc(); @@ -2264,53 +2050,40 @@ class GraphColor_EB : public GraphColorcp->get_lower_diagonal_edge_list(this->nv, this->ne, this->xadj, - this->adj, numEdges, _kok_src, - _kok_dst); + this->cp->get_lower_diagonal_edge_list(this->nv, this->ne, this->xadj, this->adj, numEdges, _kok_src, _kok_dst); size_type num_work_edges = numEdges; // allocate memory for vertex ban colors, and tentative bans - color_temp_work_view_type color_ban( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "color_ban"), this->nv); - color_temp_work_view_type tentative_color_ban( - "tentative_color_ban", this->nv); // views are initialized with zero + color_temp_work_view_type color_ban(Kokkos::view_alloc(Kokkos::WithoutInitializing, "color_ban"), this->nv); + color_temp_work_view_type tentative_color_ban("tentative_color_ban", this->nv); // views are initialized with zero // allocate memory for vertex color set shifts. nnz_lno_temp_work_view_t color_set("color_set", this->nv); // initialized with zero. 
// initialize colors, color bans - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::initColors", my_exec_space(0, this->nv), - init_colors(kok_colors, color_ban, numInitialColors, color_set)); + Kokkos::parallel_for("KokkosGraph::GraphColoring::initColors", my_exec_space(0, this->nv), + init_colors(kok_colors, color_ban, numInitialColors, color_set)); // std::cout << "nv:" << this->nv << " init_colors" << std::endl; // worklist size_type_temp_work_view_t edge_conflict_indices( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "edge_conflict_indices"), - num_work_edges); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "edge_conflict_indices"), num_work_edges); // next iterations conflict list size_type_temp_work_view_t new_edge_conflict_indices( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "new_edge_conflict_indices"), - num_work_edges); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "new_edge_conflict_indices"), num_work_edges); char_temp_work_view_type edge_conflict_marker( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "edge_conflict_marker"), - num_work_edges); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "edge_conflict_marker"), num_work_edges); // initialize the worklist sequentiall, and markers as 1. 
- Kokkos::parallel_for( - "KokkosGraph::GraphColoring::InitWorkArrays", - my_exec_space(0, num_work_edges), - init_work_arrays(edge_conflict_indices, edge_conflict_marker)); + Kokkos::parallel_for("KokkosGraph::GraphColoring::InitWorkArrays", my_exec_space(0, num_work_edges), + init_work_arrays(edge_conflict_indices, edge_conflict_marker)); MyExecSpace().fence(); // std::cout << "nv:" << this->nv << " init_work_arrays" << std::endl; @@ -2319,8 +2092,7 @@ class GraphColor_EB : public GraphColorseconds(); timer->reset(); } - double mc_time = 0, cnt_time = 0, ban_time = 0, expand_ban_time = 0, - color_time = 0, pps_time = 0; + double mc_time = 0, cnt_time = 0, ban_time = 0, expand_ban_time = 0, color_time = 0, pps_time = 0; size_type i = 0; @@ -2340,12 +2112,9 @@ class GraphColor_EB : public GraphColornv << " i:" << i << " num_work_edges:" << // num_work_edges<< std::endl; conflict detection mark conflicts as color // 0. update their bans - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::HalfEdgeMarkConflicts", - my_exec_space(0, num_work_edges), - halfedge_mark_conflicts(_kok_src, _kok_dst, kok_colors, color_set, - color_ban, tentative_color_ban, - edge_conflict_indices)); + Kokkos::parallel_for("KokkosGraph::GraphColoring::HalfEdgeMarkConflicts", my_exec_space(0, num_work_edges), + halfedge_mark_conflicts(_kok_src, _kok_dst, kok_colors, color_set, color_ban, + tentative_color_ban, edge_conflict_indices)); MyExecSpace().fence(); // std::cout << "nv:" << this->nv << " i:" << i << " @@ -2361,13 +2130,10 @@ class GraphColor_EB : public GraphColor 0) - Kokkos::parallel_reduce( - "KokkosGraph::GraphColoring::HalfEdgeConflictsCount", - my_exec_space(0, num_work_edges), - halfedge_conflict_count(_kok_src, _kok_dst, kok_colors, color_set, - edge_conflict_indices, - edge_conflict_marker), - num_conflict_reduction); + Kokkos::parallel_reduce("KokkosGraph::GraphColoring::HalfEdgeConflictsCount", my_exec_space(0, num_work_edges), + halfedge_conflict_count(_kok_src, 
_kok_dst, kok_colors, color_set, + edge_conflict_indices, edge_conflict_marker), + num_conflict_reduction); MyExecSpace().fence(); @@ -2396,26 +2162,19 @@ class GraphColor_EB : public GraphColor ps_min && - num_conflict_reduction / double(num_work_edges) > pps_cutoff) { + if (num_work_edges > ps_min && num_conflict_reduction / double(num_work_edges) > pps_cutoff) { // use_pps = false; if (use_pps) { - Kokkos::parallel_scan("KokkosGraph::GraphColoring::CalcEdgePositions", - my_exec_space(0, num_work_edges), - ppsWorklistFunctorEB(edge_conflict_indices, - new_edge_conflict_indices, - edge_conflict_marker)); + Kokkos::parallel_scan( + "KokkosGraph::GraphColoring::CalcEdgePositions", my_exec_space(0, num_work_edges), + ppsWorklistFunctorEB(edge_conflict_indices, new_edge_conflict_indices, edge_conflict_marker)); } else { // create new worklist - single_dim_index_view_type new_index = - single_dim_index_view_type("recolorListLength"); + single_dim_index_view_type new_index = single_dim_index_view_type("recolorListLength"); ; - Kokkos::parallel_for( - "KokkosGraph::GraphColoring::CreateNewWorkArrayAtomic", - my_exec_space(0, num_work_edges), - atomic_create_new_work_array(new_index, edge_conflict_indices, - edge_conflict_marker, - new_edge_conflict_indices)); + Kokkos::parallel_for("KokkosGraph::GraphColoring::CreateNewWorkArrayAtomic", my_exec_space(0, num_work_edges), + atomic_create_new_work_array(new_index, edge_conflict_indices, edge_conflict_marker, + new_edge_conflict_indices)); MyExecSpace().fence(); } @@ -2433,12 +2192,9 @@ class GraphColor_EB : public GraphColorseconds(); @@ -2463,27 +2217,22 @@ class GraphColor_EB : public GraphColornv), - choose_colors(kok_colors, color_set, color_ban, tentative_color_ban)); + Kokkos::parallel_for("KokkosGraph::GraphColoring::ChooseColors", my_exec_space(0, this->nv), + choose_colors(kok_colors, color_set, color_ban, tentative_color_ban)); if (tictoc) { color_time += timer->seconds(); timer->reset(); } } if (tictoc) { - 
std::cout << "\tinit_time:" << inittime << " mc:" << mc_time - << " cnt_time:" << cnt_time << " ban_time:" << ban_time - << " expand ban time:" << expand_ban_time - << " pps time:" << pps_time << " color time:" << color_time - << std::endl + std::cout << "\tinit_time:" << inittime << " mc:" << mc_time << " cnt_time:" << cnt_time + << " ban_time:" << ban_time << " expand ban time:" << expand_ban_time << " pps time:" << pps_time + << " color time:" << color_time << std::endl << std::endl; } // set the final colors. - Kokkos::parallel_for("KokkosGraph::GraphColoring::SetFinalColors", - my_exec_space(0, this->nv), + Kokkos::parallel_for("KokkosGraph::GraphColoring::SetFinalColors", my_exec_space(0, this->nv), set_final_colors(kok_colors, color_set)); num_loops = i; @@ -2500,7 +2249,7 @@ class GraphColor_EB : public GraphColor - _color_set(s))) || // if source is colored, and destination - // color set is larger than source - (dc && (_color_set(s) > - _color_set(d))) // or if destionation is colored, and the - // source color set is larger + if ((dc && sc) || // if both colored + (sc && (_color_set(d) > _color_set(s))) || // if source is colored, and destination + // color set is larger than source + (dc && (_color_set(s) > _color_set(d))) // or if destionation is colored, and the + // source color set is larger ) { // then no need to look at this edge anymore. 
_edge_conflict_marker(w) = 0; @@ -2696,8 +2430,7 @@ class GraphColor_EB : public GraphColor::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; size_type w = _edge_conflict_indices(ii); if (_edge_conflict_marker(w)) { - const size_type future_index = - Kokkos::atomic_fetch_add(&_new_index(), atomic_incr_type(1)); + const size_type future_index = Kokkos::atomic_fetch_add(&_new_index(), atomic_incr_type(1)); _new_edge_conflict_indices(future_index) = w; } } @@ -2751,11 +2480,9 @@ class GraphColor_EB : public GraphColor(&(color_ban(uncolored_vertex)), - src_col | dst_col); + Kokkos::atomic_fetch_or(&(color_ban(uncolored_vertex)), src_col | dst_col); edge_conflict_marker(work_index) = 0; } } @@ -2821,9 +2544,8 @@ class GraphColor_EB : public GraphColor dst_id) ? src_id : dst_id; - nnz_lno_t smaller_index = - dst_id; // TODO which one is better? this seems to be not - // much changing + nnz_lno_t smaller_index = dst_id; // TODO which one is better? this seems to be not + // much changing // idx smaller_index = src_id; // then both have been colored tentavitely. propoagate the color // of src to dst. 
- Kokkos::atomic_fetch_or( - &(tentative_color_ban(smaller_index)), -src_col); - nnz_lno_t banned_colors = ~(color_ban(smaller_index) | - tentative_color_ban(smaller_index)); + Kokkos::atomic_fetch_or(&(tentative_color_ban(smaller_index)), -src_col); + nnz_lno_t banned_colors = ~(color_ban(smaller_index) | tentative_color_ban(smaller_index)); nnz_lno_t larger_col = banned_colors & (-banned_colors); kokcolors(smaller_index) = -(larger_col); } @@ -2909,16 +2627,14 @@ class GraphColor_EB : public GraphColor(&(color_ban(dst_id)), // -src_col); - Kokkos::atomic_fetch_or(&(tentative_color_ban(dst_id)), - -src_col); + Kokkos::atomic_fetch_or(&(tentative_color_ban(dst_id)), -src_col); } else if (dst_col != 0) { // if it is dst tentatively colors, but src is not colored, // then we send the dst color info to src's tentative_ban // Kokkos::atomic_fetch_or(&(color_ban(src_id)), // -dst_col); - Kokkos::atomic_fetch_or(&(tentative_color_ban(src_id)), - -dst_col); + Kokkos::atomic_fetch_or(&(tentative_color_ban(src_id)), -dst_col); } else { // idx smaller_index = src_id < dst_id > 0 ? src_id: dst_id; // idx larger_index = src_id < dst_id > 0 ? 
dst_id : src_id; @@ -2937,16 +2653,14 @@ class GraphColor_EB : public GraphColor( - &(tentative_color_ban(larger_index)), src_col); + Kokkos::atomic_fetch_or(&(tentative_color_ban(larger_index)), src_col); // Kokkos::atomic_fetch_or(&(color_ban(dst_id)), // src_col); } @@ -2961,15 +2675,13 @@ class GraphColor_EB : public GraphColor -void graph_color_impl(KernelHandle *handle, - typename KernelHandle::nnz_lno_t num_rows, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries) { +template +void graph_color_impl(KernelHandle *handle, typename KernelHandle::nnz_lno_t num_rows, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries) { Kokkos::Timer timer; - typename KernelHandle::GraphColoringHandleType *gch = - handle->get_graph_coloring_handle(); + typename KernelHandle::GraphColoringHandleType *gch = handle->get_graph_coloring_handle(); ColoringAlgorithm algorithm = gch->get_coloring_algo_type(); - typedef typename KernelHandle::GraphColoringHandleType::color_view_t - color_view_type; + typedef typename KernelHandle::GraphColoringHandleType::color_view_t color_view_type; gch->set_tictoc(handle->get_verbose()); @@ -3119,46 +2820,35 @@ void graph_color_impl(KernelHandle *handle, colors_out = color_view_type("Graph Colors", num_rows); } - typedef - typename Impl::GraphColor - BaseGraphColoring; + typedef typename Impl::GraphColor + BaseGraphColoring; BaseGraphColoring *gc = NULL; switch (algorithm) { - case COLORING_SERIAL: - gc = new BaseGraphColoring(num_rows, entries.extent(0), row_map, entries, - gch); - break; + case COLORING_SERIAL: gc = new BaseGraphColoring(num_rows, entries.extent(0), row_map, entries, gch); break; case COLORING_VB: case COLORING_VBBIT: case COLORING_VBCS: - typedef typename Impl::GraphColor_VB< - typename KernelHandle::GraphColoringHandleType, lno_row_view_t_, - lno_nnz_view_t_> - VBGraphColoring; - gc = new VBGraphColoring(num_rows, entries.extent(0), row_map, entries, - gch); + typedef + typename Impl::GraphColor_VB + VBGraphColoring; + gc = new 
VBGraphColoring(num_rows, entries.extent(0), row_map, entries, gch); break; case COLORING_VBD: case COLORING_VBDBIT: - typedef typename Impl::GraphColor_VBD< - typename KernelHandle::GraphColoringHandleType, lno_row_view_t_, - lno_nnz_view_t_> + typedef typename Impl::GraphColor_VBD VBDGraphColoring; - gc = new VBDGraphColoring(num_rows, entries.extent(0), row_map, entries, - gch); + gc = new VBDGraphColoring(num_rows, entries.extent(0), row_map, entries, gch); break; case COLORING_EB: - typedef typename Impl::GraphColor_EB< - typename KernelHandle::GraphColoringHandleType, lno_row_view_t_, - lno_nnz_view_t_> - EBGraphColoring; - gc = new EBGraphColoring(num_rows, entries.extent(0), row_map, entries, - gch); + typedef + typename Impl::GraphColor_EB + EBGraphColoring; + gc = new EBGraphColoring(num_rows, entries.extent(0), row_map, entries, gch); break; case COLORING_DEFAULT: break; diff --git a/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2Color_impl.hpp b/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2Color_impl.hpp index 2ab04667e079..cfa5186283e2 100644 --- a/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2Color_impl.hpp +++ b/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2Color_impl.hpp @@ -53,8 +53,7 @@ namespace Impl { * Distance-1 conflicts will not be checked. * */ -template +template class GraphColorDistance2 { // Need mutable entries type for edge filtering using nc_entries_t = typename entries_t::non_const_type; @@ -109,9 +108,8 @@ class GraphColorDistance2 { * \param handle: GraphColoringHandle object that holds the specification * about the graph coloring, including parameters. 
*/ - GraphColorDistance2(lno_t nr_, lno_t nc_, rowmap_t row_map, entries_t entries, - rowmap_t t_row_map, entries_t t_entries, - HandleType* handle) + GraphColorDistance2(lno_t nr_, lno_t nc_, rowmap_t row_map, entries_t entries, rowmap_t t_row_map, + entries_t t_entries, HandleType* handle) : nr(nr_), nc(nc_), ne(entries.extent(0)), @@ -157,15 +155,14 @@ class GraphColorDistance2 { colors_out = color_view_type("Graph Colors", this->nr); } switch (this->gc_handle->get_coloring_algo_type()) { - case COLORING_D2_VB_BIT_EF: using_edge_filtering = true; + case COLORING_D2_VB_BIT_EF: using_edge_filtering = true; [[fallthrough]]; case COLORING_D2_VB_BIT: case COLORING_D2_VB: compute_d2_coloring_vb(colors_out); break; case COLORING_D2_NB_BIT: compute_d2_coloring_nb(colors_out); break; case COLORING_D2_SERIAL: compute_d2_coloring_serial(colors_out); break; default: - throw std::runtime_error( - std::string("D2 coloring handle has invalid algorithm: ") + - std::to_string((int)this->gc_handle->get_coloring_algo_type())); + throw std::runtime_error(std::string("D2 coloring handle has invalid algorithm: ") + + std::to_string((int)this->gc_handle->get_coloring_algo_type())); } } @@ -179,16 +176,11 @@ class GraphColorDistance2 { // adjacency list ) if (this->_ticToc) { std::cout << "\tcolor_graph_d2 params:" << std::endl - << "\t algorithm : " - << this->gc_handle->getD2AlgorithmName() << std::endl - << "\t ticToc : " << this->_ticToc - << std::endl - << "\t max_num_iterations : " - << this->_max_num_iterations << std::endl - << "\t chunkSize : " << this->_chunkSize - << std::endl - << "\t Edge Filtering Pass? : " - << (int)using_edge_filtering << std::endl + << "\t algorithm : " << this->gc_handle->getD2AlgorithmName() << std::endl + << "\t ticToc : " << this->_ticToc << std::endl + << "\t max_num_iterations : " << this->_max_num_iterations << std::endl + << "\t chunkSize : " << this->_chunkSize << std::endl + << "\t Edge Filtering Pass? 
: " << (int)using_edge_filtering << std::endl << "\tgraph information:" << std::endl << "\t nr : " << this->nr << std::endl << "\t ne : " << this->ne << std::endl; @@ -203,9 +195,7 @@ class GraphColorDistance2 { // conflictlist - store conflicts that can happen when we're coloring in // parallel. - lno_view_t current_vertexList( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "vertexList"), - this->nr); + lno_view_t current_vertexList(Kokkos::view_alloc(Kokkos::WithoutInitializing, "vertexList"), this->nr); lno_t current_vertexListLength = this->nr; @@ -215,13 +205,10 @@ class GraphColorDistance2 { current_vertexListLength = this->gc_handle->get_vertex_list_size(); } else { // init conflictlist sequentially. - Kokkos::parallel_for("InitList", range_policy_type(0, this->nr), - functorInitList(current_vertexList)); + Kokkos::parallel_for("InitList", range_policy_type(0, this->nr), functorInitList(current_vertexList)); } // Next iteratons's conflictList - lno_view_t next_iteration_recolorList( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "recolorList"), - this->nr); + lno_view_t next_iteration_recolorList(Kokkos::view_alloc(Kokkos::WithoutInitializing, "recolorList"), this->nr); // Size the next iteration conflictList single_lno_view_t next_iteration_recolorListLength("recolorListLength"); @@ -251,15 +238,11 @@ class GraphColorDistance2 { // entries_t, // so that it has the same type as adj // * on the other hand, t_adj is not actually modified by EF functor - lno_view_t adj_copy( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "adj copy"), - this->ne); + lno_view_t adj_copy(Kokkos::view_alloc(Kokkos::WithoutInitializing, "adj copy"), this->ne); Kokkos::deep_copy(adj_copy, this->adj); - this->colorGreedyEF(this->xadj, adj_copy, this->t_xadj, this->t_adj, - colors_out); + this->colorGreedyEF(this->xadj, adj_copy, this->t_xadj, this->t_adj, colors_out); } else { - this->colorGreedy(this->xadj, this->adj, this->t_xadj, this->t_adj, - colors_out, 
current_vertexList, + this->colorGreedy(this->xadj, this->adj, this->t_xadj, this->t_adj, colors_out, current_vertexList, current_vertexListLength); } @@ -269,10 +252,8 @@ class GraphColorDistance2 { time = timer.seconds(); total_time += time; std::cout << "\tIteration: " << iter << std::endl - << "\t - Time speculative greedy phase : " << time - << std::endl - << "\t - Num Uncolored (greedy-color) : " << numUncolored - << std::endl; + << "\t - Time speculative greedy phase : " << time << std::endl + << "\t - Num Uncolored (greedy-color) : " << numUncolored << std::endl; gc_handle->add_to_overall_coloring_time_phase1(time); @@ -289,20 +270,17 @@ class GraphColorDistance2 { // NOTE: not using colorset algorithm in this so we don't include colorset // data - numUncolored = this->findConflicts( - swap_work_arrays, this->xadj, this->adj, this->t_xadj, this->t_adj, - colors_out, current_vertexList, current_vertexListLength, - next_iteration_recolorList, next_iteration_recolorListLength); + numUncolored = this->findConflicts(swap_work_arrays, this->xadj, this->adj, this->t_xadj, this->t_adj, colors_out, + current_vertexList, current_vertexListLength, next_iteration_recolorList, + next_iteration_recolorListLength); execution_space().fence(); if (_ticToc) { time = timer.seconds(); total_time += time; - std::cout << "\t - Time conflict detection : " << time - << std::endl; - std::cout << "\t - Num Uncolored (conflicts) : " << numUncolored - << std::endl; + std::cout << "\t - Time conflict detection : " << time << std::endl; + std::cout << "\t - Num Uncolored (conflicts) : " << numUncolored << std::endl; gc_handle->add_to_overall_coloring_time_phase2(time); timer.reset(); } @@ -315,9 +293,8 @@ class GraphColorDistance2 { current_vertexList = next_iteration_recolorList; next_iteration_recolorList = temp; - current_vertexListLength = numUncolored; - next_iteration_recolorListLength = - single_lno_view_t("recolorListLength"); + current_vertexListLength = numUncolored; + 
next_iteration_recolorListLength = single_lno_view_t("recolorListLength"); } } @@ -331,8 +308,7 @@ class GraphColorDistance2 { // clean up in serial (resolveConflictsSerial) // ------------------------------------------ if (numUncolored > 0) { - this->resolveConflictsSerial(this->xadj, this->adj, this->t_xadj, - this->t_adj, colors_out, current_vertexList, + this->resolveConflictsSerial(this->xadj, this->adj, this->t_xadj, this->t_adj, colors_out, current_vertexList, current_vertexListLength); } @@ -341,10 +317,8 @@ class GraphColorDistance2 { if (_ticToc) { time = timer.seconds(); total_time += time; - std::cout << "\tTime serial conflict resolution : " << time - << std::endl; - std::cout << "\tTotal time for coloring : " << total_time - << std::endl; + std::cout << "\tTime serial conflict resolution : " << time << std::endl; + std::cout << "\tTotal time for coloring : " << total_time << std::endl; gc_handle->add_to_overall_coloring_time_phase3(time); } @@ -356,11 +330,9 @@ class GraphColorDistance2 { template struct NB_Coloring { - NB_Coloring(const lno_view_t& worklist_, const single_lno_view_t& worklen_, - color_type colorBase_, const forbidden_view& forbidden_, - color_view_type colors_, const rowmap_t& Vrowmap_, - const entries_t& Vcolinds_, lno_t vertsPerThread_, - lno_t numCols_) + NB_Coloring(const lno_view_t& worklist_, const single_lno_view_t& worklen_, color_type colorBase_, + const forbidden_view& forbidden_, color_view_type colors_, const rowmap_t& Vrowmap_, + const entries_t& Vcolinds_, lno_t vertsPerThread_, lno_t numCols_) : worklist(worklist_), worklen(worklen_), colorBase(colorBase_), @@ -387,8 +359,7 @@ class GraphColorDistance2 { for (size_type j = rowBegin; j < rowEnd; j++) { lno_t nei = Vcolinds(j); if (nei < numCols) { - for (int b = 0; b < batch; b++) - forbid[b] |= forbidden(nei * batch + b); + for (int b = 0; b < batch; b++) forbid[b] |= forbidden(nei * batch + b); } } // Find the first 0 bit in forbid @@ -405,27 +376,22 @@ class 
GraphColorDistance2 { break; } } - if (color && (colors(v) == 0 || colors(v) == CONFLICTED || - colors(v) == UNCOLORABLE)) { + if (color && (colors(v) == 0 || colors(v) == CONFLICTED || colors(v) == UNCOLORABLE)) { // Color v colors(v) = color; if (!doing_bipartite) { // Update forbidden for v (preventing dist-1 conflicts) - if (v < numCols) - Kokkos::atomic_fetch_or(&forbidden(v * batch + colorWord), - (uint32_t)1 << colorBit); + if (v < numCols) Kokkos::atomic_fetch_or(&forbidden(v * batch + colorWord), (uint32_t)1 << colorBit); } // Update forbidden for all of v's neighbors for (size_type j = rowBegin; j < rowEnd; j++) { lno_t nei = Vcolinds(j); if (nei < numCols) { // Update column forbidden - Kokkos::atomic_fetch_or(&forbidden(nei * batch + colorWord), - (uint32_t)1 << colorBit); + Kokkos::atomic_fetch_or(&forbidden(nei * batch + colorWord), (uint32_t)1 << colorBit); } } - } else if (colors(v) == 0 || colors(v) == CONFLICTED || - colors(v) == UNCOLORABLE) { + } else if (colors(v) == 0 || colors(v) == CONFLICTED || colors(v) == UNCOLORABLE) { colors(v) = UNCOLORABLE; } } @@ -444,9 +410,8 @@ class GraphColorDistance2 { template struct NB_Conflict { - NB_Conflict(color_type colorBase_, const forbidden_view& forbidden_, - const color_view_type& colors_, const rowmap_t& Crowmap_, - const entries_t& Ccolinds_, lno_t numVerts_) + NB_Conflict(color_type colorBase_, const forbidden_view& forbidden_, const color_view_type& colors_, + const rowmap_t& Crowmap_, const entries_t& Ccolinds_, lno_t numVerts_) : colorBase(colorBase_), forbidden(forbidden_), colors(colors_), @@ -513,10 +478,8 @@ class GraphColorDistance2 { template struct NB_RefreshForbidden { - NB_RefreshForbidden(color_type colorBase_, const forbidden_view& forbidden_, - const color_view_type& colors_, - const rowmap_t& Crowmap_, const entries_t& Ccolinds_, - lno_t numVerts_) + NB_RefreshForbidden(color_type colorBase_, const forbidden_view& forbidden_, const color_view_type& colors_, + const rowmap_t& 
Crowmap_, const entries_t& Ccolinds_, lno_t numVerts_) : colorBase(colorBase_), colorEnd(colorBase + 32 * batch), forbidden(forbidden_), @@ -563,12 +526,11 @@ class GraphColorDistance2 { }; struct NB_Worklist { - NB_Worklist(const color_view_type colors_, const lno_view_t& worklist_, - const single_lno_view_t& worklen_, lno_t nr_) + NB_Worklist(const color_view_type colors_, const lno_view_t& worklist_, const single_lno_view_t& worklen_, + lno_t nr_) : colors(colors_), worklist(worklist_), worklen(worklen_), nr(nr_) {} - KOKKOS_INLINE_FUNCTION void operator()(const lno_t v, lno_t& lnum, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(const lno_t v, lno_t& lnum, bool finalPass) const { if (colors(v) == CONFLICTED) { if (finalPass) worklist(lnum) = v; lnum++; @@ -587,12 +549,11 @@ class GraphColorDistance2 { }; struct NB_UpdateBatch { - NB_UpdateBatch(const color_view_type& colors_, const lno_view_t& worklist_, - const single_lno_view_t& worklen_, lno_t nr_) + NB_UpdateBatch(const color_view_type& colors_, const lno_view_t& worklist_, const single_lno_view_t& worklen_, + lno_t nr_) : colors(colors_), worklist(worklist_), worklen(worklen_), nr(nr_) {} - KOKKOS_INLINE_FUNCTION void operator()(const lno_t v, lno_t& lnum, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(const lno_t v, lno_t& lnum, bool finalPass) const { if (colors(v) == UNCOLORABLE) { if (finalPass) worklist(lnum) = v; lnum++; @@ -630,8 +591,7 @@ class GraphColorDistance2 { Kokkos::deep_copy(worklen, this->nr); // init conflictlist sequentially. 
- Kokkos::parallel_for("InitList", range_policy_type(0, this->nr), - functorInitList(worklist)); + Kokkos::parallel_for("InitList", range_policy_type(0, this->nr), functorInitList(worklist)); // Estimate the number of colors that will be needed // The algorithm can't use more colors than the max distance-2 degree, @@ -670,7 +630,7 @@ class GraphColorDistance2 { // for batch size while (currentWork) { lno_t vertsPerThread = 1; - lno_t workBatches = (currentWork + vertsPerThread - 1) / vertsPerThread; + lno_t workBatches = (currentWork + vertsPerThread - 1) / vertsPerThread; timer.reset(); // if still using this color set, refresh forbidden. // This avoids using too many colors, by relying on forbidden from @@ -681,26 +641,22 @@ class GraphColorDistance2 { case 1: Kokkos::parallel_for( "NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<1>(colorBase, forbidden, colors_out, - this->t_xadj, this->t_adj, numVerts)); + NB_RefreshForbidden<1>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); break; case 2: Kokkos::parallel_for( "NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<2>(colorBase, forbidden, colors_out, - this->t_xadj, this->t_adj, numVerts)); + NB_RefreshForbidden<2>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); break; case 4: Kokkos::parallel_for( "NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<4>(colorBase, forbidden, colors_out, - this->t_xadj, this->t_adj, numVerts)); + NB_RefreshForbidden<4>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); break; case 8: Kokkos::parallel_for( "NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<8>(colorBase, forbidden, colors_out, - this->t_xadj, this->t_adj, numVerts)); + NB_RefreshForbidden<8>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); break; default:; } @@ -709,62 +665,46 @@ class GraphColorDistance2 { switch (batch) { case 1: 
timer.reset(); - Kokkos::parallel_for( - "NB D2 Coloring", range_policy_type(0, workBatches), - NB_Coloring<1>(worklist, worklen, colorBase, forbidden, - colors_out, this->xadj, this->adj, - vertsPerThread, numCols)); + Kokkos::parallel_for("NB D2 Coloring", range_policy_type(0, workBatches), + NB_Coloring<1>(worklist, worklen, colorBase, forbidden, colors_out, this->xadj, + this->adj, vertsPerThread, numCols)); colorTime += timer.seconds(); timer.reset(); - Kokkos::parallel_for( - "NB D2 Conflict Resolution", range_policy_type(0, numCols), - NB_Conflict<1>(colorBase, forbidden, colors_out, this->t_xadj, - this->t_adj, numVerts)); + Kokkos::parallel_for("NB D2 Conflict Resolution", range_policy_type(0, numCols), + NB_Conflict<1>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); conflictTime += timer.seconds(); break; case 2: timer.reset(); - Kokkos::parallel_for( - "NB D2 Coloring", range_policy_type(0, workBatches), - NB_Coloring<2>(worklist, worklen, colorBase, forbidden, - colors_out, this->xadj, this->adj, - vertsPerThread, numCols)); + Kokkos::parallel_for("NB D2 Coloring", range_policy_type(0, workBatches), + NB_Coloring<2>(worklist, worklen, colorBase, forbidden, colors_out, this->xadj, + this->adj, vertsPerThread, numCols)); colorTime += timer.seconds(); timer.reset(); - Kokkos::parallel_for( - "NB D2 Conflict Resolution", range_policy_type(0, numCols), - NB_Conflict<2>(colorBase, forbidden, colors_out, this->t_xadj, - this->t_adj, numVerts)); + Kokkos::parallel_for("NB D2 Conflict Resolution", range_policy_type(0, numCols), + NB_Conflict<2>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); conflictTime += timer.seconds(); break; case 4: timer.reset(); - Kokkos::parallel_for( - "NB D2 Coloring", range_policy_type(0, workBatches), - NB_Coloring<4>(worklist, worklen, colorBase, forbidden, - colors_out, this->xadj, this->adj, - vertsPerThread, numCols)); + Kokkos::parallel_for("NB D2 Coloring", range_policy_type(0, 
workBatches), + NB_Coloring<4>(worklist, worklen, colorBase, forbidden, colors_out, this->xadj, + this->adj, vertsPerThread, numCols)); colorTime += timer.seconds(); timer.reset(); - Kokkos::parallel_for( - "NB D2 Conflict Resolution", range_policy_type(0, numCols), - NB_Conflict<4>(colorBase, forbidden, colors_out, this->t_xadj, - this->t_adj, numVerts)); + Kokkos::parallel_for("NB D2 Conflict Resolution", range_policy_type(0, numCols), + NB_Conflict<4>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); conflictTime += timer.seconds(); break; case 8: timer.reset(); - Kokkos::parallel_for( - "NB D2 Coloring", range_policy_type(0, workBatches), - NB_Coloring<8>(worklist, worklen, colorBase, forbidden, - colors_out, this->xadj, this->adj, - vertsPerThread, numCols)); + Kokkos::parallel_for("NB D2 Coloring", range_policy_type(0, workBatches), + NB_Coloring<8>(worklist, worklen, colorBase, forbidden, colors_out, this->xadj, + this->adj, vertsPerThread, numCols)); colorTime += timer.seconds(); timer.reset(); - Kokkos::parallel_for( - "NB D2 Conflict Resolution", range_policy_type(0, numCols), - NB_Conflict<8>(colorBase, forbidden, colors_out, this->t_xadj, - this->t_adj, numVerts)); + Kokkos::parallel_for("NB D2 Conflict Resolution", range_policy_type(0, numCols), + NB_Conflict<8>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); conflictTime += timer.seconds(); break; default: @@ -774,17 +714,15 @@ class GraphColorDistance2 { } timer.reset(); // Then build the next worklist - Kokkos::parallel_scan( - "NB D2 worklist", range_policy_type(0, numVerts), - NB_Worklist(colors_out, worklist, worklen, numVerts), currentWork); + Kokkos::parallel_scan("NB D2 worklist", range_policy_type(0, numVerts), + NB_Worklist(colors_out, worklist, worklen, numVerts), currentWork); worklistTime += timer.seconds(); timer.reset(); iter++; } // Will need to run with a different color base, so rebuild the work list - Kokkos::parallel_scan( - "NB D2 
Worklist Rebuild", range_policy_type(0, numVerts), - NB_UpdateBatch(colors_out, worklist, worklen, numVerts)); + Kokkos::parallel_scan("NB D2 Worklist Rebuild", range_policy_type(0, numVerts), + NB_UpdateBatch(colors_out, worklist, worklen, numVerts)); Kokkos::deep_copy(currentWork, worklen); worklistTime += timer.seconds(); timer.reset(); @@ -802,9 +740,7 @@ class GraphColorDistance2 { std::cout << "Conflict: " << conflictTime << '\n'; std::cout << "Forbidden: " << forbiddenTime << '\n'; std::cout << "Worklist: " << worklistTime << '\n'; - std::cout << "** Total: " - << colorTime + conflictTime + forbiddenTime + worklistTime - << "\n\n"; + std::cout << "** Total: " << colorTime + conflictTime + forbiddenTime + worklistTime << "\n\n"; } if (this->_ticToc) { gc_handle->add_to_overall_coloring_time_phase1(timer.seconds()); @@ -838,8 +774,8 @@ class GraphColorDistance2 { Kokkos::View Vcolinds = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), this->adj); // Create worklist - Kokkos::View worklist( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Worklist"), this->nr); + Kokkos::View worklist(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Worklist"), + this->nr); int iter = 0; Kokkos::Timer timer; lno_t currentWork = this->nr; @@ -898,10 +834,8 @@ class GraphColorDistance2 { // GraphColorDistance2::colorGreedy() // // ----------------------------------------------------------------- - void colorGreedy(rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, - entries_t t_adj_, color_view_type vertex_colors_, - lno_view_t current_vertexList_, - lno_t current_vertexListLength_) { + void colorGreedy(rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, entries_t t_adj_, color_view_type vertex_colors_, + lno_view_t current_vertexList_, lno_t current_vertexListLength_) { lno_t chunkSize_ = this->_chunkSize; if (current_vertexListLength_ < 100 * chunkSize_) { @@ -917,11 +851,9 @@ class GraphColorDistance2 { // 3. [S] loop over vertex neighbors // 4. 
[S] loop over vertex neighbors of neighbors case COLORING_D2_VB: { - functorGreedyColorVB gc(this->nr, this->nc, xadj_, adj_, t_xadj_, - t_adj_, vertex_colors_, current_vertexList_, + functorGreedyColorVB gc(this->nr, this->nc, xadj_, adj_, t_xadj_, t_adj_, vertex_colors_, current_vertexList_, current_vertexListLength_); - Kokkos::parallel_for("LoopOverChunks", range_policy_type(0, this->nr), - gc); + Kokkos::parallel_for("LoopOverChunks", range_policy_type(0, this->nr), gc); } break; // One level Perallelism, BIT Array for coloring @@ -930,11 +862,9 @@ class GraphColorDistance2 { // 3. [S] loop over vertex neighbors // 4. [S] loop over vertex neighbors of neighbors case COLORING_D2_VB_BIT: { - functorGreedyColorVB_BIT gc(this->nr, this->nc, xadj_, adj_, t_xadj_, - t_adj_, vertex_colors_, current_vertexList_, - current_vertexListLength_); - Kokkos::parallel_for("LoopOverChunks", range_policy_type(0, this->nr), - gc); + functorGreedyColorVB_BIT gc(this->nr, this->nc, xadj_, adj_, t_xadj_, t_adj_, vertex_colors_, + current_vertexList_, current_vertexListLength_); + Kokkos::parallel_for("LoopOverChunks", range_policy_type(0, this->nr), gc); } break; default: @@ -950,8 +880,8 @@ class GraphColorDistance2 { // GraphColorDistance2::colorGreedyEF() // // ----------------------------------------------------------------- - void colorGreedyEF(rowmap_t xadj_, lno_view_t adj_copy_, rowmap_t t_xadj_, - entries_t t_adj_copy_, color_view_type vertex_colors_) { + void colorGreedyEF(rowmap_t xadj_, lno_view_t adj_copy_, rowmap_t t_xadj_, entries_t t_adj_copy_, + color_view_type vertex_colors_) { // Pick the right coloring algorithm to use based on which algorithm we're // using switch (this->gc_handle->get_coloring_algo_type()) { @@ -961,10 +891,8 @@ class GraphColorDistance2 { // 3. [S] loop over vertex neighbors // 4. 
[S] loop over vertex neighbors of neighbors case COLORING_D2_VB_BIT_EF: { - functorGreedyColorVB_BIT_EF gc(this->nr, this->nc, xadj_, adj_copy_, - t_xadj_, t_adj_copy_, vertex_colors_); - Kokkos::parallel_for("LoopOverChunks", range_policy_type(0, this->nr), - gc); + functorGreedyColorVB_BIT_EF gc(this->nr, this->nc, xadj_, adj_copy_, t_xadj_, t_adj_copy_, vertex_colors_); + Kokkos::parallel_for("LoopOverChunks", range_policy_type(0, this->nr), gc); // prettyPrint1DView(vertex_colors_, "COLORS_GC_VB_BIT",500); } break; @@ -980,23 +908,17 @@ class GraphColorDistance2 { // GraphColorDistance2::findConflicts() // // ----------------------------------------------------------------- - lno_t findConflicts(bool& swap_work_arrays, rowmap_t xadj_, entries_t adj_, - rowmap_t t_xadj_, entries_t t_adj_, - color_view_type vertex_colors_, - lno_view_t current_vertexList_, - lno_t current_vertexListLength_, - lno_view_t next_iteration_recolorList_, - single_lno_view_t next_iteration_recolorListLength_) { + lno_t findConflicts(bool& swap_work_arrays, rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, entries_t t_adj_, + color_view_type vertex_colors_, lno_view_t current_vertexList_, lno_t current_vertexListLength_, + lno_view_t next_iteration_recolorList_, single_lno_view_t next_iteration_recolorListLength_) { swap_work_arrays = true; lno_t output_numUncolored = 0; - functorFindConflicts_Atomic conf( - this->nr, this->nc, xadj_, adj_, t_xadj_, t_adj_, vertex_colors_, - current_vertexList_, next_iteration_recolorList_, - next_iteration_recolorListLength_); - Kokkos::parallel_reduce("FindConflicts", - range_policy_type(0, current_vertexListLength_), - conf, output_numUncolored); + functorFindConflicts_Atomic conf(this->nr, this->nc, xadj_, adj_, t_xadj_, t_adj_, vertex_colors_, + current_vertexList_, next_iteration_recolorList_, + next_iteration_recolorListLength_); + Kokkos::parallel_reduce("FindConflicts", range_policy_type(0, current_vertexListLength_), conf, + 
output_numUncolored); return output_numUncolored; } // findConflicts (end) @@ -1005,9 +927,8 @@ class GraphColorDistance2 { // GraphColorDistance2::resolveConflictsSerial() // // ----------------------------------------------------------------- - void resolveConflictsSerial(rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, - entries_t t_adj_, color_view_type vertex_colors_, - lno_view_t current_vertexList_, + void resolveConflictsSerial(rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, entries_t t_adj_, + color_view_type vertex_colors_, lno_view_t current_vertexList_, size_type current_vertexListLength_) { color_type* forbidden = new color_type[nr]; for (lno_t i = 0; i < nr; i++) forbidden[i] = nr; @@ -1042,16 +963,14 @@ class GraphColorDistance2 { if (h_colors(vid) > 0) continue; // loop over distance-1 neighbors of vid - for (size_type vid_d1_adj = h_idx(vid); vid_d1_adj < h_idx(vid + 1); - vid_d1_adj++) { + for (size_type vid_d1_adj = h_idx(vid); vid_d1_adj < h_idx(vid + 1); vid_d1_adj++) { lno_t vid_d1 = h_adj(vid_d1_adj); if (vid_d1 < nc) { if (!doing_bipartite && vid_d1 != vid) { forbidden[h_colors(vid_d1)] = vid; } // loop over neighbors of vid_d1 (distance-2 from vid) - for (size_type vid_d2_adj = h_t_idx(vid_d1); - vid_d2_adj < h_t_idx(vid_d1 + 1); vid_d2_adj++) { + for (size_type vid_d2_adj = h_t_idx(vid_d1); vid_d2_adj < h_t_idx(vid_d1 + 1); vid_d2_adj++) { lno_t vid_d2 = h_t_adj(vid_d2_adj); // skip over loops vid -- x -- vid, and filter out-of-bounds @@ -1076,8 +995,7 @@ class GraphColorDistance2 { public: // pretty-print a 1D View with label template - void prettyPrint1DView(kokkos_view_t& view, const char* label, - const size_t max_entries = 500) const { + void prettyPrint1DView(kokkos_view_t& view, const char* label, const size_t max_entries = 500) const { int max_per_line = 20; int line_count = 1; std::cout << label << " = [ \n\t"; @@ -1132,10 +1050,8 @@ class GraphColorDistance2 { lno_t _vertexListLength; // lno_t _chunkSize; // - 
functorGreedyColorVB(lno_t nr_, lno_t nc_, rowmap_t xadj_, entries_t adj_, - rowmap_t t_xadj_, entries_t t_adj_, - color_view_type colors, lno_view_t vertexList, - lno_t vertexListLength) + functorGreedyColorVB(lno_t nr_, lno_t nc_, rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, entries_t t_adj_, + color_view_type colors, lno_view_t vertexList, lno_t vertexListLength) : nr(nr_), nc(nc_), _idx(xadj_), @@ -1173,15 +1089,13 @@ class GraphColorDistance2 { // but in distance-2 we'd need the total vertices at distance-2 which // we don't easily have aprioi. This could be as big as all the // vertices in the graph if diameter(G)=2... - for (color_type offset = 1; offset <= nr; - offset += VB_D2_COLORING_FORBIDDEN_SIZE) { + for (color_type offset = 1; offset <= nr; offset += VB_D2_COLORING_FORBIDDEN_SIZE) { // initialize for (int i = 0; i < VB_D2_COLORING_FORBIDDEN_SIZE; i++) { forbidden[i] = false; } // Check neighbors, fill forbidden array. - for (size_type vid_adj = vid_adj_begin; vid_adj < vid_adj_end; - vid_adj++) { + for (size_type vid_adj = vid_adj_begin; vid_adj < vid_adj_end; vid_adj++) { const lno_t vid_d1 = _adj(vid_adj); if (vid_d1 < nc) { if (!doing_bipartite) // note: compile-time branch (template @@ -1189,23 +1103,20 @@ class GraphColorDistance2 { { if (vid_d1 != vid) { const color_type c = _colors(vid_d1); - if ((c >= offset) && - (c - offset < VB_D2_COLORING_FORBIDDEN_SIZE)) { + if ((c >= offset) && (c - offset < VB_D2_COLORING_FORBIDDEN_SIZE)) { forbidden[c - offset] = true; } } } const size_type vid_d1_adj_begin = _t_idx(vid_d1); const size_type vid_d1_adj_end = _t_idx(vid_d1 + 1); - for (size_type vid_d1_adj = vid_d1_adj_begin; - vid_d1_adj < vid_d1_adj_end; vid_d1_adj++) { + for (size_type vid_d1_adj = vid_d1_adj_begin; vid_d1_adj < vid_d1_adj_end; vid_d1_adj++) { const lno_t vid_d2 = _t_adj(vid_d1_adj); // Skip distance-2-self-loops if (vid_d2 != vid && vid_d2 < nr) { const color_type c = _colors(vid_d2); - if ((c >= offset) && - (c - offset < 
VB_D2_COLORING_FORBIDDEN_SIZE)) { + if ((c >= offset) && (c - offset < VB_D2_COLORING_FORBIDDEN_SIZE)) { forbidden[c - offset] = true; } } @@ -1240,10 +1151,8 @@ class GraphColorDistance2 { lno_view_t _vertexList; // lno_t _vertexListLength; // - functorGreedyColorVB_BIT(lno_t nr_, lno_t nc_, rowmap_t xadj_, - entries_t adj_, rowmap_t t_xadj_, entries_t t_adj_, - color_view_type colors, lno_view_t vertexList, - lno_t vertexListLength) + functorGreedyColorVB_BIT(lno_t nr_, lno_t nc_, rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, entries_t t_adj_, + color_view_type colors, lno_view_t vertexList, lno_t vertexListLength) : nr(nr_), nc(nc_), _idx(xadj_), @@ -1270,8 +1179,7 @@ class GraphColorDistance2 { const size_type vid_adj_begin = _idx(vid); const size_type vid_adj_end = _idx(vid + 1); - for (color_type offset = 1; - offset <= (nr + VBBIT_D2_COLORING_FORBIDDEN_SIZE); + for (color_type offset = 1; offset <= (nr + VBBIT_D2_COLORING_FORBIDDEN_SIZE); offset += VBBIT_D2_COLORING_FORBIDDEN_SIZE) { // Forbidden colors // - single long int for forbidden colors @@ -1282,8 +1190,7 @@ class GraphColorDistance2 { bool break_out = false; // Loop over distance-1 neighbors of vid - for (size_type vid_adj = vid_adj_begin; - !break_out && vid_adj < vid_adj_end; ++vid_adj) { + for (size_type vid_adj = vid_adj_begin; !break_out && vid_adj < vid_adj_end; ++vid_adj) { const lno_t vid_d1 = _adj(vid_adj); if (vid_d1 < nc) { if (!doing_bipartite) // note: compile-time branch (template @@ -1293,8 +1200,7 @@ class GraphColorDistance2 { if (vid_d1 != vid) { const color_type color = _colors(vid_d1); const color_type color_offset = color - offset; - if (color && - color_offset <= VBBIT_D2_COLORING_FORBIDDEN_SIZE) { + if (color && color_offset <= VBBIT_D2_COLORING_FORBIDDEN_SIZE) { // if it is in the current range, then add the color to the // banned colors if (color > offset) { @@ -1313,8 +1219,7 @@ class GraphColorDistance2 { const size_type vid_d1_adj_end = _t_idx(vid_d1 + 1); // Loop 
over distance-2 neighbors of vid - for (size_type vid_d1_adj = vid_d1_adj_begin; - !break_out && vid_d1_adj < vid_d1_adj_end; ++vid_d1_adj) { + for (size_type vid_d1_adj = vid_d1_adj_begin; !break_out && vid_d1_adj < vid_d1_adj_end; ++vid_d1_adj) { const lno_t vid_d2 = _t_adj(vid_d1_adj); // Ignore Distance-2 Self Loops @@ -1324,8 +1229,7 @@ class GraphColorDistance2 { // if color is within the current range, or if its color is in // a previously traversed range - if (offset <= color && - color_offset < VBBIT_D2_COLORING_FORBIDDEN_SIZE) { + if (offset <= color && color_offset < VBBIT_D2_COLORING_FORBIDDEN_SIZE) { // if it is in the current range, then add the color to the // banned colors forbidden |= (bit_64_forbidden_type(1) << color_offset); @@ -1343,9 +1247,8 @@ class GraphColorDistance2 { // check if an available color exists. if (~forbidden) { - bit_64_forbidden_type color_offset = - KokkosKernels::Impl::least_set_bit(~forbidden) - 1; - _colors(vid) = offset + color_offset; + bit_64_forbidden_type color_offset = KokkosKernels::Impl::least_set_bit(~forbidden) - 1; + _colors(vid) = offset + color_offset; return; } } // for offset <= (nr + VBBIT_D2_COLORING_FORBIDDEN_SIZE) @@ -1366,16 +1269,9 @@ class GraphColorDistance2 { entries_t _t_adj; // transpose vertex adjacency list (NOT modified) color_view_type _colors; // vertex colors - functorGreedyColorVB_BIT_EF(lno_t nr_, lno_t nc_, rowmap_t xadj_, - lno_view_t adj_, rowmap_t t_xadj_, + functorGreedyColorVB_BIT_EF(lno_t nr_, lno_t nc_, rowmap_t xadj_, lno_view_t adj_, rowmap_t t_xadj_, entries_t t_adj_, color_view_type colors) - : _nr(nr_), - _nc(nc_), - _idx(xadj_), - _adj(adj_), - _t_idx(t_xadj_), - _t_adj(t_adj_), - _colors(colors) {} + : _nr(nr_), _nc(nc_), _idx(xadj_), _adj(adj_), _t_idx(t_xadj_), _t_adj(t_adj_), _colors(colors) {} // Color vertex i with smallest available color. 
// @@ -1394,8 +1290,7 @@ class GraphColorDistance2 { size_type vid_adj_end = _idx(vid + 1); bool foundColor = false; - for (color_type offset = 0; - !foundColor && offset <= (_nr + VBBIT_D2_COLORING_FORBIDDEN_SIZE); + for (color_type offset = 0; !foundColor && offset <= (_nr + VBBIT_D2_COLORING_FORBIDDEN_SIZE); offset += VBBIT_D2_COLORING_FORBIDDEN_SIZE) { // Forbidden colors // - single long int for forbidden colors @@ -1406,8 +1301,7 @@ class GraphColorDistance2 { bool offset_colors_full = false; // Loop over distance-1 neighbors of vid - for (size_type vid_adj = vid_adj_begin; - !offset_colors_full && vid_adj < vid_adj_end; ++vid_adj) { + for (size_type vid_adj = vid_adj_begin; !offset_colors_full && vid_adj < vid_adj_end; ++vid_adj) { const lno_t vid_d1 = _adj(vid_adj); if (vid_d1 < _nc) { if (!doing_bipartite) // note: compile-time branch (template @@ -1419,21 +1313,20 @@ class GraphColorDistance2 { color_type color_offset = color - offset; // if color is within the current range, or if its color is in // a previously traversed range - if (color && offset < color && - color_offset <= VBBIT_D2_COLORING_FORBIDDEN_SIZE) { + if (color && offset < color && color_offset <= VBBIT_D2_COLORING_FORBIDDEN_SIZE) { // if it is in the current range, then add the color to the // banned colors convert color to bit representation bit_64_forbidden_type ban_color_bit = 1; - ban_color_bit = ban_color_bit << (color_offset - 1); + ban_color_bit = ban_color_bit << (color_offset - 1); // add it to forbidden colors forbidden = forbidden | ban_color_bit; } } } - size_type vid_d1_adj_begin = _t_idx(vid_d1); - const size_type vid_d1_adj_end = _t_idx(vid_d1 + 1); - const size_type degree_vid_d1 = vid_d1_adj_end - vid_d1_adj_begin; + size_type vid_d1_adj_begin = _t_idx(vid_d1); + const size_type vid_d1_adj_end = _t_idx(vid_d1 + 1); + const size_type degree_vid_d1 = vid_d1_adj_end - vid_d1_adj_begin; size_type num_vid_d2_colored_in_range = 0; // Store the maximum color value found in the 
vertices adjacent to @@ -1441,26 +1334,22 @@ class GraphColorDistance2 { color_type max_color_adj_to_d1 = 0; // Loop over distance-2 neighbors of vid - for (size_type vid_d1_adj = vid_d1_adj_begin; - !offset_colors_full && vid_d1_adj < vid_d1_adj_end; + for (size_type vid_d1_adj = vid_d1_adj_begin; !offset_colors_full && vid_d1_adj < vid_d1_adj_end; ++vid_d1_adj) { const lno_t vid_d2 = _t_adj(vid_d1_adj); // Ignore Distance-2 Self Loops if (vid_d2 != vid && vid_d2 < _nr) { - color_type color = _colors(vid_d2); - color_type color_offset = - color - offset; // color_offset < 0 means color is from a - // previous offset. + color_type color = _colors(vid_d2); + color_type color_offset = color - offset; // color_offset < 0 means color is from a + // previous offset. // Update maximum color adjacent to vid_d1 found so far. - max_color_adj_to_d1 = - color > max_color_adj_to_d1 ? color : max_color_adj_to_d1; + max_color_adj_to_d1 = color > max_color_adj_to_d1 ? color : max_color_adj_to_d1; // if color is within the current range, or if its color is in // a previously traversed range - if (color && - color_offset <= VBBIT_D2_COLORING_FORBIDDEN_SIZE) { + if (color && color_offset <= VBBIT_D2_COLORING_FORBIDDEN_SIZE) { num_vid_d2_colored_in_range++; // if it is in the current range, then add the color to the @@ -1543,10 +1432,8 @@ class GraphColorDistance2 { lno_view_t _recolorList; single_lno_view_t _recolorListLength; - functorFindConflicts_Atomic(lno_t nr_, lno_t nc_, rowmap_t xadj_, - entries_t adj_, rowmap_t t_xadj_, - entries_t t_adj_, color_view_type colors, - lno_view_t vertexList, lno_view_t recolorList, + functorFindConflicts_Atomic(lno_t nr_, lno_t nc_, rowmap_t xadj_, entries_t adj_, rowmap_t t_xadj_, + entries_t t_adj_, color_view_type colors, lno_view_t vertexList, lno_view_t recolorList, single_lno_view_t recolorListLength) : nr(nr_), nc(nc_), @@ -1566,8 +1453,7 @@ class GraphColorDistance2 { const size_type vid_d1_adj_begin = _idx(vid); const size_type 
vid_d1_adj_end = _idx(vid + 1); // If vid is a valid column (vid < nc), check for column->vert conflicts - for (size_type vid_d1_adj = vid_d1_adj_begin; vid_d1_adj < vid_d1_adj_end; - vid_d1_adj++) { + for (size_type vid_d1_adj = vid_d1_adj_begin; vid_d1_adj < vid_d1_adj_end; vid_d1_adj++) { lno_t vid_d1 = _adj(vid_d1_adj); if (vid_d1 < nc) { if (!doing_bipartite) // note: compile-time branch (template param) @@ -1576,8 +1462,7 @@ class GraphColorDistance2 { if (vid_d1 != vid && _colors(vid_d1) == my_color) { _colors(vid) = 0; // uncolor vertex // Atomically add vertex to recolorList - const lno_t k = - Kokkos::atomic_fetch_add(&_recolorListLength(), lno_t(1)); + const lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), lno_t(1)); _recolorList(k) = vid; numConflicts++; return; @@ -1585,16 +1470,14 @@ class GraphColorDistance2 { } const size_type d2_adj_begin = _t_idx(vid_d1); const size_type d2_adj_end = _t_idx(vid_d1 + 1); - for (size_type vid_d2_adj = d2_adj_begin; vid_d2_adj < d2_adj_end; - vid_d2_adj++) { + for (size_type vid_d2_adj = d2_adj_begin; vid_d2_adj < d2_adj_end; vid_d2_adj++) { const lno_t vid_d2 = _t_adj(vid_d2_adj); if (vid != vid_d2 && vid_d2 < nr) { if (_colors(vid_d2) == my_color) { _colors(vid) = 0; // uncolor vertex // Atomically add vertex to recolorList - const lno_t k = - Kokkos::atomic_fetch_add(&_recolorListLength(), lno_t(1)); + const lno_t k = Kokkos::atomic_fetch_add(&_recolorListLength(), lno_t(1)); _recolorList(k) = vid; numConflicts++; return; @@ -1634,8 +1517,7 @@ class GraphColorDistance2 { * @return nothing */ template -void graph_print_distance2_color_histogram(KernelHandle* handle, - bool csv = false) { +void graph_print_distance2_color_histogram(KernelHandle* handle, bool csv = false) { using lno_view_t = typename KernelHandle::nnz_lno_temp_work_view_t; using lno_t = typename KernelHandle::nnz_lno_t; using execution_space = typename KernelHandle::HandleExecSpace; @@ -1647,11 +1529,8 @@ void 
graph_print_distance2_color_histogram(KernelHandle* handle, color_view_t colors = gch_d2->get_vertex_colors(); lno_t num_colors = gch_d2->get_num_colors(); lno_view_t histogram("histogram", num_colors + 1); - KokkosKernels::Impl::kk_get_histogram(colors.extent(0), - colors, histogram); - auto h_histogram = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), histogram); + KokkosKernels::Impl::kk_get_histogram(colors.extent(0), colors, histogram); + auto h_histogram = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), histogram); // note: both modes ignore color 0 in output, since we assume the coloring is // valid if (csv) { @@ -1661,8 +1540,7 @@ void graph_print_distance2_color_histogram(KernelHandle* handle, } std::cout << h_histogram(i); } else { - auto histogram_slice = Kokkos::subview( - histogram, std::make_pair((size_t)1, histogram.extent(0))); + auto histogram_slice = Kokkos::subview(histogram, std::make_pair((size_t)1, histogram.extent(0))); std::cout << "Distance-2 Color Histogram (1..N): " << std::endl; KokkosKernels::Impl::kk_print_1Dview(histogram_slice); std::cout << std::endl; diff --git a/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2MIS_impl.hpp b/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2MIS_impl.hpp index a359956a23ee..e39e1e7ad3e6 100644 --- a/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2MIS_impl.hpp +++ b/packages/kokkos-kernels/graph/impl/KokkosGraph_Distance2MIS_impl.hpp @@ -26,8 +26,7 @@ namespace KokkosGraph { namespace Impl { -template +template struct D2_MIS_RandomPriority { using exec_space = typename device_t::execution_space; using mem_space = typename device_t::memory_space; @@ -66,17 +65,14 @@ struct D2_MIS_RandomPriority { // adjacent to the column. 
// This counts up monotonically as vertices are eliminated (given status // OUT_SET) - rowStatus = status_view_t( - Kokkos::ViewAllocateWithoutInitializing("RowStatus"), numVerts); - colStatus = status_view_t( - Kokkos::ViewAllocateWithoutInitializing("ColStatus"), numVerts); + rowStatus = status_view_t(Kokkos::ViewAllocateWithoutInitializing("RowStatus"), numVerts); + colStatus = status_view_t(Kokkos::ViewAllocateWithoutInitializing("ColStatus"), numVerts); allWorklists = Kokkos::View( Kokkos::ViewAllocateWithoutInitializing("AllWorklists"), numVerts, 3); } struct RefreshRowStatus { - RefreshRowStatus(const status_view_t& rowStatus_, - const worklist_t& worklist_, lno_t nvBits_, int round) + RefreshRowStatus(const status_view_t& rowStatus_, const worklist_t& worklist_, lno_t nvBits_, int round) : rowStatus(rowStatus_), worklist(worklist_), nvBits(nvBits_) { hashedRound = KokkosKernels::Impl::xorshiftHash(round); } @@ -85,8 +81,8 @@ struct D2_MIS_RandomPriority { lno_t i = worklist(w); // Combine vertex and round to get some pseudorandom priority bits that // change each round - status_t priority = KokkosKernels::Impl::xorshiftHash( - KokkosKernels::Impl::xorshiftHash(i) ^ hashedRound); + status_t priority = + KokkosKernels::Impl::xorshiftHash(KokkosKernels::Impl::xorshiftHash(i) ^ hashedRound); // Generate unique status per row, with IN_SET < status < OUT_SET, status_t newStatus = (status_t)(i + 1) | (priority << nvBits); if (newStatus == OUT_SET) newStatus--; @@ -100,10 +96,8 @@ struct D2_MIS_RandomPriority { }; struct RefreshColStatus { - RefreshColStatus(const status_view_t& colStatus_, - const worklist_t& worklist_, - const status_view_t& rowStatus_, const rowmap_t& rowmap_, - const entries_t& entries_, lno_t nv_, lno_t worklistLen_) + RefreshColStatus(const status_view_t& colStatus_, const worklist_t& worklist_, const status_view_t& rowStatus_, + const rowmap_t& rowmap_, const entries_t& entries_, lno_t nv_, lno_t worklistLen_) : colStatus(colStatus_), 
worklist(worklist_), rowStatus(rowStatus_), @@ -167,10 +161,8 @@ struct D2_MIS_RandomPriority { }; struct DecideSetFunctor { - DecideSetFunctor(const status_view_t& rowStatus_, - const status_view_t& colStatus_, const rowmap_t& rowmap_, - const entries_t& entries_, lno_t nv_, - const worklist_t& worklist_, lno_t worklistLen_) + DecideSetFunctor(const status_view_t& rowStatus_, const status_view_t& colStatus_, const rowmap_t& rowmap_, + const entries_t& entries_, lno_t nv_, const worklist_t& worklist_, lno_t worklistLen_) : rowStatus(rowStatus_), colStatus(colStatus_), rowmap(rowmap_), @@ -275,8 +267,7 @@ struct D2_MIS_RandomPriority { struct CompactInSet { CompactInSet(const status_view_t& rowStatus_, const lno_view_t& setList_) : rowStatus(rowStatus_), setList(setList_) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lNumInSet, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lNumInSet, bool finalPass) const { if (rowStatus(i) == IN_SET) { if (finalPass) setList(lNumInSet) = i; lNumInSet++; @@ -287,11 +278,9 @@ struct D2_MIS_RandomPriority { }; struct MaskedWorklist { - MaskedWorklist(const lno_view_t& mask_, const worklist_t& worklist_) - : mask(mask_), worklist(worklist_) {} + MaskedWorklist(const lno_view_t& mask_, const worklist_t& worklist_) : mask(mask_), worklist(worklist_) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lNumInList, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lNumInList, bool finalPass) const { if (mask(i) < 0) { if (finalPass) worklist(lNumInList) = i; lNumInList++; @@ -302,12 +291,10 @@ struct D2_MIS_RandomPriority { }; struct CompactWorklistFunctor { - CompactWorklistFunctor(const worklist_t& src_, const worklist_t& dst_, - const status_view_t& status_) + CompactWorklistFunctor(const worklist_t& src_, const worklist_t& dst_, const status_view_t& status_) : src(src_), dst(dst_), status(status_) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t 
w, lno_t& lNumInSet, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t w, lno_t& lNumInSet, bool finalPass) const { lno_t i = src(w); status_t s = status(i); if (s != IN_SET && s != OUT_SET) { @@ -329,15 +316,12 @@ struct D2_MIS_RandomPriority { KokkosKernels::Impl::sequential_fill(rowWorklist); KokkosKernels::Impl::sequential_fill(colWorklist); worklist_t thirdWorklist = Kokkos::subview(allWorklists, Kokkos::ALL(), 2); - auto execSpaceEnum = - KokkosKernels::Impl::kk_get_exec_space_type(); - bool useTeams = KokkosKernels::Impl::kk_is_gpu_exec_space() && - (entries.extent(0) / numVerts >= 16); - int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size( - numVerts, entries.extent(0), execSpaceEnum); - int round = 0; - lno_t rowWorkLen = numVerts; - lno_t colWorkLen = numVerts; + auto execSpaceEnum = KokkosKernels::Impl::kk_get_exec_space_type(); + bool useTeams = KokkosKernels::Impl::kk_is_gpu_exec_space() && (entries.extent(0) / numVerts >= 16); + int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size(numVerts, entries.extent(0), execSpaceEnum); + int round = 0; + lno_t rowWorkLen = numVerts; + lno_t colWorkLen = numVerts; int refreshColTeamSize = 0; int decideSetTeamSize = 0; if (useTeams) { @@ -345,71 +329,54 @@ struct D2_MIS_RandomPriority { // Compute the recommended team size for RefreshColStatus and // DecideSetFunctor (will be constant) { - RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, - entries, numVerts, colWorkLen); - refreshColTeamSize = - dummyPolicy.team_size_max(refreshCol, Kokkos::ParallelForTag()); + RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, entries, numVerts, colWorkLen); + refreshColTeamSize = dummyPolicy.team_size_max(refreshCol, Kokkos::ParallelForTag()); } { - DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, - numVerts, rowWorklist, rowWorkLen); - decideSetTeamSize = - dummyPolicy.team_size_max(decideSet, 
Kokkos::ParallelForTag()); + DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, numVerts, rowWorklist, rowWorkLen); + decideSetTeamSize = dummyPolicy.team_size_max(decideSet, Kokkos::ParallelForTag()); } } while (true) { // Compute new row statuses - Kokkos::parallel_for( - range_pol(0, rowWorkLen), - RefreshRowStatus(rowStatus, rowWorklist, nvBits, round)); + Kokkos::parallel_for(range_pol(0, rowWorkLen), RefreshRowStatus(rowStatus, rowWorklist, nvBits, round)); // Compute new col statuses { - RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, - entries, numVerts, colWorkLen); + RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, entries, numVerts, colWorkLen); if (useTeams) - Kokkos::parallel_for(team_pol((colWorkLen + refreshColTeamSize - 1) / - refreshColTeamSize, - refreshColTeamSize, vectorLength), - refreshCol); + Kokkos::parallel_for( + team_pol((colWorkLen + refreshColTeamSize - 1) / refreshColTeamSize, refreshColTeamSize, vectorLength), + refreshCol); else Kokkos::parallel_for(range_pol(0, colWorkLen), refreshCol); } // Decide row statuses where enough information is available { - DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, - numVerts, rowWorklist, rowWorkLen); + DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, numVerts, rowWorklist, rowWorkLen); if (useTeams) Kokkos::parallel_for( - team_pol((rowWorkLen + decideSetTeamSize - 1) / decideSetTeamSize, - decideSetTeamSize, vectorLength), + team_pol((rowWorkLen + decideSetTeamSize - 1) / decideSetTeamSize, decideSetTeamSize, vectorLength), decideSet); else Kokkos::parallel_for(range_pol(0, rowWorkLen), decideSet); } round++; // Compact row worklist - Kokkos::parallel_scan( - range_pol(0, rowWorkLen), - CompactWorklistFunctor(rowWorklist, thirdWorklist, rowStatus), - rowWorkLen); + Kokkos::parallel_scan(range_pol(0, rowWorkLen), CompactWorklistFunctor(rowWorklist, thirdWorklist, rowStatus), + rowWorkLen); if 
(rowWorkLen == 0) break; std::swap(rowWorklist, thirdWorklist); // Compact col worklist - Kokkos::parallel_scan( - range_pol(0, colWorkLen), - CompactWorklistFunctor(colWorklist, thirdWorklist, colStatus), - colWorkLen); + Kokkos::parallel_scan(range_pol(0, colWorkLen), CompactWorklistFunctor(colWorklist, thirdWorklist, colStatus), + colWorkLen); std::swap(colWorklist, thirdWorklist); } // now that every vertex has been decided IN_SET/OUT_SET, // build a compact list of the vertices which are IN_SET. lno_t numInSet = 0; - Kokkos::parallel_reduce(range_pol(0, numVerts), CountInSet(rowStatus), - numInSet); - lno_view_t setList(Kokkos::ViewAllocateWithoutInitializing("D2MIS"), - numInSet); - Kokkos::parallel_scan(range_pol(0, numVerts), - CompactInSet(rowStatus, setList)); + Kokkos::parallel_reduce(range_pol(0, numVerts), CountInSet(rowStatus), numInSet); + lno_view_t setList(Kokkos::ViewAllocateWithoutInitializing("D2MIS"), numInSet); + Kokkos::parallel_scan(range_pol(0, numVerts), CompactInSet(rowStatus, setList)); return setList; } @@ -422,20 +389,16 @@ struct D2_MIS_RandomPriority { lno_t rowWorkLen = numVerts; lno_t colWorkLen = numVerts; // Row worklist: initially only the non-masked vertices - Kokkos::parallel_scan(range_pol(0, numVerts), - MaskedWorklist(mask, rowWorklist), rowWorkLen); + Kokkos::parallel_scan(range_pol(0, numVerts), MaskedWorklist(mask, rowWorklist), rowWorkLen); KokkosKernels::Impl::sequential_fill(colWorklist); // Need to fill rowStatus with OUT_SET initially so that vertices not in the // worklist don't affect algorithm Kokkos::deep_copy(rowStatus, ~(status_t(0))); worklist_t thirdWorklist = Kokkos::subview(allWorklists, Kokkos::ALL(), 2); - auto execSpaceEnum = - KokkosKernels::Impl::kk_get_exec_space_type(); - bool useTeams = KokkosKernels::Impl::kk_is_gpu_exec_space() && - (entries.extent(0) / numVerts >= 16); - int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size( - numVerts, entries.extent(0), execSpaceEnum); - int 
round = 0; + auto execSpaceEnum = KokkosKernels::Impl::kk_get_exec_space_type(); + bool useTeams = KokkosKernels::Impl::kk_is_gpu_exec_space() && (entries.extent(0) / numVerts >= 16); + int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size(numVerts, entries.extent(0), execSpaceEnum); + int round = 0; int refreshColTeamSize = 0; int decideSetTeamSize = 0; if (useTeams) { @@ -443,71 +406,54 @@ struct D2_MIS_RandomPriority { // Compute the recommended team size for RefreshColStatus and // DecideSetFunctor (will be constant) { - RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, - entries, numVerts, colWorkLen); - refreshColTeamSize = - dummyPolicy.team_size_max(refreshCol, Kokkos::ParallelForTag()); + RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, entries, numVerts, colWorkLen); + refreshColTeamSize = dummyPolicy.team_size_max(refreshCol, Kokkos::ParallelForTag()); } { - DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, - numVerts, rowWorklist, rowWorkLen); - decideSetTeamSize = - dummyPolicy.team_size_max(decideSet, Kokkos::ParallelForTag()); + DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, numVerts, rowWorklist, rowWorkLen); + decideSetTeamSize = dummyPolicy.team_size_max(decideSet, Kokkos::ParallelForTag()); } } while (true) { // Compute new row statuses - Kokkos::parallel_for( - range_pol(0, rowWorkLen), - RefreshRowStatus(rowStatus, rowWorklist, nvBits, round)); + Kokkos::parallel_for(range_pol(0, rowWorkLen), RefreshRowStatus(rowStatus, rowWorklist, nvBits, round)); // Compute new col statuses { - RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, - entries, numVerts, colWorkLen); + RefreshColStatus refreshCol(colStatus, colWorklist, rowStatus, rowmap, entries, numVerts, colWorkLen); if (useTeams) - Kokkos::parallel_for(team_pol((colWorkLen + refreshColTeamSize - 1) / - refreshColTeamSize, - refreshColTeamSize, vectorLength), - refreshCol); + 
Kokkos::parallel_for( + team_pol((colWorkLen + refreshColTeamSize - 1) / refreshColTeamSize, refreshColTeamSize, vectorLength), + refreshCol); else Kokkos::parallel_for(range_pol(0, colWorkLen), refreshCol); } // Decide row statuses where enough information is available { - DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, - numVerts, rowWorklist, rowWorkLen); + DecideSetFunctor decideSet(rowStatus, colStatus, rowmap, entries, numVerts, rowWorklist, rowWorkLen); if (useTeams) Kokkos::parallel_for( - team_pol((rowWorkLen + decideSetTeamSize - 1) / decideSetTeamSize, - decideSetTeamSize, vectorLength), + team_pol((rowWorkLen + decideSetTeamSize - 1) / decideSetTeamSize, decideSetTeamSize, vectorLength), decideSet); else Kokkos::parallel_for(range_pol(0, rowWorkLen), decideSet); } round++; // Compact row worklist - Kokkos::parallel_scan( - range_pol(0, rowWorkLen), - CompactWorklistFunctor(rowWorklist, thirdWorklist, rowStatus), - rowWorkLen); + Kokkos::parallel_scan(range_pol(0, rowWorkLen), CompactWorklistFunctor(rowWorklist, thirdWorklist, rowStatus), + rowWorkLen); if (rowWorkLen == 0) break; std::swap(rowWorklist, thirdWorklist); // Compact col worklist - Kokkos::parallel_scan( - range_pol(0, colWorkLen), - CompactWorklistFunctor(colWorklist, thirdWorklist, colStatus), - colWorkLen); + Kokkos::parallel_scan(range_pol(0, colWorkLen), CompactWorklistFunctor(colWorklist, thirdWorklist, colStatus), + colWorkLen); std::swap(colWorklist, thirdWorklist); } // now that every vertex has been decided IN_SET/OUT_SET, // build a compact list of the vertices which are IN_SET. 
lno_t numInSet = 0; - Kokkos::parallel_reduce(range_pol(0, numVerts), CountInSet(rowStatus), - numInSet); - lno_view_t setList(Kokkos::ViewAllocateWithoutInitializing("D2MIS"), - numInSet); - Kokkos::parallel_scan(range_pol(0, numVerts), - CompactInSet(rowStatus, setList)); + Kokkos::parallel_reduce(range_pol(0, numVerts), CountInSet(rowStatus), numInSet); + lno_view_t setList(Kokkos::ViewAllocateWithoutInitializing("D2MIS"), numInSet); + Kokkos::parallel_scan(range_pol(0, numVerts), CompactInSet(rowStatus, setList)); return setList; } @@ -523,8 +469,7 @@ struct D2_MIS_RandomPriority { int nvBits; }; -template +template struct D2_MIS_FixedPriority { using exec_space = typename device_t::execution_space; using mem_space = typename device_t::memory_space; @@ -551,10 +496,8 @@ struct D2_MIS_FixedPriority { entries(entries_), numVerts(rowmap.extent(0) - 1), colUpdateBitset(numVerts), - worklist1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "WL1"), - numVerts), - worklist2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "WL2"), - numVerts) { + worklist1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "WL1"), numVerts), + worklist2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "WL2"), numVerts) { status_t i = numVerts + 1; nvBits = 0; while (i) { @@ -566,25 +509,19 @@ struct D2_MIS_FixedPriority { // adjacent to the column. 
// This counts up monotonically as vertices are eliminated (given status // OUT_SET) - rowStatus = status_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "RowStatus"), numVerts); - colStatus = status_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "ColStatus"), numVerts); - KokkosSparse::Impl::graph_min_max_degree( - rowmap, minDegree, maxDegree); + rowStatus = status_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "RowStatus"), numVerts); + colStatus = status_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "ColStatus"), numVerts); + KokkosSparse::Impl::graph_min_max_degree(rowmap, minDegree, maxDegree); // Compute row statuses Kokkos::parallel_for(range_pol(0, numVerts), - InitRowStatus(rowStatus, rowmap, numVerts, nvBits, - minDegree, maxDegree)); + InitRowStatus(rowStatus, rowmap, numVerts, nvBits, minDegree, maxDegree)); // Compute col statuses - Kokkos::parallel_for( - range_pol(0, numVerts), - InitColStatus(colStatus, rowStatus, rowmap, entries, numVerts)); + Kokkos::parallel_for(range_pol(0, numVerts), InitColStatus(colStatus, rowStatus, rowmap, entries, numVerts)); } struct InitRowStatus { - InitRowStatus(const status_view_t& rowStatus_, const rowmap_t& rowmap_, - lno_t nv_, lno_t nvBits_, lno_t minDeg_, lno_t maxDeg_) + InitRowStatus(const status_view_t& rowStatus_, const rowmap_t& rowmap_, lno_t nv_, lno_t nvBits_, lno_t minDeg_, + lno_t maxDeg_) : rowStatus(rowStatus_), rowmap(rowmap_), nv(nv_), @@ -605,8 +542,7 @@ struct D2_MIS_FixedPriority { status_t maxDegRange = (((status_t)1) << degBits) - 2; lno_t deg = rowmap(i + 1) - rowmap(i); float degScore = (float)(deg - minDeg) * invDegRange; - rowStatus(i) = - (status_t)(i + 1) + (((status_t)(degScore * maxDegRange)) << nvBits); + rowStatus(i) = (status_t)(i + 1) + (((status_t)(degScore * maxDegRange)) << nvBits); } status_view_t rowStatus; @@ -619,14 +555,9 @@ struct D2_MIS_FixedPriority { }; struct InitColStatus { - InitColStatus(const status_view_t& colStatus_, - const 
status_view_t& rowStatus_, const rowmap_t& rowmap_, + InitColStatus(const status_view_t& colStatus_, const status_view_t& rowStatus_, const rowmap_t& rowmap_, const entries_t& entries_, lno_t nv_) - : colStatus(colStatus_), - rowStatus(rowStatus_), - rowmap(rowmap_), - entries(entries_), - nv(nv_) {} + : colStatus(colStatus_), rowStatus(rowStatus_), rowmap(rowmap_), entries(entries_), nv(nv_) {} KOKKOS_INLINE_FUNCTION void operator()(lno_t i) const { // iterate over {i} union the neighbors of i, to find @@ -652,10 +583,8 @@ struct D2_MIS_FixedPriority { }; struct IterateStatusFunctor { - IterateStatusFunctor(const status_view_t& rowStatus_, - const status_view_t& colStatus_, - const rowmap_t& rowmap_, const entries_t& entries_, - lno_t nv_, const lno_view_t& worklist_, + IterateStatusFunctor(const status_view_t& rowStatus_, const status_view_t& colStatus_, const rowmap_t& rowmap_, + const entries_t& entries_, lno_t nv_, const lno_view_t& worklist_, const bitset_t& colUpdateBitset_) : rowStatus(rowStatus_), colStatus(colStatus_), @@ -715,15 +644,11 @@ struct D2_MIS_FixedPriority { }; struct UpdateWorklistFunctor { - UpdateWorklistFunctor(const status_view_t& rowStatus_, - const lno_view_t& oldWorklist_, + UpdateWorklistFunctor(const status_view_t& rowStatus_, const lno_view_t& oldWorklist_, const lno_view_t& newWorklist_) - : rowStatus(rowStatus_), - oldWorklist(oldWorklist_), - newWorklist(newWorklist_) {} + : rowStatus(rowStatus_), oldWorklist(oldWorklist_), newWorklist(newWorklist_) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t w, lno_t& lcount, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t w, lno_t& lcount, bool finalPass) const { // processing row i lno_t i = oldWorklist(w); // Bit i will be set when it's decided IN_SET/OUT_SET. 
@@ -741,12 +666,10 @@ struct D2_MIS_FixedPriority { }; struct ColRefreshWorklist { - ColRefreshWorklist(const bitset_t& colUpdateBitset_, - const lno_view_t& refreshList_) + ColRefreshWorklist(const bitset_t& colUpdateBitset_, const lno_view_t& refreshList_) : colUpdateBitset(colUpdateBitset_), refreshList(refreshList_) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lindex, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lindex, bool finalPass) const { if (colUpdateBitset.test(i)) { if (finalPass) { refreshList(lindex) = i; @@ -761,10 +684,8 @@ struct D2_MIS_FixedPriority { }; struct RefreshColStatus { - RefreshColStatus(const lno_view_t& worklist_, - const status_view_t& rowStatus_, - const status_view_t& colStatus_, const rowmap_t& rowmap_, - const entries_t& entries_, lno_t nv_) + RefreshColStatus(const lno_view_t& worklist_, const status_view_t& rowStatus_, const status_view_t& colStatus_, + const rowmap_t& rowmap_, const entries_t& entries_, lno_t nv_) : worklist(worklist_), rowStatus(rowStatus_), colStatus(colStatus_), @@ -812,8 +733,7 @@ struct D2_MIS_FixedPriority { struct CompactInSet { CompactInSet(const status_view_t& rowStatus_, const lno_view_t& setList_) : rowStatus(rowStatus_), setList(setList_) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lNumInSet, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lNumInSet, bool finalPass) const { if (rowStatus(i) == IN_SET) { if (finalPass) setList(lNumInSet) = i; lNumInSet++; @@ -825,30 +745,22 @@ struct D2_MIS_FixedPriority { lno_view_t compute() { // Initialize first worklist to 0...numVerts - Kokkos::parallel_for(range_pol(0, numVerts), - InitWorklistFunctor(worklist1)); + Kokkos::parallel_for(range_pol(0, numVerts), InitWorklistFunctor(worklist1)); lno_t workRemain = numVerts; while (workRemain) { // do another iteration - Kokkos::parallel_for( - range_pol(0, workRemain), - IterateStatusFunctor(rowStatus, 
colStatus, rowmap, entries, numVerts, - worklist1, colUpdateBitset)); + Kokkos::parallel_for(range_pol(0, workRemain), IterateStatusFunctor(rowStatus, colStatus, rowmap, entries, + numVerts, worklist1, colUpdateBitset)); // And refresh the column statuses using the other worklist. lno_t colsToRefresh; - Kokkos::parallel_scan(range_pol(0, numVerts), - ColRefreshWorklist(colUpdateBitset, worklist2), - colsToRefresh); + Kokkos::parallel_scan(range_pol(0, numVerts), ColRefreshWorklist(colUpdateBitset, worklist2), colsToRefresh); Kokkos::parallel_for(range_pol(0, colsToRefresh), - RefreshColStatus(worklist2, rowStatus, colStatus, - rowmap, entries, numVerts)); + RefreshColStatus(worklist2, rowStatus, colStatus, rowmap, entries, numVerts)); // then build the next worklist with a scan. Also get the length of the // next worklist. lno_t newWorkRemain = 0; - Kokkos::parallel_scan( - range_pol(0, workRemain), - UpdateWorklistFunctor(rowStatus, worklist1, worklist2), - newWorkRemain); + Kokkos::parallel_scan(range_pol(0, workRemain), UpdateWorklistFunctor(rowStatus, worklist1, worklist2), + newWorkRemain); // Finally, flip the worklists std::swap(worklist1, worklist2); workRemain = newWorkRemain; @@ -856,12 +768,9 @@ struct D2_MIS_FixedPriority { // now that every vertex has been decided IN_SET/OUT_SET, // build a compact list of the vertices which are IN_SET. 
lno_t numInSet = 0; - Kokkos::parallel_reduce(range_pol(0, numVerts), CountInSet(rowStatus), - numInSet); - lno_view_t setList(Kokkos::view_alloc(Kokkos::WithoutInitializing, "D2MIS"), - numInSet); - Kokkos::parallel_scan(range_pol(0, numVerts), - CompactInSet(rowStatus, setList)); + Kokkos::parallel_reduce(range_pol(0, numVerts), CountInSet(rowStatus), numInSet); + lno_view_t setList(Kokkos::view_alloc(Kokkos::WithoutInitializing, "D2MIS"), numInSet); + Kokkos::parallel_scan(range_pol(0, numVerts), CompactInSet(rowStatus, setList)); return setList; } @@ -883,8 +792,7 @@ struct D2_MIS_FixedPriority { lno_view_t worklist2; }; -template +template struct D2_MIS_Aggregation { using exec_space = typename device_t::execution_space; using mem_space = typename device_t::memory_space; @@ -904,15 +812,13 @@ struct D2_MIS_Aggregation { : rowmap(rowmap_), entries(entries_), numVerts(rowmap.extent(0) - 1), - labels(Kokkos::ViewAllocateWithoutInitializing("AggregateLabels"), - numVerts), + labels(Kokkos::ViewAllocateWithoutInitializing("AggregateLabels"), numVerts), roots("Root Status", numVerts) { Kokkos::deep_copy(labels, (lno_t)-1); } struct Phase1Functor { - Phase1Functor(lno_t numVerts__, const mis2_view& m1__, - const rowmap_t& rowmap__, const entries_t& entries__, + Phase1Functor(lno_t numVerts__, const mis2_view& m1__, const rowmap_t& rowmap__, const entries_t& entries__, const labels_t& labels__, const char_view_t& roots__) : numVerts_(numVerts__), m1_(m1__), @@ -943,21 +849,16 @@ struct D2_MIS_Aggregation { void createPrimaryAggregates() { // Compute an MIS-2 - D2_MIS_RandomPriority d2mis( - rowmap, entries); + D2_MIS_RandomPriority d2mis(rowmap, entries); mis2_view m1 = d2mis.compute(); // Construct initial aggregates using roots and all direct neighbors - Kokkos::parallel_for( - range_pol(0, m1.extent(0)), - Phase1Functor(numVerts, m1, rowmap, entries, labels, roots)); + Kokkos::parallel_for(range_pol(0, m1.extent(0)), Phase1Functor(numVerts, m1, rowmap, entries, 
labels, roots)); numAggs = m1.extent(0); } struct CandAggSizesFunctor { - CandAggSizesFunctor(lno_t numVerts__, const labels_t& m2__, - const rowmap_t& rowmap__, const entries_t& entries__, - const labels_t& labels__, - const labels_t& candAggSizes__) + CandAggSizesFunctor(lno_t numVerts__, const labels_t& m2__, const rowmap_t& rowmap__, const entries_t& entries__, + const labels_t& labels__, const labels_t& candAggSizes__) : numVerts_(numVerts__), m2_(m2__), rowmap_(rowmap__), @@ -988,11 +889,8 @@ struct D2_MIS_Aggregation { }; struct ChoosePhase2AggsFunctor { - ChoosePhase2AggsFunctor(lno_t numVerts__, lno_t numAggs__, - const labels_t& m2__, const rowmap_t& rowmap__, - const entries_t& entries__, - const labels_t& labels__, - const labels_t& candAggSizes__, + ChoosePhase2AggsFunctor(lno_t numVerts__, lno_t numAggs__, const labels_t& m2__, const rowmap_t& rowmap__, + const entries_t& entries__, const labels_t& labels__, const labels_t& candAggSizes__, const char_view_t& roots__) : numVerts_(numVerts__), numAggs_(numAggs__), @@ -1003,8 +901,7 @@ struct D2_MIS_Aggregation { candAggSizes_(candAggSizes__), roots_(roots__) {} - KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lid, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(lno_t i, lno_t& lid, bool finalPass) const { lno_t aggSize = candAggSizes_(i); if (aggSize < 3) return; if (finalPass) { @@ -1035,36 +932,27 @@ struct D2_MIS_Aggregation { }; void createSecondaryAggregates() { - labels_t candAggSizes( - Kokkos::ViewAllocateWithoutInitializing("Phase2 Candidate Agg Sizes"), - numVerts); + labels_t candAggSizes(Kokkos::ViewAllocateWithoutInitializing("Phase2 Candidate Agg Sizes"), numVerts); // Compute a new MIS-2 from only unaggregated nodes - D2_MIS_RandomPriority d2mis( - rowmap, entries); + D2_MIS_RandomPriority d2mis(rowmap, entries); labels_t m2 = d2mis.compute(labels); lno_t numCandRoots = m2.extent(0); // Compute the sizes of would-be aggregates. 
Kokkos::parallel_for(range_pol(0, numCandRoots), - CandAggSizesFunctor(numVerts, m2, rowmap, entries, - labels, candAggSizes)); + CandAggSizesFunctor(numVerts, m2, rowmap, entries, labels, candAggSizes)); // Now, filter out the candidate aggs which are big enough, and create those // aggregates. Using a scan for this assigns IDs deterministically (unlike // an atomic counter). lno_t numNewAggs = 0; - Kokkos::parallel_scan( - range_pol(0, numCandRoots), - ChoosePhase2AggsFunctor(numVerts, numAggs, m2, rowmap, entries, labels, - candAggSizes, roots), - numNewAggs); + Kokkos::parallel_scan(range_pol(0, numCandRoots), + ChoosePhase2AggsFunctor(numVerts, numAggs, m2, rowmap, entries, labels, candAggSizes, roots), + numNewAggs); numAggs += numNewAggs; } struct SizeAndConnectivityFunctor { - SizeAndConnectivityFunctor(lno_t numVerts__, const rowmap_t& rowmap__, - const entries_t& entries__, - const labels_t& labels__, - const labels_t& connectivities__, - const labels_t& aggSizes__) + SizeAndConnectivityFunctor(lno_t numVerts__, const rowmap_t& rowmap__, const entries_t& entries__, + const labels_t& labels__, const labels_t& connectivities__, const labels_t& aggSizes__) : numVerts_(numVerts__), rowmap_(rowmap__), entries_(entries__), @@ -1100,12 +988,9 @@ struct D2_MIS_Aggregation { }; struct AssignLeftoverFunctor { - AssignLeftoverFunctor(lno_t numVerts__, const rowmap_t& rowmap__, - const entries_t& entries__, const labels_t& labels__, - const labels_t& labelsOld__, - const labels_t& connectivities__, - const labels_t& aggSizes__, - const char_view_t& roots__) + AssignLeftoverFunctor(lno_t numVerts__, const rowmap_t& rowmap__, const entries_t& entries__, + const labels_t& labels__, const labels_t& labelsOld__, const labels_t& connectivities__, + const labels_t& aggSizes__, const char_view_t& roots__) : numVerts_(numVerts__), rowmap_(rowmap__), entries_(entries__), @@ -1167,8 +1052,7 @@ struct D2_MIS_Aggregation { // Priorities: adjacent to root > connect > size if 
(trackedRootAdj[k] > bestRootAdj || (trackedRootAdj[k] == bestRootAdj && - ((trackedConnect[k] > bestConnect) || - (trackedConnect[k] == bestConnect && s < bestSize)))) { + ((trackedConnect[k] > bestConnect) || (trackedConnect[k] == bestConnect && s < bestSize)))) { bestRootAdj = trackedRootAdj[k]; bestConnect = trackedConnect[k]; bestSize = s; @@ -1195,18 +1079,13 @@ struct D2_MIS_Aggregation { // neighboring aggregate. labels_t labelsOld("old", numVerts); Kokkos::deep_copy(labelsOld, labels); - labels_t connectivities(Kokkos::ViewAllocateWithoutInitializing("connect"), - numVerts); + labels_t connectivities(Kokkos::ViewAllocateWithoutInitializing("connect"), numVerts); labels_t aggSizes("Phase3 Agg Sizes", numAggs); - Kokkos::parallel_for( - range_pol(0, numVerts), - SizeAndConnectivityFunctor(numVerts, rowmap, entries, labels, - connectivities, aggSizes)); + Kokkos::parallel_for(range_pol(0, numVerts), + SizeAndConnectivityFunctor(numVerts, rowmap, entries, labels, connectivities, aggSizes)); // Now, join vertices to aggregates - Kokkos::parallel_for( - range_pol(0, numVerts), - AssignLeftoverFunctor(numVerts, rowmap, entries, labels, labelsOld, - connectivities, aggSizes, roots)); + Kokkos::parallel_for(range_pol(0, numVerts), AssignLeftoverFunctor(numVerts, rowmap, entries, labels, labelsOld, + connectivities, aggSizes, roots)); } // phase 2 creates new aggregates in between the initial MIS-2 neighborhoods. 
diff --git a/packages/kokkos-kernels/graph/impl/KokkosGraph_ExplicitCoarsening_impl.hpp b/packages/kokkos-kernels/graph/impl/KokkosGraph_ExplicitCoarsening_impl.hpp index 464880c93200..dc0e802485bd 100644 --- a/packages/kokkos-kernels/graph/impl/KokkosGraph_ExplicitCoarsening_impl.hpp +++ b/packages/kokkos-kernels/graph/impl/KokkosGraph_ExplicitCoarsening_impl.hpp @@ -20,10 +20,8 @@ namespace KokkosGraph { namespace Impl { -template +template struct ExplicitGraphCoarsening { using exec_space = typename device_t::execution_space; using range_pol = Kokkos::RangePolicy; @@ -33,29 +31,23 @@ struct ExplicitGraphCoarsening { using const_bitset_t = Kokkos::ConstBitset; struct ClusterSizeFunctor { - ClusterSizeFunctor(const ordinal_view_t& counts_, - const labels_t& vertClusters_) + ClusterSizeFunctor(const ordinal_view_t& counts_, const labels_t& vertClusters_) : counts(counts_), vertClusters(vertClusters_) {} - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - Kokkos::atomic_increment(&counts(vertClusters(i))); - } + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { Kokkos::atomic_increment(&counts(vertClusters(i))); } ordinal_view_t counts; labels_t vertClusters; }; struct FillClusterVertsFunctor { - FillClusterVertsFunctor(const ordinal_view_t& clusterOffsets_, - const ordinal_view_t& clusterVerts_, - const labels_t& vertClusters_, - const ordinal_view_t& insertCounts_) + FillClusterVertsFunctor(const ordinal_view_t& clusterOffsets_, const ordinal_view_t& clusterVerts_, + const labels_t& vertClusters_, const ordinal_view_t& insertCounts_) : clusterOffsets(clusterOffsets_), clusterVerts(clusterVerts_), vertClusters(vertClusters_), insertCounts(insertCounts_) {} KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - lno_t cluster = vertClusters(i); - lno_t offset = clusterOffsets(cluster) + - Kokkos::atomic_fetch_add(&insertCounts(cluster), 1); + lno_t cluster = vertClusters(i); + lno_t offset = clusterOffsets(cluster) + 
Kokkos::atomic_fetch_add(&insertCounts(cluster), 1); clusterVerts(offset) = i; } ordinal_view_t clusterOffsets; @@ -65,12 +57,9 @@ struct ExplicitGraphCoarsening { }; struct BuildCrossClusterMaskFunctor { - BuildCrossClusterMaskFunctor(const fine_rowmap_t& rowmap_, - const fine_entries_t& colinds_, - const ordinal_view_t& clusterOffsets_, - const ordinal_view_t& clusterVerts_, - const labels_t& vertClusters_, - const bitset_t& mask_) + BuildCrossClusterMaskFunctor(const fine_rowmap_t& rowmap_, const fine_entries_t& colinds_, + const ordinal_view_t& clusterOffsets_, const ordinal_view_t& clusterVerts_, + const labels_t& vertClusters_, const bitset_t& mask_) : numRows(rowmap_.extent(0) - 1), rowmap(rowmap_), colinds(colinds_), @@ -106,13 +95,10 @@ struct ExplicitGraphCoarsening { // Try to insert the edge between cluster (team's cluster) and neighbor // (neighboring cluster) by inserting nei into the table. - KOKKOS_INLINE_FUNCTION bool insert(lno_t cluster, lno_t nei, - int* table) const { + KOKKOS_INLINE_FUNCTION bool insert(lno_t cluster, lno_t nei, int* table) const { unsigned h = xorshiftHash(nei); for (unsigned i = h; i < h + 2; i++) { - if (Kokkos::atomic_compare_exchange_strong(&table[i % tableSize()], - cluster, nei)) - return true; + if (Kokkos::atomic_compare_exchange_strong(&table[i % tableSize()], cluster, nei)) return true; } return false; } @@ -127,40 +113,35 @@ struct ExplicitGraphCoarsening { // thread handles a cluster int* table = (int*)t.team_shmem().get_shmem(tableSize() * sizeof(int)); // mark every entry as cluster (self-loop) to represent free/empty - Kokkos::parallel_for(Kokkos::TeamVectorRange(t, tableSize()), - [&](const lno_t i) { table[i] = cluster; }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(t, tableSize()), [&](const lno_t i) { table[i] = cluster; }); t.team_barrier(); // now, for each row belonging to the cluster, iterate through the // neighbors - Kokkos::parallel_for( - Kokkos::TeamThreadRange(t, clusterSize), [&](const lno_t 
i) { - lno_t row = clusterVerts(clusterOffsets(cluster) + i); - lno_t rowDeg = rowmap(row + 1) - rowmap(row); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(t, rowDeg), - [&](const lno_t j) { - lno_t nei = colinds(rowmap(row) + j); - // Remote neighbors are not included - if (nei >= numRows) return; - lno_t neiCluster = vertClusters(nei); - if (neiCluster != cluster) { - // Have a neighbor. Try to find it in the - // table. - if (!lookup(neiCluster, table)) { - // Not in the table. Try to insert it. - insert(cluster, neiCluster, table); - // Whether or not insertion succeeded, - // this is a cross-cluster edge possibly - // not seen before - mask.set(rowmap(row) + j); - } - } - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(t, clusterSize), [&](const lno_t i) { + lno_t row = clusterVerts(clusterOffsets(cluster) + i); + lno_t rowDeg = rowmap(row + 1) - rowmap(row); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(t, rowDeg), [&](const lno_t j) { + lno_t nei = colinds(rowmap(row) + j); + // Remote neighbors are not included + if (nei >= numRows) return; + lno_t neiCluster = vertClusters(nei); + if (neiCluster != cluster) { + // Have a neighbor. Try to find it in the + // table. + if (!lookup(neiCluster, table)) { + // Not in the table. Try to insert it. 
+ insert(cluster, neiCluster, table); + // Whether or not insertion succeeded, + // this is a cross-cluster edge possibly + // not seen before + mask.set(rowmap(row) + j); + } + } + }); + }); } - size_t team_shmem_size(int /*teamSize*/) const { - return tableSize() * sizeof(int); - } + size_t team_shmem_size(int /*teamSize*/) const { return tableSize() * sizeof(int); } lno_t numRows; fine_rowmap_t rowmap; @@ -172,14 +153,10 @@ struct ExplicitGraphCoarsening { }; struct FillClusterEntriesFunctor { - FillClusterEntriesFunctor(const fine_rowmap_t& rowmap_, - const fine_entries_t& colinds_, - const coarse_rowmap_t& clusterRowmap_, - const coarse_entries_t& clusterEntries_, - const ordinal_view_t& clusterOffsets_, - const ordinal_view_t& clusterVerts_, - const labels_t& vertClusters_, - const bitset_t& edgeMask_) + FillClusterEntriesFunctor(const fine_rowmap_t& rowmap_, const fine_entries_t& colinds_, + const coarse_rowmap_t& clusterRowmap_, const coarse_entries_t& clusterEntries_, + const ordinal_view_t& clusterOffsets_, const ordinal_view_t& clusterVerts_, + const labels_t& vertClusters_, const bitset_t& edgeMask_) : rowmap(rowmap_), colinds(colinds_), clusterRowmap(clusterRowmap_), @@ -189,8 +166,7 @@ struct ExplicitGraphCoarsening { vertClusters(vertClusters_), edgeMask(edgeMask_) {} // Run this scan over entries in clusterVerts (reordered point rows) - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i, lno_t& lcount, - const bool& finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i, lno_t& lcount, const bool& finalPass) const { lno_t numRows = rowmap.extent(0) - 1; lno_t row = clusterVerts(i); size_type rowStart = rowmap(row); @@ -238,9 +214,8 @@ struct ExplicitGraphCoarsening { // Constructor just does the computation and outputs to coarseRowmap, // coarseEntries. 
- ExplicitGraphCoarsening(const fine_rowmap_t& fineRowmap, - const fine_entries_t& fineEntries, - const labels_t& labels, lno_t numCoarseVerts) { + ExplicitGraphCoarsening(const fine_rowmap_t& fineRowmap, const fine_entries_t& fineEntries, const labels_t& labels, + lno_t numCoarseVerts) { lno_t numFineVerts = fineRowmap.extent(0); if (numFineVerts <= 1) { coarseRowmap = coarse_rowmap_t(); @@ -249,54 +224,39 @@ struct ExplicitGraphCoarsening { } numFineVerts--; clusterOffsets = ordinal_view_t("Cluster offsets", numCoarseVerts + 1); - clusterVerts = ordinal_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Cluster verts"), - numFineVerts); - Kokkos::parallel_for(range_pol(0, numFineVerts), - ClusterSizeFunctor(clusterOffsets, labels)); - KokkosKernels::Impl::exclusive_parallel_prefix_sum( - numCoarseVerts + 1, clusterOffsets); + clusterVerts = ordinal_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Cluster verts"), numFineVerts); + Kokkos::parallel_for(range_pol(0, numFineVerts), ClusterSizeFunctor(clusterOffsets, labels)); + KokkosKernels::Impl::exclusive_parallel_prefix_sum(numCoarseVerts + 1, clusterOffsets); { - ordinal_view_t tempInsertCounts("Temporary cluster insert counts", - numCoarseVerts); + ordinal_view_t tempInsertCounts("Temporary cluster insert counts", numCoarseVerts); Kokkos::parallel_for(range_pol(0, numFineVerts), - FillClusterVertsFunctor(clusterOffsets, clusterVerts, - labels, tempInsertCounts)); + FillClusterVertsFunctor(clusterOffsets, clusterVerts, labels, tempInsertCounts)); } // Determine the set of edges (in the point graph) that cross between two // distinct clusters int vectorSize = KokkosKernels::Impl::kk_get_suggested_vector_size( - numFineVerts, fineEntries.extent(0), - KokkosKernels::Impl::kk_get_exec_space_type()); + numFineVerts, fineEntries.extent(0), KokkosKernels::Impl::kk_get_exec_space_type()); bitset_t crossClusterEdgeMask(fineEntries.extent(0)); size_type numClusterEdges; { - BuildCrossClusterMaskFunctor 
buildEdgeMask(fineRowmap, fineEntries, - clusterOffsets, clusterVerts, - labels, crossClusterEdgeMask); - int sharedPerTeam = buildEdgeMask.team_shmem_size( - 0); // using team-size = 0 for since no per-thread shared is used. - int teamSize = KokkosKernels::Impl::get_suggested_team_size( - buildEdgeMask, vectorSize, sharedPerTeam, 0); + BuildCrossClusterMaskFunctor buildEdgeMask(fineRowmap, fineEntries, clusterOffsets, clusterVerts, labels, + crossClusterEdgeMask); + int sharedPerTeam = + buildEdgeMask.team_shmem_size(0); // using team-size = 0 for since no per-thread shared is used. + int teamSize = + KokkosKernels::Impl::get_suggested_team_size(buildEdgeMask, vectorSize, sharedPerTeam, 0); Kokkos::parallel_for( - team_pol(numCoarseVerts, teamSize, vectorSize) - .set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam)), + team_pol(numCoarseVerts, teamSize, vectorSize).set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam)), buildEdgeMask); numClusterEdges = crossClusterEdgeMask.count(); } - coarseRowmap = coarse_rowmap_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Cluster graph rowmap"), - numCoarseVerts + 1); + coarseRowmap = + coarse_rowmap_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Cluster graph rowmap"), numCoarseVerts + 1); coarseEntries = - coarse_entries_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Cluster graph colinds"), - numClusterEdges); - Kokkos::parallel_scan( - range_pol(0, numFineVerts), - FillClusterEntriesFunctor(fineRowmap, fineEntries, coarseRowmap, - coarseEntries, clusterOffsets, clusterVerts, - labels, crossClusterEdgeMask)); + coarse_entries_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Cluster graph colinds"), numClusterEdges); + Kokkos::parallel_scan(range_pol(0, numFineVerts), + FillClusterEntriesFunctor(fineRowmap, fineEntries, coarseRowmap, coarseEntries, + clusterOffsets, clusterVerts, labels, crossClusterEdgeMask)); } coarse_rowmap_t coarseRowmap; diff --git 
a/packages/kokkos-kernels/graph/impl/KokkosGraph_color_d1_spec.hpp b/packages/kokkos-kernels/graph/impl/KokkosGraph_color_d1_spec.hpp index 5d6624076374..178fdd9182ba 100644 --- a/packages/kokkos-kernels/graph/impl/KokkosGraph_color_d1_spec.hpp +++ b/packages/kokkos-kernels/graph/impl/KokkosGraph_color_d1_spec.hpp @@ -36,21 +36,17 @@ struct color_d1_eti_spec_avail { } // namespace Impl } // namespace KokkosGraph -#define KOKKOSGRAPH_COLOR_D1_ETI_SPEC_AVAIL(SCALAR_TYPE, ORDINAL_TYPE, \ - OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template <> \ - struct color_d1_eti_spec_avail< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSGRAPH_COLOR_D1_ETI_SPEC_AVAIL(SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + template <> \ + struct color_d1_eti_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -63,24 +59,19 @@ namespace Impl { /// \brief Implementation of KokkosGraph::graph_color (distance-1 greedy /// coloring) -template ::value> +template ::value> struct COLOR_D1 { - static void color_d1(KernelHandle *handle, - typename lno_view_t::non_const_value_type num_rows, - size_view_t rowmap, lno_view_t entries); + static void color_d1(KernelHandle *handle, typename lno_view_t::non_const_value_type num_rows, size_view_t rowmap, + lno_view_t entries); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY template -struct COLOR_D1 { - static void color_d1(KernelHandle *handle, - typename lno_view_t::non_const_value_type num_rows, - 
size_view_t rowmap, lno_view_t entries) { +struct COLOR_D1 { + static void color_d1(KernelHandle *handle, typename lno_view_t::non_const_value_type num_rows, size_view_t rowmap, + lno_view_t entries) { KokkosGraph::Impl::graph_color_impl(handle, num_rows, rowmap, entries); } }; @@ -90,34 +81,26 @@ struct COLOR_D1, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSGRAPH_COLOR_D1_ETI_SPEC_DECL(SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + extern template struct COLOR_D1< \ + typename KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; -#define KOKKOSGRAPH_COLOR_D1_ETI_SPEC_INST(SCALAR_TYPE, ORDINAL_TYPE, \ - OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template struct COLOR_D1< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSGRAPH_COLOR_D1_ETI_SPEC_INST(SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + template struct COLOR_D1< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; #endif diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenConstruct.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenConstruct.hpp index 28de59979e62..8e1cce3ddb52 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenConstruct.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenConstruct.hpp @@ -31,8 +31,7 @@ namespace KokkosSparse { 
namespace Impl { -template +template struct SortLowDegreeCrsMatrixFunctor { using size_type = typename rowmap_t::non_const_value_type; using lno_t = typename entries_t::non_const_value_type; @@ -40,27 +39,17 @@ struct SortLowDegreeCrsMatrixFunctor { using team_mem = typename Kokkos::TeamPolicy::member_type; using value_type = lno_t; - SortLowDegreeCrsMatrixFunctor(bool usingRangePol, const rowmap_t& _rowmap, - const entries_t& _entries, - const values_t& _values, - const lno_t _degreeLimit) - : rowmap(_rowmap), - entries(_entries), - values(_values), - degreeLimit(_degreeLimit) { + SortLowDegreeCrsMatrixFunctor(bool usingRangePol, const rowmap_t& _rowmap, const entries_t& _entries, + const values_t& _values, const lno_t _degreeLimit) + : rowmap(_rowmap), entries(_entries), values(_values), degreeLimit(_degreeLimit) { if (usingRangePol) { - entriesAux = - entries_t(Kokkos::ViewAllocateWithoutInitializing("Entries aux"), - entries.extent(0)); - valuesAux = - values_t(Kokkos::ViewAllocateWithoutInitializing("Values aux"), - values.extent(0)); + entriesAux = entries_t(Kokkos::ViewAllocateWithoutInitializing("Entries aux"), entries.extent(0)); + valuesAux = values_t(Kokkos::ViewAllocateWithoutInitializing("Values aux"), values.extent(0)); } // otherwise, aux arrays won't be allocated (sorting in place) } - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i, - value_type& reducer) const { + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i, value_type& reducer) const { size_type rowStart = rowmap(i); size_type rowEnd = rowmap(i + 1); lno_t rowNum = rowEnd - rowStart; @@ -71,13 +60,11 @@ struct SortLowDegreeCrsMatrixFunctor { // Radix sort requires unsigned keys for comparison using unsigned_lno_t = typename std::make_unsigned::type; KokkosKernels::SerialRadixSort2( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, values.data() + rowStart, - valuesAux.data() + rowStart, rowNum); + (unsigned_lno_t*)entries.data() + 
rowStart, (unsigned_lno_t*)entriesAux.data() + rowStart, + values.data() + rowStart, valuesAux.data() + rowStart, rowNum); } - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t, - value_type& reducer) const { + KOKKOS_INLINE_FUNCTION void operator()(const team_mem t, value_type& reducer) const { size_type i = t.league_rank(); size_type rowStart = rowmap(i); size_type rowEnd = rowmap(i + 1); @@ -86,8 +73,8 @@ struct SortLowDegreeCrsMatrixFunctor { Kokkos::single(Kokkos::PerTeam(t), [&]() { reducer++; }); return; } - KokkosKernels::TeamBitonicSort2( - entries.data() + rowStart, values.data() + rowStart, rowNum, t); + KokkosKernels::TeamBitonicSort2(entries.data() + rowStart, + values.data() + rowStart, rowNum, t); } rowmap_t rowmap; @@ -103,23 +90,19 @@ struct SortLowDegreeCrsMatrixFunctor { // Sort a CRS matrix: within each row, sort entries ascending by column. // At the same time, permute the values. // Only modifies rows below the degreeLimit -template +template typename entries_t::non_const_value_type sort_low_degree_rows_crs_matrix( const rowmap_t& rowmap, const entries_t& entries, const values_t& values, const typename entries_t::non_const_value_type degreeLimit) { using lno_t = typename entries_t::non_const_value_type; using team_pol = Kokkos::TeamPolicy; - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - Impl::SortLowDegreeCrsMatrixFunctor - funct(useRadix, rowmap, entries, values, degreeLimit); + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + Impl::SortLowDegreeCrsMatrixFunctor funct(useRadix, rowmap, entries, + values, degreeLimit); lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; lno_t notSorted = 0; if (useRadix) { - Kokkos::parallel_reduce("sort_crs_matrix", - Kokkos::RangePolicy(0, numRows), - funct, notSorted); + Kokkos::parallel_reduce("sort_crs_matrix", Kokkos::RangePolicy(0, numRows), funct, notSorted); } else { // Try to get teamsize to be largest power of 2 not greater than avg entries // per row @@ -136,10 +119,8 @@ typename entries_t::non_const_value_type sort_low_degree_rows_crs_matrix( teamSize *= 2; } team_pol temp(numRows, teamSize); - teamSize = std::min(teamSize, - temp.team_size_max(funct, Kokkos::ParallelReduceTag())); - Kokkos::parallel_reduce("sort_crs_matrix", team_pol(numRows, teamSize), - funct, notSorted); + teamSize = std::min(teamSize, temp.team_size_max(funct, Kokkos::ParallelReduceTag())); + Kokkos::parallel_reduce("sort_crs_matrix", team_pol(numRows, teamSize), funct, notSorted); } return notSorted; } @@ -156,30 +137,27 @@ template class coarse_builder { public: // define internal types - using matrix_t = crsMat; - using exec_space = typename matrix_t::execution_space; - using mem_space = typename matrix_t::memory_space; - using Device = typename matrix_t::device_type; - using ordinal_t = typename matrix_t::ordinal_type; - using edge_offset_t = typename matrix_t::size_type; - using scalar_t = typename matrix_t::value_type; - using vtx_view_t = Kokkos::View; - using wgt_view_t = Kokkos::View; - using edge_view_t = Kokkos::View; - using edge_subview_t = Kokkos::View; - using graph_type = typename matrix_t::staticcrsgraph_type; - using policy_t = Kokkos::RangePolicy; - using dyn_policy_t = - Kokkos::RangePolicy, exec_space>; - using team_policy_t = Kokkos::TeamPolicy; - using dyn_team_policy_t = - Kokkos::TeamPolicy, exec_space>; - using member = typename team_policy_t::member_type; - using spgemm_kernel_handle = KokkosKernels::Experimental::KokkosKernelsHandle< - edge_offset_t, ordinal_t, scalar_t, exec_space, mem_space, mem_space>; - using uniform_memory_pool_t = - 
KokkosKernels::Impl::UniformMemoryPool; - using mapper_t = coarsen_heuristics; + using matrix_t = crsMat; + using exec_space = typename matrix_t::execution_space; + using mem_space = typename matrix_t::memory_space; + using Device = typename matrix_t::device_type; + using ordinal_t = typename matrix_t::ordinal_type; + using edge_offset_t = typename matrix_t::size_type; + using scalar_t = typename matrix_t::value_type; + using vtx_view_t = Kokkos::View; + using wgt_view_t = Kokkos::View; + using edge_view_t = Kokkos::View; + using edge_subview_t = Kokkos::View; + using graph_type = typename matrix_t::staticcrsgraph_type; + using policy_t = Kokkos::RangePolicy; + using dyn_policy_t = Kokkos::RangePolicy, exec_space>; + using team_policy_t = Kokkos::TeamPolicy; + using dyn_team_policy_t = Kokkos::TeamPolicy, exec_space>; + using member = typename team_policy_t::member_type; + using spgemm_kernel_handle = KokkosKernels::Experimental::KokkosKernelsHandle; + using uniform_memory_pool_t = KokkosKernels::Impl::UniformMemoryPool; + using mapper_t = coarsen_heuristics; static constexpr ordinal_t get_null_val() { // this value must line up with the null value used by the hashmap // accumulator @@ -189,10 +167,9 @@ class coarse_builder { return std::numeric_limits::max(); } } - static constexpr ordinal_t ORD_MAX = get_null_val(); - static constexpr bool is_host_space = std::is_same< - typename exec_space::memory_space, - typename Kokkos::DefaultHostExecutionSpace::memory_space>::value; + static constexpr ordinal_t ORD_MAX = get_null_val(); + static constexpr bool is_host_space = + std::is_same::value; static constexpr bool scal_eq_ord = std::is_same::value; // contains matrix and vertex weights corresponding to current level // interp matrix maps previous level to this level @@ -222,9 +199,7 @@ class coarse_builder { }; // determine if dynamic scheduling should be used - static bool should_use_dyn( - const ordinal_t n, const Kokkos::View work, - int t_count) { + static bool 
should_use_dyn(const ordinal_t n, const Kokkos::View work, int t_count) { bool use_dyn = false; edge_offset_t max = 0; edge_offset_t min = std::numeric_limits::max(); @@ -252,19 +227,16 @@ class coarse_builder { // build the course graph according to ((B^T A) B) or (B^T (A B)), where B is // aggregator matrix - static coarse_level_triple build_coarse_graph_spgemm( - coarsen_handle& handle, const coarse_level_triple level, - const matrix_t interp_mtx) { + static coarse_level_triple build_coarse_graph_spgemm(coarsen_handle& handle, const coarse_level_triple level, + const matrix_t interp_mtx) { vtx_view_t f_vtx_w = level.vtx_wgts; matrix_t g = level.mtx; - if (!KokkosSparse::Impl::isCrsGraphSorted(g.graph.row_map, g.graph.entries)) - KokkosSparse::sort_crs_matrix(g); + if (!KokkosSparse::Impl::isCrsGraphSorted(g.graph.row_map, g.graph.entries)) KokkosSparse::sort_crs_matrix(g); ordinal_t n = g.numRows(); ordinal_t nc = interp_mtx.numCols(); - matrix_t interp_transpose = - KokkosSparse::Impl::transpose_matrix(interp_mtx); + matrix_t interp_transpose = KokkosSparse::Impl::transpose_matrix(interp_mtx); KokkosSparse::sort_crs_matrix(interp_transpose); spgemm_kernel_handle kh; @@ -278,78 +250,60 @@ class coarse_builder { if (handle.b == Spgemm_transpose_first) { kh.create_spgemm_handle(); edge_view_t row_map_p1("rows_partial", nc + 1); - KokkosSparse::Experimental::spgemm_symbolic( - &kh, nc, n, n, interp_transpose.graph.row_map, - interp_transpose.graph.entries, false, g.graph.row_map, - g.graph.entries, false, row_map_p1); + KokkosSparse::Experimental::spgemm_symbolic(&kh, nc, n, n, interp_transpose.graph.row_map, + interp_transpose.graph.entries, false, g.graph.row_map, + g.graph.entries, false, row_map_p1); // partial-result matrix - vtx_view_t entries_p1("adjacencies_partial", - kh.get_spgemm_handle()->get_c_nnz()); - wgt_view_t values_p1("weights_partial", - kh.get_spgemm_handle()->get_c_nnz()); + vtx_view_t entries_p1("adjacencies_partial", 
kh.get_spgemm_handle()->get_c_nnz()); + wgt_view_t values_p1("weights_partial", kh.get_spgemm_handle()->get_c_nnz()); KokkosSparse::Experimental::spgemm_numeric( - &kh, nc, n, n, interp_transpose.graph.row_map, - interp_transpose.graph.entries, interp_transpose.values, false, - g.graph.row_map, g.graph.entries, g.values, false, row_map_p1, - entries_p1, values_p1); + &kh, nc, n, n, interp_transpose.graph.row_map, interp_transpose.graph.entries, interp_transpose.values, false, + g.graph.row_map, g.graph.entries, g.values, false, row_map_p1, entries_p1, values_p1); kh.destroy_spgemm_handle(); row_map_coarse = edge_view_t("rows_coarse", nc + 1); kh.create_spgemm_handle(); - KokkosSparse::Experimental::spgemm_symbolic( - &kh, nc, n, nc, row_map_p1, entries_p1, false, - interp_mtx.graph.row_map, interp_mtx.graph.entries, false, - row_map_coarse); + KokkosSparse::Experimental::spgemm_symbolic(&kh, nc, n, nc, row_map_p1, entries_p1, false, + interp_mtx.graph.row_map, interp_mtx.graph.entries, false, + row_map_coarse); // coarse-graph adjacency matrix - adj_coarse = - vtx_view_t("adjacencies_coarse", kh.get_spgemm_handle()->get_c_nnz()); - wgt_coarse = - wgt_view_t("weights_coarse", kh.get_spgemm_handle()->get_c_nnz()); + adj_coarse = vtx_view_t("adjacencies_coarse", kh.get_spgemm_handle()->get_c_nnz()); + wgt_coarse = wgt_view_t("weights_coarse", kh.get_spgemm_handle()->get_c_nnz()); - KokkosSparse::Experimental::spgemm_numeric( - &kh, nc, n, nc, row_map_p1, entries_p1, values_p1, false, - interp_mtx.graph.row_map, interp_mtx.graph.entries, interp_mtx.values, - false, row_map_coarse, adj_coarse, wgt_coarse); + KokkosSparse::Experimental::spgemm_numeric(&kh, nc, n, nc, row_map_p1, entries_p1, values_p1, false, + interp_mtx.graph.row_map, interp_mtx.graph.entries, interp_mtx.values, + false, row_map_coarse, adj_coarse, wgt_coarse); kh.destroy_spgemm_handle(); } else { edge_view_t row_map_p1("rows_partial", n + 1); kh.create_spgemm_handle(); - 
KokkosSparse::Experimental::spgemm_symbolic( - &kh, n, n, nc, g.graph.row_map, g.graph.entries, false, - interp_mtx.graph.row_map, interp_mtx.graph.entries, false, - row_map_p1); + KokkosSparse::Experimental::spgemm_symbolic(&kh, n, n, nc, g.graph.row_map, g.graph.entries, false, + interp_mtx.graph.row_map, interp_mtx.graph.entries, false, + row_map_p1); // partial-result matrix - vtx_view_t entries_p1("adjacencies_partial", - kh.get_spgemm_handle()->get_c_nnz()); - wgt_view_t values_p1("weights_partial", - kh.get_spgemm_handle()->get_c_nnz()); + vtx_view_t entries_p1("adjacencies_partial", kh.get_spgemm_handle()->get_c_nnz()); + wgt_view_t values_p1("weights_partial", kh.get_spgemm_handle()->get_c_nnz()); - KokkosSparse::Experimental::spgemm_numeric( - &kh, n, n, nc, g.graph.row_map, g.graph.entries, g.values, false, - interp_mtx.graph.row_map, interp_mtx.graph.entries, interp_mtx.values, - false, row_map_p1, entries_p1, values_p1); + KokkosSparse::Experimental::spgemm_numeric(&kh, n, n, nc, g.graph.row_map, g.graph.entries, g.values, false, + interp_mtx.graph.row_map, interp_mtx.graph.entries, interp_mtx.values, + false, row_map_p1, entries_p1, values_p1); kh.destroy_spgemm_handle(); row_map_coarse = edge_view_t("rows_coarse", nc + 1); kh.create_spgemm_handle(); - KokkosSparse::Experimental::spgemm_symbolic( - &kh, nc, n, nc, interp_transpose.graph.row_map, - interp_transpose.graph.entries, false, row_map_p1, entries_p1, false, - row_map_coarse); + KokkosSparse::Experimental::spgemm_symbolic(&kh, nc, n, nc, interp_transpose.graph.row_map, + interp_transpose.graph.entries, false, row_map_p1, entries_p1, false, + row_map_coarse); // coarse-graph adjacency matrix - adj_coarse = - vtx_view_t("adjacencies_coarse", kh.get_spgemm_handle()->get_c_nnz()); - wgt_coarse = - wgt_view_t("weights_coarse", kh.get_spgemm_handle()->get_c_nnz()); + adj_coarse = vtx_view_t("adjacencies_coarse", kh.get_spgemm_handle()->get_c_nnz()); + wgt_coarse = wgt_view_t("weights_coarse", 
kh.get_spgemm_handle()->get_c_nnz()); KokkosSparse::Experimental::spgemm_numeric( - &kh, nc, n, nc, interp_transpose.graph.row_map, - interp_transpose.graph.entries, interp_transpose.values, false, - row_map_p1, entries_p1, values_p1, false, row_map_coarse, adj_coarse, - wgt_coarse); + &kh, nc, n, nc, interp_transpose.graph.row_map, interp_transpose.graph.entries, interp_transpose.values, + false, row_map_p1, entries_p1, values_p1, false, row_map_coarse, adj_coarse, wgt_coarse); kh.destroy_spgemm_handle(); } @@ -362,8 +316,7 @@ class coarse_builder { Kokkos::parallel_for( policy_t(0, nc), KOKKOS_LAMBDA(ordinal_t u) { - for (edge_offset_t j = row_map_coarse(u); j < row_map_coarse(u + 1); - j++) { + for (edge_offset_t j = row_map_coarse(u); j < row_map_coarse(u + 1); j++) { if (adj_coarse(j) != u) { nonLoops(u)++; } @@ -373,8 +326,7 @@ class coarse_builder { edge_view_t row_map_nonloop("nonloop row map", nc + 1); Kokkos::parallel_scan( - policy_t(0, nc), KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& update, - const bool final) { + policy_t(0, nc), KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& update, const bool final) { const edge_offset_t val_i = nonLoops(i); update += val_i; if (final) { @@ -394,8 +346,7 @@ class coarse_builder { Kokkos::parallel_for( policy_t(0, nc), KOKKOS_LAMBDA(const ordinal_t u) { - for (edge_offset_t j = row_map_coarse(u); j < row_map_coarse(u + 1); - j++) { + for (edge_offset_t j = row_map_coarse(u); j < row_map_coarse(u + 1); j++) { if (adj_coarse(j) != u) { edge_offset_t offset = row_map_nonloop(u) + nonLoops(u)++; entries_nonloop(offset) = adj_coarse(j); @@ -412,8 +363,7 @@ class coarse_builder { vtx_view_t c_vtx_w("coarse vtx weights", interp_mtx.numCols()); Kokkos::parallel_for( - "compute coarse vtx wgts", policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i) { + "compute coarse vtx wgts", policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t i) { ordinal_t u = interp_mtx.graph.entries(i); Kokkos::atomic_add(&c_vtx_w(u), f_vtx_w(i)); }); @@ 
-431,12 +381,10 @@ class coarse_builder { vtx_view_t input; edge_view_t output; - prefix_sum(vtx_view_t _input, edge_view_t _output) - : input(_input), output(_output) {} + prefix_sum(vtx_view_t _input, edge_view_t _output) : input(_input), output(_output) {} KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_t i, edge_offset_t& update, - const bool final) const { + void operator()(const ordinal_t i, edge_offset_t& update, const bool final) const { const edge_offset_t val_i = input(i); update += val_i; if (final) { @@ -455,11 +403,8 @@ class coarse_builder { vtx_view_t dedupe_edge_count; ordinal_t degreeLimit; - functorDedupeLowDegreeAfterSort(edge_view_t _row_map, vtx_view_t _entries, - vtx_view_t _entriesOut, wgt_view_t _wgts, - wgt_view_t _wgtsOut, - vtx_view_t _dedupe_edge_count, - ordinal_t _degreeLimit_) + functorDedupeLowDegreeAfterSort(edge_view_t _row_map, vtx_view_t _entries, vtx_view_t _entriesOut, wgt_view_t _wgts, + wgt_view_t _wgtsOut, vtx_view_t _dedupe_edge_count, ordinal_t _degreeLimit_) : row_map(_row_map), entries(_entries), entriesOut(_entriesOut), @@ -477,31 +422,28 @@ class coarse_builder { if (degree > degreeLimit) { return; } - Kokkos::parallel_scan( - Kokkos::TeamThreadRange(thread, start, end), - [&](const edge_offset_t& i, edge_offset_t& update, const bool final) { - if (i == start) { - update += 1; - } else if (entries(i) != entries(i - 1)) { - update += 1; - } - if (final) { - entriesOut(start + update - 1) = entries(i); - // requires that wgtsOut be initialized to 0 - Kokkos::atomic_add(&wgtsOut(start + update - 1), wgts(i)); - if (i + 1 == end) { - dedupe_edge_count(u) = update; - } - } - }); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(thread, start, start + dedupe_edge_count(u)), - [&](const edge_offset_t& i) { - entries(i) = entriesOut(i); - wgts(i) = wgtsOut(i); - }); - Kokkos::single(Kokkos::PerTeam(thread), - [&]() { thread_sum += dedupe_edge_count(u); }); + Kokkos::parallel_scan(Kokkos::TeamThreadRange(thread, start, 
end), + [&](const edge_offset_t& i, edge_offset_t& update, const bool final) { + if (i == start) { + update += 1; + } else if (entries(i) != entries(i - 1)) { + update += 1; + } + if (final) { + entriesOut(start + update - 1) = entries(i); + // requires that wgtsOut be initialized to 0 + Kokkos::atomic_add(&wgtsOut(start + update - 1), wgts(i)); + if (i + 1 == end) { + dedupe_edge_count(u) = update; + } + } + }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, start, start + dedupe_edge_count(u)), + [&](const edge_offset_t& i) { + entries(i) = entriesOut(i); + wgts(i) = wgtsOut(i); + }); + Kokkos::single(Kokkos::PerTeam(thread), [&]() { thread_sum += dedupe_edge_count(u); }); } KOKKOS_INLINE_FUNCTION @@ -536,8 +478,7 @@ class coarse_builder { wgt_view_t wgts, wgtsOut; vtx_view_t dedupe_edge_count; - functorDedupeAfterSort(edge_view_t _row_map, vtx_view_t _entries, - vtx_view_t _entriesOut, wgt_view_t _wgts, + functorDedupeAfterSort(edge_view_t _row_map, vtx_view_t _entries, vtx_view_t _entriesOut, wgt_view_t _wgts, wgt_view_t _wgtsOut, vtx_view_t _dedupe_edge_count) : row_map(_row_map), entries(_entries), @@ -551,25 +492,23 @@ class coarse_builder { ordinal_t u = thread.league_rank(); edge_offset_t start = row_map(u); edge_offset_t end = row_map(u + 1); - Kokkos::parallel_scan( - Kokkos::TeamThreadRange(thread, start, end), - [&](const edge_offset_t& i, edge_offset_t& update, const bool final) { - if (i == start) { - update += 1; - } else if (entries(i) != entries(i - 1)) { - update += 1; - } - if (final) { - entriesOut(start + update - 1) = entries(i); - // requires that wgtsOut be initialized to 0 - Kokkos::atomic_add(&wgtsOut(start + update - 1), wgts(i)); - if (i + 1 == end) { - dedupe_edge_count(u) = update; - } - } - }); - Kokkos::single(Kokkos::PerTeam(thread), - [&]() { thread_sum += dedupe_edge_count(u); }); + Kokkos::parallel_scan(Kokkos::TeamThreadRange(thread, start, end), + [&](const edge_offset_t& i, edge_offset_t& update, const bool final) { + 
if (i == start) { + update += 1; + } else if (entries(i) != entries(i - 1)) { + update += 1; + } + if (final) { + entriesOut(start + update - 1) = entries(i); + // requires that wgtsOut be initialized to 0 + Kokkos::atomic_add(&wgtsOut(start + update - 1), wgts(i)); + if (i + 1 == end) { + dedupe_edge_count(u) = update; + } + } + }); + Kokkos::single(Kokkos::PerTeam(thread), [&]() { thread_sum += dedupe_edge_count(u); }); } KOKKOS_INLINE_FUNCTION @@ -601,11 +540,10 @@ class coarse_builder { const wgt_view_t source_wgts; wgt_view_t target_wgts; - functorCollapseDirectedToUndirected( - const edge_view_t _source_row_map, const edge_view_t _target_row_map, - const vtx_view_t _source_edge_counts, vtx_view_t _target_edge_counts, - const vtx_view_t _source_destinations, vtx_view_t _target_destinations, - const wgt_view_t _source_wgts, wgt_view_t _target_wgts) + functorCollapseDirectedToUndirected(const edge_view_t _source_row_map, const edge_view_t _target_row_map, + const vtx_view_t _source_edge_counts, vtx_view_t _target_edge_counts, + const vtx_view_t _source_destinations, vtx_view_t _target_destinations, + const wgt_view_t _source_wgts, wgt_view_t _target_wgts) : source_row_map(_source_row_map), target_row_map(_target_row_map), source_edge_counts(_source_edge_counts), @@ -620,24 +558,18 @@ class coarse_builder { ordinal_t u = thread.league_rank(); edge_offset_t u_origin = source_row_map(u); edge_offset_t u_dest_offset = target_row_map(u); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(thread, source_edge_counts(u)), - [&](const edge_offset_t u_idx) { - ordinal_t v = source_destinations(u_origin + u_idx); - scalar_t wgt = source_wgts(u_origin + u_idx); - edge_offset_t v_dest_offset = target_row_map(v); - edge_offset_t v_dest = - v_dest_offset + - Kokkos::atomic_fetch_add(&target_edge_counts(v), 1); - edge_offset_t u_dest = - u_dest_offset + - Kokkos::atomic_fetch_add(&target_edge_counts(u), 1); - - target_destinations(u_dest) = v; - target_wgts(u_dest) = wgt; - 
target_destinations(v_dest) = u; - target_wgts(v_dest) = wgt; - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, source_edge_counts(u)), [&](const edge_offset_t u_idx) { + ordinal_t v = source_destinations(u_origin + u_idx); + scalar_t wgt = source_wgts(u_origin + u_idx); + edge_offset_t v_dest_offset = target_row_map(v); + edge_offset_t v_dest = v_dest_offset + Kokkos::atomic_fetch_add(&target_edge_counts(v), 1); + edge_offset_t u_dest = u_dest_offset + Kokkos::atomic_fetch_add(&target_edge_counts(u), 1); + + target_destinations(u_dest) = v; + target_wgts(u_dest) = wgt; + target_destinations(v_dest) = u; + target_wgts(v_dest) = wgt; + }); } }; @@ -654,14 +586,10 @@ class coarse_builder { vtx_view_t remaining; bool use_out; - functorHashmapAccumulator(edge_view_t _row_map, vtx_view_t _entries_in, - vtx_view_t _entries_out, wgt_view_t _wgts_in, - wgt_view_t _wgts_out, - vtx_view_t _dedupe_edge_count, - uniform_memory_pool_t _memory_pool, - const ordinal_t _hash_size, - const ordinal_t _max_hash_entries, - vtx_view_t _remaining, bool _use_out) + functorHashmapAccumulator(edge_view_t _row_map, vtx_view_t _entries_in, vtx_view_t _entries_out, + wgt_view_t _wgts_in, wgt_view_t _wgts_out, vtx_view_t _dedupe_edge_count, + uniform_memory_pool_t _memory_pool, const ordinal_t _hash_size, + const ordinal_t _max_hash_entries, vtx_view_t _remaining, bool _use_out) : row_map(_row_map), entries_in(_entries_in), entries_out(_entries_out), @@ -680,12 +608,10 @@ class coarse_builder { if (std::is_same::value) return 0; #endif #if defined(KOKKOS_ENABLE_OPENMP) - if (std::is_same::value) - return Kokkos::OpenMP::impl_hardware_thread_id(); + if (std::is_same::value) return Kokkos::OpenMP::impl_hardware_thread_id(); #endif #if defined(KOKKOS_ENABLE_THREADS) - if (std::is_same::value) - return Kokkos::Threads::impl_hardware_thread_id(); + if (std::is_same::value) return Kokkos::Threads::impl_hardware_thread_id(); #endif return row_index; } @@ -745,17 +671,15 @@ class 
coarse_builder { // Set pointer to hash values scalar_t* values = (scalar_t*)wgts_out.data() + row_map(idx); - KokkosKernels::Experimental::HashmapAccumulator< - hash_size_type, hash_key_type, hash_value_type, - KokkosKernels::Experimental::HashOpType::bitwiseAnd> - hash_map(hash_size, hash_func_pow2, hash_begins, hash_nexts, keys, - values); + KokkosKernels::Experimental::HashmapAccumulator + hash_map(hash_size, hash_func_pow2, hash_begins, hash_nexts, keys, values); for (edge_offset_t i = row_map(idx); i < row_map(idx + 1); i++) { ordinal_t key = entries_in(i); scalar_t value = wgts_in(i); - hash_map.sequential_insert_into_hash_mergeAdd_TrackHashes( - key, value, used_hash_size, used_hash_count, used_hash_indices); + hash_map.sequential_insert_into_hash_mergeAdd_TrackHashes(key, value, used_hash_size, used_hash_count, + used_hash_indices); }; // Reset the Begins values to -1 before releasing the memory pool chunk. @@ -797,8 +721,7 @@ class coarse_builder { // Acquire a chunk from the memory pool using a spin-loop. 
ptr_write = nullptr; while (nullptr == ptr_write) { - ptr_write = (volatile ordinal_t*)(memory_pool.allocate_chunk( - thread.league_rank())); + ptr_write = (volatile ordinal_t*)(memory_pool.allocate_chunk(thread.league_rank())); } }, ptr_temp); @@ -848,29 +771,23 @@ class coarse_builder { values = (scalar_t*)(ptr_temp); } - KokkosKernels::Experimental::HashmapAccumulator< - hash_size_type, hash_key_type, hash_value_type, - KokkosKernels::Experimental::HashOpType::bitwiseAnd> - hash_map(hash_size, hash_func_pow2, hash_begins, hash_nexts, keys, - values); - - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(thread, row_map(idx), row_map(idx + 1)), - [&](const edge_offset_t& i) { - ordinal_t key = entries_in(i); - scalar_t value = wgts_in(i); - // duplicate keys may be inserted simultaneously, this causes - // problems we must handle later - int r = - hash_map - .vector_atomic_insert_into_hash_mergeAtomicAdd_TrackHashes( - key, value, used_hash_size, used_hash_count, - used_hash_indices); - - // Check return code - if (r) { - } - }); + KokkosKernels::Experimental::HashmapAccumulator + hash_map(hash_size, hash_func_pow2, hash_begins, hash_nexts, keys, values); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(thread, row_map(idx), row_map(idx + 1)), + [&](const edge_offset_t& i) { + ordinal_t key = entries_in(i); + scalar_t value = wgts_in(i); + // duplicate keys may be inserted simultaneously, this causes + // problems we must handle later + int r = hash_map.vector_atomic_insert_into_hash_mergeAtomicAdd_TrackHashes( + key, value, used_hash_size, used_hash_count, used_hash_indices); + + // Check return code + if (r) { + } + }); thread.team_barrier(); // Reset the Begins values to -1 before releasing the memory pool chunk. 
@@ -879,72 +796,49 @@ class coarse_builder { // there can be duplicate key insertions (these are hopefully rare or else // performance will suffer) This did not work as a TeamThreadRange, don't // know why (possibly issues with atomic addition on write_idx) - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(thread, (ordinal_t)0, *used_hash_count), - [&](const ordinal_t& i) { - ordinal_t dirty_hash = used_hash_indices[i]; - - ordinal_t bucket = hash_begins[dirty_hash]; - - // ascending-key bubble-sort the linked list - // it really do be like that sometimes - ordinal_t end_inner = ORD_MAX; - while (end_inner != bucket) { - ordinal_t last_idx = bucket; - ordinal_t last_key = keys[last_idx]; - scalar_t last_val = values[last_idx]; - bool is_sorted = true; - // bubble-up - for (ordinal_t k = hash_nexts[bucket]; k != end_inner; - k = hash_nexts[k]) { - // swap - if (keys[k] < last_key) { - keys[last_idx] = keys[k]; - values[last_idx] = values[k]; - keys[k] = last_key; - values[k] = last_val; - is_sorted = false; - } - // increment last - last_key = keys[k]; - last_val = values[k]; - last_idx = k; - } - end_inner = last_idx; - if (is_sorted) { - // end the outer loop - end_inner = bucket; - } - } - ordinal_t key = keys[bucket]; - scalar_t val = values[bucket]; - ordinal_t last = bucket; - // merge linked list and write out - for (ordinal_t j = hash_nexts[bucket]; j != ORD_MAX; - j = hash_nexts[j]) { - if (keys[j] == key) { - val += values[j]; - } else { - ordinal_t write_at = - row_map(idx) + Kokkos::atomic_fetch_add(write_idx, 1); - entries_out(write_at) = key; - if (use_out) { - // reuse wgts_in as scratch space because we are overwriting - // working memory if we use wgts_out - wgts_in(write_at) = val; - } else { - wgts_out(write_at) = val; - } - key = keys[j]; - val = values[j]; - } - hash_nexts[last] = ORD_MAX; - last = j; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(thread, (ordinal_t)0, *used_hash_count), [&](const ordinal_t& i) { + ordinal_t dirty_hash = 
used_hash_indices[i]; + + ordinal_t bucket = hash_begins[dirty_hash]; + + // ascending-key bubble-sort the linked list + // it really do be like that sometimes + ordinal_t end_inner = ORD_MAX; + while (end_inner != bucket) { + ordinal_t last_idx = bucket; + ordinal_t last_key = keys[last_idx]; + scalar_t last_val = values[last_idx]; + bool is_sorted = true; + // bubble-up + for (ordinal_t k = hash_nexts[bucket]; k != end_inner; k = hash_nexts[k]) { + // swap + if (keys[k] < last_key) { + keys[last_idx] = keys[k]; + values[last_idx] = values[k]; + keys[k] = last_key; + values[k] = last_val; + is_sorted = false; } - hash_nexts[last] = ORD_MAX; - // write out the final entry in linked list - ordinal_t write_at = - row_map(idx) + Kokkos::atomic_fetch_add(write_idx, 1); + // increment last + last_key = keys[k]; + last_val = values[k]; + last_idx = k; + } + end_inner = last_idx; + if (is_sorted) { + // end the outer loop + end_inner = bucket; + } + } + ordinal_t key = keys[bucket]; + scalar_t val = values[bucket]; + ordinal_t last = bucket; + // merge linked list and write out + for (ordinal_t j = hash_nexts[bucket]; j != ORD_MAX; j = hash_nexts[j]) { + if (keys[j] == key) { + val += values[j]; + } else { + ordinal_t write_at = row_map(idx) + Kokkos::atomic_fetch_add(write_idx, 1); entries_out(write_at) = key; if (use_out) { // reuse wgts_in as scratch space because we are overwriting @@ -953,17 +847,31 @@ class coarse_builder { } else { wgts_out(write_at) = val; } - hash_begins[dirty_hash] = ORD_MAX; - }); + key = keys[j]; + val = values[j]; + } + hash_nexts[last] = ORD_MAX; + last = j; + } + hash_nexts[last] = ORD_MAX; + // write out the final entry in linked list + ordinal_t write_at = row_map(idx) + Kokkos::atomic_fetch_add(write_idx, 1); + entries_out(write_at) = key; + if (use_out) { + // reuse wgts_in as scratch space because we are overwriting + // working memory if we use wgts_out + wgts_in(write_at) = val; + } else { + wgts_out(write_at) = val; + } + 
hash_begins[dirty_hash] = ORD_MAX; + }); thread.team_barrier(); // need to copy from wgts_in to wgts_out if we used wgts_in as scratch // space if (use_out) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(thread, (ordinal_t)0, *write_idx), - [&](const ordinal_t& i) { - wgts_out(row_map(idx) + i) = wgts_in(row_map(idx) + i); - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(thread, (ordinal_t)0, *write_idx), + [&](const ordinal_t& i) { wgts_out(row_map(idx) + i) = wgts_in(row_map(idx) + i); }); } Kokkos::single(Kokkos::PerTeam(thread), [&]() { @@ -978,14 +886,11 @@ class coarse_builder { }; // functorHashmapAccumulator - static void getHashmapSizeAndCount( - coarsen_handle& handle, const ordinal_t n, - const ordinal_t remaining_count, vtx_view_t remaining, - vtx_view_t edges_per_source, ordinal_t& hash_size, ordinal_t& max_entries, - ordinal_t& mem_chunk_size, ordinal_t& mem_chunk_count) { + static void getHashmapSizeAndCount(coarsen_handle& handle, const ordinal_t n, const ordinal_t remaining_count, + vtx_view_t remaining, vtx_view_t edges_per_source, ordinal_t& hash_size, + ordinal_t& max_entries, ordinal_t& mem_chunk_size, ordinal_t& mem_chunk_count) { ordinal_t avg_entries = 0; - if (!is_host_space && - static_cast(remaining_count) / static_cast(n) > 0.01) { + if (!is_host_space && static_cast(remaining_count) / static_cast(n) > 0.01) { Kokkos::parallel_reduce( "calc average among remaining", policy_t(0, remaining_count), KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& thread_sum) { @@ -1024,12 +929,11 @@ class coarse_builder { } // Determine memory chunk size for UniformMemoryPool - mem_chunk_size = hash_size; // for hash indices - mem_chunk_size += hash_size; // for hash begins - mem_chunk_size += - 3 * max_entries; // for hash nexts, keys, and values (unless scalar_t - // != ordinal_t, in which case memory is unused) - mem_chunk_size += 10; // for metadata + mem_chunk_size = hash_size; // for hash indices + mem_chunk_size += hash_size; // for hash 
begins + mem_chunk_size += 3 * max_entries; // for hash nexts, keys, and values (unless scalar_t + // != ordinal_t, in which case memory is unused) + mem_chunk_size += 10; // for metadata mem_chunk_count = exec_space().concurrency(); if (mem_chunk_count > remaining_count) { mem_chunk_count = remaining_count + 1; @@ -1037,34 +941,27 @@ class coarse_builder { if (!is_host_space) { // decrease number of mem_chunks to reduce memory usage if necessary - size_t mem_needed = static_cast(mem_chunk_count) * - static_cast(mem_chunk_size) * - sizeof(ordinal_t); + size_t mem_needed = + static_cast(mem_chunk_count) * static_cast(mem_chunk_size) * sizeof(ordinal_t); //~500MB size_t max_mem_allowed = handle.max_mem_allowed; if (mem_needed > max_mem_allowed) { size_t chunk_dif = mem_needed - max_mem_allowed; - chunk_dif = chunk_dif / - (static_cast(mem_chunk_size) * sizeof(ordinal_t)); + chunk_dif = chunk_dif / (static_cast(mem_chunk_size) * sizeof(ordinal_t)); chunk_dif++; mem_chunk_count -= chunk_dif; } } } - static void deduplicate_graph(coarsen_handle& handle, const ordinal_t n, - const bool use_team, - vtx_view_t edges_per_source, - vtx_view_t dest_by_source, - wgt_view_t wgt_by_source, - const edge_view_t source_bucket_offset, - edge_offset_t& gc_nedges) { + static void deduplicate_graph(coarsen_handle& handle, const ordinal_t n, const bool use_team, + vtx_view_t edges_per_source, vtx_view_t dest_by_source, wgt_view_t wgt_by_source, + const edge_view_t source_bucket_offset, edge_offset_t& gc_nedges) { if (handle.b == Hashmap || is_host_space) { ordinal_t remaining_count = n; vtx_view_t remaining("remaining vtx", n); Kokkos::parallel_for( - policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i) { remaining(i) = i; }); + policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t i) { remaining(i) = i; }); // deduplicate rows in phases starting with the small degree rows so we // can use small hashmaps increase the hashmap size each phase to the // necessary size for twice the average of 
remaining rows @@ -1076,12 +973,10 @@ class coarse_builder { do { // determine size for hashmap ordinal_t hash_size, max_entries, mem_chunk_size, mem_chunk_count; - getHashmapSizeAndCount(handle, n, remaining_count, remaining, - edges_per_source, hash_size, max_entries, + getHashmapSizeAndCount(handle, n, remaining_count, remaining, edges_per_source, hash_size, max_entries, mem_chunk_size, mem_chunk_count); // Create Uniform Initialized Memory Pool - KokkosKernels::Impl::PoolType pool_type = - KokkosKernels::Impl::ManyThread2OneChunk; + KokkosKernels::Impl::PoolType pool_type = KokkosKernels::Impl::ManyThread2OneChunk; if (is_host_space) { pool_type = KokkosKernels::Impl::OneThread2OneChunk; @@ -1089,29 +984,23 @@ class coarse_builder { bool use_dyn = should_use_dyn(n, source_bucket_offset, mem_chunk_count); - uniform_memory_pool_t memory_pool(mem_chunk_count, mem_chunk_size, - ORD_MAX, pool_type); + uniform_memory_pool_t memory_pool(mem_chunk_count, mem_chunk_size, ORD_MAX, pool_type); - functorHashmapAccumulator hashmapAccumulator( - source_bucket_offset, dest_by_source, dest_by_source, wgt_by_source, - wgt_out, edges_per_source, memory_pool, hash_size, max_entries, - remaining, !scal_eq_ord); + functorHashmapAccumulator hashmapAccumulator(source_bucket_offset, dest_by_source, dest_by_source, + wgt_by_source, wgt_out, edges_per_source, memory_pool, hash_size, + max_entries, remaining, !scal_eq_ord); ordinal_t old_remaining_count = remaining_count; if (!is_host_space && max_entries >= 128) { - Kokkos::parallel_reduce("hashmap time", - team_policy_t(old_remaining_count, 1, 64), - hashmapAccumulator, remaining_count); + Kokkos::parallel_reduce("hashmap time", team_policy_t(old_remaining_count, 1, 64), hashmapAccumulator, + remaining_count); } else { if (use_dyn) { - Kokkos::parallel_reduce( - "hashmap time", - dyn_policy_t(0, old_remaining_count, Kokkos::ChunkSize(128)), - hashmapAccumulator, remaining_count); - } else { - Kokkos::parallel_reduce("hashmap time", - 
policy_t(0, old_remaining_count), + Kokkos::parallel_reduce("hashmap time", dyn_policy_t(0, old_remaining_count, Kokkos::ChunkSize(128)), hashmapAccumulator, remaining_count); + } else { + Kokkos::parallel_reduce("hashmap time", policy_t(0, old_remaining_count), hashmapAccumulator, + remaining_count); } } @@ -1120,8 +1009,7 @@ class coarse_builder { Kokkos::parallel_scan( "move remaining vertices", policy_t(0, old_remaining_count), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { ordinal_t u = remaining(i); if (edges_per_source(u) >= max_entries) { if (final) { @@ -1135,39 +1023,31 @@ class coarse_builder { } } while (remaining_count > 0); Kokkos::parallel_reduce( - policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& sum) { - sum += edges_per_source(i); - }, + policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& sum) { sum += edges_per_source(i); }, gc_nedges); if (!scal_eq_ord && !is_host_space) { Kokkos::deep_copy(wgt_by_source, wgt_out); } } else if (handle.b == Sort) { // sort the (implicit) crs matrix - KokkosSparse::sort_crs_matrix(source_bucket_offset, - dest_by_source, wgt_by_source); + KokkosSparse::sort_crs_matrix(source_bucket_offset, + dest_by_source, wgt_by_source); // combine adjacent entries that are equal if (use_team) { // thread team version wgt_view_t wgts_out("wgts after dedupe", wgt_by_source.extent(0)); vtx_view_t dest_out("dest after dedupe", dest_by_source.extent(0)); - functorDedupeAfterSort deduper(source_bucket_offset, dest_by_source, - dest_out, wgt_by_source, wgts_out, + functorDedupeAfterSort deduper(source_bucket_offset, dest_by_source, dest_out, wgt_by_source, wgts_out, edges_per_source); - Kokkos::parallel_reduce("deduplicated sorted", team_policy_t(n, 64), - deduper, gc_nedges); + Kokkos::parallel_reduce("deduplicated sorted", team_policy_t(n, 64), deduper, gc_nedges); Kokkos::deep_copy(wgt_by_source, 
wgts_out); Kokkos::deep_copy(dest_by_source, dest_out); } else { // no thread team version - functorDedupeAfterSort deduper(source_bucket_offset, dest_by_source, - dest_by_source, wgt_by_source, + functorDedupeAfterSort deduper(source_bucket_offset, dest_by_source, dest_by_source, wgt_by_source, wgt_by_source, edges_per_source); - Kokkos::parallel_reduce("deduplicated sorted", policy_t(0, n), deduper, - gc_nedges); + Kokkos::parallel_reduce("deduplicated sorted", policy_t(0, n), deduper, gc_nedges); } } else if (handle.b == Hybrid) { @@ -1179,23 +1059,19 @@ class coarse_builder { ordinal_t limit = 128; // sort the (implicit) crs matrix, but only the low degree rows ordinal_t remaining_count = - KokkosSparse::sort_low_degree_rows_crs_matrix( + KokkosSparse::sort_low_degree_rows_crs_matrix( source_bucket_offset, dest_by_source, wgt_by_source, limit); // combine adjacent entries that are equal { // no thread team version - functorDedupeLowDegreeAfterSort deduper( - source_bucket_offset, dest_by_source, dest_by_source, wgt_by_source, - wgt_out, edges_per_source, limit); - Kokkos::parallel_reduce("deduplicated sorted", policy_t(0, n), deduper, - gc_nedges); + functorDedupeLowDegreeAfterSort deduper(source_bucket_offset, dest_by_source, dest_by_source, wgt_by_source, + wgt_out, edges_per_source, limit); + Kokkos::parallel_reduce("deduplicated sorted", policy_t(0, n), deduper, gc_nedges); } vtx_view_t remaining("remaining vtx", remaining_count); Kokkos::parallel_scan( "move remaining vertices", policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { if (edges_per_source(i) > limit) { if (final) { remaining(update) = i; @@ -1209,34 +1085,28 @@ class coarse_builder { while (remaining_count > 0) { // determine size for hashmap ordinal_t hash_size, max_entries, mem_chunk_size, mem_chunk_count; - getHashmapSizeAndCount(handle, n, remaining_count, remaining, - 
edges_per_source, hash_size, max_entries, + getHashmapSizeAndCount(handle, n, remaining_count, remaining, edges_per_source, hash_size, max_entries, mem_chunk_size, mem_chunk_count); // Create Uniform Initialized Memory Pool - KokkosKernels::Impl::PoolType pool_type = - KokkosKernels::Impl::ManyThread2OneChunk; + KokkosKernels::Impl::PoolType pool_type = KokkosKernels::Impl::ManyThread2OneChunk; if (is_host_space) { pool_type = KokkosKernels::Impl::OneThread2OneChunk; } - uniform_memory_pool_t memory_pool(mem_chunk_count, mem_chunk_size, - ORD_MAX, pool_type); + uniform_memory_pool_t memory_pool(mem_chunk_count, mem_chunk_size, ORD_MAX, pool_type); - functorHashmapAccumulator hashmapAccumulator( - source_bucket_offset, dest_by_source, dest_by_source, wgt_by_source, - wgt_out, edges_per_source, memory_pool, hash_size, max_entries, - remaining, !scal_eq_ord); + functorHashmapAccumulator hashmapAccumulator(source_bucket_offset, dest_by_source, dest_by_source, + wgt_by_source, wgt_out, edges_per_source, memory_pool, hash_size, + max_entries, remaining, !scal_eq_ord); ordinal_t old_remaining_count = remaining_count; if (!is_host_space && max_entries >= 128) { - Kokkos::parallel_reduce("hashmap time", - dyn_team_policy_t(old_remaining_count, 1, 64), - hashmapAccumulator, remaining_count); + Kokkos::parallel_reduce("hashmap time", dyn_team_policy_t(old_remaining_count, 1, 64), hashmapAccumulator, + remaining_count); } else { - Kokkos::parallel_reduce("hashmap time", - dyn_policy_t(0, old_remaining_count), - hashmapAccumulator, remaining_count); + Kokkos::parallel_reduce("hashmap time", dyn_policy_t(0, old_remaining_count), hashmapAccumulator, + remaining_count); } if (remaining_count > 0) { @@ -1244,8 +1114,7 @@ class coarse_builder { Kokkos::parallel_scan( "move remaining vertices", policy_t(0, old_remaining_count), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { 
ordinal_t u = remaining(i); if (edges_per_source(u) >= max_entries) { if (final) { @@ -1260,10 +1129,7 @@ class coarse_builder { } gc_nedges = 0; Kokkos::parallel_reduce( - policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& sum) { - sum += edges_per_source(i); - }, + policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& sum) { sum += edges_per_source(i); }, gc_nedges); if (!scal_eq_ord && !is_host_space) { Kokkos::deep_copy(wgt_by_source, wgt_out); @@ -1279,10 +1145,8 @@ class coarse_builder { wgt_view_t wgts_out; ordinal_t workLength; - translationFunctor(matrix_t _vcmap, matrix_t _g, vtx_view_t _mapped_edges, - vtx_view_t _edges_per_source, - edge_view_t _source_bucket_offset, vtx_view_t _edges_out, - wgt_view_t _wgts_out) + translationFunctor(matrix_t _vcmap, matrix_t _g, vtx_view_t _mapped_edges, vtx_view_t _edges_per_source, + edge_view_t _source_bucket_offset, vtx_view_t _edges_out, wgt_view_t _wgts_out) : vcmap(_vcmap), g(_g), mapped_edges(_mapped_edges), @@ -1299,20 +1163,18 @@ class coarse_builder { ordinal_t u = vcmap.graph.entries(i); edge_offset_t start = g.graph.row_map(i); edge_offset_t end = g.graph.row_map(i + 1); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(t, start, end), - [&](const edge_offset_t idx) { - ordinal_t v = mapped_edges(idx); - if (u != v) { - // fix this, inefficient - edge_offset_t offset = Kokkos::atomic_fetch_add( - &edges_per_source(u), 1); - - offset += source_bucket_offset(u); - - edges_out(offset) = v; - wgts_out(offset) = g.values(idx); - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(t, start, end), [&](const edge_offset_t idx) { + ordinal_t v = mapped_edges(idx); + if (u != v) { + // fix this, inefficient + edge_offset_t offset = Kokkos::atomic_fetch_add(&edges_per_source(u), 1); + + offset += source_bucket_offset(u); + + edges_out(offset) = v; + wgts_out(offset) = g.values(idx); + } + }); } KOKKOS_INLINE_FUNCTION @@ -1324,8 +1186,7 @@ class coarse_builder { ordinal_t v = 
mapped_edges(idx); if (u != v) { // fix this - edge_offset_t offset = - Kokkos::atomic_fetch_add(&edges_per_source(u), 1); + edge_offset_t offset = Kokkos::atomic_fetch_add(&edges_per_source(u), 1); offset += source_bucket_offset(u); @@ -1337,18 +1198,14 @@ class coarse_builder { }; // optimized for regular distribution low degree rows - static coarse_level_triple build_nonskew(coarsen_handle& handle, - const matrix_t g, - const matrix_t vcmap, - vtx_view_t mapped_edges, - vtx_view_t edges_per_source) { + static coarse_level_triple build_nonskew(coarsen_handle& handle, const matrix_t g, const matrix_t vcmap, + vtx_view_t mapped_edges, vtx_view_t edges_per_source) { ordinal_t n = g.numRows(); ordinal_t nc = vcmap.numCols(); edge_view_t source_bucket_offset("source_bucket_offsets", nc + 1); edge_offset_t gc_nedges = 0; - Kokkos::parallel_scan("calc source offsets", policy_t(0, nc), - prefix_sum(edges_per_source, source_bucket_offset)); + Kokkos::parallel_scan("calc source offsets", policy_t(0, nc), prefix_sum(edges_per_source, source_bucket_offset)); Kokkos::deep_copy(edges_per_source, static_cast(0)); @@ -1360,39 +1217,30 @@ class coarse_builder { wgt_view_t wgt_by_source("wgt_by_source", nnz_pre_dedupe); // translates fine entries into coarse entries and writes into coarse rows - translationFunctor translateF(vcmap, g, mapped_edges, edges_per_source, - source_bucket_offset, dest_by_source, + translationFunctor translateF(vcmap, g, mapped_edges, edges_per_source, source_bucket_offset, dest_by_source, wgt_by_source); if (is_host_space) { - bool use_dyn = - should_use_dyn(n, g.graph.row_map, exec_space().concurrency()); + bool use_dyn = should_use_dyn(n, g.graph.row_map, exec_space().concurrency()); if (use_dyn) { - Kokkos::parallel_for("move edges to coarse matrix", dyn_policy_t(0, n), - translateF); + Kokkos::parallel_for("move edges to coarse matrix", dyn_policy_t(0, n), translateF); } else { - Kokkos::parallel_for("move edges to coarse matrix", policy_t(0, n), - 
translateF); + Kokkos::parallel_for("move edges to coarse matrix", policy_t(0, n), translateF); } } else { - auto execSpaceEnum = - KokkosKernels::Impl::kk_get_exec_space_type(); - int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size( - n, g.nnz(), execSpaceEnum); + auto execSpaceEnum = KokkosKernels::Impl::kk_get_exec_space_type(); + int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size(n, g.nnz(), execSpaceEnum); team_policy_t dummy(1, 1, vectorLength); int teamSize = dummy.team_size_max(translateF, Kokkos::ParallelForTag()); - Kokkos::parallel_for( - "move edges to coarse matrix", - team_policy_t((n + teamSize - 1) / teamSize, teamSize, vectorLength), - translateF); + Kokkos::parallel_for("move edges to coarse matrix", + team_policy_t((n + teamSize - 1) / teamSize, teamSize, vectorLength), translateF); } - deduplicate_graph(handle, nc, false, edges_per_source, dest_by_source, - wgt_by_source, source_bucket_offset, gc_nedges); + deduplicate_graph(handle, nc, false, edges_per_source, dest_by_source, wgt_by_source, source_bucket_offset, + gc_nedges); edge_view_t source_offsets("source_offsets", nc + 1); - Kokkos::parallel_scan("calc source offsets again", policy_t(0, nc), - prefix_sum(edges_per_source, source_offsets)); + Kokkos::parallel_scan("calc source offsets again", policy_t(0, nc), prefix_sum(edges_per_source, source_offsets)); edge_subview_t edge_total_subview = Kokkos::subview(source_offsets, nc); Kokkos::deep_copy(gc_nedges, edge_total_subview); @@ -1401,12 +1249,10 @@ class coarse_builder { wgt_view_t wgts("wgts", gc_nedges); if (is_host_space) { - bool use_dyn = - should_use_dyn(nc, source_offsets, exec_space().concurrency()); + bool use_dyn = should_use_dyn(nc, source_offsets, exec_space().concurrency()); if (use_dyn) { Kokkos::parallel_for( - "move deduped edges to new coarse matrix", dyn_policy_t(0, nc), - KOKKOS_LAMBDA(const ordinal_t& u) { + "move deduped edges to new coarse matrix", dyn_policy_t(0, nc), 
KOKKOS_LAMBDA(const ordinal_t& u) { edge_offset_t start_origin = source_bucket_offset(u); edge_offset_t start_dest = source_offsets(u); for (ordinal_t idx = 0; idx < edges_per_source(u); idx++) { @@ -1416,8 +1262,7 @@ class coarse_builder { }); } else { Kokkos::parallel_for( - "move deduped edges to new coarse matrix", policy_t(0, nc), - KOKKOS_LAMBDA(const ordinal_t& u) { + "move deduped edges to new coarse matrix", policy_t(0, nc), KOKKOS_LAMBDA(const ordinal_t& u) { edge_offset_t start_origin = source_bucket_offset(u); edge_offset_t start_dest = source_offsets(u); for (ordinal_t idx = 0; idx < edges_per_source(u); idx++) { @@ -1428,18 +1273,15 @@ class coarse_builder { } } else { Kokkos::parallel_for( - "move deduped edges to new coarse matrix", - team_policy_t(nc, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { + "move deduped edges to new coarse matrix", team_policy_t(nc, Kokkos::AUTO), + KOKKOS_LAMBDA(const member& thread) { ordinal_t u = thread.league_rank(); edge_offset_t start_origin = source_bucket_offset(u); edge_offset_t start_dest = source_offsets(u); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(thread, edges_per_source(u)), - [=](const ordinal_t idx) { - dest_idx(start_dest + idx) = - dest_by_source(start_origin + idx); - wgts(start_dest + idx) = wgt_by_source(start_origin + idx); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, edges_per_source(u)), [=](const ordinal_t idx) { + dest_idx(start_dest + idx) = dest_by_source(start_origin + idx); + wgts(start_dest + idx) = wgt_by_source(start_origin + idx); + }); }); } @@ -1452,37 +1294,33 @@ class coarse_builder { } // forms the explicit matrix created by symmetrizing the implicit matrix - static matrix_t collapse_directed_to_undirected( - const ordinal_t nc, const vtx_view_t source_edge_counts, - const edge_view_t source_row_map, const vtx_view_t source_destinations, - const wgt_view_t source_wgts) { + static matrix_t collapse_directed_to_undirected(const ordinal_t nc, const 
vtx_view_t source_edge_counts, + const edge_view_t source_row_map, + const vtx_view_t source_destinations, const wgt_view_t source_wgts) { vtx_view_t coarse_degree("coarse degree", nc); Kokkos::deep_copy(coarse_degree, source_edge_counts); Kokkos::parallel_for( - "count directed edges owned by opposite endpoint", - team_policy_t(nc, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { + "count directed edges owned by opposite endpoint", team_policy_t(nc, Kokkos::AUTO), + KOKKOS_LAMBDA(const member& thread) { ordinal_t u = thread.league_rank(); edge_offset_t start = source_row_map(u); edge_offset_t end = start + source_edge_counts(u); - Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, start, end), - [=](const edge_offset_t idx) { - ordinal_t v = source_destinations(idx); - // increment other vertex - Kokkos::atomic_fetch_add(&coarse_degree(v), 1); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, start, end), [=](const edge_offset_t idx) { + ordinal_t v = source_destinations(idx); + // increment other vertex + Kokkos::atomic_fetch_add(&coarse_degree(v), 1); + }); }); edge_view_t target_row_map("target row map", nc + 1); - Kokkos::parallel_scan("calc target row map", policy_t(0, nc), - prefix_sum(coarse_degree, target_row_map)); + Kokkos::parallel_scan("calc target row map", policy_t(0, nc), prefix_sum(coarse_degree, target_row_map)); Kokkos::deep_copy(coarse_degree, static_cast(0)); - edge_offset_t coarse_edges_total = 0; - edge_subview_t coarse_edge_total_subview = - Kokkos::subview(target_row_map, nc); + edge_offset_t coarse_edges_total = 0; + edge_subview_t coarse_edge_total_subview = Kokkos::subview(target_row_map, nc); Kokkos::deep_copy(coarse_edges_total, coarse_edge_total_subview); vtx_view_t dest_idx("dest_idx", coarse_edges_total); @@ -1490,9 +1328,8 @@ class coarse_builder { Kokkos::parallel_for( "move edges into correct size matrix", team_policy_t(nc, Kokkos::AUTO), - functorCollapseDirectedToUndirected( - source_row_map, target_row_map, 
source_edge_counts, coarse_degree, - source_destinations, dest_idx, source_wgts, wgts)); + functorCollapseDirectedToUndirected(source_row_map, target_row_map, source_edge_counts, coarse_degree, + source_destinations, dest_idx, source_wgts, wgts)); graph_type gc_graph(dest_idx, target_row_map); matrix_t gc("gc", nc, wgts, gc_graph); @@ -1500,10 +1337,8 @@ class coarse_builder { } // optimized for skewed degree distributions - static coarse_level_triple build_skew(coarsen_handle& handle, - const matrix_t g, const matrix_t vcmap, - vtx_view_t mapped_edges, - vtx_view_t degree_initial) { + static coarse_level_triple build_skew(coarsen_handle& handle, const matrix_t g, const matrix_t vcmap, + vtx_view_t mapped_edges, vtx_view_t degree_initial) { ordinal_t n = g.numRows(); ordinal_t nc = vcmap.numCols(); edge_offset_t gc_nedges = 0; @@ -1513,8 +1348,7 @@ class coarse_builder { // recount with edges only belonging to coarse vertex of smaller degree // matrix becomes directed Kokkos::parallel_for( - "recount edges", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "recount edges", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t outer_idx = thread.league_rank(); ordinal_t u = vcmap.graph.entries(outer_idx); edge_offset_t start = g.graph.row_map(outer_idx); @@ -1531,15 +1365,13 @@ class coarse_builder { } }, nonLoopEdgesTotal); - Kokkos::single(Kokkos::PerTeam(thread), [=]() { - Kokkos::atomic_add(&edges_per_source(u), nonLoopEdgesTotal); - }); + Kokkos::single(Kokkos::PerTeam(thread), + [=]() { Kokkos::atomic_add(&edges_per_source(u), nonLoopEdgesTotal); }); }); edge_view_t source_bucket_offset("source_bucket_offsets", nc + 1); - Kokkos::parallel_scan("calc source offsets", policy_t(0, nc), - prefix_sum(edges_per_source, source_bucket_offset)); + Kokkos::parallel_scan("calc source offsets", policy_t(0, nc), prefix_sum(edges_per_source, source_bucket_offset)); edge_subview_t sbo_subview = 
Kokkos::subview(source_bucket_offset, nc); edge_offset_t nnz_pre_dedupe = 0; Kokkos::deep_copy(nnz_pre_dedupe, sbo_subview); @@ -1548,38 +1380,33 @@ class coarse_builder { vtx_view_t dest_by_source("dest by source", nnz_pre_dedupe); wgt_view_t wgt_by_source("wgt by source", nnz_pre_dedupe); Kokkos::parallel_for( - "combine fine rows", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "combine fine rows", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t outer_idx = thread.league_rank(); ordinal_t u = vcmap.graph.entries(outer_idx); edge_offset_t start = g.graph.row_map(outer_idx); edge_offset_t end = g.graph.row_map(outer_idx + 1); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(thread, start, end), - [=](const edge_offset_t idx) { - ordinal_t v = mapped_edges(idx); - bool degree_less = degree_initial(u) < degree_initial(v); - bool degree_equal = degree_initial(u) == degree_initial(v); - if (degree_less || (degree_equal && u < v)) { - edge_offset_t offset = - Kokkos::atomic_fetch_add(&edges_per_source(u), 1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, start, end), [=](const edge_offset_t idx) { + ordinal_t v = mapped_edges(idx); + bool degree_less = degree_initial(u) < degree_initial(v); + bool degree_equal = degree_initial(u) == degree_initial(v); + if (degree_less || (degree_equal && u < v)) { + edge_offset_t offset = Kokkos::atomic_fetch_add(&edges_per_source(u), 1); - offset += source_bucket_offset(u); + offset += source_bucket_offset(u); - dest_by_source(offset) = v; - wgt_by_source(offset) = g.values(idx); - } - }); + dest_by_source(offset) = v; + wgt_by_source(offset) = g.values(idx); + } + }); }); gc_nedges = 0; - deduplicate_graph(handle, nc, true, edges_per_source, dest_by_source, - wgt_by_source, source_bucket_offset, gc_nedges); + deduplicate_graph(handle, nc, true, edges_per_source, dest_by_source, wgt_by_source, source_bucket_offset, + gc_nedges); // form the final coarse graph, which 
requires symmetrizing the matrix - matrix_t gc = collapse_directed_to_undirected( - nc, edges_per_source, source_bucket_offset, dest_by_source, - wgt_by_source); + matrix_t gc = + collapse_directed_to_undirected(nc, edges_per_source, source_bucket_offset, dest_by_source, wgt_by_source); coarse_level_triple next_level; next_level.mtx = gc; @@ -1591,11 +1418,8 @@ class coarse_builder { // deduplicates within each fine row // combines fine rows into coarse rows // deduplicates within each coarse row - static coarse_level_triple build_high_duplicity(coarsen_handle& handle, - const matrix_t g, - const matrix_t vcmap, - vtx_view_t mapped_edges, - vtx_view_t degree_initial) { + static coarse_level_triple build_high_duplicity(coarsen_handle& handle, const matrix_t g, const matrix_t vcmap, + vtx_view_t mapped_edges, vtx_view_t degree_initial) { ordinal_t n = g.numRows(); ordinal_t nc = vcmap.numCols(); edge_offset_t gc_nedges = 0; @@ -1606,8 +1430,7 @@ class coarse_builder { // recount fine row sizes with edges only belonging to fine vertex of coarse // vertex of smaller degree matrix becomes directed Kokkos::parallel_for( - "recount edges", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "recount edges", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t outer_idx = thread.league_rank(); ordinal_t u = vcmap.graph.entries(outer_idx); edge_offset_t start = g.graph.row_map(outer_idx); @@ -1624,13 +1447,10 @@ class coarse_builder { } }, nonLoopEdgesTotal); - Kokkos::single(Kokkos::PerTeam(thread), [=]() { - dedupe_count(outer_idx) = nonLoopEdgesTotal; - }); + Kokkos::single(Kokkos::PerTeam(thread), [=]() { dedupe_count(outer_idx) = nonLoopEdgesTotal; }); }); - Kokkos::parallel_scan("calc source offsets", policy_t(0, n), - prefix_sum(dedupe_count, row_map_copy)); + Kokkos::parallel_scan("calc source offsets", policy_t(0, n), prefix_sum(dedupe_count, row_map_copy)); // reset counters to 0 Kokkos::deep_copy(dedupe_count, 
static_cast(0)); @@ -1643,35 +1463,30 @@ class coarse_builder { // create a new directed version of the fine matrix Kokkos::parallel_for( - "move edges to new matrix", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "move edges to new matrix", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t outer_idx = thread.league_rank(); ordinal_t u = vcmap.graph.entries(outer_idx); edge_offset_t start = g.graph.row_map(outer_idx); edge_offset_t end = g.graph.row_map(outer_idx + 1); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(thread, start, end), - [=](const edge_offset_t idx) { - ordinal_t v = mapped_edges(idx); - bool degree_less = degree_initial(u) < degree_initial(v); - bool degree_equal = degree_initial(u) == degree_initial(v); - if (u != v && (degree_less || (degree_equal && u < v))) { - edge_offset_t offset = - Kokkos::atomic_fetch_add(&dedupe_count(outer_idx), 1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, start, end), [=](const edge_offset_t idx) { + ordinal_t v = mapped_edges(idx); + bool degree_less = degree_initial(u) < degree_initial(v); + bool degree_equal = degree_initial(u) == degree_initial(v); + if (u != v && (degree_less || (degree_equal && u < v))) { + edge_offset_t offset = Kokkos::atomic_fetch_add(&dedupe_count(outer_idx), 1); - offset += row_map_copy(outer_idx); + offset += row_map_copy(outer_idx); - dest_fine(offset) = v; - wgt_fine(offset) = g.values(idx); - } - }); + dest_fine(offset) = v; + wgt_fine(offset) = g.values(idx); + } + }); }); //"delete" these views Kokkos::resize(mapped_edges, 0); // deduplicate coarse adjacencies within each fine row - deduplicate_graph(handle, n, true, dedupe_count, dest_fine, wgt_fine, - row_map_copy, gc_nedges); + deduplicate_graph(handle, n, true, dedupe_count, dest_fine, wgt_fine, row_map_copy, gc_nedges); edge_view_t source_bucket_offset("source_bucket_offsets", nc + 1); vtx_view_t edges_per_source("edges_per_source", nc); @@ -1681,46 
+1496,40 @@ class coarse_builder { ordinal_t u = vcmap.graph.entries(i); Kokkos::atomic_fetch_add(&edges_per_source(u), dedupe_count(i)); }); - Kokkos::parallel_scan("calc source offsets", policy_t(0, nc), - prefix_sum(edges_per_source, source_bucket_offset)); + Kokkos::parallel_scan("calc source offsets", policy_t(0, nc), prefix_sum(edges_per_source, source_bucket_offset)); Kokkos::deep_copy(edges_per_source, static_cast(0)); vtx_view_t dest_by_source("dest by source", gc_nedges); wgt_view_t wgt_by_source("wgt by source", gc_nedges); Kokkos::parallel_for( - "combine deduped fine rows", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "combine deduped fine rows", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t outer_idx = thread.league_rank(); ordinal_t u = vcmap.graph.entries(outer_idx); edge_offset_t start = row_map_copy(outer_idx); edge_offset_t end = start + dedupe_count(outer_idx); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(thread, start, end), - [=](const edge_offset_t idx) { - ordinal_t v = dest_fine(idx); - bool degree_less = degree_initial(u) < degree_initial(v); - bool degree_equal = degree_initial(u) == degree_initial(v); - if (degree_less || (degree_equal && u < v)) { - edge_offset_t offset = - Kokkos::atomic_fetch_add(&edges_per_source(u), 1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, start, end), [=](const edge_offset_t idx) { + ordinal_t v = dest_fine(idx); + bool degree_less = degree_initial(u) < degree_initial(v); + bool degree_equal = degree_initial(u) == degree_initial(v); + if (degree_less || (degree_equal && u < v)) { + edge_offset_t offset = Kokkos::atomic_fetch_add(&edges_per_source(u), 1); - offset += source_bucket_offset(u); + offset += source_bucket_offset(u); - dest_by_source(offset) = v; - wgt_by_source(offset) = wgt_fine(idx); - } - }); + dest_by_source(offset) = v; + wgt_by_source(offset) = wgt_fine(idx); + } + }); }); gc_nedges = 0; Kokkos::resize(dest_fine, 
0); Kokkos::resize(wgt_fine, 0); - deduplicate_graph(handle, nc, true, edges_per_source, dest_by_source, - wgt_by_source, source_bucket_offset, gc_nedges); + deduplicate_graph(handle, nc, true, edges_per_source, dest_by_source, wgt_by_source, source_bucket_offset, + gc_nedges); // form the final coarse graph, which requires symmetrizing the matrix - matrix_t gc = collapse_directed_to_undirected( - nc, edges_per_source, source_bucket_offset, dest_by_source, - wgt_by_source); + matrix_t gc = + collapse_directed_to_undirected(nc, edges_per_source, source_bucket_offset, dest_by_source, wgt_by_source); coarse_level_triple next_level; next_level.mtx = gc; @@ -1735,9 +1544,8 @@ class coarse_builder { vtx_view_t c_vtx_w, f_vtx_w; ordinal_t workLength; - countingFunctor(matrix_t _vcmap, matrix_t _g, vtx_view_t _mapped_edges, - vtx_view_t _degree_initial, vtx_view_t _c_vtx_w, - vtx_view_t _f_vtx_w) + countingFunctor(matrix_t _vcmap, matrix_t _g, vtx_view_t _mapped_edges, vtx_view_t _degree_initial, + vtx_view_t _c_vtx_w, vtx_view_t _f_vtx_w) : vcmap(_vcmap), g(_g), mapped_edges(_mapped_edges), @@ -1788,8 +1596,7 @@ class coarse_builder { } }; - static coarse_level_triple build_coarse_graph(coarsen_handle& handle, - const coarse_level_triple level, + static coarse_level_triple build_coarse_graph(coarsen_handle& handle, const coarse_level_triple level, const matrix_t vcmap) { if (handle.b == Spgemm || handle.b == Spgemm_transpose_first) { return build_coarse_graph_spgemm(handle, level, vcmap); @@ -1807,24 +1614,18 @@ class coarse_builder { // count non-self loop edges per coarse vertex // also computes coarse vertex weights - countingFunctor countF(vcmap, g, mapped_edges, degree_initial, c_vtx_w, - f_vtx_w); + countingFunctor countF(vcmap, g, mapped_edges, degree_initial, c_vtx_w, f_vtx_w); if (is_host_space) { - Kokkos::parallel_for( - "count edges per coarse vertex (also compute coarse vertex weights)", - policy_t(0, n), countF); + Kokkos::parallel_for("count edges per 
coarse vertex (also compute coarse vertex weights)", policy_t(0, n), + countF); } else { - auto execSpaceEnum = - KokkosKernels::Impl::kk_get_exec_space_type(); - int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size( - n, g.nnz(), execSpaceEnum); + auto execSpaceEnum = KokkosKernels::Impl::kk_get_exec_space_type(); + int vectorLength = KokkosKernels::Impl::kk_get_suggested_vector_size(n, g.nnz(), execSpaceEnum); team_policy_t dummy(1, 1, vectorLength); int teamSize = dummy.team_size_max(countF, Kokkos::ParallelForTag()); // count edges per vertex - Kokkos::parallel_for( - "count edges per coarse vertex (also compute coarse vertex weights)", - team_policy_t((n + teamSize - 1) / teamSize, teamSize, vectorLength), - countF); + Kokkos::parallel_for("count edges per coarse vertex (also compute coarse vertex weights)", + team_policy_t((n + teamSize - 1) / teamSize, teamSize, vectorLength), countF); } // compute max row size and avg row size @@ -1842,10 +1643,7 @@ class coarse_builder { Kokkos::Max(max_unduped)); Kokkos::parallel_reduce( "find total", policy_t(0, nc), - KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& sum) { - sum += degree_initial(i); - }, - total_unduped); + KOKKOS_LAMBDA(const ordinal_t i, edge_offset_t& sum) { sum += degree_initial(i); }, total_unduped); ordinal_t avg_unduped = total_unduped / nc; coarse_level_triple next_level; @@ -1853,14 +1651,11 @@ class coarse_builder { // adjacency rows don't do optimizations if running on CPU (the default host // space) if (avg_unduped > (nc / 4) && !is_host_space) { - next_level = - build_high_duplicity(handle, g, vcmap, mapped_edges, degree_initial); - } else if (avg_unduped > 50 && (max_unduped / 10) > avg_unduped && - !is_host_space) { + next_level = build_high_duplicity(handle, g, vcmap, mapped_edges, degree_initial); + } else if (avg_unduped > 50 && (max_unduped / 10) > avg_unduped && !is_host_space) { next_level = build_skew(handle, g, vcmap, mapped_edges, degree_initial); } else { - 
next_level = - build_nonskew(handle, g, vcmap, mapped_edges, degree_initial); + next_level = build_nonskew(handle, g, vcmap, mapped_edges, degree_initial); } next_level.vtx_wgts = c_vtx_w; @@ -1870,9 +1665,7 @@ class coarse_builder { return next_level; } - static matrix_t generate_coarse_mapping(coarsen_handle& handle, - const matrix_t g, - bool uniform_weights) { + static matrix_t generate_coarse_mapping(coarsen_handle& handle, const matrix_t g, bool uniform_weights) { matrix_t interpolation_graph; int choice = 0; @@ -1883,14 +1676,9 @@ class coarse_builder { } switch (handle.h) { - case HECv1: - interpolation_graph = mapper_t::coarsen_HEC(g, uniform_weights); - break; + case HECv1: interpolation_graph = mapper_t::coarsen_HEC(g, uniform_weights); break; case Match: - case MtMetis: - interpolation_graph = - mapper_t::coarsen_match(g, uniform_weights, choice); - break; + case MtMetis: interpolation_graph = mapper_t::coarsen_match(g, uniform_weights, choice); break; case MIS2: interpolation_graph = mapper_t::coarsen_mis_2(g); break; case GOSHv2: interpolation_graph = mapper_t::coarsen_GOSH_v2(g); break; case GOSHv1: interpolation_graph = mapper_t::coarsen_GOSH(g); break; @@ -1902,9 +1690,7 @@ class coarse_builder { // this function can't return the generated list directly because of an NVCC // compiler bug caller must use the get_levels() method after calling this // function - static void generate_coarse_graphs(coarsen_handle& handle, - const matrix_t fine_g, - bool uniform_weights = false) { + static void generate_coarse_graphs(coarsen_handle& handle, const matrix_t fine_g, bool uniform_weights = false) { ordinal_t fine_n = fine_g.numRows(); std::list& levels = handle.results; levels.clear(); @@ -1920,15 +1706,13 @@ class coarse_builder { while (levels.rbegin()->mtx.numRows() > handle.coarse_vtx_cutoff) { coarse_level_triple current_level = *levels.rbegin(); - matrix_t interp_graph = generate_coarse_mapping( - handle, current_level.mtx, 
current_level.uniform_weights); + matrix_t interp_graph = generate_coarse_mapping(handle, current_level.mtx, current_level.uniform_weights); if (interp_graph.numCols() < handle.min_allowed_vtx) { break; } - coarse_level_triple next_level = - build_coarse_graph(handle, current_level, interp_graph); + coarse_level_triple next_level = build_coarse_graph(handle, current_level, interp_graph); levels.push_back(next_level); diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenHeuristics.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenHeuristics.hpp index 169490516718..f136882d8992 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenHeuristics.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_CoarsenHeuristics.hpp @@ -74,8 +74,7 @@ class coarsen_heuristics { int t_buckets = 2 * n; vtx_view_t buckets("buckets", t_buckets); Kokkos::parallel_for( - "init buckets", policy_t(0, t_buckets), - KOKKOS_LAMBDA(ordinal_t i) { buckets(i) = ORD_MAX; }); + "init buckets", policy_t(0, t_buckets), KOKKOS_LAMBDA(ordinal_t i) { buckets(i) = ORD_MAX; }); uint64_t max = std::numeric_limits::max(); uint64_t bucket_size = max / t_buckets; @@ -87,8 +86,7 @@ class coarsen_heuristics { if (bucket >= t_buckets) bucket -= t_buckets; if (buckets(bucket) == ORD_MAX) { // attempt to insert into bucket - if (Kokkos::atomic_compare_exchange_strong(&buckets(bucket), - ORD_MAX, i)) { + if (Kokkos::atomic_compare_exchange_strong(&buckets(bucket), ORD_MAX, i)) { break; } } @@ -113,9 +111,9 @@ class coarsen_heuristics { // create a mapping when some vertices are already mapped // hn is a list of vertices such that vertex i wants to aggregate with vertex // hn(i) - static ordinal_t parallel_map_construct_prefilled( - vtx_view_t vcmap, const ordinal_t n, const vtx_view_t vperm, - const vtx_view_t hn, Kokkos::View nvertices_coarse) { + static ordinal_t parallel_map_construct_prefilled(vtx_view_t vcmap, const ordinal_t n, const vtx_view_t vperm, + const vtx_view_t hn, + 
Kokkos::View nvertices_coarse) { vtx_view_t match("match", n); Kokkos::parallel_for( policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { @@ -142,14 +140,11 @@ class coarsen_heuristics { // need to enforce an ordering condition to allow hard-stall // conditions to be broken if (condition ^ swap) { - if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, - v)) { - if (u == v || Kokkos::atomic_compare_exchange_strong( - &match(v), ORD_MAX, u)) { - ordinal_t cv = - Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); - vcmap(u) = cv; - vcmap(v) = cv; + if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, v)) { + if (u == v || Kokkos::atomic_compare_exchange_strong(&match(v), ORD_MAX, u)) { + ordinal_t cv = Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); + vcmap(u) = cv; + vcmap(v) = cv; } else { if (vcmap(v) != ORD_MAX) { vcmap(u) = vcmap(v); @@ -183,10 +178,8 @@ class coarsen_heuristics { // hn is a list of vertices such that vertex i wants to aggregate with vertex // hn(i) - static ordinal_t parallel_map_construct(vtx_view_t vcmap, const ordinal_t n, - const vtx_view_t vperm, - const vtx_view_t hn, - const vtx_view_t ordering) { + static ordinal_t parallel_map_construct(vtx_view_t vcmap, const ordinal_t n, const vtx_view_t vperm, + const vtx_view_t hn, const vtx_view_t ordering) { vtx_view_t match("match", n); Kokkos::parallel_for( policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { match(i) = ORD_MAX; }); @@ -208,10 +201,8 @@ class coarsen_heuristics { // need to enforce an ordering condition to allow hard-stall // conditions to be broken if (condition ^ swap) { - if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, - v)) { - if (u == v || Kokkos::atomic_compare_exchange_strong( - &match(v), ORD_MAX, u)) { + if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, v)) { + if (u == v || Kokkos::atomic_compare_exchange_strong(&match(v), ORD_MAX, u)) { ordinal_t cv = u; if (v < u) { cv = v; @@ -232,9 +223,7 @@ class coarsen_heuristics { // add the 
ones that failed to be reprocessed next round // maybe count these then create next_perm to save memory? Kokkos::parallel_scan( - policy_t(0, perm_length), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + policy_t(0, perm_length), KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { ordinal_t u = curr_perm(i); if (vcmap(u) == ORD_MAX) { if (final) { @@ -252,8 +241,7 @@ class coarsen_heuristics { curr_perm = next_perm; } Kokkos::parallel_scan( - "assign aggregates", policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t u, ordinal_t& update, const bool final) { + "assign aggregates", policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t u, ordinal_t& update, const bool final) { if (vcmap(u) == u) { if (final) { vcmap(u) = update; @@ -325,8 +313,7 @@ class coarsen_heuristics { edge_offset_t max_degree = tuple_degree(u); ordinal_t max_idx = tuple_idx(u); - for (edge_offset_t j = g.graph.row_map(u); - j < g.graph.row_map(u + 1); j++) { + for (edge_offset_t j = g.graph.row_map(u); j < g.graph.row_map(u + 1); j++) { ordinal_t v = g.graph.entries(j); bool is_max = false; if (tuple_state(v) > max_state) { @@ -375,8 +362,7 @@ class coarsen_heuristics { } // check if at least one of neighbors are in the IS or will be // placed into the IS - else if (tuple_state(u) == 1 || - tuple_idx(tuple_idx(u)) == tuple_idx(u)) { + else if (tuple_state(u) == 1 || tuple_idx(tuple_idx(u)) == tuple_idx(u)) { state(u) = -1; } } @@ -389,8 +375,7 @@ class coarsen_heuristics { vtx_view_t next_unassigned("next unassigned", next_unassigned_total); Kokkos::parallel_scan( "create next unassigned", policy_t(0, unassigned_total), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { ordinal_t u = unassigned(i); if (state(u) == 0) { if (final) { @@ -408,12 +393,11 @@ class coarsen_heuristics { static matrix_t coarsen_mis_2(const matrix_t& g) { ordinal_t n = g.numRows(); - 
typename matrix_t::staticcrsgraph_type::entries_type::non_const_value_type - nc = 0; - vtx_view_t vcmap = KokkosGraph::graph_mis2_aggregate< - Device, typename matrix_t::staticcrsgraph_type::row_map_type, - typename matrix_t::staticcrsgraph_type::entries_type, vtx_view_t>( - g.graph.row_map, g.graph.entries, nc); + typename matrix_t::staticcrsgraph_type::entries_type::non_const_value_type nc = 0; + vtx_view_t vcmap = + KokkosGraph::graph_mis2_aggregate( + g.graph.row_map, g.graph.entries, nc); edge_view_t row_map("interpolate row map", n + 1); @@ -461,11 +445,9 @@ class coarsen_heuristics { if (colors(i) != first_color) { // could use a thread team here edge_offset_t max_degree = 0; - for (edge_offset_t j = g.graph.row_map(i); - j < g.graph.row_map(i + 1); j++) { - ordinal_t v = g.graph.entries(j); - edge_offset_t degree = - g.graph.row_map(v + 1) - g.graph.row_map(v); + for (edge_offset_t j = g.graph.row_map(i); j < g.graph.row_map(i + 1); j++) { + ordinal_t v = g.graph.entries(j); + edge_offset_t degree = g.graph.row_map(v + 1) - g.graph.row_map(v); if (colors(v) == first_color && degree > max_degree) { max_degree = degree; vcmap(i) = vcmap(v); @@ -524,8 +506,7 @@ class coarsen_heuristics { if (vcmap(i) == ORD_MAX) { ordinal_t argmax = ORD_MAX; scalar_t max_w = 0; - for (edge_offset_t j = g.graph.row_map(i); - j < g.graph.row_map(i + 1); j++) { + for (edge_offset_t j = g.graph.row_map(i); j < g.graph.row_map(i + 1); j++) { ordinal_t v = g.graph.entries(j); ordinal_t wgt = g.values(j); if (vcmap(v) != ORD_MAX) { @@ -547,11 +528,9 @@ class coarsen_heuristics { if (vcmap(i) == ORD_MAX) { ordinal_t argmax = ORD_MAX; edge_offset_t max_d = 0; - for (edge_offset_t j = g.graph.row_map(i); - j < g.graph.row_map(i + 1); j++) { - ordinal_t v = g.graph.entries(j); - edge_offset_t degree = - g.graph.row_map(v + 1) - g.graph.row_map(v); + for (edge_offset_t j = g.graph.row_map(i); j < g.graph.row_map(i + 1); j++) { + ordinal_t v = g.graph.entries(j); + edge_offset_t degree = 
g.graph.row_map(v + 1) - g.graph.row_map(v); if (vcmap(v) != ORD_MAX) { if (degree >= max_d) { max_d = degree; @@ -569,8 +548,7 @@ class coarsen_heuristics { Kokkos::parallel_for( policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { if (vcmap(i) != ORD_MAX) { - for (edge_offset_t j = g.graph.row_map(i); - j < g.graph.row_map(i + 1); j++) { + for (edge_offset_t j = g.graph.row_map(i); j < g.graph.row_map(i + 1); j++) { ordinal_t v = g.graph.entries(j); if (vcmap(v) == ORD_MAX) { vcmap(v) = vcmap(i); @@ -593,8 +571,7 @@ class coarsen_heuristics { vtx_view_t remaining("remaining vtx", remaining_total); Kokkos::parallel_scan( - "count remaining", policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { + "count remaining", policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { if (vcmap(i) == ORD_MAX) { if (final) { remaining(update) = i; @@ -608,8 +585,7 @@ class coarsen_heuristics { pool_t rand_pool(std::time(nullptr)); Kokkos::parallel_for( - "fill hn", policy_t(0, remaining_total), - KOKKOS_LAMBDA(ordinal_t r_idx) { + "fill hn", policy_t(0, remaining_total), KOKKOS_LAMBDA(ordinal_t r_idx) { // select heaviest neighbor with ties randomly broken ordinal_t i = remaining(r_idx); ordinal_t hn_i = ORD_MAX; @@ -639,8 +615,7 @@ class coarsen_heuristics { hn(i) = hn_i; }); - ordinal_t nc = - parallel_map_construct_prefilled(vcmap, n, remaining, hn, nvc); + ordinal_t nc = parallel_map_construct_prefilled(vcmap, n, remaining, hn, nvc); Kokkos::deep_copy(nc, nvc); edge_view_t row_map("interpolate row map", n + 1); @@ -671,8 +646,7 @@ class coarsen_heuristics { vtx_view_t vcmap("vcmap", n); Kokkos::parallel_for( - "initialize vcmap", policy_t(0, n), - KOKKOS_LAMBDA(ordinal_t i) { vcmap(i) = ORD_MAX; }); + "initialize vcmap", policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { vcmap(i) = ORD_MAX; }); pool_t rand_pool(std::time(nullptr)); @@ -680,8 +654,7 @@ class coarsen_heuristics { vtx_view_t reverse_map("reversed", n); 
Kokkos::parallel_for( - "construct reverse map", policy_t(0, n), - KOKKOS_LAMBDA(ordinal_t i) { reverse_map(vperm(i)) = i; }); + "construct reverse map", policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { reverse_map(vperm(i)) = i; }); if (uniform_weights) { // all weights equal at this level so choose heaviest edge randomly @@ -690,9 +663,8 @@ class coarsen_heuristics { gen_t generator = rand_pool.get_state(); ordinal_t adj_size = g.graph.row_map(i + 1) - g.graph.row_map(i); if (adj_size > 0) { - ordinal_t offset = - g.graph.row_map(i) + (generator.urand64() % adj_size); - hn(i) = g.graph.entries(offset); + ordinal_t offset = g.graph.row_map(i) + (generator.urand64() % adj_size); + hn(i) = g.graph.entries(offset); } else { hn(i) = generator.urand64() % n; } @@ -700,18 +672,15 @@ class coarsen_heuristics { }); } else { Kokkos::parallel_for( - "Heaviest HN", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "Heaviest HN", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t i = thread.league_rank(); ordinal_t adj_size = g.graph.row_map(i + 1) - g.graph.row_map(i); if (adj_size > 0) { edge_offset_t end = g.graph.row_map(i + 1); - typename Kokkos::MaxLoc::value_type argmax{}; + typename Kokkos::MaxLoc::value_type argmax{}; Kokkos::parallel_reduce( Kokkos::TeamThreadRange(thread, g.graph.row_map(i), end), - [=](const edge_offset_t idx, - Kokkos::ValLocScalar& local) { + [=](const edge_offset_t idx, Kokkos::ValLocScalar& local) { scalar_t wgt = g.values(idx); if (wgt >= local.val) { local.val = wgt; @@ -773,10 +742,8 @@ class coarsen_heuristics { Kokkos::View hashes; ordinal_t unmapped_total; Kokkos::View nvertices_coarse; - MatchByHashSorted(vtx_view_t _vcmap, vtx_view_t _unmapped, - Kokkos::View _hashes, - ordinal_t _unmapped_total, - Kokkos::View _nvertices_coarse) + MatchByHashSorted(vtx_view_t _vcmap, vtx_view_t _unmapped, Kokkos::View _hashes, + ordinal_t _unmapped_total, Kokkos::View _nvertices_coarse) : vcmap(_vcmap), 
unmapped(_unmapped), hashes(_hashes), @@ -784,8 +751,7 @@ class coarsen_heuristics { nvertices_coarse(_nvertices_coarse) {} KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_t i, ordinal_t& update, - const bool final) const { + void operator()(const ordinal_t i, ordinal_t& update, const bool final) const { ordinal_t u = unmapped(i); ordinal_t tentative = 0; if (i == 0) { @@ -823,8 +789,7 @@ class coarsen_heuristics { } }; - static matrix_t coarsen_match(const matrix_t& g, bool uniform_weights, - int match_choice) { + static matrix_t coarsen_match(const matrix_t& g, bool uniform_weights, int match_choice) { ordinal_t n = g.numRows(); vtx_view_t hn("heavies", n); @@ -832,8 +797,7 @@ class coarsen_heuristics { vtx_view_t vcmap("vcmap", n); Kokkos::parallel_for( - "initialize vcmap", policy_t(0, n), - KOKKOS_LAMBDA(ordinal_t i) { vcmap(i) = ORD_MAX; }); + "initialize vcmap", policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { vcmap(i) = ORD_MAX; }); rand_view_t randoms("randoms", n); @@ -843,8 +807,7 @@ class coarsen_heuristics { vtx_view_t reverse_map("reversed", n); Kokkos::parallel_for( - "construct reverse map", policy_t(0, n), - KOKKOS_LAMBDA(ordinal_t i) { reverse_map(vperm(i)) = i; }); + "construct reverse map", policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { reverse_map(vperm(i)) = i; }); if (uniform_weights) { // all weights equal at this level so choose heaviest edge randomly @@ -852,9 +815,8 @@ class coarsen_heuristics { "Random HN", policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { gen_t generator = rand_pool.get_state(); ordinal_t adj_size = g.graph.row_map(i + 1) - g.graph.row_map(i); - ordinal_t offset = - g.graph.row_map(i) + (generator.urand64() % adj_size); - hn(i) = g.graph.entries(offset); + ordinal_t offset = g.graph.row_map(i) + (generator.urand64() % adj_size); + hn(i) = g.graph.entries(offset); rand_pool.free_state(generator); }); } else { @@ -863,11 +825,9 @@ class coarsen_heuristics { ordinal_t hn_i = g.graph.entries(g.graph.row_map(i)); scalar_t 
max_ewt = g.values(g.graph.row_map(i)); - edge_offset_t end_offset = - g.graph.row_map(i + 1); // +g.edges_per_source[i]; + edge_offset_t end_offset = g.graph.row_map(i + 1); // +g.edges_per_source[i]; - for (edge_offset_t j = g.graph.row_map(i) + 1; j < end_offset; - j++) { + for (edge_offset_t j = g.graph.row_map(i) + 1; j < end_offset; j++) { if (max_ewt < g.values(j)) { max_ewt = g.values(j); hn_i = g.graph.entries(j); @@ -899,15 +859,12 @@ class coarsen_heuristics { // need to enforce an ordering condition to allow hard-stall // conditions to be broken if (condition ^ swap) { - if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, - v)) { - if (u == v || Kokkos::atomic_compare_exchange_strong( - &match(v), ORD_MAX, u)) { + if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, v)) { + if (u == v || Kokkos::atomic_compare_exchange_strong(&match(v), ORD_MAX, u)) { // u == v avoids problems if there is a self-loop edge - ordinal_t cv = - Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); - vcmap(u) = cv; - vcmap(v) = cv; + ordinal_t cv = Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); + vcmap(u) = cv; + vcmap(v) = cv; } else { match(u) = ORD_MAX; } @@ -930,8 +887,7 @@ class coarsen_heuristics { // check if any are unmatched! 
so instead of randomly choosing a // heaviest edge, we instead use the reverse permutation order // as the weight - for (edge_offset_t j = g.graph.row_map(u); - j < g.graph.row_map(u + 1); j++) { + for (edge_offset_t j = g.graph.row_map(u); j < g.graph.row_map(u + 1); j++) { ordinal_t v = g.graph.entries(j); // v must be unmatched to be considered if (vcmap(v) == ORD_MAX) { @@ -944,8 +900,7 @@ class coarsen_heuristics { } } else { scalar_t max_ewt = 0; - for (edge_offset_t j = g.graph.row_map(u); - j < g.graph.row_map(u + 1); j++) { + for (edge_offset_t j = g.graph.row_map(u); j < g.graph.row_map(u + 1); j++) { ordinal_t v = g.graph.entries(j); // v must be unmatched to be considered if (vcmap(v) == ORD_MAX) { @@ -959,8 +914,7 @@ class coarsen_heuristics { } if (h != ORD_MAX) { - ordinal_t add_next = - Kokkos::atomic_fetch_add(&next_length(), 1); + ordinal_t add_next = Kokkos::atomic_fetch_add(&next_length(), 1); next_perm(add_next) = u; hn(u) = h; } @@ -973,9 +927,8 @@ class coarsen_heuristics { } if (match_choice == 1) { - ordinal_t unmapped = countInf(vcmap); - double unmappedRatio = - static_cast(unmapped) / static_cast(n); + ordinal_t unmapped = countInf(vcmap); + double unmappedRatio = static_cast(unmapped) / static_cast(n); // leaf matches if (unmappedRatio > 0.25) { @@ -983,8 +936,7 @@ class coarsen_heuristics { policy_t(0, n), KOKKOS_LAMBDA(ordinal_t u) { if (vcmap(u) != ORD_MAX) { ordinal_t lastLeaf = ORD_MAX; - for (edge_offset_t j = g.graph.row_map(u); - j < g.graph.row_map(u + 1); j++) { + for (edge_offset_t j = g.graph.row_map(u); j < g.graph.row_map(u + 1); j++) { ordinal_t v = g.graph.entries(j); // v must be unmatched to be considered if (vcmap(v) == ORD_MAX) { @@ -993,10 +945,9 @@ class coarsen_heuristics { if (lastLeaf == ORD_MAX) { lastLeaf = v; } else { - vcmap(lastLeaf) = - Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); - vcmap(v) = vcmap(lastLeaf); - lastLeaf = ORD_MAX; + vcmap(lastLeaf) = Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); 
+ vcmap(v) = vcmap(lastLeaf); + lastLeaf = ORD_MAX; } } } @@ -1017,20 +968,16 @@ class coarsen_heuristics { hasher_t hasher; // compute digests of adjacency lists Kokkos::parallel_for( - "create digests", team_policy_t(n, Kokkos::AUTO), - KOKKOS_LAMBDA(const member& thread) { + "create digests", team_policy_t(n, Kokkos::AUTO), KOKKOS_LAMBDA(const member& thread) { ordinal_t u = thread.league_rank(); if (vcmap(u) == ORD_MAX) { uint32_t hash = 0; Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(thread, g.graph.row_map(u), - g.graph.row_map(u + 1)), - [=](const edge_offset_t j, uint32_t& thread_sum) { - thread_sum += hasher(g.graph.entries(j)); - }, + Kokkos::TeamThreadRange(thread, g.graph.row_map(u), g.graph.row_map(u + 1)), + [=](const edge_offset_t j, uint32_t& thread_sum) { thread_sum += hasher(g.graph.entries(j)); }, hash); Kokkos::single(Kokkos::PerTeam(thread), [=]() { - ordinal_t idx = Kokkos::atomic_fetch_add(&unmappedIdx(), 1); + ordinal_t idx = Kokkos::atomic_fetch_add(&unmappedIdx(), 1); unmappedVtx(idx) = u; hashes(idx) = hash; }); @@ -1040,17 +987,13 @@ class coarsen_heuristics { typedef Kokkos::BinOp1D > BinOp; BinOp bin_op(unmapped, 0, max); // VERY important that final parameter is true - Kokkos::BinSort, BinOp, exec_space, - ordinal_t> - sorter(hashes, bin_op, true); + Kokkos::BinSort, BinOp, exec_space, ordinal_t> sorter(hashes, bin_op, true); sorter.create_permute_vector(); sorter.template sort >(hashes); sorter.template sort(unmappedVtx); - MatchByHashSorted matchTwinFunctor(vcmap, unmappedVtx, hashes, unmapped, - nvertices_coarse); - Kokkos::parallel_scan("match twins", policy_t(0, unmapped), - matchTwinFunctor); + MatchByHashSorted matchTwinFunctor(vcmap, unmappedVtx, hashes, unmapped, nvertices_coarse); + Kokkos::parallel_scan("match twins", policy_t(0, unmapped), matchTwinFunctor); } unmapped = countInf(vcmap); @@ -1061,9 +1004,7 @@ class coarsen_heuristics { // get possibly mappable vertices of unmapped vtx_view_t mappableVtx("mappable 
vertices", unmapped); Kokkos::parallel_scan( - "get unmapped", policy_t(0, n), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + "get unmapped", policy_t(0, n), KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { if (vcmap(i) == ORD_MAX) { if (final) { mappableVtx(update) = i; @@ -1076,8 +1017,7 @@ class coarsen_heuristics { ordinal_t mappable_count = unmapped; do { Kokkos::parallel_for( - "reset hn", policy_t(0, mappable_count), - KOKKOS_LAMBDA(ordinal_t i) { + "reset hn", policy_t(0, mappable_count), KOKKOS_LAMBDA(ordinal_t i) { ordinal_t u = mappableVtx(i); hn(u) = ORD_MAX; }); @@ -1087,8 +1027,7 @@ class coarsen_heuristics { "assign relatives", policy_t(0, n), KOKKOS_LAMBDA(ordinal_t i) { if (vcmap(i) != ORD_MAX) { ordinal_t last_free = ORD_MAX; - for (edge_offset_t j = g.graph.row_map(i); - j < g.graph.row_map(i + 1); j++) { + for (edge_offset_t j = g.graph.row_map(i); j < g.graph.row_map(i + 1); j++) { ordinal_t v = g.graph.entries(j); if (vcmap(v) == ORD_MAX) { if (last_free != ORD_MAX) { @@ -1123,8 +1062,7 @@ class coarsen_heuristics { Kokkos::parallel_scan( "get next mappable", policy_t(0, old_mappable), - KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, - const bool final) { + KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update, const bool final) { ordinal_t u = mappableVtx(i); if (hn(u) != ORD_MAX) { if (final) { @@ -1146,14 +1084,11 @@ class coarsen_heuristics { // need to enforce an ordering condition to allow hard-stall // conditions to be broken if (condition ^ swap) { - if (Kokkos::atomic_compare_exchange_strong(&match(u), - ORD_MAX, v)) { - if (Kokkos::atomic_compare_exchange_strong(&match(v), - ORD_MAX, u)) { - ordinal_t cv = - Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); - vcmap(u) = cv; - vcmap(v) = cv; + if (Kokkos::atomic_compare_exchange_strong(&match(u), ORD_MAX, v)) { + if (Kokkos::atomic_compare_exchange_strong(&match(v), ORD_MAX, u)) { + ordinal_t cv = 
Kokkos::atomic_fetch_add(&nvertices_coarse(), 1); + vcmap(u) = cv; + vcmap(v) = cv; } else { match(u) = ORD_MAX; } diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1Color.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1Color.hpp index 784b687957be..86bb28bab0cd 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1Color.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1Color.hpp @@ -24,13 +24,10 @@ namespace KokkosGraph { namespace Experimental { -template -void graph_color_symbolic(KernelHandle *handle, - typename KernelHandle::nnz_lno_t num_rows, - typename KernelHandle::nnz_lno_t /* num_cols */, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - bool /* is_symmetric */ = true) { +template +void graph_color_symbolic(KernelHandle *handle, typename KernelHandle::nnz_lno_t num_rows, + typename KernelHandle::nnz_lno_t /* num_cols */, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, bool /* is_symmetric */ = true) { typedef typename KernelHandle::HandleExecSpace ExecSpace; typedef typename KernelHandle::HandleTempMemorySpace MemSpace; typedef typename KernelHandle::HandlePersistentMemorySpace PersistentMemSpace; @@ -40,37 +37,29 @@ void graph_color_symbolic(KernelHandle *handle, typedef typename KernelHandle::const_nnz_lno_t c_lno_t; typedef typename KernelHandle::const_nnz_scalar_t c_scalar_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, ExecSpace, MemSpace, PersistentMemSpace> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle ConstKernelHandle; ConstKernelHandle tmp_handle(*handle); typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits > Internal_rowmap; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + 
Kokkos::MemoryTraits > Internal_entries; - KokkosGraph::Impl:: - COLOR_D1::color_d1( - &tmp_handle, num_rows, - Internal_rowmap(row_map.data(), row_map.extent(0)), - Internal_entries(entries.data(), entries.extent(0))); + KokkosGraph::Impl::COLOR_D1::color_d1( + &tmp_handle, num_rows, Internal_rowmap(row_map.data(), row_map.extent(0)), + Internal_entries(entries.data(), entries.extent(0))); } -template -void graph_color(KernelHandle *handle, - typename KernelHandle::nnz_lno_t num_rows, - typename KernelHandle::nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, +template +void graph_color(KernelHandle *handle, typename KernelHandle::nnz_lno_t num_rows, + typename KernelHandle::nnz_lno_t num_cols, lno_row_view_t_ row_map, lno_nnz_view_t_ entries, bool is_symmetric = true) { - graph_color_symbolic(handle, num_rows, num_cols, row_map, entries, - is_symmetric); + graph_color_symbolic(handle, num_rows, num_cols, row_map, entries, is_symmetric); } } // end namespace Experimental diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1ColorHandle.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1ColorHandle.hpp index 1b2f98194581..1eefd07c4d29 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1ColorHandle.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance1ColorHandle.hpp @@ -22,7 +22,7 @@ #ifndef _GRAPHCOLORHANDLE_HPP #define _GRAPHCOLORHANDLE_HPP -//#define VERBOSE +// #define VERBOSE namespace KokkosGraph { enum ColoringAlgorithm { @@ -45,8 +45,7 @@ enum ColoringType { Distance1, Distance2 }; template + class ExecutionSpace, class TemporaryMemorySpace, class PersistentMemorySpace> class GraphColoringHandle { public: typedef ExecutionSpace HandleExecSpace; @@ -62,8 +61,7 @@ class GraphColoringHandle { typedef typename std::remove_const::type color_t; typedef const color_t const_color_t; - typedef typename Kokkos::View - color_view_t; + typedef typename Kokkos::View color_view_t; typedef typename 
color_view_t::array_layout color_view_array_layout; typedef typename color_view_t::device_type color_view_device_t; @@ -71,20 +69,15 @@ class GraphColoringHandle { typedef typename color_view_t::HostMirror color_host_view_t; // Host view // type - typedef typename Kokkos::View - size_type_temp_work_view_t; - typedef typename Kokkos::View - size_type_persistent_work_view_t; + typedef typename Kokkos::View size_type_temp_work_view_t; + typedef typename Kokkos::View size_type_persistent_work_view_t; - typedef typename size_type_persistent_work_view_t::HostMirror - size_type_persistent_work_host_view_t; // Host view type + typedef + typename size_type_persistent_work_view_t::HostMirror size_type_persistent_work_host_view_t; // Host view type - typedef typename Kokkos::View - nnz_lno_temp_work_view_t; - typedef typename Kokkos::View - nnz_lno_persistent_work_view_t; - typedef typename nnz_lno_persistent_work_view_t::HostMirror - nnz_lno_persistent_work_host_view_t; // Host view type + typedef typename Kokkos::View nnz_lno_temp_work_view_t; + typedef typename Kokkos::View nnz_lno_persistent_work_view_t; + typedef typename nnz_lno_persistent_work_view_t::HostMirror nnz_lno_persistent_work_host_view_t; // Host view type typedef Kokkos::TeamPolicy team_policy_t; typedef typename team_policy_t::member_type team_member_t; @@ -95,9 +88,9 @@ class GraphColoringHandle { ColoringType GraphColoringType; // Parameters ColoringAlgorithm coloring_algorithm_type; // VB, VBBIT, VBCS, VBD or EB. - ConflictList conflict_list_type; // whether to use a conflict list or not, - // and if using it wheter to create it with - // atomic or parallel prefix sum. + ConflictList conflict_list_type; // whether to use a conflict list or not, + // and if using it wheter to create it with + // atomic or parallel prefix sum. 
double min_reduction_for_conflictlist; // if used pps is selected to create conflict list, what min percantage should @@ -116,23 +109,23 @@ class GraphColoringHandle { bool vb_edge_filtering; // whether to do edge filtering or not in vertex // based algorithms. Swaps on the ad error. - int vb_chunk_size; // the (minimum) size of the consecutive works that a - // thread will be assigned to. + int vb_chunk_size; // the (minimum) size of the consecutive works that a + // thread will be assigned to. int max_number_of_iterations; // maximum allowed number of phases int eb_num_initial_colors; // the number of colors to assign at the beginning // of the edge-based algorithm // STATISTICS - double overall_coloring_time; // the overall time that it took to color the - // graph. In the case of the iterative calls. + double overall_coloring_time; // the overall time that it took to color the + // graph. In the case of the iterative calls. double overall_coloring_time_phase1; // double overall_coloring_time_phase2; // double overall_coloring_time_phase3; // Some timer accumulators for internal // phases. double overall_coloring_time_phase4; // double overall_coloring_time_phase5; // - double coloring_time; // the time that it took to color the graph + double coloring_time; // the time that it took to color the graph int num_phases; // @@ -189,9 +182,7 @@ class GraphColoringHandle { * KokkosKernels::Experimental::Graph::Distance1 or * KokkosKernels::Experimental::Graph::Distance2 */ - void set_coloring_type(const ColoringType &col_type) { - this->GraphColoringType = col_type; - } + void set_coloring_type(const ColoringType &col_type) { this->GraphColoringType = col_type; } /** \brief Gets the graph coloring type. Whether it is distance-1 or * distance-2 coloring. returns Coloring Type: @@ -206,8 +197,7 @@ class GraphColoringHandle { * COLORING_VBCS, COLORING_EB \param set_default_parameters: whether or not to * reset the default parameters for the given algorithm. 
*/ - void set_algorithm(const ColoringAlgorithm &col_algo, - bool set_default_parameters = true) { + void set_algorithm(const ColoringAlgorithm &col_algo, bool set_default_parameters = true) { if (col_algo == COLORING_DEFAULT) { this->choose_default_algorithm(); } else { @@ -228,27 +218,23 @@ class GraphColoringHandle { if (exec == KokkosKernels::Impl::Exec_SERIAL) { this->coloring_algorithm_type = COLORING_SERIAL; #ifdef VERBOSE - std::cout - << "Serial Execution Space, Default Algorithm: COLORING_SERIAL\n"; + std::cout << "Serial Execution Space, Default Algorithm: COLORING_SERIAL\n"; #endif } else if (exec == KokkosKernels::Impl::Exec_SYCL) { // FIXME SYCL: Do not use EB this->coloring_algorithm_type = COLORING_VBBIT; #ifdef VERBOSE - std::cout << ExecutionSpace::name() - << " Execution Space, Default Algorithm: COLORING_VBBIT\n"; + std::cout << ExecutionSpace::name() << " Execution Space, Default Algorithm: COLORING_VBBIT\n"; #endif } else if (KokkosKernels::Impl::kk_is_gpu_exec_space()) { this->coloring_algorithm_type = COLORING_EB; #ifdef VERBOSE - std::cout << ExecutionSpace::name() - << " Execution Space, Default Algorithm: COLORING_EB\n"; + std::cout << ExecutionSpace::name() << " Execution Space, Default Algorithm: COLORING_EB\n"; #endif } else { this->coloring_algorithm_type = COLORING_VBBIT; #ifdef VERBOSE - std::cout << ExecutionSpace::name() - << " Execution Space, Default Algorithm: COLORING_VBBIT\n"; + std::cout << ExecutionSpace::name() << " Execution Space, Default Algorithm: COLORING_VBBIT\n"; #endif } } @@ -261,10 +247,7 @@ class GraphColoringHandle { v3 lower_xadj_counts; CountLowerTriangle(nnz_lno_t nv_, v1 xadj_, v2 adj_, v3 lower_xadj_counts_) - : nv(nv_), - xadj(xadj_), - adj(adj_), - lower_xadj_counts(lower_xadj_counts_) {} + : nv(nv_), xadj(xadj_), adj(adj_), lower_xadj_counts(lower_xadj_counts_) {} KOKKOS_INLINE_FUNCTION void operator()(const nnz_lno_t &i, size_type &new_num_edge) const { @@ -290,18 +273,12 @@ class GraphColoringHandle { 
v2 adj; v3 lower_xadj_counts; - CountLowerTriangleTeam(nnz_lno_t nv_, v1 xadj_, v2 adj_, - v3 lower_xadj_counts_) - : nv(nv_), - xadj(xadj_), - adj(adj_), - lower_xadj_counts(lower_xadj_counts_) {} + CountLowerTriangleTeam(nnz_lno_t nv_, v1 xadj_, v2 adj_, v3 lower_xadj_counts_) + : nv(nv_), xadj(xadj_), adj(adj_), lower_xadj_counts(lower_xadj_counts_) {} KOKKOS_INLINE_FUNCTION - void operator()( - const team_member_t &teamMember /*, row_lno_t &new_num_edge*/) const { - nnz_lno_t ii = teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + void operator()(const team_member_t &teamMember /*, row_lno_t &new_num_edge*/) const { + nnz_lno_t ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= nv) { return; } @@ -322,8 +299,7 @@ class GraphColoringHandle { }, new_edge_count); - Kokkos::single(Kokkos::PerThread(teamMember), - [&]() { lower_xadj_counts(ii + 1) = new_edge_count; }); + Kokkos::single(Kokkos::PerThread(teamMember), [&]() { lower_xadj_counts(ii + 1) = new_edge_count; }); } }; @@ -336,8 +312,7 @@ class GraphColoringHandle { v4 lower_srcs; v4 lower_dsts; - FillLowerTriangleTeam(nnz_lno_t nv_, v1 xadj_, v2 adj_, - v3 lower_xadj_counts_, v4 lower_srcs_, v4 lower_dsts_) + FillLowerTriangleTeam(nnz_lno_t nv_, v1 xadj_, v2 adj_, v3 lower_xadj_counts_, v4 lower_srcs_, v4 lower_dsts_) : nv(nv_), xadj(xadj_), adj(adj_), @@ -347,12 +322,9 @@ class GraphColoringHandle { KOKKOS_INLINE_FUNCTION void operator()(const team_member_t &teamMember) const { - typedef - typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; - nnz_lno_t ii = teamMember.league_rank() * teamMember.team_size() + - teamMember.team_rank(); + nnz_lno_t ii = teamMember.league_rank() * teamMember.team_size() + teamMember.team_rank(); if (ii >= nv) { return; } @@ -360,18 +332,15 @@ class GraphColoringHandle { size_type xadj_begin = xadj(ii); size_type xadj_end = xadj(ii + 1); - 
Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, xadj_end - xadj_begin), - [&](size_type i) { - size_type adjind = i + xadj_begin; - nnz_lno_t n = adj[adjind]; - if (ii < n && n < nv) { - size_type position = Kokkos::atomic_fetch_add( - &(lower_xadj_counts(ii)), atomic_incr_type(1)); - lower_srcs(position) = ii; - lower_dsts(position) = n; - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, xadj_end - xadj_begin), [&](size_type i) { + size_type adjind = i + xadj_begin; + nnz_lno_t n = adj[adjind]; + if (ii < n && n < nv) { + size_type position = Kokkos::atomic_fetch_add(&(lower_xadj_counts(ii)), atomic_incr_type(1)); + lower_srcs(position) = ii; + lower_dsts(position) = n; + } + }); } }; @@ -384,8 +353,7 @@ class GraphColoringHandle { v4 lower_srcs; v4 lower_dsts; - FillLowerTriangle(nnz_lno_t nv_, v1 xadj_, v2 adj_, v3 lower_xadj_counts_, - v4 lower_srcs_, v4 lower_dsts_) + FillLowerTriangle(nnz_lno_t nv_, v1 xadj_, v2 adj_, v3 lower_xadj_counts_, v4 lower_srcs_, v4 lower_dsts_) : nv(nv_), xadj(xadj_), adj(adj_), @@ -410,21 +378,18 @@ class GraphColoringHandle { }; template - void symmetrize_and_calculate_lower_diagonal_edge_list( - nnz_lno_t nv, row_index_view_type xadj, nonzero_view_type adj) { - KokkosKernels::Impl::symmetrize_and_get_lower_diagonal_edge_list< - row_index_view_type, nonzero_view_type, nnz_lno_persistent_work_view_t, - ExecutionSpace>(nv, xadj, adj, lower_triangle_src, lower_triangle_dst); + void symmetrize_and_calculate_lower_diagonal_edge_list(nnz_lno_t nv, row_index_view_type xadj, + nonzero_view_type adj) { + KokkosKernels::Impl::symmetrize_and_get_lower_diagonal_edge_list( + nv, xadj, adj, lower_triangle_src, lower_triangle_dst); size_of_edge_list = lower_triangle_src.extent(0); } template - void get_lower_diagonal_edge_list(nnz_lno_t nv, size_type ne, - row_index_view_type xadj, - nonzero_view_type adj, - size_type &num_out_edges, - nnz_lno_persistent_work_view_t &src, + void 
get_lower_diagonal_edge_list(nnz_lno_t nv, size_type ne, row_index_view_type xadj, nonzero_view_type adj, + size_type &num_out_edges, nnz_lno_persistent_work_view_t &src, nnz_lno_persistent_work_view_t &dst) { if (size_of_edge_list > 0) { num_out_edges = size_of_edge_list; @@ -441,26 +406,20 @@ class GraphColoringHandle { int teamSizeMax = 0; int vector_size = 0; - CountLowerTriangleTeam - clt(nv, xadj, adj, lower_count); + CountLowerTriangleTeam clt(nv, xadj, adj, + lower_count); - KokkosKernels::Impl::get_suggested_vector_size( - vector_size, nv, ne); + KokkosKernels::Impl::get_suggested_vector_size(vector_size, nv, ne); - teamSizeMax = - KokkosKernels::Impl::get_suggested_team_size( - clt, vector_size); + teamSizeMax = KokkosKernels::Impl::get_suggested_team_size(clt, vector_size); Kokkos::parallel_for("KokkosGraph::CountLowerTriangleTeam", - team_policy_t((nv + teamSizeMax - 1) / teamSizeMax, - teamSizeMax, vector_size), + team_policy_t((nv + teamSizeMax - 1) / teamSizeMax, teamSizeMax, vector_size), clt //, new_num_edge ); - KokkosKernels::Impl::inclusive_parallel_prefix_sum< - size_type_temp_work_view_t, ExecutionSpace>(nv + 1, lower_count); + KokkosKernels::Impl::inclusive_parallel_prefix_sum(nv + 1, + lower_count); // Kokkos::parallel_scan (my_exec_space(0, nv + 1), // PPS(lower_count)); ExecutionSpace().fence(); @@ -469,20 +428,15 @@ class GraphColoringHandle { Kokkos::deep_copy(hlower, lower_total_count); new_num_edge = hlower(); - nnz_lno_persistent_work_view_t half_src( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF SRC"), - new_num_edge); - nnz_lno_persistent_work_view_t half_dst( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF DST"), - new_num_edge); + nnz_lno_persistent_work_view_t half_src(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF SRC"), + new_num_edge); + nnz_lno_persistent_work_view_t half_dst(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF DST"), + new_num_edge); Kokkos::parallel_for( 
"KokkosGraph::FillLowerTriangleTeam", - team_policy_t((nv + teamSizeMax - 1) / teamSizeMax, teamSizeMax, - vector_size), - FillLowerTriangleTeam( - nv, xadj, adj, lower_count, half_src, half_dst)); + team_policy_t((nv + teamSizeMax - 1) / teamSizeMax, teamSizeMax, vector_size), + FillLowerTriangleTeam(nv, xadj, adj, lower_count, half_src, half_dst)); src = lower_triangle_src = half_src; dst = lower_triangle_dst = half_dst; @@ -491,30 +445,25 @@ class GraphColoringHandle { if (nv > 0) { Kokkos::parallel_reduce( "KokkosGraph::CountLowerTriangleTeam", my_exec_space(0, nv), - CountLowerTriangle(nv, xadj, adj, - lower_count), + CountLowerTriangle(nv, xadj, adj, + lower_count), new_num_edge); } // Kokkos::parallel_scan (my_exec_space(0, nv + 1), // PPS(lower_count)); - KokkosKernels::Impl::inclusive_parallel_prefix_sum< - size_type_temp_work_view_t, ExecutionSpace>(nv + 1, lower_count); - nnz_lno_persistent_work_view_t half_src( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF SRC"), - new_num_edge); - nnz_lno_persistent_work_view_t half_dst( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF DST"), - new_num_edge); + KokkosKernels::Impl::inclusive_parallel_prefix_sum(nv + 1, + lower_count); + nnz_lno_persistent_work_view_t half_src(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF SRC"), + new_num_edge); + nnz_lno_persistent_work_view_t half_dst(Kokkos::view_alloc(Kokkos::WithoutInitializing, "HALF DST"), + new_num_edge); Kokkos::parallel_for( "KokkosGraph::FillLowerTriangleTeam", my_exec_space(0, nv), - FillLowerTriangle( - nv, xadj, adj, lower_count, half_src, half_dst)); + FillLowerTriangle(nv, xadj, adj, lower_count, half_src, half_dst)); src = lower_triangle_src = half_src; dst = lower_triangle_dst = half_dst; @@ -547,8 +496,7 @@ class GraphColoringHandle { nnz_lno_t get_num_colors() { if (num_colors == 0) { typedef typename Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce("KokkosKernels::FindMax", - my_exec_space(0, 
vertex_colors.extent(0)), + Kokkos::parallel_reduce("KokkosKernels::FindMax", my_exec_space(0, vertex_colors.extent(0)), ReduceMaxFunctor(vertex_colors), num_colors); } return num_colors; @@ -594,47 +542,23 @@ class GraphColoringHandle { virtual ~GraphColoringHandle(){}; // getters - ColoringAlgorithm get_coloring_algo_type() const { - return this->coloring_algorithm_type; - } - ConflictList get_conflict_list_type() const { - return this->conflict_list_type; - } - double get_min_reduction_for_conflictlist() const { - return this->min_reduction_for_conflictlist; - } - int get_min_elements_for_conflictlist() const { - return this->min_elements_for_conflictlist; - } - bool get_serial_conflict_resolution() const { - return this->serial_conflict_resolution; - } + ColoringAlgorithm get_coloring_algo_type() const { return this->coloring_algorithm_type; } + ConflictList get_conflict_list_type() const { return this->conflict_list_type; } + double get_min_reduction_for_conflictlist() const { return this->min_reduction_for_conflictlist; } + int get_min_elements_for_conflictlist() const { return this->min_elements_for_conflictlist; } + bool get_serial_conflict_resolution() const { return this->serial_conflict_resolution; } bool get_tictoc() const { return this->tictoc; } bool get_vb_edge_filtering() const { return this->vb_edge_filtering; } int get_vb_chunk_size() const { return this->vb_chunk_size; } - int get_max_number_of_iterations() const { - return this->max_number_of_iterations; - } + int get_max_number_of_iterations() const { return this->max_number_of_iterations; } int get_eb_num_initial_colors() const { return this->eb_num_initial_colors; } - double get_overall_coloring_time() const { - return this->overall_coloring_time; - } - double get_overall_coloring_time_phase1() const { - return this->overall_coloring_time_phase1; - } - double get_overall_coloring_time_phase2() const { - return this->overall_coloring_time_phase2; - } - double get_overall_coloring_time_phase3() 
const { - return this->overall_coloring_time_phase3; - } - double get_overall_coloring_time_phase4() const { - return this->overall_coloring_time_phase4; - } - double get_overall_coloring_time_phase5() const { - return this->overall_coloring_time_phase5; - } + double get_overall_coloring_time() const { return this->overall_coloring_time; } + double get_overall_coloring_time_phase1() const { return this->overall_coloring_time_phase1; } + double get_overall_coloring_time_phase2() const { return this->overall_coloring_time_phase2; } + double get_overall_coloring_time_phase3() const { return this->overall_coloring_time_phase3; } + double get_overall_coloring_time_phase4() const { return this->overall_coloring_time_phase4; } + double get_overall_coloring_time_phase5() const { return this->overall_coloring_time_phase5; } double get_coloring_time() const { return this->coloring_time; } int get_num_phases() const { return this->num_phases; } color_view_t get_vertex_colors() const { return this->vertex_colors; } @@ -643,44 +567,28 @@ class GraphColoringHandle { nnz_lno_temp_work_view_t get_vertex_list() const { return this->vertex_list; } size_type get_vertex_list_size() const { return this->vertex_list_size; } // setters - void set_vertex_list(nnz_lno_temp_work_view_t vertex_list_, - size_type vertex_list_size_) { + void set_vertex_list(nnz_lno_temp_work_view_t vertex_list_, size_type vertex_list_size_) { this->vertex_list = vertex_list_; this->vertex_list_size = vertex_list_size_; this->use_vtx_list = true; } - void set_coloring_algo_type(const ColoringAlgorithm &col_algo) { - this->coloring_algorithm_type = col_algo; - } - void set_conflict_list_type(const ConflictList &cl) { - this->conflict_list_type = cl; - } + void set_coloring_algo_type(const ColoringAlgorithm &col_algo) { this->coloring_algorithm_type = col_algo; } + void set_conflict_list_type(const ConflictList &cl) { this->conflict_list_type = cl; } void set_min_reduction_for_conflictlist(const double 
&min_reduction) { this->min_reduction_for_conflictlist = min_reduction; } void set_min_elements_for_conflictlist(const int &min_elements) { this->min_elements_for_conflictlist = min_elements; } - void set_serial_conflict_resolution( - const bool &use_serial_conflist_resolution) { + void set_serial_conflict_resolution(const bool &use_serial_conflist_resolution) { this->serial_conflict_resolution = use_serial_conflist_resolution; } void set_tictoc(const bool use_tictoc) { this->tictoc = use_tictoc; } - void set_vb_edge_filtering(const bool &use_vb_edge_filtering) { - this->vb_edge_filtering = use_vb_edge_filtering; - } - void set_vb_chunk_size(const int &chunksize) { - this->vb_chunk_size = chunksize; - } - void set_max_number_of_iterations(const int &max_phases) { - this->max_number_of_iterations = max_phases; - } - void set_eb_num_initial_colors(const int &num_initial_colors) { - this->eb_num_initial_colors = num_initial_colors; - } - void add_to_overall_coloring_time(const double &coloring_time_) { - this->overall_coloring_time += coloring_time_; - } + void set_vb_edge_filtering(const bool &use_vb_edge_filtering) { this->vb_edge_filtering = use_vb_edge_filtering; } + void set_vb_chunk_size(const int &chunksize) { this->vb_chunk_size = chunksize; } + void set_max_number_of_iterations(const int &max_phases) { this->max_number_of_iterations = max_phases; } + void set_eb_num_initial_colors(const int &num_initial_colors) { this->eb_num_initial_colors = num_initial_colors; } + void add_to_overall_coloring_time(const double &coloring_time_) { this->overall_coloring_time += coloring_time_; } void add_to_overall_coloring_time_phase1(const double &coloring_time_) { this->overall_coloring_time_phase1 += coloring_time_; } @@ -696,12 +604,8 @@ class GraphColoringHandle { void add_to_overall_coloring_time_phase5(const double &coloring_time_) { this->overall_coloring_time_phase5 += coloring_time_; } - void set_coloring_time(const double &coloring_time_) { - this->coloring_time = 
coloring_time_; - } - void set_num_phases(const double &num_phases_) { - this->num_phases = num_phases_; - } + void set_coloring_time(const double &coloring_time_) { this->coloring_time = coloring_time_; } + void set_num_phases(const double &num_phases_) { this->num_phases = num_phases_; } void set_vertex_colors(const color_view_t vertex_colors_) { this->vertex_colors = vertex_colors_; this->is_coloring_called_before = true; diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2Color.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2Color.hpp index c40ec72ece70..a6555915bb42 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2Color.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2Color.hpp @@ -44,16 +44,13 @@ namespace Experimental { */ template -void graph_color_distance2(KernelHandle *handle, - typename KernelHandle::nnz_lno_t num_verts, - InRowmap row_map, InEntries row_entries) { +void graph_color_distance2(KernelHandle *handle, typename KernelHandle::nnz_lno_t num_verts, InRowmap row_map, + InEntries row_entries) { using size_type = typename KernelHandle::size_type; using lno_t = typename KernelHandle::nnz_lno_t; - using InternalRowmap = Kokkos::View>; - using InternalEntries = Kokkos::View>; Kokkos::Timer timer; size_type nnz = row_entries.extent(0); @@ -61,11 +58,9 @@ void graph_color_distance2(KernelHandle *handle, InternalEntries rowentries_internal(row_entries.data(), nnz); auto gch_d2 = handle->get_distance2_graph_coloring_handle(); // note: last template argument 'false' means do distance-2, not bipartite - KokkosGraph::Impl::GraphColorDistance2< - typename KernelHandle::GraphColorDistance2HandleType, InternalRowmap, - InternalEntries, false> - gc(num_verts, num_verts, rowmap_internal, rowentries_internal, - rowmap_internal, rowentries_internal, gch_d2); + KokkosGraph::Impl::GraphColorDistance2 + gc(num_verts, num_verts, rowmap_internal, rowentries_internal, rowmap_internal, rowentries_internal, 
gch_d2); gc.compute_distance2_color(); gch_d2->add_to_overall_coloring_time(timer.seconds()); gch_d2->set_coloring_time(timer.seconds()); @@ -104,24 +99,18 @@ void graph_color_distance2(KernelHandle *handle, */ template -void bipartite_color_rows(KernelHandle *handle, - typename KernelHandle::nnz_lno_t num_rows, - typename KernelHandle::nnz_lno_t num_columns, - InRowmap row_map, InEntries row_entries, +void bipartite_color_rows(KernelHandle *handle, typename KernelHandle::nnz_lno_t num_rows, + typename KernelHandle::nnz_lno_t num_columns, InRowmap row_map, InEntries row_entries, bool is_symmetric = false) { using execution_space = typename KernelHandle::HandleExecSpace; using size_type = typename KernelHandle::size_type; using lno_t = typename KernelHandle::nnz_lno_t; - using InternalRowmap = Kokkos::View>; - using InternalEntries = Kokkos::View>; - using TRowmap = Kokkos::View; - using TEntries = Kokkos::View; + using TRowmap = Kokkos::View; + using TEntries = Kokkos::View; Kokkos::Timer timer; size_type nnz = row_entries.extent(0); TRowmap col_map; @@ -130,8 +119,7 @@ void bipartite_color_rows(KernelHandle *handle, // Compute the transpose col_map = TRowmap("Col map", num_columns + 1); col_entries = TEntries("Col entries", nnz); - KokkosSparse::Impl::transpose_graph( + KokkosSparse::Impl::transpose_graph( num_rows, num_columns, row_map, row_entries, col_map, col_entries); } InternalRowmap rowmap_internal(row_map.data(), row_map.extent(0)); @@ -147,11 +135,9 @@ void bipartite_color_rows(KernelHandle *handle, } auto gch_d2 = handle->get_distance2_graph_coloring_handle(); // note: last template argument 'true' means do bipartite one-sided - KokkosGraph::Impl::GraphColorDistance2< - typename KernelHandle::GraphColorDistance2HandleType, InternalRowmap, - InternalEntries, true> - gc(num_rows, num_columns, rowmap_internal, rowentries_internal, - colmap_internal, colentries_internal, gch_d2); + KokkosGraph::Impl::GraphColorDistance2 + gc(num_rows, num_columns, 
rowmap_internal, rowentries_internal, colmap_internal, colentries_internal, gch_d2); gc.compute_distance2_color(); gch_d2->add_to_overall_coloring_time(timer.seconds()); gch_d2->set_coloring_time(timer.seconds()); @@ -185,31 +171,23 @@ void bipartite_color_rows(KernelHandle *handle, * return a view of length num_columns, containing the colors. */ template -void bipartite_color_columns(KernelHandle *handle, - typename KernelHandle::nnz_lno_t num_rows, - typename KernelHandle::nnz_lno_t num_columns, - InRowmap row_map, InEntries row_entries) { +void bipartite_color_columns(KernelHandle *handle, typename KernelHandle::nnz_lno_t num_rows, + typename KernelHandle::nnz_lno_t num_columns, InRowmap row_map, InEntries row_entries) { using execution_space = typename KernelHandle::HandleExecSpace; using size_type = typename KernelHandle::size_type; using lno_t = typename KernelHandle::nnz_lno_t; - using InternalRowmap = Kokkos::View>; - using InternalEntries = Kokkos::View>; - using TRowmap = Kokkos::View; - using TEntries = Kokkos::View; + using TRowmap = Kokkos::View; + using TEntries = Kokkos::View; Kokkos::Timer timer; size_type nnz = row_entries.extent(0); // Compute the transpose TRowmap col_map("Col map", num_columns + 1); - TEntries col_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Col entries"), nnz); - KokkosSparse::Impl::transpose_graph( + TEntries col_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Col entries"), nnz); + KokkosSparse::Impl::transpose_graph( num_rows, num_columns, row_map, row_entries, col_map, col_entries); // Get unmanaged views for both graph and its transpose InternalRowmap colmap_internal(col_map.data(), col_map.extent(0)); @@ -218,11 +196,9 @@ void bipartite_color_columns(KernelHandle *handle, InternalEntries rowentries_internal(row_entries.data(), nnz); auto gch_d2 = handle->get_distance2_graph_coloring_handle(); // note: last template argument 'true' means do bipartite one-sided - 
KokkosGraph::Impl::GraphColorDistance2< - typename KernelHandle::GraphColorDistance2HandleType, InternalRowmap, - InternalEntries, true> - gc(num_columns, num_rows, colmap_internal, colentries_internal, - rowmap_internal, rowentries_internal, gch_d2); + KokkosGraph::Impl::GraphColorDistance2 + gc(num_columns, num_rows, colmap_internal, colentries_internal, rowmap_internal, rowentries_internal, gch_d2); gc.compute_distance2_color(); gch_d2->add_to_overall_coloring_time(timer.seconds()); gch_d2->set_coloring_time(timer.seconds()); diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2ColorHandle.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2ColorHandle.hpp index c6508e0ba8e7..f50ce08fef56 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2ColorHandle.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_Distance2ColorHandle.hpp @@ -36,45 +36,37 @@ enum GraphColoringAlgorithmDistance2 { COLORING_D2_NB_BIT // Distance-2 Graph Coloring Net Based BIT }; -template +template class GraphColorDistance2Handle { public: - using HandleExecSpace = ExecutionSpace; - using HandleTempMemorySpace = TemporaryMemorySpace; - using HandlePersistentMemorySpace = PersistentMemorySpace; - using size_type = typename std::remove_const::type; - using const_size_type = const size_type; - using nnz_lno_type = typename std::remove_const::type; - using const_nnz_lno_type = const nnz_lno_type; - using color_type = typename std::remove_const::type; - using const_color_type = const color_type; - using color_view_type = - typename Kokkos::View; - using color_view_array_layout = typename color_view_type::array_layout; - using color_view_device_type = typename color_view_type::device_type; - using color_view_memory_traits = typename color_view_type::memory_traits; - using color_host_view_type = typename color_view_type::HostMirror; - using size_type_temp_work_view_type = - typename Kokkos::View; - using size_type_persistent_work_view_type = - typename 
Kokkos::View; - using size_type_persistent_work_host_view_type = - typename size_type_persistent_work_view_type::HostMirror; - using nnz_lno_temp_work_view_type = - typename Kokkos::View; - using nnz_lno_persistent_work_view_type = - typename Kokkos::View; - using nnz_lno_persistent_work_host_view_type = - typename nnz_lno_persistent_work_view_type::HostMirror; - using team_policy_type = Kokkos::TeamPolicy; - using team_member_type = typename team_policy_type::member_type; - using non_const_1d_size_type_view_type = typename Kokkos::View; + using HandleExecSpace = ExecutionSpace; + using HandleTempMemorySpace = TemporaryMemorySpace; + using HandlePersistentMemorySpace = PersistentMemorySpace; + using size_type = typename std::remove_const::type; + using const_size_type = const size_type; + using nnz_lno_type = typename std::remove_const::type; + using const_nnz_lno_type = const nnz_lno_type; + using color_type = typename std::remove_const::type; + using const_color_type = const color_type; + using color_view_type = typename Kokkos::View; + using color_view_array_layout = typename color_view_type::array_layout; + using color_view_device_type = typename color_view_type::device_type; + using color_view_memory_traits = typename color_view_type::memory_traits; + using color_host_view_type = typename color_view_type::HostMirror; + using size_type_temp_work_view_type = typename Kokkos::View; + using size_type_persistent_work_view_type = typename Kokkos::View; + using size_type_persistent_work_host_view_type = typename size_type_persistent_work_view_type::HostMirror; + using nnz_lno_temp_work_view_type = typename Kokkos::View; + using nnz_lno_persistent_work_view_type = typename Kokkos::View; + using nnz_lno_persistent_work_host_view_type = typename nnz_lno_persistent_work_view_type::HostMirror; + using team_policy_type = Kokkos::TeamPolicy; + using team_member_type = typename team_policy_type::member_type; + using non_const_1d_size_type_view_type = typename Kokkos::View; 
private: // Parameters - GraphColoringAlgorithmDistance2 - coloring_algorithm_type; // Which algorithm type to use. + GraphColoringAlgorithmDistance2 coloring_algorithm_type; // Which algorithm type to use. bool verbose; // verbosity flag bool tictoc; // print time at every step @@ -82,20 +74,20 @@ class GraphColorDistance2Handle { bool vb_edge_filtering; // whether to do edge filtering or not in vertex // based algorithms. - int vb_chunk_size; // the (minimum) size of the consecutive works that a - // thread will be assigned to. + int vb_chunk_size; // the (minimum) size of the consecutive works that a + // thread will be assigned to. int max_number_of_iterations; // maximum allowed number of phases that // STATISTICS - double overall_coloring_time; // The overall time taken to color the graph. - // In the case of the iterative calls. + double overall_coloring_time; // The overall time taken to color the graph. + // In the case of the iterative calls. double overall_coloring_time_phase1; // double overall_coloring_time_phase2; // double overall_coloring_time_phase3; // Some timer accumulators for internal // phases. 
double overall_coloring_time_phase4; // double overall_coloring_time_phase5; // - double coloring_time; // the time that it took to color the graph + double coloring_time; // the time that it took to color the graph bool use_vtx_list; nnz_lno_temp_work_view_type vertex_list; @@ -159,8 +151,7 @@ class GraphColorDistance2Handle { * * @return None */ - void set_algorithm(const GraphColoringAlgorithmDistance2& col_algo, - bool set_default_parameters = true) { + void set_algorithm(const GraphColoringAlgorithmDistance2& col_algo, bool set_default_parameters = true) { if (col_algo == COLORING_D2_DEFAULT) { this->choose_default_algorithm(); } else { @@ -182,26 +173,23 @@ class GraphColorDistance2Handle { */ void choose_default_algorithm() { - if (KokkosKernels::Impl::kk_get_exec_space_type() == - KokkosKernels::Impl::Exec_SERIAL) { + if (KokkosKernels::Impl::kk_get_exec_space_type() == KokkosKernels::Impl::Exec_SERIAL) { this->coloring_algorithm_type = COLORING_D2_SERIAL; #ifdef VERBOSE - std::cout - << "Serial Execution Space, Default Algorithm: COLORING_D2_SERIAL\n"; + std::cout << "Serial Execution Space, Default Algorithm: COLORING_D2_SERIAL\n"; #endif } else { this->coloring_algorithm_type = COLORING_D2_NB_BIT; #ifdef VERBOSE - std::cout << ExecutionSpace::name() - << " Execution Space, Default Algorithm: COLORING_D2_NB_BIT\n"; + std::cout << ExecutionSpace::name() << " Execution Space, Default Algorithm: COLORING_D2_NB_BIT\n"; #endif } } nnz_lno_type get_num_colors() { if (num_colors == 0) - KokkosKernels::Impl::view_reduce_max( - vertex_colors.extent(0), vertex_colors, num_colors); + KokkosKernels::Impl::view_reduce_max(vertex_colors.extent(0), vertex_colors, + num_colors); return num_colors; } @@ -219,9 +207,7 @@ class GraphColorDistance2Handle { this->vb_chunk_size = 8; this->max_number_of_iterations = 200; break; - default: - throw std::runtime_error( - "Unknown Distance-2 Graph Coloring Algorithm\n"); + default: throw std::runtime_error("Unknown Distance-2 Graph 
Coloring Algorithm\n"); } } @@ -231,35 +217,19 @@ class GraphColorDistance2Handle { virtual ~GraphColorDistance2Handle(){}; // getters and setters - GraphColoringAlgorithmDistance2 get_coloring_algo_type() const { - return this->coloring_algorithm_type; - } + GraphColoringAlgorithmDistance2 get_coloring_algo_type() const { return this->coloring_algorithm_type; } bool get_verbose() const { return this->verbose; } double get_coloring_time() const { return this->coloring_time; } - int get_max_number_of_iterations() const { - return this->max_number_of_iterations; - } + int get_max_number_of_iterations() const { return this->max_number_of_iterations; } int get_num_phases() const { return this->num_phases; } - double get_overall_coloring_time() const { - return this->overall_coloring_time; - } - double get_overall_coloring_time_phase1() const { - return this->overall_coloring_time_phase1; - } - double get_overall_coloring_time_phase2() const { - return this->overall_coloring_time_phase2; - } - double get_overall_coloring_time_phase3() const { - return this->overall_coloring_time_phase3; - } - double get_overall_coloring_time_phase4() const { - return this->overall_coloring_time_phase4; - } - double get_overall_coloring_time_phase5() const { - return this->overall_coloring_time_phase5; - } + double get_overall_coloring_time() const { return this->overall_coloring_time; } + double get_overall_coloring_time_phase1() const { return this->overall_coloring_time_phase1; } + double get_overall_coloring_time_phase2() const { return this->overall_coloring_time_phase2; } + double get_overall_coloring_time_phase3() const { return this->overall_coloring_time_phase3; } + double get_overall_coloring_time_phase4() const { return this->overall_coloring_time_phase4; } + double get_overall_coloring_time_phase5() const { return this->overall_coloring_time_phase5; } bool get_tictoc() const { return this->tictoc; } @@ -272,14 +242,11 @@ class GraphColorDistance2Handle { bool 
is_coloring_called() const { return this->is_coloring_called_before; } bool get_use_vtx_list() const { return this->use_vtx_list; } - nnz_lno_temp_work_view_type get_vertex_list() const { - return this->vertex_list; - } + nnz_lno_temp_work_view_type get_vertex_list() const { return this->vertex_list; } size_type get_vertex_list_size() const { return this->vertex_list_size; } // setters - void set_vertex_list(nnz_lno_temp_work_view_type vertex_list_, - size_type vertex_list_size_) { + void set_vertex_list(nnz_lno_temp_work_view_type vertex_list_, size_type vertex_list_size_) { this->vertex_list = vertex_list_; this->vertex_list_size = vertex_list_size_; this->use_vtx_list = true; @@ -291,19 +258,11 @@ class GraphColorDistance2Handle { } void set_verbose(const bool verbose_) { this->verbose = verbose_; } - void set_coloring_time(const double& coloring_time_) { - this->coloring_time = coloring_time_; - } - void set_max_number_of_iterations(const int& max_phases) { - this->max_number_of_iterations = max_phases; - } - void set_num_phases(const int& num_phases_) { - this->num_phases = num_phases_; - } + void set_coloring_time(const double& coloring_time_) { this->coloring_time = coloring_time_; } + void set_max_number_of_iterations(const int& max_phases) { this->max_number_of_iterations = max_phases; } + void set_num_phases(const int& num_phases_) { this->num_phases = num_phases_; } - void add_to_overall_coloring_time(const double& coloring_time_) { - this->overall_coloring_time += coloring_time_; - } + void add_to_overall_coloring_time(const double& coloring_time_) { this->overall_coloring_time += coloring_time_; } void add_to_overall_coloring_time_phase1(const double& coloring_time_) { this->overall_coloring_time_phase1 += coloring_time_; } @@ -322,13 +281,9 @@ class GraphColorDistance2Handle { void set_tictoc(const bool use_tictoc) { this->tictoc = use_tictoc; } - void set_vb_chunk_size(const int& chunksize) { - this->vb_chunk_size = chunksize; - } + void 
set_vb_chunk_size(const int& chunksize) { this->vb_chunk_size = chunksize; } - void set_vb_edge_filtering(const bool& use_vb_edge_filtering) { - this->vb_edge_filtering = use_vb_edge_filtering; - } + void set_vb_edge_filtering(const bool& use_vb_edge_filtering) { this->vb_edge_filtering = use_vb_edge_filtering; } void set_vertex_colors(const color_view_type vertex_colors_) { this->vertex_colors = vertex_colors_; @@ -349,10 +304,8 @@ class GraphColorDistance2Handle { * object (i.e., `std::ofstream os("G.dot", std::ofstream::out);`) to write to * a file. */ - template - void dump_graphviz(std::ostream& os, const size_t num_verts, - rowmap_type& rowmap, entries_type& entries, + template + void dump_graphviz(std::ostream& os, const size_t num_verts, rowmap_type& rowmap, entries_type& entries, kokkos_view_type& colors) const { using h_colors_type = typename kokkos_view_type::HostMirror; using h_rowmap_type = typename rowmap_type::HostMirror; @@ -407,13 +360,11 @@ class GraphColorDistance2Handle { penwidth = ", penwidth=\"2.0\""; } - os << " " << vid << " [ label=\"" << vid << "|" << h_colors(vid) - << "\"" << style << fontcolor << color << fillcolor << penwidth << "];" - << std::endl; + os << " " << vid << " [ label=\"" << vid << "|" << h_colors(vid) << "\"" << style << fontcolor << color + << fillcolor << penwidth << "];" << std::endl; // Add the node's edges - for (size_t iadj = h_rowmap(vid); iadj < (size_t)h_rowmap(vid + 1); - iadj++) { + for (size_t iadj = h_rowmap(vid); iadj < (size_t)h_rowmap(vid + 1); iadj++) { size_t vadj = h_entries(iadj); if (vadj >= vid) { os << " " << vid << " -- " << vadj << ";" << std::endl; diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_ExplicitCoarsening.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_ExplicitCoarsening.hpp index 3c655026f5c6..67c4fbd45378 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_ExplicitCoarsening.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_ExplicitCoarsening.hpp @@ -32,35 
+32,27 @@ namespace Experimental { // An uncompressed graph will still work as input to some things like D1 graph // coloring. -template -void graph_explicit_coarsen( - const fine_rowmap_t& fineRowmap, const fine_entries_t& fineEntries, - const labels_t& labels, - typename fine_entries_t::non_const_value_type numCoarseVerts, - coarse_rowmap_t& coarseRowmap, coarse_entries_t& coarseEntries, - bool compress = true) { +template +void graph_explicit_coarsen(const fine_rowmap_t& fineRowmap, const fine_entries_t& fineEntries, const labels_t& labels, + typename fine_entries_t::non_const_value_type numCoarseVerts, coarse_rowmap_t& coarseRowmap, + coarse_entries_t& coarseEntries, bool compress = true) { using size_type = typename fine_rowmap_t::non_const_value_type; using lno_t = typename fine_entries_t::non_const_value_type; using exec_space = typename device_t::execution_space; - static_assert( - std::is_same::value, - "graph_explicit_coarsen: The coarse and fine entry Views have different " - "value types."); - KokkosGraph::Impl::ExplicitGraphCoarsening< - lno_t, size_type, device_t, fine_rowmap_t, fine_entries_t, labels_t, - coarse_rowmap_t, coarse_entries_t, coarse_entries_t> + static_assert(std::is_same::value, + "graph_explicit_coarsen: The coarse and fine entry Views have different " + "value types."); + KokkosGraph::Impl::ExplicitGraphCoarsening egc(fineRowmap, fineEntries, labels, numCoarseVerts); coarseRowmap = egc.coarseRowmap; coarseEntries = egc.coarseEntries; if (compress) { coarse_rowmap_t mergedRowmap; coarse_entries_t mergedEntries; - KokkosSparse::sort_and_merge_graph( - coarseRowmap, coarseEntries, mergedRowmap, mergedEntries); + KokkosSparse::sort_and_merge_graph(coarseRowmap, coarseEntries, + mergedRowmap, mergedEntries); coarseRowmap = mergedRowmap; coarseEntries = mergedEntries; } @@ -68,27 +60,22 @@ void graph_explicit_coarsen( // Same as above, but also produce the map from coarse vertices to fine vertices // (inverse map of labels) -template -void 
graph_explicit_coarsen_with_inverse_map( - const fine_rowmap_t& fineRowmap, const fine_entries_t& fineEntries, - const labels_t& labels, - typename fine_entries_t::non_const_value_type numCoarseVerts, - coarse_rowmap_t& coarseRowmap, coarse_entries_t& coarseEntries, - ordinal_view_t& inverseOffsets, ordinal_view_t& inverseLabels, - bool compress = true) { +template +void graph_explicit_coarsen_with_inverse_map(const fine_rowmap_t& fineRowmap, const fine_entries_t& fineEntries, + const labels_t& labels, + typename fine_entries_t::non_const_value_type numCoarseVerts, + coarse_rowmap_t& coarseRowmap, coarse_entries_t& coarseEntries, + ordinal_view_t& inverseOffsets, ordinal_view_t& inverseLabels, + bool compress = true) { using size_type = typename fine_rowmap_t::non_const_value_type; using lno_t = typename fine_entries_t::non_const_value_type; using exec_space = typename device_t::execution_space; - static_assert( - std::is_same::value, - "graph_explicit_coarsen: The coarse and fine entry Views have different " - "value types."); - KokkosGraph::Impl::ExplicitGraphCoarsening< - lno_t, size_type, device_t, fine_rowmap_t, fine_entries_t, labels_t, - coarse_rowmap_t, coarse_entries_t, ordinal_view_t> + static_assert(std::is_same::value, + "graph_explicit_coarsen: The coarse and fine entry Views have different " + "value types."); + KokkosGraph::Impl::ExplicitGraphCoarsening egc(fineRowmap, fineEntries, labels, numCoarseVerts); coarseRowmap = egc.coarseRowmap; coarseEntries = egc.coarseEntries; @@ -97,9 +84,8 @@ void graph_explicit_coarsen_with_inverse_map( if (compress) { coarse_rowmap_t mergedRowmap; coarse_entries_t mergedEntries; - KokkosSparse::sort_and_merge_graph( - coarseRowmap, coarseEntries, mergedRowmap, mergedEntries); + KokkosSparse::sort_and_merge_graph(coarseRowmap, coarseEntries, + mergedRowmap, mergedEntries); coarseRowmap = mergedRowmap; coarseEntries = mergedEntries; } diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_MIS2.hpp 
b/packages/kokkos-kernels/graph/src/KokkosGraph_MIS2.hpp index fb38d05456b6..4af491a40651 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_MIS2.hpp +++ b/packages/kokkos-kernels/graph/src/KokkosGraph_MIS2.hpp @@ -30,21 +30,18 @@ enum MIS2_Algorithm { MIS2_QUALITY, MIS2_FAST }; template -lno_view_t graph_d2_mis(const rowmap_t& rowmap, const colinds_t& colinds, - MIS2_Algorithm algo = MIS2_FAST) { +lno_view_t graph_d2_mis(const rowmap_t& rowmap, const colinds_t& colinds, MIS2_Algorithm algo = MIS2_FAST) { if (rowmap.extent(0) <= 1) { // zero vertices means the MIS is empty. return lno_view_t(); } switch (algo) { case MIS2_QUALITY: { - Impl::D2_MIS_FixedPriority mis( - rowmap, colinds); + Impl::D2_MIS_FixedPriority mis(rowmap, colinds); return mis.compute(); } case MIS2_FAST: { - Impl::D2_MIS_RandomPriority - mis(rowmap, colinds); + Impl::D2_MIS_RandomPriority mis(rowmap, colinds); return mis.compute(); } } @@ -53,16 +50,14 @@ lno_view_t graph_d2_mis(const rowmap_t& rowmap, const colinds_t& colinds, template -labels_t graph_mis2_coarsen( - const rowmap_t& rowmap, const colinds_t& colinds, - typename colinds_t::non_const_value_type& numClusters) { +labels_t graph_mis2_coarsen(const rowmap_t& rowmap, const colinds_t& colinds, + typename colinds_t::non_const_value_type& numClusters) { if (rowmap.extent(0) <= 1) { // there are no vertices to label numClusters = 0; return labels_t(); } - Impl::D2_MIS_Aggregation aggregation( - rowmap, colinds); + Impl::D2_MIS_Aggregation aggregation(rowmap, colinds); aggregation.compute(false); numClusters = aggregation.numAggs; return aggregation.labels; @@ -70,16 +65,14 @@ labels_t graph_mis2_coarsen( template -labels_t graph_mis2_aggregate( - const rowmap_t& rowmap, const colinds_t& colinds, - typename colinds_t::non_const_value_type& numAggregates) { +labels_t graph_mis2_aggregate(const rowmap_t& rowmap, const colinds_t& colinds, + typename colinds_t::non_const_value_type& numAggregates) { if (rowmap.extent(0) <= 1) { // 
there are no vertices to label numAggregates = 0; return labels_t(); } - Impl::D2_MIS_Aggregation aggregation( - rowmap, colinds); + Impl::D2_MIS_Aggregation aggregation(rowmap, colinds); aggregation.compute(true); numAggregates = aggregation.numAggs; return aggregation.labels; @@ -101,31 +94,23 @@ namespace Experimental { template -[[deprecated]] lno_view_t graph_d2_mis(const rowmap_t& rowmap, - const colinds_t& colinds, +[[deprecated]] lno_view_t graph_d2_mis(const rowmap_t& rowmap, const colinds_t& colinds, MIS2_Algorithm algo = MIS2_FAST) { - return KokkosGraph::graph_d2_mis( - rowmap, colinds, algo); + return KokkosGraph::graph_d2_mis(rowmap, colinds, algo); } template -[[deprecated]] labels_t graph_mis2_coarsen( - const rowmap_t& rowmap, const colinds_t& colinds, - typename colinds_t::non_const_value_type& numClusters) { - return KokkosGraph::graph_mis2_coarsen(rowmap, colinds, - numClusters); +[[deprecated]] labels_t graph_mis2_coarsen(const rowmap_t& rowmap, const colinds_t& colinds, + typename colinds_t::non_const_value_type& numClusters) { + return KokkosGraph::graph_mis2_coarsen(rowmap, colinds, numClusters); } template -[[deprecated]] labels_t graph_mis2_aggregate( - const rowmap_t& rowmap, const colinds_t& colinds, - typename colinds_t::non_const_value_type& numAggregates) { - return KokkosGraph::graph_mis2_aggregate(rowmap, colinds, - numAggregates); +[[deprecated]] labels_t graph_mis2_aggregate(const rowmap_t& rowmap, const colinds_t& colinds, + typename colinds_t::non_const_value_type& numAggregates) { + return KokkosGraph::graph_mis2_aggregate(rowmap, colinds, numAggregates); } [[deprecated]] inline const char* mis2_algorithm_name(MIS2_Algorithm algo) { diff --git a/packages/kokkos-kernels/graph/src/KokkosGraph_Triangle.hpp b/packages/kokkos-kernels/graph/src/KokkosGraph_Triangle.hpp index 0a878891ce88..6ab6dd7b9aa4 100644 --- a/packages/kokkos-kernels/graph/src/KokkosGraph_Triangle.hpp +++ 
b/packages/kokkos-kernels/graph/src/KokkosGraph_Triangle.hpp @@ -148,15 +148,11 @@ transposeA, row_mapB, entriesB, transposeB); } */ -template -void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, - typename KernelHandle::nnz_lno_t k, - alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, - bool transposeA, blno_row_view_t_ row_mapB, - blno_nnz_view_t_ entriesB, bool transposeB, +void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, typename KernelHandle::nnz_lno_t n, + typename KernelHandle::nnz_lno_t k, alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, + bool transposeA, blno_row_view_t_ row_mapB, blno_nnz_view_t_ entriesB, bool transposeB, visit_struct_t visit_struct) { using namespace KokkosSparse; @@ -168,30 +164,24 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, case SPGEMM_KK_TRIANGLE_IA: case SPGEMM_KK_TRIANGLE_IA_UNION: default: { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, alno_row_view_t_, alno_nnz_view_t_, - typename KernelHandle::in_scalar_nnz_view_t, blno_row_view_t_, - blno_nnz_view_t_, typename KernelHandle::in_scalar_nnz_view_t> - kspgemm(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB, - entriesB, transposeB); + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB, entriesB, transposeB); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); } break; } } -template -void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, - visit_struct_t visit_struct) { +template +void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, alno_row_view_t_ row_mapA, + alno_nnz_view_t_ entriesA, visit_struct_t visit_struct) { typedef typename KernelHandle::nnz_lno_t nnz_lno_t; typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::SPGEMMHandleType 
spgemmHandleType; - typedef typename KernelHandle::nnz_lno_persistent_work_view_t - nnz_lno_persistent_work_view_t; - typedef typename KernelHandle::row_lno_persistent_work_view_t - row_lno_persistent_work_view_t; + typedef typename KernelHandle::nnz_lno_persistent_work_view_t nnz_lno_persistent_work_view_t; + typedef typename KernelHandle::row_lno_persistent_work_view_t row_lno_persistent_work_view_t; typedef typename KernelHandle::HandleExecSpace ExecutionSpace; @@ -207,8 +197,8 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, should_i_sort = true; else if (sort_lower_triangle == 2) { size_type max_row_size = 0; - KokkosKernels::Impl::kk_view_reduce_max_row_size( - m, row_mapA.data(), row_mapA.data() + 1, max_row_size); + KokkosKernels::Impl::kk_view_reduce_max_row_size(m, row_mapA.data(), row_mapA.data() + 1, + max_row_size); if (max_row_size > 1000) { should_i_sort = true; @@ -217,13 +207,11 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, if (should_i_sort) { if (sh->get_lower_triangular_permutation().data() == NULL) { - nnz_lno_persistent_work_view_t new_indices( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "new_indices"), m); + nnz_lno_persistent_work_view_t new_indices(Kokkos::view_alloc(Kokkos::WithoutInitializing, "new_indices"), m); int sort_decreasing_order = 1; ////If true we place the largest row to top, so that largest row size will /// be minimized in lower triangle. - if (sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_AI || - sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_LU) { + if (sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_AI || sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_LU) { sort_decreasing_order = 0; // if false we place the largest row to bottom, so that largest column // is minimizedin lower triangle. @@ -232,10 +220,8 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, // if 2, we do an interleaved sort. 
} { - KokkosSparse::Impl::kk_sort_by_row_size( - m, row_mapA.data(), new_indices.data(), sort_decreasing_order, - ExecutionSpace().concurrency()); + KokkosSparse::Impl::kk_sort_by_row_size( + m, row_mapA.data(), new_indices.data(), sort_decreasing_order, ExecutionSpace().concurrency()); } sh->set_lower_triangular_permutation(new_indices); } @@ -250,56 +236,43 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, row_lno_persistent_work_view_t lower_triangular_matrix_rowmap; nnz_lno_persistent_work_view_t lower_triangular_matrix_entries; timer1.reset(); - if (create_lower_triangular || - sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_LL || + if (create_lower_triangular || sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_LL || sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_LU) { - sh->get_lower_triangular_matrix(lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries); - if (lower_triangular_matrix_rowmap.data() == NULL || - lower_triangular_matrix_entries.data() == NULL) { + sh->get_lower_triangular_matrix(lower_triangular_matrix_rowmap, lower_triangular_matrix_entries); + if (lower_triangular_matrix_rowmap.data() == NULL || lower_triangular_matrix_entries.data() == NULL) { alno_nnz_view_t_ null_values; - nnz_lno_persistent_work_view_t new_indices = - sh->get_lower_triangular_permutation(); - - KokkosSparse::Impl::kk_get_lower_triangle< - alno_row_view_t_, alno_nnz_view_t_, alno_nnz_view_t_, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - alno_nnz_view_t_, nnz_lno_persistent_work_view_t, ExecutionSpace>( - m, row_mapA, entriesA, null_values, lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, null_values, new_indices, - handle->is_dynamic_scheduling(), + nnz_lno_persistent_work_view_t new_indices = sh->get_lower_triangular_permutation(); + + KokkosSparse::Impl::kk_get_lower_triangle( + m, row_mapA, entriesA, null_values, lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, + 
null_values, new_indices, handle->is_dynamic_scheduling(), handle->get_team_work_size(1, ExecutionSpace().concurrency(), m)); - sh->set_lower_triangular_matrix(lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries); + sh->set_lower_triangular_matrix(lower_triangular_matrix_rowmap, lower_triangular_matrix_entries); } } if (handle->get_verbose()) { - std::cout << "Preprocess Create Lower Triangular Time:" << timer1.seconds() - << std::endl; + std::cout << "Preprocess Create Lower Triangular Time:" << timer1.seconds() << std::endl; } timer1.reset(); row_lno_persistent_work_view_t upper_triangular_matrix_rowmap; nnz_lno_persistent_work_view_t upper_triangular_matrix_entries; if (sh->get_algorithm_type() == SPGEMM_KK_TRIANGLE_LU) { - sh->get_lower_triangular_matrix(lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries); + sh->get_lower_triangular_matrix(lower_triangular_matrix_rowmap, lower_triangular_matrix_entries); alno_nnz_view_t_ null_values; - nnz_lno_persistent_work_view_t new_indices = - sh->get_lower_triangular_permutation(); - - KokkosSparse::Impl::kk_get_lower_triangle< - alno_row_view_t_, alno_nnz_view_t_, alno_nnz_view_t_, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - alno_nnz_view_t_, nnz_lno_persistent_work_view_t, ExecutionSpace>( - m, row_mapA, entriesA, null_values, upper_triangular_matrix_rowmap, - upper_triangular_matrix_entries, null_values, new_indices, - handle->is_dynamic_scheduling(), 4, false); + nnz_lno_persistent_work_view_t new_indices = sh->get_lower_triangular_permutation(); + + KokkosSparse::Impl::kk_get_lower_triangle( + m, row_mapA, entriesA, null_values, upper_triangular_matrix_rowmap, upper_triangular_matrix_entries, + null_values, new_indices, handle->is_dynamic_scheduling(), 4, false); } if (handle->get_verbose()) { - std::cout << "Preprocess Create Upper Triangular Time:" << timer1.seconds() - << std::endl; + std::cout << "Preprocess Create Upper Triangular Time:" << timer1.seconds() << 
std::endl; } /////////CREATE LOWER TRIANGLE/////// @@ -320,33 +293,25 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, case SPGEMM_KK_TRIANGLE_IA: { // these are the algorithms that requires transpose of the incidence // matrix. - sh->get_lower_triangular_matrix(lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries); + sh->get_lower_triangular_matrix(lower_triangular_matrix_rowmap, lower_triangular_matrix_entries); - if (lower_triangular_matrix_rowmap.data() == NULL || - lower_triangular_matrix_entries.data() == NULL) { + if (lower_triangular_matrix_rowmap.data() == NULL || lower_triangular_matrix_entries.data() == NULL) { std::cout << "Creating lower triangular A" << std::endl; alno_nnz_view_t_ null_values; - nnz_lno_persistent_work_view_t new_indices = - sh->get_lower_triangular_permutation(); - - KokkosSparse::Impl::kk_get_lower_triangle< - alno_row_view_t_, alno_nnz_view_t_, alno_nnz_view_t_, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - alno_nnz_view_t_, nnz_lno_persistent_work_view_t, ExecutionSpace>( - m, row_mapA, entriesA, null_values, lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, null_values, new_indices, - handle->is_dynamic_scheduling()); + nnz_lno_persistent_work_view_t new_indices = sh->get_lower_triangular_permutation(); + + KokkosSparse::Impl::kk_get_lower_triangle( + m, row_mapA, entriesA, null_values, lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, + null_values, new_indices, handle->is_dynamic_scheduling()); } - KokkosSparse::Impl:: - kk_create_incidence_tranpose_matrix_from_lower_triangle< - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - ExecutionSpace>( - m, lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, incidence_transpose_rowmap, - incidence_transpose_entries, handle->is_dynamic_scheduling()); + 
KokkosSparse::Impl::kk_create_incidence_tranpose_matrix_from_lower_triangle< + row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, row_lno_persistent_work_view_t, + nnz_lno_persistent_work_view_t, ExecutionSpace>(m, lower_triangular_matrix_rowmap, + lower_triangular_matrix_entries, incidence_transpose_rowmap, + incidence_transpose_entries, handle->is_dynamic_scheduling()); } break; // IF it is one of below, we perform (A) or (L) x I @@ -355,12 +320,10 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, // these are the algorithms that requires the incidence matrix. KokkosSparse::Impl::kk_create_incidence_matrix_from_original_matrix< - alno_row_view_t_, alno_nnz_view_t_, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - ExecutionSpace>(m, row_mapA, entriesA, incidence_rowmap, - incidence_entries, - sh->get_lower_triangular_permutation(), - handle->is_dynamic_scheduling()); + alno_row_view_t_, alno_nnz_view_t_, row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, + nnz_lno_persistent_work_view_t, ExecutionSpace>(m, row_mapA, entriesA, incidence_rowmap, incidence_entries, + sh->get_lower_triangular_permutation(), + handle->is_dynamic_scheduling()); } break; case SPGEMM_KK_TRIANGLE_LU: case SPGEMM_KK_TRIANGLE_LL: @@ -370,8 +333,7 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, } if (handle->get_verbose()) { - std::cout << "Preprocess Incidence Matrix Create Time:" << timer1.seconds() - << std::endl; + std::cout << "Preprocess Incidence Matrix Create Time:" << timer1.seconds() << std::endl; } //// /// CREATE INCIDENCE MATRIX END @@ -380,49 +342,36 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, switch (sh->get_algorithm_type()) { default: case SPGEMM_KK_TRIANGLE_LL: { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, 
nnz_lno_persistent_work_view_t, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t> - kspgemm(handle, m, m, m, lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, false, - lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, false); + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, m, m, m, lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, false, + lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, false); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); } break; case SPGEMM_KK_TRIANGLE_LU: { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t> - kspgemm(handle, m, m, m, lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, false, - upper_triangular_matrix_rowmap, - upper_triangular_matrix_entries, false); + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, m, m, m, lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, false, + upper_triangular_matrix_rowmap, upper_triangular_matrix_entries, false); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); } break; case SPGEMM_KK_TRIANGLE_AI: { if (create_lower_triangular) { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t> - kspgemm(handle, m, m, incidence_entries.extent(0) / 2, - lower_triangular_matrix_rowmap, + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, m, m, incidence_entries.extent(0) / 2, lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, false, // transpose ignore. 
incidence_rowmap, incidence_entries, false); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); } else { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, alno_row_view_t_, alno_nnz_view_t_, - nnz_lno_persistent_work_view_t, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t> - kspgemm(handle, m, m, incidence_entries.extent(0) / 2, row_mapA, - entriesA, + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, m, m, incidence_entries.extent(0) / 2, row_mapA, entriesA, false, // transpose ignore. incidence_rowmap, incidence_entries, false); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); @@ -433,24 +382,20 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, case SPGEMM_KK_TRIANGLE_IA_UNION: case SPGEMM_KK_TRIANGLE_IA: { if (create_lower_triangular) { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - row_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t> - kspgemm(handle, incidence_transpose_rowmap.extent(0) - 1, m, m, - incidence_transpose_rowmap, incidence_transpose_entries, + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, incidence_transpose_rowmap.extent(0) - 1, m, m, incidence_transpose_rowmap, + incidence_transpose_entries, false, // transpose ignore. 
- lower_triangular_matrix_rowmap, - lower_triangular_matrix_entries, false); + lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, false); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); } else { - KokkosSparse::Impl::KokkosSPGEMM< - KernelHandle, row_lno_persistent_work_view_t, - nnz_lno_persistent_work_view_t, nnz_lno_persistent_work_view_t, - alno_row_view_t_, alno_nnz_view_t_, nnz_lno_persistent_work_view_t> - kspgemm(handle, incidence_transpose_rowmap.extent(0) - 1, m, m, - incidence_transpose_rowmap, incidence_transpose_entries, + KokkosSparse::Impl::KokkosSPGEMM + kspgemm(handle, incidence_transpose_rowmap.extent(0) - 1, m, m, incidence_transpose_rowmap, + incidence_transpose_entries, false, // transpose ignore. row_mapA, entriesA, false); kspgemm.KokkosSPGEMM_generic_triangle(visit_struct); diff --git a/packages/kokkos-kernels/graph/unit_test/Test_Graph_coarsen.hpp b/packages/kokkos-kernels/graph/unit_test/Test_Graph_coarsen.hpp index 95f1533c88d5..2fda527dfb67 100644 --- a/packages/kokkos-kernels/graph/unit_test/Test_Graph_coarsen.hpp +++ b/packages/kokkos-kernels/graph/unit_test/Test_Graph_coarsen.hpp @@ -47,23 +47,16 @@ bool verify_coarsening(typename coarsener_t::coarse_level_triple fine_l, using ordinal_t = typename entries_t::value_type; using edge_t = typename rowmap_t::value_type; - crsMat A = fine_l.mtx; - crsMat coarse_A = coarse_l.mtx; - auto f_rowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto c_rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - coarse_A.graph.row_map); - auto f_entries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); - auto vcmap = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), coarse_l.interp_mtx.graph.entries); - auto few = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); - auto cew = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarse_A.values); - auto fvw = - 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), fine_l.vtx_wgts); - auto cvw = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - coarse_l.vtx_wgts); + crsMat A = fine_l.mtx; + crsMat coarse_A = coarse_l.mtx; + auto f_rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto c_rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarse_A.graph.row_map); + auto f_entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto vcmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarse_l.interp_mtx.graph.entries); + auto few = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + auto cew = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarse_A.values); + auto fvw = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), fine_l.vtx_wgts); + auto cvw = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarse_l.vtx_wgts); ordinal_t f_size = 0; ordinal_t c_size = 0; for (ordinal_t i = 0; i < static_cast(fvw.extent(0)); i++) { @@ -112,10 +105,8 @@ bool verify_is_graph(crsMat A) { using entries_t = typename c_entries_t::non_const_type; using ordinal_t = typename entries_t::value_type; using edge_t = typename rowmap_t::value_type; - auto rowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto entries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); for (ordinal_t i = 0; i < A.numRows(); i++) { std::set adjset; @@ -158,8 +149,7 @@ bool verify_aggregator(crsMat A, crsMat agg) { if (A.numRows() < agg.numCols()) { return false; } - auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - agg.graph.entries); + auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), 
agg.graph.entries); std::vector aggregateSizes(agg.numCols(), 0); for (ordinal_t i = 0; i < static_cast(agg.nnz()); i++) { @@ -244,8 +234,7 @@ crsMat gen_grid() { template void test_multilevel_coarsen_grid() { - using crsMat = - KokkosSparse::CrsMatrix; + using crsMat = KokkosSparse::CrsMatrix; crsMat A = gen_grid(); using coarsener_t = coarse_builder; typename coarsener_t::coarsen_handle handle; @@ -259,17 +248,12 @@ void test_multilevel_coarsen_grid() { coarse++; while (coarse != levels.end()) { bool correct_aggregator = verify_aggregator(fine->mtx, coarse->interp_mtx); - EXPECT_TRUE(correct_aggregator) - << "Multilevel coarsening produced invalid aggregator on level " - << coarse->level - 1; + EXPECT_TRUE(correct_aggregator) << "Multilevel coarsening produced invalid aggregator on level " + << coarse->level - 1; bool correct_graph = verify_is_graph(coarse->mtx); bool correct_coarsening = verify_coarsening(*fine, *coarse); - EXPECT_TRUE(correct_graph) - << "Multilevel coarsening produced invalid graph on level " - << coarse->level; - EXPECT_TRUE(correct_coarsening) - << "Multilevel coarsening produced invalid coarsening on level " - << coarse->level; + EXPECT_TRUE(correct_graph) << "Multilevel coarsening produced invalid graph on level " << coarse->level; + EXPECT_TRUE(correct_coarsening) << "Multilevel coarsening produced invalid coarsening on level " << coarse->level; fine++; coarse++; } @@ -277,8 +261,7 @@ void test_multilevel_coarsen_grid() { template void test_coarsen_grid() { - using crsMat = - KokkosSparse::CrsMatrix; + using crsMat = KokkosSparse::CrsMatrix; using graph_type = typename crsMat::StaticCrsGraphType; using c_entries_t = typename graph_type::entries_type; using entries_t = typename c_entries_t::non_const_type; @@ -293,60 +276,49 @@ void test_coarsen_grid() { fine_A.vtx_wgts = vWgts; fine_A.level = 0; fine_A.uniform_weights = true; - std::vector heuristics = { - coarsener_t::HECv1, coarsener_t::Match, coarsener_t::MtMetis, - coarsener_t::MIS2, 
coarsener_t::GOSHv1, coarsener_t::GOSHv2}; - std::vector builders = { - coarsener_t::Sort, coarsener_t::Hashmap, coarsener_t::Hybrid, - coarsener_t::Spgemm, coarsener_t::Spgemm_transpose_first}; + std::vector heuristics = {coarsener_t::HECv1, coarsener_t::Match, + coarsener_t::MtMetis, coarsener_t::MIS2, + coarsener_t::GOSHv1, coarsener_t::GOSHv2}; + std::vector builders = {coarsener_t::Sort, coarsener_t::Hashmap, coarsener_t::Hybrid, + coarsener_t::Spgemm, coarsener_t::Spgemm_transpose_first}; for (auto h : heuristics) { - handle.h = h; - crsMat aggregator = - coarsener_t::generate_coarse_mapping(handle, fine_A.mtx, true); + handle.h = h; + crsMat aggregator = coarsener_t::generate_coarse_mapping(handle, fine_A.mtx, true); bool correct_aggregator = verify_aggregator(fine_A.mtx, aggregator); - EXPECT_TRUE(correct_aggregator) - << "Aggregation heuristic " << static_cast(h) - << " produced invalid aggregator."; + EXPECT_TRUE(correct_aggregator) << "Aggregation heuristic " << static_cast(h) + << " produced invalid aggregator."; for (auto b : builders) { - handle.b = b; - clt coarse_A = - coarsener_t::build_coarse_graph(handle, fine_A, aggregator); - bool correct_graph = verify_is_graph(coarse_A.mtx); - bool correct_coarsening = - verify_coarsening(fine_A, coarse_A); - EXPECT_TRUE(correct_graph) - << "Coarsening with dedupe method " << static_cast(b) - << " produced invalid graph with aggregation heuristic " - << static_cast(h) << "."; - EXPECT_TRUE(correct_coarsening) - << "Coarsening with dedupe method " << static_cast(b) - << " produced invalid coarsening with aggregation heuristic " - << static_cast(h) << "."; + handle.b = b; + clt coarse_A = coarsener_t::build_coarse_graph(handle, fine_A, aggregator); + bool correct_graph = verify_is_graph(coarse_A.mtx); + bool correct_coarsening = verify_coarsening(fine_A, coarse_A); + EXPECT_TRUE(correct_graph) << "Coarsening with dedupe method " << static_cast(b) + << " produced invalid graph with aggregation heuristic " << 
static_cast(h) << "."; + EXPECT_TRUE(correct_coarsening) << "Coarsening with dedupe method " << static_cast(b) + << " produced invalid coarsening with aggregation heuristic " + << static_cast(h) << "."; } } } template -void test_coarsen_random(lno_t numVerts, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +void test_coarsen_random(lno_t numVerts, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using execution_space = typename device::execution_space; - using crsMat = - KokkosSparse::CrsMatrix; - using graph_type = typename crsMat::StaticCrsGraphType; - using c_rowmap_t = typename graph_type::row_map_type; - using c_entries_t = typename graph_type::entries_type; - using rowmap_t = typename c_rowmap_t::non_const_type; - using entries_t = typename c_entries_t::non_const_type; - using svt = typename crsMat::values_type; + using crsMat = KokkosSparse::CrsMatrix; + using graph_type = typename crsMat::StaticCrsGraphType; + using c_rowmap_t = typename graph_type::row_map_type; + using c_entries_t = typename graph_type::entries_type; + using rowmap_t = typename c_rowmap_t::non_const_type; + using entries_t = typename c_entries_t::non_const_type; + using svt = typename crsMat::values_type; // Generate graph - crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numVerts, numVerts, nnz, row_size_variance, bandwidth); + crsMat A = + KokkosSparse::Impl::kk_generate_sparse_matrix(numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph rowmap_t symRowmap; entries_t symEntries; - KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - c_rowmap_t, c_entries_t, rowmap_t, entries_t, execution_space>( + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap( numVerts, G.row_map, G.entries, symRowmap, symEntries); graph_type GS(symEntries, symRowmap); svt symValues("sym values", symEntries.extent(0)); @@ -362,88 +334,65 @@ void test_coarsen_random(lno_t numVerts, size_type nnz, lno_t bandwidth, fine_A.vtx_wgts 
= vWgts; fine_A.level = 0; fine_A.uniform_weights = true; - std::vector heuristics = { - coarsener_t::HECv1, coarsener_t::Match, coarsener_t::MtMetis, - coarsener_t::MIS2, coarsener_t::GOSHv1, coarsener_t::GOSHv2}; - std::vector builders = { - coarsener_t::Sort, coarsener_t::Hashmap, coarsener_t::Hybrid, - coarsener_t::Spgemm, coarsener_t::Spgemm_transpose_first}; + std::vector heuristics = {coarsener_t::HECv1, coarsener_t::Match, + coarsener_t::MtMetis, coarsener_t::MIS2, + coarsener_t::GOSHv1, coarsener_t::GOSHv2}; + std::vector builders = {coarsener_t::Sort, coarsener_t::Hashmap, coarsener_t::Hybrid, + coarsener_t::Spgemm, coarsener_t::Spgemm_transpose_first}; for (auto h : heuristics) { - handle.h = h; - crsMat aggregator = - coarsener_t::generate_coarse_mapping(handle, fine_A.mtx, true); + handle.h = h; + crsMat aggregator = coarsener_t::generate_coarse_mapping(handle, fine_A.mtx, true); bool correct_aggregator = verify_aggregator(fine_A.mtx, aggregator); - EXPECT_TRUE(correct_aggregator) - << "Aggregation heuristic " << static_cast(h) - << " produced invalid aggregator."; + EXPECT_TRUE(correct_aggregator) << "Aggregation heuristic " << static_cast(h) + << " produced invalid aggregator."; for (auto b : builders) { - handle.b = b; - clt coarse_A = - coarsener_t::build_coarse_graph(handle, fine_A, aggregator); - bool correct_graph = verify_is_graph(coarse_A.mtx); - bool correct_coarsening = - verify_coarsening(fine_A, coarse_A); - EXPECT_TRUE(correct_graph) - << "Coarsening with dedupe method " << static_cast(b) - << " produced invalid graph with aggregation heuristic " - << static_cast(h) << "."; - EXPECT_TRUE(correct_coarsening) - << "Coarsening with dedupe method " << static_cast(b) - << " produced invalid coarsening with aggregation heuristic " - << static_cast(h) << "."; + handle.b = b; + clt coarse_A = coarsener_t::build_coarse_graph(handle, fine_A, aggregator); + bool correct_graph = verify_is_graph(coarse_A.mtx); + bool correct_coarsening = 
verify_coarsening(fine_A, coarse_A); + EXPECT_TRUE(correct_graph) << "Coarsening with dedupe method " << static_cast(b) + << " produced invalid graph with aggregation heuristic " << static_cast(h) << "."; + EXPECT_TRUE(correct_coarsening) << "Coarsening with dedupe method " << static_cast(b) + << " produced invalid coarsening with aggregation heuristic " + << static_cast(h) << "."; } } } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - graph##_##random_graph_coarsen##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_coarsen_random(5000, 5000 * 20, \ - 1000, 10); \ - test_coarsen_random(50, 50 * 10, 40, 10); \ - test_coarsen_random(5, 5 * 3, 5, 0); \ - } \ - TEST_F( \ - TestCategory, \ - graph##_##grid_graph_coarsen##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_coarsen_grid(); \ - } \ - TEST_F( \ - TestCategory, \ - graph##_##grid_graph_multilevel_coarsen##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_multilevel_coarsen_grid(); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, graph##_##random_graph_coarsen##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_coarsen_random(5000, 5000 * 20, 1000, 10); \ + test_coarsen_random(50, 50 * 10, 40, 10); \ + test_coarsen_random(5, 5 * 3, 5, 0); \ + } \ + TEST_F(TestCategory, graph##_##grid_graph_coarsen##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_coarsen_grid(); \ + } \ + TEST_F(TestCategory, graph##_##grid_graph_multilevel_coarsen##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_multilevel_coarsen_grid(); \ } // FIXME_SYCL #ifndef KOKKOS_ENABLE_SYCL #if defined(KOKKOSKERNELS_INST_DOUBLE) -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, int, TestDevice) #endif #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, size_t, TestDevice) #endif #endif diff --git a/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color.hpp b/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color.hpp index 101c489bc059..3ddfa7c9b0de 100644 --- a/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color.hpp +++ b/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color.hpp @@ -32,11 +32,8 @@ using namespace KokkosGraph::Experimental; namespace Test { template -int run_graphcolor( - crsMat_t input_mat, ColoringAlgorithm coloring_algorithm, - size_t &num_colors, - typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type - 
&vertex_colors) { +int run_graphcolor(crsMat_t input_mat, ColoringAlgorithm coloring_algorithm, size_t &num_colors, + typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type &vertex_colors) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type lno_view_t; typedef typename graph_t::entries_type lno_nnz_view_t; @@ -46,9 +43,8 @@ int run_graphcolor( typedef typename lno_nnz_view_t::value_type lno_t; typedef typename scalar_view_t::value_type scalar_t; - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -60,9 +56,8 @@ int run_graphcolor( const size_t num_rows_1 = input_mat.numRows(); const size_t num_cols_1 = input_mat.numCols(); - graph_color( - &kh, num_rows_1, num_cols_1, input_mat.graph.row_map, - input_mat.graph.entries); + graph_color(&kh, num_rows_1, num_cols_1, input_mat.graph.row_map, + input_mat.graph.entries); num_colors = kh.get_graph_coloring_handle()->get_num_colors(); vertex_colors = kh.get_graph_coloring_handle()->get_vertex_colors(); @@ -72,14 +67,10 @@ int run_graphcolor( } // namespace Test -template -void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type lno_view_t; typedef typename graph_t::entries_type lno_nnz_view_t; @@ -87,28 +78,24 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, typedef typename crsMat_t::values_type::non_const_type scalar_view_t; // typedef typename lno_view_t::non_const_value_type size_type; - lno_t numCols = numRows; - 
crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, row_size_variance, bandwidth); + lno_t numCols = numRows; + crsMat_t input_mat = + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, row_size_variance, bandwidth); typename lno_view_t::non_const_type sym_xadj; typename lno_nnz_view_t::non_const_type sym_adj; KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - lno_view_t, lno_nnz_view_t, typename lno_view_t::non_const_type, - typename lno_nnz_view_t::non_const_type, - typename device::execution_space>(numRows, input_mat.graph.row_map, - input_mat.graph.entries, sym_xadj, - sym_adj); + lno_view_t, lno_nnz_view_t, typename lno_view_t::non_const_type, typename lno_nnz_view_t::non_const_type, + typename device::execution_space>(numRows, input_mat.graph.row_map, input_mat.graph.entries, sym_xadj, sym_adj); size_type numentries = sym_adj.extent(0); scalar_view_t newValues("vals", numentries); graph_t static_graph(sym_adj, sym_xadj); input_mat = crsMat_t("CrsMatrix", numCols, newValues, static_graph); - std::vector coloring_algorithms = { - COLORING_DEFAULT, COLORING_SERIAL, COLORING_VB, COLORING_VBBIT, - COLORING_VBCS}; + std::vector coloring_algorithms = {COLORING_DEFAULT, COLORING_SERIAL, COLORING_VB, COLORING_VBBIT, + COLORING_VBCS}; // FIXME: VBD sometimes fails on CUDA and HIP #if defined(KOKKOS_ENABLE_CUDA) @@ -125,8 +112,7 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, // FIXME SYCL: re-enable this when EB is working #ifdef KOKKOS_ENABLE_SYCL - if (!std::is_same::value) { + if (!std::is_same::value) { coloring_algorithms.push_back(COLORING_EB); } #else @@ -140,28 +126,22 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, Kokkos::Timer timer1; crsMat_t output_mat; - int res = run_graphcolor(input_mat, coloring_algorithm, - num_colors, vector_colors); + int res = run_graphcolor(input_mat, coloring_algorithm, num_colors, vector_colors); // double coloring_time = 
timer1.seconds(); EXPECT_TRUE((res == 0)); const lno_t num_rows_1 = input_mat.numRows(); const lno_t num_cols_1 = input_mat.numCols(); - lno_t num_conflict = KokkosSparse::Impl::kk_is_d1_coloring_valid< - lno_view_t, lno_nnz_view_t, color_view_t, - typename device::execution_space>( - num_rows_1, num_cols_1, input_mat.graph.row_map, - input_mat.graph.entries, vector_colors); + lno_t num_conflict = KokkosSparse::Impl::kk_is_d1_coloring_valid( + num_rows_1, num_cols_1, input_mat.graph.row_map, input_mat.graph.entries, vector_colors); lno_t conf = 0; { // also check the correctness of the validation code :) - typename lno_view_t::HostMirror hrm = - Kokkos::create_mirror_view(input_mat.graph.row_map); - typename lno_nnz_view_t::HostMirror hentries = - Kokkos::create_mirror_view(input_mat.graph.entries); - typename color_view_t::HostMirror hcolor = - Kokkos::create_mirror_view(vector_colors); + typename lno_view_t::HostMirror hrm = Kokkos::create_mirror_view(input_mat.graph.row_map); + typename lno_nnz_view_t::HostMirror hentries = Kokkos::create_mirror_view(input_mat.graph.entries); + typename color_view_t::HostMirror hcolor = Kokkos::create_mirror_view(vector_colors); Kokkos::deep_copy(hrm, input_mat.graph.row_map); Kokkos::deep_copy(hentries, input_mat.graph.entries); Kokkos::deep_copy(hcolor, vector_colors); @@ -179,53 +159,39 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, } } } - EXPECT_TRUE((num_conflict == conf)) - << "Coloring algo " << (int)coloring_algorithm - << ": kk_is_d1_coloring_valid returned incorrect number of conflicts (" - << num_conflict << ", should be " << conf << ")"; - - EXPECT_TRUE((num_conflict == 0)) - << "Coloring algo " << (int)coloring_algorithm - << ": D1 coloring produced invalid coloring (" << num_conflict - << " conflicts)"; + EXPECT_TRUE((num_conflict == conf)) << "Coloring algo " << (int)coloring_algorithm + << ": kk_is_d1_coloring_valid returned incorrect number of conflicts (" + << num_conflict << ", should be 
" << conf << ")"; + + EXPECT_TRUE((num_conflict == 0)) << "Coloring algo " << (int)coloring_algorithm + << ": D1 coloring produced invalid coloring (" << num_conflict << " conflicts)"; } // device::execution_space::finalize(); } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - graph##_##graph_color##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_coloring(50000, 50000 * 30, 200, \ - 10); \ - test_coloring(50000, 50000 * 30, 100, \ - 10); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, graph##_##graph_color##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_coloring(50000, 50000 * 30, 200, 10); \ + test_coloring(50000, 50000 * 30, 100, 10); \ } -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, 
int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int64_t, size_t, TestDevice) #endif diff --git a/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_deterministic.hpp b/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_deterministic.hpp index 7bd3c4cd400d..87771de84fd7 100644 --- a/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_deterministic.hpp +++ b/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_deterministic.hpp @@ -32,11 +32,8 @@ using namespace KokkosGraph::Experimental; namespace Test { template -int run_graphcolor_deter( - crsMat_t input_mat, ColoringAlgorithm coloring_algorithm, - size_t &num_colors, - typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type - &vertex_colors) { +int run_graphcolor_deter(crsMat_t input_mat, ColoringAlgorithm coloring_algorithm, size_t &num_colors, + typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type &vertex_colors) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type lno_view_t; typedef typename graph_t::entries_type lno_nnz_view_t; @@ -46,9 +43,8 @@ int run_graphcolor_deter( typedef typename lno_nnz_view_t::value_type lno_t; typedef typename scalar_view_t::value_type scalar_t; - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -60,9 +56,8 @@ int run_graphcolor_deter( const size_t num_rows_1 = input_mat.numRows(); const size_t num_cols_1 = 
input_mat.numCols(); - graph_color( - &kh, num_rows_1, num_cols_1, input_mat.graph.row_map, - input_mat.graph.entries); + graph_color(&kh, num_rows_1, num_cols_1, input_mat.graph.row_map, + input_mat.graph.entries); num_colors = kh.get_graph_coloring_handle()->get_num_colors(); vertex_colors = kh.get_graph_coloring_handle()->get_vertex_colors(); @@ -72,13 +67,10 @@ int run_graphcolor_deter( } // namespace Test -template +template void test_coloring_deterministic(lno_t numRows, size_type nnz) { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type lno_view_t; typedef typename graph_t::entries_type lno_nnz_view_t; @@ -89,11 +81,9 @@ void test_coloring_deterministic(lno_t numRows, size_type nnz) { lno_t numCols = numRows; typename lno_view_t::non_const_type xadj("xadj", numRows + 1); - typename lno_view_t::non_const_type::HostMirror h_xadj = - Kokkos::create_mirror_view(xadj); + typename lno_view_t::non_const_type::HostMirror h_xadj = Kokkos::create_mirror_view(xadj); typename lno_nnz_view_t::non_const_type adj("adj", nnz); - typename lno_nnz_view_t::non_const_type::HostMirror h_adj = - Kokkos::create_mirror_view(adj); + typename lno_nnz_view_t::non_const_type::HostMirror h_adj = Kokkos::create_mirror_view(adj); // Fill up the rowPtr array h_xadj(0) = 0; @@ -211,18 +201,15 @@ void test_coloring_deterministic(lno_t numRows, size_type nnz) { size_t num_colors; Kokkos::Timer timer1; - int res = run_graphcolor_deter( - input_mat, coloring_algorithm, num_colors, vector_colors); + int res = run_graphcolor_deter(input_mat, coloring_algorithm, num_colors, vector_colors); EXPECT_TRUE((res == 0)); EXPECT_TRUE((num_colors == 2)); - size_type num_conflict = 0; - typename color_view_t::HostMirror h_vector_colors = - Kokkos::create_mirror_view(vector_colors); + size_type num_conflict = 0; + typename 
color_view_t::HostMirror h_vector_colors = Kokkos::create_mirror_view(vector_colors); Kokkos::deep_copy(h_vector_colors, vector_colors); - int exact_colors[18] = {2, 1, 2, 1, 1, 2, 1, 2, 2, - 1, 2, 1, 2, 1, 2, 1, 2, 1}; + int exact_colors[18] = {2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1}; for (lno_t vertexIdx = 0; vertexIdx < numRows; ++vertexIdx) { if (h_vector_colors(vertexIdx) != exact_colors[vertexIdx]) { @@ -235,39 +222,29 @@ void test_coloring_deterministic(lno_t numRows, size_type nnz) { } } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - graph##_##graph_color_deterministic##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_coloring_deterministic(18, 74); \ - test_coloring_deterministic(18, 74); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, graph##_##graph_color_deterministic##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_coloring_deterministic(18, 74); \ + test_coloring_deterministic(18, 74); \ } -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - 
(!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(default_scalar, int64_t, size_t, TestDevice) #endif diff --git a/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_distance2.hpp b/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_distance2.hpp index 44ddaed0bf7a..ac3bbb7a1896 100644 --- a/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_distance2.hpp +++ b/packages/kokkos-kernels/graph/unit_test/Test_Graph_graph_color_distance2.hpp @@ -35,10 +35,8 @@ using namespace KokkosGraph::Experimental; namespace Test { // Verify that a distance-2 coloring is correct (all views must be hostspace) -template -bool verifyD2Coloring(lno_t numVerts, const rowmap_t& rowmap, - const entries_t& entries, const colors_t& colors) { +template +bool verifyD2Coloring(lno_t numVerts, const rowmap_t& rowmap, const entries_t& entries, const colors_t& colors) { // Just do the simplest possible neighbors-of-neighbors loop to find conflicts for (lno_t v = 0; v < numVerts; v++) { if (colors(v) == 0) { @@ -52,8 +50,7 @@ bool verifyD2Coloring(lno_t numVerts, const rowmap_t& rowmap, if (nei1 < numVerts && nei1 != v) { // check for dist-1 conflict if (colors(v) == colors(nei1)) { - std::cout << "Dist-1 conflict between " << v << " and " << nei1 - << '\n'; + std::cout << "Dist-1 conflict between " << 
v << " and " << nei1 << '\n'; return false; } // iterate over dist-2 neighbors @@ -63,8 +60,7 @@ bool verifyD2Coloring(lno_t numVerts, const rowmap_t& rowmap, lno_t nei2 = entries(j); if (nei2 < numVerts && nei2 != v) { if (colors(v) == colors(nei2)) { - std::cout << "Dist-2 conflict between " << v << " and " << nei2 - << '\n'; + std::cout << "Dist-2 conflict between " << v << " and " << nei2 << '\n'; return false; } } @@ -75,14 +71,9 @@ bool verifyD2Coloring(lno_t numVerts, const rowmap_t& rowmap, return true; } -template -bool verifyBipartitePartialColoring(lno_t numRows, lno_t numCols, - const rowmap_t& rowmap, - const entries_t& entries, - const rowmap_t& t_rowmap, - const entries_t& t_entries, - const colors_t& colors) { +template +bool verifyBipartitePartialColoring(lno_t numRows, lno_t numCols, const rowmap_t& rowmap, const entries_t& entries, + const rowmap_t& t_rowmap, const entries_t& t_entries, const colors_t& colors) { // Just do the simplest possible neighbors-of-neighbors loop to find conflicts for (lno_t v = 0; v < numRows; v++) { if (colors(v) == 0) { @@ -101,8 +92,7 @@ bool verifyBipartitePartialColoring(lno_t numRows, lno_t numCols, lno_t nei2 = t_entries(j); if (nei2 < numRows && nei2 != v) { if (colors(v) == colors(nei2)) { - std::cout << "Hyperedge conflict between " << v << " and " << nei2 - << '\n'; + std::cout << "Hyperedge conflict between " << v << " and " << nei2 << '\n'; return false; } } @@ -114,256 +104,189 @@ bool verifyBipartitePartialColoring(lno_t numRows, lno_t numCols, } } // namespace Test -template -void test_dist2_coloring(lno_t numVerts, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_dist2_coloring(lno_t numVerts, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using execution_space = typename device::execution_space; using memory_space = typename device::memory_space; - using crsMat = - KokkosSparse::CrsMatrix; - using graph_type = typename crsMat::StaticCrsGraphType; - using 
c_rowmap_t = typename graph_type::row_map_type; - using c_entries_t = typename graph_type::entries_type; - using rowmap_t = typename c_rowmap_t::non_const_type; - using entries_t = typename c_entries_t::non_const_type; - using KernelHandle = - KokkosKernelsHandle; + using crsMat = KokkosSparse::CrsMatrix; + using graph_type = typename crsMat::StaticCrsGraphType; + using c_rowmap_t = typename graph_type::row_map_type; + using c_entries_t = typename graph_type::entries_type; + using rowmap_t = typename c_rowmap_t::non_const_type; + using entries_t = typename c_entries_t::non_const_type; + using KernelHandle = KokkosKernelsHandle; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numVerts, numVerts, nnz, row_size_variance, bandwidth); + crsMat A = + KokkosSparse::Impl::kk_generate_sparse_matrix(numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph rowmap_t symRowmap; entries_t symEntries; - KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - c_rowmap_t, c_entries_t, rowmap_t, entries_t, execution_space>( + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap( numVerts, G.row_map, G.entries, symRowmap, symEntries); - auto rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); - std::vector algos = { - COLORING_D2_DEFAULT, COLORING_D2_SERIAL, COLORING_D2_VB, - COLORING_D2_VB_BIT, COLORING_D2_VB_BIT_EF, COLORING_D2_NB_BIT}; + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); + std::vector algos = {COLORING_D2_DEFAULT, COLORING_D2_SERIAL, COLORING_D2_VB, + COLORING_D2_VB_BIT, COLORING_D2_VB_BIT_EF, COLORING_D2_NB_BIT}; for (auto algo : algos) { KernelHandle kh; kh.create_distance2_graph_coloring_handle(algo); 
// Compute the Distance-2 graph coloring. - graph_color_distance2( - &kh, numVerts, symRowmap, symEntries); + graph_color_distance2(&kh, numVerts, symRowmap, symEntries); execution_space().fence(); auto coloring_handle = kh.get_distance2_graph_coloring_handle(); auto colors = coloring_handle->get_vertex_colors(); - auto colorsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), colors); - auto numColors = coloring_handle->get_num_colors(); + auto colorsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), colors); + auto numColors = coloring_handle->get_num_colors(); EXPECT_LE(numColors, numVerts); bool success = - Test::verifyD2Coloring( + Test::verifyD2Coloring( numVerts, rowmapHost, entriesHost, colorsHost); - EXPECT_TRUE(success) << "Dist-2: algorithm " - << coloring_handle->getD2AlgorithmName() + EXPECT_TRUE(success) << "Dist-2: algorithm " << coloring_handle->getD2AlgorithmName() << " produced invalid coloring"; kh.destroy_distance2_graph_coloring_handle(); } } -template -void test_bipartite_symmetric(lno_t numVerts, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_bipartite_symmetric(lno_t numVerts, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using execution_space = typename device::execution_space; using memory_space = typename device::memory_space; - using crsMat = - KokkosSparse::CrsMatrix; - using graph_type = typename crsMat::StaticCrsGraphType; - using c_rowmap_t = typename graph_type::row_map_type; - using c_entries_t = typename graph_type::entries_type; - using rowmap_t = typename c_rowmap_t::non_const_type; - using entries_t = typename c_entries_t::non_const_type; - using KernelHandle = - KokkosKernelsHandle; + using crsMat = KokkosSparse::CrsMatrix; + using graph_type = typename crsMat::StaticCrsGraphType; + using c_rowmap_t = typename graph_type::row_map_type; + using c_entries_t = typename graph_type::entries_type; + using rowmap_t = typename c_rowmap_t::non_const_type; + 
using entries_t = typename c_entries_t::non_const_type; + using KernelHandle = KokkosKernelsHandle; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numVerts, numVerts, nnz, row_size_variance, bandwidth); + crsMat A = + KokkosSparse::Impl::kk_generate_sparse_matrix(numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph rowmap_t symRowmap; entries_t symEntries; - KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - c_rowmap_t, c_entries_t, rowmap_t, entries_t, execution_space>( + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap( numVerts, G.row_map, G.entries, symRowmap, symEntries); - auto rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); - std::vector algos = { - COLORING_D2_DEFAULT, COLORING_D2_SERIAL, COLORING_D2_VB, - COLORING_D2_VB_BIT, COLORING_D2_VB_BIT_EF, COLORING_D2_NB_BIT}; + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); + std::vector algos = {COLORING_D2_DEFAULT, COLORING_D2_SERIAL, COLORING_D2_VB, + COLORING_D2_VB_BIT, COLORING_D2_VB_BIT_EF, COLORING_D2_NB_BIT}; for (auto algo : algos) { KernelHandle kh; kh.create_distance2_graph_coloring_handle(algo); // Compute the Distance-2 graph coloring. 
- bipartite_color_rows( - &kh, numVerts, numVerts, symRowmap, symEntries, true); + bipartite_color_rows(&kh, numVerts, numVerts, symRowmap, symEntries, true); execution_space().fence(); auto coloring_handle = kh.get_distance2_graph_coloring_handle(); auto colors = coloring_handle->get_vertex_colors(); - auto colorsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), colors); - auto numColors = coloring_handle->get_num_colors(); + auto colorsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), colors); + auto numColors = coloring_handle->get_num_colors(); EXPECT_LE(numColors, numVerts); - bool success = Test::verifyBipartitePartialColoring< - lno_t, size_type, decltype(rowmapHost), decltype(entriesHost), - decltype(colorsHost)>(numVerts, numVerts, rowmapHost, entriesHost, - rowmapHost, entriesHost, colorsHost); - EXPECT_TRUE(success) << "Dist-2: algorithm " - << coloring_handle->getD2AlgorithmName() + bool success = Test::verifyBipartitePartialColoring( + numVerts, numVerts, rowmapHost, entriesHost, rowmapHost, entriesHost, colorsHost); + EXPECT_TRUE(success) << "Dist-2: algorithm " << coloring_handle->getD2AlgorithmName() << " produced invalid coloring"; kh.destroy_distance2_graph_coloring_handle(); } } -template -void test_bipartite(lno_t numRows, lno_t numCols, size_type nnz, - lno_t bandwidth, lno_t row_size_variance, bool colorRows) { +template +void test_bipartite(lno_t numRows, lno_t numCols, size_type nnz, lno_t bandwidth, lno_t row_size_variance, + bool colorRows) { using execution_space = typename device::execution_space; using memory_space = typename device::memory_space; - using crsMat = - KokkosSparse::CrsMatrix; - using graph_type = typename crsMat::StaticCrsGraphType; - using rowmap_t = typename graph_type::row_map_type::non_const_type; - using entries_t = typename graph_type::entries_type::non_const_type; - using c_rowmap_t = typename graph_type::row_map_type; - using c_entries_t = typename graph_type::entries_type; - using 
KernelHandle = - KokkosKernelsHandle; + using crsMat = KokkosSparse::CrsMatrix; + using graph_type = typename crsMat::StaticCrsGraphType; + using rowmap_t = typename graph_type::row_map_type::non_const_type; + using entries_t = typename graph_type::entries_type::non_const_type; + using c_rowmap_t = typename graph_type::row_map_type; + using c_entries_t = typename graph_type::entries_type; + using KernelHandle = KokkosKernelsHandle; // Generate graph - crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, row_size_variance, bandwidth); - auto G = A.graph; + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, row_size_variance, bandwidth); + auto G = A.graph; rowmap_t t_rowmap("rowmap^T", numCols + 1); entries_t t_entries("entries^T", G.entries.extent(0)); - KokkosSparse::Impl::transpose_graph( + KokkosSparse::Impl::transpose_graph( numRows, numCols, G.row_map, G.entries, t_rowmap, t_entries); // TODO: remove me, shouldn't be needed even with UVM execution_space().fence(); - auto rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), G.row_map); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), G.entries); - auto t_rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), t_rowmap); - auto t_entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), t_entries); - std::vector algos = { - COLORING_D2_DEFAULT, COLORING_D2_SERIAL, COLORING_D2_VB, - COLORING_D2_VB_BIT, COLORING_D2_VB_BIT_EF, COLORING_D2_NB_BIT}; + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), G.row_map); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), G.entries); + auto t_rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), t_rowmap); + auto t_entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), t_entries); + std::vector algos = {COLORING_D2_DEFAULT, COLORING_D2_SERIAL, 
COLORING_D2_VB, + COLORING_D2_VB_BIT, COLORING_D2_VB_BIT_EF, COLORING_D2_NB_BIT}; for (auto algo : algos) { KernelHandle kh; kh.create_distance2_graph_coloring_handle(algo); // Compute the one-sided bipartite coloring. if (colorRows) { - bipartite_color_rows( - &kh, numRows, numCols, G.row_map, G.entries); + bipartite_color_rows(&kh, numRows, numCols, G.row_map, G.entries); } else { - bipartite_color_columns( - &kh, numRows, numCols, G.row_map, G.entries); + bipartite_color_columns(&kh, numRows, numCols, G.row_map, G.entries); } execution_space().fence(); auto coloring_handle = kh.get_distance2_graph_coloring_handle(); auto colors = coloring_handle->get_vertex_colors(); - auto colorsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), colors); - auto numColors = coloring_handle->get_num_colors(); + auto colorsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), colors); + auto numColors = coloring_handle->get_num_colors(); bool success; if (colorRows) { EXPECT_LE(numColors, numRows); - success = Test::verifyBipartitePartialColoring< - lno_t, size_type, decltype(rowmapHost), decltype(entriesHost), - decltype(colorsHost)>(numRows, numCols, rowmapHost, entriesHost, - t_rowmapHost, t_entriesHost, colorsHost); + success = Test::verifyBipartitePartialColoring(numRows, numCols, rowmapHost, entriesHost, + t_rowmapHost, t_entriesHost, colorsHost); } else { EXPECT_LE(numColors, numCols); - success = Test::verifyBipartitePartialColoring< - lno_t, size_type, decltype(rowmapHost), decltype(entriesHost), - decltype(colorsHost)>(numCols, numRows, t_rowmapHost, t_entriesHost, - rowmapHost, entriesHost, colorsHost); + success = Test::verifyBipartitePartialColoring( + numCols, numRows, t_rowmapHost, t_entriesHost, rowmapHost, entriesHost, colorsHost); } - EXPECT_TRUE(success) << "Bipartite " << (colorRows ? 
"row" : "column") - << " coloring: algorithm " - << coloring_handle->getD2AlgorithmName() - << " produced invalid coloring"; + EXPECT_TRUE(success) << "Bipartite " << (colorRows ? "row" : "column") << " coloring: algorithm " + << coloring_handle->getD2AlgorithmName() << " produced invalid coloring"; kh.destroy_distance2_graph_coloring_handle(); } } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - graph##_##graph_color_distance2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_dist2_coloring(5000, 5000 * 20, \ - 1000, 10); \ - test_dist2_coloring(50, 50 * 10, 40, 10); \ - } \ - TEST_F( \ - TestCategory, \ - graph##_##graph_color_bipartite_sym##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_bipartite_symmetric(50, 50 * 5, 30, \ - 1); \ - test_bipartite_symmetric(2000, 2000 * 20, \ - 800, 10); \ - } \ - TEST_F( \ - TestCategory, \ - graph##_##graph_color_bipartite_row##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_bipartite(2000, 4000, 3000 * 20, \ - 800, 10, true); \ - test_bipartite(4000, 2000, 3000 * 20, \ - 800, 10, true); \ - } \ - TEST_F( \ - TestCategory, \ - graph##_##graph_color_bipartite_col##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_bipartite(2000, 4000, 3000 * 20, \ - 800, 10, false); \ - test_bipartite(4000, 2000, 3000 * 20, \ - 800, 10, false); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, graph##_##graph_color_distance2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_dist2_coloring(5000, 5000 * 20, 1000, 10); \ + test_dist2_coloring(50, 50 * 10, 40, 10); \ + } \ + TEST_F(TestCategory, graph##_##graph_color_bipartite_sym##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_bipartite_symmetric(50, 50 * 5, 30, 1); \ + test_bipartite_symmetric(2000, 2000 * 20, 800, 10); \ + } \ + TEST_F(TestCategory, graph##_##graph_color_bipartite_row##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_bipartite(2000, 4000, 3000 * 20, 800, 
10, true); \ + test_bipartite(4000, 2000, 3000 * 20, 800, 10, true); \ + } \ + TEST_F(TestCategory, graph##_##graph_color_bipartite_col##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_bipartite(2000, 4000, 3000 * 20, 800, 10, false); \ + test_bipartite(4000, 2000, 3000 * 20, 800, 10, false); \ } #if defined(KOKKOSKERNELS_INST_DOUBLE) -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) 
EXECUTE_TEST(double, int64_t, size_t, TestDevice) #endif #endif diff --git a/packages/kokkos-kernels/graph/unit_test/Test_Graph_mis2.hpp b/packages/kokkos-kernels/graph/unit_test/Test_Graph_mis2.hpp index c6fb7562e713..cd96badd44fb 100644 --- a/packages/kokkos-kernels/graph/unit_test/Test_Graph_mis2.hpp +++ b/packages/kokkos-kernels/graph/unit_test/Test_Graph_mis2.hpp @@ -34,10 +34,8 @@ enum CoarseningType { PHASE2, NO_PHASE2 }; namespace Test { -template -bool verifyD2MIS(lno_t numVerts, const rowmap_t& rowmap, - const entries_t& entries, const mis_t& misArray) { +template +bool verifyD2MIS(lno_t numVerts, const rowmap_t& rowmap, const entries_t& entries, const mis_t& misArray) { // set a std::set of the mis, for fast membership test std::set mis; for (size_t i = 0; i < misArray.extent(0); i++) mis.insert(misArray(i)); @@ -82,74 +80,58 @@ bool verifyD2MIS(lno_t numVerts, const rowmap_t& rowmap, } } // namespace Test -template -void test_mis2(lno_t numVerts, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_mis2(lno_t numVerts, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using execution_space = typename device::execution_space; - using crsMat = - KokkosSparse::CrsMatrix; - using graph_type = typename crsMat::StaticCrsGraphType; - using c_rowmap_t = typename graph_type::row_map_type; - using c_entries_t = typename graph_type::entries_type; - using rowmap_t = typename c_rowmap_t::non_const_type; - using entries_t = typename c_entries_t::non_const_type; + using crsMat = KokkosSparse::CrsMatrix; + using graph_type = typename crsMat::StaticCrsGraphType; + using c_rowmap_t = typename graph_type::row_map_type; + using c_entries_t = typename graph_type::entries_type; + using rowmap_t = typename c_rowmap_t::non_const_type; + using entries_t = typename c_entries_t::non_const_type; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numVerts, numVerts, nnz, 
row_size_variance, bandwidth); + crsMat A = + KokkosSparse::Impl::kk_generate_sparse_matrix(numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph rowmap_t symRowmap; entries_t symEntries; - KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - c_rowmap_t, c_entries_t, rowmap_t, entries_t, execution_space>( + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap( numVerts, G.row_map, G.entries, symRowmap, symEntries); - auto rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); // For each algorithm, compute and verify the MIS std::vector algos = {MIS2_FAST, MIS2_QUALITY}; for (auto algo : algos) { - auto mis = KokkosGraph::graph_d2_mis( - symRowmap, symEntries, algo); - auto misHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), mis); - bool success = Test::verifyD2MIS( + auto mis = KokkosGraph::graph_d2_mis(symRowmap, symEntries, algo); + auto misHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), mis); + bool success = Test::verifyD2MIS( numVerts, rowmapHost, entriesHost, misHost); - EXPECT_TRUE(success) << "Dist-2 MIS (algo " << (int)algo - << ") produced invalid set."; + EXPECT_TRUE(success) << "Dist-2 MIS (algo " << (int)algo << ") produced invalid set."; } } -template -void test_mis2_coarsening(lno_t numVerts, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_mis2_coarsening(lno_t numVerts, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using execution_space = typename device::execution_space; - using crsMat = - KokkosSparse::CrsMatrix; - using graph_type = typename crsMat::StaticCrsGraphType; - using c_rowmap_t = typename 
graph_type::row_map_type; - using c_entries_t = typename graph_type::entries_type; - using rowmap_t = typename c_rowmap_t::non_const_type; - using entries_t = typename c_entries_t::non_const_type; - using labels_t = entries_t; + using crsMat = KokkosSparse::CrsMatrix; + using graph_type = typename crsMat::StaticCrsGraphType; + using c_rowmap_t = typename graph_type::row_map_type; + using c_entries_t = typename graph_type::entries_type; + using rowmap_t = typename c_rowmap_t::non_const_type; + using entries_t = typename c_entries_t::non_const_type; + using labels_t = entries_t; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numVerts, numVerts, nnz, row_size_variance, bandwidth); + crsMat A = + KokkosSparse::Impl::kk_generate_sparse_matrix(numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph rowmap_t symRowmap; entries_t symEntries; - KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - c_rowmap_t, c_entries_t, rowmap_t, entries_t, execution_space>( + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap( numVerts, G.row_map, G.entries, symRowmap, symEntries); - auto rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symRowmap); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), symEntries); // For each algorithm, compute and verify the MIS std::vector algos = {PHASE2, NO_PHASE2}; for (auto algo : algos) { @@ -157,46 +139,34 @@ void test_mis2_coarsening(lno_t numVerts, size_type nnz, lno_t bandwidth, labels_t labels; switch (algo) { case NO_PHASE2: - labels = KokkosGraph::graph_mis2_coarsen( - symRowmap, symEntries, numClusters); + labels = KokkosGraph::graph_mis2_coarsen(symRowmap, symEntries, numClusters); break; case 
PHASE2: - labels = KokkosGraph::graph_mis2_aggregate( - symRowmap, symEntries, numClusters); + labels = KokkosGraph::graph_mis2_aggregate(symRowmap, symEntries, numClusters); } - auto labelsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), labels); + auto labelsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), labels); // Not a strong test, but sanity check the number of clusters returned EXPECT_TRUE(numClusters >= 1 && numClusters <= numVerts); // Check that every label is in the range [0, numClusters) - for (lno_t i = 0; i < numVerts; i++) - EXPECT_TRUE(0 <= labelsHost(i) && labelsHost(i) < numClusters); + for (lno_t i = 0; i < numVerts; i++) EXPECT_TRUE(0 <= labelsHost(i) && labelsHost(i) < numClusters); // Test explicit coarsening given the labels, with and without compressing // the result rowmap_t coarseRowmapNC, coarseRowmapC; entries_t coarseEntriesNC, coarseEntriesC; - KokkosGraph::Experimental::graph_explicit_coarsen< - device, rowmap_t, entries_t, entries_t, rowmap_t, entries_t>( - symRowmap, symEntries, labels, numClusters, coarseRowmapNC, - coarseEntriesNC, false); - KokkosGraph::Experimental::graph_explicit_coarsen< - device, rowmap_t, entries_t, entries_t, rowmap_t, entries_t>( - symRowmap, symEntries, labels, numClusters, coarseRowmapC, - coarseEntriesC, true); + KokkosGraph::Experimental::graph_explicit_coarsen( + symRowmap, symEntries, labels, numClusters, coarseRowmapNC, coarseEntriesNC, false); + KokkosGraph::Experimental::graph_explicit_coarsen( + symRowmap, symEntries, labels, numClusters, coarseRowmapC, coarseEntriesC, true); EXPECT_EQ(coarseRowmapC.extent(0), numClusters + 1); EXPECT_EQ(coarseRowmapNC.extent(0), numClusters + 1); // Check that coarse graph doesn't have more edges than fine graph EXPECT_LE(coarseEntriesC.extent(0), symEntries.extent(0)); EXPECT_LE(coarseEntriesNC.extent(0), symEntries.extent(0)); // Verify compression is working. 
- auto hostRowmapNC = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - coarseRowmapNC); - auto hostEntriesNC = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), coarseEntriesNC); - auto hostRowmapC = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarseRowmapC); - auto hostEntriesC = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - coarseEntriesC); + auto hostRowmapNC = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarseRowmapNC); + auto hostEntriesNC = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarseEntriesNC); + auto hostRowmapC = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarseRowmapC); + auto hostEntriesC = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), coarseEntriesC); for (lno_t i = 0; i < numClusters; i++) { // std::set maintains uniqueness as well as ascending order of elements. // So it should exactly match the entries in the compressed version. @@ -215,11 +185,9 @@ void test_mis2_coarsening(lno_t numVerts, size_type nnz, lno_t bandwidth, } } -template +template void test_mis2_coarsening_zero_rows() { - using crsMat = - KokkosSparse::CrsMatrix; + using crsMat = KokkosSparse::CrsMatrix; using graph_type = typename crsMat::StaticCrsGraphType; using c_rowmap_t = typename graph_type::row_map_type; using c_entries_t = typename graph_type::entries_type; @@ -230,72 +198,55 @@ void test_mis2_coarsening_zero_rows() { // note: MIS2 coarsening first calls MIS2 on the fine graph, so this covers // the zero-row case for MIS2 alone. 
lno_t numClusters; - auto labels = KokkosGraph::graph_mis2_coarsen( - fineRowmap, fineEntries, numClusters); + auto labels = KokkosGraph::graph_mis2_coarsen(fineRowmap, fineEntries, numClusters); EXPECT_EQ(numClusters, 0); EXPECT_EQ(labels.extent(0), 0); // coarsen, should also produce a graph with 0 rows/entries rowmap_t coarseRowmap; entries_t coarseEntries; - KokkosGraph::Experimental::graph_explicit_coarsen< - device, rowmap_t, entries_t, entries_t, rowmap_t, entries_t>( + KokkosGraph::Experimental::graph_explicit_coarsen( fineRowmap, fineEntries, labels, 0, coarseRowmap, coarseEntries, false); EXPECT_LE(coarseRowmap.extent(0), 1); EXPECT_EQ(coarseEntries.extent(0), 0); - KokkosGraph::Experimental::graph_explicit_coarsen< - device, rowmap_t, entries_t, entries_t, rowmap_t, entries_t>( + KokkosGraph::Experimental::graph_explicit_coarsen( fineRowmap, fineEntries, labels, 0, coarseRowmap, coarseEntries, true); EXPECT_LE(coarseRowmap.extent(0), 1); EXPECT_EQ(coarseEntries.extent(0), 0); } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - graph##_##graph_mis2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_mis2(5000, 5000 * 20, 1000, 10); \ - test_mis2(50, 50 * 10, 40, 10); \ - test_mis2(5, 5 * 3, 5, 0); \ - } \ - TEST_F( \ - TestCategory, \ - graph##_##graph_mis2_coarsening##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_mis2_coarsening(5000, 5000 * 200, \ - 2000, 10); \ - test_mis2_coarsening(5000, 5000 * 20, \ - 1000, 10); \ - test_mis2_coarsening(50, 50 * 10, 40, \ - 10); \ - test_mis2_coarsening(5, 5 * 3, 5, 0); \ - test_mis2_coarsening_zero_rows(); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, graph##_##graph_mis2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_mis2(5000, 5000 * 20, 1000, 10); \ + test_mis2(50, 50 * 10, 40, 10); \ + test_mis2(5, 5 * 3, 5, 0); \ + } \ + TEST_F(TestCategory, 
graph##_##graph_mis2_coarsening##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_mis2_coarsening(5000, 5000 * 200, 2000, 10); \ + test_mis2_coarsening(5000, 5000 * 20, 1000, 10); \ + test_mis2_coarsening(50, 50 * 10, 40, 10); \ + test_mis2_coarsening(5, 5 * 3, 5, 0); \ + test_mis2_coarsening_zero_rows(); \ } #if defined(KOKKOSKERNELS_INST_DOUBLE) -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, int, TestDevice) #endif #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, size_t, TestDevice) #endif diff --git a/packages/kokkos-kernels/graph/unit_test/Test_Graph_rcm.hpp b/packages/kokkos-kernels/graph/unit_test/Test_Graph_rcm.hpp index 2e05554d2da7..0a9543367a53 100644 --- a/packages/kokkos-kernels/graph/unit_test/Test_Graph_rcm.hpp +++ b/packages/kokkos-kernels/graph/unit_test/Test_Graph_rcm.hpp @@ -19,20 +19,17 @@ #include "KokkosGraph_RCM.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosSparse_CrsMatrix.hpp" +#include "Kokkos_StaticCrsGraph.hpp" #include // Generates a graph from 3D 7-pt stencil. Slices grid into 2 connected // components near the middle of X dimension. template -void generate7pt(rowmap_t& rowmapView, entries_t& entriesView, int gridX, - int gridY, int gridZ) { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - auto getVertexID = [=](lno_t x, lno_t y, lno_t z) -> lno_t { - return x + y * gridX + z * gridX * gridY; - }; +void generate7pt(rowmap_t& rowmapView, entries_t& entriesView, int gridX, int gridY, int gridZ) { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + auto getVertexID = [=](lno_t x, lno_t y, lno_t z) -> lno_t { return x + y * gridX + z * gridX * gridY; }; lno_t numVertices = gridX * gridY * gridZ; // Generate the graph on host (use std::vector to not need to know // how many entries ahead of time) @@ -44,10 +41,8 @@ void generate7pt(rowmap_t& rowmapView, entries_t& entriesView, int gridX, for (lno_t j = 0; j < gridY; j++) { for (lno_t i = 0; i < gridX; i++) { lno_t v = getVertexID(i, j, k); - if (i != 0 && i != xslice + 1) - entries.push_back(getVertexID(i - 1, j, k)); - if (i != gridX - 1 && i != xslice) - entries.push_back(getVertexID(i + 1, j, k)); + if (i != 0 && i != xslice + 1) entries.push_back(getVertexID(i - 1, j, k)); + if (i != gridX - 1 && i != xslice) 
entries.push_back(getVertexID(i + 1, j, k)); if (j != 0) entries.push_back(getVertexID(i, j - 1, k)); if (j != gridY - 1) entries.push_back(getVertexID(i, j + 1, k)); if (k != 0) entries.push_back(getVertexID(i, j, k - 1)); @@ -59,29 +54,23 @@ void generate7pt(rowmap_t& rowmapView, entries_t& entriesView, int gridX, size_type numEdges = entries.size(); // Now that the graph is formed, copy rowmap and entries to Kokkos::Views in // device memory The nonowning host views just alias the std::vectors. - Kokkos::View> - rowmapHost(rowmap.data(), numVertices + 1); - Kokkos::View> - entriesHost(entries.data(), numEdges); + Kokkos::View> rowmapHost(rowmap.data(), + numVertices + 1); + Kokkos::View> entriesHost(entries.data(), + numEdges); // Allocate owning views on device with the correct size. - rowmapView = - rowmap_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Rowmap"), - numVertices + 1); - entriesView = entries_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Colinds"), numEdges); + rowmapView = rowmap_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Rowmap"), numVertices + 1); + entriesView = entries_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Colinds"), numEdges); // Copy the graph from host to device Kokkos::deep_copy(rowmapView, rowmapHost); Kokkos::deep_copy(entriesView, entriesHost); } template -int maxBandwidth(const rowmap_t& rowmap, const entries_t& entries, - const labels_t& invPerm, const labels_t& perm) { +int maxBandwidth(const rowmap_t& rowmap, const entries_t& entries, const labels_t& invPerm, const labels_t& perm) { using size_type = typename rowmap_t::non_const_value_type; using lno_t = typename entries_t::non_const_value_type; - lno_t numVerts = rowmap.extent(0) - 1; + lno_t numVerts = std::max(1, rowmap.extent_int(0)) - 1; int bw = 0; for (lno_t i = 0; i < numVerts; i++) { lno_t origRow = perm(i); @@ -97,27 +86,15 @@ int maxBandwidth(const rowmap_t& rowmap, const entries_t& entries, return bw; } -template -void test_rcm(lno_t 
gridX, lno_t gridY, lno_t gridZ) { - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type rowmap_t; - typedef typename graph_t::entries_type entries_t; - lno_t numVerts = gridX * gridY * gridZ; - typename rowmap_t::non_const_type rowmap; - typename entries_t::non_const_type entries; - generate7pt(rowmap, entries, gridX, gridY, gridZ); - auto rcm = KokkosGraph::Experimental::graph_rcm( - rowmap, entries); - auto rowmapHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmap); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entries); - auto rcmHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rcm); - decltype(rcmHost) rcmPermHost( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "RCMPerm"), numVerts); +template +void test_rcm(const rowmap_t& rowmap, const entries_t& entries, bool expectBandwidthReduced) { + using lno_t = typename entries_t::non_const_value_type; + auto rcm = KokkosGraph::Experimental::graph_rcm(rowmap, entries); + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmap); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entries); + auto rcmHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rcm); + lno_t numVerts = std::max(rowmap.extent_int(0), 1) - 1; + decltype(rcmHost) rcmPermHost(Kokkos::view_alloc(Kokkos::WithoutInitializing, "RCMPerm"), numVerts); for (lno_t i = 0; i < numVerts; i++) rcmPermHost(rcmHost(i)) = i; // make sure each row index shows up exactly once { @@ -130,48 +107,124 @@ void test_rcm(lno_t gridX, lno_t gridY, lno_t gridZ) { } for (lno_t i = 0; i < numVerts; i++) ASSERT_EQ(counts[i], 1); } - Kokkos::View identityOrder( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Identity"), numVerts); - for (lno_t i = 0; i < numVerts; i++) identityOrder(i) = i; - size_t origBW = - maxBandwidth(rowmapHost, 
entriesHost, identityOrder, identityOrder); - size_t rcmBW = maxBandwidth(rowmapHost, entriesHost, rcmHost, rcmPermHost); - EXPECT_LE(rcmBW, origBW); + if (expectBandwidthReduced) { + Kokkos::View identityOrder(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Identity"), + numVerts); + for (lno_t i = 0; i < numVerts; i++) identityOrder(i) = i; + size_t origBW = maxBandwidth(rowmapHost, entriesHost, identityOrder, identityOrder); + size_t rcmBW = maxBandwidth(rowmapHost, entriesHost, rcmHost, rcmPermHost); + EXPECT_LE(rcmBW, origBW); + } +} + +template +void test_rcm_zerorows() { + using graph_t = Kokkos::StaticCrsGraph; + using rowmap_t = typename graph_t::row_map_type::non_const_type; + using entries_t = typename graph_t::entries_type::non_const_type; + rowmap_t rowmap; + entries_t entries; + test_rcm(rowmap, entries, false); +} + +template +void test_rcm_7pt(lno_t gridX, lno_t gridY, lno_t gridZ, bool expectBandwidthReduced) { + using graph_t = Kokkos::StaticCrsGraph; + using rowmap_t = typename graph_t::row_map_type::non_const_type; + using entries_t = typename graph_t::entries_type::non_const_type; + rowmap_t rowmap; + entries_t entries; + generate7pt(rowmap, entries, gridX, gridY, gridZ); + test_rcm(rowmap, entries, expectBandwidthReduced); +} + +template +void test_rcm_4clique() { + using graph_t = Kokkos::StaticCrsGraph; + using rowmap_t = typename graph_t::row_map_type::non_const_type; + using entries_t = typename graph_t::entries_type::non_const_type; + rowmap_t rowmap("rowmap", 5); + entries_t entries("entries", 16); + auto rowmap_host = Kokkos::create_mirror_view(rowmap); + auto entries_host = Kokkos::create_mirror_view(entries); + for (lno_t i = 0; i < 5; i++) rowmap_host(i) = i * 4; + for (lno_t i = 0; i < 16; i++) entries_host(i) = i % 4; + Kokkos::deep_copy(rowmap, rowmap_host); + Kokkos::deep_copy(entries, entries_host); + test_rcm(rowmap, entries, false); +} + +template +void test_rcm_multiple_components() { + using graph_t = 
Kokkos::StaticCrsGraph; + using rowmap_t = typename graph_t::row_map_type::non_const_type; + using entries_t = typename graph_t::entries_type::non_const_type; + // Generate a single 3D grid first + rowmap_t rowmap_cube; + entries_t entries_cube; + generate7pt(rowmap_cube, entries_cube, 7, 7, 7); + auto rowmap_cube_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmap_cube); + auto entries_cube_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entries_cube); + lno_t nv_cube = 7 * 7 * 7; + lno_t ne_cube = entries_cube.extent(0); + // Now replicate the graph twice, so there are 2 disconnected copies of the + // cube + rowmap_t rowmap("rowmap", nv_cube * 2 + 1); + entries_t entries("entries", ne_cube * 2); + auto rowmap_host = Kokkos::create_mirror_view(rowmap); + auto entries_host = Kokkos::create_mirror_view(entries); + for (lno_t i = 0; i <= nv_cube * 2; i++) { + if (i < nv_cube) + rowmap_host(i) = rowmap_cube_host(i); + else + rowmap_host(i) = ne_cube + rowmap_cube_host(i - nv_cube); + } + for (lno_t i = 0; i < ne_cube * 2; i++) { + if (i < ne_cube) + entries_host(i) = entries_cube_host(i); + else + entries_host(i) = nv_cube + entries_cube_host(i - ne_cube); + } + Kokkos::deep_copy(rowmap, rowmap_host); + Kokkos::deep_copy(entries, entries_host); + test_rcm(rowmap, entries, true); } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - graph##_##rcm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_rcm(6, 3, 3); \ - test_rcm(20, 20, 20); \ - test_rcm(100, 100, 1); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, graph##_##rcm_zerorows##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_rcm_zerorows(); \ + } \ + TEST_F(TestCategory, graph##_##rcm_7pt##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_rcm_7pt(1, 1, 1, false); \ + test_rcm_7pt(2, 1, 1, false); \ + test_rcm_7pt(6, 3, 3, true); \ + test_rcm_7pt(20, 20, 20, true); \ + test_rcm_7pt(100, 100, 1, 
true); \ + } \ + TEST_F(TestCategory, graph##_##rcm_4clique##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_rcm_4clique(); \ + } \ + TEST_F(TestCategory, graph##_##rcm_multiple_components##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_rcm_multiple_components(); \ } -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ - defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, size_t, TestDevice) #endif diff 
--git a/packages/kokkos-kernels/lapack/impl/KokkosLapack_gesv_spec.hpp b/packages/kokkos-kernels/lapack/impl/KokkosLapack_gesv_spec.hpp index 97d74280ffc4..60a69e72b3e2 100644 --- a/packages/kokkos-kernels/lapack/impl/KokkosLapack_gesv_spec.hpp +++ b/packages/kokkos-kernels/lapack/impl/KokkosLapack_gesv_spec.hpp @@ -42,21 +42,17 @@ struct gesv_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSLAPACK_GESV_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template <> \ - struct gesv_eti_spec_avail< \ - EXEC_SPACE_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GESV_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct gesv_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -70,23 +66,19 @@ namespace Impl { /// \brief Implementation of KokkosLapack::gesv. template ::value, - bool eti_spec_avail = - gesv_eti_spec_avail::value> + bool tpl_spec_avail = gesv_tpl_spec_avail::value, + bool eti_spec_avail = gesv_eti_spec_avail::value> struct GESV { - static void gesv(const ExecutionSpace &space, const AMatrix &A, const BXMV &B, - const IPIVV &IPIV); + static void gesv(const ExecutionSpace &space, const AMatrix &A, const BXMV &B, const IPIVV &IPIV); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of gesv for multi vectors. 
// Unification layer template -struct GESV { - static void gesv(const ExecutionSpace & /* space */, const AMatrix & /* A */, - const BXMV & /* B */, const IPIVV & /* IPIV */) { +struct GESV { + static void gesv(const ExecutionSpace & /* space */, const AMatrix & /* A */, const BXMV & /* B */, + const IPIVV & /* IPIV */) { // NOTE: Might add the implementation of KokkosLapack::gesv later throw std::runtime_error( "No fallback implementation of GESV (general LU factorization & solve) " @@ -105,36 +97,26 @@ struct GESV, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSLAPACK_GESV_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GESV< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; -#define KOKKOSLAPACK_GESV_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template struct GESV< \ - EXEC_SPACE_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSLAPACK_GESV_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GESV< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/packages/kokkos-kernels/lapack/impl/KokkosLapack_svd_spec.hpp b/packages/kokkos-kernels/lapack/impl/KokkosLapack_svd_spec.hpp index fc0a34f790a3..b0dfe3d091bf 100644 --- a/packages/kokkos-kernels/lapack/impl/KokkosLapack_svd_spec.hpp +++ b/packages/kokkos-kernels/lapack/impl/KokkosLapack_svd_spec.hpp @@ -28,8 +28,7 @@ namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists -template 
+template struct svd_eti_spec_avail { enum : bool { value = false }; }; @@ -43,24 +42,19 @@ struct svd_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template <> \ - struct svd_eti_spec_avail< \ - EXEC_SPACE_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct svd_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -73,29 +67,21 @@ namespace Impl { // Unification layer /// \brief Implementation of KokkosLapack::svd. -template ::value, - bool eti_spec_avail = svd_eti_spec_avail< - ExecutionSpace, AMatrix, SVector, UMatrix, VMatrix>::value> +template ::value, + bool eti_spec_avail = svd_eti_spec_avail::value> struct SVD { - static void svd(const ExecutionSpace &space, const char jobu[], - const char jobvt[], const AMatrix &A, const SVector &S, - const UMatrix &U, const VMatrix &Vt); + static void svd(const ExecutionSpace &space, const char jobu[], const char jobvt[], const AMatrix &A, + const SVector &S, const UMatrix &U, const VMatrix &Vt); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! 
Full specialization of svd // Unification layer -template -struct SVD { - static void svd(const ExecutionSpace & /* space */, const char * /* jobu */, - const char * /* jobvt */, const AMatrix & /* A */, - const SVector & /* S */, const UMatrix & /* U */, - const VMatrix & /* Vt */) { +template +struct SVD { + static void svd(const ExecutionSpace & /* space */, const char * /* jobu */, const char * /* jobvt */, + const AMatrix & /* A */, const SVector & /* S */, const UMatrix & /* U */, const VMatrix & /* Vt */) { // NOTE: Might add the implementation of KokkosLapack::svd later throw std::runtime_error( "No fallback implementation of SVD (singular value decomposition) " @@ -115,40 +101,30 @@ struct SVD, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSLAPACK_SVD_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct SVD< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; -#define KOKKOSLAPACK_SVD_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template struct SVD< \ - EXEC_SPACE_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ +#define KOKKOSLAPACK_SVD_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct SVD< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + 
Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_impl.hpp b/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_impl.hpp index 9f52c2d412f8..5ba6f80eecd5 100644 --- a/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_impl.hpp +++ b/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_impl.hpp @@ -31,8 +31,7 @@ namespace KokkosLapack { namespace Impl { template -void SerialTrtri_Invoke(const RViewType &R, const char uplo[], - const char diag[], const AViewType &A) { +void SerialTrtri_Invoke(const RViewType &R, const char uplo[], const char diag[], const AViewType &A) { using KokkosBatched::Algo; using KokkosBatched::Diag; using KokkosBatched::SerialTrtriInternalLower; @@ -43,24 +42,20 @@ void SerialTrtri_Invoke(const RViewType &R, const char uplo[], //// Lower //// if (__uplo == 'l') { if (__diag == 'u') { - R() = SerialTrtriInternalLower::invoke( - Diag::Unit::use_unit_diag, A.extent(0), A.extent(1), A.data(), - A.stride(0), A.stride(1)); + R() = SerialTrtriInternalLower::invoke(Diag::Unit::use_unit_diag, A.extent(0), + A.extent(1), A.data(), A.stride(0), A.stride(1)); } else { - R() = SerialTrtriInternalLower::invoke( - Diag::NonUnit::use_unit_diag, A.extent(0), A.extent(1), A.data(), - A.stride(0), A.stride(1)); + R() = SerialTrtriInternalLower::invoke(Diag::NonUnit::use_unit_diag, A.extent(0), + A.extent(1), A.data(), A.stride(0), A.stride(1)); } } else { //// Upper //// if (__diag == 'u') { - R() = SerialTrtriInternalUpper::invoke( - Diag::Unit::use_unit_diag, A.extent(0), A.extent(1), A.data(), - A.stride(0), A.stride(1)); + R() = SerialTrtriInternalUpper::invoke(Diag::Unit::use_unit_diag, A.extent(0), + A.extent(1), A.data(), A.stride(0), A.stride(1)); } else { - R() = SerialTrtriInternalUpper::invoke( - Diag::NonUnit::use_unit_diag, A.extent(0), A.extent(1), A.data(), - A.stride(0), A.stride(1)); + R() = 
SerialTrtriInternalUpper::invoke(Diag::NonUnit::use_unit_diag, A.extent(0), + A.extent(1), A.data(), A.stride(0), A.stride(1)); } } } diff --git a/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_spec.hpp b/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_spec.hpp index a17184dc41b0..ef458f7e5703 100644 --- a/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_spec.hpp +++ b/packages/kokkos-kernels/lapack/impl/KokkosLapack_trtri_spec.hpp @@ -37,15 +37,13 @@ struct trtri_eti_spec_avail { // This Macros provides the ETI specialization of trtri, currently not // available. // -#define KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL(SCALAR, LAYOUTA, EXEC_SPACE, \ - MEM_SPACE) \ - template <> \ - struct trtri_eti_spec_avail< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct trtri_eti_spec_avail< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -60,33 +58,28 @@ namespace Impl { // // Unification layer -template ::value, +template ::value, bool eti_spec_avail = trtri_eti_spec_avail::value> struct TRTRI { - static void trtri(const RVIT& R, const char uplo[], const char diag[], - const AVIT& A); + static void trtri(const RVIT& R, const char uplo[], const char diag[], const AVIT& A); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY template struct TRTRI { - static void trtri(const RVIT& R, const char uplo[], const char diag[], - const AVIT& A) { + static void trtri(const RVIT& R, const char uplo[], const char diag[], const AVIT& A) { static_assert(Kokkos::is_view::value, "AVIT must be a Kokkos::View."); static_assert(static_cast(AVIT::rank) == 2, "AVIT must have rank 2."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? 
"KokkosLapack::trtri[ETI]" - : "KokkosLapack::trtri[noETI]"); + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosLapack::trtri[ETI]" + : "KokkosLapack::trtri[noETI]"); typename AVIT::HostMirror host_A = Kokkos::create_mirror_view(A); typename RVIT::HostMirror host_R = Kokkos::create_mirror_view(R); Kokkos::deep_copy(host_A, A); - SerialTrtri_Invoke( - R, uplo, diag, host_A); + SerialTrtri_Invoke(R, uplo, diag, host_A); Kokkos::deep_copy(A, host_A); @@ -106,22 +99,18 @@ struct TRTRI { // "extern template" skips the implicit instatiation step ensuring that the // callers code uses this explicit instantiation definition of TRTRI. // -#define KOKKOSLAPACK_TRTRI_ETI_SPEC_DECL(SCALAR, LAYOUTA, EXEC_SPACE, \ - MEM_SPACE) \ - extern template struct TRTRI< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_DECL(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ + extern template struct TRTRI< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSLAPACK_TRTRI_ETI_SPEC_INST(SCALAR, LAYOUTA, EXEC_SPACE, \ - MEM_SPACE) \ - template struct TRTRI< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_INST(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ + template struct TRTRI< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/packages/kokkos-kernels/lapack/src/KokkosLapack_gesv.hpp b/packages/kokkos-kernels/lapack/src/KokkosLapack_gesv.hpp index b66583bbdf2b..281d6a565148 100644 --- a/packages/kokkos-kernels/lapack/src/KokkosLapack_gesv.hpp +++ b/packages/kokkos-kernels/lapack/src/KokkosLapack_gesv.hpp @@ -53,44 +53,29 @@ namespace KokkosLapack { /// used. 
/// template -void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, - const IPIVV& IPIV) { +void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { // NOTE: Currently, KokkosLapack::gesv only supports LAPACK, MAGMA and // rocSOLVER TPLs. // MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU // interface for device views LAPACK TPL should be enabled to call the // LAPACK interface for host views - static_assert( - Kokkos::SpaceAccessibility::accessible); - static_assert( - Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); #if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) if constexpr (!std::is_same_v) { - static_assert( - Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); } #else - static_assert( - Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); #endif - static_assert(Kokkos::is_view::value, - "KokkosLapack::gesv: A must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::gesv: B must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::gesv: IPIV must be a Kokkos::View."); - static_assert(static_cast(AMatrix::rank) == 2, - "KokkosLapack::gesv: A must have rank 2."); - static_assert( - static_cast(BXMV::rank) == 1 || static_cast(BXMV::rank) == 2, - "KokkosLapack::gesv: B must have either rank 1 or rank 2."); - static_assert(static_cast(IPIVV::rank) == 1, - "KokkosLapack::gesv: IPIV must have rank 1."); + static_assert(Kokkos::is_view::value, "KokkosLapack::gesv: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::gesv: B must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::gesv: IPIV must be a Kokkos::View."); + static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::gesv: A must 
have rank 2."); + static_assert(static_cast(BXMV::rank) == 1 || static_cast(BXMV::rank) == 2, + "KokkosLapack::gesv: B must have either rank 1 or rank 2."); + static_assert(static_cast(IPIVV::rank) == 1, "KokkosLapack::gesv: IPIV must have rank 1."); int64_t IPIV0 = IPIV.extent(0); int64_t A0 = A.extent(0); @@ -98,8 +83,7 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, int64_t B0 = B.extent(0); // Check validity of pivot argument - bool valid_pivot = - (IPIV0 == A1) || ((IPIV0 == 0) && (IPIV.data() == nullptr)); + bool valid_pivot = (IPIV0 == A1) || ((IPIV0 == 0) && (IPIV.data() == nullptr)); if (!(valid_pivot)) { std::ostringstream os; os << "KokkosLapack::gesv: IPIV: " << IPIV0 << ". " @@ -112,9 +96,8 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, // Check for no pivoting case. Only MAGMA supports no pivoting interface #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL - if ((!std::is_same::value) && - (IPIV0 == 0) && (IPIV.data() == nullptr)) { + if ((!std::is_same::value) && (IPIV0 == 0) && + (IPIV.data() == nullptr)) { std::ostringstream os; os << "KokkosLapack::gesv: IPIV: " << IPIV0 << ". 
" << "LAPACK TPL does not support no pivoting."; @@ -136,22 +119,18 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, if ((A0 < A1) || (A0 != B0)) { std::ostringstream os; os << "KokkosLapack::gesv: Dimensions of A, and B do not match: " - << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) - << " x " << B.extent(1); + << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) << " x " << B.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - typedef Kokkos::View< - typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, - typename AMatrix::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View > AMatrix_Internal; - typedef Kokkos::View > BXMV_Internal; - typedef Kokkos::View< - typename IPIVV::non_const_value_type*, typename IPIVV::array_layout, - typename IPIVV::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View > IPIVV_Internal; AMatrix_Internal A_i = A; // BXMV_Internal B_i = B; @@ -159,12 +138,12 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, if (BXMV::rank == 1) { auto B_i = BXMV_Internal(B.data(), B.extent(0), 1); - KokkosLapack::Impl::GESV::gesv(space, A_i, B_i, IPIV_i); + KokkosLapack::Impl::GESV::gesv(space, A_i, B_i, + IPIV_i); } else { // BXMV::rank == 2 auto B_i = BXMV_Internal(B.data(), B.extent(0), B.extent(1)); - KokkosLapack::Impl::GESV::gesv(space, A_i, B_i, IPIV_i); + KokkosLapack::Impl::GESV::gesv(space, A_i, B_i, + IPIV_i); } } diff --git a/packages/kokkos-kernels/lapack/src/KokkosLapack_svd.hpp b/packages/kokkos-kernels/lapack/src/KokkosLapack_svd.hpp index 71ea7cc30f81..c0c962fb19d9 100644 --- a/packages/kokkos-kernels/lapack/src/KokkosLapack_svd.hpp +++ b/packages/kokkos-kernels/lapack/src/KokkosLapack_svd.hpp @@ -58,36 +58,21 @@ namespace KokkosLapack { /// vectors of A. 
/// // clang-format on -template -void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], - const AMatrix& A, const SVector& S, const UMatrix& U, - const VMatrix& Vt) { - static_assert( - Kokkos::SpaceAccessibility::accessible); - static_assert( - Kokkos::SpaceAccessibility::accessible); - static_assert( - Kokkos::SpaceAccessibility::accessible); - static_assert( - Kokkos::SpaceAccessibility::accessible); - static_assert(Kokkos::is_view::value, - "KokkosLapack::svd: A must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::svd: S must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::svd: U must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::svd: Vt must be a Kokkos::View."); +template +void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::is_view::value, "KokkosLapack::svd: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::svd: S must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::svd: U must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::svd: Vt must be a Kokkos::View."); static_assert(AMatrix::rank() == 2, "KokkosLapack::svd: A must have rank 2."); static_assert(SVector::rank() == 1, "KokkosLapack::svd: S must have rank 1."); static_assert(UMatrix::rank() == 2, "KokkosLapack::svd: U must have rank 2."); - static_assert(VMatrix::rank() == 2, - "KokkosLapack::svd: Vt must have rank 2."); + static_assert(VMatrix::rank() == 2, "KokkosLapack::svd: Vt must have rank 2."); int64_t m = A.extent(0); int64_t n = A.extent(1); 
@@ -102,40 +87,32 @@ void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], // Check the jobu and jobvt control flags // The only valid options there are 'A', 'S', 'O' and 'N' - const bool is_jobu_invalid = - !((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || - (jobu[0] == 's') || (jobu[0] == 'O') || (jobu[0] == 'o') || - (jobu[0] == 'N') || (jobu[0] == 'n')); + const bool is_jobu_invalid = !((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || (jobu[0] == 's') || + (jobu[0] == 'O') || (jobu[0] == 'o') || (jobu[0] == 'N') || (jobu[0] == 'n')); - const bool is_jobvt_invalid = - !((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || - (jobvt[0] == 's') || (jobvt[0] == 'O') || (jobvt[0] == 'o') || - (jobvt[0] == 'N') || (jobvt[0] == 'n')); + const bool is_jobvt_invalid = !((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || (jobvt[0] == 's') || + (jobvt[0] == 'O') || (jobvt[0] == 'o') || (jobvt[0] == 'N') || (jobvt[0] == 'n')); if (is_jobu_invalid && is_jobvt_invalid) { std::ostringstream oss; oss << "KokkosLapack::svd: both jobu and jobvt are invalid!\n" - << "Possible values are A, S, O or N, submitted values are " << jobu[0] - << " and " << jobvt[0] << "\n"; + << "Possible values are A, S, O or N, submitted values are " << jobu[0] << " and " << jobvt[0] << "\n"; KokkosKernels::Impl::throw_runtime_exception(oss.str()); } if (is_jobu_invalid) { std::ostringstream oss; oss << "KokkosLapack::svd: jobu is invalid!\n" - << "Possible values are A, S, O or N, submitted value is " << jobu[0] - << "\n"; + << "Possible values are A, S, O or N, submitted value is " << jobu[0] << "\n"; KokkosKernels::Impl::throw_runtime_exception(oss.str()); } if (is_jobvt_invalid) { std::ostringstream oss; oss << "KokkosLapack::svd: jobvt is invalid!\n" - << "Possible values are A, S, O or N, submitted value is " << jobvt[0] - << "\n"; + << "Possible values are A, S, O or N, submitted value is " << jobvt[0] << "\n"; 
KokkosKernels::Impl::throw_runtime_exception(oss.str()); } - if (((jobu[0] == 'O') || (jobu[0] == 'o')) && - ((jobvt[0] == 'O') || (jobvt[0] == 'o'))) { + if (((jobu[0] == 'O') || (jobu[0] == 'o')) && ((jobvt[0] == 'O') || (jobvt[0] == 'o'))) { std::ostringstream oss; oss << "KokkosLapack::svd: jobu and jobvt cannot be O at the same time!\n"; KokkosKernels::Impl::throw_runtime_exception(oss.str()); @@ -148,23 +125,20 @@ void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], std::ostringstream os; if (S.extent_int(0) != rankA) { is_extent_invalid = true; - os << "KokkosLapack::svd: S has extent " << S.extent(0) << ", instead of " - << rankA << ".\n"; + os << "KokkosLapack::svd: S has extent " << S.extent(0) << ", instead of " << rankA << ".\n"; } - if ((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || - (jobu[0] == 's')) { + if ((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || (jobu[0] == 's')) { if (U.extent_int(0) != m || U.extent_int(1) != m) { is_extent_invalid = true; - os << "KokkosLapack::svd: U has extents (" << U.extent(0) << ", " - << U.extent(1) << ") instead of (" << m << ", " << m << ").\n"; + os << "KokkosLapack::svd: U has extents (" << U.extent(0) << ", " << U.extent(1) << ") instead of (" << m << ", " + << m << ").\n"; } } - if ((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || - (jobvt[0] == 's')) { + if ((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || (jobvt[0] == 's')) { if (Vt.extent_int(0) != n || Vt.extent_int(1) != n) { is_extent_invalid = true; - os << "KokkosLapack::svd: V has extents (" << Vt.extent(0) << ", " - << Vt.extent(1) << ") instead of (" << n << ", " << n << ").\n"; + os << "KokkosLapack::svd: V has extents (" << Vt.extent(0) << ", " << Vt.extent(1) << ") instead of (" << n + << ", " << n << ").\n"; } } if (is_extent_invalid) { @@ -172,8 +146,7 @@ void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], } #if 
defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) - if (std::is_same_v && - (A.extent(0) < A.extent(1))) { + if (std::is_same_v && (A.extent(0) < A.extent(1))) { throw std::runtime_error( "CUSOLVER does not support SVD for matrices with more columns " "than rows, you can transpose you matrix first then compute " @@ -182,32 +155,25 @@ void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], } #endif - using AMatrix_Internal = Kokkos::View< - typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, - typename AMatrix::device_type, Kokkos::MemoryTraits>; + using AMatrix_Internal = Kokkos::View>; - using SVector_Internal = Kokkos::View< - typename SVector::non_const_value_type*, typename SVector::array_layout, - typename SVector::device_type, Kokkos::MemoryTraits>; + using SVector_Internal = Kokkos::View>; - using UMatrix_Internal = Kokkos::View< - typename UMatrix::non_const_value_type**, typename UMatrix::array_layout, - typename UMatrix::device_type, Kokkos::MemoryTraits>; + using UMatrix_Internal = Kokkos::View>; - using VMatrix_Internal = Kokkos::View< - typename VMatrix::non_const_value_type**, typename VMatrix::array_layout, - typename VMatrix::device_type, Kokkos::MemoryTraits>; + using VMatrix_Internal = Kokkos::View>; AMatrix_Internal A_i = A; SVector_Internal S_i = S; UMatrix_Internal U_i = U; VMatrix_Internal Vt_i = Vt; - KokkosLapack::Impl::SVD::svd(space, jobu, - jobvt, A_i, - S_i, U_i, - Vt_i); + KokkosLapack::Impl::SVD::svd( + space, jobu, jobvt, A_i, S_i, U_i, Vt_i); } // clang-format off @@ -235,8 +201,8 @@ void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], /// // clang-format on template -void svd(const char jobu[], const char jobvt[], const AMatrix& A, - const SVector& S, const UMatrix& U, const VMatrix& Vt) { +void svd(const char jobu[], const char jobvt[], const AMatrix& A, const SVector& S, const UMatrix& U, + const VMatrix& Vt) { typename AMatrix::execution_space space{}; svd(space, jobu, 
jobvt, A, S, U, Vt); } diff --git a/packages/kokkos-kernels/lapack/src/KokkosLapack_trtri.hpp b/packages/kokkos-kernels/lapack/src/KokkosLapack_trtri.hpp index 9a884f23036f..cfe311f47645 100644 --- a/packages/kokkos-kernels/lapack/src/KokkosLapack_trtri.hpp +++ b/packages/kokkos-kernels/lapack/src/KokkosLapack_trtri.hpp @@ -49,16 +49,12 @@ namespace KokkosLapack { // source: https://software.intel.com/en-us/mkl-developer-reference-c-trtri template int trtri(const char uplo[], const char diag[], const AViewType& A) { - static_assert(Kokkos::is_view::value, - "AViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, - "AViewType must have rank 2."); + static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); // Check validity of indicator argument - bool valid_uplo = (uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || - (uplo[0] == 'l'); - bool valid_diag = (diag[0] == 'U') || (diag[0] == 'u') || (diag[0] == 'N') || - (diag[0] == 'n'); + bool valid_uplo = (uplo[0] == 'U') || (uplo[0] == 'u') || (uplo[0] == 'L') || (uplo[0] == 'l'); + bool valid_diag = (diag[0] == 'U') || (diag[0] == 'u') || (diag[0] == 'N') || (diag[0] == 'n'); if (!valid_uplo) { std::ostringstream os; @@ -94,22 +90,17 @@ int trtri(const char uplo[], const char diag[], const AViewType& A) { } // Create A matrix view type alias - using AViewInternalType = - Kokkos::View >; + using AViewInternalType = Kokkos::View >; // This is the return value type and should always reside on host using RViewInternalType = - Kokkos::View >; + Kokkos::View >; int result; RViewInternalType R = RViewInternalType(&result); - KokkosLapack::Impl::TRTRI::trtri( - R, uplo, diag, A); + KokkosLapack::Impl::TRTRI::trtri(R, uplo, diag, A); return result; } diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Cuda_tpl.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Cuda_tpl.hpp 
index 943d10d11175..3ead12d5f4de 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Cuda_tpl.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Cuda_tpl.hpp @@ -24,8 +24,7 @@ namespace Impl { CudaLapackSingleton::CudaLapackSingleton() { cusolverStatus_t stat = cusolverDnCreate(&handle); - if (stat != CUSOLVER_STATUS_SUCCESS) - Kokkos::abort("CUSOLVER initialization failed\n"); + if (stat != CUSOLVER_STATUS_SUCCESS) Kokkos::abort("CUSOLVER initialization failed\n"); Kokkos::push_finalize_hook([&]() { cusolverDnDestroy(handle); }); } diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.cpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.cpp index add0a802bd9f..3b60a0578bc4 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -29,39 +29,25 @@ extern "C" { /// Gesv /// -void F77_BLAS_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, - int*); -void F77_BLAS_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, - int*, int*); -void F77_BLAS_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, - std::complex*, int*, int*); -void F77_BLAS_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, - int*, std::complex*, int*, int*); +void F77_BLAS_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, int*); +void F77_BLAS_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, int*, int*); +void F77_BLAS_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, std::complex*, int*, int*); +void F77_BLAS_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, int*, std::complex*, int*, int*); /// /// Gesvd /// -void F77_BLAS_MANGLE(sgesvd, SGESVD)(const char*, const char*, const int*, - const int*, float*, const int*, float*, - float*, const int*, float*, const int*, - float*, int*, int*); -void F77_BLAS_MANGLE(dgesvd, DGESVD)(const char*, const char*, const int*, - const int*, double*, 
const int*, double*, - double*, const int*, double*, const int*, - double*, int*, int*); -void F77_BLAS_MANGLE(cgesvd, CGESVD)(const char*, const char*, const int*, - const int*, std::complex*, - const int*, float*, std::complex*, - const int*, std::complex*, - const int*, std::complex*, int*, - float*, int*); -void F77_BLAS_MANGLE(zgesvd, ZGESVD)(const char*, const char*, const int*, - const int*, std::complex*, - const int*, double*, std::complex*, - const int*, std::complex*, - const int*, std::complex*, int*, - double*, int*); +void F77_BLAS_MANGLE(sgesvd, SGESVD)(const char*, const char*, const int*, const int*, float*, const int*, float*, + float*, const int*, float*, const int*, float*, int*, int*); +void F77_BLAS_MANGLE(dgesvd, DGESVD)(const char*, const char*, const int*, const int*, double*, const int*, double*, + double*, const int*, double*, const int*, double*, int*, int*); +void F77_BLAS_MANGLE(cgesvd, CGESVD)(const char*, const char*, const int*, const int*, std::complex*, const int*, + float*, std::complex*, const int*, std::complex*, const int*, + std::complex*, int*, float*, int*); +void F77_BLAS_MANGLE(zgesvd, ZGESVD)(const char*, const char*, const int*, const int*, std::complex*, + const int*, double*, std::complex*, const int*, std::complex*, + const int*, std::complex*, int*, double*, int*); /// /// Trtri @@ -74,14 +60,10 @@ void F77_BLAS_MANGLE(zgesvd, ZGESVD)(const char*, const char*, const int*, &diag, &n, a, &lda, &info); */ -void F77_BLAS_MANGLE(strtri, STRTRI)(const char*, const char*, int*, - const float*, int*, int*); -void F77_BLAS_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, - const double*, int*, int*); -void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, - const std::complex*, int*, int*); -void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, - const std::complex*, int*, int*); +void F77_BLAS_MANGLE(strtri, STRTRI)(const char*, const char*, int*, const float*, int*, int*); +void 
F77_BLAS_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, const double*, int*, int*); +void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); +void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); } #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) @@ -107,22 +89,17 @@ namespace Impl { /// template <> -void HostLapack::gesv(int n, int rhs, float* a, int lda, int* ipiv, - float* b, int ldb, int info) { +void HostLapack::gesv(int n, int rhs, float* a, int lda, int* ipiv, float* b, int ldb, int info) { F77_FUNC_SGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack::gesvd(const char jobu, const char jobvt, const int m, - const int n, float* a, const int lda, float* s, - float* u, const int ldu, float* vt, - const int ldvt, float* work, int lwork, +void HostLapack::gesvd(const char jobu, const char jobvt, const int m, const int n, float* a, const int lda, + float* s, float* u, const int ldu, float* vt, const int ldvt, float* work, int lwork, float* /*rwork*/, int info) { - F77_FUNC_SGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, - &lwork, &info); + F77_FUNC_SGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info); } template <> -int HostLapack::trtri(const char uplo, const char diag, int n, - const float* a, int lda) { +int HostLapack::trtri(const char uplo, const char diag, int n, const float* a, int lda) { int info = 0; F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); return info; @@ -133,22 +110,17 @@ int HostLapack::trtri(const char uplo, const char diag, int n, /// template <> -void HostLapack::gesv(int n, int rhs, double* a, int lda, int* ipiv, - double* b, int ldb, int info) { +void HostLapack::gesv(int n, int rhs, double* a, int lda, int* ipiv, double* b, int ldb, int info) { F77_FUNC_DGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack::gesvd(const char jobu, 
const char jobvt, const int m, - const int n, double* a, const int lda, double* s, - double* u, const int ldu, double* vt, - const int ldvt, double* work, int lwork, +void HostLapack::gesvd(const char jobu, const char jobvt, const int m, const int n, double* a, const int lda, + double* s, double* u, const int ldu, double* vt, const int ldvt, double* work, int lwork, double* /*rwork*/, int info) { - F77_FUNC_DGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, - &lwork, &info); + F77_FUNC_DGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info); } template <> -int HostLapack::trtri(const char uplo, const char diag, int n, - const double* a, int lda) { +int HostLapack::trtri(const char uplo, const char diag, int n, const double* a, int lda) { int info = 0; F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; @@ -159,24 +131,19 @@ int HostLapack::trtri(const char uplo, const char diag, int n, /// template <> -void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { +void HostLapack >::gesv(int n, int rhs, std::complex* a, int lda, int* ipiv, + std::complex* b, int ldb, int info) { F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd( - const char jobu, const char jobvt, const int m, const int n, - std::complex* a, const int lda, float* s, std::complex* u, - const int ldu, std::complex* vt, const int ldvt, - std::complex* work, int lwork, float* rwork, int info) { - F77_FUNC_CGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, - &lwork, rwork, &info); +void HostLapack >::gesvd(const char jobu, const char jobvt, const int m, const int n, + std::complex* a, const int lda, float* s, std::complex* u, + const int ldu, std::complex* vt, const int ldvt, + std::complex* work, int lwork, float* rwork, int info) { + F77_FUNC_CGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, 
&info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, - int n, const std::complex* a, +int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, int lda) { int info = 0; F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); @@ -188,25 +155,20 @@ int HostLapack >::trtri(const char uplo, const char diag, /// template <> -void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { +void HostLapack >::gesv(int n, int rhs, std::complex* a, int lda, int* ipiv, + std::complex* b, int ldb, int info) { F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd( - const char jobu, const char jobvt, const int m, const int n, - std::complex* a, const int lda, double* s, std::complex* u, - const int ldu, std::complex* vt, const int ldvt, - std::complex* work, int lwork, double* rwork, int info) { - F77_FUNC_ZGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, - &lwork, rwork, &info); +void HostLapack >::gesvd(const char jobu, const char jobvt, const int m, const int n, + std::complex* a, const int lda, double* s, + std::complex* u, const int ldu, std::complex* vt, + const int ldvt, std::complex* work, int lwork, double* rwork, + int info) { + F77_FUNC_ZGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, - int n, - const std::complex* a, +int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, int lda) { int info = 0; F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.hpp index 9eca83afea0d..092f9ac9f084 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ 
b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -30,17 +30,13 @@ namespace Impl { template struct HostLapack { - static void gesv(int n, int rhs, T *a, int lda, int *ipiv, T *b, int ldb, - int info); + static void gesv(int n, int rhs, T *a, int lda, int *ipiv, T *b, int ldb, int info); - static void gesvd(const char jobu, const char jobvt, const int m, const int n, - T *A, const int lda, - typename Kokkos::ArithTraits::mag_type *S, T *U, - const int ldu, T *Vt, const int ldvt, T *work, int lwork, - typename Kokkos::ArithTraits::mag_type *rwork, int info); + static void gesvd(const char jobu, const char jobvt, const int m, const int n, T *A, const int lda, + typename Kokkos::ArithTraits::mag_type *S, T *U, const int ldu, T *Vt, const int ldvt, T *work, + int lwork, typename Kokkos::ArithTraits::mag_type *rwork, int info); - static int trtri(const char uplo, const char diag, int n, const T *a, - int lda); + static int trtri(const char uplo, const char diag, int n, const T *a, int lda); }; } // namespace Impl } // namespace KokkosLapack diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_cusolver.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_cusolver.hpp index 006fd68b6fe3..272fb8b3b83f 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_cusolver.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_cusolver.hpp @@ -34,8 +34,7 @@ struct CudaLapackSingleton { static CudaLapackSingleton& singleton(); }; -inline void cusolver_internal_error_throw(cusolverStatus_t cusolverStatus, - const char* name, const char* file, +inline void cusolver_internal_error_throw(cusolverStatus_t cusolverStatus, const char* name, const char* file, const int line) { std::ostringstream out; out << name << " error( "; @@ -48,21 +47,11 @@ inline void cusolver_internal_error_throw(cusolverStatus_t cusolverStatus, out << "CUSOLVER_STATUS_ALLOC_FAILED): you might tried to allocate too " "much memory"; break; - case CUSOLVER_STATUS_INVALID_VALUE: - 
out << "CUSOLVER_STATUS_INVALID_VALUE)"; - break; - case CUSOLVER_STATUS_ARCH_MISMATCH: - out << "CUSOLVER_STATUS_ARCH_MISMATCH)"; - break; - case CUSOLVER_STATUS_EXECUTION_FAILED: - out << "CUSOLVER_STATUS_EXECUTION_FAILED)"; - break; - case CUSOLVER_STATUS_INTERNAL_ERROR: - out << "CUSOLVER_STATUS_INTERNAL_ERROR)"; - break; - case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - out << "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; - break; + case CUSOLVER_STATUS_INVALID_VALUE: out << "CUSOLVER_STATUS_INVALID_VALUE)"; break; + case CUSOLVER_STATUS_ARCH_MISMATCH: out << "CUSOLVER_STATUS_ARCH_MISMATCH)"; break; + case CUSOLVER_STATUS_EXECUTION_FAILED: out << "CUSOLVER_STATUS_EXECUTION_FAILED)"; break; + case CUSOLVER_STATUS_INTERNAL_ERROR: out << "CUSOLVER_STATUS_INTERNAL_ERROR)"; break; + case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break; default: out << "unrecognized error code): this is bad!"; break; } if (file) { @@ -71,10 +60,8 @@ inline void cusolver_internal_error_throw(cusolverStatus_t cusolverStatus, throw std::runtime_error(out.str()); } -inline void cusolver_internal_safe_call(cusolverStatus_t cusolverStatus, - const char* name, - const char* file = nullptr, - const int line = 0) { +inline void cusolver_internal_safe_call(cusolverStatus_t cusolverStatus, const char* name, const char* file = nullptr, + const int line = 0) { if (CUSOLVER_STATUS_SUCCESS != cusolverStatus) { cusolver_internal_error_throw(cusolverStatus, name, file, line); } @@ -82,9 +69,8 @@ inline void cusolver_internal_safe_call(cusolverStatus_t cusolverStatus, // The macro below defines is the public interface for the safe cusolver calls. // The functions themselves are protected by impl namespace. 
-#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, \ - __LINE__) +#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, __LINE__) } // namespace Impl } // namespace KokkosLapack diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp index 9fbd299ca528..472b79ce85fc 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp @@ -28,27 +28,20 @@ struct gesv_tpl_spec_avail { // Generic Host side LAPACK (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK -#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct gesv_tpl_spec_avail< \ - ExecSpace, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct gesv_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace) 
+KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #endif } // namespace Impl } // namespace KokkosLapack @@ -59,29 +52,23 @@ KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, namespace KokkosLapack { namespace Impl { -#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct gesv_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gesv_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) } // namespace Impl } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA @@ -91,39 +78,28 @@ KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, namespace KokkosLapack { namespace Impl { -#define 
KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct gesv_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gesv_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(float, 
Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -136,28 +112,21 @@ KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, namespace KokkosLapack { namespace Impl { -#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct gesv_tpl_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gesv_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosLapack diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp 
index ca4b9e7abc2e..559f5d0509cd 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp @@ -23,14 +23,12 @@ template inline void gesv_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA - printf("KokkosLapack::gesv<> TPL MAGMA specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(BViewType).name(), - typeid(PViewType).name()); + printf("KokkosLapack::gesv<> TPL MAGMA specialization for < %s , %s, %s >\n", typeid(AViewType).name(), + typeid(BViewType).name(), typeid(PViewType).name()); #else #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK - printf("KokkosLapack::gesv<> TPL Lapack specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(BViewType).name(), - typeid(PViewType).name()); + printf("KokkosLapack::gesv<> TPL Lapack specialization for < %s , %s, %s >\n", typeid(AViewType).name(), + typeid(BViewType).name(), typeid(PViewType).name()); #endif #endif #endif @@ -46,8 +44,7 @@ namespace KokkosLapack { namespace Impl { template -void lapackGesvWrapper(const AViewType& A, const BViewType& B, - const IPIVViewType& IPIV) { +void lapackGesvWrapper(const AViewType& A, const BViewType& B, const IPIVViewType& IPIV) { using Scalar = typename AViewType::non_const_value_type; const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); @@ -65,88 +62,65 @@ void lapackGesvWrapper(const AViewType& A, const BViewType& B, if constexpr (Kokkos::ArithTraits::is_complex) { using MagType = typename Kokkos::ArithTraits::mag_type; - HostLapack>::gesv( - N, NRHS, reinterpret_cast*>(A.data()), LDA, - IPIV.data(), reinterpret_cast*>(B.data()), LDB, - info); + HostLapack>::gesv(N, NRHS, reinterpret_cast*>(A.data()), LDA, + IPIV.data(), reinterpret_cast*>(B.data()), LDB, + info); } else { - HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), - LDB, info); + 
HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), LDB, info); } } } -#define KOKKOSLAPACK_GESV_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ - template <> \ - struct GESV< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - gesv_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using BViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void gesv(const EXECSPACE& /* space */, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK," #SCALAR \ - "]"); \ - gesv_print_specialization(); \ - lapackGesvWrapper(A, B, IPIV); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_GESV_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ + template <> \ + struct GESV< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + gesv_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using BViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void gesv(const EXECSPACE& /* space */, const AViewType& A, const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK," #SCALAR "]"); \ + gesv_print_specialization(); \ + lapackGesvWrapper(A, B, IPIV); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #if 
defined(KOKKOS_ENABLE_SERIAL) -KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) #endif #if defined(KOKKOS_ENABLE_OPENMP) -KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) #endif #if defined(KOKKOS_ENABLE_THREADS) -KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads, Kokkos::HostSpace) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads, 
Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) #endif } // namespace Impl @@ -161,12 +135,10 @@ namespace KokkosLapack { namespace Impl { template -void magmaGesvWrapper(const ExecSpace& space, const AViewType& A, - const BViewType& B, const IPIVViewType& IPIV) { +void magmaGesvWrapper(const ExecSpace& space, const AViewType& A, const BViewType& B, const IPIVViewType& IPIV) { using scalar_type = typename AViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA," + - Kokkos::ArithTraits::name() + "]"); + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA," + Kokkos::ArithTraits::name() + "]"); gesv_print_specialization(); const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); @@ -178,112 +150,88 @@ void magmaGesvWrapper(const ExecSpace& space, const AViewType& A, magma_int_t LDB = (BST == 0) ? 
1 : BST; magma_int_t NRHS = static_cast(B.extent(1)); - KokkosLapack::Impl::MagmaSingleton& s = - KokkosLapack::Impl::MagmaSingleton::singleton(); - magma_int_t info = 0; + KokkosLapack::Impl::MagmaSingleton& s = KokkosLapack::Impl::MagmaSingleton::singleton(); + magma_int_t info = 0; space.fence(); if constexpr (std::is_same_v) { if (with_pivot) { - magma_sgesv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), - LDB, &info); + magma_sgesv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, IPIV.data(), + reinterpret_cast(B.data()), LDB, &info); } else { - magma_sgesv_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), - LDA, reinterpret_cast(B.data()), - LDB, &info); + magma_sgesv_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); } } if constexpr (std::is_same_v) { if (with_pivot) { - magma_dgesv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), - LDB, &info); + magma_dgesv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, IPIV.data(), + reinterpret_cast(B.data()), LDB, &info); } else { - magma_dgesv_nopiv_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - reinterpret_cast(B.data()), LDB, &info); + magma_dgesv_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); } } if constexpr (std::is_same_v>) { if (with_pivot) { - magma_cgesv_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), LDB, - &info); + magma_cgesv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, IPIV.data(), + reinterpret_cast(B.data()), LDB, &info); } else { - magma_cgesv_nopiv_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - reinterpret_cast(B.data()), LDB, &info); + magma_cgesv_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); } } if constexpr (std::is_same_v>) { if (with_pivot) { - magma_zgesv_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), 
reinterpret_cast(B.data()), LDB, - &info); + magma_zgesv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, IPIV.data(), + reinterpret_cast(B.data()), LDB, &info); } else { - magma_zgesv_nopiv_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - reinterpret_cast(B.data()), LDB, &info); + magma_zgesv_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); } } ExecSpace().fence(); Kokkos::Profiling::popRegion(); } -#define KOKKOSLAPACK_GESV_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct GESV< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - gesv_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using BViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PViewType = Kokkos::View< \ - magma_int_t*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - \ - static void gesv(const Kokkos::Cuda& space, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ - magmaGesvWrapper(space, A, B, IPIV); \ - } \ +#define KOKKOSLAPACK_GESV_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GESV, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + gesv_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void gesv(const Kokkos::Cuda& space, const AViewType& A, const BViewType& B, const PViewType& IPIV) { \ + 
magmaGesvWrapper(space, A, B, IPIV); \ + } \ }; KOKKOSLAPACK_GESV_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_GESV_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) } // namespace Impl } // namespace KokkosLapack @@ -296,10 +244,9 @@ KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void cusolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, - const AViewType& A, const BViewType& B) { +template +void cusolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, const AViewType& A, + const BViewType& B) { using memory_space = typename AViewType::memory_space; using Scalar = typename BViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; @@ -307,137 +254,109 @@ void cusolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, const int m = A.extent_int(0); const int n = A.extent_int(1); - const int lda = std::is_same_v ? A.stride(0) - : A.stride(1); + const int lda = std::is_same_v ? A.stride(0) : A.stride(1); (void)B; const int nrhs = B.extent_int(1); - const int ldb = std::is_same_v ? B.stride(0) - : B.stride(1); - int lwork = 0; + const int ldb = std::is_same_v ? 
B.stride(0) : B.stride(1); + int lwork = 0; Kokkos::View info("getrf info"); CudaLapackSingleton& s = CudaLapackSingleton::singleton(); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSetStream(s.handle, space.cuda_stream())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, space.cuda_stream())); if constexpr (std::is_same_v) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); Kokkos::View Workspace("getrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf(s.handle, m, n, A.data(), - lda, Workspace.data(), - IPIV.data(), info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgetrf(s.handle, m, n, A.data(), lda, Workspace.data(), IPIV.data(), info.data())); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, - IPIV.data(), B.data(), ldb, info.data())); + cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, IPIV.data(), B.data(), ldb, info.data())); } if constexpr (std::is_same_v) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); Kokkos::View Workspace("getrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf(s.handle, m, n, A.data(), - lda, Workspace.data(), - IPIV.data(), info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgetrf(s.handle, m, n, A.data(), lda, Workspace.data(), IPIV.data(), info.data())); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, - IPIV.data(), B.data(), ldb, info.data())); + cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, IPIV.data(), B.data(), ldb, info.data())); } if constexpr (std::is_same_v>) { - 
KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf_bufferSize( - s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgetrf_bufferSize(s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); Kokkos::View Workspace("getrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnCgetrf(s.handle, m, n, reinterpret_cast(A.data()), - lda, reinterpret_cast(Workspace.data()), - IPIV.data(), info.data())); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs( - s.handle, CUBLAS_OP_N, m, nrhs, reinterpret_cast(A.data()), - lda, IPIV.data(), reinterpret_cast(B.data()), ldb, - info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf(s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Workspace.data()), IPIV.data(), + info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs(s.handle, CUBLAS_OP_N, m, nrhs, + reinterpret_cast(A.data()), lda, IPIV.data(), + reinterpret_cast(B.data()), ldb, info.data())); } if constexpr (std::is_same_v>) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf_bufferSize( - s.handle, m, n, reinterpret_cast(A.data()), lda, - &lwork)); - Kokkos::View Workspace("getrf workspace", - lwork); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf( - s.handle, m, n, reinterpret_cast(A.data()), lda, - reinterpret_cast(Workspace.data()), IPIV.data(), - info.data())); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs( - s.handle, CUBLAS_OP_N, m, nrhs, - reinterpret_cast(A.data()), lda, IPIV.data(), - reinterpret_cast(B.data()), ldb, info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnZgetrf_bufferSize(s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf(s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Workspace.data()), IPIV.data(), + info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs(s.handle, CUBLAS_OP_N, m, nrhs, + 
reinterpret_cast(A.data()), lda, IPIV.data(), + reinterpret_cast(B.data()), ldb, info.data())); } KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); } -#define KOKKOSLAPACK_GESV_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct GESV< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - gesv_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using BViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void gesv(const Kokkos::Cuda& space, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_CUSOLVER," #SCALAR \ - "]"); \ - gesv_print_specialization(); \ - \ - cusolverGesvWrapper(space, IPIV, A, B); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_GESV_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GESV< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + gesv_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void gesv(const Kokkos::Cuda& space, const AViewType& A, const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_CUSOLVER," #SCALAR "]"); \ + gesv_print_specialization(); \ + \ + 
cusolverGesvWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_GESV_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_GESV_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) KOKKOSLAPACK_GESV_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) KOKKOSLAPACK_GESV_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -452,103 +371,78 @@ KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void rocsolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, - const AViewType& A, const BViewType& B) { +template +void rocsolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, const AViewType& A, + const BViewType& B) { using Scalar = typename BViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; using BLayout_t = typename BViewType::array_layout; const rocblas_int N = static_cast(A.extent(0)); const rocblas_int nrhs = static_cast(B.extent(1)); - const rocblas_int lda = std::is_same_v - ? A.stride(0) - : A.stride(1); - const rocblas_int ldb = std::is_same_v - ? 
B.stride(0) - : B.stride(1); + const rocblas_int lda = std::is_same_v ? A.stride(0) : A.stride(1); + const rocblas_int ldb = std::is_same_v ? B.stride(0) : B.stride(1); Kokkos::View info("rocsolver info"); - KokkosBlas::Impl::RocBlasSingleton& s = - KokkosBlas::Impl::RocBlasSingleton::singleton(); - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( - rocblas_set_stream(s.handle, space.hip_stream())); + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgesv(s.handle, N, nrhs, A.data(), - lda, IPIV.data(), B.data(), - ldb, info.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_sgesv(s.handle, N, nrhs, A.data(), lda, IPIV.data(), B.data(), ldb, info.data())); } if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgesv(s.handle, N, nrhs, A.data(), - lda, IPIV.data(), B.data(), - ldb, info.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_dgesv(s.handle, N, nrhs, A.data(), lda, IPIV.data(), B.data(), ldb, info.data())); } if constexpr (std::is_same_v>) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgesv( - s.handle, N, nrhs, reinterpret_cast(A.data()), - lda, IPIV.data(), reinterpret_cast(B.data()), - ldb, info.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgesv(s.handle, N, nrhs, reinterpret_cast(A.data()), + lda, IPIV.data(), reinterpret_cast(B.data()), + ldb, info.data())); } if constexpr (std::is_same_v>) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgesv( - s.handle, N, nrhs, reinterpret_cast(A.data()), - lda, IPIV.data(), reinterpret_cast(B.data()), - ldb, info.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_zgesv(s.handle, N, nrhs, reinterpret_cast(A.data()), lda, IPIV.data(), + reinterpret_cast(B.data()), ldb, info.data())); } KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } -#define KOKKOSLAPACK_GESV_ROCSOLVER(SCALAR, LAYOUT, 
MEM_SPACE) \ - template <> \ - struct GESV< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - gesv_eti_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using BViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void gesv(const Kokkos::HIP& space, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosLapack::gesv[TPL_ROCSOLVER," #SCALAR "]"); \ - gesv_print_specialization(); \ - \ - rocsolverGesvWrapper(space, IPIV, A, B); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_GESV_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GESV< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + gesv_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void gesv(const Kokkos::HIP& space, const AViewType& A, const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_ROCSOLVER," #SCALAR "]"); \ + gesv_print_specialization(); \ + \ + rocsolverGesvWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_GESV_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSLAPACK_GESV_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) 
-KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosLapack diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp index 7a7403209fa5..cc1ad12b9651 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp @@ -20,148 +20,104 @@ namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct svd_tpl_spec_avail { enum : bool { value = false }; }; // LAPACK -#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || \ - defined(KOKKOSKERNELS_ENABLE_TPL_MKL) -#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, EXECSPACE) \ - template <> \ - struct svd_tpl_spec_avail< \ - EXECSPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL) +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #if defined(KOKKOS_ENABLE_SERIAL) 
-KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - Kokkos::Serial) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::Serial) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Serial) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) #endif #if defined(KOKKOS_ENABLE_OPENMP) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - Kokkos::OpenMP) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::OpenMP) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::OpenMP) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) #endif #if defined(KOKKOS_ENABLE_THREADS) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - Kokkos::Threads) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::Threads) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Threads) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads) 
+KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads) #endif #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK || KOKKOSKERNELS_ENABLE_TPL_MKL // CUSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER -#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct svd_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #if 
defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif // CUDAUVMSPACE #endif // CUSOLVER // ROCSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER -#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct svd_tpl_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) 
-KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, - Kokkos::HIPManagedSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, - Kokkos::HIPManagedSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::HIPManagedSpace) -KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) #endif // HIPMANAGEDSPACE #endif // ROCSOLVER diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp index 4385fa40d636..01255bf427cd 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp @@ -22,8 +22,7 @@ namespace KokkosLapack { namespace Impl { -template +template inline void svd_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER @@ -31,8 +30,7 @@ inline void 
svd_print_specialization() { printf( "KokkosLapack::svd<> TPL Cusolver specialization for < %s , %s, %s, %s " ">\n", - typeid(AMatrix).name(), typeid(SVector).name(), typeid(UMatrix).name(), - typeid(VMatrix).name()); + typeid(AMatrix).name(), typeid(SVector).name(), typeid(UMatrix).name(), typeid(VMatrix).name()); } #endif #endif @@ -41,18 +39,15 @@ inline void svd_print_specialization() { } // namespace KokkosLapack // LAPACK -#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL) +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL) #include "KokkosLapack_Host_tpl.hpp" namespace KokkosLapack { namespace Impl { -template -void lapackSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], - const char jobvt[], const AMatrix& A, const SVector& S, - const UMatrix& U, const VMatrix& Vt) { +template +void lapackSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], const char jobvt[], const AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -74,128 +69,96 @@ void lapackSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], const int ldvt = Vt.stride(1); int lwork = -1, info = 0; - Kokkos::View rwork("svd rwork buffer", - 5 * Kokkos::min(m, n)); + Kokkos::View rwork("svd rwork buffer", 5 * Kokkos::min(m, n)); Kokkos::View work("svd work buffer", 1); if constexpr (Kokkos::ArithTraits::is_complex) { HostLapack>::gesvd( - jobu[0], jobvt[0], m, n, - reinterpret_cast*>(A.data()), lda, S.data(), + jobu[0], jobvt[0], m, n, reinterpret_cast*>(A.data()), lda, S.data(), reinterpret_cast*>(U.data()), ldu, reinterpret_cast*>(Vt.data()), ldvt, - reinterpret_cast*>(work.data()), lwork, - rwork.data(), info); + reinterpret_cast*>(work.data()), lwork, rwork.data(), info); lwork = 
static_cast(work(0).real()); work = Kokkos::View("svd work buffer", lwork); HostLapack>::gesvd( - jobu[0], jobvt[0], m, n, - reinterpret_cast*>(A.data()), lda, S.data(), + jobu[0], jobvt[0], m, n, reinterpret_cast*>(A.data()), lda, S.data(), reinterpret_cast*>(U.data()), ldu, reinterpret_cast*>(Vt.data()), ldvt, - reinterpret_cast*>(work.data()), lwork, - rwork.data(), info); + reinterpret_cast*>(work.data()), lwork, rwork.data(), info); } else { - HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), - U.data(), ldu, Vt.data(), ldvt, work.data(), - lwork, rwork.data(), info); + HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), ldvt, + work.data(), lwork, rwork.data(), info); lwork = static_cast(work(0)); work = Kokkos::View("svd work buffer", lwork); - HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), - U.data(), ldu, Vt.data(), ldvt, work.data(), - lwork, rwork.data(), info); + HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), ldvt, + work.data(), lwork, rwork.data(), info); } } -#define KOKKOSLAPACK_SVD_LAPACK(SCALAR, LAYOUT, EXEC_SPACE) \ - template <> \ - struct SVD< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - svd_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using SVector = \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using UMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using VMatrix = \ - Kokkos::View, \ - 
Kokkos::MemoryTraits>; \ - \ - static void svd(const EXEC_SPACE& space, const char jobu[], \ - const char jobvt[], const AMatrix& A, const SVector& S, \ - const UMatrix& U, const VMatrix& Vt) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR \ - "]"); \ - svd_print_specialization(); \ - \ - lapackSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_SVD_LAPACK(SCALAR, LAYOUT, EXEC_SPACE) \ + template <> \ + struct SVD, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const EXEC_SPACE& space, const char jobu[], const char jobvt[], const AMatrix& A, \ + const SVector& S, const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR "]"); \ + svd_print_specialization(); \ + \ + lapackSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #if defined(KOKKOS_ENABLE_SERIAL) KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial) KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial) -KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial) -KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial) 
+KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) #endif #if defined(KOKKOS_ENABLE_OPENMP) KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP) KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP) -KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP) -KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) #endif #if defined(KOKKOS_ENABLE_THREADS) KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads) KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads) -KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads) -KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads) #endif } // namespace Impl @@ -208,11 +171,9 @@ KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void mklSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], - const char jobvt[], const AMatrix& A, const SVector& S, - const UMatrix& U, const VMatrix& Vt) { +template +void mklSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], const char jobvt[], const AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -233,33 +194,25 @@ void mklSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], const lapack_int ldu = U.stride(1); const lapack_int ldvt = Vt.stride(1); - 
Kokkos::View rwork("svd rwork buffer", - Kokkos::min(m, n) - 1); + Kokkos::View rwork("svd rwork buffer", Kokkos::min(m, n) - 1); lapack_int ret = 0; if constexpr (std::is_same_v) { - ret = - LAPACKE_sgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, A.data(), lda, - S.data(), U.data(), ldu, Vt.data(), ldvt, rwork.data()); + ret = LAPACKE_sgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), + ldvt, rwork.data()); } if constexpr (std::is_same_v) { - ret = - LAPACKE_dgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, A.data(), lda, - S.data(), U.data(), ldu, Vt.data(), ldvt, rwork.data()); + ret = LAPACKE_dgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), + ldvt, rwork.data()); } if constexpr (std::is_same_v>) { - ret = LAPACKE_cgesvd( - LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, - reinterpret_cast(A.data()), lda, S.data(), - reinterpret_cast(U.data()), ldu, - reinterpret_cast(Vt.data()), ldvt, rwork.data()); + ret = LAPACKE_cgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, reinterpret_cast(A.data()), + lda, S.data(), reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, rwork.data()); } if constexpr (std::is_same_v>) { - ret = LAPACKE_zgesvd( - LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, - reinterpret_cast(A.data()), lda, S.data(), - reinterpret_cast(U.data()), ldu, - reinterpret_cast(Vt.data()), ldvt, - rwork.data()); + ret = LAPACKE_zgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, reinterpret_cast(A.data()), + lda, S.data(), reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, rwork.data()); } if (ret != 0) { @@ -269,90 +222,67 @@ void mklSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], } } -#define KOKKOSLAPACK_SVD_MKL(SCALAR, LAYOUT, EXEC_SPACE) \ - template <> \ - struct SVD< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ 
- Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - svd_eti_spec_avail< \ - EXEC_SPACE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using SVector = \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using UMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using VMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void svd(const EXEC_SPACE& space, const char jobu[], \ - const char jobvt[], const AMatrix& A, const SVector& S, \ - const UMatrix& U, const VMatrix& Vt) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR \ - "]"); \ - svd_print_specialization(); \ - \ - mklSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_SVD_MKL(SCALAR, LAYOUT, EXEC_SPACE) \ + template <> \ + struct SVD, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const EXEC_SPACE& space, const char jobu[], const char jobvt[], const 
AMatrix& A, \ + const SVector& S, const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR "]"); \ + svd_print_specialization(); \ + \ + mklSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #if defined(KOKKOS_ENABLE_SERIAL) KOKKOSLAPACK_SVD_MKL(float, Kokkos::LayoutLeft, Kokkos::Serial) KOKKOSLAPACK_SVD_MKL(double, Kokkos::LayoutLeft, Kokkos::Serial) KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) -KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) #endif #if defined(KOKKOS_ENABLE_OPENMP) KOKKOSLAPACK_SVD_MKL(float, Kokkos::LayoutLeft, Kokkos::OpenMP) KOKKOSLAPACK_SVD_MKL(double, Kokkos::LayoutLeft, Kokkos::OpenMP) KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) -KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) #endif #if defined(KOKKOS_ENABLE_THREADS) KOKKOSLAPACK_SVD_MKL(float, Kokkos::LayoutLeft, Kokkos::Threads) KOKKOSLAPACK_SVD_MKL(double, Kokkos::LayoutLeft, Kokkos::Threads) -KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads) -KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads) #endif } // namespace Impl @@ -366,11 +296,9 @@ KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void cusolverSvdWrapper(const ExecutionSpace& space, const char jobu[], - const char jobvt[], const AMatrix& A, const SVector& S, - const UMatrix& U, const VMatrix& Vt) { +template +void cusolverSvdWrapper(const ExecutionSpace& space, const char jobu[], const char jobvt[], const 
AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -393,128 +321,98 @@ void cusolverSvdWrapper(const ExecutionSpace& space, const char jobu[], int lwork = 0; Kokkos::View info("svd info"); - Kokkos::View rwork("svd rwork buffer", - Kokkos::min(m, n) - 1); + Kokkos::View rwork("svd rwork buffer", Kokkos::min(m, n) - 1); CudaLapackSingleton& s = CudaLapackSingleton::singleton(); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSetStream(s.handle, space.cuda_stream())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, space.cuda_stream())); if constexpr (std::is_same_v) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSgesvd_bufferSize(s.handle, m, n, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd( - s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), - ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd(s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), + info.data())); } if constexpr (std::is_same_v) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnDgesvd_bufferSize(s.handle, m, n, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd( - s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), - ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd(s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, work.data(), 
lwork, rwork.data(), + info.data())); } if constexpr (std::is_same_v>) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnCgesvd_bufferSize(s.handle, m, n, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnCgesvd(s.handle, jobu[0], jobvt[0], m, n, - reinterpret_cast(A.data()), lda, S.data(), - reinterpret_cast(U.data()), ldu, - reinterpret_cast(Vt.data()), ldvt, - reinterpret_cast(work.data()), lwork, - rwork.data(), info.data())); + cusolverDnCgesvd(s.handle, jobu[0], jobvt[0], m, n, reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, reinterpret_cast(Vt.data()), ldvt, + reinterpret_cast(work.data()), lwork, rwork.data(), info.data())); } if constexpr (std::is_same_v>) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnZgesvd_bufferSize(s.handle, m, n, &lwork)); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnZgesvd(s.handle, jobu[0], jobvt[0], m, n, - reinterpret_cast(A.data()), lda, - S.data(), reinterpret_cast(U.data()), - ldu, reinterpret_cast(Vt.data()), - ldvt, reinterpret_cast(work.data()), - lwork, rwork.data(), info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgesvd( + s.handle, jobu[0], jobvt[0], m, n, reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, reinterpret_cast(Vt.data()), ldvt, + reinterpret_cast(work.data()), lwork, rwork.data(), info.data())); } KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); } -#define KOKKOSLAPACK_SVD_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct SVD< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - 
Kokkos::MemoryTraits>, \ - true, \ - svd_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AMatrix = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using SVector = \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using UMatrix = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using VMatrix = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void svd(const Kokkos::Cuda& space, const char jobu[], \ - const char jobvt[], const AMatrix& A, const SVector& S, \ - const UMatrix& U, const VMatrix& Vt) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_CUSOLVER," #SCALAR \ - "]"); \ - svd_print_specialization(); \ - \ - cusolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_SVD_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct SVD, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>; \ + using UMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const Kokkos::Cuda& space, const char jobu[], const char jobvt[], const AMatrix& A, \ + const SVector& S, const UMatrix& U, const VMatrix& 
Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_CUSOLVER," #SCALAR "]"); \ + svd_print_specialization(); \ + \ + cusolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_SVD_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_SVD_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) KOKKOSLAPACK_SVD_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) KOKKOSLAPACK_SVD_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -529,11 +427,9 @@ KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void rocsolverSvdWrapper(const ExecutionSpace& space, const char jobu[], - const char jobvt[], const AMatrix& A, const SVector& S, - const UMatrix& U, const VMatrix& Vt) { +template +void rocsolverSvdWrapper(const ExecutionSpace& space, const char jobu[], const char jobvt[], const AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -574,111 +470,84 @@ void rocsolverSvdWrapper(const 
ExecutionSpace& space, const char jobu[], const rocblas_workmode WorkMode = rocblas_outofplace; Kokkos::View info("svd info"); - Kokkos::View rwork("svd rwork buffer", - Kokkos::min(m, n) - 1); + Kokkos::View rwork("svd rwork buffer", Kokkos::min(m, n) - 1); - KokkosBlas::Impl::RocBlasSingleton& s = - KokkosBlas::Impl::RocBlasSingleton::singleton(); - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( - rocblas_set_stream(s.handle, space.hip_stream())); + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, space.hip_stream())); if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgesvd( - s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), U.data(), - ldu, Vt.data(), ldvt, rwork.data(), WorkMode, info.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgesvd(s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, rwork.data(), WorkMode, + info.data())); } if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgesvd( - s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), U.data(), - ldu, Vt.data(), ldvt, rwork.data(), WorkMode, info.data())); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgesvd(s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, rwork.data(), WorkMode, + info.data())); } if constexpr (std::is_same_v>) { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgesvd( - s.handle, UVecMode, VVecMode, m, n, - reinterpret_cast(A.data()), lda, S.data(), - reinterpret_cast(U.data()), ldu, - reinterpret_cast(Vt.data()), ldvt, rwork.data(), - WorkMode, info.data())); + s.handle, UVecMode, VVecMode, m, n, reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, reinterpret_cast(Vt.data()), + ldvt, rwork.data(), WorkMode, info.data())); } if constexpr (std::is_same_v>) { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgesvd( - s.handle, UVecMode, 
VVecMode, m, n, - reinterpret_cast(A.data()), lda, S.data(), - reinterpret_cast(U.data()), ldu, - reinterpret_cast(Vt.data()), ldvt, - rwork.data(), WorkMode, info.data())); + s.handle, UVecMode, VVecMode, m, n, reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, reinterpret_cast(Vt.data()), + ldvt, rwork.data(), WorkMode, info.data())); } KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } -#define KOKKOSLAPACK_SVD_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct SVD< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - svd_eti_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using SVector = \ - Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using UMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using VMatrix = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void svd(const Kokkos::HIP& space, const char jobu[], \ - const char jobvt[], const AMatrix& A, const SVector& S, \ - const UMatrix& U, const VMatrix& Vt) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_ROCSOLVER," #SCALAR \ - "]"); \ - svd_print_specialization(); \ - \ - rocsolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_SVD_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct SVD< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, 
Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>; \ + using UMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const Kokkos::HIP& space, const char jobu[], const char jobvt[], const AMatrix& A, \ + const SVector& S, const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_ROCSOLVER," #SCALAR "]"); \ + svd_print_specialization(); \ + \ + rocsolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_SVD_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSLAPACK_SVD_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) KOKKOSLAPACK_SVD_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) KOKKOSLAPACK_SVD_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) -KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPManagedSpace) -KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) 
+KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp index 7251d97086e2..6ec8d26a98bd 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp @@ -27,14 +27,13 @@ struct trtri_tpl_spec_avail { }; // Generic Host side LAPACK (could be MKL or whatever) -#define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL(SCALAR, LAYOUTA, MEMSPACE) \ - template \ - struct trtri_tpl_spec_avail< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL(SCALAR, LAYOUTA, MEMSPACE) \ + template \ + struct trtri_tpl_spec_avail< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK @@ -51,80 +50,52 @@ struct trtri_tpl_spec_avail { #define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUTA, MEMSPACE) #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace) #ifdef 
KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, - Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, - Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, 
Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, - Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, - Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HostSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) #ifdef KOKKOS_ENABLE_CUDA -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) 
+KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp index b7e9c6e341e7..b326e722a0f4 100644 --- a/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp @@ -27,112 +27,86 @@ namespace KokkosLapack { namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK -#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ - MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRTRI >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View > \ - RViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void trtri(const RViewType& R, const char uplo[], \ - const char diag[], const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE "]"); \ - const int M = static_cast(A.extent(0)); \ - \ - bool A_is_layout_left = \ - std::is_same::value; \ - \ - const int AST = A_is_layout_left ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - \ - char uplo_; \ - \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = A_is_layout_left ? 'L' : 'U'; \ - else \ - uplo_ = A_is_layout_left ? 
'U' : 'L'; \ - \ - R() = HostLapack::trtri( \ - uplo_, diag[0], M, \ - reinterpret_cast(A.data()), LDA); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRTRI >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View > \ + RViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void trtri(const RViewType& R, const char uplo[], const char diag[], const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE "]"); \ + const int M = static_cast(A.extent(0)); \ + \ + bool A_is_layout_left = std::is_same::value; \ + \ + const int AST = A_is_layout_left ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + \ + char uplo_; \ + \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = A_is_layout_left ? 'L' : 'U'; \ + else \ + uplo_ = A_is_layout_left ? 
'U' : 'L'; \ + \ + R() = HostLapack::trtri(uplo_, diag[0], M, \ + reinterpret_cast(A.data()), LDA); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #else -#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ - MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, \ - MAGMA_FN, LAYOUTA, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct TRTRI >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View > \ - RViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void trtri(const RViewType& R, const char uplo[], \ - const char diag[], const AViewType& A) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE "]"); \ - magma_int_t M = static_cast(A.extent(0)); \ - \ - bool A_is_layout_left = \ - std::is_same::value; \ - \ - magma_int_t AST = A_is_layout_left ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - magma_int_t info = 0; \ - magma_uplo_t uplo_; \ - magma_diag_t diag_; \ - \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = A_is_layout_left ? MagmaLower : MagmaUpper; \ - else \ - uplo_ = A_is_layout_left ? 
MagmaUpper : MagmaLower; \ - \ - if (diag[0] == 'U' || diag[0] == 'u') \ - diag_ = MagmaUnit; \ - else \ - diag_ = MagmaNonUnit; \ - \ - KokkosLapack::Impl::MagmaSingleton& s = \ - KokkosLapack::Impl::MagmaSingleton::singleton(); \ - R() = MAGMA_FN(uplo_, diag_, M, \ - reinterpret_cast( \ - const_cast(A.data())), \ - LDA, &info); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct TRTRI >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View > \ + RViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void trtri(const RViewType& R, const char uplo[], const char diag[], const AViewType& A) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE "]"); \ + magma_int_t M = static_cast(A.extent(0)); \ + \ + bool A_is_layout_left = std::is_same::value; \ + \ + magma_int_t AST = A_is_layout_left ? A.stride(1) : A.stride(0), LDA = (AST == 0) ? 1 : AST; \ + magma_int_t info = 0; \ + magma_uplo_t uplo_; \ + magma_diag_t diag_; \ + \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = A_is_layout_left ? MagmaLower : MagmaUpper; \ + else \ + uplo_ = A_is_layout_left ? 
MagmaUpper : MagmaLower; \ + \ + if (diag[0] == 'U' || diag[0] == 'u') \ + diag_ = MagmaUnit; \ + else \ + diag_ = MagmaNonUnit; \ + \ + KokkosLapack::Impl::MagmaSingleton& s = KokkosLapack::Impl::MagmaSingleton::singleton(); \ + R() = MAGMA_FN(uplo_, diag_, M, reinterpret_cast(const_cast(A.data())), LDA, \ + &info); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #else -#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, \ - MAGMA_FN, LAYOUTA, MEM_SPACE, \ - ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA // Explicitly define the TRTRI class for all permutations listed below @@ -140,63 +114,50 @@ namespace Impl { // Handle type and space permutations #ifdef KOKKOS_ENABLE_CUDA -#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ - ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ - LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ - LAYOUTA, Kokkos::CudaUVMSpace, \ +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, LAYOUTA, Kokkos::CudaSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, LAYOUTA, Kokkos::CudaUVMSpace, \ ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ - ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ - LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, 
magma_strtri_gpu, \ - LAYOUTA, Kokkos::CudaUVMSpace, \ +#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, LAYOUTA, Kokkos::CudaUVMSpace, \ ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, \ - std::complex, LAYOUTA, \ - Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, \ - magmaDoubleComplex_ptr, magma_ztrtri_gpu, \ - LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA( \ - Kokkos::complex, magmaDoubleComplex_ptr, magma_ztrtri_gpu, \ - LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) - -#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ - LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, \ - magmaFloatComplex_ptr, magma_ctrtri_gpu, \ - LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA( \ - Kokkos::complex, magmaFloatComplex_ptr, magma_ctrtri_gpu, \ - LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, LAYOUTA, Kokkos::HostSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, magma_ztrtri_gpu, LAYOUTA, \ + Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, magma_ztrtri_gpu, LAYOUTA, \ + Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) + +#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, 
LAYOUTA, Kokkos::HostSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, magma_ctrtri_gpu, LAYOUTA, \ + Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, magma_ctrtri_gpu, LAYOUTA, \ + Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) #else -#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ - ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ - ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, \ - std::complex, LAYOUTA, \ - Kokkos::HostSpace, ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, LAYOUTA, Kokkos::HostSpace, \ + ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ - LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, LAYOUTA, Kokkos::HostSpace, \ + ETI_SPEC_AVAIL) #endif diff --git a/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_gesv.hpp b/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_gesv.hpp index 77774d1d3f3a..653ed2cbf26b 100644 --- a/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_gesv.hpp +++ b/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_gesv.hpp @@ 
-16,14 +16,11 @@ // only enable this test where KokkosLapack supports gesv: // CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || \ - defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ - (defined(TEST_HIP_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ - defined(TEST_THREADS_LAPACK_CPP))) +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ + (defined(TEST_HIP_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) #include #include @@ -66,12 +63,8 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); // Initialize data. - Kokkos::fill_random( - A, rand_pool, - Kokkos::rand, ScalarA>::max()); - Kokkos::fill_random( - X0, rand_pool, - Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(X0, rand_pool, Kokkos::rand, ScalarA>::max()); // Generate RHS B = A*X0. 
ScalarA alpha = 1.0; @@ -84,9 +77,8 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { Kokkos::deep_copy(h_X0, X0); // Allocate IPIV view on host - using ViewTypeP = typename std::conditional< - MAGMA, Kokkos::View, - Kokkos::View>::type; + using ViewTypeP = typename std::conditional, + Kokkos::View>::type; ViewTypeP ipiv; int Nt = 0; if (mode[0] == 'Y') { @@ -105,8 +97,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { bool notpl_runtime_err = false; #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL - nopivot_runtime_err = (!std::is_same::value) && + nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); notpl_runtime_err = false; #else @@ -138,8 +129,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { printf( " Error %d, pivot %c, padding %c: result( %.15lf ) !=" "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n", - N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i), - ats::abs(h_B(i) - h_X0(i)), eps); + N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i), ats::abs(h_B(i) - h_X0(i)), eps); break; } } @@ -147,8 +137,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { } template -void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, - int nrhs) { +void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) { using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; using ats = Kokkos::ArithTraits; @@ -177,12 +166,8 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); // Initialize data. 
- Kokkos::fill_random( - A, rand_pool, - Kokkos::rand, ScalarA>::max()); - Kokkos::fill_random( - X0, rand_pool, - Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(X0, rand_pool, Kokkos::rand, ScalarA>::max()); // Generate RHS B = A*X0. ScalarA alpha = 1.0; @@ -195,9 +180,8 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, Kokkos::deep_copy(h_X0, X0); // Allocate IPIV view on host - using ViewTypeP = typename std::conditional< - MAGMA, Kokkos::View, - Kokkos::View>::type; + using ViewTypeP = typename std::conditional, + Kokkos::View>::type; ViewTypeP ipiv; int Nt = 0; if (mode[0] == 'Y') { @@ -216,8 +200,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, bool notpl_runtime_err = false; #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL - nopivot_runtime_err = (!std::is_same::value) && + nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); notpl_runtime_err = false; #else @@ -263,49 +246,32 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, template int test_gesv(const char* mode) { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using view_type_a_ll = Kokkos::View; using view_type_b_ll = Kokkos::View; -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ - (defined(TEST_HIP_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ - defined(TEST_THREADS_LAPACK_CPP))) - Test::impl_test_gesv( - &mode[0], "N", 2); // no padding - Test::impl_test_gesv( - &mode[0], "N", 13); 
// no padding - Test::impl_test_gesv( - &mode[0], "N", 179); // no padding - Test::impl_test_gesv( - &mode[0], "N", 64); // no padding - Test::impl_test_gesv( - &mode[0], "N", 1024); // no padding +#if (defined(TEST_CUDA_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) + Test::impl_test_gesv(&mode[0], "N", 2); // no padding + Test::impl_test_gesv(&mode[0], "N", 13); // no padding + Test::impl_test_gesv(&mode[0], "N", 179); // no padding + Test::impl_test_gesv(&mode[0], "N", 64); // no padding + Test::impl_test_gesv(&mode[0], "N", 1024); // no padding #elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - Test::impl_test_gesv( - &mode[0], "N", 2); // no padding - Test::impl_test_gesv( - &mode[0], "N", 13); // no padding - Test::impl_test_gesv( - &mode[0], "N", 179); // no padding - Test::impl_test_gesv( - &mode[0], "N", 64); // no padding - Test::impl_test_gesv( - &mode[0], "N", 1024); // no padding - - Test::impl_test_gesv( - &mode[0], "Y", - 13); // padding - Test::impl_test_gesv( - &mode[0], "Y", - 179); // padding + if constexpr (std::is_same_v) { + Test::impl_test_gesv(&mode[0], "N", 2); // no padding + Test::impl_test_gesv(&mode[0], "N", 13); // no padding + Test::impl_test_gesv(&mode[0], "N", 179); // no padding + Test::impl_test_gesv(&mode[0], "N", 64); // no padding + Test::impl_test_gesv(&mode[0], "N", 1024); // no padding + + Test::impl_test_gesv(&mode[0], "Y", + 13); // padding + Test::impl_test_gesv(&mode[0], "Y", + 179); // padding } #endif #endif @@ -318,48 +284,31 @@ int test_gesv(const char* mode) { template int test_gesv_mrhs(const char* mode) { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using view_type_a_ll = Kokkos::View; using view_type_b_ll = Kokkos::View; -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ - (defined(TEST_HIP_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ - defined(TEST_THREADS_LAPACK_CPP))) - Test::impl_test_gesv_mrhs( - &mode[0], "N", 2, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 13, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 179, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 64, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 1024, 5); // no padding +#if (defined(TEST_CUDA_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) + Test::impl_test_gesv_mrhs(&mode[0], "N", 2, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 13, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 179, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 64, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 1024, 5); // no padding // When appropriate run MAGMA specific tests #elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - Test::impl_test_gesv_mrhs( - &mode[0], "N", 2, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 13, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 179, 5); // no padding - Test::impl_test_gesv_mrhs( - &mode[0], "N", 64, 5); // no padding - 
Test::impl_test_gesv_mrhs( - &mode[0], "N", 1024, 5); // no padding - - Test::impl_test_gesv_mrhs( - &mode[0], "Y", 13, 5); // padding - Test::impl_test_gesv_mrhs( - &mode[0], "Y", 179, 5); // padding + if constexpr (std::is_same_v) { + Test::impl_test_gesv_mrhs(&mode[0], "N", 2, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 13, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 179, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 64, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 1024, 5); // no padding + + Test::impl_test_gesv_mrhs(&mode[0], "Y", 13, 5); // padding + Test::impl_test_gesv_mrhs(&mode[0], "Y", 179, 5); // padding } #endif #endif @@ -370,8 +319,7 @@ int test_gesv_mrhs(const char* mode) { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_float"); test_gesv("N"); // No pivoting @@ -388,8 +336,7 @@ TEST_F(TestCategory, gesv_mrhs_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_double"); test_gesv("N"); // No pivoting @@ -406,8 +353,7 @@ TEST_F(TestCategory, gesv_mrhs_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_complex_double"); test_gesv, TestDevice>("N"); // No pivoting @@ -424,8 
+370,7 @@ TEST_F(TestCategory, gesv_mrhs_complex_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_complex_float"); test_gesv, TestDevice>("N"); // No pivoting diff --git a/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_svd.hpp b/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_svd.hpp index da9f9ba480b2..a47dbbe9b9b0 100644 --- a/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_svd.hpp +++ b/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_svd.hpp @@ -26,10 +26,8 @@ namespace Test { template -void check_triple_product( - const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt, - typename Kokkos::ArithTraits< - typename AMatrix::non_const_value_type>::mag_type tol) { +void check_triple_product(const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt, + typename Kokkos::ArithTraits::mag_type tol) { // After a successful SVD decomposition we have A=U*S*V // So using gemm we should be able to compare the above // triple product to the original matrix A. 
@@ -40,8 +38,7 @@ void check_triple_product( // First compute the left side of the product: temp = U*S Kokkos::parallel_for( - Kokkos::RangePolicy(0, U.extent_int(0)), - KOKKOS_LAMBDA(const int& rowIdx) { + Kokkos::RangePolicy(0, U.extent_int(0)), KOKKOS_LAMBDA(const int& rowIdx) { for (int colIdx = 0; colIdx < U.extent_int(1); ++colIdx) { if (colIdx < S.extent_int(0)) { temp(rowIdx, colIdx) = U(rowIdx, colIdx) * S(colIdx); @@ -69,8 +66,7 @@ void check_triple_product( template void check_unitary_orthogonal_matrix( - const Matrix& M, typename Kokkos::ArithTraits< - typename Matrix::non_const_value_type>::mag_type tol) { + const Matrix& M, typename Kokkos::ArithTraits::mag_type tol) { // After a successful SVD decomposition the matrices // U and V are unitary matrices. Thus we can check // the property UUt=UtU=I and VVt=VtV=I using gemm. @@ -83,11 +79,9 @@ void check_unitary_orthogonal_matrix( for (int rowIdx = 0; rowIdx < M.extent_int(0); ++rowIdx) { for (int colIdx = 0; colIdx < M.extent_int(0); ++colIdx) { if (rowIdx == colIdx) { - EXPECT_NEAR_KK_REL(I0_h(rowIdx, colIdx), - Kokkos::ArithTraits::one(), tol); + EXPECT_NEAR_KK_REL(I0_h(rowIdx, colIdx), Kokkos::ArithTraits::one(), tol); } else { - EXPECT_NEAR_KK(I0_h(rowIdx, colIdx), - Kokkos::ArithTraits::zero(), tol); + EXPECT_NEAR_KK(I0_h(rowIdx, colIdx), Kokkos::ArithTraits::zero(), tol); } } } @@ -99,11 +93,9 @@ void check_unitary_orthogonal_matrix( for (int rowIdx = 0; rowIdx < M.extent_int(1); ++rowIdx) { for (int colIdx = 0; colIdx < M.extent_int(1); ++colIdx) { if (rowIdx == colIdx) { - EXPECT_NEAR_KK_REL(I1_h(rowIdx, colIdx), - Kokkos::ArithTraits::one(), tol); + EXPECT_NEAR_KK_REL(I1_h(rowIdx, colIdx), Kokkos::ArithTraits::one(), tol); } else { - EXPECT_NEAR_KK(I1_h(rowIdx, colIdx), - Kokkos::ArithTraits::zero(), tol); + EXPECT_NEAR_KK(I1_h(rowIdx, colIdx), Kokkos::ArithTraits::zero(), tol); } } } @@ -113,9 +105,8 @@ template int impl_analytic_2x2_svd() { using scalar_type = typename AMatrix::value_type; 
using mag_type = typename Kokkos::ArithTraits::mag_type; - using vector_type = - Kokkos::View; - using KAT_S = Kokkos::ArithTraits; + using vector_type = Kokkos::View; + using KAT_S = Kokkos::ArithTraits; const mag_type eps = KAT_S::eps(); @@ -147,8 +138,7 @@ int impl_analytic_2x2_svd() { // The singular values for this problem // are known: sqrt(45) and sqrt(5) - EXPECT_NEAR_KK_REL(S_h(0), static_cast(Kokkos::sqrt(45)), - 100 * eps); + EXPECT_NEAR_KK_REL(S_h(0), static_cast(Kokkos::sqrt(45)), 100 * eps); EXPECT_NEAR_KK_REL(S_h(1), static_cast(Kokkos::sqrt(5)), 100 * eps); // The singular vectors should be identical @@ -156,21 +146,16 @@ int impl_analytic_2x2_svd() { // component of the vectors to determine // the proper signed comparison. std::vector Uref = { - static_cast(1 / Kokkos::sqrt(10)), - static_cast(3 / Kokkos::sqrt(10)), - static_cast(-3 / Kokkos::sqrt(10)), - static_cast(1 / Kokkos::sqrt(10))}; + static_cast(1 / Kokkos::sqrt(10)), static_cast(3 / Kokkos::sqrt(10)), + static_cast(-3 / Kokkos::sqrt(10)), static_cast(1 / Kokkos::sqrt(10))}; std::vector Vtref = { - static_cast(1 / Kokkos::sqrt(2)), - static_cast(-1 / Kokkos::sqrt(2)), - static_cast(1 / Kokkos::sqrt(2)), - static_cast(1 / Kokkos::sqrt(2))}; + static_cast(1 / Kokkos::sqrt(2)), static_cast(-1 / Kokkos::sqrt(2)), + static_cast(1 / Kokkos::sqrt(2)), static_cast(1 / Kokkos::sqrt(2))}; // Both rotations and reflections are valid // vector basis so we need to check both signs // to confirm proper SVD was achieved. 
- Kokkos::View U_real("U real", 2, 2), - Vt_real("Vt real", 2, 2); + Kokkos::View U_real("U real", 2, 2), Vt_real("Vt real", 2, 2); if constexpr (KAT_S::is_complex) { U_real(0, 0) = U_h(0, 0).real(); U_real(0, 1) = U_h(0, 1).real(); @@ -219,9 +204,8 @@ template int impl_analytic_2x3_svd() { using scalar_type = typename AMatrix::value_type; using mag_type = typename Kokkos::ArithTraits::mag_type; - using vector_type = - Kokkos::View; - using KAT_S = Kokkos::ArithTraits; + using vector_type = Kokkos::View; + using KAT_S = Kokkos::ArithTraits; const mag_type tol = 100 * KAT_S::eps(); @@ -277,8 +261,7 @@ int impl_analytic_2x3_svd() { // Both rotations and reflections are valid // vector basis so we need to check both signs // to confirm proper SVD was achieved. - Kokkos::View U_real("U real", 2, 2), - Vt_real("Vt real", 3, 3); + Kokkos::View U_real("U real", 2, 2), Vt_real("Vt real", 3, 3); if constexpr (KAT_S::is_complex) { U_real(0, 0) = U_h(0, 0).real(); U_real(0, 1) = U_h(0, 1).real(); @@ -350,9 +333,8 @@ template int impl_analytic_3x2_svd() { using scalar_type = typename AMatrix::value_type; using mag_type = typename Kokkos::ArithTraits::mag_type; - using vector_type = - Kokkos::View; - using KAT_S = Kokkos::ArithTraits; + using vector_type = Kokkos::View; + using KAT_S = Kokkos::ArithTraits; const mag_type tol = 100 * KAT_S::eps(); @@ -396,8 +378,7 @@ int impl_analytic_3x2_svd() { // Both rotations and reflections are valid // vector basis so we need to check both signs // to confirm proper SVD was achieved. 
- Kokkos::View U_real("U real", 3, 3), - Vt_real("Vt real", 2, 2); + Kokkos::View U_real("U real", 3, 3), Vt_real("Vt real", 2, 2); if constexpr (KAT_S::is_complex) { U_real(0, 0) = U_h(0, 0).real(); U_real(0, 1) = U_h(0, 1).real(); @@ -471,8 +452,7 @@ int impl_test_svd(const int m, const int n) { using scalar_type = typename AMatrix::value_type; using KAT_S = Kokkos::ArithTraits; using mag_type = typename KAT_S::mag_type; - using vector_type = - Kokkos::View; + using vector_type = Kokkos::View; const mag_type max_val = 10; const mag_type tol = 2000 * max_val * KAT_S::eps(); @@ -480,8 +460,7 @@ int impl_test_svd(const int m, const int n) { AMatrix A("A", m, n), U("U", m, m), Vt("Vt", n, n), Aref("A ref", m, n); vector_type S("S", Kokkos::min(m, n)); - const uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); + const uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); Kokkos::Random_XorShift64_Pool rand_pool(seed); // Initialize A with random numbers @@ -492,8 +471,7 @@ int impl_test_svd(const int m, const int n) { // Working around CUSOLVER constraint for m >= n #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { if (m >= n) { KokkosLapack::svd("A", "A", A, S, U, Vt); } else { @@ -523,10 +501,8 @@ int test_svd() { int ret; #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_layout_left = - Kokkos::View; + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_left = Kokkos::View; ret = Test::impl_analytic_2x2_svd(); EXPECT_EQ(ret, 0); @@ -554,10 +530,8 @@ int test_svd() { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_layout_right = - Kokkos::View; + 
(!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_right = Kokkos::View; ret = Test::impl_analytic_2x2_svd(); EXPECT_EQ(ret, 0); @@ -589,18 +563,15 @@ int test_svd() { template int test_svd_wrapper() { -#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || \ - defined(KOKKOSKERNELS_ENABLE_TPL_MKL) - if constexpr (std::is_same_v) { +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + if constexpr (std::is_same_v) { // Using a device side space with LAPACK/MKL return test_svd(); } #endif #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { // Using a Cuda device with CUSOLVER return test_svd(); } @@ -618,8 +589,7 @@ int test_svd_wrapper() { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, svd_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_float"); test_svd_wrapper(); @@ -628,8 +598,7 @@ TEST_F(TestCategory, svd_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, svd_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_double"); test_svd_wrapper(); @@ -638,8 +607,7 @@ TEST_F(TestCategory, svd_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, svd_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_complex_float"); test_svd_wrapper, TestDevice>(); @@ -648,8 +616,7 @@ 
TEST_F(TestCategory, svd_complex_float) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, svd_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_complex_double"); test_svd_wrapper, TestDevice>(); diff --git a/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_trtri.hpp b/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_trtri.hpp index a19e575d8987..b555ea8aafa6 100644 --- a/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_trtri.hpp +++ b/packages/kokkos-kernels/lapack/unit_test/Test_Lapack_trtri.hpp @@ -43,8 +43,7 @@ struct NonUnitDiagTRTRI { KOKKOS_INLINE_FUNCTION void operator()(const int& i) const { A_(i, i) = A_(i, i) + 10; } }; -template +template struct VanillaGEMM { bool A_t, B_t, A_c, B_c; int N, K; @@ -61,12 +60,9 @@ struct VanillaGEMM { ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { // GNU COMPILER BUG WORKAROUND -#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) +#if defined(KOKKOS_COMPILER_GNU) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int i = team.league_rank(); #else const int i = team.league_rank(); @@ -97,8 +93,7 @@ struct VanillaGEMM { }; template -int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, - const int M, const int N) { +int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, const int M, const int N) { using execution_space = typename ViewTypeA::device_type::execution_space; using ScalarA = typename ViewTypeA::value_type; using APT = Kokkos::ArithTraits; @@ -111,9 +106,8 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* 
diag, ViewTypeA A("A", M, N); ViewTypeA A_original("A_original", M, N); ViewTypeA A_I("A_I", M, N); // is I taken...? - uint64_t seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - ScalarA beta = ScalarA(0); + uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + ScalarA beta = ScalarA(0); ScalarA cur_check_val; // Either 1 or 0, to check A_I // const int As0 = A.stride(0), As1 = A.stride(1); @@ -137,8 +131,7 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, } } // Set just 1 value in the diagonal to 0. - if (M > 0 && N > 0) - host_A(bad_diag_idx - 1, bad_diag_idx - 1) = ScalarA(0); + if (M > 0 && N > 0) host_A(bad_diag_idx - 1, bad_diag_idx - 1) = ScalarA(0); Kokkos::deep_copy(A, host_A); } return KokkosLapack::trtri(uplo, diag, A); @@ -151,21 +144,17 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, Kokkos::Random_XorShift64_Pool rand_pool(seed); // Initialize A with deterministic random numbers - Kokkos::fill_random( - A, rand_pool, - Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); if ((diag[0] == 'U') || (diag[0] == 'u')) { using functor_type = UnitDiagTRTRI; functor_type udtrtri(A); // Initialize As diag with 1s - Kokkos::parallel_for("KokkosLapack::Test::UnitDiagTRTRI", - Kokkos::RangePolicy(0, M), udtrtri); + Kokkos::parallel_for("KokkosLapack::Test::UnitDiagTRTRI", Kokkos::RangePolicy(0, M), udtrtri); } else { //(diag[0]=='N')||(diag[0]=='n') using functor_type = NonUnitDiagTRTRI; functor_type nudtrtri(A); // Initialize As diag with A(i,i)+10 - Kokkos::parallel_for("KokkosLapack::Test::NonUnitDiagTRTRI", - Kokkos::RangePolicy(0, M), nudtrtri); + Kokkos::parallel_for("KokkosLapack::Test::NonUnitDiagTRTRI", Kokkos::RangePolicy(0, M), nudtrtri); } Kokkos::fence(); Kokkos::deep_copy(host_A, A); @@ -199,8 +188,7 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, 
Kokkos::fence(); if (ret) { - printf("KokkosLapack::trtri(%c, %c, %s) returned %d\n", uplo[0], diag[0], - typeid(ViewTypeA).name(), ret); + printf("KokkosLapack::trtri(%c, %c, %s) returned %d\n", uplo[0], diag[0], typeid(ViewTypeA).name(), ret); return ret; } @@ -228,12 +216,10 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, vgemm.C = A_I; // out vgemm.alpha = ScalarA(1); vgemm.beta = beta; - Kokkos::parallel_for( - "KokkosLapack::Test::VanillaGEMM", - Kokkos::TeamPolicy( - M, Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), - vgemm); + Kokkos::parallel_for("KokkosLapack::Test::VanillaGEMM", + Kokkos::TeamPolicy( + M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), + vgemm); Kokkos::fence(); Kokkos::deep_copy(host_I, A_I); @@ -251,8 +237,7 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, for (int i = 0; i < M; i++) { for (int j = 0; j < N; j++) { // Set check value - cur_check_val = - (i == j) ? ScalarA(1) : ScalarA(0); // APT::abs(host_A(i,j)); + cur_check_val = (i == j) ? ScalarA(1) : ScalarA(0); // APT::abs(host_A(i,j)); // Check how close |A_I - cur_check_val| is to 0. 
if (APT::abs(APT::abs(host_I(i, j)) - cur_check_val) > eps) { @@ -276,38 +261,30 @@ int test_trtri(const char* mode) { int ret; int bad_diag_idx = -1; #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_layout_left = - Kokkos::View; + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_left = Kokkos::View; - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 0, 0); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 0, 0); EXPECT_EQ(ret, 0); - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 1, 1); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 1, 1); EXPECT_EQ(ret, 0); - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 15, 15); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 15, 15); EXPECT_EQ(ret, 0); - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 100, 100); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 100, 100); EXPECT_EQ(ret, 0); // Rounding errors with randomly generated matrices begin here where M>100, so // we pass in A=I - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 273, 273); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 273, 273); EXPECT_EQ(ret, 0); // Only non-unit matrices could be singular. 
if (mode[1] == 'N' || mode[1] == 'n') { bad_diag_idx = 2; // 1-index based - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 2, 2); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 2, 2); EXPECT_EQ(ret, bad_diag_idx); bad_diag_idx = -1; } @@ -318,38 +295,30 @@ int test_trtri(const char* mode) { #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_layout_right = - Kokkos::View; + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_right = Kokkos::View; - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 0, 0); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 0, 0); EXPECT_EQ(ret, 0); - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 1, 1); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 1, 1); EXPECT_EQ(ret, 0); - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 15, 15); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 15, 15); EXPECT_EQ(ret, 0); - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 100, 100); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 100, 100); EXPECT_EQ(ret, 0); // Rounding errors with randomly generated matrices begin here where M>100, so // we pass in A=I - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 273, 273); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 273, 273); EXPECT_EQ(ret, 0); // Only non-unit matrices could be singular. 
if (mode[1] == 'N' || mode[1] == 'n') { bad_diag_idx = 2; // 1-index based - ret = Test::impl_test_trtri( - bad_diag_idx, &mode[0], &mode[1], 2, 2); + ret = Test::impl_test_trtri(bad_diag_idx, &mode[0], &mode[1], 2, 2); EXPECT_EQ(ret, bad_diag_idx); bad_diag_idx = -1; } @@ -359,8 +328,7 @@ int test_trtri(const char* mode) { } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_float"); test_trtri("UN"); @@ -372,8 +340,7 @@ TEST_F(TestCategory, trtri_float) { #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_double"); test_trtri("UN"); @@ -385,8 +352,7 @@ TEST_F(TestCategory, trtri_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_complex_double"); test_trtri, TestDevice>("UN"); @@ -398,8 +364,7 @@ TEST_F(TestCategory, trtri_complex_double) { #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_complex_float"); test_trtri, TestDevice>("UN"); diff --git 
a/packages/kokkos-kernels/master_history.txt b/packages/kokkos-kernels/master_history.txt index 3e8f8fcbd871..6a546fb885ef 100644 --- a/packages/kokkos-kernels/master_history.txt +++ b/packages/kokkos-kernels/master_history.txt @@ -26,3 +26,4 @@ tag: 4.2.00 date: 11/09/2023 master: 25a31f88 release: 912d3778 tag: 4.2.01 date: 01/30/2024 master: f429f6ec release: bcf9854b tag: 4.3.00 date: 04/03/2024 master: afd65f03 release: ebbf4b78 tag: 4.3.01 date: 05/07/2024 master: 1b0a15f5 release: 58785c1b +tag: 4.4.00 date: 08/08/2024 master: d1a91b8a release: 1145f529 diff --git a/packages/kokkos-kernels/ode/impl/KokkosODE_BDF_impl.hpp b/packages/kokkos-kernels/ode/impl/KokkosODE_BDF_impl.hpp index cf89731f1b1f..3119ff0e3aa4 100644 --- a/packages/kokkos-kernels/ode/impl/KokkosODE_BDF_impl.hpp +++ b/packages/kokkos-kernels/ode/impl/KokkosODE_BDF_impl.hpp @@ -44,31 +44,27 @@ struct BDF_table<2> { template <> struct BDF_table<3> { static constexpr int order = 3; - Kokkos::Array coefficients{ - {-18.0 / 11.0, 9.0 / 11.0, -2.0 / 11.0, 6.0 / 11.0}}; + Kokkos::Array coefficients{{-18.0 / 11.0, 9.0 / 11.0, -2.0 / 11.0, 6.0 / 11.0}}; }; template <> struct BDF_table<4> { static constexpr int order = 4; - Kokkos::Array coefficients{ - {-48.0 / 25.0, 36.0 / 25.0, -16.0 / 25.0, 3.0 / 25.0, 12.0 / 25.0}}; + Kokkos::Array coefficients{{-48.0 / 25.0, 36.0 / 25.0, -16.0 / 25.0, 3.0 / 25.0, 12.0 / 25.0}}; }; template <> struct BDF_table<5> { static constexpr int order = 5; - Kokkos::Array coefficients{{-300.0 / 137.0, 300.0 / 137.0, - -200.0 / 137.0, 75.0 / 137.0, - -12.0 / 137.0, 60.0 / 137.0}}; + Kokkos::Array coefficients{ + {-300.0 / 137.0, 300.0 / 137.0, -200.0 / 137.0, 75.0 / 137.0, -12.0 / 137.0, 60.0 / 137.0}}; }; template <> struct BDF_table<6> { static constexpr int order = 6; Kokkos::Array coefficients{ - {-360.0 / 147.0, 450.0 / 147.0, -400.0 / 147.0, 225.0 / 147.0, - -72.0 / 147.0, 10.0 / 147.0, 60.0 / 147.0}}; + {-360.0 / 147.0, 450.0 / 147.0, -400.0 / 147.0, 225.0 / 147.0, 
-72.0 / 147.0, 10.0 / 147.0, 60.0 / 147.0}}; }; template @@ -82,14 +78,9 @@ struct BDF_system_wrapper { mv_type yn; KOKKOS_FUNCTION - BDF_system_wrapper(const system_type& mySys_, const table_type& table_, - const double t_, const double dt_, const mv_type& yn_) - : mySys(mySys_), - neqs(mySys_.neqs), - table(table_), - t(t_), - dt(dt_), - yn(yn_) {} + BDF_system_wrapper(const system_type& mySys_, const table_type& table_, const double t_, const double dt_, + const mv_type& yn_) + : mySys(mySys_), neqs(mySys_.neqs), table(table_), t(t_), dt(dt_), yn(yn_) {} template KOKKOS_FUNCTION void residual(const vec_type& y, const vec_type& f) const { @@ -99,8 +90,7 @@ struct BDF_system_wrapper { for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) { f(eqIdx) = y(eqIdx) - table.coefficients[order] * dt * f(eqIdx); for (int orderIdx = 0; orderIdx < order; ++orderIdx) { - f(eqIdx) += - table.coefficients[order - 1 - orderIdx] * yn(eqIdx, orderIdx); + f(eqIdx) += table.coefficients[order - 1 - orderIdx] * yn(eqIdx, orderIdx); } } } @@ -111,8 +101,7 @@ struct BDF_system_wrapper { for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { for (int colIdx = 0; colIdx < neqs; ++colIdx) { - jac(rowIdx, colIdx) = - -table.coefficients[order] * dt * jac(rowIdx, colIdx); + jac(rowIdx, colIdx) = -table.coefficients[order] * dt * jac(rowIdx, colIdx); } jac(rowIdx, rowIdx) += 1.0; } @@ -130,13 +119,12 @@ struct BDF_system_wrapper2 { double t, dt, c = 0; KOKKOS_FUNCTION - BDF_system_wrapper2(const system_type& mySys_, const subview_type& psi_, - const d_vec_type& d_, const double t_, const double dt_) + BDF_system_wrapper2(const system_type& mySys_, const subview_type& psi_, const d_vec_type& d_, const double t_, + const double dt_) : mySys(mySys_), neqs(mySys_.neqs), psi(psi_), d(d_), t(t_), dt(dt_) {} template - KOKKOS_FUNCTION void residual(const YVectorType& y, - const FVectorType& f) const { + KOKKOS_FUNCTION void residual(const YVectorType& y, const FVectorType& f) const { // f = f(t+dt, y) 
mySys.evaluate_function(t, dt, y, f); @@ -165,14 +153,10 @@ struct BDF_system_wrapper2 { } }; -template -KOKKOS_FUNCTION void BDFStep(ode_type& ode, const table_type& table, - scalar_type t, scalar_type dt, - const vec_type& y_old, const vec_type& y_new, - const vec_type& rhs, const vec_type& update, - const vec_type& scale, const mv_type& y_vecs, - const mat_type& temp, const mat_type& jac) { +template +KOKKOS_FUNCTION void BDFStep(ode_type& ode, const table_type& table, scalar_type t, scalar_type dt, + const vec_type& y_old, const vec_type& y_new, const vec_type& rhs, const vec_type& update, + const vec_type& scale, const mv_type& y_vecs, const mat_type& temp, const mat_type& jac) { using newton_params = KokkosODE::Experimental::Newton_params; BDF_system_wrapper sys(ode, table, t, dt, y_vecs); @@ -184,57 +168,43 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, const table_type& table, } // solver the nonlinear problem - { - KokkosODE::Experimental::Newton::Solve(sys, param, jac, temp, y_new, rhs, - update, scale); - } + { KokkosODE::Experimental::Newton::Solve(sys, param, jac, temp, y_new, rhs, update, scale); } } // BDFStep template -KOKKOS_FUNCTION void compute_coeffs(const int order, const scalar_type factor, - const mat_type& coeffs) { +KOKKOS_FUNCTION void compute_coeffs(const int order, const scalar_type factor, const mat_type& coeffs) { coeffs(0, 0) = 1.0; for (int colIdx = 0; colIdx < order; ++colIdx) { coeffs(0, colIdx + 1) = 1.0; for (int rowIdx = 0; rowIdx < order; ++rowIdx) { coeffs(rowIdx + 1, colIdx + 1) = - ((rowIdx - factor * (colIdx + 1.0)) / (rowIdx + 1.0)) * - coeffs(rowIdx, colIdx + 1); + ((rowIdx - factor * (colIdx + 1.0)) / (rowIdx + 1.0)) * coeffs(rowIdx, colIdx + 1); } } } template -KOKKOS_FUNCTION void update_D(const int order, const scalar_type factor, - const mat_type& coeffs, const mat_type& tempD, +KOKKOS_FUNCTION void update_D(const int order, const scalar_type factor, const mat_type& coeffs, const mat_type& tempD, const mat_type& D) { 
- auto subD = - Kokkos::subview(D, Kokkos::ALL(), Kokkos::pair(0, order + 1)); - auto subTempD = Kokkos::subview(tempD, Kokkos::ALL(), - Kokkos::pair(0, order + 1)); + auto subD = Kokkos::subview(D, Kokkos::ALL(), Kokkos::pair(0, order + 1)); + auto subTempD = Kokkos::subview(tempD, Kokkos::ALL(), Kokkos::pair(0, order + 1)); compute_coeffs(order, factor, coeffs); - auto R = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), - Kokkos::pair(0, order + 1)); - KokkosBatched::SerialGemm< - KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Blocked>::invoke(1.0, subD, R, 0.0, subTempD); + auto R = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), Kokkos::pair(0, order + 1)); + KokkosBatched::SerialGemm::invoke(1.0, subD, R, 0.0, subTempD); compute_coeffs(order, 1.0, coeffs); - auto U = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), - Kokkos::pair(0, order + 1)); - KokkosBatched::SerialGemm< - KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Blocked>::invoke(1.0, subTempD, U, 0.0, subD); + auto U = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), Kokkos::pair(0, order + 1)); + KokkosBatched::SerialGemm::invoke(1.0, subTempD, U, 0.0, subD); } -template -KOKKOS_FUNCTION void initial_step_size( - const ode_type ode, const int order, const scalar_type t0, - const scalar_type atol, const scalar_type rtol, const vec_type& y0, - const res_type& f0, const mat_type& temp, scalar_type& dt_ini) { +template +KOKKOS_FUNCTION void initial_step_size(const ode_type ode, const int order, const scalar_type t0, + const scalar_type atol, const scalar_type rtol, const vec_type& y0, + const res_type& f0, const mat_type& temp, scalar_type& dt_ini) { using KAT = Kokkos::ArithTraits; // Extract subviews to store intermediate data @@ -290,16 +260,12 @@ KOKKOS_FUNCTION void initial_step_size( } } // initial_step_size -template -KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, 
scalar_type& dt, - scalar_type t_end, int& order, - int& num_equal_steps, const int max_newton_iters, - const scalar_type atol, const scalar_type rtol, - const scalar_type min_factor, - const vec_type& y_old, const vec_type& y_new, - const res_type& rhs, const res_type& update, - const mat_type& temp, const mat_type& temp2) { +template +KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, scalar_type t_end, int& order, + int& num_equal_steps, const int max_newton_iters, const scalar_type atol, + const scalar_type rtol, const scalar_type min_factor, const vec_type& y_old, + const vec_type& y_new, const res_type& rhs, const res_type& update, const mat_type& temp, + const mat_type& temp2) { using newton_params = KokkosODE::Experimental::Newton_params; constexpr int max_order = 5; @@ -310,10 +276,8 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, // kappa gamma(i) = sum_{k=1}^i(1.0 / k); gamma(0) = 0; // NDF coefficients // gamma_k alpha(i) = (1 - kappa(i)) * gamma(i) error_const(i) = kappa(i) * // gamma(i) + 1 / (i + 1) - const Kokkos::Array alpha{ - {0., 1.185, 1.66666667, 1.98421667, 2.16979167, 2.28333333}}; - const Kokkos::Array error_const{ - {1., 0.315, 0.16666667, 0.09911667, 0.11354167, 0.16666667}}; + const Kokkos::Array alpha{{0., 1.185, 1.66666667, 1.98421667, 2.16979167, 2.28333333}}; + const Kokkos::Array error_const{{1., 0.315, 0.16666667, 0.09911667, 0.11354167, 0.16666667}}; // Extract columns of temp to form temporary // subviews to operate on. 
@@ -322,12 +286,9 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, // numCols << std::endl; std::cout << "Extract subview from temp" << // std::endl; int offset = 2; - auto D = Kokkos::subview( - temp, Kokkos::ALL(), - Kokkos::pair(offset, offset + 8)); // y and its derivatives + auto D = Kokkos::subview(temp, Kokkos::ALL(), Kokkos::pair(offset, offset + 8)); // y and its derivatives offset += 8; - auto tempD = Kokkos::subview(temp, Kokkos::ALL(), - Kokkos::pair(offset, offset + 8)); + auto tempD = Kokkos::subview(temp, Kokkos::ALL(), Kokkos::pair(offset, offset + 8)); offset += 8; auto scale = Kokkos::subview(temp, Kokkos::ALL(), offset + 1); ++offset; // Scaling coefficients for error calculation @@ -337,31 +298,26 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, ++offset; // Higher order terms contribution to rhs auto error = Kokkos::subview(temp, Kokkos::ALL(), offset + 1); ++offset; // Error estimate - auto jac = Kokkos::subview( - temp, Kokkos::ALL(), - Kokkos::pair(offset, offset + ode.neqs)); // Jacobian matrix + auto jac = + Kokkos::subview(temp, Kokkos::ALL(), Kokkos::pair(offset, offset + ode.neqs)); // Jacobian matrix offset += ode.neqs; auto tmp_gesv = Kokkos::subview( - temp, Kokkos::ALL(), - Kokkos::pair( - offset, offset + ode.neqs + 4)); // Buffer space for gesv calculation + temp, Kokkos::ALL(), Kokkos::pair(offset, offset + ode.neqs + 4)); // Buffer space for gesv calculation offset += ode.neqs + 4; - auto coeffs = - Kokkos::subview(temp2, Kokkos::ALL(), Kokkos::pair(0, 6)); - auto gamma = Kokkos::subview(temp2, Kokkos::ALL(), 6); - gamma(0) = 0.0; - gamma(1) = 1.0; - gamma(2) = 1.5; - gamma(3) = 1.83333333; - gamma(4) = 2.08333333; - gamma(5) = 2.28333333; + auto coeffs = Kokkos::subview(temp2, Kokkos::ALL(), Kokkos::pair(0, 6)); + auto gamma = Kokkos::subview(temp2, Kokkos::ALL(), 6); + gamma(0) = 0.0; + gamma(1) = 1.0; + gamma(2) = 1.5; + gamma(3) = 1.83333333; + gamma(4) = 
2.08333333; + gamma(5) = 2.28333333; BDF_system_wrapper2 sys(ode, psi, update, t, dt); const newton_params param( max_newton_iters, atol, - Kokkos::max(10 * Kokkos::ArithTraits::eps() / rtol, - Kokkos::min(0.03, Kokkos::sqrt(rtol)))); + Kokkos::max(10 * Kokkos::ArithTraits::eps() / rtol, Kokkos::min(0.03, Kokkos::sqrt(rtol)))); scalar_type max_step = Kokkos::ArithTraits::max(); scalar_type min_step = Kokkos::ArithTraits::min(); @@ -406,12 +362,9 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, // Compute psi, the sum of the higher order // contribution to the residual - auto subD = - Kokkos::subview(D, Kokkos::ALL(), Kokkos::pair(1, order + 1)); - auto subGamma = - Kokkos::subview(gamma, Kokkos::pair(1, order + 1)); - KokkosBlas::Experimental::serial_gemv('N', 1.0 / alpha[order], subD, - subGamma, 0.0, psi); + auto subD = Kokkos::subview(D, Kokkos::ALL(), Kokkos::pair(1, order + 1)); + auto subGamma = Kokkos::subview(gamma, Kokkos::pair(1, order + 1)); + KokkosBlas::Experimental::serial_gemv('N', 1.0 / alpha[order], subD, subGamma, 0.0, psi); sys.compute_jac = true; sys.c = dt / alpha[order]; @@ -420,23 +373,20 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, Kokkos::Experimental::local_deep_copy(y_new, y_predict); Kokkos::Experimental::local_deep_copy(update, 0); KokkosODE::Experimental::newton_solver_status newton_status = - KokkosODE::Experimental::Newton::Solve(sys, param, jac, tmp_gesv, y_new, - rhs, update, scale); + KokkosODE::Experimental::Newton::Solve(sys, param, jac, tmp_gesv, y_new, rhs, update, scale); for (int eqIdx = 0; eqIdx < sys.neqs; ++eqIdx) { update(eqIdx) = y_new(eqIdx) - y_predict(eqIdx); } - if (newton_status == - KokkosODE::Experimental::newton_solver_status::MAX_ITER) { + if (newton_status == KokkosODE::Experimental::newton_solver_status::MAX_ITER) { dt = 0.5 * dt; update_D(order, 0.5, coeffs, tempD, D); num_equal_steps = 0; } else { // Estimate the solution error - safety = 0.9 
* (2 * max_newton_iters + 1) / - (2 * max_newton_iters + param.iters); + safety = 0.9 * (2 * max_newton_iters + 1) / (2 * max_newton_iters + param.iters); error_norm = 0; for (int eqIdx = 0; eqIdx < sys.neqs; ++eqIdx) { scale(eqIdx) = atol + rtol * Kokkos::abs(y_new(eqIdx)); @@ -447,9 +397,8 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, // Check error norm and adapt step size or accept step if (error_norm > 1) { - scalar_type factor = Kokkos::max( - min_factor, safety * Kokkos::pow(error_norm, -1.0 / (order + 1))); - dt = factor * dt; + scalar_type factor = Kokkos::max(min_factor, safety * Kokkos::pow(error_norm, -1.0 / (order + 1))); + dt = factor * dt; update_D(order, factor, coeffs, tempD, D); num_equal_steps = 0; } else { @@ -483,8 +432,7 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, if (1 < order) { for (int eqIdx = 0; eqIdx < sys.neqs; ++eqIdx) { - error_low += Kokkos::pow( - error_const[order - 1] * D(eqIdx, order) / scale(eqIdx), 2); + error_low += Kokkos::pow(error_const[order - 1] * D(eqIdx, order) / scale(eqIdx), 2); } error_low = Kokkos::sqrt(error_low) / Kokkos::sqrt(sys.neqs); } else { @@ -493,8 +441,7 @@ KOKKOS_FUNCTION void BDFStep(ode_type& ode, scalar_type& t, scalar_type& dt, if (order < max_order) { for (int eqIdx = 0; eqIdx < sys.neqs; ++eqIdx) { - error_high += Kokkos::pow( - error_const[order + 1] * D(eqIdx, order + 2) / scale(eqIdx), 2); + error_high += Kokkos::pow(error_const[order + 1] * D(eqIdx, order + 2) / scale(eqIdx), 2); } error_high = Kokkos::sqrt(error_high) / Kokkos::sqrt(sys.neqs); } else { diff --git a/packages/kokkos-kernels/ode/impl/KokkosODE_Newton_impl.hpp b/packages/kokkos-kernels/ode/impl/KokkosODE_Newton_impl.hpp index 348bf0aa226e..1ca545689a73 100644 --- a/packages/kokkos-kernels/ode/impl/KokkosODE_Newton_impl.hpp +++ b/packages/kokkos-kernels/ode/impl/KokkosODE_Newton_impl.hpp @@ -30,19 +30,18 @@ namespace KokkosODE { namespace Impl { -template 
+template KOKKOS_FUNCTION KokkosODE::Experimental::newton_solver_status NewtonSolve( - system_type& sys, const KokkosODE::Experimental::Newton_params& params, - mat_type& J, mat_type& tmp, ini_vec_type& y0, rhs_vec_type& rhs, - update_type& update, const scale_type& scale) { + system_type& sys, const KokkosODE::Experimental::Newton_params& params, mat_type& J, mat_type& tmp, + ini_vec_type& y0, rhs_vec_type& rhs, update_type& update, const scale_type& scale) { using newton_solver_status = KokkosODE::Experimental::newton_solver_status; using value_type = typename ini_vec_type::non_const_value_type; // Define the type returned by nrm2 to store // the norm of the residual. - using norm_type = typename Kokkos::Details::InnerProductSpaceTraits< - typename ini_vec_type::non_const_value_type>::mag_type; + using norm_type = + typename Kokkos::Details::InnerProductSpaceTraits::mag_type; sys.residual(y0, rhs); const norm_type norm0 = KokkosBlas::serial_nrm2(rhs); norm_type norm = Kokkos::ArithTraits::zero(); @@ -50,9 +49,8 @@ KOKKOS_FUNCTION KokkosODE::Experimental::newton_solver_status NewtonSolve( norm_type norm_new = Kokkos::ArithTraits::zero(); norm_type rate = Kokkos::ArithTraits::zero(); - const norm_type tol = - Kokkos::max(10 * Kokkos::ArithTraits::eps() / params.rel_tol, - Kokkos::min(0.03, Kokkos::sqrt(params.rel_tol))); + const norm_type tol = Kokkos::max(10 * Kokkos::ArithTraits::eps() / params.rel_tol, + Kokkos::min(0.03, Kokkos::sqrt(params.rel_tol))); // LBV - 07/24/2023: for now assume that we take // a full Newton step. 
Eventually this value can @@ -73,9 +71,7 @@ KOKKOS_FUNCTION KokkosODE::Experimental::newton_solver_status NewtonSolve( sys.jacobian(y0, J); // solve linear problem - int linSolverStat = - KokkosBatched::SerialGesv::invoke( - J, update, rhs, tmp); + int linSolverStat = KokkosBatched::SerialGesv::invoke(J, update, rhs, tmp); KokkosBlas::SerialScale::invoke(-1, update); // update solution // x = x + alpha*update @@ -89,9 +85,7 @@ KOKKOS_FUNCTION KokkosODE::Experimental::newton_solver_status NewtonSolve( norm_new = Kokkos::sqrt(norm_new / sys.neqs); if ((it > 0) && norm_old > Kokkos::ArithTraits::zero()) { rate = norm_new / norm_old; - if ((rate >= 1) || - Kokkos::pow(rate, params.max_iters - it) / (1 - rate) * norm_new > - tol) { + if ((rate >= 1) || Kokkos::pow(rate, params.max_iters - it) / (1 - rate) * norm_new > tol) { return newton_solver_status::NLS_DIVERGENCE; } else if ((norm_new == 0) || ((rate / (1 - rate)) * norm_new < tol)) { return newton_solver_status::NLS_SUCCESS; @@ -99,17 +93,11 @@ KOKKOS_FUNCTION KokkosODE::Experimental::newton_solver_status NewtonSolve( } if (linSolverStat == 1) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "NewtonFunctor: Linear solve gesv returned failure! \n"); -#else Kokkos::printf("NewtonFunctor: Linear solve gesv returned failure! \n"); -#endif return newton_solver_status::LIN_SOLVE_FAIL; } - if ((norm < (params.rel_tol * norm0)) || - (it > 0 ? KokkosBlas::serial_nrm2(update) < params.abs_tol : false)) { + if ((norm < (params.rel_tol * norm0)) || (it > 0 ? 
KokkosBlas::serial_nrm2(update) < params.abs_tol : false)) { return newton_solver_status::NLS_SUCCESS; } diff --git a/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp b/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp index 85a8ec0b45c2..6a0770d1a7ab 100644 --- a/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp +++ b/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp @@ -83,8 +83,7 @@ struct ButcherTableau<1, 1> // Euler-Heun Method static constexpr int order = 2; static constexpr int nstages = 2; // total dimensions, nstagesxnstages system Kokkos::Array a{ - {0.0, 1.0, - 0.0}}; //(nstages*nstages+nstages)/2 size of lower triangular matrix + {0.0, 1.0, 0.0}}; //(nstages*nstages+nstages)/2 size of lower triangular matrix Kokkos::Array b{{0.5, 0.5}}; Kokkos::Array c{{0.0, 1.0}}; Kokkos::Array e{{-0.5, 0.5}}; @@ -100,12 +99,10 @@ struct ButcherTableau<1, 2> // Known as Fehlberg 1-2 method { static constexpr int order = 2; static constexpr int nstages = 3; - Kokkos::Array a{ - {0.0, 0.5, 0.0, 1.0 / 256.0, 255.0 / 256.0, 0.0}}; + Kokkos::Array a{{0.0, 0.5, 0.0, 1.0 / 256.0, 255.0 / 256.0, 0.0}}; Kokkos::Array b{{1.0 / 512.0, 255.0 / 256.0, 1. 
/ 512}}; Kokkos::Array c{{0.0, 1.0 / 2.0, 1.0}}; - Kokkos::Array e{ - {1.0 / 256.0 - 1.0 / 512.0, 0.0, -1.0 / 512.0}}; + Kokkos::Array e{{1.0 / 256.0 - 1.0 / 512.0, 0.0, -1.0 / 512.0}}; }; // Coefficients obtained from: @@ -119,12 +116,10 @@ struct ButcherTableau<2, 3> // Bogacki-Shampine method static constexpr int order = 3; static constexpr int nstages = 4; Kokkos::Array a{ - {0.0, 0.5, 0.0, 0.0, 3.0 / 4.0, 0.0, 2.0 / 9.0, 1.0 / 3.0, 4.0 / 9.0, - 0.0}}; + {0.0, 0.5, 0.0, 0.0, 3.0 / 4.0, 0.0, 2.0 / 9.0, 1.0 / 3.0, 4.0 / 9.0, 0.0}}; Kokkos::Array b{{2.0 / 9.0, 1.0 / 3.0, 4.0 / 9.0, 0.0}}; Kokkos::Array c{{0.0, 0.5, 0.75, 1.0}}; - Kokkos::Array e{{2.0 / 9.0 - 7.0 / 24.0, 1.0 / 3.0 - 0.25, - 4.0 / 9.0 - 1.0 / 3.0, -1.0 / 8.0}}; + Kokkos::Array e{{2.0 / 9.0 - 7.0 / 24.0, 1.0 / 3.0 - 0.25, 4.0 / 9.0 - 1.0 / 3.0, -1.0 / 8.0}}; }; // Coefficients obtained from: @@ -136,10 +131,8 @@ struct ButcherTableau<3, 3> // RK4 { static constexpr int order = 4; static constexpr int nstages = 4; - Kokkos::Array a{ - {0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 1.0, 0.0}}; - Kokkos::Array b{ - {1.0 / 6.0, 1.0 / 3.0, 1.0 / 3.0, 1.0 / 6.0}}; + Kokkos::Array a{{0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 1.0, 0.0}}; + Kokkos::Array b{{1.0 / 6.0, 1.0 / 3.0, 1.0 / 3.0, 1.0 / 6.0}}; Kokkos::Array c{{0.0, 0.5, 0.5, 1.0}}; Kokkos::Array e{{1.0 / 6.0, 0.0, -1.0 / 3.0, 1.0 / 6.0}}; }; @@ -175,14 +168,10 @@ struct ButcherTableau<4, 5> // Fehlberg Method 1859.0 / 4104.0, -11.0 / 40.0, 0.0}}; - Kokkos::Array b{{16.0 / 135.0, 0.0, 6656.0 / 12825.0, - 28561.0 / 56430.0, -9.0 / 50.0, - 2.0 / 55.0}}; - Kokkos::Array c{ - {0.0, 0.25, 3.0 / 8.0, 12.0 / 13.0, 1.0, 0.5}}; - Kokkos::Array e{ - {16.0 / 135.0 - 25.0 / 216.0, 0.0, 6656.0 / 12825.0 - 1408.0 / 2565.0, - 28561.0 / 56430.0 - 2197.0 / 4104.0, -9.0 / 50.0 + 0.2, 2.0 / 55.0}}; + Kokkos::Array b{{16.0 / 135.0, 0.0, 6656.0 / 12825.0, 28561.0 / 56430.0, -9.0 / 50.0, 2.0 / 55.0}}; + Kokkos::Array c{{0.0, 0.25, 3.0 / 8.0, 12.0 / 13.0, 1.0, 0.5}}; + 
Kokkos::Array e{{16.0 / 135.0 - 25.0 / 216.0, 0.0, 6656.0 / 12825.0 - 1408.0 / 2565.0, + 28561.0 / 56430.0 - 2197.0 / 4104.0, -9.0 / 50.0 + 0.2, 2.0 / 55.0}}; }; // Coefficients obtained from: @@ -195,35 +184,31 @@ struct ButcherTableau<4, 5, 1> // Cash-Karp { static constexpr int order = 5; static constexpr int nstages = 6; - Kokkos::Array a{ - {0.0, - 0.2, - 0.0, - 3.0 / 40.0, - 9.0 / 40.0, - 0.0, - 0.3, - -0.9, - 1.2, - 0.0, - -11.0 / 54.0, - 2.5, - -70.0 / 27.0, - 35.0 / 27.0, - 0.0, - 1631.0 / 55296.0, - 175.0 / 512.0, - 575.0 / 13824.0, - 44275.0 / 110592.0, - 253.0 / 4096.0, - 0.0}}; - Kokkos::Array b{ - {37.0 / 378.0, 0.0, 250.0 / 621.0, 125.0 / 594.0, 0.0, 512.0 / 1771.0}}; + Kokkos::Array a{{0.0, + 0.2, + 0.0, + 3.0 / 40.0, + 9.0 / 40.0, + 0.0, + 0.3, + -0.9, + 1.2, + 0.0, + -11.0 / 54.0, + 2.5, + -70.0 / 27.0, + 35.0 / 27.0, + 0.0, + 1631.0 / 55296.0, + 175.0 / 512.0, + 575.0 / 13824.0, + 44275.0 / 110592.0, + 253.0 / 4096.0, + 0.0}}; + Kokkos::Array b{{37.0 / 378.0, 0.0, 250.0 / 621.0, 125.0 / 594.0, 0.0, 512.0 / 1771.0}}; Kokkos::Array c{{0.0, 0.2, 0.3, 0.6, 1.0, 7.0 / 8.0}}; - Kokkos::Array e{{37.0 / 378.0 - 2825.0 / 27648.0, 0.0, - 250.0 / 621.0 - 18575.0 / 48384.0, - 125.0 / 594.0 - 13525.0 / 55296.0, - -277.0 / 14336.0, 512.0 / 1771.0 - 0.25}}; + Kokkos::Array e{{37.0 / 378.0 - 2825.0 / 27648.0, 0.0, 250.0 / 621.0 - 18575.0 / 48384.0, + 125.0 / 594.0 - 13525.0 / 55296.0, -277.0 / 14336.0, 512.0 / 1771.0 - 0.25}}; }; // Coefficients obtained from: @@ -264,14 +249,12 @@ struct ButcherTableau<4, 6> // Referred to as DOPRI5 or RKDP -2187.0 / 6784.0, 11.0 / 84.0, 0.0}}; - Kokkos::Array b{{35.0 / 384.0, 0.0, 500.0 / 1113.0, - 125.0 / 192.0, -2187.0 / 6784.0, - 11.0 / 84.0, 0.0}}; + Kokkos::Array b{ + {35.0 / 384.0, 0.0, 500.0 / 1113.0, 125.0 / 192.0, -2187.0 / 6784.0, 11.0 / 84.0, 0.0}}; Kokkos::Array c{{0.0, 0.2, 0.3, 0.8, 8.0 / 9.0, 1.0, 1.0}}; - Kokkos::Array e{ - {35.0 / 384.0 - 5179.0 / 57600.0, 0.0, 500.0 / 1113.0 - 7571.0 / 16695.0, - 125.0 / 
192.0 - 393.0 / 640.0, -2187.0 / 6784.0 + 92097.0 / 339200.0, - 11.0 / 84.0 - 187.0 / 2100.0, -1.0 / 40.0}}; + Kokkos::Array e{{35.0 / 384.0 - 5179.0 / 57600.0, 0.0, 500.0 / 1113.0 - 7571.0 / 16695.0, + 125.0 / 192.0 - 393.0 / 640.0, -2187.0 / 6784.0 + 92097.0 / 339200.0, + 11.0 / 84.0 - 187.0 / 2100.0, -1.0 / 40.0}}; }; } // namespace Impl diff --git a/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKutta_impl.hpp b/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKutta_impl.hpp index f5fe39d65ddf..83ab76758f96 100644 --- a/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKutta_impl.hpp +++ b/packages/kokkos-kernels/ode/impl/KokkosODE_RungeKutta_impl.hpp @@ -30,12 +30,9 @@ namespace Impl { // k_i = f(t+c_i*dt, y_old+sum(a_{ij}*k_i)) j in [1, i-1] // we need to compute the k_i and store them as we go // to use them for k_{i+1} computation. -template -KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, - const bool adaptivity, scalar_type t, - scalar_type dt, const vec_type& y_old, - const vec_type& y_new, const vec_type& temp, +template +KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool adaptivity, scalar_type t, + scalar_type dt, const vec_type& y_old, const vec_type& y_new, const vec_type& temp, const mv_type& k_vecs) { const int neqs = ode.neqs; const int nstages = table.nstages; @@ -64,8 +61,7 @@ KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, for (int idx = 0; idx < stageIdx; ++idx) { for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) { - temp(eqIdx) += - table.a[stageIdx * (stageIdx + 1) / 2 + idx] * k_vecs(idx, eqIdx); + temp(eqIdx) += table.a[stageIdx * (stageIdx + 1) / 2 + idx] * k_vecs(idx, eqIdx); } } KokkosBlas::SerialScale::invoke(dt, temp); @@ -88,13 +84,12 @@ KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, } } // RKStep -template -KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve( - const ode_type& ode, const table_type& table, - const 
KokkosODE::Experimental::ODE_params& params, - const scalar_type t_start, const scalar_type t_end, const vec_type& y0, - const vec_type& y, const vec_type& temp, const mv_type& k_vecs) { +template +KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, const table_type& table, + const KokkosODE::Experimental::ODE_params& params, + const scalar_type t_start, const scalar_type t_end, + const vec_type& y0, const vec_type& y, const vec_type& temp, + const mv_type& k_vecs) { constexpr scalar_type error_threshold = 1; bool adapt = params.adaptivity; bool dt_was_reduced; @@ -107,8 +102,7 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve( scalar_type dt = (t_end - t_start) / params.max_steps; // Loop over time steps to integrate ODE - for (int stepIdx = 0; (stepIdx < params.max_steps) && (t_now <= t_end); - ++stepIdx) { + for (int stepIdx = 0; (stepIdx < params.max_steps) && (t_now <= t_end); ++stepIdx) { // Check that the step attempted is not putting // the solution past t_end, otherwise shrink dt if (t_end < t_now + dt) { @@ -138,9 +132,7 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve( for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { error = Kokkos::max(error, Kokkos::abs(temp(eqIdx))); tol = Kokkos::max( - tol, params.abs_tol + - params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), - Kokkos::abs(y0(eqIdx)))); + tol, params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx)))); } error = error / tol; @@ -148,12 +140,11 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve( // is too large and current step // is rejected. 
if (error > 1) { - dt = dt * Kokkos::max(0.2, 0.8 / Kokkos::pow(error, 1 / table.order)); + dt = dt * Kokkos::max(0.2, 0.8 / Kokkos::pow(error, 1 / table.order)); dt_was_reduced = true; } - if (dt < params.min_step_size) - return Experimental::ode_solver_status::MIN_SIZE; + if (dt < params.min_step_size) return Experimental::ode_solver_status::MIN_SIZE; } } @@ -166,10 +157,7 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve( if (t_now < t_end) { if (adapt && !dt_was_reduced && error < 0.5) { // Compute new time increment - dt = dt * - Kokkos::min( - 10.0, - Kokkos::max(2.0, 0.9 * Kokkos::pow(error, 1 / table.order))); + dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, 1 / table.order))); } } else { return Experimental::ode_solver_status::SUCCESS; diff --git a/packages/kokkos-kernels/ode/src/KokkosODE_BDF.hpp b/packages/kokkos-kernels/ode/src/KokkosODE_BDF.hpp index 71a450a1c680..419316ba4539 100644 --- a/packages/kokkos-kernels/ode/src/KokkosODE_BDF.hpp +++ b/packages/kokkos-kernels/ode/src/KokkosODE_BDF.hpp @@ -29,14 +29,7 @@ namespace KokkosODE { namespace Experimental { -enum BDF_type : int { - BDF1 = 0, - BDF2 = 1, - BDF3 = 2, - BDF4 = 3, - BDF5 = 4, - BDF6 = 5 -}; +enum BDF_type : int { BDF1 = 0, BDF2 = 1, BDF3 = 2, BDF4 = 3, BDF5 = 4, BDF6 = 5 }; template struct BDF_coeff_helper { @@ -91,14 +84,11 @@ template struct BDF { using table_type = typename BDF_coeff_helper::table_type; - template - KOKKOS_FUNCTION static void Solve( - const ode_type& ode, const scalar_type t_start, const scalar_type t_end, - const int num_steps, const vec_type& y0, const vec_type& y, - const vec_type& rhs, const vec_type& update, const vec_type& scale, - const mv_type& y_vecs, const mv_type& kstack, const mat_type& temp, - const mat_type& jac) { + template + KOKKOS_FUNCTION static void Solve(const ode_type& ode, const scalar_type t_start, const scalar_type t_end, + const int num_steps, const vec_type& y0, const vec_type& y, const vec_type& rhs, + const 
vec_type& update, const vec_type& scale, const mv_type& y_vecs, + const mv_type& kstack, const mat_type& temp, const mat_type& jac) { const table_type table{}; const double dt = (t_end - t_start) / num_steps; @@ -117,8 +107,7 @@ struct BDF { } KokkosODE::Experimental::ODE_params params(table.order - 1); for (int stepIdx = 0; stepIdx < init_steps; ++stepIdx) { - KokkosODE::Experimental::RungeKutta::Solve( - ode, params, t, t + dt, y0, y, update, kstack); + KokkosODE::Experimental::RungeKutta::Solve(ode, params, t, t + dt, y0, y, update, kstack); for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { y_vecs(eqIdx, stepIdx + 1) = y(eqIdx); @@ -128,8 +117,7 @@ struct BDF { } for (int stepIdx = init_steps; stepIdx < num_steps; ++stepIdx) { - KokkosODE::Impl::BDFStep(ode, table, t, dt, y0, y, rhs, update, scale, - y_vecs, temp, jac); + KokkosODE::Impl::BDFStep(ode, table, t, dt, y0, y, rhs, update, scale, y_vecs, temp, jac); // Update history for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { @@ -167,12 +155,9 @@ struct BDF { /// \param temp [in]: vectors for temporary storage /// \param temp2 [in]: vectors for temporary storage template -KOKKOS_FUNCTION void BDFSolve(const ode_type& ode, const scalar_type t_start, - const scalar_type t_end, - const scalar_type initial_step, - const scalar_type max_step, const vec_type& y0, - const vec_type& y_new, mat_type& temp, - mat_type& temp2) { +KOKKOS_FUNCTION void BDFSolve(const ode_type& ode, const scalar_type t_start, const scalar_type t_end, + const scalar_type initial_step, const scalar_type max_step, const vec_type& y0, + const vec_type& y_new, mat_type& temp, mat_type& temp2) { using KAT = Kokkos::ArithTraits; // This needs to go away and be pulled out of temp instead... @@ -195,8 +180,7 @@ KOKKOS_FUNCTION void BDFSolve(const ode_type& ode, const scalar_type t_start, // Check if we need to compute the initial // time step size. 
if (initial_step == KAT::zero()) { - KokkosODE::Impl::initial_step_size(ode, order, t_start, atol, rtol, y0, rhs, - temp, dt); + KokkosODE::Impl::initial_step_size(ode, order, t_start, atol, rtol, y0, rhs, temp, dt); } // Initialize D(:, 0) = y0 and D(:, 1) = dt*rhs @@ -210,8 +194,7 @@ KOKKOS_FUNCTION void BDFSolve(const ode_type& ode, const scalar_type t_start, // Now we loop over the time interval [t_start, t_end] // and solve our ODE. while (t < t_end) { - KokkosODE::Impl::BDFStep(ode, t, dt, t_end, order, num_equal_steps, - max_newton_iters, atol, rtol, min_factor, y0, + KokkosODE::Impl::BDFStep(ode, t, dt, t_end, order, num_equal_steps, max_newton_iters, atol, rtol, min_factor, y0, y_new, rhs, update, temp, temp2); for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { diff --git a/packages/kokkos-kernels/ode/src/KokkosODE_Newton.hpp b/packages/kokkos-kernels/ode/src/KokkosODE_Newton.hpp index ffccba5cd33e..5686423e9e2f 100644 --- a/packages/kokkos-kernels/ode/src/KokkosODE_Newton.hpp +++ b/packages/kokkos-kernels/ode/src/KokkosODE_Newton.hpp @@ -30,14 +30,13 @@ namespace Experimental { /// \brief Newton solver for non-linear system of equations struct Newton { - template - KOKKOS_FUNCTION static newton_solver_status Solve( - const system_type& sys, const Newton_params& params, const mat_type& J, - const mat_type& tmp, const ini_vec_type& y0, const rhs_vec_type& rhs, - const update_type& update, const scale_type& scale) { - return KokkosODE::Impl::NewtonSolve(sys, params, J, tmp, y0, rhs, update, - scale); + template + KOKKOS_FUNCTION static newton_solver_status Solve(const system_type& sys, const Newton_params& params, + const mat_type& J, const mat_type& tmp, const ini_vec_type& y0, + const rhs_vec_type& rhs, const update_type& update, + const scale_type& scale) { + return KokkosODE::Impl::NewtonSolve(sys, params, J, tmp, y0, rhs, update, scale); } }; diff --git a/packages/kokkos-kernels/ode/src/KokkosODE_RungeKutta.hpp 
b/packages/kokkos-kernels/ode/src/KokkosODE_RungeKutta.hpp index b4711de81c49..2d298a65689c 100644 --- a/packages/kokkos-kernels/ode/src/KokkosODE_RungeKutta.hpp +++ b/packages/kokkos-kernels/ode/src/KokkosODE_RungeKutta.hpp @@ -31,8 +31,8 @@ namespace Experimental { /// \brief RK_type is an enum tye that conveniently /// describes the Runge-Kutta methods implemented. enum RK_type : int { - RKFE = 0, ///< Forward Euler method (no adaptivity available for this method) - RKEH = 1, ///< Euler-Heun method + RKFE = 0, ///< Forward Euler method (no adaptivity available for this method) + RKEH = 1, ///< Euler-Heun method RKF12 = 2, ///< Fehlberg order 2 method RKBS = 3, ///< Bogacki-Shampine method RK4 = 4, ///< Runge-Kutta classic order 4 method @@ -126,13 +126,11 @@ struct RungeKutta { /// \return ode_solver_status an enum that describes success of failure /// of the integration method once it at terminated. template - KOKKOS_FUNCTION static ode_solver_status Solve( - const ode_type& ode, const KokkosODE::Experimental::ODE_params& params, - const scalar_type t_start, const scalar_type t_end, const vec_type& y0, - const vec_type& y, const vec_type& temp, const mv_type& k_vecs) { + KOKKOS_FUNCTION static ode_solver_status Solve(const ode_type& ode, const KokkosODE::Experimental::ODE_params& params, + const scalar_type t_start, const scalar_type t_end, const vec_type& y0, + const vec_type& y, const vec_type& temp, const mv_type& k_vecs) { table_type table; - return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, - temp, k_vecs); + return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs); } }; diff --git a/packages/kokkos-kernels/ode/src/KokkosODE_Types.hpp b/packages/kokkos-kernels/ode/src/KokkosODE_Types.hpp index 5fb2c44846c1..2145afb71823 100644 --- a/packages/kokkos-kernels/ode/src/KokkosODE_Types.hpp +++ b/packages/kokkos-kernels/ode/src/KokkosODE_Types.hpp @@ -32,17 +32,12 @@ struct ODE_params { // be constant such 
that dt = (tend - tstart) / num_steps; KOKKOS_FUNCTION ODE_params(const int num_steps_) - : adaptivity(false), - num_steps(num_steps_), - max_steps(num_steps_), - abs_tol(0), - rel_tol(0), - min_step_size(0) {} + : adaptivity(false), num_steps(num_steps_), max_steps(num_steps_), abs_tol(0), rel_tol(0), min_step_size(0) {} /// ODE_parms construtor for adaptive time stepping. KOKKOS_FUNCTION - ODE_params(const int num_steps_, const int max_steps_, const double abs_tol_, - const double rel_tol_, const double min_step_size_) + ODE_params(const int num_steps_, const int max_steps_, const double abs_tol_, const double rel_tol_, + const double min_step_size_) : adaptivity(true), num_steps(num_steps_), max_steps(max_steps_), @@ -68,8 +63,7 @@ struct Newton_params { // double abs_tol_ [in]: absolute tolerance to reach for successful solve // double rel_tol_ [in]: relative tolerance to reach for successful solve KOKKOS_FUNCTION - Newton_params(const int max_iters_, const double abs_tol_, - const double rel_tol_) + Newton_params(const int max_iters_, const double abs_tol_, const double rel_tol_) : max_iters(max_iters_), abs_tol(abs_tol_), rel_tol(rel_tol_) {} }; diff --git a/packages/kokkos-kernels/ode/unit_test/Test_ODE_BDF.hpp b/packages/kokkos-kernels/ode/unit_test/Test_ODE_BDF.hpp index 836030297105..8f8319cb1df8 100644 --- a/packages/kokkos-kernels/ode/unit_test/Test_ODE_BDF.hpp +++ b/packages/kokkos-kernels/ode/unit_test/Test_ODE_BDF.hpp @@ -37,23 +37,19 @@ struct Logistic { Logistic(double r_, double K_) : r(r_), K(K_){}; template - KOKKOS_FUNCTION void evaluate_function(const double /*t*/, - const double /*dt*/, - const vec_type1& y, + KOKKOS_FUNCTION void evaluate_function(const double /*t*/, const double /*dt*/, const vec_type1& y, const vec_type2& f) const { f(0) = r * y(0) * (1.0 - y(0) / K); } template - KOKKOS_FUNCTION void evaluate_jacobian(const double /*t*/, - const double /*dt*/, const vec_type& y, + KOKKOS_FUNCTION void evaluate_jacobian(const double /*t*/, 
const double /*dt*/, const vec_type& y, const mat_type& jac) const { jac(0, 0) = r - 2 * r * y(0) / K; } template - KOKKOS_FUNCTION void solution(const double t, const vec_type& y0, - const vec_type& y) const { + KOKKOS_FUNCTION void solution(const double t, const vec_type& y0, const vec_type& y) const { y(0) = K / (1 + (K - y0) / y0 * Kokkos::exp(-r * t)); } @@ -78,17 +74,14 @@ struct LotkaVolterra { : alpha(alpha_), beta(beta_), delta(delta_), gamma(gamma_){}; template - KOKKOS_FUNCTION void evaluate_function(const double /*t*/, - const double /*dt*/, - const vec_type1& y, + KOKKOS_FUNCTION void evaluate_function(const double /*t*/, const double /*dt*/, const vec_type1& y, const vec_type2& f) const { f(0) = alpha * y(0) - beta * y(0) * y(1); f(1) = delta * y(0) * y(1) - gamma * y(1); } template - KOKKOS_FUNCTION void evaluate_jacobian(const double /*t*/, - const double /*dt*/, const vec_type& y, + KOKKOS_FUNCTION void evaluate_jacobian(const double /*t*/, const double /*dt*/, const vec_type& y, const mat_type& jac) const { jac(0, 0) = alpha - beta * y(1); jac(0, 1) = -beta * y(0); @@ -112,9 +105,7 @@ struct StiffChemistry { StiffChemistry() {} template - KOKKOS_FUNCTION void evaluate_function(const double /*t*/, - const double /*dt*/, - const vec_type1& y, + KOKKOS_FUNCTION void evaluate_function(const double /*t*/, const double /*dt*/, const vec_type1& y, const vec_type2& f) const { f(0) = -0.04 * y(0) + 1.e4 * y(1) * y(2); f(1) = 0.04 * y(0) - 1.e4 * y(1) * y(2) - 3.e7 * y(1) * y(1); @@ -122,8 +113,7 @@ struct StiffChemistry { } template - KOKKOS_FUNCTION void evaluate_jacobian(const double /*t*/, - const double /*dt*/, const vec_type& y, + KOKKOS_FUNCTION void evaluate_jacobian(const double /*t*/, const double /*dt*/, const vec_type& y, const mat_type& jac) const { jac(0, 0) = -0.04; jac(0, 1) = 1.e4 * y(2); @@ -137,8 +127,8 @@ struct StiffChemistry { } }; -template +template struct BDFSolve_wrapper { ode_type my_ode; scalar_type tstart, tend; @@ -147,12 
+137,9 @@ struct BDFSolve_wrapper { mv_type y_vecs, kstack; mat_type temp, jac; - BDFSolve_wrapper(const ode_type& my_ode_, const scalar_type tstart_, - const scalar_type tend_, const int num_steps_, - const vec_type& y_old_, const vec_type& y_new_, - const vec_type& rhs_, const vec_type& update_, - const vec_type& scale_, const mv_type& y_vecs_, - const mv_type& kstack_, const mat_type& temp_, + BDFSolve_wrapper(const ode_type& my_ode_, const scalar_type tstart_, const scalar_type tend_, const int num_steps_, + const vec_type& y_old_, const vec_type& y_new_, const vec_type& rhs_, const vec_type& update_, + const vec_type& scale_, const mv_type& y_vecs_, const mv_type& kstack_, const mat_type& temp_, const mat_type& jac_) : my_ode(my_ode_), tstart(tstart_), @@ -170,9 +157,8 @@ struct BDFSolve_wrapper { KOKKOS_FUNCTION void operator()(const int /*idx*/) const { - KokkosODE::Experimental::BDF::Solve( - my_ode, tstart, tend, num_steps, y_old, y_new, rhs, update, scale, - y_vecs, kstack, temp, jac); + KokkosODE::Experimental::BDF::Solve(my_ode, tstart, tend, num_steps, y_old, y_new, rhs, update, scale, + y_vecs, kstack, temp, jac); } }; @@ -183,11 +169,9 @@ struct BDF_Solve_wrapper { const vec_type y0, y_new; const mat_type temp, temp2; - BDF_Solve_wrapper(const ode_type& my_ode_, const scalar_type& t_start_, - const scalar_type& t_end_, const scalar_type& dt_, - const scalar_type& max_step_, const vec_type& y0_, - const vec_type& y_new_, const mat_type& temp_, - const mat_type& temp2_) + BDF_Solve_wrapper(const ode_type& my_ode_, const scalar_type& t_start_, const scalar_type& t_end_, + const scalar_type& dt_, const scalar_type& max_step_, const vec_type& y0_, const vec_type& y_new_, + const mat_type& temp_, const mat_type& temp2_) : my_ode(my_ode_), t_start(t_start_), t_end(t_end_), @@ -199,8 +183,7 @@ struct BDF_Solve_wrapper { temp2(temp2_) {} KOKKOS_FUNCTION void operator()(const int) const { - KokkosODE::Experimental::BDFSolve(my_ode, t_start, t_end, dt, 
max_step, y0, - y_new, temp, temp2); + KokkosODE::Experimental::BDFSolve(my_ode, t_start, t_end, dt, max_step, y0, y_new, temp, temp2); } }; @@ -221,8 +204,7 @@ void test_BDF_Logistic() { vec_type y0("initial conditions", mySys.neqs), y_new("solution", mySys.neqs); vec_type rhs("rhs", mySys.neqs), update("update", mySys.neqs); vec_type scale("scaling factors", mySys.neqs); - mat_type jac("jacobian", mySys.neqs, mySys.neqs), - temp("temp storage", mySys.neqs, mySys.neqs + 4); + mat_type jac("jacobian", mySys.neqs, mySys.neqs), temp("temp storage", mySys.neqs, mySys.neqs + 4); mv_type kstack("Startup RK vectors", 6, mySys.neqs); Kokkos::deep_copy(scale, 1); @@ -239,26 +221,21 @@ void test_BDF_Logistic() { Kokkos::deep_copy(y0, 0.5); Kokkos::deep_copy(y_vecs, 0.5); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, - update, scale, y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); Kokkos::fence(); auto y_new_h = Kokkos::create_mirror_view(y_new); Kokkos::deep_copy(y_new_h, y_new); - errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / - Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); + errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); } - measured_order = - Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); + measured_order = Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); EXPECT_NEAR_KK_REL(measured_order, 2.0, 0.15); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "expected ratio: 2, actual ratio: " << measured_order - << ", order error=" << Kokkos::abs(measured_order - 2.0) / 2.0 - << std::endl; + << ", order error=" << Kokkos::abs(measured_order - 2.0) / 2.0 << std::endl; #endif // Test BDF2 @@ -269,26 +246,21 @@ void test_BDF_Logistic() { 
mv_type y_vecs("history vectors", mySys.neqs, 2); Kokkos::deep_copy(y0, 0.5); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, - update, scale, y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); Kokkos::fence(); auto y_new_h = Kokkos::create_mirror_view(y_new); Kokkos::deep_copy(y_new_h, y_new); - errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / - Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); + errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); } - measured_order = - Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); + measured_order = Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); EXPECT_NEAR_KK_REL(measured_order, 4.0, 0.15); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "expected ratio: 4, actual ratio: " << measured_order - << ", order error=" << Kokkos::abs(measured_order - 4.0) / 4.0 - << std::endl; + << ", order error=" << Kokkos::abs(measured_order - 4.0) / 4.0 << std::endl; #endif // Test BDF3 @@ -299,26 +271,21 @@ void test_BDF_Logistic() { mv_type y_vecs("history vectors", mySys.neqs, 3); Kokkos::deep_copy(y0, 0.5); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, - update, scale, y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); Kokkos::fence(); auto y_new_h = Kokkos::create_mirror_view(y_new); Kokkos::deep_copy(y_new_h, y_new); - errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / - Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); + errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / 
Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); } - measured_order = - Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); + measured_order = Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); EXPECT_NEAR_KK_REL(measured_order, 8.0, 0.15); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "expected ratio: 8, actual ratio: " << measured_order - << ", order error=" << Kokkos::abs(measured_order - 8.0) / 8.0 - << std::endl; + << ", order error=" << Kokkos::abs(measured_order - 8.0) / 8.0 << std::endl; #endif // Test BDF4 @@ -329,25 +296,20 @@ void test_BDF_Logistic() { mv_type y_vecs("history vectors", mySys.neqs, 4); Kokkos::deep_copy(y0, 0.5); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, - update, scale, y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); Kokkos::fence(); auto y_new_h = Kokkos::create_mirror_view(y_new); Kokkos::deep_copy(y_new_h, y_new); - errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / - Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); + errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); } - measured_order = - Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); + measured_order = Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "expected ratio: 16, actual ratio: " << measured_order - << ", order error=" << Kokkos::abs(measured_order - 16.0) / 16.0 - << std::endl; + << ", order error=" << Kokkos::abs(measured_order - 16.0) / 16.0 << std::endl; #endif // Test BDF5 @@ -358,25 +320,20 @@ void test_BDF_Logistic() { mv_type y_vecs("history vectors", mySys.neqs, 5); Kokkos::deep_copy(y0, 0.5); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, 
t_end, num_steps[idx], y0, y_new, rhs, - update, scale, y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, num_steps[idx], y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); Kokkos::fence(); auto y_new_h = Kokkos::create_mirror_view(y_new); Kokkos::deep_copy(y_new_h, y_new); - errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / - Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); + errors[idx] = Kokkos::abs(y_new_h(0) - 1 / (1 + Kokkos::exp(-t_end))) / Kokkos::abs(1 / (1 + Kokkos::exp(-t_end))); } - measured_order = - Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); + measured_order = Kokkos::pow(errors[num_tests - 1] / errors[0], 1.0 / (num_tests - 1)); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "expected ratio: 32, actual ratio: " << measured_order - << ", order error=" << Kokkos::abs(measured_order - 32.0) / 32.0 - << std::endl; + << ", order error=" << Kokkos::abs(measured_order - 32.0) / 32.0 << std::endl; #endif } // test_BDF_Logistic @@ -394,8 +351,7 @@ void test_BDF_LotkaVolterra() { vec_type y0("initial conditions", mySys.neqs), y_new("solution", mySys.neqs); vec_type rhs("rhs", mySys.neqs), update("update", mySys.neqs); vec_type scale("scaling factors", mySys.neqs); - mat_type jac("jacobian", mySys.neqs, mySys.neqs), - temp("temp storage", mySys.neqs, mySys.neqs + 4); + mat_type jac("jacobian", mySys.neqs, mySys.neqs), temp("temp storage", mySys.neqs, mySys.neqs + 4); Kokkos::deep_copy(scale, 1); @@ -407,10 +363,8 @@ void test_BDF_LotkaVolterra() { Kokkos::deep_copy(y_vecs, 10.0); Kokkos::RangePolicy myPolicy(0, 1); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, t_end, 1000, y0, y_new, rhs, update, scale, - y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, 1000, y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); } @@ -427,8 +381,7 
@@ void test_BDF_StiffChemistry() { vec_type y0("initial conditions", mySys.neqs), y_new("solution", mySys.neqs); vec_type rhs("rhs", mySys.neqs), update("update", mySys.neqs); vec_type scale("scaling factors", mySys.neqs); - mat_type jac("jacobian", mySys.neqs, mySys.neqs), - temp("temp storage", mySys.neqs, mySys.neqs + 4); + mat_type jac("jacobian", mySys.neqs, mySys.neqs), temp("temp storage", mySys.neqs, mySys.neqs + 4); Kokkos::deep_copy(scale, 1); @@ -444,10 +397,8 @@ void test_BDF_StiffChemistry() { Kokkos::deep_copy(y_vecs, 0.0); Kokkos::RangePolicy myPolicy(0, 1); - BDFSolve_wrapper - solve_wrapper(mySys, t_start, t_end, 110000, y0, y_new, rhs, update, - scale, y_vecs, kstack, temp, jac); + BDFSolve_wrapper + solve_wrapper(mySys, t_start, t_end, 110000, y0, y_new, rhs, update, scale, y_vecs, kstack, temp, jac); Kokkos::parallel_for(myPolicy, solve_wrapper); } @@ -559,8 +510,7 @@ void test_BDF_StiffChemistry() { // } template -void compute_coeffs(const int order, const scalar_type factor, - const mat_type& coeffs) { +void compute_coeffs(const int order, const scalar_type factor, const mat_type& coeffs) { std::cout << "compute_coeffs" << std::endl; coeffs(0, 0) = 1.0; @@ -568,35 +518,28 @@ void compute_coeffs(const int order, const scalar_type factor, coeffs(0, colIdx + 1) = 1.0; for (int rowIdx = 0; rowIdx < order; ++rowIdx) { coeffs(rowIdx + 1, colIdx + 1) = - ((rowIdx - factor * (colIdx + 1.0)) / (rowIdx + 1.0)) * - coeffs(rowIdx, colIdx + 1); + ((rowIdx - factor * (colIdx + 1.0)) / (rowIdx + 1.0)) * coeffs(rowIdx, colIdx + 1); } } } template -void update_D(const int order, const scalar_type factor, const mat_type& coeffs, - const mat_type& tempD, const mat_type& D) { - auto subD = - Kokkos::subview(D, Kokkos::pair(0, order + 1), Kokkos::ALL); - auto subTempD = - Kokkos::subview(tempD, Kokkos::pair(0, order + 1), Kokkos::ALL); +void update_D(const int order, const scalar_type factor, const mat_type& coeffs, const mat_type& tempD, + const mat_type& D) { + 
auto subD = Kokkos::subview(D, Kokkos::pair(0, order + 1), Kokkos::ALL); + auto subTempD = Kokkos::subview(tempD, Kokkos::pair(0, order + 1), Kokkos::ALL); compute_coeffs(order, factor, coeffs); - auto R = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), - Kokkos::pair(0, order + 1)); + auto R = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), Kokkos::pair(0, order + 1)); std::cout << "SerialGemm" << std::endl; - KokkosBatched::SerialGemm< - KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Blocked>::invoke(1.0, R, subD, 0.0, subTempD); + KokkosBatched::SerialGemm::invoke(1.0, R, subD, 0.0, subTempD); compute_coeffs(order, 1.0, coeffs); - auto U = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), - Kokkos::pair(0, order + 1)); + auto U = Kokkos::subview(coeffs, Kokkos::pair(0, order + 1), Kokkos::pair(0, order + 1)); std::cout << "SerialGemm" << std::endl; - KokkosBatched::SerialGemm< - KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Blocked>::invoke(1.0, U, subTempD, 0.0, subD); + KokkosBatched::SerialGemm::invoke(1.0, U, subTempD, 0.0, subD); } template @@ -604,10 +547,8 @@ void test_Nordsieck() { using execution_space = Kokkos::HostSpace; StiffChemistry mySys{}; - Kokkos::View R("coeffs", 6, 6), - U("coeffs", 6, 6); - Kokkos::View D("D", 8, mySys.neqs), - tempD("tmp", 8, mySys.neqs); + Kokkos::View R("coeffs", 6, 6), U("coeffs", 6, 6); + Kokkos::View D("D", 8, mySys.neqs), tempD("tmp", 8, mySys.neqs); int order = 1; double factor = 0.8; @@ -639,17 +580,13 @@ void test_Nordsieck() { } std::cout << "D before update:" << std::endl; - std::cout << " { " << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << " }" - << std::endl; - std::cout << " { " << D(1, 0) << ", " << D(1, 1) << ", " << D(1, 2) << " }" - << std::endl; + std::cout << " { " << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << " }" << std::endl; + std::cout << " { " << D(1, 0) << ", " << D(1, 1) << ", 
" << D(1, 2) << " }" << std::endl; update_D(order, factor, R, tempD, D); std::cout << "D after update:" << std::endl; - std::cout << " { " << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << " }" - << std::endl; - std::cout << " { " << D(1, 0) << ", " << D(1, 1) << ", " << D(1, 2) << " }" - << std::endl; + std::cout << " { " << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << " }" << std::endl; + std::cout << " { " << D(1, 0) << ", " << D(1, 1) << ", " << D(1, 2) << " }" << std::endl; } template @@ -668,8 +605,7 @@ void test_adaptive_BDF() { vec_type y0("initial conditions", mySys.neqs), y_new("solution", mySys.neqs); vec_type rhs("rhs", mySys.neqs), update("update", mySys.neqs); - mat_type temp("buffer1", mySys.neqs, 23 + 2 * mySys.neqs + 4), - temp2("buffer2", 6, 7); + mat_type temp("buffer1", mySys.neqs, 23 + 2 * mySys.neqs + 4), temp2("buffer2", 6, 7); // Initial condition Kokkos::deep_copy(y0, 0.5); @@ -688,13 +624,11 @@ void test_adaptive_BDF() { std::cout << "Initial conditions" << std::endl; std::cout << " y0=" << y0(0) << ", t=" << t << ", dt=" << dt << std::endl; - std::cout << "Initial D: {" << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) - << ", " << D(0, 3) << ", " << D(0, 4) << ", " << D(0, 5) << ", " - << D(0, 6) << ", " << D(0, 7) << "}" << std::endl; + std::cout << "Initial D: {" << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << ", " << D(0, 3) << ", " << D(0, 4) + << ", " << D(0, 5) << ", " << D(0, 6) << ", " << D(0, 7) << "}" << std::endl; - KokkosODE::Impl::BDFStep(mySys, t, dt, t_end, order, num_equal_steps, - max_newton_iters, atol, rtol, 0.2, y0, y_new, rhs, - update, temp, temp2); + KokkosODE::Impl::BDFStep(mySys, t, dt, t_end, order, num_equal_steps, max_newton_iters, atol, rtol, 0.2, y0, y_new, + rhs, update, temp, temp2); for (int eqIdx = 0; eqIdx < mySys.neqs; ++eqIdx) { y0(eqIdx) = y_new(eqIdx); @@ -706,13 +640,11 @@ void test_adaptive_BDF() { std::cout << " y0=" << y0(0) << ", t=" << t << ", dt: " << dt << std::endl; - std::cout << 
"Initial D: {" << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) - << ", " << D(0, 3) << ", " << D(0, 4) << ", " << D(0, 5) << ", " - << D(0, 6) << ", " << D(0, 7) << "}" << std::endl; + std::cout << "Initial D: {" << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << ", " << D(0, 3) << ", " << D(0, 4) + << ", " << D(0, 5) << ", " << D(0, 6) << ", " << D(0, 7) << "}" << std::endl; - KokkosODE::Impl::BDFStep(mySys, t, dt, t_end, order, num_equal_steps, - max_newton_iters, atol, rtol, 0.2, y0, y_new, rhs, - update, temp, temp2); + KokkosODE::Impl::BDFStep(mySys, t, dt, t_end, order, num_equal_steps, max_newton_iters, atol, rtol, 0.2, y0, y_new, + rhs, update, temp, temp2); for (int eqIdx = 0; eqIdx < mySys.neqs; ++eqIdx) { y0(eqIdx) = y_new(eqIdx); @@ -724,13 +656,11 @@ void test_adaptive_BDF() { std::cout << " y0=" << y0(0) << ", t=" << t << ", dt: " << dt << std::endl; - std::cout << "Initial D: {" << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) - << ", " << D(0, 3) << ", " << D(0, 4) << ", " << D(0, 5) << ", " - << D(0, 6) << ", " << D(0, 7) << "}" << std::endl; + std::cout << "Initial D: {" << D(0, 0) << ", " << D(0, 1) << ", " << D(0, 2) << ", " << D(0, 3) << ", " << D(0, 4) + << ", " << D(0, 5) << ", " << D(0, 6) << ", " << D(0, 7) << "}" << std::endl; - KokkosODE::Impl::BDFStep(mySys, t, dt, t_end, order, num_equal_steps, - max_newton_iters, atol, rtol, 0.2, y0, y_new, rhs, - update, temp, temp2); + KokkosODE::Impl::BDFStep(mySys, t, dt, t_end, order, num_equal_steps, max_newton_iters, atol, rtol, 0.2, y0, y_new, + rhs, update, temp, temp2); std::cout << "Final t: " << t << ", y=" << y_new(0) << std::endl; @@ -751,22 +681,18 @@ void test_adaptive_BDF_v2() { vec_type y0("initial conditions", mySys.neqs), y_new("solution", mySys.neqs); Kokkos::deep_copy(y0, 0.5); - mat_type temp("buffer1", mySys.neqs, 23 + 2 * mySys.neqs + 4), - temp2("buffer2", 6, 7); + mat_type temp("buffer1", mySys.neqs, 23 + 2 * mySys.neqs + 4), temp2("buffer2", 6, 7); { scalar_type dt = 
KAT::zero(); vec_type f0("initial value f", mySys.neqs); mySys.evaluate_function(t_start, KAT::zero(), y0, f0); - KokkosODE::Impl::initial_step_size(mySys, 1, t_start, 1e-6, 1e-3, y0, f0, - temp, dt); + KokkosODE::Impl::initial_step_size(mySys, 1, t_start, 1e-6, 1e-3, y0, f0, temp, dt); std::cout << "Initial Step Size: dt=" << dt << std::endl; } - KokkosODE::Experimental::BDFSolve(mySys, t_start, t_end, 0.0117188, - (t_end - t_start) / 10, y0, y_new, temp, - temp2); + KokkosODE::Experimental::BDFSolve(mySys, t_start, t_end, 0.0117188, (t_end - t_start) / 10, y0, y_new, temp, temp2); } template @@ -789,42 +715,30 @@ void test_BDF_adaptive_stiff() { y0_h(2) = KAT::zero(); Kokkos::deep_copy(y0, y0_h); - mat_type temp("buffer1", mySys.neqs, 23 + 2 * mySys.neqs + 4), - temp2("buffer2", 6, 7); + mat_type temp("buffer1", mySys.neqs, 23 + 2 * mySys.neqs + 4), temp2("buffer2", 6, 7); Kokkos::RangePolicy policy(0, 1); - BDF_Solve_wrapper bdf_wrapper(mySys, t_start, t_end, dt, - (t_end - t_start) / 10, y0, y_new, temp, temp2); + BDF_Solve_wrapper bdf_wrapper(mySys, t_start, t_end, dt, (t_end - t_start) / 10, y0, y_new, temp, temp2); Kokkos::parallel_for(policy, bdf_wrapper); auto y_new_h = Kokkos::create_mirror_view(y_new); Kokkos::deep_copy(y_new_h, y_new); - std::cout << "Stiff Chemistry solution at t=500: {" << y_new_h(0) << ", " - << y_new_h(1) << ", " << y_new_h(2) << "}" << std::endl; + std::cout << "Stiff Chemistry solution at t=500: {" << y_new_h(0) << ", " << y_new_h(1) << ", " << y_new_h(2) << "}" + << std::endl; } } // namespace Test -TEST_F(TestCategory, BDF_Logistic_serial) { - ::Test::test_BDF_Logistic(); -} -TEST_F(TestCategory, BDF_LotkaVolterra_serial) { - ::Test::test_BDF_LotkaVolterra(); -} -TEST_F(TestCategory, BDF_StiffChemistry_serial) { - ::Test::test_BDF_StiffChemistry(); -} +TEST_F(TestCategory, BDF_Logistic_serial) { ::Test::test_BDF_Logistic(); } +TEST_F(TestCategory, BDF_LotkaVolterra_serial) { ::Test::test_BDF_LotkaVolterra(); } 
+TEST_F(TestCategory, BDF_StiffChemistry_serial) { ::Test::test_BDF_StiffChemistry(); } // TEST_F(TestCategory, BDF_parallel_serial) { // ::Test::test_BDF_parallel(); // } -TEST_F(TestCategory, BDF_Nordsieck) { - ::Test::test_Nordsieck(); -} +TEST_F(TestCategory, BDF_Nordsieck) { ::Test::test_Nordsieck(); } // TEST_F(TestCategory, BDF_adaptive) { // ::Test::test_adaptive_BDF(); // ::Test::test_adaptive_BDF_v2(); // } -TEST_F(TestCategory, BDF_StiffChemistry_adaptive) { - ::Test::test_BDF_adaptive_stiff(); -} +TEST_F(TestCategory, BDF_StiffChemistry_adaptive) { ::Test::test_BDF_adaptive_stiff(); } diff --git a/packages/kokkos-kernels/ode/unit_test/Test_ODE_Newton.hpp b/packages/kokkos-kernels/ode/unit_test/Test_ODE_Newton.hpp index 45dd4adb6adf..c37142ee8f62 100644 --- a/packages/kokkos-kernels/ode/unit_test/Test_ODE_Newton.hpp +++ b/packages/kokkos-kernels/ode/unit_test/Test_ODE_Newton.hpp @@ -21,8 +21,7 @@ namespace Test { -template +template struct NewtonSolve_wrapper { using newton_params = KokkosODE::Experimental::Newton_params; @@ -35,11 +34,9 @@ struct NewtonSolve_wrapper { scale_type scale; - NewtonSolve_wrapper(const system_type& my_nls_, const newton_params& params_, - const vec_type& x_, const vec_type& rhs_, - const vec_type& update_, const mat_type& J_, - const mat_type& tmp_, const status_view& status_, - const scale_type& scale_) + NewtonSolve_wrapper(const system_type& my_nls_, const newton_params& params_, const vec_type& x_, + const vec_type& rhs_, const vec_type& update_, const mat_type& J_, const mat_type& tmp_, + const status_view& status_, const scale_type& scale_) : my_nls(my_nls_), params(params_), x(x_), @@ -54,38 +51,27 @@ struct NewtonSolve_wrapper { void operator()(const int idx) const { // Take subviews to create the local problem auto local_x = Kokkos::subview( - x, Kokkos::pair(static_cast(my_nls.neqs * idx), - static_cast(my_nls.neqs * (idx + 1)))); + x, Kokkos::pair(static_cast(my_nls.neqs * idx), static_cast(my_nls.neqs * (idx + 
1)))); auto local_rhs = Kokkos::subview( - rhs, Kokkos::pair(static_cast(my_nls.neqs * idx), - static_cast(my_nls.neqs * (idx + 1)))); + rhs, Kokkos::pair(static_cast(my_nls.neqs * idx), static_cast(my_nls.neqs * (idx + 1)))); auto local_update = Kokkos::subview( - update, - Kokkos::pair(static_cast(my_nls.neqs * idx), - static_cast(my_nls.neqs * (idx + 1)))); + update, Kokkos::pair(static_cast(my_nls.neqs * idx), static_cast(my_nls.neqs * (idx + 1)))); auto local_J = Kokkos::subview( - J, - Kokkos::pair(static_cast(my_nls.neqs * idx), - static_cast(my_nls.neqs * (idx + 1))), + J, Kokkos::pair(static_cast(my_nls.neqs * idx), static_cast(my_nls.neqs * (idx + 1))), Kokkos::ALL()); auto local_tmp = Kokkos::subview( - tmp, - Kokkos::pair(static_cast(my_nls.neqs * idx), - static_cast(my_nls.neqs * (idx + 1))), + tmp, Kokkos::pair(static_cast(my_nls.neqs * idx), static_cast(my_nls.neqs * (idx + 1))), Kokkos::ALL()); // Run Newton nonlinear solver - status(idx) = KokkosODE::Experimental::Newton::Solve( - my_nls, params, local_J, local_tmp, local_x, local_rhs, local_update, - scale); + status(idx) = KokkosODE::Experimental::Newton::Solve(my_nls, params, local_J, local_tmp, local_x, local_rhs, + local_update, scale); } }; template -void run_newton_test(const system_type& mySys, - KokkosODE::Experimental::Newton_params& params, - const scalar_type* const initial_val, - const scalar_type* const solution) { +void run_newton_test(const system_type& mySys, KokkosODE::Experimental::Newton_params& params, + const scalar_type* const initial_val, const scalar_type* const solution) { using execution_space = typename Device::execution_space; using newton_solver_status = KokkosODE::Experimental::newton_solver_status; using vec_type = typename Kokkos::View; @@ -96,14 +82,12 @@ void run_newton_test(const system_type& mySys, vec_type scale("scaling factors", mySys.neqs); Kokkos::deep_copy(scale, 1); - vec_type x("solution vector", mySys.neqs), - rhs("right hand side vector", mySys.neqs); 
+ vec_type x("solution vector", mySys.neqs), rhs("right hand side vector", mySys.neqs); auto x_h = Kokkos::create_mirror_view(x); auto r_h = Kokkos::create_mirror_view(rhs); vec_type update("update", mySys.neqs); - mat_type J("jacobian", mySys.neqs, mySys.neqs), - tmp("temp mem", mySys.neqs, mySys.neqs + 4); + mat_type J("jacobian", mySys.neqs, mySys.neqs), tmp("temp mem", mySys.neqs, mySys.neqs + 4); // Initial values for (int eqIdx = 0; eqIdx < mySys.neqs; ++eqIdx) { @@ -112,8 +96,7 @@ void run_newton_test(const system_type& mySys, Kokkos::deep_copy(x, x_h); Kokkos::RangePolicy my_policy(0, 1); - NewtonSolve_wrapper solve_wrapper(mySys, params, x, rhs, update, J, tmp, - status, scale); + NewtonSolve_wrapper solve_wrapper(mySys, params, x, rhs, update, J, tmp, status, scale); Kokkos::parallel_for(my_policy, solve_wrapper); @@ -131,9 +114,7 @@ void run_newton_test(const system_type& mySys, } std::cout << " ), " << KokkosBlas::serial_nrm2(rhs) << ", ("; for (int eqIdx = 0; eqIdx < mySys.neqs; ++eqIdx) { - std::cout << " " - << Kokkos::abs(x_h(eqIdx) - solution[eqIdx]) / - Kokkos::abs(solution[eqIdx]); + std::cout << " " << Kokkos::abs(x_h(eqIdx) - solution[eqIdx]) / Kokkos::abs(solution[eqIdx]); } std::cout << " )]" << std::endl; #else @@ -154,13 +135,9 @@ struct QuadraticEquation { QuadraticEquation() {} - KOKKOS_FUNCTION void residual(const vec_type& y, const vec_type& f) const { - f(0) = y(0) * y(0) - y(0) - 2; - } + KOKKOS_FUNCTION void residual(const vec_type& y, const vec_type& f) const { f(0) = y(0) * y(0) - y(0) - 2; } - KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { - jac(0, 0) = 2 * y(0) - 1; - } + KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { jac(0, 0) = 2 * y(0) - 1; } }; // Trigonometric equation @@ -176,13 +153,9 @@ struct TrigonometricEquation { TrigonometricEquation() {} - KOKKOS_FUNCTION void residual(const vec_type& y, const vec_type& f) const { - f(0) = Kokkos::cos(y(0)) - y(0); - } + 
KOKKOS_FUNCTION void residual(const vec_type& y, const vec_type& f) const { f(0) = Kokkos::cos(y(0)) - y(0); } - KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { - jac(0, 0) = -Kokkos::sin(y(0)) - 1; - } + KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { jac(0, 0) = -Kokkos::sin(y(0)) - 1; } }; // Logarithmic equation @@ -202,9 +175,7 @@ struct LogarithmicEquation { f(0) = 7 * y(0) - Kokkos::log(7 * y(0)) - 1; } - KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { - jac(0, 0) = 7 - 1 / y(0); - } + KOKKOS_FUNCTION void jacobian(const vec_type& y, const mat_type& jac) const { jac(0, 0) = 7 - 1 / y(0); } }; template @@ -238,9 +209,8 @@ void test_newton_status() { #ifdef HAVE_KOKKOSKERNELS_DEBUG scalar_type solution[3] = {2.0, -1.0, 0.0}; #endif - newton_solver_status newton_status[3] = { - newton_solver_status::NLS_SUCCESS, newton_solver_status::NLS_DIVERGENCE, - newton_solver_status::LIN_SOLVE_FAIL}; + newton_solver_status newton_status[3] = {newton_solver_status::NLS_SUCCESS, newton_solver_status::NLS_DIVERGENCE, + newton_solver_status::LIN_SOLVE_FAIL}; vec_type x("solution vector", 1), rhs("right hand side vector", 1); auto x_h = Kokkos::create_mirror_view(x); auto r_h = Kokkos::create_mirror_view(rhs); @@ -253,8 +223,7 @@ void test_newton_status() { Kokkos::deep_copy(x, initial_value[idx]); Kokkos::RangePolicy my_policy(0, 1); - NewtonSolve_wrapper solve_wrapper(my_system, params, x, rhs, update, J, tmp, - status, scale); + NewtonSolve_wrapper solve_wrapper(my_system, params, x, rhs, update, J, tmp, status, scale); Kokkos::parallel_for(my_policy, solve_wrapper); Kokkos::deep_copy(status_h, status); @@ -263,10 +232,8 @@ void test_newton_status() { #ifdef HAVE_KOKKOSKERNELS_DEBUG Kokkos::deep_copy(x_h, x); Kokkos::deep_copy(r_h, rhs); - printf("Non-linear problem solution and residual with initial value %f:\n", - initial_value[idx]); - printf(" [%f, %g, %g]\n", x_h(0), r_h(0), - 
Kokkos::abs(x_h(0) - solution[idx]) / Kokkos::abs(solution[idx])); + printf("Non-linear problem solution and residual with initial value %f:\n", initial_value[idx]); + printf(" [%f, %g, %g]\n", x_h(0), r_h(0), Kokkos::abs(x_h(0) - solution[idx]) / Kokkos::abs(solution[idx])); #endif } } @@ -296,8 +263,7 @@ void test_simple_problems() { system_type mySys{}; scalar_type initial_value[2] = {1.0, -0.5}, solution[2] = {2.0, -1.0}; for (int idx = 0; idx < 2; ++idx) { - run_newton_test( - mySys, params, &(initial_value[idx]), &(solution[idx])); + run_newton_test(mySys, params, &(initial_value[idx]), &(solution[idx])); } #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Finished Quadratic Equation problem" << std::endl; @@ -312,8 +278,7 @@ void test_simple_problems() { using system_type = TrigonometricEquation; system_type mySys{}; scalar_type initial_value[1] = {0.1}, solution[1] = {0.739085}; - run_newton_test(mySys, params, - initial_value, solution); + run_newton_test(mySys, params, initial_value, solution); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Finished Trigonometric Equation problem" << std::endl; #endif @@ -327,10 +292,8 @@ void test_simple_problems() { using system_type = LogarithmicEquation; system_type mySys{}; scalar_type initial_value[1] = {static_cast(0.5)}, - solution[1] = {static_cast(1.0) / - static_cast(7.0)}; - run_newton_test(mySys, params, - initial_value, solution); + solution[1] = {static_cast(1.0) / static_cast(7.0)}; + run_newton_test(mySys, params, initial_value, solution); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Finished Logarithmic Equation problem" << std::endl; #endif @@ -431,8 +394,7 @@ void test_simple_systems() { system_type mySys{}; scalar_type initial_values[2] = {1.5, 1.5}; scalar_type solution[2] = {10.75 / 6, 0.8887803753}; - run_newton_test(mySys, params, - initial_values, solution); + run_newton_test(mySys, params, initial_values, solution); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Finished Circles Intersetcion problem" 
<< std::endl; #endif @@ -441,8 +403,7 @@ void test_simple_systems() { { // Second problem: circle / hyperbola intersection #ifdef HAVE_KOKKOSKERNELS_DEBUG - std::cout << "\nStarting Circle/Hyperbola Intersetcion problem" - << std::endl; + std::cout << "\nStarting Circle/Hyperbola Intersetcion problem" << std::endl; #endif using system_type = CircleHyperbolaIntersection; system_type mySys{}; @@ -450,12 +411,9 @@ void test_simple_systems() { scalar_type init_vals[2] = {0.0, 1.0}; scalar_type solutions[2] = { Kokkos::ArithTraits::one() / - Kokkos::sqrt(static_cast( - 4 + Kokkos::sqrt(static_cast(12.0)) / 2)), - Kokkos::sqrt(static_cast( - (4 + Kokkos::sqrt(static_cast(12.0))) / 2))}; - run_newton_test(mySys, params, init_vals, - solutions); + Kokkos::sqrt(static_cast(4 + Kokkos::sqrt(static_cast(12.0)) / 2)), + Kokkos::sqrt(static_cast((4 + Kokkos::sqrt(static_cast(12.0))) / 2))}; + run_newton_test(mySys, params, init_vals, solutions); #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "Finished Circle/Hyperbola Intersetcion problem" << std::endl; #endif @@ -502,8 +460,7 @@ void test_newton_on_device() { mat_type J("jacobian", mySys.neqs * num_systems, mySys.neqs); mat_type tmp("temp mem", mySys.neqs * num_systems, mySys.neqs + 4); - Kokkos::View status("solver status", - num_systems); + Kokkos::View status("solver status", num_systems); auto x_h = Kokkos::create_mirror_view(x); auto r_h = Kokkos::create_mirror_view(rhs); @@ -517,8 +474,7 @@ void test_newton_on_device() { Kokkos::deep_copy(x, x_h); Kokkos::RangePolicy my_policy(0, num_systems); - NewtonSolve_wrapper solve_wrapper(mySys, params, x, rhs, update, J, tmp, - status, scale); + NewtonSolve_wrapper solve_wrapper(mySys, params, x, rhs, update, J, tmp, status, scale); Kokkos::parallel_for(my_policy, solve_wrapper); Kokkos::fence(); @@ -536,30 +492,14 @@ void test_newton_on_device() { // No ETI is performed for these device routines // Just pick scalar types at will... 
-TEST_F(TestCategory, Newton_status_float) { - ::Test::test_newton_status(); -} -TEST_F(TestCategory, Newton_status_double) { - ::Test::test_newton_status(); -} +TEST_F(TestCategory, Newton_status_float) { ::Test::test_newton_status(); } +TEST_F(TestCategory, Newton_status_double) { ::Test::test_newton_status(); } -TEST_F(TestCategory, Newton_simple_float) { - ::Test::test_simple_problems(); -} -TEST_F(TestCategory, Newton_simple_double) { - ::Test::test_simple_problems(); -} +TEST_F(TestCategory, Newton_simple_float) { ::Test::test_simple_problems(); } +TEST_F(TestCategory, Newton_simple_double) { ::Test::test_simple_problems(); } -TEST_F(TestCategory, Newton_system_float) { - ::Test::test_simple_systems(); -} -TEST_F(TestCategory, Newton_system_double) { - ::Test::test_simple_systems(); -} +TEST_F(TestCategory, Newton_system_float) { ::Test::test_simple_systems(); } +TEST_F(TestCategory, Newton_system_double) { ::Test::test_simple_systems(); } -TEST_F(TestCategory, Newton_parallel_float) { - ::Test::test_newton_on_device(); -} -TEST_F(TestCategory, Newton_parallel_double) { - ::Test::test_newton_on_device(); -} +TEST_F(TestCategory, Newton_parallel_float) { ::Test::test_newton_on_device(); } +TEST_F(TestCategory, Newton_parallel_double) { ::Test::test_newton_on_device(); } diff --git a/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK.hpp b/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK.hpp index c7d1a84865da..90bec0e1841d 100644 --- a/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK.hpp +++ b/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK.hpp @@ -37,25 +37,17 @@ struct duho { const double a11 = 0, a12 = 1, a21, a22; duho(const double m_, const double c_, const double k_) - : m(m_), - c(c_), - k(k_), - d(k_ / m_ - (c_ * c_) / (4 * m_ * m_)), - a21(-k / m), - a22(-c / m){}; + : m(m_), c(c_), k(k_), d(k_ / m_ - (c_ * c_) / (4 * m_ * m_)), a21(-k / m), a22(-c / m){}; template - KOKKOS_FUNCTION void evaluate_function(const double /*t*/, - const double /*dt*/, 
- const vec_type1& y, + KOKKOS_FUNCTION void evaluate_function(const double /*t*/, const double /*dt*/, const vec_type1& y, const vec_type2& f) const { f(0) = a11 * y(0) + a12 * y(1); f(1) = a21 * y(0) + a22 * y(1); } template - KOKKOS_FUNCTION void solution(const double t, const vec_type& y0, - const vec_type& y) const { + KOKKOS_FUNCTION void solution(const double t, const vec_type& y0, const vec_type& y) const { using KAT = Kokkos::ArithTraits; const double gamma = c / (2 * m); @@ -64,8 +56,7 @@ struct duho { const double A = y0(0) / KAT::cos(phi); y(0) = A * KAT::cos(omega * t - phi) * KAT::exp(-t * gamma); - y(1) = -y(0) * gamma - - omega * A * KAT::sin(omega * t - phi) * KAT::exp(-t * gamma); + y(1) = -y(0) * gamma - omega * A * KAT::sin(omega * t - phi) * KAT::exp(-t * gamma); } }; // duho @@ -76,16 +67,14 @@ struct solution_wrapper { scalar_type t; vec_type y_old, y_ref; - solution_wrapper(const ode_type& ode_, const scalar_type t_, - const vec_type& y_old_, const vec_type& y_ref_) + solution_wrapper(const ode_type& ode_, const scalar_type t_, const vec_type& y_old_, const vec_type& y_ref_) : ode(ode_), t(t_), y_old(y_old_), y_ref(y_ref_){}; KOKKOS_FUNCTION void operator()(const int /*idx*/) const { ode.solution(t, y_old, y_ref); } }; -template +template struct RKSolve_wrapper { using ode_params = KokkosODE::Experimental::ODE_params; @@ -96,10 +85,9 @@ struct RKSolve_wrapper { vec_type y_old, y_new, tmp; mv_type kstack; - RKSolve_wrapper(const ode_type& my_ode_, const ode_params& params_, - const scalar_type tstart_, const scalar_type tend_, - const vec_type& y_old_, const vec_type& y_new_, - const vec_type& tmp_, const mv_type& kstack_) + RKSolve_wrapper(const ode_type& my_ode_, const ode_params& params_, const scalar_type tstart_, + const scalar_type tend_, const vec_type& y_old_, const vec_type& y_new_, const vec_type& tmp_, + const mv_type& kstack_) : my_ode(my_ode_), params(params_), tstart(tstart_), @@ -111,20 +99,15 @@ struct RKSolve_wrapper { 
KOKKOS_FUNCTION void operator()(const int /*idx*/) const { - KokkosODE::Experimental::RungeKutta::Solve( - my_ode, params, tstart, tend, y_old, y_new, tmp, kstack); + KokkosODE::Experimental::RungeKutta::Solve(my_ode, params, tstart, tend, y_old, y_new, tmp, kstack); } }; -template -void test_method(const std::string label, ode_type& my_ode, - const scalar_type& tstart, const scalar_type& tend, - const int num_steps, vec_type& y_old, vec_type& y_new, - const int order, const int num_stages, +template +void test_method(const std::string label, ode_type& my_ode, const scalar_type& tstart, const scalar_type& tend, + const int num_steps, vec_type& y_old, vec_type& y_new, const int order, const int num_stages, const Kokkos::View& ks, - const Kokkos::View& sol, - typename vec_type::HostMirror y_ref_h) { + const Kokkos::View& sol, typename vec_type::HostMirror y_ref_h) { using execution_space = typename vec_type::execution_space; using solver_type = KokkosODE::Experimental::RungeKutta; @@ -133,8 +116,8 @@ void test_method(const std::string label, ode_type& my_ode, mv_type kstack("k stack", solver_type::num_stages(), my_ode.neqs); Kokkos::RangePolicy my_policy(0, 1); - RKSolve_wrapper - solve_wrapper(my_ode, params, tstart, tend, y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper(my_ode, params, tstart, tend, y_old, + y_new, tmp, kstack); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror_view(y_new); @@ -155,19 +138,16 @@ void test_method(const std::string label, ode_type& my_ode, EXPECT_NEAR_KK(ks(0, stageIdx), kstack_h(stageIdx, 0), 1e-8); EXPECT_NEAR_KK(ks(1, stageIdx), kstack_h(stageIdx, 1), 1e-8); #if defined(HAVE_KOKKOSKERNELS_DEBUG) - std::cout << " k" << stageIdx << "={" << kstack_h(stageIdx, 0) << ", " - << kstack_h(stageIdx, 1) << "}" << std::endl; + std::cout << " k" << stageIdx << "={" << kstack_h(stageIdx, 0) << ", " << kstack_h(stageIdx, 1) << "}" + << std::endl; #endif } EXPECT_NEAR_KK(sol(0), y_new_h(0), 1e-8); 
EXPECT_NEAR_KK(sol(1), y_new_h(1), 1e-8); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << " y={" << y_new_h(0) << ", " << y_new_h(1) << "}" << std::endl; - std::cout << " error={" - << Kokkos::abs(y_new_h(0) - y_ref_h(0)) / Kokkos::abs(y_ref_h(0)) - << ", " - << Kokkos::abs(y_new_h(1) - y_ref_h(1)) / Kokkos::abs(y_ref_h(1)) - << "}" << std::endl; + std::cout << " error={" << Kokkos::abs(y_new_h(0) - y_ref_h(0)) / Kokkos::abs(y_ref_h(0)) << ", " + << Kokkos::abs(y_new_h(1) - y_ref_h(1)) / Kokkos::abs(y_ref_h(1)) << "}" << std::endl; #else (void)y_ref_h; #endif @@ -216,8 +196,7 @@ void test_RK() { Kokkos::deep_copy(y_ref_h, y_ref); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "\nAnalytical solution" << std::endl; - std::cout << " y={" << y_ref_h(0) << ", " << y_ref_h(1) << "}" - << std::endl; + std::cout << " y={" << y_ref_h(0) << ", " << y_ref_h(1) << "}" << std::endl; #endif } @@ -230,9 +209,8 @@ void test_RK() { Kokkos::View ks(ks_raw, 2, 1); double sol_raw[2] = {1, -0.04}; Kokkos::View sol(sol_raw, 2); - test_method( - "Euler-Forward", my_oscillator, tstart, tend, 1, y_old, y_new, 1, 1, ks, - sol, y_ref_h); + test_method("Euler-Forward", my_oscillator, tstart, tend, 1, y_old, + y_new, 1, 1, ks, sol, y_ref_h); } { @@ -241,9 +219,8 @@ void test_RK() { Kokkos::View ks(ks_raw, 2, 2); double sol_raw[2] = {0.9998, -0.0398}; Kokkos::View sol(sol_raw, 2); - test_method( - "Euler-Heun", my_oscillator, tstart, tend, 1, y_old, y_new, 2, 2, ks, - sol, y_ref_h); + test_method("Euler-Heun", my_oscillator, tstart, tend, 1, y_old, + y_new, 2, 2, ks, sol, y_ref_h); } { @@ -252,73 +229,59 @@ void test_RK() { Kokkos::View ks(ks_raw, 2, 3); double sol_raw[2] = {0.9998, -0.03979999}; Kokkos::View sol(sol_raw, 2); - test_method( - "RKF-12", my_oscillator, tstart, tend, 1, y_old, y_new, 2, 3, ks, sol, - y_ref_h); + test_method("RKF-12", my_oscillator, tstart, tend, 1, y_old, y_new, + 2, 3, ks, sol, y_ref_h); } { Kokkos::deep_copy(y_old, y_old_h); - double ks_raw[8] = {0, 
-0.02, -0.02985, -0.039798, - -4, -3.98, -3.96955, -3.95940467}; + double ks_raw[8] = {0, -0.02, -0.02985, -0.039798, -4, -3.98, -3.96955, -3.95940467}; Kokkos::View ks(ks_raw, 2, 4); double sol_raw[2] = {0.99980067, -0.039798}; Kokkos::View sol(sol_raw, 2); - test_method( - "RKBS", my_oscillator, tstart, tend, 1, y_old, y_new, 3, 4, ks, sol, - y_ref_h); + test_method("RKBS", my_oscillator, tstart, tend, 1, y_old, y_new, 3, + 4, ks, sol, y_ref_h); } { Kokkos::deep_copy(y_old, y_old_h); - double ks_raw[12] = {0, -0.01, -0.01497188, -0.03674986, - -0.03979499, -0.0199505, -4, -3.99, - -3.98491562, -3.96257222, -3.95941166, -3.97984883}; + double ks_raw[12] = {0, -0.01, -0.01497188, -0.03674986, -0.03979499, -0.0199505, + -4, -3.99, -3.98491562, -3.96257222, -3.95941166, -3.97984883}; Kokkos::View ks(ks_raw, 2, 6); double sol_raw[2] = {0.99980067, -0.03979801}; Kokkos::View sol(sol_raw, 2); - test_method( - "RKF-45", my_oscillator, tstart, tend, 1, y_old, y_new, 5, 6, ks, sol, - y_ref_h); + test_method("RKF-45", my_oscillator, tstart, tend, 1, y_old, y_new, + 5, 6, ks, sol, y_ref_h); } { Kokkos::deep_copy(y_old, y_old_h); - double ks_raw[12] = {0, -0.008, -0.011982, -0.02392735, - -0.03979862, -0.03484563, -4, -3.992, - -3.987946, -3.97578551, -3.95940328, -3.96454357}; + double ks_raw[12] = {0, -0.008, -0.011982, -0.02392735, -0.03979862, -0.03484563, + -4, -3.992, -3.987946, -3.97578551, -3.95940328, -3.96454357}; Kokkos::View ks(ks_raw, 2, 6); double sol_raw[2] = {0.99980067, -0.03979801}; Kokkos::View sol(sol_raw, 2); - test_method( - "Cash-Karp", my_oscillator, tstart, tend, 1, y_old, y_new, 5, 6, ks, - sol, y_ref_h); + test_method("Cash-Karp", my_oscillator, tstart, tend, 1, y_old, + y_new, 5, 6, ks, sol, y_ref_h); } { Kokkos::deep_copy(y_old, y_old_h); - double ks_raw[14] = {0, -0.008, -0.011982, -0.03187008, - -0.03539333, -0.0397954, -0.03979801, -4, - -3.992, -3.987946, -3.96762048, -3.96398013, - -3.95941068, -3.95940467}; + double ks_raw[14] = {0, -0.008, 
-0.011982, -0.03187008, -0.03539333, -0.0397954, -0.03979801, + -4, -3.992, -3.987946, -3.96762048, -3.96398013, -3.95941068, -3.95940467}; Kokkos::View ks(ks_raw, 2, 7); double sol_raw[2] = {0.99980067, -0.03979801}; Kokkos::View sol(sol_raw, 2); - test_method( - "Dormand-Prince", my_oscillator, tstart, tend, 1, y_old, y_new, 5, 7, - ks, sol, y_ref_h); + test_method("Dormand-Prince", my_oscillator, tstart, tend, 1, y_old, + y_new, 5, 7, ks, sol, y_ref_h); } } // test_RK -template -void test_rate(ode_type& my_ode, const scalar_type& tstart, - const scalar_type& tend, - Kokkos::View num_steps, - typename vec_type::HostMirror& y_old_h, - typename vec_type::HostMirror& y_ref_h, - typename vec_type::HostMirror& error) { +template +void test_rate(ode_type& my_ode, const scalar_type& tstart, const scalar_type& tend, + Kokkos::View num_steps, typename vec_type::HostMirror& y_old_h, + typename vec_type::HostMirror& y_ref_h, typename vec_type::HostMirror& error) { using execution_space = typename vec_type::execution_space; using solver_type = KokkosODE::Experimental::RungeKutta; @@ -334,8 +297,8 @@ void test_rate(ode_type& my_ode, const scalar_type& tstart, KokkosODE::Experimental::ODE_params params(num_steps(idx)); Kokkos::deep_copy(y_old, y_old_h); Kokkos::deep_copy(y_new, y_old_h); - RKSolve_wrapper - solve_wrapper(my_ode, params, tstart, tend, y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper(my_ode, params, tstart, tend, + y_old, y_new, tmp, kstack); Kokkos::parallel_for(my_policy, solve_wrapper); Kokkos::deep_copy(y_new_h, y_new); @@ -343,8 +306,8 @@ void test_rate(ode_type& my_ode, const scalar_type& tstart, #if defined(HAVE_KOKKOSKERNELS_DEBUG) scalar_type dt = (tend - tstart) / num_steps(idx); - std::cout << "dt=" << dt << ", error=" << error(idx) << ", solution: {" - << y_new_h(0) << ", " << y_new_h(1) << "}" << std::endl; + std::cout << "dt=" << dt << ", error=" << error(idx) << ", solution: {" << y_new_h(0) << ", " << y_new_h(1) << "}" + << std::endl; 
#endif } @@ -399,67 +362,57 @@ void test_convergence_rate() { Kokkos::deep_copy(y_ref_h, y_ref); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "\nAnalytical solution" << std::endl; - std::cout << " y={" << y_ref_h(0) << ", " << y_ref_h(1) << "}" - << std::endl; + std::cout << " y={" << y_ref_h(0) << ", " << y_ref_h(1) << "}" << std::endl; #endif } typename vec_type::HostMirror error("error", num_steps.extent(0)); - test_rate( - my_oscillator, tstart, tend, num_steps, y_old_h, y_ref_h, error); + test_rate(my_oscillator, tstart, tend, num_steps, y_old_h, y_ref_h, + error); for (int idx = 1; idx < num_steps.extent_int(0) - 2; ++idx) { double expected_ratio = - Kokkos::pow(num_steps(idx) / num_steps(idx + 1), - KokkosODE::Impl::ButcherTableau<1, 1>::order); + Kokkos::pow(num_steps(idx) / num_steps(idx + 1), KokkosODE::Impl::ButcherTableau<1, 1>::order); double actual_ratio = error(idx + 1) / error(idx); EXPECT_NEAR_KK_REL(actual_ratio, expected_ratio, 0.15); #if defined(HAVE_KOKKOSKERNELS_DEBUG) - double rel_ratio_diff = Kokkos::abs(actual_ratio - expected_ratio) / - Kokkos::abs(expected_ratio); - std::cout << "error ratio: " << actual_ratio - << ", expected ratio: " << expected_ratio + double rel_ratio_diff = Kokkos::abs(actual_ratio - expected_ratio) / Kokkos::abs(expected_ratio); + std::cout << "error ratio: " << actual_ratio << ", expected ratio: " << expected_ratio << ", rel diff: " << rel_ratio_diff << std::endl; #endif } Kokkos::deep_copy(error, 0); - test_rate( - my_oscillator, tstart, tend, num_steps, y_old_h, y_ref_h, error); + test_rate(my_oscillator, tstart, tend, num_steps, y_old_h, y_ref_h, + error); for (int idx = 1; idx < num_steps.extent_int(0) - 2; ++idx) { double expected_ratio = - Kokkos::pow(num_steps(idx) / num_steps(idx + 1), - KokkosODE::Impl::ButcherTableau<2, 3>::order); + Kokkos::pow(num_steps(idx) / num_steps(idx + 1), KokkosODE::Impl::ButcherTableau<2, 3>::order); double actual_ratio = error(idx + 1) / error(idx); 
EXPECT_NEAR_KK_REL(actual_ratio, expected_ratio, 0.05); #if defined(HAVE_KOKKOSKERNELS_DEBUG) - double rel_ratio_diff = Kokkos::abs(actual_ratio - expected_ratio) / - Kokkos::abs(expected_ratio); - std::cout << "error ratio: " << actual_ratio - << ", expected ratio: " << expected_ratio + double rel_ratio_diff = Kokkos::abs(actual_ratio - expected_ratio) / Kokkos::abs(expected_ratio); + std::cout << "error ratio: " << actual_ratio << ", expected ratio: " << expected_ratio << ", rel diff: " << rel_ratio_diff << std::endl; #endif } Kokkos::deep_copy(error, 0); - test_rate( - my_oscillator, tstart, tend, num_steps, y_old_h, y_ref_h, error); + test_rate(my_oscillator, tstart, tend, num_steps, y_old_h, y_ref_h, + error); for (int idx = 1; idx < num_steps.extent_int(0) - 2; ++idx) { double expected_ratio = - Kokkos::pow(num_steps(idx) / num_steps(idx + 1), - KokkosODE::Impl::ButcherTableau<4, 5>::order); + Kokkos::pow(num_steps(idx) / num_steps(idx + 1), KokkosODE::Impl::ButcherTableau<4, 5>::order); double actual_ratio = error(idx + 1) / error(idx); EXPECT_NEAR_KK_REL(actual_ratio, expected_ratio, 0.05); #if defined(HAVE_KOKKOSKERNELS_DEBUG) - double rel_ratio_diff = Kokkos::abs(actual_ratio - expected_ratio) / - Kokkos::abs(expected_ratio); - std::cout << "error ratio: " << actual_ratio - << ", expected ratio: " << expected_ratio + double rel_ratio_diff = Kokkos::abs(actual_ratio - expected_ratio) / Kokkos::abs(expected_ratio); + std::cout << "error ratio: " << actual_ratio << ", expected ratio: " << expected_ratio << ", rel diff: " << rel_ratio_diff << std::endl; #endif } @@ -507,24 +460,19 @@ void test_adaptivity() { Kokkos::deep_copy(y_ref_h, y_ref); #if defined(HAVE_KOKKOSKERNELS_DEBUG) std::cout << "\nAnalytical solution" << std::endl; - std::cout << " y={" << y_ref_h(0) << ", " << y_ref_h(1) << "}" - << std::endl; + std::cout << " y={" << y_ref_h(0) << ", " << y_ref_h(1) << "}" << std::endl; #endif } vec_type tmp("tmp vector", neqs); - mv_type kstack( - "k stack", 
- KokkosODE::Experimental::RungeKutta::num_stages(), neqs); + mv_type kstack("k stack", KokkosODE::Experimental::RungeKutta::num_stages(), neqs); Kokkos::RangePolicy my_policy(0, 1); - KokkosODE::Experimental::ODE_params params(numSteps, maxSteps, absTol, relTol, - minStepSize); + KokkosODE::Experimental::ODE_params params(numSteps, maxSteps, absTol, relTol, minStepSize); Kokkos::deep_copy(y_old, y_old_h); Kokkos::deep_copy(y_new, y_old_h); - RKSolve_wrapper - solve_wrapper(my_oscillator, params, tstart, tend, y_old, y_new, tmp, - kstack); + RKSolve_wrapper solve_wrapper(my_oscillator, params, tstart, tend, + y_old, y_new, tmp, kstack); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror(y_new); @@ -547,8 +495,7 @@ void test_adaptivity() { for (int idx = 0; idx < y_new_h.extent_int(0); ++idx) { #if defined(HAVE_KOKKOSKERNELS_DEBUG) - error = - Kokkos::abs(y_new_h(idx) - y_ref_h(idx)) / Kokkos::abs(y_ref_h(idx)); + error = Kokkos::abs(y_new_h(idx) - y_ref_h(idx)) / Kokkos::abs(y_ref_h(idx)); std::cout << error << " "; #endif EXPECT_NEAR_KK_REL(y_new_h(idx), y_ref_h(idx), 1e-7); diff --git a/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK_chem.hpp b/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK_chem.hpp index 763f38a0139d..690e271c84f4 100644 --- a/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK_chem.hpp +++ b/packages/kokkos-kernels/ode/unit_test/Test_ODE_RK_chem.hpp @@ -33,13 +33,11 @@ struct chem_model_1 { const double tstart, tend, T0, T1; - chem_model_1(const double tstart_ = 0, const double tend_ = 100, - const double T0_ = 300, const double T1_ = 800) + chem_model_1(const double tstart_ = 0, const double tend_ = 100, const double T0_ = 300, const double T1_ = 800) : tstart(tstart_), tend(tend_), T0(T0_), T1(T1_){}; template - KOKKOS_FUNCTION void evaluate_function(const double t, const double /*dt*/, - const vec_type1& y, + KOKKOS_FUNCTION void evaluate_function(const double t, const double /*dt*/, const vec_type1& y, 
const vec_type2& f) const { // First compute the temperature // using linear ramp from T0 to T1 @@ -61,13 +59,11 @@ struct chem_model_2 { const double tstart, tend, T0, T1; - chem_model_2(const double tstart_ = 0, const double tend_ = 1200, - const double T0_ = 300, const double T1_ = 1000) + chem_model_2(const double tstart_ = 0, const double tend_ = 1200, const double T0_ = 300, const double T1_ = 1000) : tstart(tstart_), tend(tend_), T0(T0_), T1(T1_){}; template - KOKKOS_FUNCTION void evaluate_function(const double t, const double /*dt*/, - const vec_type1& y, + KOKKOS_FUNCTION void evaluate_function(const double t, const double /*dt*/, const vec_type1& y, const vec_type2& f) const { // First compute the temperature // using linear ramp from T0 to T1 @@ -116,9 +112,8 @@ void test_chem() { Kokkos::deep_copy(y_new, y_old_h); Kokkos::RangePolicy my_policy(0, 1); - RKSolve_wrapper - solve_wrapper(chem_model, params, chem_model.tstart, chem_model.tend, - y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + chem_model, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, kstack); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror(y_new); @@ -126,15 +121,11 @@ void test_chem() { #if defined(HAVE_KOKKOSKERNELS_DEBUG) const double dt = (chem_model.tend - chem_model.tstart) / params.num_steps; std::cout << "\nChem model 1" << std::endl; - std::cout << " t0=" << chem_model.tstart << ", tn=" << chem_model.tend - << std::endl; - std::cout << " T0=" << chem_model.T0 << ", Tn=" << chem_model.T1 - << std::endl; + std::cout << " t0=" << chem_model.tstart << ", tn=" << chem_model.tend << std::endl; + std::cout << " T0=" << chem_model.T0 << ", Tn=" << chem_model.T1 << std::endl; std::cout << " dt=" << dt << std::endl; - std::cout << " y(t0)={" << y_old_h(0) << ", " << y_old_h(1) << "}" - << std::endl; - std::cout << " y(tn)={" << y_new_h(0) << ", " << y_new_h(1) << "}" - << std::endl; + std::cout << " y(t0)={" << y_old_h(0) << 
", " << y_old_h(1) << "}" << std::endl; + std::cout << " y(tn)={" << y_new_h(0) << ", " << y_new_h(1) << "}" << std::endl; #endif } @@ -162,9 +153,8 @@ void test_chem() { Kokkos::deep_copy(y_new, y_old_h); Kokkos::RangePolicy my_policy(0, 1); - RKSolve_wrapper - solve_wrapper(chem_model, params, chem_model.tstart, chem_model.tend, - y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + chem_model, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, kstack); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror(y_new); @@ -172,17 +162,13 @@ void test_chem() { #if defined(HAVE_KOKKOSKERNELS_DEBUG) const double dt = (chem_model.tend - chem_model.tstart) / params.num_steps; std::cout << "\nChem model 2" << std::endl; - std::cout << " t0=" << chem_model.tstart << ", tn=" << chem_model.tend - << std::endl; - std::cout << " T0=" << chem_model.T0 << ", Tn=" << chem_model.T1 - << std::endl; + std::cout << " t0=" << chem_model.tstart << ", tn=" << chem_model.tend << std::endl; + std::cout << " T0=" << chem_model.T0 << ", Tn=" << chem_model.T1 << std::endl; std::cout << " dt=" << dt << std::endl; - std::cout << " y(t0)={" << y_old_h(0) << ", " << y_old_h(1) << ", " - << y_old_h(2) << ", " << y_old_h(3) << ", " << y_old_h(4) << ", " - << y_old_h(5) << ", " << y_old_h(6) << "}" << std::endl; - std::cout << " y(tn)={" << y_new_h(0) << ", " << y_new_h(1) << ", " - << y_new_h(2) << ", " << y_new_h(3) << ", " << y_new_h(4) << ", " - << y_new_h(5) << ", " << y_new_h(6) << "}" << std::endl; + std::cout << " y(t0)={" << y_old_h(0) << ", " << y_old_h(1) << ", " << y_old_h(2) << ", " << y_old_h(3) << ", " + << y_old_h(4) << ", " << y_old_h(5) << ", " << y_old_h(6) << "}" << std::endl; + std::cout << " y(tn)={" << y_new_h(0) << ", " << y_new_h(1) << ", " << y_new_h(2) << ", " << y_new_h(3) << ", " + << y_new_h(4) << ", " << y_new_h(5) << ", " << y_new_h(6) << "}" << std::endl; #endif } } // test_chem diff --git 
a/packages/kokkos-kernels/perf_test/Benchmark_Context.hpp b/packages/kokkos-kernels/perf_test/Benchmark_Context.hpp index adfc336576cb..e4a241601010 100644 --- a/packages/kokkos-kernels/perf_test/Benchmark_Context.hpp +++ b/packages/kokkos-kernels/perf_test/Benchmark_Context.hpp @@ -61,8 +61,7 @@ inline void add_kokkos_configuration(bool verbose) { auto val = remove_unwanted_characters(line.substr(found + 1)); // Ignore line without value, for example a category name if (!val.empty()) { - benchmark::AddCustomContext( - remove_unwanted_characters(line.substr(0, found)), val); + benchmark::AddCustomContext(remove_unwanted_characters(line.substr(0, found)), val); } } } @@ -75,18 +74,13 @@ inline void add_version_info() { if (!GIT_BRANCH.empty()) { benchmark::AddCustomContext("GIT_BRANCH", std::string(GIT_BRANCH)); - benchmark::AddCustomContext("GIT_COMMIT_HASH", - std::string(GIT_COMMIT_HASH)); - benchmark::AddCustomContext("GIT_CLEAN_STATUS", - std::string(GIT_CLEAN_STATUS)); - benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION", - std::string(GIT_COMMIT_DESCRIPTION)); - benchmark::AddCustomContext("GIT_COMMIT_DATE", - std::string(GIT_COMMIT_DATE)); + benchmark::AddCustomContext("GIT_COMMIT_HASH", std::string(GIT_COMMIT_HASH)); + benchmark::AddCustomContext("GIT_CLEAN_STATUS", std::string(GIT_CLEAN_STATUS)); + benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION", std::string(GIT_COMMIT_DESCRIPTION)); + benchmark::AddCustomContext("GIT_COMMIT_DATE", std::string(GIT_COMMIT_DATE)); } if (!BENCHMARK_VERSION.empty()) { - benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION", - std::string(BENCHMARK_VERSION)); + benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION", std::string(BENCHMARK_VERSION)); } } @@ -117,20 +111,16 @@ inline void add_benchmark_context(bool verbose = false) { } template -inline auto register_benchmark(const char* name, FuncType func, - std::vector arg_names, - std::vector args, int repeat, - ArgsToCallOp&&... 
func_args) { +inline auto register_benchmark(const char* name, FuncType func, std::vector arg_names, + std::vector args, int repeat, ArgsToCallOp&&... func_args) { if (repeat > 0) { - return benchmark::RegisterBenchmark( - name, func, std::forward(func_args)...) + return benchmark::RegisterBenchmark(name, func, std::forward(func_args)...) ->ArgNames(arg_names) ->Args(args) ->UseManualTime() ->Iterations(repeat); } else { - return benchmark::RegisterBenchmark( - name, func, std::forward(func_args)...) + return benchmark::RegisterBenchmark(name, func, std::forward(func_args)...) ->ArgNames(arg_names) ->Args(args) ->UseManualTime(); @@ -138,20 +128,16 @@ inline auto register_benchmark(const char* name, FuncType func, } template -inline auto register_benchmark_real_time(const char* name, FuncType func, - std::vector arg_names, - std::vector args, int repeat, - ArgsToCallOp&&... func_args) { +inline auto register_benchmark_real_time(const char* name, FuncType func, std::vector arg_names, + std::vector args, int repeat, ArgsToCallOp&&... func_args) { if (repeat > 0) { - return benchmark::RegisterBenchmark( - name, func, std::forward(func_args)...) + return benchmark::RegisterBenchmark(name, func, std::forward(func_args)...) ->ArgNames(arg_names) ->Args(args) ->UseRealTime() ->Iterations(repeat); } else { - return benchmark::RegisterBenchmark( - name, func, std::forward(func_args)...) + return benchmark::RegisterBenchmark(name, func, std::forward(func_args)...) 
->ArgNames(arg_names) ->Args(args) ->UseRealTime(); diff --git a/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_instantiation.hpp b/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_instantiation.hpp index 6844922ddbe3..8a46754030d2 100644 --- a/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_instantiation.hpp +++ b/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_instantiation.hpp @@ -57,9 +57,7 @@ int main_instantiation(int argc, char** argv) { else if (params.use_sycl) device_id = params.use_sycl - 1; - Kokkos::initialize(Kokkos::InitializationSettings() - .set_num_threads(num_threads) - .set_device_id(device_id)); + Kokkos::initialize(Kokkos::InitializationSettings().set_num_threads(num_threads).set_device_id(device_id)); Kokkos::print_configuration(std::cout); std::cout << '\n'; @@ -112,8 +110,7 @@ int main_instantiation(int argc, char** argv) { if (params.use_sycl) { #if defined(KOKKOS_ENABLE_SYCL) std::cout << "Running on SYCL backend.\n"; - KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, - params); + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, params); ran = true; #else std::cout << "ERROR: SYCL requested, but not available.\n"; diff --git a/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_utilities.hpp b/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_utilities.hpp index 1303b2370ec6..ec767c68f756 100644 --- a/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_utilities.hpp +++ b/packages/kokkos-kernels/perf_test/KokkosKernels_perf_test_utilities.hpp @@ -39,50 +39,49 @@ struct CommonInputParams { std::string list_common_options() { std::ostringstream common_options; - common_options - << "\t[Required] Backend: the available backends are:\n" + common_options << "\t[Required] Backend: the available backends are:\n" #ifdef KOKKOS_ENABLE_THREADS - << "\t\t'--threads [numThreads]'\n" + << "\t\t'--threads [numThreads]'\n" #endif #ifdef KOKKOS_ENABLE_OPENMP - << "\t\t'--openmp [numThreads]'\n" + << 
"\t\t'--openmp [numThreads]'\n" #endif #ifdef KOKKOS_ENABLE_CUDA - << "\t\t'--cuda [deviceIndex]'\n" + << "\t\t'--cuda [deviceIndex]'\n" #endif #ifdef KOKKOS_ENABLE_HIP - << "\t\t'--hip [deviceIndex]'\n" + << "\t\t'--hip [deviceIndex]'\n" #endif #ifdef KOKKOS_ENABLE_SYCL - << "\t\t'--sycl [deviceIndex]'\n" + << "\t\t'--sycl [deviceIndex]'\n" #endif #ifdef KOKKOS_ENABLE_SERIAL - << "\t\tIf no parallel backend is requested, Serial will be used.\n" + << "\t\tIf no parallel backend is requested, Serial will be used.\n" #endif - << "\n" - << "\t The following backends are not available because Kokkos was not " - "configured with them:\n" + << "\n" + << "\t The following backends are not available because Kokkos was not " + "configured with them:\n" #ifndef KOKKOS_ENABLE_THREADS - << "\t\t'--threads [numThreads]'\n" + << "\t\t'--threads [numThreads]'\n" #endif #ifndef KOKKOS_ENABLE_OPENMP - << "\t\t'--openmp [numThreads]'\n" + << "\t\t'--openmp [numThreads]'\n" #endif #ifndef KOKKOS_ENABLE_CUDA - << "\t\t'--cuda [deviceIndex]'\n" + << "\t\t'--cuda [deviceIndex]'\n" #endif #ifndef KOKKOS_ENABLE_HIP - << "\t\t'--hip [deviceIndex]'\n" + << "\t\t'--hip [deviceIndex]'\n" #endif #ifndef KOKKOS_ENABLE_SYCL - << "\t\t'--sycl [deviceIndex]'\n" + << "\t\t'--sycl [deviceIndex]'\n" #endif #ifndef KOKKOS_ENABLE_SERIAL - << "\t\tSerial is not enabled so a parallel backend must be selected.\n" + << "\t\tSerial is not enabled so a parallel backend must be selected.\n" #endif - << "\n" - << "\t[Optional]:\n" - << "\t\t'-h', '--help': show available options\n\n"; + << "\n" + << "\t[Optional]:\n" + << "\t\t'-h', '--help': show available options\n\n"; return common_options.str(); } @@ -94,15 +93,13 @@ void process_arg_int(char const* str_val, int& val) { if (str_val == ptr_end) { std::stringstream ss; - ss << "Error: cannot convert command line argument '" << str_val - << "' to an integer.\n"; + ss << "Error: cannot convert command line argument '" << str_val << "' to an integer.\n"; throw 
std::invalid_argument(ss.str()); } if (errno == ERANGE) { std::stringstream ss; - ss << "Error: converted value for command line argument '" << str_val - << "' falls out of range.\n"; + ss << "Error: converted value for command line argument '" << str_val << "' falls out of range.\n"; throw std::invalid_argument(ss.str()); } } @@ -114,21 +111,18 @@ void process_arg_double(char const* str_val, double& val) { if (str_val == ptr_end) { std::stringstream ss; - ss << "Error: cannot convert command line argument '" << str_val - << "' to a double.\n"; + ss << "Error: cannot convert command line argument '" << str_val << "' to a double.\n"; throw std::invalid_argument(ss.str()); } if (errno == ERANGE) { std::stringstream ss; - ss << "Error: converted value for command line argument '" << str_val - << "' falls out of range.\n"; + ss << "Error: converted value for command line argument '" << str_val << "' falls out of range.\n"; throw std::invalid_argument(ss.str()); } } -bool check_arg_int(int const i, int const argc, char** argv, char const* name, - int& val) { +bool check_arg_int(int const i, int const argc, char** argv, char const* name, int& val) { if (0 != Test::string_compare_no_case(argv[i], name)) { return false; } @@ -143,8 +137,7 @@ bool check_arg_int(int const i, int const argc, char** argv, char const* name, return true; } -bool check_arg_double(int const i, int const argc, char** argv, - char const* name, double& val) { +bool check_arg_double(int const i, int const argc, char** argv, char const* name, double& val) { if (0 != Test::string_compare_no_case(argv[i], name)) { return false; } @@ -159,8 +152,7 @@ bool check_arg_double(int const i, int const argc, char** argv, return true; } -bool check_arg_bool(int const i, int const /*argc*/, char** argv, - char const* name, bool& val) { +bool check_arg_bool(int const i, int const /*argc*/, char** argv, char const* name, bool& val) { if (0 != Test::string_compare_no_case(argv[i], name)) { return false; } @@ -168,8 
+160,7 @@ bool check_arg_bool(int const i, int const /*argc*/, char** argv, return true; } -bool check_arg_str(int const i, int const argc, char** argv, char const* name, - std::string& val) { +bool check_arg_str(int const i, int const argc, char** argv, char const* name, std::string& val) { if (0 != Test::string_compare_no_case(argv[i], name)) { return false; } @@ -198,8 +189,7 @@ void parse_common_options(int& argc, char** argv, CommonInputParams& params) { int remove_flags = 0; if (check_arg_int(argIdx, argc, argv, "--threads", params.use_threads)) { remove_flags = 2; - } else if (check_arg_int(argIdx, argc, argv, "--openmp", - params.use_openmp)) { + } else if (check_arg_int(argIdx, argc, argv, "--openmp", params.use_openmp)) { remove_flags = 2; } else if (check_arg_int(argIdx, argc, argv, "--cuda", params.use_cuda)) { params.use_cuda++; @@ -213,8 +203,7 @@ void parse_common_options(int& argc, char** argv, CommonInputParams& params) { } else if (check_arg_int(argIdx, argc, argv, "--repeat", params.repeat)) { remove_flags = 2; } else if (check_arg_bool(argIdx, argc, argv, "-h", params.print_help) || - check_arg_bool(argIdx, argc, argv, "--help", - params.print_help)) { + check_arg_bool(argIdx, argc, argv, "--help", params.print_help)) { remove_flags = 1; } diff --git a/packages/kokkos-kernels/perf_test/PerfTestUtilities.cpp b/packages/kokkos-kernels/perf_test/PerfTestUtilities.cpp index c403d0213d6e..479d50d2bad2 100644 --- a/packages/kokkos-kernels/perf_test/PerfTestUtilities.cpp +++ b/packages/kokkos-kernels/perf_test/PerfTestUtilities.cpp @@ -23,8 +23,6 @@ namespace test { std::string inputDataPath; -void set_input_data_path(const std::string& path_to_data) { - inputDataPath = path_to_data; -} +void set_input_data_path(const std::string& path_to_data) { inputDataPath = path_to_data; } std::string get_input_data_path() { return inputDataPath; } } // namespace test diff --git a/packages/kokkos-kernels/perf_test/PerfTestUtilities.hpp 
b/packages/kokkos-kernels/perf_test/PerfTestUtilities.hpp index 4de10312b630..f6531a76fb25 100644 --- a/packages/kokkos-kernels/perf_test/PerfTestUtilities.hpp +++ b/packages/kokkos-kernels/perf_test/PerfTestUtilities.hpp @@ -36,8 +36,7 @@ std::string get_input_data_path(); namespace KokkosSparse { -template +template class CrsMatrix; } @@ -62,8 +61,7 @@ inline std::vector get_directories(std::string path) { std::string nname = std::string(dir->d_name); // Check to see if item is a directory // if (isDirectory(path + '/' + nname)) - if (nname != "." && nname != ".." && - isDirectory(path + '/' + dir->d_name)) + if (nname != "." && nname != ".." && isDirectory(path + '/' + dir->d_name)) // std::vector::emplace_back: insert a new element to the end of vector paths.emplace_back(dir->d_name); } @@ -75,18 +73,16 @@ inline std::vector get_directories(std::string path) { namespace readers { template -using matrix_type = - KokkosSparse::CrsMatrix; +using matrix_type = KokkosSparse::CrsMatrix; template struct test_reader; template struct test_reader> { - static matrix_type read( - const std::string &filename) { - return KokkosKernels::Impl::read_kokkos_crst_matrix< - matrix_type>(filename.c_str()); + static matrix_type read(const std::string &filename) { + return KokkosKernels::Impl::read_kokkos_crst_matrix>( + filename.c_str()); } }; @@ -100,30 +96,23 @@ struct data_retriever { std::tuple test_data; }; std::vector test_cases; - std::string make_full_path_to_data_file(std::string repo, - std::string path_to_data, - std::string dataset, + std::string make_full_path_to_data_file(std::string repo, std::string path_to_data, std::string dataset, std::string filename) { - return root_path + "/" + repo + "/" + path_to_data + dataset + "/" + - filename; + return root_path + "/" + repo + "/" + path_to_data + dataset + "/" + filename; } template - data_retriever(std::string path_to_data, Locations... 
locations) - : sub_path(path_to_data) { + data_retriever(std::string path_to_data, Locations... locations) : sub_path(path_to_data) { root_path = test::get_input_data_path(); // TODO: way to list the directories in the root path std::vector data_repos = get_directories(root_path + "/"); // TODO: list directories in subpaths for (auto repo : data_repos) { - std::vector datasets = - get_directories(root_path + "/" + repo + "/" + path_to_data + "/"); + std::vector datasets = get_directories(root_path + "/" + repo + "/" + path_to_data + "/"); for (auto dataset : datasets) { - test_cases.push_back( - test_case{repo + "/" + dataset, - std::make_tuple(readers::test_reader::read( - make_full_path_to_data_file( - repo, path_to_data, dataset, locations))...)}); + test_cases.push_back(test_case{repo + "/" + dataset, + std::make_tuple(readers::test_reader::read( + make_full_path_to_data_file(repo, path_to_data, dataset, locations))...)}); } } } diff --git a/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockJacobi_Tutorial.cpp b/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockJacobi_Tutorial.cpp index 53a6f8f17397..5081017e468b 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockJacobi_Tutorial.cpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockJacobi_Tutorial.cpp @@ -50,8 +50,8 @@ using member_type = typename policy_type::member_type; using namespace KokkosBatched; template -val_type computeResidual(const ManyMatrixType &A, const ManyVectorType &x, - const ManyVectorType &b, const ManyVectorType &r) { +val_type computeResidual(const ManyMatrixType &A, const ManyVectorType &x, const ManyVectorType &b, + const ManyVectorType &r) { /// compute residual val_type residual(0); { @@ -66,17 +66,12 @@ val_type computeResidual(const ManyMatrixType &A, const ManyVectorType &x, auto xx = Kokkos::subview(x, i, Kokkos::ALL()); auto rr = Kokkos::subview(r, i, Kokkos::ALL()); - 
TeamGemv::invoke(member, -one, AA, xx, one, - rr); + TeamGemv::invoke(member, -one, AA, xx, one, rr); val_type sum(0); Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member, rr.extent(0)), - [&](const int &k, val_type &lsum) { - lsum += Kokkos::ArithTraits::abs(rr(k)); - }, - sum); + [&](const int &k, val_type &lsum) { lsum += Kokkos::ArithTraits::abs(rr(k)); }, sum); Kokkos::single(Kokkos::PerTeam(member), [&]() { update += sum; }); }, residual); @@ -132,8 +127,8 @@ struct Task1SolveLowerTriangular { const val_type one(1); auto AA = Kokkos::subview(__A, i, Kokkos::ALL(), Kokkos::ALL()); auto TT = Kokkos::subview(__T, i, Kokkos::ALL(), Kokkos::ALL()); - TeamTrsm::invoke(member, one, TT, AA); + TeamTrsm::invoke( + member, one, TT, AA); } }; @@ -152,9 +147,8 @@ struct Task1SolveUpperTriangular { const val_type one(1); auto AA = Kokkos::subview(__A, i, Kokkos::ALL(), Kokkos::ALL()); auto TT = Kokkos::subview(__T, i, Kokkos::ALL(), Kokkos::ALL()); - TeamTrsm::invoke(member, one, TT, - AA); + TeamTrsm::invoke( + member, one, TT, AA); } }; } // namespace ConstructBlockJacobi @@ -176,8 +170,7 @@ struct Task1ApplyBlockJacobi { auto AA = Kokkos::subview(__A, i, Kokkos::ALL(), Kokkos::ALL()); auto xx = Kokkos::subview(__x, i, Kokkos::ALL()); auto bb = Kokkos::subview(__b, i, Kokkos::ALL()); - TeamGemv::invoke( - member, one, AA, bb, zero, xx); + TeamGemv::invoke(member, one, AA, bb, zero, xx); } }; @@ -200,11 +193,10 @@ struct Task2FactorizeInvert { TeamLU::invoke(member, AA); TeamCopy::invoke(member, AA, TT); TeamSetIdentity::invoke(member, AA); - TeamTrsm::invoke(member, one, TT, AA); - TeamTrsm::invoke(member, one, TT, - AA); + TeamTrsm::invoke( + member, one, TT, AA); + TeamTrsm::invoke( + member, one, TT, AA); } }; @@ -225,8 +217,7 @@ struct Task2ApplyBlockJacobi { auto AA = Kokkos::subview(__A, i, Kokkos::ALL(), Kokkos::ALL()); auto xx = Kokkos::subview(__x, i, Kokkos::ALL()); auto bb = Kokkos::subview(__b, i, Kokkos::ALL()); - TeamGemv::invoke( - member, one, AA, 
bb, zero, xx); + TeamGemv::invoke(member, one, AA, bb, zero, xx); } }; @@ -260,22 +251,17 @@ int main(int argc, char *argv[]) { /// x - solution vector /// b - right hand side vector /// - Kokkos::View A( - "block diagonals", N, Blk, Blk); - Kokkos::View T( - "temporal block diagonals", N, Blk, Blk); - Kokkos::View x("x", N, - Blk); - Kokkos::View b("b", N, - Blk); + Kokkos::View A("block diagonals", N, Blk, Blk); + Kokkos::View T("temporal block diagonals", N, Blk, Blk); + Kokkos::View x("x", N, Blk); + Kokkos::View b("b", N, Blk); /// copy of A to check residual - Kokkos::View Acopy( - "Acopy", A.extent(0), A.extent(1), A.extent(2)); + Kokkos::View Acopy("Acopy", A.extent(0), A.extent(1), + A.extent(2)); /// residual vector - Kokkos::View r( - "r", b.extent(0), b.extent(1)); + Kokkos::View r("r", b.extent(0), b.extent(1)); /// The block diagonal matrices are assumed to be extracted from a block /// sparse matrix. Here we set the blocks with random values @@ -308,23 +294,15 @@ int main(int argc, char *argv[]) { { policy_type policy(A.extent(0), Kokkos::AUTO()); timer.reset(); - Kokkos::parallel_for( - "task1.factorize", policy, - ConstructBlockJacobi::Task1Factorize(A)); + Kokkos::parallel_for("task1.factorize", policy, ConstructBlockJacobi::Task1Factorize(A)); Kokkos::deep_copy(T, A); - Kokkos::parallel_for( - "task1.set-identity", policy, - ConstructBlockJacobi::Task1SetIdentity(A)); + Kokkos::parallel_for("task1.set-identity", policy, ConstructBlockJacobi::Task1SetIdentity(A)); Kokkos::fence(); - Kokkos::parallel_for( - "task1.solve-lower-triangular", policy, - ConstructBlockJacobi::Task1SolveLowerTriangular(A, T)); + Kokkos::parallel_for("task1.solve-lower-triangular", policy, + ConstructBlockJacobi::Task1SolveLowerTriangular(A, T)); Kokkos::fence(); - Kokkos::parallel_for( - "task1.solve-upper-triangular", policy, - ConstructBlockJacobi::Task1SolveUpperTriangular(A, T)); + Kokkos::parallel_for("task1.solve-upper-triangular", policy, + 
ConstructBlockJacobi::Task1SolveUpperTriangular(A, T)); Kokkos::fence(); const double t = timer.seconds(); printf( @@ -337,10 +315,8 @@ int main(int argc, char *argv[]) { { timer.reset(); policy_type policy(A.extent(0), Kokkos::AUTO()); - Kokkos::parallel_for( - "task1.apply-block-jacobi", policy, - Task1ApplyBlockJacobi(A, x, - b)); + Kokkos::parallel_for("task1.apply-block-jacobi", policy, + Task1ApplyBlockJacobi(A, x, b)); const double t = timer.seconds(); printf( "task 1: application of jacobi time = %f , # of applications per " @@ -374,9 +350,7 @@ int main(int argc, char *argv[]) { { policy_type policy(A.extent(0), Kokkos::AUTO()); timer.reset(); - Kokkos::parallel_for( - "task2.factorize-invert", policy, - Task2FactorizeInvert(A, T)); + Kokkos::parallel_for("task2.factorize-invert", policy, Task2FactorizeInvert(A, T)); Kokkos::fence(); const double t = timer.seconds(); printf( @@ -389,10 +363,8 @@ int main(int argc, char *argv[]) { { timer.reset(); policy_type policy(A.extent(0), Kokkos::AUTO()); - Kokkos::parallel_for( - "task2.apply-block-jacobi", policy, - Task2ApplyBlockJacobi(A, x, - b)); + Kokkos::parallel_for("task2.apply-block-jacobi", policy, + Task2ApplyBlockJacobi(A, x, b)); const double t = timer.seconds(); printf( "task 2: application of jacobi time = %f , # of applications per " diff --git a/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagDirect.cpp b/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagDirect.cpp index f3eb0dd8aca3..810112baa355 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagDirect.cpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagDirect.cpp @@ -66,11 +66,9 @@ using member_type = typename policy_type::member_type; /// using namespace KokkosBatched; -static constexpr int vector_length = - DefaultVectorLength::value; +static constexpr int vector_length = DefaultVectorLength::value; #if 
defined(KOKKOSBATCHED_USE_128BIT_MEMORY_INST) -static constexpr int internal_vector_length = - DefaultInternalVectorLength::value; +static constexpr int internal_vector_length = DefaultInternalVectorLength::value; #else static constexpr int internal_vector_length = 1; #endif @@ -169,15 +167,11 @@ struct SetTridiagToIdentity { KOKKOS_INLINE_FUNCTION void operator()(const member_type &member) const { const int i = member.league_rank(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, __AA.extent(1)), [&](const int &j) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, __AA.extent(5)), - [&](const int &v) { - for (int k = 0, kend = __AA.extent(3); k < kend; ++k) - __AA(i, j, 1, k, k, v) = 1; - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, __AA.extent(1)), [&](const int &j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, __AA.extent(5)), [&](const int &v) { + for (int k = 0, kend = __AA.extent(3); k < kend; ++k) __AA(i, j, 1, k, k, v) = 1; + }); + }); } }; @@ -192,46 +186,42 @@ struct Factorize { KOKKOS_INLINE_FUNCTION void operator()(const member_type &member) const { - typedef FactorizeModeAndAlgo - default_mode_and_algo_type; + typedef FactorizeModeAndAlgo default_mode_and_algo_type; typedef default_mode_and_algo_type::mode_type mode_type; typedef default_mode_and_algo_type::algo_type algo_type; const int i = member.league_rank(); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, __AA.extent(5)), [&](const int &v) { - auto AAA = Kokkos::subview(__AA, i, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL(), Kokkos::ALL(), v); - - /// subview patterns - auto A = Kokkos::subview(AAA, 0, 1, Kokkos::ALL(), Kokkos::ALL()); - auto B = Kokkos::subview(AAA, 0, 2, Kokkos::ALL(), Kokkos::ALL()); - auto C = Kokkos::subview(AAA, 0, 0, Kokkos::ALL(), Kokkos::ALL()); - auto D = Kokkos::subview(AAA, 0, 1, Kokkos::ALL(), Kokkos::ALL()); - - if (__L == 1) { - A.assign_data(&AAA(0, 1, 0, 0)); - LU::invoke(member, A); - } else { - 
for (int k = 0; k < (__L - 1); ++k) { - A.assign_data(&AAA(k, 1, 0, 0)); - B.assign_data(&AAA(k, 2, 0, 0)); - C.assign_data(&AAA(k, 0, 0, 0)); - D.assign_data(&AAA(k + 1, 1, 0, 0)); - - LU::invoke(member, A); - Trsm::invoke(member, 1.0, A, B); - Trsm::invoke(member, 1.0, A, - C); - Gemm::invoke(member, -1.0, C, B, 1.0, D); - } - LU::invoke(member, D); - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, __AA.extent(5)), [&](const int &v) { + auto AAA = Kokkos::subview(__AA, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), v); + + /// subview patterns + auto A = Kokkos::subview(AAA, 0, 1, Kokkos::ALL(), Kokkos::ALL()); + auto B = Kokkos::subview(AAA, 0, 2, Kokkos::ALL(), Kokkos::ALL()); + auto C = Kokkos::subview(AAA, 0, 0, Kokkos::ALL(), Kokkos::ALL()); + auto D = Kokkos::subview(AAA, 0, 1, Kokkos::ALL(), Kokkos::ALL()); + + if (__L == 1) { + A.assign_data(&AAA(0, 1, 0, 0)); + LU::invoke(member, A); + } else { + for (int k = 0; k < (__L - 1); ++k) { + A.assign_data(&AAA(k, 1, 0, 0)); + B.assign_data(&AAA(k, 2, 0, 0)); + C.assign_data(&AAA(k, 0, 0, 0)); + D.assign_data(&AAA(k + 1, 1, 0, 0)); + + LU::invoke(member, A); + Trsm::invoke( + member, 1.0, A, B); + Trsm::invoke( + member, 1.0, A, C); + Gemm::invoke(member, -1.0, C, B, + 1.0, D); + } + LU::invoke(member, D); + } + }); } }; @@ -275,58 +265,46 @@ int main(int argc, char *argv[]) { /// /// double 16 - Kokkos::View Av( - "A", N / vector_length, L, 3, Blk, Blk); + Kokkos::View Av("A", N / vector_length, L, 3, Blk, Blk); /// double Kokkos::View As( - (value_type *)Av.data(), Av.extent(0), Av.extent(1), Av.extent(2), - Av.extent(3), Av.extent(4), vector_length); + (value_type *)Av.data(), Av.extent(0), Av.extent(1), Av.extent(2), Av.extent(3), Av.extent(4), vector_length); /// double 2 - Kokkos::View - Ai((internal_vector_type *)Av.data(), Av.extent(0), Av.extent(1), - Av.extent(2), Av.extent(3), Av.extent(4), - vector_length / internal_vector_length); + Kokkos::View Ai( + 
(internal_vector_type *)Av.data(), Av.extent(0), Av.extent(1), Av.extent(2), Av.extent(3), Av.extent(4), + vector_length / internal_vector_length); /// double 16 - Kokkos::View xv( - "x", N / vector_length, Nvec, L, Blk); + Kokkos::View xv("x", N / vector_length, Nvec, L, Blk); /// double Kokkos::View xs( - (value_type *)xv.data(), xv.extent(0), xv.extent(1), xv.extent(2), - xv.extent(3), vector_length); + (value_type *)xv.data(), xv.extent(0), xv.extent(1), xv.extent(2), xv.extent(3), vector_length); /// double 2 - Kokkos::View - xi((internal_vector_type *)xv.data(), xv.extent(0), xv.extent(1), - xv.extent(2), xv.extent(3), vector_length / internal_vector_length); + Kokkos::View xi( + (internal_vector_type *)xv.data(), xv.extent(0), xv.extent(1), xv.extent(2), xv.extent(3), + vector_length / internal_vector_length); /// double 16 - Kokkos::View bv( - "b", N / vector_length, Nvec, L, Blk); + Kokkos::View bv("b", N / vector_length, Nvec, L, Blk); /// double Kokkos::View bs( - (value_type *)bv.data(), bv.extent(0), bv.extent(1), bv.extent(2), - bv.extent(3), vector_length); + (value_type *)bv.data(), bv.extent(0), bv.extent(1), bv.extent(2), bv.extent(3), vector_length); /// double 2 - Kokkos::View - bi((internal_vector_type *)bv.data(), bv.extent(0), bv.extent(1), - bv.extent(2), bv.extent(3), vector_length / internal_vector_length); + Kokkos::View bi( + (internal_vector_type *)bv.data(), bv.extent(0), bv.extent(1), bv.extent(2), bv.extent(3), + vector_length / internal_vector_length); /// double copy of A Kokkos::View Acopy( - "Acopy", As.extent(0), As.extent(1), As.extent(2), As.extent(3), - As.extent(4), As.extent(5)); + "Acopy", As.extent(0), As.extent(1), As.extent(2), As.extent(3), As.extent(4), As.extent(5)); - Kokkos::View rs( - "rs", bs.extent(0), bs.extent(1), bs.extent(2), bs.extent(3), - bs.extent(4)); + Kokkos::View rs("rs", bs.extent(0), bs.extent(1), + bs.extent(2), bs.extent(3), bs.extent(4)); #if defined(KOKKOSBATCHED_USE_128BIT_MEMORY_INST) auto AA 
= Ai; @@ -347,8 +325,7 @@ int main(int argc, char *argv[]) { #endif timer.reset(); policy_type policy(AA.extent(0), Kokkos::AUTO(), AA.extent(5)); - Kokkos::parallel_for("setTridiagToIdentity", policy, - SetTridiagToIdentity(AA)); + Kokkos::parallel_for("setTridiagToIdentity", policy, SetTridiagToIdentity(AA)); Kokkos::fence(); const double t = timer.seconds(); #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSBATCHED_PROFILE) @@ -385,16 +362,14 @@ int main(int argc, char *argv[]) { } policy_type policy(AA.extent(0), team_size, AA.extent(5)); - Kokkos::parallel_for("factorize", - policy.set_scratch_size(0, Kokkos::PerTeam(S)), + Kokkos::parallel_for("factorize", policy.set_scratch_size(0, Kokkos::PerTeam(S)), Factorize(AA, L)); Kokkos::fence(); const double t = timer.seconds(); #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSBATCHED_PROFILE) cudaProfilerStop(); #endif - printf("factorize time = %f , # of factorization per min = %f \n", t, - 1.0 / t * 60); + printf("factorize time = %f , # of factorization per min = %f \n", t, 1.0 / t * 60); } /// @@ -417,121 +392,96 @@ int main(int argc, char *argv[]) { policy_type policy(AA.extent(0), team_size, AA.extent(5)); for (int iter = 0; iter < niter; ++iter) { Kokkos::parallel_for( - "solve", policy.set_scratch_size(0, Kokkos::PerTeam(S)), - KOKKOS_LAMBDA(const member_type &member) { - typedef SolveModeAndAlgo - default_mode_and_algo_type; + "solve", policy.set_scratch_size(0, Kokkos::PerTeam(S)), KOKKOS_LAMBDA(const member_type &member) { + typedef SolveModeAndAlgo default_mode_and_algo_type; typedef default_mode_and_algo_type::mode_type mode_type; typedef default_mode_and_algo_type::algo_type algo_type; const int i = member.league_rank(); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, AA.extent(5)), - [&](const int &v) { - auto A = Kokkos::subview(AA, i, Kokkos::ALL(), 1, - Kokkos::ALL(), Kokkos::ALL(), v); - auto B = Kokkos::subview(AA, i, Kokkos::ALL(), 2, - Kokkos::ALL(), Kokkos::ALL(), v); - auto C = 
Kokkos::subview(AA, i, Kokkos::ALL(), 0, - Kokkos::ALL(), Kokkos::ALL(), v); - - for (int jvec = 0; jvec < Nvec; ++jvec) { - auto x = Kokkos::subview(xx, i, jvec, Kokkos::ALL(), - Kokkos::ALL(), v); - auto b = Kokkos::subview(bb, i, jvec, Kokkos::ALL(), - Kokkos::ALL(), v); - - auto xt = Kokkos::subview(x, 0, Kokkos::ALL()); - auto xb = Kokkos::subview(x, 0, Kokkos::ALL()); - - /// - /// forward substitution - /// - { - // const bool is_same_x_and_b = (x.data() == b.data()); - auto LT = - Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto LB = - Kokkos::subview(C, 0, Kokkos::ALL(), Kokkos::ALL()); - - auto bk = Kokkos::subview(b, 0, Kokkos::ALL()); - { - { // if (!is_same_x_and_b) { - Copy::invoke(member, bk, xb); - member.team_barrier(); - } - } - const int kend = L - 1; - for (int k = 0; k < kend; ++k) { - LT.assign_data(&A(k, 0, 0)); - LB.assign_data(&C(k, 0, 0)); - - xt.assign_data(&x(k, 0)); - xb.assign_data(&x(k + 1, 0)); - - { // if (!is_same_x_and_b) { - bk.assign_data(&b(k + 1, 0)); - Copy::invoke(member, bk, xb); - } - - Trsv::invoke(member, - 1.0, - LT, - xt); - - Gemv::invoke(member, -1.0, LB, xt, 1.0, - xb); - } - { - LT.assign_data(&A(kend, 0, 0)); - xt.assign_data(&x(kend, 0)); - Trsv::invoke(member, - 1.0, - LT, - xt); - } - } /// end forward substitution - - /// - /// backward substitution - /// - { - auto UT = - Kokkos::subview(B, 0, Kokkos::ALL(), Kokkos::ALL()); - auto UB = - Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - - const int kbegin = L - 1; - for (int k = kbegin; k > 0; --k) { - UT.assign_data(&B(k - 1, 0, 0)); - UB.assign_data(&A(k, 0, 0)); - - xt.assign_data(&x(k - 1, 0)); - xb.assign_data(&x(k, 0)); - - Trsv::invoke(member, 1.0, UB, xb); - - Gemv::invoke(member, -1.0, UT, xb, 1.0, - xt); - } - { - UB.assign_data(&A(0, 0, 0)); - xb.assign_data(&x(0, 0)); - Trsv::invoke(member, 1.0, UB, xb); - } - } // end backward substitution + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, AA.extent(5)), [&](const int &v) { 
+ auto A = Kokkos::subview(AA, i, Kokkos::ALL(), 1, Kokkos::ALL(), Kokkos::ALL(), v); + auto B = Kokkos::subview(AA, i, Kokkos::ALL(), 2, Kokkos::ALL(), Kokkos::ALL(), v); + auto C = Kokkos::subview(AA, i, Kokkos::ALL(), 0, Kokkos::ALL(), Kokkos::ALL(), v); + + for (int jvec = 0; jvec < Nvec; ++jvec) { + auto x = Kokkos::subview(xx, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + auto b = Kokkos::subview(bb, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + + auto xt = Kokkos::subview(x, 0, Kokkos::ALL()); + auto xb = Kokkos::subview(x, 0, Kokkos::ALL()); + + /// + /// forward substitution + /// + { + // const bool is_same_x_and_b = (x.data() == b.data()); + auto LT = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); + auto LB = Kokkos::subview(C, 0, Kokkos::ALL(), Kokkos::ALL()); + + auto bk = Kokkos::subview(b, 0, Kokkos::ALL()); + { + { // if (!is_same_x_and_b) { + Copy::invoke(member, bk, xb); + member.team_barrier(); + } + } + const int kend = L - 1; + for (int k = 0; k < kend; ++k) { + LT.assign_data(&A(k, 0, 0)); + LB.assign_data(&C(k, 0, 0)); + + xt.assign_data(&x(k, 0)); + xb.assign_data(&x(k + 1, 0)); + + { // if (!is_same_x_and_b) { + bk.assign_data(&b(k + 1, 0)); + Copy::invoke(member, bk, xb); + } + + Trsv::invoke( + member, 1.0, LT, xt); + + Gemv::invoke(member, -1.0, LB, xt, 1.0, + xb); + } + { + LT.assign_data(&A(kend, 0, 0)); + xt.assign_data(&x(kend, 0)); + Trsv::invoke( + member, 1.0, LT, xt); + } + } /// end forward substitution + + /// + /// backward substitution + /// + { + auto UT = Kokkos::subview(B, 0, Kokkos::ALL(), Kokkos::ALL()); + auto UB = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); + + const int kbegin = L - 1; + for (int k = kbegin; k > 0; --k) { + UT.assign_data(&B(k - 1, 0, 0)); + UB.assign_data(&A(k, 0, 0)); + + xt.assign_data(&x(k - 1, 0)); + xb.assign_data(&x(k, 0)); + + Trsv::invoke( + member, 1.0, UB, xb); + + Gemv::invoke(member, -1.0, UT, xb, 1.0, + xt); } - }); + { + UB.assign_data(&A(0, 0, 0)); + xb.assign_data(&x(0, 
0)); + Trsv::invoke( + member, 1.0, UB, xb); + } + } // end backward substitution + } + }); }); Kokkos::fence(); } @@ -539,8 +489,7 @@ int main(int argc, char *argv[]) { #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSBATCHED_PROFILE) cudaProfilerStop(); #endif - printf("solve time = %f , # of solves per min = %f\n", t, - 1.0 / t * 60 * niter); + printf("solve time = %f , # of solves per min = %f\n", t, 1.0 / t * 60 * niter); } /// @@ -552,114 +501,77 @@ int main(int argc, char *argv[]) { Kokkos::parallel_for( "compute residual", policy, KOKKOS_LAMBDA(const member_type &member) { const int i = member.league_rank(); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, Acopy.extent(5)), - [&](const int &v) { - auto A = Kokkos::subview(Acopy, i, Kokkos::ALL(), 1, - Kokkos::ALL(), Kokkos::ALL(), v); - auto B = Kokkos::subview(Acopy, i, Kokkos::ALL(), 2, - Kokkos::ALL(), Kokkos::ALL(), v); - auto C = Kokkos::subview(Acopy, i, Kokkos::ALL(), 0, - Kokkos::ALL(), Kokkos::ALL(), v); - - for (int jvec = 0, jvecend = rs.extent(1); jvec < jvecend; - ++jvec) { - auto x = Kokkos::subview(xs, i, jvec, Kokkos::ALL(), - Kokkos::ALL(), v); - auto b = Kokkos::subview(bs, i, jvec, Kokkos::ALL(), - Kokkos::ALL(), v); - auto r = Kokkos::subview(rs, i, jvec, Kokkos::ALL(), - Kokkos::ALL(), v); - - if (L == 1) { - auto A0 = - Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto x0 = Kokkos::subview(x, 0, Kokkos::ALL()); - auto b0 = Kokkos::subview(b, 0, Kokkos::ALL()); - auto r0 = Kokkos::subview(r, 0, Kokkos::ALL()); - - TeamCopy::invoke(member, - b0, r0); - TeamGemv::invoke(member, -1.0, A0, x0, 1.0, - r0); - } else { - int k = 0; - { - /// first row - auto A1 = - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto B2 = - Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); - - auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); - auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); - - auto bk = Kokkos::subview(b, k, Kokkos::ALL()); - auto rk = Kokkos::subview(r, k, 
Kokkos::ALL()); - TeamCopy::invoke( - member, bk, rk); - member.team_barrier(); - TeamGemv::invoke(member, -1.0, A1, x1, 1.0, - rk); - TeamGemv::invoke(member, -1.0, B2, x2, 1.0, - rk); - ++k; - } - for (; k < (L - 1); ++k) { - auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), - Kokkos::ALL()); - auto A1 = - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto B2 = - Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); - - auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); - auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); - auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); - - auto bk = Kokkos::subview(b, k, Kokkos::ALL()); - auto rk = Kokkos::subview(r, k, Kokkos::ALL()); - TeamCopy::invoke( - member, bk, rk); - member.team_barrier(); - TeamGemv::invoke(member, -1.0, C0, x0, 1.0, - rk); - TeamGemv::invoke(member, -1.0, A1, x1, 1.0, - rk); - TeamGemv::invoke(member, -1.0, B2, x2, 1.0, - rk); - } - { - // last row - auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), - Kokkos::ALL()); - auto A1 = - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - - auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); - auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); - - auto bk = Kokkos::subview(b, k, Kokkos::ALL()); - auto rk = Kokkos::subview(r, k, Kokkos::ALL()); - TeamCopy::invoke( - member, bk, rk); - member.team_barrier(); - TeamGemv::invoke(member, -1.0, C0, x0, 1.0, - rk); - TeamGemv::invoke(member, -1.0, A1, x1, 1.0, - rk); - } - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, Acopy.extent(5)), [&](const int &v) { + auto A = Kokkos::subview(Acopy, i, Kokkos::ALL(), 1, Kokkos::ALL(), Kokkos::ALL(), v); + auto B = Kokkos::subview(Acopy, i, Kokkos::ALL(), 2, Kokkos::ALL(), Kokkos::ALL(), v); + auto C = Kokkos::subview(Acopy, i, Kokkos::ALL(), 0, Kokkos::ALL(), Kokkos::ALL(), v); + + for (int jvec = 0, jvecend = rs.extent(1); jvec < jvecend; ++jvec) { + auto x = Kokkos::subview(xs, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + auto b = 
Kokkos::subview(bs, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + auto r = Kokkos::subview(rs, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + + if (L == 1) { + auto A0 = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); + auto x0 = Kokkos::subview(x, 0, Kokkos::ALL()); + auto b0 = Kokkos::subview(b, 0, Kokkos::ALL()); + auto r0 = Kokkos::subview(r, 0, Kokkos::ALL()); + + TeamCopy::invoke(member, b0, r0); + TeamGemv::invoke(member, -1.0, A0, x0, 1.0, r0); + } else { + int k = 0; + { + /// first row + auto A1 = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); + auto B2 = Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); + + auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); + auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); + + auto bk = Kokkos::subview(b, k, Kokkos::ALL()); + auto rk = Kokkos::subview(r, k, Kokkos::ALL()); + TeamCopy::invoke(member, bk, rk); + member.team_barrier(); + TeamGemv::invoke(member, -1.0, A1, x1, 1.0, rk); + TeamGemv::invoke(member, -1.0, B2, x2, 1.0, rk); + ++k; } - }); + for (; k < (L - 1); ++k) { + auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), Kokkos::ALL()); + auto A1 = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); + auto B2 = Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); + + auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); + auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); + auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); + + auto bk = Kokkos::subview(b, k, Kokkos::ALL()); + auto rk = Kokkos::subview(r, k, Kokkos::ALL()); + TeamCopy::invoke(member, bk, rk); + member.team_barrier(); + TeamGemv::invoke(member, -1.0, C0, x0, 1.0, rk); + TeamGemv::invoke(member, -1.0, A1, x1, 1.0, rk); + TeamGemv::invoke(member, -1.0, B2, x2, 1.0, rk); + } + { + // last row + auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), Kokkos::ALL()); + auto A1 = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); + + auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); + auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); + + auto bk = 
Kokkos::subview(b, k, Kokkos::ALL()); + auto rk = Kokkos::subview(r, k, Kokkos::ALL()); + TeamCopy::invoke(member, bk, rk); + member.team_barrier(); + TeamGemv::invoke(member, -1.0, C0, x0, 1.0, rk); + TeamGemv::invoke(member, -1.0, A1, x1, 1.0, rk); + } + } + } + }); }); Kokkos::fence(); auto rs_host = Kokkos::create_mirror_view(rs); @@ -669,13 +581,11 @@ int main(int argc, char *argv[]) { Kokkos::fence(); { double norm2 = 0, diff2 = 0; - for (int i0 = 0, i0end = rs.extent(0); i0 < i0end; - ++i0) // N/vector_length - for (int i1 = 0, i1end = rs.extent(1); i1 < i1end; ++i1) // Nvec - for (int i2 = 0, i2end = rs.extent(2); i2 < i2end; ++i2) // L - for (int i3 = 0, i3end = rs.extent(3); i3 < i3end; ++i3) // Blk - for (int i4 = 0, i4end = rs.extent(4); i4 < i4end; - ++i4) { // vector_length + for (int i0 = 0, i0end = rs.extent(0); i0 < i0end; ++i0) // N/vector_length + for (int i1 = 0, i1end = rs.extent(1); i1 < i1end; ++i1) // Nvec + for (int i2 = 0, i2end = rs.extent(2); i2 < i2end; ++i2) // L + for (int i3 = 0, i3end = rs.extent(3); i3 < i3end; ++i3) // Blk + for (int i4 = 0, i4end = rs.extent(4); i4 < i4end; ++i4) { // vector_length const auto val = bs_host(i0, i1, i2, i3, i4); const auto res = rs_host(i0, i1, i2, i3, i4); norm2 += val * val; diff --git a/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagJacobi.cpp b/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagJacobi.cpp index 67a141578e75..629c73924ebf 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagJacobi.cpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/KokkosBatched_Test_BlockTridiagJacobi.cpp @@ -72,11 +72,9 @@ typedef double value_type; /// using namespace KokkosBatched; -static constexpr int vector_length = - DefaultVectorLength::value; +static constexpr int vector_length = DefaultVectorLength::value; #if defined(KOKKOSBATCHED_USE_128BIT_MEMORY_INST) -static constexpr int 
internal_vector_length = - DefaultInternalVectorLength::value; +static constexpr int internal_vector_length = DefaultInternalVectorLength::value; #else static constexpr int internal_vector_length = 1; #endif @@ -98,20 +96,17 @@ struct InverseDiagonalsModeAndAlgoHostImpl { #if defined(KOKKOS_ENABLE_SERIAL) template <> -struct InverseDiagonalsModeAndAlgo - : InverseDiagonalsModeAndAlgoHostImpl {}; +struct InverseDiagonalsModeAndAlgo : InverseDiagonalsModeAndAlgoHostImpl {}; #endif #if defined(KOKKOS_ENABLE_THREADS) template <> -struct InverseDiagonalsModeAndAlgo - : InverseDiagonalsModeAndAlgoHostImpl {}; +struct InverseDiagonalsModeAndAlgo : InverseDiagonalsModeAndAlgoHostImpl {}; #endif #if defined(KOKKOS_ENABLE_ONPENMP) template <> -struct InverseDiagonalsModeAndAlgo - : InverseDiagonalsModeAndAlgoHostImpl {}; +struct InverseDiagonalsModeAndAlgo : InverseDiagonalsModeAndAlgoHostImpl {}; #endif struct InverseDiagonalsModeAndAlgoDeviceImpl { @@ -121,14 +116,12 @@ struct InverseDiagonalsModeAndAlgoDeviceImpl { #if defined(KOKKOS_ENABLE_CUDA) template <> -struct InverseDiagonalsModeAndAlgo - : InverseDiagonalsModeAndAlgoDeviceImpl {}; +struct InverseDiagonalsModeAndAlgo : InverseDiagonalsModeAndAlgoDeviceImpl {}; #endif #if defined(KOKKOS_ENABLE_HIP) template <> -struct InverseDiagonalsModeAndAlgo - : InverseDiagonalsModeAndAlgoDeviceImpl {}; +struct InverseDiagonalsModeAndAlgo : InverseDiagonalsModeAndAlgoDeviceImpl {}; #endif template @@ -211,56 +204,46 @@ int main(int argc, char *argv[]) { /// /// double 16 - Kokkos::View Av( - "A", N / vector_length, L, 4, Blk, Blk); + Kokkos::View Av("A", N / vector_length, L, 4, Blk, Blk); /// double Kokkos::View As( - (value_type *)Av.data(), Av.extent(0), Av.extent(1), Av.extent(2), - Av.extent(3), Av.extent(4), vector_length); + (value_type *)Av.data(), Av.extent(0), Av.extent(1), Av.extent(2), Av.extent(3), Av.extent(4), vector_length); /// double 2 - Kokkos::View - Ai((internal_vector_type *)Av.data(), Av.extent(0), 
Av.extent(1), - Av.extent(2), Av.extent(3), Av.extent(4), - vector_length / internal_vector_length); + Kokkos::View Ai( + (internal_vector_type *)Av.data(), Av.extent(0), Av.extent(1), Av.extent(2), Av.extent(3), Av.extent(4), + vector_length / internal_vector_length); /// double 16 - Kokkos::View xv( - "x", N / vector_length, Nvec, 2, L, Blk); + Kokkos::View xv("x", N / vector_length, Nvec, 2, L, Blk); /// double Kokkos::View xs( - (value_type *)xv.data(), xv.extent(0), xv.extent(1), xv.extent(2), - xv.extent(3), xv.extent(4), vector_length); + (value_type *)xv.data(), xv.extent(0), xv.extent(1), xv.extent(2), xv.extent(3), xv.extent(4), vector_length); /// double 2 - Kokkos::View - xi((internal_vector_type *)xv.data(), xv.extent(0), xv.extent(1), - xv.extent(2), xv.extent(3), xv.extent(4), - vector_length / internal_vector_length); + Kokkos::View xi( + (internal_vector_type *)xv.data(), xv.extent(0), xv.extent(1), xv.extent(2), xv.extent(3), xv.extent(4), + vector_length / internal_vector_length); /// double 16 - Kokkos::View bv( - "b", N / vector_length, Nvec, L, Blk); + Kokkos::View bv("b", N / vector_length, Nvec, L, Blk); /// double Kokkos::View bs( - (value_type *)bv.data(), bv.extent(0), bv.extent(1), bv.extent(2), - bv.extent(3), vector_length); + (value_type *)bv.data(), bv.extent(0), bv.extent(1), bv.extent(2), bv.extent(3), vector_length); /// double 2 - Kokkos::View - bi((internal_vector_type *)bv.data(), bv.extent(0), bv.extent(1), - bv.extent(2), bv.extent(3), vector_length / internal_vector_length); + Kokkos::View bi( + (internal_vector_type *)bv.data(), bv.extent(0), bv.extent(1), bv.extent(2), bv.extent(3), + vector_length / internal_vector_length); /// double copy of A Kokkos::View Acopy( - "Acopy", As.extent(0), As.extent(1), As.extent(2), As.extent(3), - As.extent(4), As.extent(5)); + "Acopy", As.extent(0), As.extent(1), As.extent(2), As.extent(3), As.extent(4), As.extent(5)); - Kokkos::View rs( - "rs", bs.extent(0), bs.extent(1), bs.extent(2), 
bs.extent(3), - bs.extent(4)); + Kokkos::View rs("rs", bs.extent(0), bs.extent(1), bs.extent(2), + bs.extent(3), bs.extent(4)); #if defined(KOKKOSBATCHED_USE_128BIT_MEMORY_INST) auto AA = Ai; @@ -288,18 +271,13 @@ int main(int argc, char *argv[]) { using member_type = typename policy_type::member_type; policy_type policy(AA.extent(0) * L, Kokkos::AUTO(), AA.extent(5)); Kokkos::parallel_for( - "diagonal dominant", policy, - KOKKOS_LAMBDA(const member_type &member) { + "diagonal dominant", policy, KOKKOS_LAMBDA(const member_type &member) { const int i = member.league_rank() / L; const int k = member.league_rank() % L; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, Blk), [&](const int &j) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, AA.extent(5)), - [&](const int &v) { - AA(i, k, 1, j, j, v) += internal_vector_type(9 * Blk); - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, Blk), [&](const int &j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, AA.extent(5)), + [&](const int &v) { AA(i, k, 1, j, j, v) += internal_vector_type(9 * Blk); }); + }); }); Kokkos::fence(); #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSBATCHED_PROFILE) @@ -318,16 +296,14 @@ int main(int argc, char *argv[]) { #endif timer.reset(); typedef internal_vector_type scratch_value_type; - typedef Kokkos::View scratch_view_type; - using policy_type = Kokkos::TeamPolicy; - using member_type = typename policy_type::member_type; - const int per_team_scratch = - scratch_view_type::shmem_size(Blk, Blk, AA.extent(5)); - int team_size = 0; + using policy_type = Kokkos::TeamPolicy; + using member_type = typename policy_type::member_type; + const int per_team_scratch = scratch_view_type::shmem_size(Blk, Blk, AA.extent(5)); + int team_size = 0; if (Blk < 8) { team_size = 32 / AA.extent(5); } else if (Blk < 12) { @@ -338,49 +314,37 @@ int main(int argc, char *argv[]) { policy_type policy(AA.extent(0) * L, team_size, AA.extent(5)); Kokkos::parallel_for( - 
"inverse diagonals", - policy.set_scratch_size( - 0, Kokkos::PerTeam(S < per_team_scratch ? per_team_scratch : S)), + "inverse diagonals", policy.set_scratch_size(0, Kokkos::PerTeam(S < per_team_scratch ? per_team_scratch : S)), KOKKOS_LAMBDA(const member_type &member) { - typedef InverseDiagonalsModeAndAlgo - default_mode_and_algo_type; + typedef InverseDiagonalsModeAndAlgo default_mode_and_algo_type; typedef default_mode_and_algo_type::mode_type mode_type; typedef default_mode_and_algo_type::algo_type algo_type; const int i = member.league_rank() / L; const int k = member.league_rank() % L; - scratch_view_type WW(member.team_scratch(0), Blk, Blk, - AA.extent(5)); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, AA.extent(5)), - [&](const int &v) { - auto A = Kokkos::subview(AA, i, k, 1, Kokkos::ALL(), - Kokkos::ALL(), v); - auto D = Kokkos::subview(AA, i, k, 3, Kokkos::ALL(), - Kokkos::ALL(), v); - auto W = Kokkos::subview(WW, Kokkos::ALL(), Kokkos::ALL(), v); - - Copy::invoke( - member, A, W); - SetIdentity::invoke(member, D); - member.team_barrier(); - LU::invoke(member, W); - Trsm::invoke(member, 1.0, W, - D); - Trsm::invoke(member, 1.0, - W, D); - }); + scratch_view_type WW(member.team_scratch(0), Blk, Blk, AA.extent(5)); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, AA.extent(5)), [&](const int &v) { + auto A = Kokkos::subview(AA, i, k, 1, Kokkos::ALL(), Kokkos::ALL(), v); + auto D = Kokkos::subview(AA, i, k, 3, Kokkos::ALL(), Kokkos::ALL(), v); + auto W = Kokkos::subview(WW, Kokkos::ALL(), Kokkos::ALL(), v); + + Copy::invoke(member, A, W); + SetIdentity::invoke(member, D); + member.team_barrier(); + LU::invoke(member, W); + Trsm::invoke( + member, 1.0, W, D); + Trsm::invoke(member, 1.0, W, D); + }); }); Kokkos::fence(); const double t = timer.seconds(); #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSBATCHED_PROFILE) cudaProfilerStop(); #endif - printf("inverse time = %f , # of inverse per min = %f \n", t, - 1.0 / t * 60); + 
printf("inverse time = %f , # of inverse per min = %f \n", t, 1.0 / t * 60); } /// @@ -392,12 +356,10 @@ int main(int argc, char *argv[]) { #endif timer.reset(); typedef internal_vector_type scratch_value_type; - typedef Kokkos::View scratch_view_type; - const int per_team_scratch = - scratch_view_type::shmem_size(Blk, AA.extent(5)); + const int per_team_scratch = scratch_view_type::shmem_size(Blk, AA.extent(5)); using policy_type = Kokkos::TeamPolicy; using member_type = typename policy_type::member_type; @@ -412,78 +374,53 @@ int main(int argc, char *argv[]) { policy_type policy(AA.extent(0) * L, team_size, AA.extent(5)); for (int iter = 0; iter < niter; ++iter) { - auto xxx = Kokkos::subview(xx, Kokkos::ALL(), Kokkos::ALL(), 0, - Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - auto yyy = Kokkos::subview(xx, Kokkos::ALL(), Kokkos::ALL(), 1, - Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto xxx = Kokkos::subview(xx, Kokkos::ALL(), Kokkos::ALL(), 0, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto yyy = Kokkos::subview(xx, Kokkos::ALL(), Kokkos::ALL(), 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); for (int nis = 0; nis < nsweep; ++nis) { Kokkos::parallel_for( - "solve", - policy.set_scratch_size( - 0, - Kokkos::PerTeam(S < per_team_scratch ? per_team_scratch : S)), + "solve", policy.set_scratch_size(0, Kokkos::PerTeam(S < per_team_scratch ? 
per_team_scratch : S)), KOKKOS_LAMBDA(const member_type &member) { - typedef SolveModeAndAlgo - default_mode_and_algo_type; + typedef SolveModeAndAlgo default_mode_and_algo_type; typedef default_mode_and_algo_type::mode_type mode_type; typedef default_mode_and_algo_type::algo_type algo_type; scratch_view_type WW(member.team_scratch(0), Blk, AA.extent(5)); const int i = member.league_rank() / L; //%AA.extent(0); const int k = member.league_rank() % L; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, AA.extent(5)), - [&](const int &v) { - auto A = Kokkos::subview(AA, i, k, 1, Kokkos::ALL(), - Kokkos::ALL(), v); - auto D = Kokkos::subview(AA, i, k, 3, Kokkos::ALL(), - Kokkos::ALL(), v); - auto B = Kokkos::subview(AA, i, k, 2, Kokkos::ALL(), - Kokkos::ALL(), v); - auto C = Kokkos::subview(AA, i, k ? k - 1 : 0, 0, - Kokkos::ALL(), Kokkos::ALL(), v); - auto u = Kokkos::subview(WW, Kokkos::ALL(), v); - for (int jvec = 0; jvec < Nvec; ++jvec) { - auto x0 = Kokkos::subview( - xxx, i, jvec, k == 0 ? 0 : k - 1, Kokkos::ALL(), v); - auto x1 = - Kokkos::subview(xxx, i, jvec, k, Kokkos::ALL(), v); - auto x2 = Kokkos::subview(xxx, i, jvec, - k == L - 1 ? 
0 : k + 1, - Kokkos::ALL(), v); - auto y1 = - Kokkos::subview(yyy, i, jvec, k, Kokkos::ALL(), v); - auto b = - Kokkos::subview(bb, i, jvec, k, Kokkos::ALL(), v); - - if (L == 1) { - Gemv::invoke(member, 1.0, D, b, 0.0, x1); - } else { - Copy::invoke(member, b, u); - if (k == 0) { - Gemv::invoke(member, -1.0, B, x2, 1.0, - u); - } else if (k == L - 1) { - Gemv::invoke(member, -1.0, C, x0, 1.0, - u); - } else { - Gemv::invoke(member, -1.0, B, x2, 1.0, - u); - Gemv::invoke(member, -1.0, C, x0, 1.0, - u); - } - Gemv::invoke(member, 1.0, D, u, 0.0, y1); - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, AA.extent(5)), [&](const int &v) { + auto A = Kokkos::subview(AA, i, k, 1, Kokkos::ALL(), Kokkos::ALL(), v); + auto D = Kokkos::subview(AA, i, k, 3, Kokkos::ALL(), Kokkos::ALL(), v); + auto B = Kokkos::subview(AA, i, k, 2, Kokkos::ALL(), Kokkos::ALL(), v); + auto C = Kokkos::subview(AA, i, k ? k - 1 : 0, 0, Kokkos::ALL(), Kokkos::ALL(), v); + auto u = Kokkos::subview(WW, Kokkos::ALL(), v); + for (int jvec = 0; jvec < Nvec; ++jvec) { + auto x0 = Kokkos::subview(xxx, i, jvec, k == 0 ? 0 : k - 1, Kokkos::ALL(), v); + auto x1 = Kokkos::subview(xxx, i, jvec, k, Kokkos::ALL(), v); + auto x2 = Kokkos::subview(xxx, i, jvec, k == L - 1 ? 
0 : k + 1, Kokkos::ALL(), v); + auto y1 = Kokkos::subview(yyy, i, jvec, k, Kokkos::ALL(), v); + auto b = Kokkos::subview(bb, i, jvec, k, Kokkos::ALL(), v); + + if (L == 1) { + Gemv::invoke(member, 1.0, D, b, 0.0, x1); + } else { + Copy::invoke(member, b, u); + if (k == 0) { + Gemv::invoke(member, -1.0, B, x2, 1.0, + u); + } else if (k == L - 1) { + Gemv::invoke(member, -1.0, C, x0, 1.0, + u); + } else { + Gemv::invoke(member, -1.0, B, x2, 1.0, + u); + Gemv::invoke(member, -1.0, C, x0, 1.0, + u); } - }); + Gemv::invoke(member, 1.0, D, u, 0.0, y1); + } + } + }); }); auto tmp = xxx; xxx = yyy; @@ -495,8 +432,7 @@ int main(int argc, char *argv[]) { #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSBATCHED_PROFILE) cudaProfilerStop(); #endif - printf("solve time = %f , # of solves per min = %f\n", t, - 1.0 / t * 60 * niter); + printf("solve time = %f , # of solves per min = %f\n", t, 1.0 / t * 60 * niter); } /// @@ -507,140 +443,87 @@ int main(int argc, char *argv[]) { using policy_type = Kokkos::TeamPolicy; policy_type policy(Acopy.extent(0), Kokkos::AUTO(), Acopy.extent(5)); Kokkos::parallel_for( - "compute residual", policy, - KOKKOS_LAMBDA(const typename policy_type::member_type &member) { + "compute residual", policy, KOKKOS_LAMBDA(const typename policy_type::member_type &member) { const int i = member.league_rank(); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, Acopy.extent(5)), - [&](const int &v) { - auto A = Kokkos::subview(Acopy, i, Kokkos::ALL(), 1, - Kokkos::ALL(), Kokkos::ALL(), v); - auto B = Kokkos::subview(Acopy, i, Kokkos::ALL(), 2, - Kokkos::ALL(), Kokkos::ALL(), v); - auto C = Kokkos::subview(Acopy, i, Kokkos::ALL(), 0, - Kokkos::ALL(), Kokkos::ALL(), v); - - for (int jvec = 0, jvecend = rs.extent(1); jvec < jvecend; - ++jvec) { - auto x = Kokkos::subview(xs, i, jvec, nsweep % 2, - Kokkos::ALL(), Kokkos::ALL(), v); - auto b = Kokkos::subview(bs, i, jvec, Kokkos::ALL(), - Kokkos::ALL(), v); - auto r = Kokkos::subview(rs, i, jvec, 
Kokkos::ALL(), - Kokkos::ALL(), v); - - if (L == 1) { - auto A0 = - Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto x0 = Kokkos::subview(x, 0, Kokkos::ALL()); - auto b0 = Kokkos::subview(b, 0, Kokkos::ALL()); - auto r0 = Kokkos::subview(r, 0, Kokkos::ALL()); - - TeamCopy::invoke(member, b0, r0); - TeamGemv::invoke(member, - -1.0, A0, - x0, 1.0, - r0); - } else { - int k = 0; - { - /// first row - auto A1 = - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto B2 = - Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); - - auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); - auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); - - auto bk = Kokkos::subview(b, k, Kokkos::ALL()); - auto rk = Kokkos::subview(r, k, Kokkos::ALL()); - TeamCopy::invoke(member, bk, rk); - member.team_barrier(); - TeamGemv::invoke(member, - -1.0, - A1, x1, - 1.0, - rk); - TeamGemv::invoke(member, - -1.0, - B2, x2, - 1.0, - rk); - ++k; - } - for (; k < (L - 1); ++k) { - auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), - Kokkos::ALL()); - auto A1 = - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto B2 = - Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); - - auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); - auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); - auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); - - auto bk = Kokkos::subview(b, k, Kokkos::ALL()); - auto rk = Kokkos::subview(r, k, Kokkos::ALL()); - TeamCopy::invoke(member, bk, rk); - member.team_barrier(); - TeamGemv::invoke(member, - -1.0, - C0, x0, - 1.0, - rk); - TeamGemv::invoke(member, - -1.0, - A1, x1, - 1.0, - rk); - TeamGemv::invoke(member, - -1.0, - B2, x2, - 1.0, - rk); - } - { - // last row - auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), - Kokkos::ALL()); - auto A1 = - Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - - auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); - auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); - - auto bk = Kokkos::subview(b, k, Kokkos::ALL()); - 
auto rk = Kokkos::subview(r, k, Kokkos::ALL()); - TeamCopy::invoke(member, bk, rk); - member.team_barrier(); - TeamGemv::invoke(member, - -1.0, - C0, x0, - 1.0, - rk); - TeamGemv::invoke(member, - -1.0, - A1, x1, - 1.0, - rk); - } - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, Acopy.extent(5)), [&](const int &v) { + auto A = Kokkos::subview(Acopy, i, Kokkos::ALL(), 1, Kokkos::ALL(), Kokkos::ALL(), v); + auto B = Kokkos::subview(Acopy, i, Kokkos::ALL(), 2, Kokkos::ALL(), Kokkos::ALL(), v); + auto C = Kokkos::subview(Acopy, i, Kokkos::ALL(), 0, Kokkos::ALL(), Kokkos::ALL(), v); + + for (int jvec = 0, jvecend = rs.extent(1); jvec < jvecend; ++jvec) { + auto x = Kokkos::subview(xs, i, jvec, nsweep % 2, Kokkos::ALL(), Kokkos::ALL(), v); + auto b = Kokkos::subview(bs, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + auto r = Kokkos::subview(rs, i, jvec, Kokkos::ALL(), Kokkos::ALL(), v); + + if (L == 1) { + auto A0 = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); + auto x0 = Kokkos::subview(x, 0, Kokkos::ALL()); + auto b0 = Kokkos::subview(b, 0, Kokkos::ALL()); + auto r0 = Kokkos::subview(r, 0, Kokkos::ALL()); + + TeamCopy::invoke(member, b0, r0); + TeamGemv::invoke(member, -1.0, A0, + x0, 1.0, r0); + } else { + int k = 0; + { + /// first row + auto A1 = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); + auto B2 = Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); + + auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); + auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); + + auto bk = Kokkos::subview(b, k, Kokkos::ALL()); + auto rk = Kokkos::subview(r, k, Kokkos::ALL()); + TeamCopy::invoke(member, bk, rk); + member.team_barrier(); + TeamGemv::invoke(member, -1.0, A1, + x1, 1.0, rk); + TeamGemv::invoke(member, -1.0, B2, + x2, 1.0, rk); + ++k; } - }); + for (; k < (L - 1); ++k) { + auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), Kokkos::ALL()); + auto A1 = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); + auto B2 = Kokkos::subview(B, k, 
Kokkos::ALL(), Kokkos::ALL()); + + auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); + auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); + auto x2 = Kokkos::subview(x, k + 1, Kokkos::ALL()); + + auto bk = Kokkos::subview(b, k, Kokkos::ALL()); + auto rk = Kokkos::subview(r, k, Kokkos::ALL()); + TeamCopy::invoke(member, bk, rk); + member.team_barrier(); + TeamGemv::invoke(member, -1.0, C0, + x0, 1.0, rk); + TeamGemv::invoke(member, -1.0, A1, + x1, 1.0, rk); + TeamGemv::invoke(member, -1.0, B2, + x2, 1.0, rk); + } + { + // last row + auto C0 = Kokkos::subview(C, k - 1, Kokkos::ALL(), Kokkos::ALL()); + auto A1 = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); + + auto x0 = Kokkos::subview(x, k - 1, Kokkos::ALL()); + auto x1 = Kokkos::subview(x, k, Kokkos::ALL()); + + auto bk = Kokkos::subview(b, k, Kokkos::ALL()); + auto rk = Kokkos::subview(r, k, Kokkos::ALL()); + TeamCopy::invoke(member, bk, rk); + member.team_barrier(); + TeamGemv::invoke(member, -1.0, C0, + x0, 1.0, rk); + TeamGemv::invoke(member, -1.0, A1, + x1, 1.0, rk); + } + } + } + }); }); Kokkos::fence(); auto rs_host = Kokkos::create_mirror_view(rs); @@ -650,13 +533,11 @@ int main(int argc, char *argv[]) { Kokkos::fence(); { double norm2 = 0, diff2 = 0; - for (int i0 = 0, i0end = rs.extent(0); i0 < i0end; - ++i0) // N/vector_length - for (int i1 = 0, i1end = rs.extent(1); i1 < i1end; ++i1) // Nvec - for (int i2 = 0, i2end = rs.extent(2); i2 < i2end; ++i2) // L - for (int i3 = 0, i3end = rs.extent(3); i3 < i3end; ++i3) // Blk - for (int i4 = 0, i4end = rs.extent(4); i4 < i4end; - ++i4) { // vector_length + for (int i0 = 0, i0end = rs.extent(0); i0 < i0end; ++i0) // N/vector_length + for (int i1 = 0, i1end = rs.extent(1); i1 < i1end; ++i1) // Nvec + for (int i2 = 0, i2end = rs.extent(2); i2 < i2end; ++i2) // L + for (int i3 = 0, i3end = rs.extent(3); i3 < i3end; ++i3) // Blk + for (int i4 = 0, i4end = rs.extent(4); i4 < i4end; ++i4) { // vector_length const auto val = bs_host(i0, i1, i2, i3, i4); const 
auto res = rs_host(i0, i1, i2, i3, i4); norm2 += val * val; diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Cuda.cpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Cuda.cpp index 5f9c167b728c..9ac7e82d3a5d 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Cuda.cpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Cuda.cpp @@ -69,8 +69,7 @@ struct Functor { Functor() = default; KOKKOS_INLINE_FUNCTION - Functor(const ViewType &a, const ViewType &b, const ViewType &c) - : _a(a), _b(b), _c(c) {} + Functor(const ViewType &a, const ViewType &b, const ViewType &c) : _a(a), _b(b), _c(c) {} KOKKOS_INLINE_FUNCTION void operator()(const RangeTag &, const int k) const { @@ -78,98 +77,81 @@ struct Functor { auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(_c, k, Kokkos::ALL(), Kokkos::ALL()); - SerialGemm::invoke( - 1.0, aa, bb, 1.0, cc); + SerialGemm::invoke(1.0, aa, bb, 1.0, cc); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV1 &, - const MemberType &member) const { - const int kbeg = - (member.league_rank() * (member.team_size() * VectorLength) + - member.team_rank() * VectorLength); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_c.extent(0))) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); - auto cc = Kokkos::subview(_c, kk, Kokkos::ALL(), Kokkos::ALL()); - - SerialGemm::invoke(1.0, aa, bb, 1.0, cc); - } - }); + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV1 &, const MemberType &member) const { + const int kbeg = (member.league_rank() * (member.team_size() * VectorLength) + member.team_rank() * VectorLength); + 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_c.extent(0))) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); + auto cc = Kokkos::subview(_c, kk, Kokkos::ALL(), Kokkos::ALL()); + + SerialGemm::invoke(1.0, aa, bb, 1.0, cc); + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV2 &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV2 &, const MemberType &member) const { const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_c.extent(0))) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); - auto cc = Kokkos::subview(_c, kk, Kokkos::ALL(), Kokkos::ALL()); - - TeamGemm::invoke(member, 1.0, aa, bb, 1.0, cc); - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_c.extent(0))) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); + auto cc = Kokkos::subview(_c, kk, Kokkos::ALL(), Kokkos::ALL()); + + TeamGemm::invoke(member, 1.0, aa, bb, 1.0, cc); + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV3 &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV3 &, const MemberType &member) const { const int lvl = 0; - ScratchViewType sa(member.team_scratch(lvl), VectorLength, - _a.extent(1), _a.extent(2)); - ScratchViewType sb(member.team_scratch(lvl), VectorLength, - _b.extent(1), _b.extent(2)); + ScratchViewType sa(member.team_scratch(lvl), VectorLength, _a.extent(1), _a.extent(2)); + 
ScratchViewType sb(member.team_scratch(lvl), VectorLength, _b.extent(1), _b.extent(2)); const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_c.extent(0))) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); - auto cc = Kokkos::subview(_c, kk, Kokkos::ALL(), Kokkos::ALL()); - - auto saa = Kokkos::subview(sa, k, Kokkos::ALL(), Kokkos::ALL()); - auto sbb = Kokkos::subview(sb, k, Kokkos::ALL(), Kokkos::ALL()); - - TeamCopy::invoke(member, aa, saa); - TeamCopy::invoke(member, bb, sbb); - member.team_barrier(); - - TeamGemm::invoke(member, 1.0, saa, sbb, 1.0, cc); - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_c.extent(0))) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); + auto cc = Kokkos::subview(_c, kk, Kokkos::ALL(), Kokkos::ALL()); + + auto saa = Kokkos::subview(sa, k, Kokkos::ALL(), Kokkos::ALL()); + auto sbb = Kokkos::subview(sb, k, Kokkos::ALL(), Kokkos::ALL()); + + TeamCopy::invoke(member, aa, saa); + TeamCopy::invoke(member, bb, sbb); + member.team_barrier(); + + TeamGemm::invoke(member, 1.0, saa, sbb, 1.0, + cc); + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagHandmade &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagHandmade &, const MemberType &member) const { const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_c.extent(0))) { - const int m = _c.extent(1), n = _c.extent(2), q = _a.extent(2); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(member, 
0, m * n), [&](const int &ij) { - const int i = ij % m, j = ij / m; - typename ViewType::non_const_value_type cval = 0; - for (int p = 0; p < q; ++p) - cval += _a(kk, i, p) * _b(kk, p, j); - _c(kk, i, j) += cval; - }); - } + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_c.extent(0))) { + const int m = _c.extent(1), n = _c.extent(2), q = _a.extent(2); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m * n), [&](const int &ij) { + const int i = ij % m, j = ij / m; + typename ViewType::non_const_value_type cval = 0; + for (int p = 0; p < q; ++p) cval += _a(kk, i, p) * _b(kk, p, j); + _c(kk, i, j) += cval; }); + } + }); } }; @@ -177,19 +159,15 @@ template void Gemm(const int NN, const int BlkSize) { typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; - std::cout << "SIMD is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; } const double flop = (N * VectorLength) * FlopCount(BlkSize, BlkSize, BlkSize); @@ -201,10 +179,8 @@ void Gemm(const int NN, const int BlkSize) { const int iter_begin = -3, iter_end = 30; Kokkos::Timer timer; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize), - bmat("bmat", N * VectorLength, BlkSize, BlkSize), - cref("cref", N * VectorLength, BlkSize, BlkSize); + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize), + bmat("bmat", N * VectorLength, BlkSize, BlkSize), cref("cref", N * 
VectorLength, BlkSize, BlkSize); { Random random; @@ -225,12 +201,9 @@ void Gemm(const int NN, const int BlkSize) { /// /// CUBLAS Strided version /// - const Kokkos::LayoutStride stride(N * VectorLength, BlkSize * BlkSize, - BlkSize, 1, BlkSize, BlkSize); + const Kokkos::LayoutStride stride(N * VectorLength, BlkSize * BlkSize, BlkSize, 1, BlkSize, BlkSize); - Kokkos::View a( - "a", stride), - b("b", stride), c("c", stride); + Kokkos::View a("a", stride), b("b", stride), c("c", stride); double tavg = 0, tmin = tmax; @@ -238,13 +211,10 @@ void Gemm(const int NN, const int BlkSize) { cublasHandle_t handle; stat = cublasCreate(&handle); - if (stat != CUBLAS_STATUS_SUCCESS) - Kokkos::abort("CUBLAS initialization failed\n"); + if (stat != CUBLAS_STATUS_SUCCESS) Kokkos::abort("CUBLAS initialization failed\n"); - auto amat_device = - Kokkos::create_mirror_view(DeviceMemorySpaceType(), amat); - auto bmat_device = - Kokkos::create_mirror_view(DeviceMemorySpaceType(), bmat); + auto amat_device = Kokkos::create_mirror_view(DeviceMemorySpaceType(), amat); + auto bmat_device = Kokkos::create_mirror_view(DeviceMemorySpaceType(), bmat); Kokkos::deep_copy(amat_device, amat); Kokkos::deep_copy(bmat_device, bmat); @@ -268,12 +238,10 @@ void Gemm(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - stat = cublasDgemmStridedBatched( - handle, CUBLAS_OP_N, CUBLAS_OP_N, BlkSize, BlkSize, BlkSize, &one, - (const value_type *)a.data(), BlkSize, BlkSize * BlkSize, - (const value_type *)b.data(), BlkSize, BlkSize * BlkSize, &zero, - (value_type *)c.data(), BlkSize, BlkSize * BlkSize, - N * VectorLength); + stat = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, BlkSize, BlkSize, BlkSize, &one, + (const value_type *)a.data(), BlkSize, BlkSize * BlkSize, + (const value_type *)b.data(), BlkSize, BlkSize * BlkSize, &zero, + (value_type *)c.data(), BlkSize, BlkSize * BlkSize, N * VectorLength); Kokkos::fence(); const double t = timer.seconds(); @@ -282,16 +250,14 @@ 
void Gemm(const int NN, const int BlkSize) { } tavg /= iter_end; - auto csol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); + auto csol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); Kokkos::deep_copy(csol, c); Kokkos::deep_copy(cref, csol); std::cout << std::setw(8) << "CUBLAS" << std::setw(8) << "Strided" << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = N/A" << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << std::endl; } cublasDestroy(handle); @@ -303,15 +269,13 @@ void Gemm(const int NN, const int BlkSize) { /// Range policy version /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); double tavg = 0, tmin = tmax; { typedef Functor functor_type; - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const Kokkos::RangePolicy policy(0, N * VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { // flush @@ -325,8 +289,7 @@ void Gemm(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::RangeTag", - policy, functor_type(a, b, c)); + Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::RangeTag", policy, functor_type(a, b, c)); Kokkos::fence(); const double t = timer.seconds(); @@ -335,22 +298,19 @@ void Gemm(const int NN, const int BlkSize) { } tavg /= iter_end; - auto csol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); + auto csol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); Kokkos::deep_copy(csol, c); double diff = 0; for (int i = 0, iend = 
cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(cref(i, j, k) - - csol(i, j, k)); + diff += Kokkos::ArithTraits::abs(cref(i, j, k) - csol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Range" << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = N/A" << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -365,21 +325,18 @@ void Gemm(const int NN, const int BlkSize) { /// expect the same performance as range policy /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; // 128 is rough estimates - const int team_size = - policy_type(N / 32, Kokkos::AUTO, VectorLength) - .team_size_recommended(functor_type(), Kokkos::ParallelForTag()); + const int team_size = policy_type(N / 32, Kokkos::AUTO, VectorLength) + .team_size_recommended(functor_type(), Kokkos::ParallelForTag()); const policy_type policy(N / team_size, team_size, VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -394,8 +351,7 @@ void Gemm(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::TeamPolicyV1", - policy, functor_type(a, b, c)); + Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::TeamPolicyV1", policy, functor_type(a, b, 
c)); Kokkos::fence(); const double t = timer.seconds(); @@ -404,23 +360,19 @@ void Gemm(const int NN, const int BlkSize) { } tavg /= iter_end; - auto csol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); + auto csol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); Kokkos::deep_copy(csol, c); double diff = 0; for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(cref(i, j, k) - - csol(i, j, k)); + diff += Kokkos::ArithTraits::abs(cref(i, j, k) - csol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V1" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -434,26 +386,21 @@ void Gemm(const int NN, const int BlkSize) { /// Team policy V2 - team parallel /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int is_blocked_algo = - (std::is_same::value), - mb = Algo::Gemm::Blocked::mb(), - mp = BlkSize % mb > 0; + const int is_blocked_algo = (std::is_same::value), + mb = Algo::Gemm::Blocked::mb(), mp = BlkSize % mb > 0; const int mblk = 
is_blocked_algo ? (BlkSize / mb + mp) : BlkSize; const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); + policy_type(N, Kokkos::AUTO, VectorLength).team_size_max(functor_type(), Kokkos::ParallelForTag()); const int team_size = std::min(std::max(mblk * mblk, 4), max_team_size); policy_type policy(N, team_size, VectorLength); @@ -469,8 +416,7 @@ void Gemm(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::TeamPolicyV2", - policy, functor_type(a, b, c)); + Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::TeamPolicyV2", policy, functor_type(a, b, c)); Kokkos::fence(); const double t = timer.seconds(); @@ -479,23 +425,19 @@ void Gemm(const int NN, const int BlkSize) { } tavg /= iter_end; - auto csol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); + auto csol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); Kokkos::deep_copy(csol, c); double diff = 0; for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(cref(i, j, k) - - csol(i, j, k)); + diff += Kokkos::ArithTraits::abs(cref(i, j, k) - csol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V2" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -509,37 +451,29 @@ void Gemm(const int NN, const int BlkSize) 
{ /// Team policy V3 - team parallel + scratch /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int lvl = 0, - per_team_scratch = 2 * ScratchViewType::shmem_size( - VectorLength, BlkSize, BlkSize); + const int lvl = 0, per_team_scratch = 2 * ScratchViewType::shmem_size(VectorLength, BlkSize, BlkSize); // std::cout << "per team scratch " << per_team_scratch << "\n"; if (per_team_scratch / 1024 < 48) { - const int is_blocked_algo = - (std::is_same::value), - mb = Algo::Gemm::Blocked::mb(), - mp = BlkSize % mb > 0; + const int is_blocked_algo = (std::is_same::value), + mb = Algo::Gemm::Blocked::mb(), mp = BlkSize % mb > 0; const int mblk = is_blocked_algo ? 
(BlkSize / mb + mp) : BlkSize; - const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); + const int max_team_size = policy_type(N, Kokkos::AUTO, VectorLength) + .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)) + .team_size_max(functor_type(), Kokkos::ParallelForTag()); const int team_size = std::min(std::max(mblk * mblk, 4), max_team_size); policy_type policy = - policy_type(N, team_size, VectorLength) - .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)); + policy_type(N, team_size, VectorLength).set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)); for (int iter = iter_begin; iter < iter_end; ++iter) { // flush flush.run(); @@ -552,9 +486,7 @@ void Gemm(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemmCuda::TeamPolicyV3", policy, - functor_type(a, b, c)); + Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::TeamPolicyV3", policy, functor_type(a, b, c)); Kokkos::fence(); const double t = timer.seconds(); @@ -563,23 +495,19 @@ void Gemm(const int NN, const int BlkSize) { } tavg /= iter_end; - auto csol = Kokkos::create_mirror_view( - typename HostSpaceType::memory_space(), c); + auto csol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); Kokkos::deep_copy(csol, c); double diff = 0; for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(cref(i, j, k) - - csol(i, j, k)); + diff += Kokkos::ArithTraits::abs(cref(i, j, k) - csol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V3" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size - << " ScratchSize (KB) = " << std::setw(3) - << (per_team_scratch / 1024) << " 
time = " << std::scientific - << tmin << " avg flop/s = " << (flop / tavg) + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size + << " ScratchSize (KB) = " << std::setw(3) << (per_team_scratch / 1024) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -587,8 +515,7 @@ void Gemm(const int NN, const int BlkSize) { std::cout << std::endl; } else { std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V3" - << " Scratch per team is too big:" << std::setw(3) - << (per_team_scratch / 1024) << std::endl; + << " Scratch per team is too big:" << std::setw(3) << (per_team_scratch / 1024) << std::endl; } } } @@ -598,19 +525,16 @@ void Gemm(const int NN, const int BlkSize) { /// Team policy - handmade /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); + policy_type(N, Kokkos::AUTO, VectorLength).team_size_max(functor_type(), Kokkos::ParallelForTag()); const int team_size = std::min(max_team_size, BlkSize * BlkSize); @@ -627,9 +551,7 @@ void Gemm(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemmCuda::TeamPolicyHandmade", policy, - functor_type(a, b, c)); + Kokkos::parallel_for("KokkosBatched::PerfTest::GemmCuda::TeamPolicyHandmade", policy, functor_type(a, b, c)); Kokkos::fence(); const double t = 
timer.seconds(); @@ -638,23 +560,19 @@ void Gemm(const int NN, const int BlkSize) { } tavg /= iter_end; - auto csol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); + auto csol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), c); Kokkos::deep_copy(csol, c); double diff = 0; for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(cref(i, j, k) - - csol(i, j, k)); + diff += Kokkos::ArithTraits::abs(cref(i, j, k) - csol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team HM" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Host.hpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Host.hpp index 225e10f63b0a..cfcbb176faa0 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Host.hpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemm_Host.hpp @@ -35,7 +35,7 @@ #include "KokkosBatched_Gemm_Decl.hpp" #include "KokkosBatched_Gemm_Serial_Impl.hpp" -//#undef __KOKKOSBATCHED_INTEL_MKL_BATCHED__ +// #undef __KOKKOSBATCHED_INTEL_MKL_BATCHED__ namespace KokkosBatched { namespace PerfTest { @@ -66,25 +66,20 @@ template void Gemm(const int NN) { typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - 
DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; #if defined(__AVX512F__) - std::cout << "AVX512 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX512 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #elif defined(__AVX__) || defined(__AVX2__) - std::cout << "AVX or AVX2 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX or AVX2 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #else - std::cout << "SIMD (compiler vectorization) is defined: datatype " - << value_type_name << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD (compiler vectorization) is defined: datatype " << value_type_name << " a vector length " + << VectorLength << "\n"; #endif } @@ -95,8 +90,7 @@ void Gemm(const int NN) { Kokkos::Timer timer; Kokkos::View cref; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize), + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize), bmat("bmat", N * VectorLength, BlkSize, BlkSize); Kokkos::Random_XorShift64_Pool random(13718); @@ -104,13 +98,11 @@ void Gemm(const int NN) { Kokkos::fill_random(bmat, random, value_type(1.0)); typedef Vector, VectorLength> VectorType; - Kokkos::View amat_simd( - "amat_simd", N, BlkSize, BlkSize), + Kokkos::View amat_simd("amat_simd", N, BlkSize, BlkSize), bmat_simd("bmat_simd", N, BlkSize, BlkSize); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemmHost::Pack", - Kokkos::RangePolicy(0, N * VectorLength), + "KokkosBatched::PerfTest::GemmHost::Pack", 
Kokkos::RangePolicy(0, N * VectorLength), KOKKOS_LAMBDA(const int k) { const int k0 = k / VectorLength, k1 = k % VectorLength; for (int i = 0; i < BlkSize; ++i) @@ -129,14 +121,11 @@ void Gemm(const int NN) { /// #if defined(__KOKKOSBATCHED_INTEL_MKL__) { - Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), - c("c", N * VectorLength, BlkSize, BlkSize); + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize), + b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); { - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const Kokkos::RangePolicy policy(0, N * VectorLength); double tavg = 0, tmin = tmax; for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -152,24 +141,20 @@ void Gemm(const int NN) { timer.reset(); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemmHost::CblasOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::GemmHost::CblasOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(c, k, Kokkos::ALL(), Kokkos::ALL()); const double one = 1.0; if (std::is_same::value) { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, BlkSize, - BlkSize, BlkSize, one, (double *)aa.data(), - aa.stride_0(), (double *)bb.data(), bb.stride_0(), - one, (double *)cc.data(), cc.stride_0()); - } else if (std::is_same >::value) { - cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, BlkSize, - BlkSize, BlkSize, (void *)&one, (void *)aa.data(), - aa.stride_0(), (void *)bb.data(), bb.stride_0(), - (void *)&one, (void *)cc.data(), cc.stride_0()); + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, BlkSize, BlkSize, BlkSize, one, + (double *)aa.data(), aa.stride_0(), (double *)bb.data(), bb.stride_0(), one, + (double *)cc.data(), cc.stride_0()); + } else if (std::is_same >::value) { + 
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, BlkSize, BlkSize, BlkSize, (void *)&one, + (void *)aa.data(), aa.stride_0(), (void *)bb.data(), bb.stride_0(), (void *)&one, + (void *)cc.data(), cc.stride_0()); } }); @@ -181,10 +166,8 @@ void Gemm(const int NN) { tavg /= iter_end; std::cout << std::setw(12) << "MKL DGEMM" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << std::endl; cref = c; } @@ -192,14 +175,11 @@ void Gemm(const int NN) { #if defined(__KOKKOSBATCHED_INTEL_MKL_BATCHED__) { - typedef Kokkos::View - ViewType; - ViewType a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), + typedef Kokkos::View ViewType; + ViewType a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); - value_type *aa[N * VectorLength], *bb[N * VectorLength], - *cc[N * VectorLength]; + value_type *aa[N * VectorLength], *bb[N * VectorLength], *cc[N * VectorLength]; for (int k = 0; k < N * VectorLength; ++k) { aa[k] = &a(k, 0, 0); @@ -234,15 +214,11 @@ void Gemm(const int NN) { timer.reset(); if (std::is_same::value) { - cblas_dgemm_batch(CblasRowMajor, transA, transB, blksize, blksize, - blksize, one, (const double **)aa, lda, - (const double **)bb, ldb, one, (double **)cc, ldc, - 1, size_per_grp); + cblas_dgemm_batch(CblasRowMajor, transA, transB, blksize, blksize, blksize, one, (const double **)aa, lda, + (const double **)bb, ldb, one, (double **)cc, ldc, 1, size_per_grp); } else if (std::is_same >::value) { - cblas_zgemm_batch(CblasRowMajor, transA, transB, blksize, blksize, - blksize, one, (const void **)aa, lda, - (const void **)bb, ldb, one, (void **)cc, ldc, 1, - 
size_per_grp); + cblas_zgemm_batch(CblasRowMajor, transA, transB, blksize, blksize, blksize, one, (const void **)aa, lda, + (const void **)bb, ldb, one, (void **)cc, ldc, 1, size_per_grp); } HostSpaceType().fence(); @@ -255,22 +231,18 @@ void Gemm(const int NN) { double diff = 0; for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) - for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += abs(cref(i, j, k) - c(i, j, k)); + for (int k = 0, kend = cref.extent(2); k < kend; ++k) diff += abs(cref(i, j, k) - c(i, j, k)); std::cout << std::setw(12) << "MKL Batch" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff + << std::endl; } } #endif #if defined(__KOKKOSBATCHED_INTEL_MKL_COMPACT_BATCHED__) { - Kokkos::View a( - "a", N, BlkSize, BlkSize), + Kokkos::View a("a", N, BlkSize, BlkSize), b("b", N, BlkSize, BlkSize), c("c", N, BlkSize, BlkSize); { @@ -306,19 +278,15 @@ void Gemm(const int NN) { timer.reset(); if (std::is_same::value) { - mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_NOTRANS, BlkSize, - BlkSize, BlkSize, done, (const double *)a.data(), - (MKL_INT)a.stride_1(), (const double *)b.data(), - (MKL_INT)b.stride_1(), done, (double *)c.data(), - (MKL_INT)c.stride_1(), format, N * VectorLength); - } else if (std::is_same >::value) { - mkl_zgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_NOTRANS, BlkSize, - BlkSize, BlkSize, (MKL_Complex16 *)&zone, - (const double *)a.data(), (MKL_INT)a.stride_1(), - (const double *)b.data(), (MKL_INT)b.stride_1(), - (MKL_Complex16 *)&zone, (double *)c.data(), - (MKL_INT)c.stride_1(), format, N * VectorLength); + 
mkl_dgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_NOTRANS, BlkSize, BlkSize, BlkSize, done, + (const double *)a.data(), (MKL_INT)a.stride_1(), (const double *)b.data(), + (MKL_INT)b.stride_1(), done, (double *)c.data(), (MKL_INT)c.stride_1(), format, + N * VectorLength); + } else if (std::is_same >::value) { + mkl_zgemm_compact(MKL_ROW_MAJOR, MKL_NOTRANS, MKL_NOTRANS, BlkSize, BlkSize, BlkSize, + (MKL_Complex16 *)&zone, (const double *)a.data(), (MKL_INT)a.stride_1(), + (const double *)b.data(), (MKL_INT)b.stride_1(), (MKL_Complex16 *)&zone, + (double *)c.data(), (MKL_INT)c.stride_1(), format, N * VectorLength); } HostSpaceType().fence(); @@ -332,15 +300,12 @@ void Gemm(const int NN) { for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += abs(cref(i, j, k) - - c(i / VectorLength, j, k)[i % VectorLength]); + diff += abs(cref(i, j, k) - c(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(12) << "MKL Cmpct" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff + << std::endl; } } } @@ -351,16 +316,13 @@ void Gemm(const int NN) { { libxsmm_init(); - Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, BlkSize), - c("c", N * VectorLength, BlkSize, BlkSize); + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize), + b("b", N * VectorLength, BlkSize, BlkSize), c("c", N * VectorLength, BlkSize, BlkSize); libxsmm_blasint lda = a.stride_1(), ldb = b.stride_1(), ldc = c.stride_1(); { - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const 
Kokkos::RangePolicy policy(0, N * VectorLength); double tavg = 0, tmin = tmax; @@ -382,19 +344,15 @@ void Gemm(const int NN) { timer.reset(); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemmHost::libxswmmOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::GemmHost::libxswmmOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(c, k, Kokkos::ALL(), Kokkos::ALL()); // column major - libxsmm_gemm((const char *)&transA, (const char *)&transB, - blksize, blksize, blksize, (const double *)&one, - (const double *)bb.data(), - (const libxsmm_blasint *)&ldb, - (const double *)aa.data(), - (const libxsmm_blasint *)&lda, (const double *)&one, + libxsmm_gemm((const char *)&transA, (const char *)&transB, blksize, blksize, blksize, + (const double *)&one, (const double *)bb.data(), (const libxsmm_blasint *)&ldb, + (const double *)aa.data(), (const libxsmm_blasint *)&lda, (const double *)&one, (double *)cc.data(), (const libxsmm_blasint *)&ldc); }); @@ -409,15 +367,12 @@ void Gemm(const int NN) { double diff = 0; for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) - for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += abs(cref(i, j, k) - c(i, j, k)); + for (int k = 0, kend = cref.extent(2); k < kend; ++k) diff += abs(cref(i, j, k) - c(i, j, k)); std::cout << std::setw(12) << "libxsmm" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff + << std::endl; } libxsmm_finalize(); } @@ -488,8 +443,7 @@ void 
Gemm(const int NN) { /// Serial SIMD with appropriate data layout /// { - Kokkos::View a( - "a", N, BlkSize, BlkSize), + Kokkos::View a("a", N, BlkSize, BlkSize), b("b", N, BlkSize, BlkSize), c("c", N, BlkSize, BlkSize); { @@ -510,14 +464,12 @@ void Gemm(const int NN) { timer.reset(); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemmHost::SIMDSerialOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::GemmHost::SIMDSerialOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(b, k, Kokkos::ALL(), Kokkos::ALL()); auto cc = Kokkos::subview(c, k, Kokkos::ALL(), Kokkos::ALL()); - SerialGemm::invoke(1.0, aa, bb, 1.0, cc); + SerialGemm::invoke(1.0, aa, bb, 1.0, cc); }); HostSpaceType().fence(); @@ -531,15 +483,12 @@ void Gemm(const int NN) { for (int i = 0, iend = cref.extent(0); i < iend; ++i) for (int j = 0, jend = cref.extent(1); j < jend; ++j) for (int k = 0, kend = cref.extent(2); k < kend; ++k) - diff += abs(cref(i, j, k) - - c(i / VectorLength, j, k)[i % VectorLength]); + diff += abs(cref(i, j, k) - c(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(12) << "KK Vector" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff + << std::endl; } } std::cout << std::endl; diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemv_Host.hpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemv_Host.hpp index 9ae401f03fba..e368e8c00b3a 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemv_Host.hpp +++ 
b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Gemv_Host.hpp @@ -15,8 +15,8 @@ //@HEADER /// \author Kyungjoo Kim (kyukim@sandia.gov) -//#define __KOKKOSBATCHED_INTEL_MKL__ -//#define __KOKKOSBATCHED_INTEL_MKL_BATCHED__ +// #define __KOKKOSBATCHED_INTEL_MKL__ +// #define __KOKKOSBATCHED_INTEL_MKL_BATCHED__ #include @@ -60,47 +60,38 @@ double FlopCount(int mm, int nn) { return (FLOP_MUL * (m * n) + FLOP_ADD * (m * n)); } -template +template void Gemv(const int NN) { typedef Kokkos::Schedule ScheduleType; // typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; #if defined(__AVX512F__) - std::cout << "AVX512 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX512 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #elif defined(__AVX__) || defined(__AVX2__) - std::cout << "AVX or AVX2 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX or AVX2 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #else - std::cout << "SIMD (compiler vectorization) is defined: datatype " - << value_type_name << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD (compiler vectorization) is defined: datatype " << value_type_name << " a vector length " + << VectorLength << "\n"; #endif } - const double flop = - (N * VectorLength) * FlopCount(BlkSize, BlkSize) * NumVecs; + const double flop = (N * VectorLength) * FlopCount(BlkSize, BlkSize) * NumVecs; // const double 
tmax = 1.0e15; const int iter_begin = -10, iter_end = 100; Kokkos::Timer timer; Kokkos::View yref; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize); - Kokkos::View xvec( - "xvec", N * VectorLength, NumVecs, BlkSize); + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize); + Kokkos::View xvec("xvec", N * VectorLength, NumVecs, BlkSize); Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(xvec, random, value_type(1.0)); @@ -115,14 +106,11 @@ void Gemv(const int NN) { /// #if defined(__KOKKOSBATCHED_INTEL_MKL__) { - Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize), - x("x", N * VectorLength, NumVecs, BlkSize), - y("y", N * VectorLength, NumVecs, BlkSize); + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize), + x("x", N * VectorLength, NumVecs, BlkSize), y("y", N * VectorLength, NumVecs, BlkSize); { - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const Kokkos::RangePolicy policy(0, N * VectorLength); double t = 0; for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -138,17 +126,14 @@ void Gemv(const int NN) { timer.reset(); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemvHost::CblasOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::GemvHost::CblasOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); for (int j = 0; j < NumVecs; ++j) { auto xx = Kokkos::subview(x, k, j, Kokkos::ALL()); auto yy = Kokkos::subview(y, k, j, Kokkos::ALL()); - cblas_dgemv(CblasRowMajor, CblasNoTrans, BlkSize, BlkSize, 1.0, - (double*)aa.data(), aa.stride_0(), - (double*)xx.data(), xx.stride_0(), 1.0, - (double*)yy.data(), yy.stride_0()); + cblas_dgemv(CblasRowMajor, CblasNoTrans, BlkSize, BlkSize, 1.0, (double*)aa.data(), aa.stride_0(), + (double*)xx.data(), xx.stride_0(), 1.0, (double*)yy.data(), yy.stride_0()); } }); @@ -158,10 +143,8 @@ void Gemv(const int NN) { t /= iter_end; std::cout << std::setw(12) << "MKL DGEMV" - << " 
BlkSize = " << std::setw(3) << BlkSize - << " NumVecs = " << std::setw(3) << NumVecs - << " time = " << std::scientific << t - << " flop/s = " << (flop / t) << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " NumVecs = " << std::setw(3) << NumVecs + << " time = " << std::scientific << t << " flop/s = " << (flop / t) << std::endl; yref = y; } @@ -172,14 +155,11 @@ void Gemv(const int NN) { /// Plain version (comparable to micro BLAS version) /// { - Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize), - x("x", N * VectorLength, NumVecs, BlkSize), - y("y", N * VectorLength, NumVecs, BlkSize); + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize), + x("x", N * VectorLength, NumVecs, BlkSize), y("y", N * VectorLength, NumVecs, BlkSize); { - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const Kokkos::RangePolicy policy(0, N * VectorLength); double t = 0; for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -195,16 +175,14 @@ void Gemv(const int NN) { timer.reset(); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemvHost::SerialOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::GemvHost::SerialOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); for (int j = 0; j < NumVecs; ++j) { auto xx = Kokkos::subview(x, k, j, Kokkos::ALL()); auto yy = Kokkos::subview(y, k, j, Kokkos::ALL()); - SerialGemv::invoke(1.0, aa, xx, - 1.0, yy); + SerialGemv::invoke(1.0, aa, xx, 1.0, yy); } }); @@ -217,38 +195,31 @@ void Gemv(const int NN) { for (int i = 0, iend = yref.extent(0); i < iend; ++i) for (int j = 0, jend = yref.extent(1); j < jend; ++j) for (int k = 0, kend = yref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(yref(i, j, k) - - y(i, j, k)); + diff += Kokkos::ArithTraits::abs(yref(i, j, k) - y(i, j, k)); std::cout << std::setw(12) << "Plain" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumVecs = " << std::setw(3) << NumVecs - << " 
time = " << std::scientific << t - << " flop/s = " << (flop / t) << " diff to ref = " << diff + << " BlkSize = " << std::setw(3) << BlkSize << " NumVecs = " << std::setw(3) << NumVecs + << " time = " << std::scientific << t << " flop/s = " << (flop / t) << " diff to ref = " << diff << std::endl; } } typedef Vector, VectorLength> VectorType; - Kokkos::View amat_simd( - "amat_simd", N, BlkSize, BlkSize), + Kokkos::View amat_simd("amat_simd", N, BlkSize, BlkSize), xvec_simd("xvec_simd", N, NumVecs, BlkSize); for (int k0 = 0; k0 < N; ++k0) for (int k1 = 0; k1 < VectorLength; ++k1) for (int i = 0; i < BlkSize; ++i) { - for (int j = 0; j < NumVecs; ++j) - xvec_simd(k0, j, i)[k1] = xvec(k0 * VectorLength + k1, j, i); - for (int j = 0; j < BlkSize; ++j) - amat_simd(k0, i, j)[k1] = amat(k0 * VectorLength + k1, i, j); + for (int j = 0; j < NumVecs; ++j) xvec_simd(k0, j, i)[k1] = xvec(k0 * VectorLength + k1, j, i); + for (int j = 0; j < BlkSize; ++j) amat_simd(k0, i, j)[k1] = amat(k0 * VectorLength + k1, i, j); } /// /// Serial SIMD with appropriate data layout /// { - Kokkos::View a( - "a", N, BlkSize, BlkSize), + Kokkos::View a("a", N, BlkSize, BlkSize), x("x", N, NumVecs, BlkSize), y("y", N, NumVecs, BlkSize); { @@ -268,16 +239,14 @@ void Gemv(const int NN) { timer.reset(); Kokkos::parallel_for( - "KokkosBatched::PerfTest::GemvHost::SIMDSerialOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::GemvHost::SIMDSerialOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); for (int j = 0; j < NumVecs; ++j) { auto xx = Kokkos::subview(x, k, j, Kokkos::ALL()); auto yy = Kokkos::subview(y, k, j, Kokkos::ALL()); - SerialGemv::invoke(1.0, aa, xx, - 1.0, yy); + SerialGemv::invoke(1.0, aa, xx, 1.0, yy); } }); @@ -290,14 +259,11 @@ void Gemv(const int NN) { for (int i = 0, iend = yref.extent(0); i < iend; ++i) for (int j = 0, jend = yref.extent(1); j < jend; ++j) for (int k = 0, kend = yref.extent(2); k < 
kend; ++k) - diff += Kokkos::ArithTraits::abs( - yref(i, j, k) - y(i / VectorLength, j, k)[i % VectorLength]); + diff += Kokkos::ArithTraits::abs(yref(i, j, k) - y(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(12) << "Serial SIMD" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumVecs = " << std::setw(3) << NumVecs - << " time = " << std::scientific << t - << " flop/s = " << (flop / t) << " diff to ref = " << diff + << " BlkSize = " << std::setw(3) << BlkSize << " NumVecs = " << std::setw(3) << NumVecs + << " time = " << std::scientific << t << " flop/s = " << (flop / t) << " diff to ref = " << diff << std::endl; } } diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Cuda.cpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Cuda.cpp index 9909afd94374..4d3f7c8fd0df 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Cuda.cpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Cuda.cpp @@ -48,15 +48,11 @@ double FlopCount(int mm, int nn) { double m = (double)mm; double n = (double)nn; if (m > n) - return (FLOP_MUL * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n + - 0.5 * m * n - 0.5 * n * n + (2.0 / 3.0) * n) + - FLOP_ADD * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n - - 0.5 * m * n + (1.0 / 6.0) * n)); + return (FLOP_MUL * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n + 0.5 * m * n - 0.5 * n * n + (2.0 / 3.0) * n) + + FLOP_ADD * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n - 0.5 * m * n + (1.0 / 6.0) * n)); else - return (FLOP_MUL * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m + - 0.5 * n * m - 0.5 * m * m + (2.0 / 3.0) * m) + - FLOP_ADD * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m - - 0.5 * n * m + (1.0 / 6.0) * m)); + return (FLOP_MUL * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m + 0.5 * n * m - 0.5 * m * m + (2.0 / 3.0) * m) + + FLOP_ADD * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m - 0.5 * n * m 
+ (1.0 / 6.0) * m)); } struct RangeTag {}; @@ -82,57 +78,48 @@ struct Functor { } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV1 &, - const MemberType &member) const { - const int kbeg = - (member.league_rank() * (member.team_size() * VectorLength) + - member.team_rank() * VectorLength); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < _a.extent_int(0)) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - SerialLU::invoke(aa); - } - }); + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV1 &, const MemberType &member) const { + const int kbeg = (member.league_rank() * (member.team_size() * VectorLength) + member.team_rank() * VectorLength); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < _a.extent_int(0)) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + SerialLU::invoke(aa); + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV2 &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV2 &, const MemberType &member) const { const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < _a.extent_int(0)) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - TeamLU::invoke(member, aa); - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < _a.extent_int(0)) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + TeamLU::invoke(member, aa); + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV3 &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV3 &, const MemberType &member) const { 
const int lvl = 0; - ScratchViewType sa(member.team_scratch(lvl), VectorLength, - _a.extent(1), _a.extent(2)); + ScratchViewType sa(member.team_scratch(lvl), VectorLength, _a.extent(1), _a.extent(2)); const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < _a.extent_int(0)) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto saa = Kokkos::subview(sa, k, Kokkos::ALL(), Kokkos::ALL()); - - TeamCopy::invoke(member, aa, saa); - member.team_barrier(); - TeamLU::invoke(member, saa); - member.team_barrier(); - TeamCopy::invoke(member, saa, aa); - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < _a.extent_int(0)) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + auto saa = Kokkos::subview(sa, k, Kokkos::ALL(), Kokkos::ALL()); + + TeamCopy::invoke(member, aa, saa); + member.team_barrier(); + TeamLU::invoke(member, saa); + member.team_barrier(); + TeamCopy::invoke(member, saa, aa); + } + }); } }; @@ -140,19 +127,15 @@ template void LU(const int NN, const int BlkSize) { typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; - std::cout << "SIMD is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; } const double flop = (N * VectorLength) * FlopCount(BlkSize, BlkSize); @@ -164,8 +147,7 
@@ void LU(const int NN, const int BlkSize) { const int iter_begin = -3, iter_end = 50; Kokkos::Timer timer; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize), + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize), aref("aref", N * VectorLength, BlkSize, BlkSize); { @@ -202,22 +184,18 @@ void LU(const int NN, const int BlkSize) { /// /// CUBLAS Batch version /// - const Kokkos::LayoutStride stride(N * VectorLength, BlkSize * BlkSize, - BlkSize, 1, BlkSize, BlkSize); + const Kokkos::LayoutStride stride(N * VectorLength, BlkSize * BlkSize, BlkSize, 1, BlkSize, BlkSize); - Kokkos::View a( - "a", stride); + Kokkos::View a("a", stride); Kokkos::View info("info", N * VectorLength); cublasStatus_t stat; cublasHandle_t handle; stat = cublasCreate(&handle); - if (stat != CUBLAS_STATUS_SUCCESS) - Kokkos::abort("CUBLAS initialization failed\n"); + if (stat != CUBLAS_STATUS_SUCCESS) Kokkos::abort("CUBLAS initialization failed\n"); - auto amat_device = Kokkos::create_mirror_view( - typename DeviceSpaceType::memory_space(), amat); + auto amat_device = Kokkos::create_mirror_view(typename DeviceSpaceType::memory_space(), amat); Kokkos::deep_copy(amat_device, amat); Kokkos::fence(); @@ -229,12 +207,10 @@ void LU(const int NN, const int BlkSize) { aa[k] = a.data() + k * a.stride_0(); } value_type **aa_device; - if (cudaMalloc(&aa_device, N * VectorLength * sizeof(value_type *)) != - cudaSuccess) { + if (cudaMalloc(&aa_device, N * VectorLength * sizeof(value_type *)) != cudaSuccess) { Kokkos::abort("CUDA memory allocation failed\n"); } - if (cudaMemcpy(aa_device, aa, sizeof(value_type *) * N * VectorLength, - cudaMemcpyHostToDevice) != cudaSuccess) { + if (cudaMemcpy(aa_device, aa, sizeof(value_type *) * N * VectorLength, cudaMemcpyHostToDevice) != cudaSuccess) { Kokkos::abort("CUDA memcpy failed\n"); } Kokkos::fence(); @@ -248,8 +224,7 @@ void LU(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - stat = cublasDgetrfBatched(handle, 
BlkSize, (value_type **)aa_device, - BlkSize, NULL, (int *)info.data(), + stat = cublasDgetrfBatched(handle, BlkSize, (value_type **)aa_device, BlkSize, NULL, (int *)info.data(), N * VectorLength); if (stat != CUBLAS_STATUS_SUCCESS) { Kokkos::abort("CUBLAS LU Batched failed\n"); @@ -262,8 +237,7 @@ void LU(const int NN, const int BlkSize) { } tavg /= iter_end; - auto asol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); + auto asol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); Kokkos::deep_copy(asol, a); Kokkos::deep_copy(aref, asol); @@ -274,8 +248,7 @@ void LU(const int NN, const int BlkSize) { std::cout << std::setw(8) << "CUBLAS" << std::setw(8) << "Batch" << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = N/A" << " ScratchSize (KB) = N/A" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << std::endl; } } @@ -291,8 +264,7 @@ void LU(const int NN, const int BlkSize) { double tavg = 0, tmin = tmax; { typedef Functor functor_type; - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const Kokkos::RangePolicy policy(0, N * VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { // flush @@ -304,8 +276,7 @@ void LU(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::RangeTag", - policy, functor_type(a)); + Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::RangeTag", policy, functor_type(a)); Kokkos::fence(); const double t = timer.seconds(); @@ -314,22 +285,19 @@ void LU(const int NN, const int BlkSize) { } tavg /= iter_end; - auto asol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); + auto asol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); Kokkos::deep_copy(asol, a); double diff = 0; for (int i = 0, iend = 
aref.extent(0); i < iend; ++i) for (int j = 0, jend = aref.extent(1); j < jend; ++j) for (int k = 0, kend = aref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(aref(i, j, k) - - asol(i, j, k)); + diff += Kokkos::ArithTraits::abs(aref(i, j, k) - asol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Range" << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = N/A" << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -346,13 +314,11 @@ void LU(const int NN, const int BlkSize) { double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int team_size = - policy_type(N / 32, Kokkos::AUTO, VectorLength) - .team_size_recommended(functor_type(), Kokkos::ParallelForTag()); + const int team_size = policy_type(N / 32, Kokkos::AUTO, VectorLength) + .team_size_recommended(functor_type(), Kokkos::ParallelForTag()); const policy_type policy(N / team_size, team_size, VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -365,8 +331,7 @@ void LU(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::TeamTagV1", - policy, functor_type(a)); + Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::TeamTagV1", policy, functor_type(a)); Kokkos::fence(); const double t = timer.seconds(); @@ -375,23 +340,19 @@ void LU(const int NN, const int BlkSize) { } tavg /= iter_end; - auto asol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); + auto asol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); Kokkos::deep_copy(asol, a); double diff = 0; for (int i = 0, iend = 
aref.extent(0); i < iend; ++i) for (int j = 0, jend = aref.extent(1); j < jend; ++j) for (int k = 0, kend = aref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(aref(i, j, k) - - asol(i, j, k)); + diff += Kokkos::ArithTraits::abs(aref(i, j, k) - asol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V1" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -408,13 +369,11 @@ void LU(const int NN, const int BlkSize) { double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int is_blocked_algo = - (std::is_same::value), - mb = Algo::LU::Blocked::mb(); + const int is_blocked_algo = (std::is_same::value), + mb = Algo::LU::Blocked::mb(); // mp = BlkSize%mb > 0; const int @@ -422,8 +381,7 @@ void LU(const int NN, const int BlkSize) { mblk = is_blocked_algo ? 
(BlkSize - mb) : (BlkSize - 1); const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); + policy_type(N, Kokkos::AUTO, VectorLength).team_size_max(functor_type(), Kokkos::ParallelForTag()); const int team_size = std::min(std::max(mblk * 2, 1), max_team_size); const policy_type policy(N, team_size, VectorLength); @@ -437,8 +395,7 @@ void LU(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::TeamTagV2", - policy, functor_type(a)); + Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::TeamTagV2", policy, functor_type(a)); Kokkos::fence(); const double t = timer.seconds(); @@ -447,23 +404,19 @@ void LU(const int NN, const int BlkSize) { } tavg /= iter_end; - auto asol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); + auto asol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); Kokkos::deep_copy(asol, a); double diff = 0; for (int i = 0, iend = aref.extent(0); i < iend; ++i) for (int j = 0, jend = aref.extent(1); j < jend; ++j) for (int k = 0, kend = aref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(aref(i, j, k) - - asol(i, j, k)); + diff += Kokkos::ArithTraits::abs(aref(i, j, k) - asol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V2" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -480,27 +433,22 @@ void LU(const int NN, const int BlkSize) { double tavg = 0, tmin = tmax; { - 
typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int lvl = 0, - per_team_scratch = ScratchViewType::shmem_size( - VectorLength, BlkSize, BlkSize); + const int lvl = 0, per_team_scratch = ScratchViewType::shmem_size(VectorLength, BlkSize, BlkSize); if (per_team_scratch / 1024 < 48) { - const int is_blocked_algo = - (std::is_same::value), - mb = Algo::LU::Blocked::mb(); + const int is_blocked_algo = (std::is_same::value), + mb = Algo::LU::Blocked::mb(); // mp = BlkSize%mb > 0; const int // mblk = is_blocked_algo ? (BlkSize/mb + mp) : BlkSize; mblk = is_blocked_algo ? (BlkSize - mb) : (BlkSize - 1); - const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); + const int max_team_size = policy_type(N, Kokkos::AUTO, VectorLength) + .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)) + .team_size_max(functor_type(), Kokkos::ParallelForTag()); const int team_size = std::min(std::max(mblk * 2, 1), max_team_size); policy_type policy(N, team_size, VectorLength); @@ -514,10 +462,8 @@ void LU(const int NN, const int BlkSize) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for( - "KokkosBatched::PerfTest::LUCuda::TeamTagV3", - policy.set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)), - functor_type(a)); + Kokkos::parallel_for("KokkosBatched::PerfTest::LUCuda::TeamTagV3", + policy.set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)), functor_type(a)); Kokkos::fence(); const double t = timer.seconds(); @@ -526,23 +472,19 @@ void LU(const int NN, const int BlkSize) { } tavg /= iter_end; - auto asol = Kokkos::create_mirror_view( - typename HostSpaceType::memory_space(), a); + auto asol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), a); Kokkos::deep_copy(asol, a); double diff = 0; for (int i = 0, iend = aref.extent(0); i < iend; ++i) for 
(int j = 0, jend = aref.extent(1); j < jend; ++j) for (int k = 0, kend = aref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(aref(i, j, k) - - asol(i, j, k)); + diff += Kokkos::ArithTraits::abs(aref(i, j, k) - asol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V3" - << " BlkSize = " << std::setw(3) << BlkSize - << " TeamSize = " << std::setw(3) << team_size - << " ScratchSize (KB) = " << std::setw(3) - << (per_team_scratch / 1024) << " time = " << std::scientific - << tmin << " avg flop/s = " << (flop / tavg) + << " BlkSize = " << std::setw(3) << BlkSize << " TeamSize = " << std::setw(3) << team_size + << " ScratchSize (KB) = " << std::setw(3) << (per_team_scratch / 1024) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -550,8 +492,7 @@ void LU(const int NN, const int BlkSize) { std::cout << std::endl; } else { std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V3" - << " Scratch per team is too big (KB): " - << (per_team_scratch / 1024) << std::endl; + << " Scratch per team is too big (KB): " << (per_team_scratch / 1024) << std::endl; } } } diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Host.hpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Host.hpp index d17f9b90032b..f27365694a64 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Host.hpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_LU_Host.hpp @@ -15,8 +15,8 @@ //@HEADER /// \author Kyungjoo Kim (kyukim@sandia.gov) -//#define __KOKKOSBATCHED_INTEL_MKL__ -//#define __KOKKOSBATCHED_INTEL_MKL_BATCHED__ +// #define __KOKKOSBATCHED_INTEL_MKL__ +// #define __KOKKOSBATCHED_INTEL_MKL_BATCHED__ #include #include "KokkosBatched_Util.hpp" @@ -57,15 
+57,11 @@ double FlopCount(int mm, int nn) { double m = (double)mm; double n = (double)nn; if (m > n) - return (FLOP_MUL * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n + - 0.5 * m * n - 0.5 * n * n + (2.0 / 3.0) * n) + - FLOP_ADD * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n - - 0.5 * m * n + (1.0 / 6.0) * n)); + return (FLOP_MUL * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n + 0.5 * m * n - 0.5 * n * n + (2.0 / 3.0) * n) + + FLOP_ADD * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n - 0.5 * m * n + (1.0 / 6.0) * n)); else - return (FLOP_MUL * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m + - 0.5 * n * m - 0.5 * m * m + (2.0 / 3.0) * m) + - FLOP_ADD * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m - - 0.5 * n * m + (1.0 / 6.0) * m)); + return (FLOP_MUL * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m + 0.5 * n * m - 0.5 * m * m + (2.0 / 3.0) * m) + + FLOP_ADD * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m - 0.5 * n * m + (1.0 / 6.0) * m)); } template @@ -73,26 +69,21 @@ void LU(const int NN) { typedef Kokkos::Schedule ScheduleType; // typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; #if defined(__AVX512F__) - std::cout << "AVX512 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX512 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #elif defined(__AVX__) || defined(__AVX2__) - std::cout << "AVX or AVX2 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX or AVX2 is defined: datatype " << value_type_name << " a vector length " << 
VectorLength << "\n"; #else - std::cout << "SIMD (compiler vectorization) is defined: datatype " - << value_type_name << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD (compiler vectorization) is defined: datatype " << value_type_name << " a vector length " + << VectorLength << "\n"; #endif } @@ -106,8 +97,7 @@ void LU(const int NN) { /// Reference version using MKL DGETRF /// Kokkos::View aref; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize); + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize); Random random; @@ -124,12 +114,11 @@ void LU(const int NN) { } typedef Vector, VectorLength> VectorType; - Kokkos::View amat_simd( - "amat_simd", N, BlkSize, BlkSize); //, a("a", N, BlkSize, BlkSize); + Kokkos::View amat_simd("amat_simd", N, BlkSize, + BlkSize); //, a("a", N, BlkSize, BlkSize); Kokkos::parallel_for( - "KokkosBatched::PerfTest::LUHost::Pack", - Kokkos::RangePolicy(0, N * VectorLength), + "KokkosBatched::PerfTest::LUHost::Pack", Kokkos::RangePolicy(0, N * VectorLength), KOKKOS_LAMBDA(const int k) { const int k0 = k / VectorLength, k1 = k % VectorLength; for (int i = 0; i < BlkSize; ++i) @@ -147,10 +136,8 @@ void LU(const int NN) { /// #if defined(__KOKKOSBATCHED_INTEL_MKL__) { - Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize); - Kokkos::View p( - "p", N * VectorLength, BlkSize); + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize); + Kokkos::View p("p", N * VectorLength, BlkSize); { double tavg = 0, tmin = tmax; for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -163,16 +150,12 @@ void LU(const int NN) { HostSpaceType().fence(); timer.reset(); - Kokkos::RangePolicy policy( - 0, N * VectorLength); + Kokkos::RangePolicy policy(0, N * VectorLength); Kokkos::parallel_for( - "KokkosBatched::PerfTest::LUHost::LAPACKE_dgetrfOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::LUHost::LAPACKE_dgetrfOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, 
k, Kokkos::ALL(), Kokkos::ALL()); auto pp = Kokkos::subview(p, k, Kokkos::ALL()); - LAPACKE_dgetrf(LAPACK_ROW_MAJOR, BlkSize, BlkSize, - (double*)aa.data(), aa.stride_0(), - (int*)pp.data()); + LAPACKE_dgetrf(LAPACK_ROW_MAJOR, BlkSize, BlkSize, (double*)aa.data(), aa.stride_0(), (int*)pp.data()); }); HostSpaceType().fence(); @@ -183,10 +166,8 @@ void LU(const int NN) { tavg /= iter_end; std::cout << std::setw(10) << "MKL LU" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << std::endl; } aref = a; @@ -197,8 +178,7 @@ void LU(const int NN) { #if defined(__KOKKOSBATCHED_INTEL_MKL_COMPACT_BATCHED__) { - Kokkos::View a( - "a", N, BlkSize, BlkSize); + Kokkos::View a("a", N, BlkSize, BlkSize); { double tavg = 0, tmin = tmax; @@ -220,8 +200,7 @@ void LU(const int NN) { HostSpaceType().fence(); timer.reset(); - mkl_dgetrfnp_compact(MKL_ROW_MAJOR, BlkSize, BlkSize, - (double*)a.data(), a.stride_1(), (MKL_INT*)&info, + mkl_dgetrfnp_compact(MKL_ROW_MAJOR, BlkSize, BlkSize, (double*)a.data(), a.stride_1(), (MKL_INT*)&info, format, (MKL_INT)N * VectorLength); HostSpaceType().fence(); @@ -235,15 +214,12 @@ void LU(const int NN) { for (int i = 0, iend = aref.extent(0); i < iend; ++i) for (int j = 0, jend = aref.extent(1); j < jend; ++j) for (int k = 0, kend = aref.extent(2); k < kend; ++k) - diff += abs(aref(i, j, k) - - a(i / VectorLength, j, k)[i % VectorLength]); + diff += abs(aref(i, j, k) - a(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(10) << "MKL Cmpt" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; 
+ << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff + << std::endl; } } } @@ -307,8 +283,7 @@ void LU(const int NN) { /// { - Kokkos::View a( - "a", N, BlkSize, BlkSize); + Kokkos::View a("a", N, BlkSize, BlkSize); { double tavg = 0, tmin = tmax; @@ -324,8 +299,7 @@ void LU(const int NN) { Kokkos::RangePolicy policy(0, N); Kokkos::parallel_for( - "KokkosBatched::PerfTest::LUHost::SIMDSerialOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::LUHost::SIMDSerialOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); SerialLU::invoke(aa); @@ -342,14 +316,11 @@ void LU(const int NN) { for (int i = 0, iend = aref.extent(0); i < iend; ++i) for (int j = 0, jend = aref.extent(1); j < jend; ++j) for (int k = 0, kend = aref.extent(2); k < kend; ++k) - diff += abs(aref(i, j, k) - - a(i / VectorLength, j, k)[i % VectorLength]); + diff += abs(aref(i, j, k) - a(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(10) << "SIMD" - << " BlkSize = " << std::setw(3) << BlkSize - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff + << std::endl; } } } diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Cuda.cpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Cuda.cpp index f99ee9dc8002..99f1a1d5376c 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Cuda.cpp +++ 
b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Cuda.cpp @@ -50,15 +50,13 @@ typedef double value_type; double FlopCountLower(int mm, int nn) { double m = (double)mm; double n = (double)nn; - return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + - FLOP_ADD * (0.5 * m * n * (n - 1.0))); + return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + FLOP_ADD * (0.5 * m * n * (n - 1.0))); } double FlopCountUpper(int mm, int nn) { double m = (double)mm; double n = (double)nn; - return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + - FLOP_ADD * (0.5 * m * n * (n - 1.0))); + return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + FLOP_ADD * (0.5 * m * n * (n - 1.0))); } struct RangeTag {}; @@ -67,8 +65,7 @@ struct TeamTagV2 {}; struct TeamTagV3 {}; struct TeamTagHandmade {}; -template +template struct Functor { ConstUnmanagedViewType _a; UnmanagedViewType _b; @@ -86,160 +83,131 @@ struct Functor { switch (test) { case 0: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, bb); break; case 1: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, bb); break; case 2: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, bb); break; case 3: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, bb); break; case 4: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, bb); break; } } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV1 &, - const MemberType &member) const { - const int kbeg = - (member.league_rank() * (member.team_size() * VectorLength) + - member.team_rank() * VectorLength); - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_b.extent(0))) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); - - switch (test) { - case 0: - SerialTrsm::invoke(1.0, aa, bb); - break; - case 1: - SerialTrsm::invoke(1.0, aa, bb); - break; - case 
2: - SerialTrsm::invoke(1.0, aa, bb); - break; - case 3: - SerialTrsm::invoke(1.0, aa, bb); - break; - case 4: - SerialTrsm::invoke(1.0, aa, bb); - break; - } - } - }); + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV1 &, const MemberType &member) const { + const int kbeg = (member.league_rank() * (member.team_size() * VectorLength) + member.team_rank() * VectorLength); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_b.extent(0))) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); + + switch (test) { + case 0: + SerialTrsm::invoke(1.0, aa, bb); + break; + case 1: + SerialTrsm::invoke(1.0, aa, bb); + break; + case 2: + SerialTrsm::invoke(1.0, aa, bb); + break; + case 3: + SerialTrsm::invoke(1.0, aa, bb); + break; + case 4: + SerialTrsm::invoke(1.0, aa, bb); + break; + } + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV2 &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV2 &, const MemberType &member) const { const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_b.extent(0))) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); - - switch (test) { - case 0: - TeamTrsm::invoke(member, 1.0, aa, bb); - break; - case 1: - TeamTrsm::invoke(member, 1.0, aa, bb); - break; - case 2: - TeamTrsm::invoke(member, 1.0, aa, bb); - break; - case 3: - TeamTrsm::invoke(member, 1.0, aa, bb); - break; - case 4: - TeamTrsm::invoke(member, 1.0, aa, bb); - break; - } - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_b.extent(0))) { + 
auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); + auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); + + switch (test) { + case 0: + TeamTrsm::invoke( + member, 1.0, aa, bb); + break; + case 1: + TeamTrsm::invoke( + member, 1.0, aa, bb); + break; + case 2: + TeamTrsm::invoke( + member, 1.0, aa, bb); + break; + case 3: + TeamTrsm::invoke( + member, 1.0, aa, bb); + break; + case 4: + TeamTrsm::invoke( + member, 1.0, aa, bb); + break; + } + } + }); } template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV3 &, - const MemberType &member) const { + KOKKOS_INLINE_FUNCTION void operator()(const TeamTagV3 &, const MemberType &member) const { const int lvl = 0; - ScratchViewType sa(member.team_scratch(lvl), VectorLength, - _a.extent(1), _a.extent(2)); + ScratchViewType sa(member.team_scratch(lvl), VectorLength, _a.extent(1), _a.extent(2)); // ScratchViewType sb(member.team_scratch(lvl), VectorLength, // _b.extent(1), _b.extent(2)); const int kbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { - const int kk = kbeg + k; - if (kk < int(_b.extent(0))) { - auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); - - auto saa = Kokkos::subview(sa, k, Kokkos::ALL(), Kokkos::ALL()); - - TeamCopy::invoke(member, aa, saa); - member.team_barrier(); - - switch (test) { - case 0: - TeamTrsm::invoke(member, 1.0, saa, bb); - break; - case 1: - TeamTrsm::invoke(member, 1.0, saa, bb); - break; - case 2: - TeamTrsm::invoke(member, 1.0, saa, bb); - break; - case 3: - TeamTrsm::invoke(member, 1.0, saa, bb); - break; - case 4: - TeamTrsm::invoke(member, 1.0, saa, bb); - break; - } - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &k) { + const int kk = kbeg + k; + if (kk < int(_b.extent(0))) { + auto aa = Kokkos::subview(_a, kk, Kokkos::ALL(), 
Kokkos::ALL()); + auto bb = Kokkos::subview(_b, kk, Kokkos::ALL(), Kokkos::ALL()); + + auto saa = Kokkos::subview(sa, k, Kokkos::ALL(), Kokkos::ALL()); + + TeamCopy::invoke(member, aa, saa); + member.team_barrier(); + + switch (test) { + case 0: + TeamTrsm::invoke( + member, 1.0, saa, bb); + break; + case 1: + TeamTrsm::invoke( + member, 1.0, saa, bb); + break; + case 2: + TeamTrsm::invoke( + member, 1.0, saa, bb); + break; + case 3: + TeamTrsm::invoke( + member, 1.0, saa, bb); + break; + case 4: + TeamTrsm::invoke( + member, 1.0, saa, bb); + break; + } + } + }); } }; @@ -247,19 +215,15 @@ template void Trsm(const int NN, const int BlkSize, const int NumCols) { typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; - std::cout << "SIMD is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; } switch (test) { @@ -288,17 +252,14 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { const int iter_begin = -3, iter_end = 30; Kokkos::Timer timer; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize), - bmat("bmat", N * VectorLength, BlkSize, NumCols), - bref("bmat", N * VectorLength, BlkSize, NumCols); + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize), + bmat("bmat", N * VectorLength, BlkSize, NumCols), bref("bmat", N * VectorLength, BlkSize, NumCols); { Random random; for (int k = 0; k < N * VectorLength; ++k) { for (int i = 0; i < BlkSize; ++i) - for (int j = 0; j < BlkSize; ++j) - amat(k, i, j) = 
random.value() + 4.0 * (i == j); + for (int j = 0; j < BlkSize; ++j) amat(k, i, j) = random.value() + 4.0 * (i == j); for (int i = 0; i < BlkSize; ++i) for (int j = 0; j < NumCols; ++j) bmat(k, i, j) = random.value(); } @@ -313,24 +274,18 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { /// /// CUBLAS Batch version /// - const Kokkos::LayoutStride stride(N * VectorLength, BlkSize * BlkSize, - BlkSize, 1, BlkSize, BlkSize); + const Kokkos::LayoutStride stride(N * VectorLength, BlkSize * BlkSize, BlkSize, 1, BlkSize, BlkSize); - Kokkos::View a( - "a", stride), - b("b", stride); + Kokkos::View a("a", stride), b("b", stride); cublasStatus_t stat; cublasHandle_t handle; stat = cublasCreate(&handle); - if (stat != CUBLAS_STATUS_SUCCESS) - Kokkos::abort("CUBLAS initialization failed\n"); + if (stat != CUBLAS_STATUS_SUCCESS) Kokkos::abort("CUBLAS initialization failed\n"); - auto amat_device = Kokkos::create_mirror_view( - typename DeviceSpaceType::memory_space(), amat); - auto bmat_device = Kokkos::create_mirror_view( - typename DeviceSpaceType::memory_space(), bmat); + auto amat_device = Kokkos::create_mirror_view(typename DeviceSpaceType::memory_space(), amat); + auto bmat_device = Kokkos::create_mirror_view(typename DeviceSpaceType::memory_space(), bmat); Kokkos::deep_copy(amat_device, amat); Kokkos::deep_copy(bmat_device, bmat); @@ -346,16 +301,12 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { bb[k] = b.data() + k * b.stride_0(); } value_type **aa_device, **bb_device; - if (cudaMalloc(&aa_device, N * VectorLength * sizeof(value_type *)) != - cudaSuccess || - cudaMalloc(&bb_device, N * VectorLength * sizeof(value_type *)) != - cudaSuccess) { + if (cudaMalloc(&aa_device, N * VectorLength * sizeof(value_type *)) != cudaSuccess || + cudaMalloc(&bb_device, N * VectorLength * sizeof(value_type *)) != cudaSuccess) { Kokkos::abort("CUDA memory allocation failed\n"); } - if (cudaMemcpy(aa_device, aa, sizeof(value_type *) * N * 
VectorLength, - cudaMemcpyHostToDevice) != cudaSuccess || - cudaMemcpy(bb_device, bb, sizeof(value_type *) * N * VectorLength, - cudaMemcpyHostToDevice) != cudaSuccess) { + if (cudaMemcpy(aa_device, aa, sizeof(value_type *) * N * VectorLength, cudaMemcpyHostToDevice) != cudaSuccess || + cudaMemcpy(bb_device, bb, sizeof(value_type *) * N * VectorLength, cudaMemcpyHostToDevice) != cudaSuccess) { Kokkos::abort("CUDA memcpy failed\n"); } Kokkos::fence(); @@ -371,47 +322,37 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { switch (test) { case 0: { // Left, Lower, NoTrans, UnitDiag - stat = cublasDtrsmBatched( - handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, - CUBLAS_DIAG_UNIT, BlkSize, NumCols, &one, - (const value_type **)aa_device, BlkSize, - (value_type **)bb_device, BlkSize, N * VectorLength); + stat = cublasDtrsmBatched(handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, + BlkSize, NumCols, &one, (const value_type **)aa_device, BlkSize, + (value_type **)bb_device, BlkSize, N * VectorLength); break; } case 1: { // Left, Lower, NoTrans, NonUnitDiag - stat = cublasDtrsmBatched( - handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, - CUBLAS_DIAG_NON_UNIT, BlkSize, NumCols, &one, - (const value_type **)aa_device, BlkSize, - (value_type **)bb_device, BlkSize, N * VectorLength); + stat = cublasDtrsmBatched(handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, + CUBLAS_DIAG_NON_UNIT, BlkSize, NumCols, &one, (const value_type **)aa_device, + BlkSize, (value_type **)bb_device, BlkSize, N * VectorLength); break; } case 2: { // Right, Upper, NoTrans, UnitDiag - stat = cublasDtrsmBatched( - handle, CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, - CUBLAS_DIAG_UNIT, BlkSize, NumCols, &one, - (const value_type **)aa_device, BlkSize, - (value_type **)bb_device, BlkSize, N * VectorLength); + stat = cublasDtrsmBatched(handle, CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, 
CUBLAS_DIAG_UNIT, + BlkSize, NumCols, &one, (const value_type **)aa_device, BlkSize, + (value_type **)bb_device, BlkSize, N * VectorLength); break; } case 3: { // Right, Upper, NoTrans, NonUnitDiag - stat = cublasDtrsmBatched( - handle, CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, - CUBLAS_DIAG_NON_UNIT, BlkSize, NumCols, &one, - (const value_type **)aa_device, BlkSize, - (value_type **)bb_device, BlkSize, N * VectorLength); + stat = cublasDtrsmBatched(handle, CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, + CUBLAS_DIAG_NON_UNIT, BlkSize, NumCols, &one, (const value_type **)aa_device, + BlkSize, (value_type **)bb_device, BlkSize, N * VectorLength); break; } case 4: { // Left, Upper, NoTrans, NonUnitDiag - stat = cublasDtrsmBatched( - handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, - CUBLAS_DIAG_NON_UNIT, BlkSize, NumCols, &one, - (const value_type **)aa_device, BlkSize, - (value_type **)bb_device, BlkSize, N * VectorLength); + stat = cublasDtrsmBatched(handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, + CUBLAS_DIAG_NON_UNIT, BlkSize, NumCols, &one, (const value_type **)aa_device, + BlkSize, (value_type **)bb_device, BlkSize, N * VectorLength); break; } } @@ -426,22 +367,19 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { } tavg /= iter_end; - auto bsol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); + auto bsol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); Kokkos::deep_copy(bsol, b); Kokkos::deep_copy(bref, bsol); - if (cudaFree(aa_device) != cudaSuccess || - cudaFree(bb_device) != cudaSuccess) { + if (cudaFree(aa_device) != cudaSuccess || cudaFree(bb_device) != cudaSuccess) { Kokkos::abort("CUDA memory free failed\n"); } std::cout << std::setw(8) << "CUBLAS" << std::setw(8) << "Batched" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols << " TeamSize = N/A" + << " BlkSize = " << std::setw(3) << BlkSize << 
" NumCols = " << std::setw(3) << NumCols + << " TeamSize = N/A" << " ScratchSize (KB) = N/A" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin) << std::endl; } cublasDestroy(handle); @@ -453,14 +391,12 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { /// Range policy version /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, NumCols); + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, NumCols); double tavg = 0, tmin = tmax; { typedef Functor functor_type; - const Kokkos::RangePolicy policy( - 0, N * VectorLength); + const Kokkos::RangePolicy policy(0, N * VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { // flush @@ -473,8 +409,7 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::RangeTag", policy, - functor_type(a, b)); + Kokkos::parallel_for("KokkosBatched::PerfTest::RangeTag", policy, functor_type(a, b)); Kokkos::fence(); const double t = timer.seconds(); @@ -483,23 +418,20 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { } tavg /= iter_end; - auto bsol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); + auto bsol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); Kokkos::deep_copy(bsol, b); double diff = 0; for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(bref(i, j, k) - - bsol(i, j, k)); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - bsol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Range" - << " BlkSize = " << std::setw(3) << 
BlkSize - << " NumCols = " << std::setw(3) << NumCols << " TeamSize = N/A" + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " TeamSize = N/A" << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -513,18 +445,15 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { /// Team policy V1 - almost same scheduling with range policy /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, NumCols); + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, NumCols); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int team_size = - policy_type(N / 32, Kokkos::AUTO, VectorLength) - .team_size_recommended(functor_type(), Kokkos::ParallelForTag()); + const int team_size = policy_type(N / 32, Kokkos::AUTO, VectorLength) + .team_size_recommended(functor_type(), Kokkos::ParallelForTag()); const policy_type policy(N / team_size, team_size, VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -538,8 +467,7 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { Kokkos::fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::TeamTagV1", policy, - functor_type(a, b)); + Kokkos::parallel_for("KokkosBatched::PerfTest::TeamTagV1", policy, functor_type(a, b)); Kokkos::fence(); const double t = timer.seconds(); @@ -548,24 +476,19 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { } tavg /= iter_end; - auto bsol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); + auto bsol = 
Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); Kokkos::deep_copy(bsol, b); double diff = 0; for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(bref(i, j, k) - - bsol(i, j, k)); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - bsol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V1" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols - << " TeamSize = " << std::setw(3) << team_size - << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = 0" + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -579,27 +502,21 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { /// Team policy V2 - team parallel /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, NumCols); + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, NumCols); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int is_blocked_algo = - (std::is_same::value), - mb = Algo::Trsm::Blocked::mb(), - mp = BlkSize % mb > 0; + const int is_blocked_algo = (std::is_same::value), + mb = Algo::Trsm::Blocked::mb(), mp = BlkSize % mb > 0; const int mblk = is_blocked_algo ? 
(BlkSize / mb + mp) : BlkSize; const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); - const int team_size = - std::min(std::max(NumCols, (mblk - 1) * mblk), max_team_size); + policy_type(N, Kokkos::AUTO, VectorLength).team_size_max(functor_type(), Kokkos::ParallelForTag()); + const int team_size = std::min(std::max(NumCols, (mblk - 1) * mblk), max_team_size); const policy_type policy(N, team_size, VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -613,8 +530,7 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { DeviceSpaceType().fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::TeamTagV2", policy, - functor_type(a, b)); + Kokkos::parallel_for("KokkosBatched::PerfTest::TeamTagV2", policy, functor_type(a, b)); DeviceSpaceType().fence(); const double t = timer.seconds(); @@ -623,24 +539,19 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { } tavg /= iter_end; - auto bsol = - Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); + auto bsol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); Kokkos::deep_copy(bsol, b); double diff = 0; for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(bref(i, j, k) - - bsol(i, j, k)); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - bsol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V2" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols - << " TeamSize = " << std::setw(3) << team_size - << " ScratchSize (KB) = 0" - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " TeamSize = " << std::setw(3) << team_size << 
" ScratchSize (KB) = 0" + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; @@ -654,33 +565,25 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { /// Team policy V3 - team parallel + sratch /// typedef Kokkos::View view_type; - view_type a("a", N * VectorLength, BlkSize, BlkSize), - b("b", N * VectorLength, BlkSize, NumCols); + view_type a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, NumCols); double tavg = 0, tmin = tmax; { - typedef Kokkos::TeamPolicy - policy_type; + typedef Kokkos::TeamPolicy policy_type; typedef Functor functor_type; - const int lvl = 0, - per_team_scratch = ScratchViewType::shmem_size( - VectorLength, BlkSize, BlkSize); + const int lvl = 0, per_team_scratch = ScratchViewType::shmem_size(VectorLength, BlkSize, BlkSize); if (per_team_scratch / 1024 < 48) { - const int is_blocked_algo = - (std::is_same::value), - mb = Algo::Trsm::Blocked::mb(), - mp = BlkSize % mb > 0; + const int is_blocked_algo = (std::is_same::value), + mb = Algo::Trsm::Blocked::mb(), mp = BlkSize % mb > 0; const int mblk = is_blocked_algo ? 
(BlkSize / mb + mp) : BlkSize; - const int max_team_size = - policy_type(N, Kokkos::AUTO, VectorLength) - .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); - const int team_size = - std::min(std::max(NumCols, (mblk - 1) * mblk), max_team_size); + const int max_team_size = policy_type(N, Kokkos::AUTO, VectorLength) + .set_scratch_size(lvl, Kokkos::PerTeam(per_team_scratch)) + .team_size_max(functor_type(), Kokkos::ParallelForTag()); + const int team_size = std::min(std::max(NumCols, (mblk - 1) * mblk), max_team_size); policy_type policy(N, team_size, VectorLength); for (int iter = iter_begin; iter < iter_end; ++iter) { @@ -694,8 +597,7 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { DeviceSpaceType().fence(); timer.reset(); - Kokkos::parallel_for("KokkosBatched::PerfTest::TeamTagV3", policy, - functor_type(a, b)); + Kokkos::parallel_for("KokkosBatched::PerfTest::TeamTagV3", policy, functor_type(a, b)); DeviceSpaceType().fence(); const double t = timer.seconds(); @@ -704,33 +606,27 @@ void Trsm(const int NN, const int BlkSize, const int NumCols) { } tavg /= iter_end; - auto bsol = Kokkos::create_mirror_view( - typename HostSpaceType::memory_space(), b); + auto bsol = Kokkos::create_mirror_view(typename HostSpaceType::memory_space(), b); Kokkos::deep_copy(bsol, b); double diff = 0; for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs(bref(i, j, k) - - bsol(i, j, k)); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - bsol(i, j, k)); std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V3" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols - << " TeamSize = " << std::setw(3) << team_size - << " ScratchSize (KB) = " << std::setw(3) - << (per_team_scratch / 1024) << " time = " << 
std::scientific - << tmin << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin); + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " TeamSize = " << std::setw(3) << team_size << " ScratchSize (KB) = " << std::setw(3) + << (per_team_scratch / 1024) << " time = " << std::scientific << tmin + << " avg flop/s = " << (flop / tavg) << " max flop/s = " << (flop / tmin); #if defined(__KOKKOSKERNELS_NVIDIA_CUBLAS__) std::cout << " diff to ref = " << diff; #endif std::cout << std::endl; } else { std::cout << std::setw(8) << "Kokkos" << std::setw(8) << "Team V3" - << " Scratch per team is too big (KB): " - << (per_team_scratch / 1024) << std::endl; + << " Scratch per team is too big (KB): " << (per_team_scratch / 1024) << std::endl; } } } diff --git a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Host.hpp b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Host.hpp index 52b2395b8d4c..5e8c6a6abc2d 100644 --- a/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Host.hpp +++ b/packages/kokkos-kernels/perf_test/batched/dense/do-not-use/KokkosBatched_Test_Trsm_Host.hpp @@ -30,7 +30,7 @@ #include "KokkosBatched_Trsm_Decl.hpp" #include "KokkosBatched_Trsm_Serial_Impl.hpp" -//#undef __KOKKOSBATCHED_INTEL_MKL_BATCHED__ +// #undef __KOKKOSBATCHED_INTEL_MKL_BATCHED__ namespace KokkosBatched { namespace PerfTest { @@ -54,41 +54,33 @@ typedef double value_type; double FlopCountLower(int mm, int nn) { double m = (double)mm; double n = (double)nn; - return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + - FLOP_ADD * (0.5 * m * n * (n - 1.0))); + return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + FLOP_ADD * (0.5 * m * n * (n - 1.0))); } double FlopCountUpper(int mm, int nn) { double m = (double)mm; double n = (double)nn; - return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + - FLOP_ADD * (0.5 * m * n * (n - 1.0))); + return (FLOP_MUL * (0.5 * 
m * n * (n + 1.0)) + FLOP_ADD * (0.5 * m * n * (n - 1.0))); } -template +template void Trsm(const int NN) { typedef Kokkos::Schedule ScheduleType; - constexpr int VectorLength = - DefaultVectorLength::value; - const int N = NN / VectorLength; + constexpr int VectorLength = DefaultVectorLength::value; + const int N = NN / VectorLength; { std::string value_type_name; if (std::is_same::value) value_type_name = "double"; - if (std::is_same >::value) - value_type_name = "Kokkos::complex"; + if (std::is_same >::value) value_type_name = "Kokkos::complex"; #if defined(__AVX512F__) - std::cout << "AVX512 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX512 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #elif defined(__AVX__) || defined(__AVX2__) - std::cout << "AVX or AVX2 is defined: datatype " << value_type_name - << " a vector length " << VectorLength << "\n"; + std::cout << "AVX or AVX2 is defined: datatype " << value_type_name << " a vector length " << VectorLength << "\n"; #else - std::cout << "SIMD (compiler vectorization) is defined: datatype " - << value_type_name << " a vector length " << VectorLength << "\n"; + std::cout << "SIMD (compiler vectorization) is defined: datatype " << value_type_name << " a vector length " + << VectorLength << "\n"; #endif } @@ -120,13 +112,11 @@ void Trsm(const int NN) { /// Reference version using MKL DTRSM /// Kokkos::View bref; - Kokkos::View amat( - "amat", N * VectorLength, BlkSize, BlkSize), + Kokkos::View amat("amat", N * VectorLength, BlkSize, BlkSize), bmat("bmat", N * VectorLength, BlkSize, NumCols); typedef Vector, VectorLength> VectorType; - Kokkos::View amat_simd( - "amat_simd", N, BlkSize, BlkSize), + Kokkos::View amat_simd("amat_simd", N, BlkSize, BlkSize), bmat_simd("bmat_simd", N, BlkSize, NumCols); Random random; @@ -154,8 +144,7 @@ void Trsm(const int NN) { /// #if defined(__KOKKOSBATCHED_INTEL_MKL__) { - 
Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize), + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, NumCols); { @@ -171,44 +160,32 @@ void Trsm(const int NN) { HostSpaceType().fence(); timer.reset(); - Kokkos::RangePolicy policy( - 0, N * VectorLength); + Kokkos::RangePolicy policy(0, N * VectorLength); Kokkos::parallel_for( - "KokkosBatched::PerfTest::TrsmHost::MKLOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::TrsmHost::MKLOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(b, k, Kokkos::ALL(), Kokkos::ALL()); switch (test) { case 0: - cblas_dtrsm(CblasRowMajor, CblasLeft, CblasLower, - CblasNoTrans, CblasUnit, BlkSize, NumCols, 1.0, - (double *)aa.data(), aa.stride_0(), - (double *)bb.data(), bb.stride_0()); + cblas_dtrsm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, BlkSize, NumCols, 1.0, + (double *)aa.data(), aa.stride_0(), (double *)bb.data(), bb.stride_0()); break; case 1: - cblas_dtrsm(CblasRowMajor, CblasLeft, CblasLower, - CblasNoTrans, CblasNonUnit, BlkSize, NumCols, 1.0, - (double *)aa.data(), aa.stride_0(), - (double *)bb.data(), bb.stride_0()); + cblas_dtrsm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, BlkSize, NumCols, 1.0, + (double *)aa.data(), aa.stride_0(), (double *)bb.data(), bb.stride_0()); break; case 2: - cblas_dtrsm(CblasRowMajor, CblasRight, CblasUpper, - CblasNoTrans, CblasUnit, BlkSize, NumCols, 1.0, - (double *)aa.data(), aa.stride_0(), - (double *)bb.data(), bb.stride_0()); + cblas_dtrsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, BlkSize, NumCols, 1.0, + (double *)aa.data(), aa.stride_0(), (double *)bb.data(), bb.stride_0()); break; case 3: - cblas_dtrsm(CblasRowMajor, CblasRight, CblasUpper, - CblasNoTrans, CblasNonUnit, BlkSize, NumCols, 1.0, - (double *)aa.data(), aa.stride_0(), - (double *)bb.data(), 
bb.stride_0()); + cblas_dtrsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, BlkSize, NumCols, 1.0, + (double *)aa.data(), aa.stride_0(), (double *)bb.data(), bb.stride_0()); break; case 4: - cblas_dtrsm(CblasRowMajor, CblasLeft, CblasUpper, - CblasNoTrans, CblasNonUnit, BlkSize, NumCols, 1.0, - (double *)aa.data(), aa.stride_0(), - (double *)bb.data(), bb.stride_0()); + cblas_dtrsm(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, BlkSize, NumCols, 1.0, + (double *)aa.data(), aa.stride_0(), (double *)bb.data(), bb.stride_0()); break; } }); @@ -223,24 +200,19 @@ void Trsm(const int NN) { double sum = 0; for (int i = 0, iend = b.extent(0); i < iend; ++i) for (int j = 0, jend = b.extent(1); j < jend; ++j) - for (int k = 0, kend = b.extent(2); k < kend; ++k) - sum += Kokkos::ArithTraits::abs(bmat(i, j, k)); + for (int k = 0, kend = b.extent(2); k < kend; ++k) sum += Kokkos::ArithTraits::abs(bmat(i, j, k)); std::cout << std::setw(10) << "MKL TRSM" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) << " sum abs(B) = " << sum - << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) + << " max flop/s = " << (flop / tmin) << " sum abs(B) = " << sum << std::endl; bref = b; } } #if defined(__KOKKOSBATCHED_INTEL_MKL_BATCHED__) { - Kokkos::View a( - "a", N * VectorLength, BlkSize, BlkSize), + Kokkos::View a("a", N * VectorLength, BlkSize, BlkSize), b("b", N * VectorLength, BlkSize, NumCols); value_type *aa[N * VectorLength], *bb[N * VectorLength]; @@ -280,8 +252,7 @@ void Trsm(const int NN) { CBLAS_TRANSPOSE transA[1] = {CblasNoTrans}; CBLAS_DIAG diag[1] = {CblasUnit}; - cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, - numcols, one, 
(const double **)aa, lda, + cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, numcols, one, (const double **)aa, lda, (double **)bb, ldb, 1, size_per_grp); break; } @@ -291,8 +262,7 @@ void Trsm(const int NN) { CBLAS_TRANSPOSE transA[1] = {CblasNoTrans}; CBLAS_DIAG diag[1] = {CblasNonUnit}; - cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, - numcols, one, (const double **)aa, lda, + cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, numcols, one, (const double **)aa, lda, (double **)bb, ldb, 1, size_per_grp); break; } @@ -302,8 +272,7 @@ void Trsm(const int NN) { CBLAS_TRANSPOSE transA[1] = {CblasNoTrans}; CBLAS_DIAG diag[1] = {CblasUnit}; - cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, - numcols, one, (const double **)aa, lda, + cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, numcols, one, (const double **)aa, lda, (double **)bb, ldb, 1, size_per_grp); break; } @@ -313,8 +282,7 @@ void Trsm(const int NN) { CBLAS_TRANSPOSE transA[1] = {CblasNoTrans}; CBLAS_DIAG diag[1] = {CblasNonUnit}; - cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, - numcols, one, (const double **)aa, lda, + cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, numcols, one, (const double **)aa, lda, (double **)bb, ldb, 1, size_per_grp); break; } @@ -324,8 +292,7 @@ void Trsm(const int NN) { CBLAS_TRANSPOSE transA[1] = {CblasNoTrans}; CBLAS_DIAG diag[1] = {CblasNonUnit}; - cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, - numcols, one, (const double **)aa, lda, + cblas_dtrsm_batch(CblasRowMajor, side, uplo, transA, diag, blksize, numcols, one, (const double **)aa, lda, (double **)bb, ldb, 1, size_per_grp); break; } @@ -342,24 +309,19 @@ void Trsm(const int NN) { for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += 
Kokkos::ArithTraits::abs(bref(i, j, k) - - b(i, j, k)); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - b(i, j, k)); std::cout << std::setw(10) << "MKL Batch" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) + << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff << std::endl; } } #endif #if defined(__KOKKOSBATCHED_INTEL_MKL_COMPACT_BATCHED__) { - Kokkos::View a( - "a", N, BlkSize, BlkSize), + Kokkos::View a("a", N, BlkSize, BlkSize), b("b", N, BlkSize, NumCols); { @@ -392,10 +354,9 @@ void Trsm(const int NN) { MKL_TRANSPOSE transA = MKL_NOTRANS; MKL_DIAG diag = MKL_UNIT; - mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, - BlkSize, NumCols, one, (const double *)a.data(), - a.stride_1(), (double *)b.data(), b.stride_1(), - format, (MKL_INT)N * VectorLength); + mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, BlkSize, NumCols, one, + (const double *)a.data(), a.stride_1(), (double *)b.data(), b.stride_1(), format, + (MKL_INT)N * VectorLength); break; } case 1: { @@ -404,10 +365,9 @@ void Trsm(const int NN) { MKL_TRANSPOSE transA = MKL_NOTRANS; MKL_DIAG diag = MKL_NONUNIT; - mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, - BlkSize, NumCols, one, (const double *)a.data(), - a.stride_1(), (double *)b.data(), b.stride_1(), - format, (MKL_INT)N * VectorLength); + mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, BlkSize, NumCols, one, + (const double *)a.data(), a.stride_1(), (double *)b.data(), b.stride_1(), format, + (MKL_INT)N * VectorLength); break; } case 2: { @@ -416,10 +376,9 @@ void Trsm(const int NN) { MKL_TRANSPOSE transA = 
MKL_NOTRANS; MKL_DIAG diag = MKL_UNIT; - mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, - BlkSize, NumCols, one, (const double *)a.data(), - a.stride_1(), (double *)b.data(), b.stride_1(), - format, (MKL_INT)N * VectorLength); + mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, BlkSize, NumCols, one, + (const double *)a.data(), a.stride_1(), (double *)b.data(), b.stride_1(), format, + (MKL_INT)N * VectorLength); break; } case 3: { @@ -428,10 +387,9 @@ void Trsm(const int NN) { MKL_TRANSPOSE transA = MKL_NOTRANS; MKL_DIAG diag = MKL_NONUNIT; - mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, - BlkSize, NumCols, one, (const double *)a.data(), - a.stride_1(), (double *)b.data(), b.stride_1(), - format, (MKL_INT)N * VectorLength); + mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, BlkSize, NumCols, one, + (const double *)a.data(), a.stride_1(), (double *)b.data(), b.stride_1(), format, + (MKL_INT)N * VectorLength); break; } case 4: { @@ -440,10 +398,9 @@ void Trsm(const int NN) { MKL_TRANSPOSE transA = MKL_NOTRANS; MKL_DIAG diag = MKL_NONUNIT; - mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, - BlkSize, NumCols, one, (const double *)a.data(), - a.stride_1(), (double *)b.data(), b.stride_1(), - format, (MKL_INT)N * VectorLength); + mkl_dtrsm_compact(MKL_ROW_MAJOR, side, uplo, transA, diag, BlkSize, NumCols, one, + (const double *)a.data(), a.stride_1(), (double *)b.data(), b.stride_1(), format, + (MKL_INT)N * VectorLength); break; } } @@ -459,16 +416,12 @@ void Trsm(const int NN) { for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs( - bref(i, j, k) - b(i / VectorLength, j, k)[i % VectorLength]); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - b(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(10) << "MKL Cmpt" - << " BlkSize = " << std::setw(3) << BlkSize 
- << " NumCols = " << std::setw(3) << NumCols - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) + << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff << std::endl; } } } @@ -557,8 +510,7 @@ void Trsm(const int NN) { /// SIMD with appropriate data layout /// { - Kokkos::View a( - "a", N, BlkSize, BlkSize), + Kokkos::View a("a", N, BlkSize, BlkSize), b("b", N, BlkSize, NumCols); { @@ -576,31 +528,29 @@ void Trsm(const int NN) { Kokkos::RangePolicy policy(0, N); Kokkos::parallel_for( - "KokkosBatched::PerfTest::TrsmHost::SIMDSerialOpenMP", policy, - KOKKOS_LAMBDA(const int k) { + "KokkosBatched::PerfTest::TrsmHost::SIMDSerialOpenMP", policy, KOKKOS_LAMBDA(const int k) { auto aa = Kokkos::subview(a, k, Kokkos::ALL(), Kokkos::ALL()); auto bb = Kokkos::subview(b, k, Kokkos::ALL(), Kokkos::ALL()); switch (test) { case 0: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, bb); break; case 1: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, + bb); break; case 2: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, + bb); break; case 3: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, + bb); break; case 4: - SerialTrsm::invoke(1.0, aa, bb); + SerialTrsm::invoke(1.0, aa, + bb); break; } }); @@ -616,16 +566,12 @@ void Trsm(const int NN) { for (int i = 0, iend = bref.extent(0); i < iend; ++i) for (int j = 0, jend = bref.extent(1); j < jend; ++j) for (int k = 0, kend = bref.extent(2); k < kend; ++k) - diff += Kokkos::ArithTraits::abs( - bref(i, j, k) - b(i / VectorLength, j, k)[i % VectorLength]); + diff += Kokkos::ArithTraits::abs(bref(i, j, k) - b(i / VectorLength, j, k)[i % VectorLength]); std::cout << std::setw(10) << "KK 
Vector" - << " BlkSize = " << std::setw(3) << BlkSize - << " NumCols = " << std::setw(3) << NumCols - << " time = " << std::scientific << tmin - << " avg flop/s = " << (flop / tavg) - << " max flop/s = " << (flop / tmin) - << " diff to ref = " << diff << std::endl; + << " BlkSize = " << std::setw(3) << BlkSize << " NumCols = " << std::setw(3) << NumCols + << " time = " << std::scientific << tmin << " avg flop/s = " << (flop / tavg) + << " max flop/s = " << (flop / tmin) << " diff to ref = " << diff << std::endl; } } std::cout << "\n\n"; diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_1.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_1.hpp index e289f8fa5257..572248021214 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_1.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_1.hpp @@ -14,8 +14,8 @@ // //@HEADER -template +template struct Functor_TestBatchedTeamVectorCG_1 { const ValuesViewType _D; const IntView _r; @@ -26,12 +26,9 @@ struct Functor_TestBatchedTeamVectorCG_1 { KrylovHandleType _handle; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorCG_1(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B, const int N_team, - const int team_size, - const int vector_length, - KrylovHandleType &handle) + Functor_TestBatchedTeamVectorCG_1(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int N_team, + const int team_size, const int vector_length, KrylovHandleType &handle) : _D(D), _r(r), _c(c), @@ -47,20 +44,15 @@ struct Functor_TestBatchedTeamVectorCG_1 { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, 
last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; Operator A(d, _r, _c); - KokkosBatched::TeamVectorCG::template invoke( - member, A, b, x, _handle); + KokkosBatched::TeamVectorCG::template invoke(member, A, b, x, _handle); } inline double run() { @@ -70,13 +62,10 @@ struct Functor_TestBatchedTeamVectorCG_1 { _handle.set_memory_strategy(1); - _handle.tmp_view = typename KrylovHandleType::TemporaryViewType( - "", _X.extent(0), 4 * _X.extent(1)); + _handle.tmp_view = typename KrylovHandleType::TemporaryViewType("", _X.extent(0), 4 * _X.extent(1)); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_2.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_2.hpp index b3451938c5ce..5749d640d0bc 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_2.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_2.hpp @@ -14,8 +14,8 @@ // //@HEADER -template +template struct Functor_TestBatchedTeamVectorCG_2 { const 
ValuesViewType _D; const IntView _r; @@ -26,12 +26,9 @@ struct Functor_TestBatchedTeamVectorCG_2 { KrylovHandleType _handle; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorCG_2(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B, const int N_team, - const int team_size, - const int vector_length, - KrylovHandleType &handle) + Functor_TestBatchedTeamVectorCG_2(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int N_team, + const int team_size, const int vector_length, KrylovHandleType &handle) : _D(D), _r(r), _c(c), @@ -47,41 +44,27 @@ struct Functor_TestBatchedTeamVectorCG_2 { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - using TeamVectorCopy1D = - KokkosBatched::TeamVectorCopy; + using TeamVectorCopy1D = KokkosBatched::TeamVectorCopy; - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - using ScratchPadIntViewType = - Kokkos::View; + using ScratchPadIntViewType = Kokkos::View; - using Operator = - KokkosBatched::CrsMatrix; + using Operator = KokkosBatched::CrsMatrix; - ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), - _r.extent(0) + _c.extent(0)); + ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), _r.extent(0) + _c.extent(0)); - auto r = - Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); - auto c = 
Kokkos::subview( - tmp_1D_int, - Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); + auto r = Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); + auto c = Kokkos::subview(tmp_1D_int, Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); TeamVectorCopy1D::invoke(member, _r, r); TeamVectorCopy1D::invoke(member, _c, c); Operator A(d, r, c); - KokkosBatched::TeamVectorCG::template invoke( - member, A, b, x, _handle); + KokkosBatched::TeamVectorCG::template invoke(member, A, b, x, _handle); } inline double run() { @@ -91,13 +74,10 @@ struct Functor_TestBatchedTeamVectorCG_2 { _handle.set_memory_strategy(1); - _handle.tmp_view = typename KrylovHandleType::TemporaryViewType( - "", _X.extent(0), 4 * _X.extent(1)); + _handle.tmp_view = typename KrylovHandleType::TemporaryViewType("", _X.extent(0), 4 * _X.extent(1)); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_3.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_3.hpp index 3dbfca7f15c5..9df01fd5f0b6 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_3.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/CG/Functor_TestBatchedTeamVectorCG_3.hpp @@ -14,8 +14,8 @@ // //@HEADER -template +template struct Functor_TestBatchedTeamVectorCG_3 { const ValuesViewType _D; const IntView _r; @@ -26,12 +26,9 @@ struct Functor_TestBatchedTeamVectorCG_3 { KrylovHandleType _handle; KOKKOS_INLINE_FUNCTION - 
Functor_TestBatchedTeamVectorCG_3(const ValuesViewType &D, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B, const int N_team, - const int team_size, - const int vector_length, - KrylovHandleType &handle) + Functor_TestBatchedTeamVectorCG_3(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int N_team, + const int team_size, const int vector_length, KrylovHandleType &handle) : _D(D), _r(r), _c(c), @@ -47,41 +44,27 @@ struct Functor_TestBatchedTeamVectorCG_3 { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - using TeamVectorCopy1D = - KokkosBatched::TeamVectorCopy; + using TeamVectorCopy1D = KokkosBatched::TeamVectorCopy; - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - using ScratchPadIntViewType = - Kokkos::View; + using ScratchPadIntViewType = Kokkos::View; - using Operator = - KokkosBatched::CrsMatrix; + using Operator = KokkosBatched::CrsMatrix; - ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), - _r.extent(0) + _c.extent(0)); + ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), _r.extent(0) + _c.extent(0)); - auto r = - Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); - auto c = Kokkos::subview( - tmp_1D_int, - Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); + auto r = Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, 
(int)_r.extent(0))); + auto c = Kokkos::subview(tmp_1D_int, Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); TeamVectorCopy1D::invoke(member, _r, r); TeamVectorCopy1D::invoke(member, _c, c); Operator A(d, r, c); - KokkosBatched::TeamVectorCG::template invoke( - member, A, b, x, _handle); + KokkosBatched::TeamVectorCG::template invoke(member, A, b, x, _handle); } inline double run() { @@ -91,10 +74,8 @@ struct Functor_TestBatchedTeamVectorCG_3 { _handle.set_memory_strategy(0); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) @@ -106,7 +87,7 @@ struct Functor_TestBatchedTeamVectorCG_3 { size_t bytes_col_idc = IntView::shmem_size(_c.extent(0)); size_t bytes_int = bytes_row_ptr + bytes_col_idc; size_t bytes_0 = ValuesViewType::shmem_size(_N_team, 5); - size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 4 * _X.extent(1)); + size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 4 * _X.extent(1)); policy.set_scratch_size(0, Kokkos::PerTeam(bytes_int + bytes_0 + bytes_1)); diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/CG/KokkosBatched_Test_CG.cpp b/packages/kokkos-kernels/perf_test/batched/sparse/CG/KokkosBatched_Test_CG.cpp index 5bf6061fe4f3..e0440ddbfdff 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/CG/KokkosBatched_Test_CG.cpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/CG/KokkosBatched_Test_CG.cpp @@ -73,50 +73,41 @@ int main(int argc, char *argv[]) { for (int i = 1; i < argc; ++i) { const std::string &token = argv[i]; if (token == std::string("--help") || token == std::string("-h")) { - std::cout - << "Kokkos Batched CG 
performance test options:" << std::endl - << "-A : Filename of the input batched matrix." - << std::endl - << "-B : Filename of the input batched right-hand " - "side." - << std::endl - << "-X : Filename of the output batched solution." - << std::endl - << "-res : Filename of the output residual history." - << std::endl - << "-timers : Filename of the output timers." - << std::endl - << "-n1 : Number of repetitions of the experience." - << std::endl - << "-n2 : Number of the kernel calls inside one " - "experience." - << std::endl - << "-team_size : Used team size." << std::endl - << "-n_implementations: Number of implementations to use: test " - "all " - "implementations [0, specified number -1]." - << std::endl - << "-implementation : Specify only one implementation at a time." - << std::endl - << " Note: implementation 0 : use scratch pad " - "only for scalar temporary variable." - << std::endl - << " Note: implementation 1 : use scratch pad " - "for scalar temporary variables and for the graph of the " - "matrices." - << std::endl - << " Note: implementation 2 : use scratch pad " - "for scalar and vector temporary variables and for the graph of " - "the matrices." - << std::endl - << "-l : Specify left layout." << std::endl - << "-r : Specify right layout." << std::endl - << "-C : Specify if the convergence is monitored." - << std::endl - << "-N_team : Specify the number of systems per team." - << std::endl - << "-vector_length : Specify the vector length." << std::endl - << std::endl; + std::cout << "Kokkos Batched CG performance test options:" << std::endl + << "-A : Filename of the input batched matrix." << std::endl + << "-B : Filename of the input batched right-hand " + "side." + << std::endl + << "-X : Filename of the output batched solution." << std::endl + << "-res : Filename of the output residual history." << std::endl + << "-timers : Filename of the output timers." << std::endl + << "-n1 : Number of repetitions of the experience." 
<< std::endl + << "-n2 : Number of the kernel calls inside one " + "experience." + << std::endl + << "-team_size : Used team size." << std::endl + << "-n_implementations: Number of implementations to use: test " + "all " + "implementations [0, specified number -1]." + << std::endl + << "-implementation : Specify only one implementation at a time." << std::endl + << " Note: implementation 0 : use scratch pad " + "only for scalar temporary variable." + << std::endl + << " Note: implementation 1 : use scratch pad " + "for scalar temporary variables and for the graph of the " + "matrices." + << std::endl + << " Note: implementation 2 : use scratch pad " + "for scalar and vector temporary variables and for the graph of " + "the matrices." + << std::endl + << "-l : Specify left layout." << std::endl + << "-r : Specify right layout." << std::endl + << "-C : Specify if the convergence is monitored." << std::endl + << "-N_team : Specify the number of systems per team." << std::endl + << "-vector_length : Specify the vector length." 
<< std::endl + << std::endl; return 0; } if (token == std::string("-A")) name_A = argv[++i]; @@ -131,10 +122,8 @@ int main(int argc, char *argv[]) { if (token == std::string("-n1")) n_rep_1 = std::atoi(argv[++i]); if (token == std::string("-n2")) n_rep_2 = std::atoi(argv[++i]); if (token == std::string("-team_size")) team_size = std::atoi(argv[++i]); - if (token == std::string("-n_implementations")) - n_impl = std::atoi(argv[++i]); - if (token == std::string("-implementation")) - impls.push_back(std::atoi(argv[++i])); + if (token == std::string("-n_implementations")) n_impl = std::atoi(argv[++i]); + if (token == std::string("-implementation")) impls.push_back(std::atoi(argv[++i])); if (token == std::string("-l")) { layout_left = true; layout_right = false; @@ -144,10 +133,8 @@ int main(int argc, char *argv[]) { layout_right = true; } if (token == std::string("-C")) monitor_convergence = true; - if (token == std::string("-N_team")) - N_team_potential = std::atoi(argv[++i]); - if (token == std::string("-vector_length")) - vector_length = std::atoi(argv[++i]); + if (token == std::string("-N_team")) N_team_potential = std::atoi(argv[++i]); + if (token == std::string("-vector_length")) vector_length = std::atoi(argv[++i]); } int N, Blk, nnz, ncols; @@ -157,16 +144,14 @@ int main(int argc, char *argv[]) { if (impls.size() == 0) for (int i = 0; i < n_impl; ++i) impls.push_back(i); - std::cout << "N_team_potential = " << N_team_potential << ", n = " << Blk - << ", N = " << N << ", team_size = " << team_size - << ", vector_length = " << vector_length << std::endl; + std::cout << "N_team_potential = " << N_team_potential << ", n = " << Blk << ", N = " << N + << ", team_size = " << team_size << ", vector_length = " << vector_length << std::endl; // V100 L2 cache 6MB per core constexpr size_t LLC_CAPACITY = 80 * 6 * 1024 * 1024; KokkosBatched::Flush flush; - printf(" :::: CG Testing (N = %d, Blk = %d, nnz = %d, vl = %d, n = %d)\n", - N, Blk, nnz, vector_length, n_rep_1); + 
printf(" :::: CG Testing (N = %d, Blk = %d, nnz = %d, vl = %d, n = %d)\n", N, Blk, nnz, vector_length, n_rep_1); typedef Kokkos::LayoutRight LR; typedef Kokkos::LayoutLeft LL; @@ -193,12 +178,9 @@ int main(int argc, char *argv[]) { XYTypeLL yLL("values", N, Blk); if (layout_left) - printf(" :::: Testing left layout (team_size = %d, vector_length = %d)\n", - team_size, vector_length); + printf(" :::: Testing left layout (team_size = %d, vector_length = %d)\n", team_size, vector_length); if (layout_right) - printf( - " :::: Testing right layout (team_size = %d, vector_length = %d)\n", - team_size, vector_length); + printf(" :::: Testing right layout (team_size = %d, vector_length = %d)\n", team_size, vector_length); if (layout_left) { readCRSFromMM(name_A, valuesLL, rowOffsets, colIndices); @@ -226,9 +208,7 @@ int main(int argc, char *argv[]) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KokkosBatched::KrylovHandle; + using KrylovHandleType = KokkosBatched::KrylovHandle; KrylovHandleType handle(N, N_team); handle.set_scratch_pad_level(0); @@ -246,56 +226,38 @@ int main(int argc, char *argv[]) { if (i_impl == 0 && layout_left) { t_spmv += - Functor_TestBatchedTeamVectorCG_1( - valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, - team_size, vector_length, handle) + Functor_TestBatchedTeamVectorCG_1( + valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, handle) .run(); } if (i_impl == 1 && layout_left) { t_spmv += - Functor_TestBatchedTeamVectorCG_2( - valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, - team_size, vector_length, handle) + Functor_TestBatchedTeamVectorCG_2( + valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, handle) .run(); } if (i_impl == 2 && layout_left) { t_spmv += - Functor_TestBatchedTeamVectorCG_3( - valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, - team_size, vector_length, handle) + Functor_TestBatchedTeamVectorCG_3( + valuesLL, 
rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, handle) .run(); } if (i_impl == 0 && layout_right) { t_spmv += - Functor_TestBatchedTeamVectorCG_1( - valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, - team_size, vector_length, handle) + Functor_TestBatchedTeamVectorCG_1( + valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, handle) .run(); } if (i_impl == 1 && layout_right) { t_spmv += - Functor_TestBatchedTeamVectorCG_2( - valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, - team_size, vector_length, handle) + Functor_TestBatchedTeamVectorCG_2( + valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, handle) .run(); } if (i_impl == 2 && layout_right) { t_spmv += - Functor_TestBatchedTeamVectorCG_3( - valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, - team_size, vector_length, handle) + Functor_TestBatchedTeamVectorCG_3( + valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, handle) .run(); } exec_space().fence(); @@ -310,10 +272,8 @@ int main(int argc, char *argv[]) { { std::ofstream myfile; std::string name; - if (layout_left) - name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; - if (layout_right) - name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; + if (layout_left) name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; + if (layout_right) name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; myfile.open(name); @@ -326,15 +286,10 @@ int main(int argc, char *argv[]) { double average_time = 0.; - for (size_t i = 0; i < timers.size(); ++i) - average_time += timers[i] / timers.size(); + for (size_t i = 0; i < timers.size(); ++i) average_time += timers[i] / timers.size(); - if (layout_left) - printf("Left layout: Implementation %d: solve time = %f\n", i_impl, - average_time); - if (layout_right) - printf("Right layout: Implementation %d: solve time = %f\n", i_impl, - average_time); + if (layout_left) printf("Left 
layout: Implementation %d: solve time = %f\n", i_impl, average_time); + if (layout_right) printf("Right layout: Implementation %d: solve time = %f\n", i_impl, average_time); if (layout_left) { writeArrayToMM(name_X + std::to_string(i_impl) + "_l.mm", xLL); @@ -343,8 +298,7 @@ int main(int argc, char *argv[]) { writeArrayToMM(name_X + std::to_string(i_impl) + "_r.mm", xLR); } if (monitor_convergence) { - writeArrayToMM(name_conv + std::to_string(i_impl) + ".mm", - handle.residual_norms); + writeArrayToMM(name_conv + std::to_string(i_impl) + ".mm", handle.residual_norms); } } } diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_1.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_1.hpp index 0640ac815150..068960bbb603 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_1.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_1.hpp @@ -14,8 +14,8 @@ // //@HEADER -template +template struct Functor_TestBatchedTeamVectorGMRES_1 { const ValuesViewType _D; const ValuesViewType _diag; @@ -32,12 +32,11 @@ struct Functor_TestBatchedTeamVectorGMRES_1 { KrylovHandleType _handle; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES_1( - const ValuesViewType &D, const IntView &r, const IntView &c, - const VectorViewType &X, const VectorViewType &B, const int N_team, - const int team_size, const int vector_length, const int N_iteration, - const double tol, const int ortho_strategy, const int arnoldi_level, - const int other_level, KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES_1(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int N_team, + const int team_size, const int vector_length, const int N_iteration, + const double tol, const int ortho_strategy, const int arnoldi_level, + const int 
other_level, KrylovHandleType &handle) : _D(D), _r(r), _c(c), @@ -54,12 +53,11 @@ struct Functor_TestBatchedTeamVectorGMRES_1 { _handle(handle) {} KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES_1( - const ValuesViewType &D, const ValuesViewType &diag, const IntView &r, - const IntView &c, const VectorViewType &X, const VectorViewType &B, - const int N_team, const int team_size, const int vector_length, - const int N_iteration, const double tol, int ortho_strategy, - const int arnoldi_level, const int other_level, KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES_1(const ValuesViewType &D, const ValuesViewType &diag, const IntView &r, + const IntView &c, const VectorViewType &X, const VectorViewType &B, + const int N_team, const int team_size, const int vector_length, + const int N_iteration, const double tol, int ortho_strategy, + const int arnoldi_level, const int other_level, KrylovHandleType &handle) : _D(D), _diag(diag), _r(r), @@ -81,31 +79,25 @@ struct Functor_TestBatchedTeamVectorGMRES_1 { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); using Operator = KokkosBatched::CrsMatrix; Operator A(d, _r, _c); if (UsePrec) { - auto diag = Kokkos::subview( - _diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto diag = Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), 
Kokkos::ALL); using PrecOperator = KokkosBatched::JacobiPrec; PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType, PrecOperator, KrylovHandleType>( - member, A, b, x, P, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, P, _handle); } else { - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType>(member, A, b, x, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, _handle); } } @@ -118,13 +110,11 @@ struct Functor_TestBatchedTeamVectorGMRES_1 { _handle.set_memory_strategy(1); - _handle.tmp_view = typename KrylovHandleType::TemporaryViewType( - "", _X.extent(0), _X.extent(1) + maximum_iteration + 3); + _handle.tmp_view = + typename KrylovHandleType::TemporaryViewType("", _X.extent(0), _X.extent(1) + maximum_iteration + 3); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_2.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_2.hpp index 3970b7e94afd..22e735c30493 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_2.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_2.hpp @@ -14,8 +14,8 @@ // //@HEADER -template +template struct Functor_TestBatchedTeamVectorGMRES_2 { const ValuesViewType _D; const ValuesViewType _diag; @@ -32,12 +32,11 @@ struct Functor_TestBatchedTeamVectorGMRES_2 { KrylovHandleType 
_handle; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES_2( - const ValuesViewType &D, const IntView &r, const IntView &c, - const VectorViewType &X, const VectorViewType &B, const int N_team, - const int team_size, const int vector_length, const int N_iteration, - const double tol, const int ortho_strategy, const int arnoldi_level, - const int other_level, KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES_2(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int N_team, + const int team_size, const int vector_length, const int N_iteration, + const double tol, const int ortho_strategy, const int arnoldi_level, + const int other_level, KrylovHandleType &handle) : _D(D), _r(r), _c(c), @@ -54,12 +53,11 @@ struct Functor_TestBatchedTeamVectorGMRES_2 { _handle(handle) {} KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES_2( - const ValuesViewType &D, const ValuesViewType &diag, const IntView &r, - const IntView &c, const VectorViewType &X, const VectorViewType &B, - const int N_team, const int team_size, const int vector_length, - const int N_iteration, const double tol, int ortho_strategy, - const int arnoldi_level, const int other_level, KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES_2(const ValuesViewType &D, const ValuesViewType &diag, const IntView &r, + const IntView &c, const VectorViewType &X, const VectorViewType &B, + const int N_team, const int team_size, const int vector_length, + const int N_iteration, const double tol, int ortho_strategy, + const int arnoldi_level, const int other_level, KrylovHandleType &handle) : _D(D), _diag(diag), _r(r), @@ -81,60 +79,41 @@ struct Functor_TestBatchedTeamVectorGMRES_2 { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - using TeamVectorCopy1D = - KokkosBatched::TeamVectorCopy; - - auto d = Kokkos::subview(_D, 
Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - - using ScratchPadIntViewType = - Kokkos::View; - using ScratchPadValuesViewType = Kokkos::View< - typename ValuesViewType::non_const_value_type **, - typename ValuesViewType::array_layout, - typename ValuesViewType::execution_space::scratch_memory_space>; - using Operator = - KokkosBatched::CrsMatrix; - - ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), - _r.extent(0) + _c.extent(0)); - - auto r = - Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); - auto c = Kokkos::subview( - tmp_1D_int, - Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); + using TeamVectorCopy1D = KokkosBatched::TeamVectorCopy; + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + + using ScratchPadIntViewType = Kokkos::View; + using ScratchPadValuesViewType = + Kokkos::View; + using Operator = KokkosBatched::CrsMatrix; + + ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), _r.extent(0) + _c.extent(0)); + + auto r = Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); + auto c = Kokkos::subview(tmp_1D_int, Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); TeamVectorCopy1D::invoke(member, _r, r); TeamVectorCopy1D::invoke(member, _c, c); Operator A(d, r, c); if (UsePrec) { - ScratchPadValuesViewType diag( - member.team_scratch(0), last_matrix - first_matrix, _diag.extent(1)); + ScratchPadValuesViewType diag(member.team_scratch(0), last_matrix - first_matrix, _diag.extent(1)); using PrecOperator = KokkosBatched::JacobiPrec; 
KokkosBatched::TeamVectorCopy::invoke( - member, - Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL), - diag); + member, Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL), diag); PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType, PrecOperator, KrylovHandleType>( - member, A, b, x, P, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, P, _handle); } else { - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType>(member, A, b, x, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, _handle); } } @@ -143,10 +122,8 @@ struct Functor_TestBatchedTeamVectorGMRES_2 { Kokkos::Timer timer; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) @@ -158,8 +135,8 @@ struct Functor_TestBatchedTeamVectorGMRES_2 { _handle.set_memory_strategy(1); - _handle.tmp_view = typename KrylovHandleType::TemporaryViewType( - "", _X.extent(0), _X.extent(1) + maximum_iteration + 3); + _handle.tmp_view = + typename KrylovHandleType::TemporaryViewType("", _X.extent(0), _X.extent(1) + maximum_iteration + 3); using ScalarType = typename ValuesViewType::non_const_value_type; using Layout = typename ValuesViewType::array_layout; diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_3.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_3.hpp index 013984b3d195..7c7d9103b289 100644 --- 
a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_3.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/Functor_TestBatchedTeamVectorGMRES_3.hpp @@ -14,8 +14,8 @@ // //@HEADER -template +template struct Functor_TestBatchedTeamVectorGMRES_3 { const ValuesViewType _D; const ValuesViewType _diag; @@ -32,12 +32,11 @@ struct Functor_TestBatchedTeamVectorGMRES_3 { KrylovHandleType _handle; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES_3( - const ValuesViewType &D, const IntView &r, const IntView &c, - const VectorViewType &X, const VectorViewType &B, const int N_team, - const int team_size, const int vector_length, const int N_iteration, - const double tol, const int ortho_strategy, const int arnoldi_level, - const int other_level, KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES_3(const ValuesViewType &D, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B, const int N_team, + const int team_size, const int vector_length, const int N_iteration, + const double tol, const int ortho_strategy, const int arnoldi_level, + const int other_level, KrylovHandleType &handle) : _D(D), _r(r), _c(c), @@ -54,12 +53,11 @@ struct Functor_TestBatchedTeamVectorGMRES_3 { _handle(handle) {} KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorGMRES_3( - const ValuesViewType &D, const ValuesViewType &diag, const IntView &r, - const IntView &c, const VectorViewType &X, const VectorViewType &B, - const int N_team, const int team_size, const int vector_length, - const int N_iteration, const double tol, int ortho_strategy, - const int arnoldi_level, const int other_level, KrylovHandleType &handle) + Functor_TestBatchedTeamVectorGMRES_3(const ValuesViewType &D, const ValuesViewType &diag, const IntView &r, + const IntView &c, const VectorViewType &X, const VectorViewType &B, + const int N_team, const int team_size, const int vector_length, + const int N_iteration, const double 
tol, int ortho_strategy, + const int arnoldi_level, const int other_level, KrylovHandleType &handle) : _D(D), _diag(diag), _r(r), @@ -81,60 +79,41 @@ struct Functor_TestBatchedTeamVectorGMRES_3 { const int first_matrix = _handle.first_index(member.league_rank()); const int last_matrix = _handle.last_index(member.league_rank()); - using TeamVectorCopy1D = - KokkosBatched::TeamVectorCopy; - - auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL); - - using ScratchPadIntViewType = - Kokkos::View; - using ScratchPadValuesViewType = Kokkos::View< - typename ValuesViewType::non_const_value_type **, - typename ValuesViewType::array_layout, - typename ValuesViewType::execution_space::scratch_memory_space>; - using Operator = - KokkosBatched::CrsMatrix; - - ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), - _r.extent(0) + _c.extent(0)); - - auto r = - Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); - auto c = Kokkos::subview( - tmp_1D_int, - Kokkos::make_pair((int)_r.extent(0), (int)tmp_1D_int.extent(0))); + using TeamVectorCopy1D = KokkosBatched::TeamVectorCopy; + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + + using ScratchPadIntViewType = Kokkos::View; + using ScratchPadValuesViewType = + Kokkos::View; + using Operator = KokkosBatched::CrsMatrix; + + ScratchPadIntViewType tmp_1D_int(member.team_scratch(0), _r.extent(0) + _c.extent(0)); + + auto r = Kokkos::subview(tmp_1D_int, Kokkos::make_pair(0, (int)_r.extent(0))); + auto c = Kokkos::subview(tmp_1D_int, Kokkos::make_pair((int)_r.extent(0), 
(int)tmp_1D_int.extent(0))); TeamVectorCopy1D::invoke(member, _r, r); TeamVectorCopy1D::invoke(member, _c, c); Operator A(d, r, c); if (UsePrec) { - ScratchPadValuesViewType diag( - member.team_scratch(0), last_matrix - first_matrix, _diag.extent(1)); + ScratchPadValuesViewType diag(member.team_scratch(0), last_matrix - first_matrix, _diag.extent(1)); using PrecOperator = KokkosBatched::JacobiPrec; KokkosBatched::TeamVectorCopy::invoke( - member, - Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), - Kokkos::ALL), - diag); + member, Kokkos::subview(_diag, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL), diag); PrecOperator P(diag); P.setComputedInverse(); - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType, PrecOperator, KrylovHandleType>( - member, A, b, x, P, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, P, _handle); } else { - KokkosBatched::TeamVectorGMRES::template invoke< - Operator, VectorViewType>(member, A, b, x, _handle); + KokkosBatched::TeamVectorGMRES::template invoke(member, A, b, x, _handle); } } @@ -143,10 +122,8 @@ struct Functor_TestBatchedTeamVectorGMRES_3 { Kokkos::Timer timer; Kokkos::Profiling::pushRegion(name.c_str()); - Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), - Kokkos::AUTO(), Kokkos::AUTO()); - Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), - _team_size, _vector_length); + Kokkos::TeamPolicy auto_policy(_handle.get_number_of_teams(), Kokkos::AUTO(), Kokkos::AUTO()); + Kokkos::TeamPolicy tuned_policy(_handle.get_number_of_teams(), _team_size, _vector_length); Kokkos::TeamPolicy policy; if (_team_size < 1) @@ -168,14 +145,13 @@ struct Functor_TestBatchedTeamVectorGMRES_3 { size_t bytes_row_ptr = IntView::shmem_size(_r.extent(0)); size_t bytes_col_idc = IntView::shmem_size(_c.extent(0)); size_t bytes_2D_1 = ViewType2D::shmem_size(_N_team, _X.extent(1)); - size_t bytes_2D_2 = ViewType2D::shmem_size(_N_team, 
maximum_iteration + 1); + size_t bytes_2D_2 = ViewType2D::shmem_size(_N_team, maximum_iteration + 1); size_t bytes_int = bytes_row_ptr + bytes_col_idc; size_t bytes_diag = bytes_2D_1; size_t bytes_tmp = 2 * bytes_2D_1 + 2 * bytes_1D + bytes_2D_2; - policy.set_scratch_size( - 0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); + policy.set_scratch_size(0, Kokkos::PerTeam(bytes_tmp + bytes_diag + bytes_int)); exec_space().fence(); timer.reset(); diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/KokkosBatched_Test_GMRES.cpp b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/KokkosBatched_Test_GMRES.cpp index c0ce8f0bd472..f69ccadd7e27 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/KokkosBatched_Test_GMRES.cpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/GMRES/KokkosBatched_Test_GMRES.cpp @@ -82,60 +82,50 @@ int main(int argc, char *argv[]) { for (int i = 1; i < argc; ++i) { const std::string &token = argv[i]; if (token == std::string("--help") || token == std::string("-h")) { - std::cout - << "Kokkos Batched GMRES performance test options:" << std::endl - << "-A : Filename of the input batched matrix." - << std::endl - << "-B : Filename of the input batched right-hand " - "side." - << std::endl - << "-X : Filename of the output batched solution." - << std::endl - << "-res : Filename of the output residual history." - << std::endl - << "-timers : Filename of the output timers." - << std::endl - << "-ortho_strategy : Select the orthogonalization strategy." - << std::endl - << "-arnoldi_level : Select the scratch pad level (if used) " - "where Arnoldi vectors are stored." - << std::endl - << "-other_level : Select the scratch pad level (if used) " - "where everything except the Arnoldi vectors are stored." - << std::endl - << "-n1 : Number of repetitions of the experience." - << std::endl - << "-n2 : Number of the kernel calls inside one " - "experience." - << std::endl - << "-team_size : Used team size." 
<< std::endl - << "-n_implementations: Number of implementations to use: test " - "all " - "implementations [0, specified number -1]." - << std::endl - << "-implementation : Specify only one implementation at a time." - << std::endl - << " Note: implementation 0 : does not use " - "scratch pad." - << std::endl - << " Note: implementation 1 : use scratch pad " - "for the graph and for the diagonal entries of the matrices." - << std::endl - << " Note: implementation 2 : use scratch pad " - "for the graph and for the diagonal entries of the matrices and " - "for the temporary variable but not for the Arnoldi vectors." - << std::endl - << "-l : Specify left layout." << std::endl - << "-r : Specify right layout." << std::endl - << "-P : Specify if a Jacobi preconditioner is " - "used." - << std::endl - << "-C : Specify if the convergence is monitored." - << std::endl - << "-N_team : Specify the number of systems per team." - << std::endl - << "-vector_length : Specify the vector length." << std::endl - << std::endl; + std::cout << "Kokkos Batched GMRES performance test options:" << std::endl + << "-A : Filename of the input batched matrix." << std::endl + << "-B : Filename of the input batched right-hand " + "side." + << std::endl + << "-X : Filename of the output batched solution." << std::endl + << "-res : Filename of the output residual history." << std::endl + << "-timers : Filename of the output timers." << std::endl + << "-ortho_strategy : Select the orthogonalization strategy." << std::endl + << "-arnoldi_level : Select the scratch pad level (if used) " + "where Arnoldi vectors are stored." + << std::endl + << "-other_level : Select the scratch pad level (if used) " + "where everything except the Arnoldi vectors are stored." + << std::endl + << "-n1 : Number of repetitions of the experience." << std::endl + << "-n2 : Number of the kernel calls inside one " + "experience." + << std::endl + << "-team_size : Used team size." 
<< std::endl + << "-n_implementations: Number of implementations to use: test " + "all " + "implementations [0, specified number -1]." + << std::endl + << "-implementation : Specify only one implementation at a time." << std::endl + << " Note: implementation 0 : does not use " + "scratch pad." + << std::endl + << " Note: implementation 1 : use scratch pad " + "for the graph and for the diagonal entries of the matrices." + << std::endl + << " Note: implementation 2 : use scratch pad " + "for the graph and for the diagonal entries of the matrices and " + "for the temporary variable but not for the Arnoldi vectors." + << std::endl + << "-l : Specify left layout." << std::endl + << "-r : Specify right layout." << std::endl + << "-P : Specify if a Jacobi preconditioner is " + "used." + << std::endl + << "-C : Specify if the convergence is monitored." << std::endl + << "-N_team : Specify the number of systems per team." << std::endl + << "-vector_length : Specify the vector length." << std::endl + << std::endl; return 0; } if (token == std::string("-A")) name_A = argv[++i]; @@ -143,26 +133,18 @@ int main(int argc, char *argv[]) { if (token == std::string("-X")) name_X = argv[++i]; if (token == std::string("-res")) name_conv = argv[++i]; if (token == std::string("-timers")) name_timer = argv[++i]; - if (token == std::string("-ortho_strategy")) - ortho_strategy = std::atoi(argv[++i]); - if (token == std::string("-arnoldi_level")) - arnoldi_level = std::atoi(argv[++i]); - if (token == std::string("-other_level")) - other_level = std::atoi(argv[++i]); + if (token == std::string("-ortho_strategy")) ortho_strategy = std::atoi(argv[++i]); + if (token == std::string("-arnoldi_level")) arnoldi_level = std::atoi(argv[++i]); + if (token == std::string("-other_level")) other_level = std::atoi(argv[++i]); if (token == std::string("-n1")) n_rep_1 = std::atoi(argv[++i]); if (token == std::string("-n2")) n_rep_2 = std::atoi(argv[++i]); - if (token == std::string("-n_iterations")) - 
n_iterations = std::atoi(argv[++i]); + if (token == std::string("-n_iterations")) n_iterations = std::atoi(argv[++i]); if (token == std::string("-tol")) tol = std::stod(argv[++i]); if (token == std::string("-team_size")) team_size = std::atoi(argv[++i]); - if (token == std::string("-N_team")) - N_team_potential = std::atoi(argv[++i]); - if (token == std::string("-vector_length")) - vector_length = std::atoi(argv[++i]); - if (token == std::string("-n_implementations")) - n_impl = std::atoi(argv[++i]); - if (token == std::string("-implementation")) - impls.push_back(std::atoi(argv[++i])); + if (token == std::string("-N_team")) N_team_potential = std::atoi(argv[++i]); + if (token == std::string("-vector_length")) vector_length = std::atoi(argv[++i]); + if (token == std::string("-n_implementations")) n_impl = std::atoi(argv[++i]); + if (token == std::string("-implementation")) impls.push_back(std::atoi(argv[++i])); if (token == std::string("-l")) { layout_left = true; layout_right = false; @@ -179,9 +161,8 @@ int main(int argc, char *argv[]) { readSizesFromMM(name_A, Blk, ncols, nnz, N); - std::cout << "N_team_potential = " << N_team_potential << ", n = " << Blk - << ", N = " << N << ", team_size = " << team_size - << ", vector_length = " << vector_length << std::endl; + std::cout << "N_team_potential = " << N_team_potential << ", n = " << Blk << ", N = " << N + << ", team_size = " << team_size << ", vector_length = " << vector_length << std::endl; if (impls.size() == 0) for (int i = 0; i < n_impl; ++i) impls.push_back(i); @@ -190,9 +171,7 @@ int main(int argc, char *argv[]) { constexpr size_t LLC_CAPACITY = 80 * 6 * 1024 * 1024; KokkosBatched::Flush flush; - printf( - " :::: GMRES Testing (N = %d, Blk = %d, nnz = %d, vl = %d, n = %d)\n", - N, Blk, nnz, vector_length, n_rep_1); + printf(" :::: GMRES Testing (N = %d, Blk = %d, nnz = %d, vl = %d, n = %d)\n", N, Blk, nnz, vector_length, n_rep_1); typedef Kokkos::LayoutRight LR; typedef Kokkos::LayoutLeft LL; @@ -221,22 
+200,18 @@ int main(int argc, char *argv[]) { XYTypeLL xLL("values", N, Blk); XYTypeLL yLL("values", N, Blk); - if (layout_left) - printf(" :::: Testing left layout (team_size = %d)\n", team_size); - if (layout_right) - printf(" :::: Testing right layout (team_size = %d)\n", team_size); + if (layout_left) printf(" :::: Testing left layout (team_size = %d)\n", team_size); + if (layout_right) printf(" :::: Testing right layout (team_size = %d)\n", team_size); if (layout_left) { readCRSFromMM(name_A, valuesLL, rowOffsets, colIndices); readArrayFromMM(name_B, yLL); - if (use_preconditioner) - getInvDiagFromCRS(valuesLL, rowOffsets, colIndices, diagLL); + if (use_preconditioner) getInvDiagFromCRS(valuesLL, rowOffsets, colIndices, diagLL); } if (layout_right) { readCRSFromMM(name_A, valuesLR, rowOffsets, colIndices); readArrayFromMM(name_B, yLR); - if (use_preconditioner) - getInvDiagFromCRS(valuesLR, rowOffsets, colIndices, diagLR); + if (use_preconditioner) getInvDiagFromCRS(valuesLR, rowOffsets, colIndices, diagLR); } for (auto i_impl : impls) { @@ -256,12 +231,9 @@ int main(int argc, char *argv[]) { using Scalar3DViewType = Kokkos::View; using IntViewType = Kokkos::View; - using KrylovHandleType = - KokkosBatched::KrylovHandle; + using KrylovHandleType = KokkosBatched::KrylovHandle; KrylovHandleType handle(N, N_team, n_iterations, true); - handle.Arnoldi_view = - Scalar3DViewType("", N, n_iterations, Blk + n_iterations + 3); + handle.Arnoldi_view = Scalar3DViewType("", N, n_iterations, Blk + n_iterations + 3); // handle.tmp_view = typename KrylovHandleType::TemporaryViewType( // "", N, Blk + n_iterations + 3); @@ -285,110 +257,86 @@ int main(int argc, char *argv[]) { if (i_impl == 0 && layout_left) { if (use_preconditioner) - t_spmv += Functor_TestBatchedTeamVectorGMRES_1< - exec_space, AMatrixValueViewLL, IntView, XYTypeLL, - KrylovHandleType, true>( - valuesLL, diagLL, rowOffsets, colIndices, xLL, yLL, - N_team, team_size, vector_length, n_iterations, tol, - 
ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_1( + valuesLL, diagLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, + n_iterations, tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); else - t_spmv += Functor_TestBatchedTeamVectorGMRES_1< - exec_space, AMatrixValueViewLL, IntView, XYTypeLL, - KrylovHandleType, false>( - valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, - team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_1( + valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, n_iterations, + tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); } if (i_impl == 1 && layout_left) { if (use_preconditioner) - t_spmv += Functor_TestBatchedTeamVectorGMRES_2< - exec_space, AMatrixValueViewLL, IntView, XYTypeLL, - KrylovHandleType, true>( - valuesLL, diagLL, rowOffsets, colIndices, xLL, yLL, - N_team, team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_2( + valuesLL, diagLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, + n_iterations, tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); else - t_spmv += Functor_TestBatchedTeamVectorGMRES_2< - exec_space, AMatrixValueViewLL, IntView, XYTypeLL, - KrylovHandleType, false>( - valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, - team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_2( + valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, n_iterations, + tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); } if (i_impl == 2 && layout_left) { if (use_preconditioner) - t_spmv += Functor_TestBatchedTeamVectorGMRES_3< - exec_space, AMatrixValueViewLL, IntView, 
XYTypeLL, - KrylovHandleType, true>( - valuesLL, diagLL, rowOffsets, colIndices, xLL, yLL, - N_team, team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_3( + valuesLL, diagLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, + n_iterations, tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); else - t_spmv += Functor_TestBatchedTeamVectorGMRES_3< - exec_space, AMatrixValueViewLL, IntView, XYTypeLL, - KrylovHandleType, false>( - valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, - team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_3( + valuesLL, rowOffsets, colIndices, xLL, yLL, N_team, team_size, vector_length, n_iterations, + tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); } if (i_impl == 0 && layout_right) { if (use_preconditioner) - t_spmv += Functor_TestBatchedTeamVectorGMRES_1< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR, - KrylovHandleType, true>( - valuesLR, diagLR, rowOffsets, colIndices, xLR, yLR, - N_team, team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_1( + valuesLR, diagLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, + n_iterations, tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); else - t_spmv += Functor_TestBatchedTeamVectorGMRES_1< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR, - KrylovHandleType, false>( - valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, - team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_1( + valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, n_iterations, + tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); } 
if (i_impl == 1 && layout_right) { if (use_preconditioner) - t_spmv += Functor_TestBatchedTeamVectorGMRES_2< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR, - KrylovHandleType, true>( - valuesLR, diagLR, rowOffsets, colIndices, xLR, yLR, - N_team, team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_2( + valuesLR, diagLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, + n_iterations, tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); else - t_spmv += Functor_TestBatchedTeamVectorGMRES_2< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR, - KrylovHandleType, false>( - valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, - team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_2( + valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, n_iterations, + tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); } if (i_impl == 2 && layout_right) { if (use_preconditioner) - t_spmv += Functor_TestBatchedTeamVectorGMRES_3< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR, - KrylovHandleType, true>( - valuesLR, diagLR, rowOffsets, colIndices, xLR, yLR, - N_team, team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_3( + valuesLR, diagLR, rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, + n_iterations, tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); else - t_spmv += Functor_TestBatchedTeamVectorGMRES_3< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR, - KrylovHandleType, false>( - valuesLR, rowOffsets, colIndices, xLR, yLR, N_team, - team_size, vector_length, n_iterations, tol, - ortho_strategy, arnoldi_level, other_level, handle) + t_spmv += Functor_TestBatchedTeamVectorGMRES_3( + valuesLR, 
rowOffsets, colIndices, xLR, yLR, N_team, team_size, vector_length, n_iterations, + tol, ortho_strategy, arnoldi_level, other_level, handle) .run(); } exec_space().fence(); @@ -403,10 +351,8 @@ int main(int argc, char *argv[]) { { std::ofstream myfile; std::string name; - if (layout_left) - name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; - if (layout_right) - name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; + if (layout_left) name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; + if (layout_right) name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; myfile.open(name); @@ -419,15 +365,10 @@ int main(int argc, char *argv[]) { double average_time = 0.; - for (size_t i = 0; i < timers.size(); ++i) - average_time += timers[i] / timers.size(); + for (size_t i = 0; i < timers.size(); ++i) average_time += timers[i] / timers.size(); - if (layout_left) - printf("Left layout: Implementation %d: solve time = %f\n", i_impl, - average_time); - if (layout_right) - printf("Right layout: Implementation %d: solve time = %f\n", i_impl, - average_time); + if (layout_left) printf("Left layout: Implementation %d: solve time = %f\n", i_impl, average_time); + if (layout_right) printf("Right layout: Implementation %d: solve time = %f\n", i_impl, average_time); if (layout_left) { writeArrayToMM(name_X + std::to_string(i_impl) + "_l.mm", xLL); @@ -436,8 +377,7 @@ int main(int argc, char *argv[]) { writeArrayToMM(name_X + std::to_string(i_impl) + "_r.mm", xLR); } if (monitor_convergence) { - writeArrayToMM(name_conv + std::to_string(i_impl) + ".mm", - handle.residual_norms); + writeArrayToMM(name_conv + std::to_string(i_impl) + ".mm", handle.residual_norms); } } } diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/KokkosBatched_Test_Sparse_Helper.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/KokkosBatched_Test_Sparse_Helper.hpp index 1eaacbde5e6a..53f1c48f6cf5 100644 --- 
a/packages/kokkos-kernels/perf_test/batched/sparse/KokkosBatched_Test_Sparse_Helper.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/KokkosBatched_Test_Sparse_Helper.hpp @@ -36,11 +36,9 @@ void writeArrayToMM(std::string name, const XType x) { myfile.close(); } -void readSizesFromMM(std::string name, int &nrows, int &ncols, int &nnz, - int &N) { +void readSizesFromMM(std::string name, int &nrows, int &ncols, int &nnz, int &N) { std::ifstream input(name); - while (input.peek() == '%') - input.ignore(std::numeric_limits::max(), '\n'); + while (input.peek() == '%') input.ignore(std::numeric_limits::max(), '\n'); std::string line_sizes; @@ -67,8 +65,7 @@ template void readArrayFromMM(std::string name, const XType &x) { std::ifstream input(name); - while (input.peek() == '%') - input.ignore(std::numeric_limits::max(), '\n'); + while (input.peek() == '%') input.ignore(std::numeric_limits::max(), '\n'); input.ignore(std::numeric_limits::max(), '\n'); typename XType::HostMirror x_h = Kokkos::create_mirror_view(x); @@ -85,8 +82,7 @@ template void readDenseFromMM(std::string name, const AType &A) { std::ifstream input(name); - while (input.peek() == '%') - input.ignore(std::numeric_limits::max(), '\n'); + while (input.peek() == '%') input.ignore(std::numeric_limits::max(), '\n'); input.ignore(std::numeric_limits::max(), '\n'); typename AType::HostMirror A_h = Kokkos::create_mirror_view(A); @@ -113,12 +109,10 @@ void readDenseFromMM(std::string name, const AType &A) { } template -void readCRSFromMM(std::string name, const VType &V, const IntType &r, - const IntType &c) { +void readCRSFromMM(std::string name, const VType &V, const IntType &r, const IntType &c) { std::ifstream input(name); - while (input.peek() == '%') - input.ignore(std::numeric_limits::max(), '\n'); + while (input.peek() == '%') input.ignore(std::numeric_limits::max(), '\n'); input.ignore(std::numeric_limits::max(), '\n'); typename VType::HostMirror V_h = Kokkos::create_mirror_view(V); @@ -137,8 
+131,7 @@ void readCRSFromMM(std::string name, const VType &V, const IntType &r, input >> read_row >> c_h(i); --read_row; --c_h(i); - for (int tmp_row = current_row + 1; tmp_row <= read_row; ++tmp_row) - r_h(tmp_row) = i; + for (int tmp_row = current_row + 1; tmp_row <= read_row; ++tmp_row) r_h(tmp_row) = i; current_row = read_row; // if (VType::rank == 1) @@ -157,8 +150,7 @@ void readCRSFromMM(std::string name, const VType &V, const IntType &r, } template -void getInvDiagFromCRS(const VType &V, const IntType &r, const IntType &c, - const VType &diag) { +void getInvDiagFromCRS(const VType &V, const IntType &r, const IntType &c, const VType &diag) { auto diag_values_host = Kokkos::create_mirror_view(diag); auto values_host = Kokkos::create_mirror_view(V); auto row_ptr_host = Kokkos::create_mirror_view(r); @@ -173,12 +165,10 @@ void getInvDiagFromCRS(const VType &V, const IntType &r, const IntType &c, int BlkSize = diag.extent(1); for (int i = 0; i < BlkSize; ++i) { - for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); - ++current_index) { + for (current_index = row_ptr_host(i); current_index < row_ptr_host(i + 1); ++current_index) { if (colIndices_host(current_index) == i) break; } - for (int j = 0; j < N; ++j) - diag_values_host(j, i) = 1. / values_host(j, current_index); + for (int j = 0; j < N; ++j) diag_values_host(j, i) = 1. 
/ values_host(j, current_index); } Kokkos::deep_copy(diag, diag_values_host); diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_SPMV_View.hpp b/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_SPMV_View.hpp index 17b8ad6d3ea0..c1cdec2778ca 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_SPMV_View.hpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_SPMV_View.hpp @@ -14,8 +14,7 @@ // //@HEADER -template +template struct BSPMV_Functor_View { typedef typename AMatrix::execution_space exec_space; typedef typename AMatrix::non_const_value_type value_type; @@ -36,11 +35,9 @@ struct BSPMV_Functor_View { const int N; int implementation; - BSPMV_Functor_View(const value_type* alpha_, const AMatrix m_A_values_, - const IntView m_A_row_ptr_, const IntView m_A_col_indices_, - const XVector m_x_, const value_type* beta_, - const YVector m_y_, const int matrices_per_team_, - const int N_, const int implementation_ = 0) + BSPMV_Functor_View(const value_type* alpha_, const AMatrix m_A_values_, const IntView m_A_row_ptr_, + const IntView m_A_col_indices_, const XVector m_x_, const value_type* beta_, const YVector m_y_, + const int matrices_per_team_, const int N_, const int implementation_ = 0) : alpha(alpha_), m_A_values(m_A_values_), m_A_row_ptr(m_A_row_ptr_), @@ -51,23 +48,16 @@ struct BSPMV_Functor_View { matrices_per_team(matrices_per_team_), N(N_), implementation(implementation_) { - static_assert(static_cast(AMatrix::rank) == 2, - "AMatrix must be a rank 2 View."); - static_assert(static_cast(IntView::rank) == 1, - "IntView must be a rank 1 View."); - static_assert(static_cast(XVector::rank) == 2, - "XVector must be a rank 2 View."); - static_assert(static_cast(YVector::rank) == 2, - "YVector must be a rank 2 View."); + static_assert(static_cast(AMatrix::rank) == 2, "AMatrix must be a rank 2 View."); + static_assert(static_cast(IntView::rank) == 1, "IntView must be a 
rank 1 View."); + static_assert(static_cast(XVector::rank) == 2, "XVector must be a rank 2 View."); + static_assert(static_cast(YVector::rank) == 2, "YVector must be a rank 2 View."); } - KOKKOS_INLINE_FUNCTION void getIndices(const ordinal_type iTemp, - const ordinal_type n_rows, - const ordinal_type n_matrices, - ordinal_type& iRow, + KOKKOS_INLINE_FUNCTION void getIndices(const ordinal_type iTemp, const ordinal_type n_rows, + const ordinal_type n_matrices, ordinal_type& iRow, ordinal_type& iMatrix) const { - if (std::is_same::value) { + if (std::is_same::value) { iRow = iTemp / n_matrices; iMatrix = iTemp % n_matrices; } else { @@ -78,90 +68,72 @@ struct BSPMV_Functor_View { KOKKOS_INLINE_FUNCTION void operator()(const team_member& dev) const { if (implementation == 0) { - const int first_matrix = - static_cast(dev.league_rank()) * matrices_per_team; - const int last_matrix = - static_cast(dev.league_rank() + 1) * matrices_per_team < N - ? static_cast(dev.league_rank() + 1) * matrices_per_team - : N; + const int first_matrix = static_cast(dev.league_rank()) * matrices_per_team; + const int last_matrix = static_cast(dev.league_rank() + 1) * matrices_per_team < N + ? 
static_cast(dev.league_rank() + 1) * matrices_per_team + : N; const ordinal_type n_rows = m_A_row_ptr.extent(0) - 1; for (int i_matrix = first_matrix; i_matrix < last_matrix; ++i_matrix) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(dev, 0, n_rows), - [&](const ordinal_type& iRow) { - const ordinal_type row_length = - m_A_row_ptr(iRow + 1) - m_A_row_ptr(iRow); - value_type sum = 0; - - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(dev, row_length), - [&](const ordinal_type& iEntry, value_type& lsum) { - const value_type val = - m_A_values(i_matrix, m_A_row_ptr(iRow) + iEntry); - lsum += - val * m_x(i_matrix, - m_A_col_indices(m_A_row_ptr(iRow) + iEntry)); - }, - sum); - - Kokkos::single(Kokkos::PerThread(dev), [&]() { - sum *= alpha[i_matrix]; - - if (dobeta == 0) { - m_y(i_matrix, iRow) = sum; - } else { - m_y(i_matrix, iRow) = - beta[i_matrix] * m_y(i_matrix, iRow) + sum; - } - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, n_rows), [&](const ordinal_type& iRow) { + const ordinal_type row_length = m_A_row_ptr(iRow + 1) - m_A_row_ptr(iRow); + value_type sum = 0; + + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(dev, row_length), + [&](const ordinal_type& iEntry, value_type& lsum) { + const value_type val = m_A_values(i_matrix, m_A_row_ptr(iRow) + iEntry); + lsum += val * m_x(i_matrix, m_A_col_indices(m_A_row_ptr(iRow) + iEntry)); + }, + sum); + + Kokkos::single(Kokkos::PerThread(dev), [&]() { + sum *= alpha[i_matrix]; + + if (dobeta == 0) { + m_y(i_matrix, iRow) = sum; + } else { + m_y(i_matrix, iRow) = beta[i_matrix] * m_y(i_matrix, iRow) + sum; + } + }); + }); } } if (implementation == 1) { - const int first_matrix = - static_cast(dev.league_rank()) * matrices_per_team; - const int last_matrix = - static_cast(dev.league_rank() + 1) * matrices_per_team < N - ? 
static_cast(dev.league_rank() + 1) * matrices_per_team - : N; + const int first_matrix = static_cast(dev.league_rank()) * matrices_per_team; + const int last_matrix = static_cast(dev.league_rank() + 1) * matrices_per_team < N + ? static_cast(dev.league_rank() + 1) * matrices_per_team + : N; const int n_matrices = last_matrix - first_matrix; const ordinal_type n_rows = m_A_row_ptr.extent(0) - 1; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(dev, 0, n_rows * n_matrices), - [&](const ordinal_type& iTemp) { - ordinal_type iRow, iMatrix; - this->getIndices(iTemp, n_rows, n_matrices, iRow, iMatrix); - const int iGlobalMatrix = first_matrix + iMatrix; + Kokkos::parallel_for(Kokkos::TeamVectorRange(dev, 0, n_rows * n_matrices), [&](const ordinal_type& iTemp) { + ordinal_type iRow, iMatrix; + this->getIndices(iTemp, n_rows, n_matrices, iRow, iMatrix); + const int iGlobalMatrix = first_matrix + iMatrix; - const ordinal_type row_length = - m_A_row_ptr(iRow + 1) - m_A_row_ptr(iRow); - value_type sum = 0; + const ordinal_type row_length = m_A_row_ptr(iRow + 1) - m_A_row_ptr(iRow); + value_type sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int iEntry = 0; iEntry < row_length; ++iEntry) { - sum += m_A_values(iGlobalMatrix, m_A_row_ptr(iRow) + iEntry) * - m_x(iGlobalMatrix, - m_A_col_indices(m_A_row_ptr(iRow) + iEntry)); - } - - sum *= alpha[iGlobalMatrix]; - - if (dobeta == 0) { - m_y(iGlobalMatrix, iRow) = sum; - } else { - m_y(iGlobalMatrix, iRow) = - beta[iGlobalMatrix] * m_y(iGlobalMatrix, iRow) + sum; - } - }); + for (int iEntry = 0; iEntry < row_length; ++iEntry) { + sum += m_A_values(iGlobalMatrix, m_A_row_ptr(iRow) + iEntry) * + m_x(iGlobalMatrix, m_A_col_indices(m_A_row_ptr(iRow) + iEntry)); + } + + sum *= alpha[iGlobalMatrix]; + + if (dobeta == 0) { + m_y(iGlobalMatrix, iRow) = sum; + } else { + m_y(iGlobalMatrix, iRow) = beta[iGlobalMatrix] * m_y(iGlobalMatrix, iRow) + sum; + } + }); } if (implementation == 2) { - using 
ScratchPadIntView = - Kokkos::View; + using ScratchPadIntView = Kokkos::View; const ordinal_type n_rows = m_A_row_ptr.extent(0) - 1; const ordinal_type nnz = m_A_col_indices.extent(0); @@ -169,51 +141,43 @@ struct BSPMV_Functor_View { ScratchPadIntView cols(dev.team_scratch(0), nnz); ScratchPadIntView row_map(dev.team_scratch(0), n_rows + 1); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(dev, 0, n_rows + 1), - [&](const ordinal_type& i) { row_map(i) = m_A_row_ptr(i); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(dev, 0, n_rows + 1), + [&](const ordinal_type& i) { row_map(i) = m_A_row_ptr(i); }); - Kokkos::parallel_for( - Kokkos::TeamVectorRange(dev, 0, nnz), - [&](const ordinal_type& i) { cols(i) = m_A_col_indices(i); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(dev, 0, nnz), + [&](const ordinal_type& i) { cols(i) = m_A_col_indices(i); }); dev.team_barrier(); - const int first_matrix = - static_cast(dev.league_rank()) * matrices_per_team; - const int last_matrix = - static_cast(dev.league_rank() + 1) * matrices_per_team < N - ? static_cast(dev.league_rank() + 1) * matrices_per_team - : N; - const int n_matrices = last_matrix - first_matrix; + const int first_matrix = static_cast(dev.league_rank()) * matrices_per_team; + const int last_matrix = static_cast(dev.league_rank() + 1) * matrices_per_team < N + ? 
static_cast(dev.league_rank() + 1) * matrices_per_team + : N; + const int n_matrices = last_matrix - first_matrix; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(dev, 0, n_rows * n_matrices), - [&](const ordinal_type& iTemp) { - ordinal_type iRow, iMatrix; - this->getIndices(iTemp, n_rows, n_matrices, iRow, iMatrix); - const int iGlobalMatrix = first_matrix + iMatrix; + Kokkos::parallel_for(Kokkos::TeamVectorRange(dev, 0, n_rows * n_matrices), [&](const ordinal_type& iTemp) { + ordinal_type iRow, iMatrix; + this->getIndices(iTemp, n_rows, n_matrices, iRow, iMatrix); + const int iGlobalMatrix = first_matrix + iMatrix; - const ordinal_type row_length = row_map(iRow + 1) - row_map(iRow); - value_type sum = 0; + const ordinal_type row_length = row_map(iRow + 1) - row_map(iRow); + value_type sum = 0; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif - for (int iEntry = 0; iEntry < row_length; ++iEntry) { - sum += m_A_values(iGlobalMatrix, row_map(iRow) + iEntry) * - m_x(iGlobalMatrix, cols(row_map(iRow) + iEntry)); - } - - sum *= alpha[iGlobalMatrix]; - - if (dobeta == 0) { - m_y(iGlobalMatrix, iRow) = sum; - } else { - m_y(iGlobalMatrix, iRow) = - beta[iGlobalMatrix] * m_y(iGlobalMatrix, iRow) + sum; - } - }); + for (int iEntry = 0; iEntry < row_length; ++iEntry) { + sum += m_A_values(iGlobalMatrix, row_map(iRow) + iEntry) * m_x(iGlobalMatrix, cols(row_map(iRow) + iEntry)); + } + + sum *= alpha[iGlobalMatrix]; + + if (dobeta == 0) { + m_y(iGlobalMatrix, iRow) = sum; + } else { + m_y(iGlobalMatrix, iRow) = beta[iGlobalMatrix] * m_y(iGlobalMatrix, iRow) + sum; + } + }); } } }; \ No newline at end of file diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_Test_SPMV.cpp b/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_Test_SPMV.cpp index 06ea55e3036b..e93c65f7f939 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_Test_SPMV.cpp +++ 
b/packages/kokkos-kernels/perf_test/batched/sparse/SPMV/KokkosBatched_Test_SPMV.cpp @@ -31,9 +31,8 @@ typedef typename exec_space::memory_space memory_space; typedef Kokkos::DefaultHostExecutionSpace host_space; typedef typename Kokkos::Device device; -template +template struct Functor_TestBatchedTeamVectorSpmv { PolicyType _policy; const alphaViewType _alpha; @@ -46,10 +45,9 @@ struct Functor_TestBatchedTeamVectorSpmv { int _matrices_per_team; KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamVectorSpmv( - PolicyType policy, const alphaViewType &alpha, const DViewType &D, - const IntView &r, const IntView &c, const xViewType &X, - const betaViewType &beta, const yViewType &Y, const int matrices_per_team) + Functor_TestBatchedTeamVectorSpmv(PolicyType policy, const alphaViewType &alpha, const DViewType &D, const IntView &r, + const IntView &c, const xViewType &X, const betaViewType &beta, const yViewType &Y, + const int matrices_per_team) : _policy(policy), _alpha(alpha), _D(D), @@ -62,28 +60,19 @@ struct Functor_TestBatchedTeamVectorSpmv { template KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { - const int first_matrix = - static_cast(member.league_rank()) * _matrices_per_team; - const int N = _D.extent(0); - const int last_matrix = - (static_cast(member.league_rank() + 1) * _matrices_per_team < N - ? 
static_cast(member.league_rank() + 1) * _matrices_per_team - : N); - - auto alpha_team = - Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); - auto D_team = Kokkos::subview( - _D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - auto X_team = Kokkos::subview( - _X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - auto beta_team = - Kokkos::subview(_beta, Kokkos::make_pair(first_matrix, last_matrix)); - auto Y_team = Kokkos::subview( - _Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - - using ScratchPadIntView = - Kokkos::View; + const int first_matrix = static_cast(member.league_rank()) * _matrices_per_team; + const int N = _D.extent(0); + const int last_matrix = (static_cast(member.league_rank() + 1) * _matrices_per_team < N + ? static_cast(member.league_rank() + 1) * _matrices_per_team + : N); + + auto alpha_team = Kokkos::subview(_alpha, Kokkos::make_pair(first_matrix, last_matrix)); + auto D_team = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto X_team = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + auto beta_team = Kokkos::subview(_beta, Kokkos::make_pair(first_matrix, last_matrix)); + auto Y_team = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); + + using ScratchPadIntView = Kokkos::View; const int n_rows = _r.extent(0) - 1; const int nnz = _c.extent(0); @@ -91,31 +80,23 @@ struct Functor_TestBatchedTeamVectorSpmv { ScratchPadIntView cols(member.team_scratch(0), nnz); ScratchPadIntView row_map(member.team_scratch(0), n_rows + 1); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, n_rows + 1), - [&](const int &i) { row_map(i) = _r(i); }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, n_rows + 1), [&](const int &i) { row_map(i) = _r(i); }); - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, nnz), - [&](const int &i) { cols(i) = _c(i); }); + 
Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, nnz), [&](const int &i) { cols(i) = _c(i); }); member.team_barrier(); if (last_matrix != N && _matrices_per_team == 8) - KokkosBatched::TeamVectorSpmv< - MemberType, KokkosBatched::Trans::NoTranspose, - 8>::template invoke( + KokkosBatched::TeamVectorSpmv::template invoke< + DViewType, ScratchPadIntView, xViewType, yViewType, alphaViewType, betaViewType, dobeta>( member, alpha_team, D_team, row_map, cols, X_team, beta_team, Y_team); else - KokkosBatched::TeamVectorSpmv< - MemberType, KokkosBatched::Trans::NoTranspose, - 1>::template invoke( + KokkosBatched::TeamVectorSpmv::template invoke< + DViewType, ScratchPadIntView, xViewType, yViewType, alphaViewType, betaViewType, dobeta>( member, alpha_team, D_team, row_map, cols, X_team, beta_team, Y_team); } - inline void run() { - Kokkos::parallel_for("KokkosSparse::PerfTest::BSpMV", _policy, *this); - } + inline void run() { Kokkos::parallel_for("KokkosSparse::PerfTest::BSpMV", _policy, *this); } }; int main(int argc, char *argv[]) { @@ -151,53 +132,46 @@ int main(int argc, char *argv[]) { for (int i = 1; i < argc; ++i) { const std::string &token = argv[i]; if (token == std::string("--help") || token == std::string("-h")) { - std::cout - << "Kokkos Batched SPMV performance test options:" << std::endl - << "-A : Filename of the input batched matrix." - << std::endl - << "-B : Filename of the input batched right-hand " - "side." - << std::endl - << "-X : Filename of the output batched solution." - << std::endl - << "-timers : Filename of the output timers." - << std::endl - << "-n1 : Number of repetitions of the experience." - << std::endl - << "-n2 : Number of the kernel calls inside one " - "experience." - << std::endl - << "-team_size : Used team size." << std::endl - << "-n_implementations: Number of implementations to use: test " - "all " - "implementations [0, specified number -1]." 
- << std::endl - << "-implementation : Specify only one implementation at a time." - << std::endl - << " Note: implementation 0 : use a Team " - "approach where a Team have to apply N_team SPMV. A given team " - "applies N_team SPMV sequentially and uses a ThreadRange over " - "the row and a VectorRange over the non zero entries of a given " - "row." - << std::endl - << " Note: implementation 1 : use a Team " - "approach where a Team have to apply N_team SPMV. A given team " - "uses a fused thread vector range policy to loop over the " - "independent fibers." - << std::endl - << " Note: implementation 2 : same as " - "implementation 1 but using scratch pad for the graph." - << std::endl - << " Note: implementation 3 : same as " - "implementation 1 but using the kernels from " - "batched/sparse/impl/*." - << std::endl - << "-l : Specify left layout." << std::endl - << "-r : Specify right layout." << std::endl - << "-N_team : Specify the number of systems per team." - << std::endl - << "-vector_length : Specify the vector length." << std::endl - << std::endl; + std::cout << "Kokkos Batched SPMV performance test options:" << std::endl + << "-A : Filename of the input batched matrix." << std::endl + << "-B : Filename of the input batched right-hand " + "side." + << std::endl + << "-X : Filename of the output batched solution." << std::endl + << "-timers : Filename of the output timers." << std::endl + << "-n1 : Number of repetitions of the experience." << std::endl + << "-n2 : Number of the kernel calls inside one " + "experience." + << std::endl + << "-team_size : Used team size." << std::endl + << "-n_implementations: Number of implementations to use: test " + "all " + "implementations [0, specified number -1]." + << std::endl + << "-implementation : Specify only one implementation at a time." << std::endl + << " Note: implementation 0 : use a Team " + "approach where a Team have to apply N_team SPMV. 
A given team " + "applies N_team SPMV sequentially and uses a ThreadRange over " + "the row and a VectorRange over the non zero entries of a given " + "row." + << std::endl + << " Note: implementation 1 : use a Team " + "approach where a Team have to apply N_team SPMV. A given team " + "uses a fused thread vector range policy to loop over the " + "independent fibers." + << std::endl + << " Note: implementation 2 : same as " + "implementation 1 but using scratch pad for the graph." + << std::endl + << " Note: implementation 3 : same as " + "implementation 1 but using the kernels from " + "batched/sparse/impl/*." + << std::endl + << "-l : Specify left layout." << std::endl + << "-r : Specify right layout." << std::endl + << "-N_team : Specify the number of systems per team." << std::endl + << "-vector_length : Specify the vector length." << std::endl + << std::endl; return 0; } if (token == std::string("-A")) name_A = argv[++i]; @@ -209,15 +183,11 @@ int main(int argc, char *argv[]) { if (token == std::string("-n1")) n_rep_1 = std::atoi(argv[++i]); if (token == std::string("-n2")) n_rep_2 = std::atoi(argv[++i]); - if (token == std::string("-vector_length")) - vector_length = std::atoi(argv[++i]); - if (token == std::string("-N_team")) - N_team_potential = std::atoi(argv[++i]); + if (token == std::string("-vector_length")) vector_length = std::atoi(argv[++i]); + if (token == std::string("-N_team")) N_team_potential = std::atoi(argv[++i]); if (token == std::string("-team_size")) team_size = std::atoi(argv[++i]); - if (token == std::string("-n_implementations")) - n_impl = std::atoi(argv[++i]); - if (token == std::string("-implementation")) - impls.push_back(std::atoi(argv[++i])); + if (token == std::string("-n_implementations")) n_impl = std::atoi(argv[++i]); + if (token == std::string("-implementation")) impls.push_back(std::atoi(argv[++i])); if (token == std::string("-l")) { layout_left = true; layout_right = false; @@ -244,8 +214,7 @@ int main(int argc, char 
*argv[]) { printf( " :::: Testing (N = %d, Blk = %d, nnz = %d, vl = %d, vi = %d, n = " "%d, N_team_potential = %d)\n", - N, Blk, nnz, vector_length, internal_vector_length, n_rep_1, - N_team_potential); + N, Blk, nnz, vector_length, internal_vector_length, n_rep_1, N_team_potential); typedef Kokkos::LayoutRight LR; typedef Kokkos::LayoutLeft LL; @@ -274,10 +243,8 @@ int main(int argc, char *argv[]) { double *s_a = new double[N]; double *s_b = new double[N]; - if (layout_left) - printf(" :::: Testing left layout (team_size = %d)\n", team_size); - if (layout_right) - printf(" :::: Testing right layout (team_size = %d)\n", team_size); + if (layout_left) printf(" :::: Testing left layout (team_size = %d)\n", team_size); + if (layout_right) printf(" :::: Testing right layout (team_size = %d)\n", team_size); if (layout_left) { readCRSFromMM(name_A, valuesLL, rowOffsets, colIndices); @@ -301,8 +268,7 @@ int main(int argc, char *argv[]) { Kokkos::deep_copy(alphaV, alphaV_h); Kokkos::deep_copy(betaV, betaV_h); - using ScratchPadIntView = - Kokkos::View; + using ScratchPadIntView = Kokkos::View; for (auto i_impl : impls) { std::vector timers; @@ -327,12 +293,9 @@ int main(int argc, char *argv[]) { if (layout_left) { using policy_type = Kokkos::TeamPolicy; - policy_type auto_policy(number_of_teams, Kokkos::AUTO(), - Kokkos::AUTO()); - policy_type tuned_policy(number_of_teams, team_size, - Kokkos::AUTO()); - policy_type tuned_policy_2(number_of_teams, team_size, - vector_length); + policy_type auto_policy(number_of_teams, Kokkos::AUTO(), Kokkos::AUTO()); + policy_type tuned_policy(number_of_teams, team_size, Kokkos::AUTO()); + policy_type tuned_policy_2(number_of_teams, team_size, vector_length); policy_type policy; if (team_size < 1) @@ -347,33 +310,24 @@ int main(int argc, char *argv[]) { size_t bytes_0 = ScratchPadIntView::shmem_size(Blk + 1); size_t bytes_1 = ScratchPadIntView::shmem_size(nnz); - if (i_impl > 1) - policy.set_scratch_size(0, Kokkos::PerTeam(bytes_0 + 
bytes_1)); + if (i_impl > 1) policy.set_scratch_size(0, Kokkos::PerTeam(bytes_0 + bytes_1)); // policy.set_scratch_size(1, Kokkos::PerTeam(bytes_1)); if (i_impl == 3) { - Functor_TestBatchedTeamVectorSpmv< - policy_type, AMatrixValueViewLL, IntView, XYTypeLL, XYTypeLL, - alphaViewType, alphaViewType, 0>(policy, alphaV, valuesLL, - rowOffsets, colIndices, xLL, - betaV, yLL, N_team) + Functor_TestBatchedTeamVectorSpmv(policy, alphaV, valuesLL, rowOffsets, + colIndices, xLL, betaV, yLL, N_team) .run(); } else { - Kokkos::parallel_for( - "KokkosSparse::PerfTest::BSpMV", policy, - BSPMV_Functor_View(s_a, valuesLL, rowOffsets, - colIndices, xLL, s_b, yLL, - N_team, N, i_impl)); + Kokkos::parallel_for("KokkosSparse::PerfTest::BSpMV", policy, + BSPMV_Functor_View( + s_a, valuesLL, rowOffsets, colIndices, xLL, s_b, yLL, N_team, N, i_impl)); } } if (layout_right) { using policy_type = Kokkos::TeamPolicy; - policy_type auto_policy(number_of_teams, Kokkos::AUTO(), - Kokkos::AUTO()); - policy_type tuned_policy(number_of_teams, team_size, - Kokkos::AUTO()); - policy_type tuned_policy_2(number_of_teams, team_size, - vector_length); + policy_type auto_policy(number_of_teams, Kokkos::AUTO(), Kokkos::AUTO()); + policy_type tuned_policy(number_of_teams, team_size, Kokkos::AUTO()); + policy_type tuned_policy_2(number_of_teams, team_size, vector_length); policy_type policy; if (team_size < 1) @@ -385,23 +339,17 @@ int main(int argc, char *argv[]) { size_t bytes_0 = ScratchPadIntView::shmem_size(Blk + 1); size_t bytes_1 = ScratchPadIntView::shmem_size(nnz); - if (i_impl > 1) - policy.set_scratch_size(0, Kokkos::PerTeam(bytes_0 + bytes_1)); + if (i_impl > 1) policy.set_scratch_size(0, Kokkos::PerTeam(bytes_0 + bytes_1)); // policy.set_scratch_size(1, Kokkos::PerTeam(bytes_1)); if (i_impl == 3) { - Functor_TestBatchedTeamVectorSpmv< - policy_type, AMatrixValueViewLR, IntView, XYTypeLR, XYTypeLR, - alphaViewType, alphaViewType, 0>(policy, alphaV, valuesLR, - rowOffsets, colIndices, xLR, - 
betaV, yLR, N_team) + Functor_TestBatchedTeamVectorSpmv(policy, alphaV, valuesLR, rowOffsets, + colIndices, xLR, betaV, yLR, N_team) .run(); } else { - Kokkos::parallel_for( - "KokkosSparse::PerfTest::BSpMV", policy, - BSPMV_Functor_View(s_a, valuesLR, rowOffsets, - colIndices, xLR, s_b, yLR, - N_team, N, i_impl)); + Kokkos::parallel_for("KokkosSparse::PerfTest::BSpMV", policy, + BSPMV_Functor_View( + s_a, valuesLR, rowOffsets, colIndices, xLR, s_b, yLR, N_team, N, i_impl)); } } exec_space().fence(); @@ -416,10 +364,8 @@ int main(int argc, char *argv[]) { { std::ofstream myfile; std::string name; - if (layout_left) - name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; - if (layout_right) - name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; + if (layout_left) name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; + if (layout_right) name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; myfile.open(name); @@ -432,8 +378,7 @@ int main(int argc, char *argv[]) { double average_time = 0.; - for (size_t i = 0; i < timers.size(); ++i) - average_time += timers[i] / timers.size(); + for (size_t i = 0; i < timers.size(); ++i) average_time += timers[i] / timers.size(); if (layout_left) printf( diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverDn.cpp b/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverDn.cpp index 2294c23805ce..5e9bf13f8cee 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverDn.cpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverDn.cpp @@ -71,9 +71,7 @@ struct Functor_Test_BatchedDenseCuSolve { const VectorViewType _B; KOKKOS_INLINE_FUNCTION - Functor_Test_BatchedDenseCuSolve(const MatrixViewType &A, - const VectorViewType &X, - const VectorViewType &B) + Functor_Test_BatchedDenseCuSolve(const MatrixViewType &A, const VectorViewType &X, const 
VectorViewType &B) : _A(A), _X(X), _B(B) {} inline double run() { @@ -100,10 +98,8 @@ struct Functor_Test_BatchedDenseCuSolve { double **d_Aarray = nullptr; double **d_Barray = nullptr; - cudaMalloc(reinterpret_cast(&d_Aarray), - sizeof(double *) * batchSize); - cudaMalloc(reinterpret_cast(&d_Barray), - sizeof(double *) * batchSize); + cudaMalloc(reinterpret_cast(&d_Aarray), sizeof(double *) * batchSize); + cudaMalloc(reinterpret_cast(&d_Barray), sizeof(double *) * batchSize); std::vector Aarray(batchSize, nullptr); std::vector Barray(batchSize, nullptr); @@ -112,34 +108,26 @@ struct Functor_Test_BatchedDenseCuSolve { Barray[i] = Kokkos::subview(_X, i, Kokkos::ALL).data(); } - cudaMemcpyAsync(d_Aarray, Aarray.data(), sizeof(double *) * batchSize, - cudaMemcpyHostToDevice); - cudaMemcpyAsync(d_Barray, Barray.data(), sizeof(double *) * batchSize, - cudaMemcpyHostToDevice); + cudaMemcpyAsync(d_Aarray, Aarray.data(), sizeof(double *) * batchSize, cudaMemcpyHostToDevice); + cudaMemcpyAsync(d_Barray, Barray.data(), sizeof(double *) * batchSize, cudaMemcpyHostToDevice); cudaDeviceSynchronize(); exec_space().fence(); timer.reset(); - auto status1 = cusolverDnDpotrfBatched(handle, uplo, m, d_Aarray, lda, - d_infoArray, batchSize); + auto status1 = cusolverDnDpotrfBatched(handle, uplo, m, d_Aarray, lda, d_infoArray, batchSize); if (status1 != CUSOLVER_STATUS_SUCCESS) - std::cout << "Error in cusolverDnDpotrfBatched with batchSize = " - << batchSize << " and m = " << m << std::endl; + std::cout << "Error in cusolverDnDpotrfBatched with batchSize = " << batchSize << " and m = " << m << std::endl; cudaDeviceSynchronize(); - auto status2 = cusolverDnDpotrsBatched(handle, uplo, m, 1, d_Aarray, lda, - d_Barray, ldb, info, batchSize); + auto status2 = cusolverDnDpotrsBatched(handle, uplo, m, 1, d_Aarray, lda, d_Barray, ldb, info, batchSize); if (status2 != CUSOLVER_STATUS_SUCCESS) { if (status2 == CUSOLVER_STATUS_NOT_INITIALIZED) - std::cout << "Error in cusolverDnDpotrsBatched 
with batchSize = " - << batchSize << " and m = " << m + std::cout << "Error in cusolverDnDpotrsBatched with batchSize = " << batchSize << " and m = " << m << " CUSOLVER_STATUS_NOT_INITIALIZED " << std::endl; if (status2 == CUSOLVER_STATUS_INVALID_VALUE) - std::cout << "Error in cusolverDnDpotrsBatched with batchSize = " - << batchSize << " and m = " << m + std::cout << "Error in cusolverDnDpotrsBatched with batchSize = " << batchSize << " and m = " << m << " CUSOLVER_STATUS_INVALID_VALUE " << std::endl; if (status2 == CUSOLVER_STATUS_INTERNAL_ERROR) - std::cout << "Error in cusolverDnDpotrsBatched with batchSize = " - << batchSize << " and m = " << m + std::cout << "Error in cusolverDnDpotrsBatched with batchSize = " << batchSize << " and m = " << m << " CUSOLVER_STATUS_INTERNAL_ERROR " << std::endl; cudaDeviceSynchronize(); exec_space().fence(); @@ -189,12 +177,9 @@ int main(int argc, char *argv[]) { if (token == std::string("-X")) name_X = argv[++i]; if (token == std::string("-timers")) name_timer = argv[++i]; if (token == std::string("-team_size")) team_size = std::atoi(argv[++i]); - if (token == std::string("-vector_length")) - vector_length = std::atoi(argv[++i]); - if (token == std::string("-n_implementations")) - n_impl = std::atoi(argv[++i]); - if (token == std::string("-implementation")) - impls.push_back(std::atoi(argv[++i])); + if (token == std::string("-vector_length")) vector_length = std::atoi(argv[++i]); + if (token == std::string("-n_implementations")) n_impl = std::atoi(argv[++i]); + if (token == std::string("-implementation")) impls.push_back(std::atoi(argv[++i])); if (token == std::string("-l")) { layout_left = true; layout_right = false; @@ -219,8 +204,7 @@ int main(int argc, char *argv[]) { constexpr size_t LLC_CAPACITY = 80 * 6 * 1024 * 1024; KokkosBatched::Flush flush; - printf(" :::: CusolverDn Testing (N = %d, Blk = %d, vl = %d, n = %d)\n", N, - Blk, vector_length, n_rep_1); + printf(" :::: CusolverDn Testing (N = %d, Blk = %d, vl = %d, n = 
%d)\n", N, Blk, vector_length, n_rep_1); typedef Kokkos::LayoutRight LR; typedef Kokkos::LayoutLeft LL; @@ -240,10 +224,8 @@ int main(int argc, char *argv[]) { XYTypeLL xLL("values", N, Blk); XYTypeLL yLL("values", N, Blk); - if (layout_left) - printf(" :::: Testing left layout (team_size = %d)\n", team_size); - if (layout_right) - printf(" :::: Testing right layout (team_size = %d)\n", team_size); + if (layout_left) printf(" :::: Testing left layout (team_size = %d)\n", team_size); + if (layout_right) printf(" :::: Testing right layout (team_size = %d)\n", team_size); if (layout_left) { readDenseFromMM(name_A, aLL); @@ -269,9 +251,7 @@ int main(int argc, char *argv[]) { if (i_impl == 0) { if (layout_right) { - t_spmv = Functor_Test_BatchedDenseCuSolve(aLR, xLR, yLR) - .run(); + t_spmv = Functor_Test_BatchedDenseCuSolve(aLR, xLR, yLR).run(); } } exec_space().fence(); @@ -285,10 +265,8 @@ int main(int argc, char *argv[]) { { std::ofstream myfile; std::string name; - if (layout_left) - name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; - if (layout_right) - name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; + if (layout_left) name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; + if (layout_right) name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; myfile.open(name); @@ -301,15 +279,10 @@ int main(int argc, char *argv[]) { double average_time = 0.; - for (size_t i = 0; i < timers.size(); ++i) - average_time += timers[i] / timers.size(); + for (size_t i = 0; i < timers.size(); ++i) average_time += timers[i] / timers.size(); - if (layout_left) - printf("Left layout: Implementation %d: solve time = %f\n", i_impl, - average_time); - if (layout_right) - printf("Right layout: Implementation %d: solve time = %f\n", i_impl, - average_time); + if (layout_left) printf("Left layout: Implementation %d: solve time = %f\n", i_impl, average_time); + if (layout_right) printf("Right layout: Implementation %d: solve time = %f\n", 
i_impl, average_time); if (layout_left) { writeArrayToMM(name_X + std::to_string(i_impl) + "_l.mm", xLL); diff --git a/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp b/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp index 808e235edce5..8b2b48c0f4f6 100644 --- a/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp +++ b/packages/kokkos-kernels/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp @@ -26,7 +26,7 @@ #include "Kokkos_Sort.hpp" // -//#define KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +// #define KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE @@ -66,8 +66,7 @@ typedef typename exec_space::memory_space memory_space; typedef Kokkos::DefaultHostExecutionSpace host_space; typedef typename Kokkos::Device device; -template +template struct Functor_Test_SparseCuSolveQR { const MatrixViewType _A; const IntView _r; @@ -76,8 +75,7 @@ struct Functor_Test_SparseCuSolveQR { const VectorViewType _B; KOKKOS_INLINE_FUNCTION - Functor_Test_SparseCuSolveQR(const MatrixViewType &A, const IntView &r, - const IntView &c, const VectorViewType &X, + Functor_Test_SparseCuSolveQR(const MatrixViewType &A, const IntView &r, const IntView &c, const VectorViewType &X, const VectorViewType &B) : _A(A), _r(r), _c(c), _X(X), _B(B) {} @@ -94,10 +92,8 @@ struct Functor_Test_SparseCuSolveQR { cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -110,10 +106,8 @@ struct Functor_Test_SparseCuSolveQR { auto 
b = Kokkos::subview(_B, i, Kokkos::ALL).data(); auto x = Kokkos::subview(_X, i, Kokkos::ALL).data(); - cusolverSpDcsrlsvqr(handle, m, nnz, descrA, csrValA, _r.data(), _c.data(), - b, tol, reorder, x, singularity); - if (singularity[0] != -1) - std::cout << " Error ! " << singularity[0] << " " << m << std::endl; + cusolverSpDcsrlsvqr(handle, m, nnz, descrA, csrValA, _r.data(), _c.data(), b, tol, reorder, x, singularity); + if (singularity[0] != -1) std::cout << " Error ! " << singularity[0] << " " << m << std::endl; } exec_space().fence(); @@ -124,8 +118,7 @@ struct Functor_Test_SparseCuSolveQR { } }; -template +template struct Functor_Test_Block_SparseCuSolveQR { const MatrixViewType _A; const IntView _r; @@ -134,9 +127,8 @@ struct Functor_Test_Block_SparseCuSolveQR { const VectorViewType _B; KOKKOS_INLINE_FUNCTION - Functor_Test_Block_SparseCuSolveQR(const MatrixViewType &A, const IntView &r, - const IntView &c, const VectorViewType &X, - const VectorViewType &B) + Functor_Test_Block_SparseCuSolveQR(const MatrixViewType &A, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B) : _A(A), _r(r), _c(c), _X(X), _B(B) {} inline double run() { @@ -155,10 +147,8 @@ struct Functor_Test_Block_SparseCuSolveQR { cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -180,15 +170,12 @@ struct Functor_Test_Block_SparseCuSolveQR { rowOffsets_host(0) = 0; for (size_t i = 0; i < N; ++i) { for (size_t row = 0; row < m; ++row) { - const size_t current_row_index = i * m + row; - const size_t row_length = _r_host(row + 1) - 
_r_host(row); - rowOffsets_host(current_row_index + 1) = - rowOffsets_host(current_row_index) + row_length; + const size_t current_row_index = i * m + row; + const size_t row_length = _r_host(row + 1) - _r_host(row); + rowOffsets_host(current_row_index + 1) = rowOffsets_host(current_row_index) + row_length; for (size_t nnz_row = 0; nnz_row < row_length; ++nnz_row) { - const size_t current_block_nnz_index = - rowOffsets_host(current_row_index) + nnz_row; - const size_t current_block_col_index = - _c_host(_r_host(row) + nnz_row) + i * m; + const size_t current_block_nnz_index = rowOffsets_host(current_row_index) + nnz_row; + const size_t current_block_col_index = _c_host(_r_host(row) + nnz_row) + i * m; colIndices_host(current_block_nnz_index) = current_block_col_index; } } @@ -204,12 +191,10 @@ struct Functor_Test_Block_SparseCuSolveQR { auto b = _B.data(); auto x = _X.data(); - cusolverSpDcsrlsvqr(handle, block_m, block_nnz, descrA, csrValA, - rowOffsets.data(), colIndices.data(), b, tol, reorder, - x, singularity); + cusolverSpDcsrlsvqr(handle, block_m, block_nnz, descrA, csrValA, rowOffsets.data(), colIndices.data(), b, tol, + reorder, x, singularity); - if (singularity[0] != -1) - std::cout << " Error ! " << singularity[0] << " " << m << std::endl; + if (singularity[0] != -1) std::cout << " Error ! 
" << singularity[0] << " " << m << std::endl; exec_space().fence(); double sec = timer.seconds(); @@ -219,8 +204,7 @@ struct Functor_Test_Block_SparseCuSolveQR { } }; -template +template struct Functor_Test_SparseCuSolveChol { const MatrixViewType _A; const IntView _r; @@ -229,8 +213,7 @@ struct Functor_Test_SparseCuSolveChol { const VectorViewType _B; KOKKOS_INLINE_FUNCTION - Functor_Test_SparseCuSolveChol(const MatrixViewType &A, const IntView &r, - const IntView &c, const VectorViewType &X, + Functor_Test_SparseCuSolveChol(const MatrixViewType &A, const IntView &r, const IntView &c, const VectorViewType &X, const VectorViewType &B) : _A(A), _r(r), _c(c), _X(X), _B(B) {} @@ -247,10 +230,8 @@ struct Functor_Test_SparseCuSolveChol { cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -263,10 +244,8 @@ struct Functor_Test_SparseCuSolveChol { auto b = Kokkos::subview(_B, i, Kokkos::ALL).data(); auto x = Kokkos::subview(_X, i, Kokkos::ALL).data(); - cusolverSpDcsrlsvchol(handle, m, nnz, descrA, csrValA, _r.data(), - _c.data(), b, tol, reorder, x, singularity); - if (singularity[0] != -1) - std::cout << " Error ! " << singularity[0] << " " << m << std::endl; + cusolverSpDcsrlsvchol(handle, m, nnz, descrA, csrValA, _r.data(), _c.data(), b, tol, reorder, x, singularity); + if (singularity[0] != -1) std::cout << " Error ! 
" << singularity[0] << " " << m << std::endl; } exec_space().fence(); @@ -277,8 +256,7 @@ struct Functor_Test_SparseCuSolveChol { } }; -template +template struct Functor_Test_Block_SparseCuSolveChol { const MatrixViewType _A; const IntView _r; @@ -287,10 +265,8 @@ struct Functor_Test_Block_SparseCuSolveChol { const VectorViewType _B; KOKKOS_INLINE_FUNCTION - Functor_Test_Block_SparseCuSolveChol(const MatrixViewType &A, - const IntView &r, const IntView &c, - const VectorViewType &X, - const VectorViewType &B) + Functor_Test_Block_SparseCuSolveChol(const MatrixViewType &A, const IntView &r, const IntView &c, + const VectorViewType &X, const VectorViewType &B) : _A(A), _r(r), _c(c), _X(X), _B(B) {} inline double run() { @@ -309,10 +285,8 @@ struct Functor_Test_Block_SparseCuSolveChol { cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -334,15 +308,12 @@ struct Functor_Test_Block_SparseCuSolveChol { rowOffsets_host(0) = 0; for (size_t i = 0; i < N; ++i) { for (size_t row = 0; row < m; ++row) { - const size_t current_row_index = i * m + row; - const size_t row_length = _r_host(row + 1) - _r_host(row); - rowOffsets_host(current_row_index + 1) = - rowOffsets_host(current_row_index) + row_length; + const size_t current_row_index = i * m + row; + const size_t row_length = _r_host(row + 1) - _r_host(row); + rowOffsets_host(current_row_index + 1) = rowOffsets_host(current_row_index) + row_length; for (size_t nnz_row = 0; nnz_row < row_length; ++nnz_row) { - const size_t current_block_nnz_index = - rowOffsets_host(current_row_index) + nnz_row; - 
const size_t current_block_col_index = - _c_host(_r_host(row) + nnz_row) + i * m; + const size_t current_block_nnz_index = rowOffsets_host(current_row_index) + nnz_row; + const size_t current_block_col_index = _c_host(_r_host(row) + nnz_row) + i * m; colIndices_host(current_block_nnz_index) = current_block_col_index; } } @@ -358,11 +329,9 @@ struct Functor_Test_Block_SparseCuSolveChol { auto b = _B.data(); auto x = _X.data(); - cusolverSpDcsrlsvchol(handle, block_m, block_nnz, descrA, csrValA, - rowOffsets.data(), colIndices.data(), b, tol, reorder, - x, singularity); - if (singularity[0] != -1) - std::cout << " Error ! " << singularity[0] << " " << m << std::endl; + cusolverSpDcsrlsvchol(handle, block_m, block_nnz, descrA, csrValA, rowOffsets.data(), colIndices.data(), b, tol, + reorder, x, singularity); + if (singularity[0] != -1) std::cout << " Error ! " << singularity[0] << " " << m << std::endl; exec_space().fence(); double sec = timer.seconds(); @@ -407,12 +376,9 @@ int main(int argc, char *argv[]) { if (token == std::string("-X")) name_X = argv[++i]; if (token == std::string("-timers")) name_timer = argv[++i]; if (token == std::string("-team_size")) team_size = std::atoi(argv[++i]); - if (token == std::string("-vector_length")) - vector_length = std::atoi(argv[++i]); - if (token == std::string("-n_implementations")) - n_impl = std::atoi(argv[++i]); - if (token == std::string("-implementation")) - impls.push_back(std::atoi(argv[++i])); + if (token == std::string("-vector_length")) vector_length = std::atoi(argv[++i]); + if (token == std::string("-n_implementations")) n_impl = std::atoi(argv[++i]); + if (token == std::string("-implementation")) impls.push_back(std::atoi(argv[++i])); if (token == std::string("-l")) { layout_left = true; layout_right = false; @@ -437,8 +403,7 @@ int main(int argc, char *argv[]) { constexpr size_t LLC_CAPACITY = 80 * 6 * 1024 * 1024; KokkosBatched::Flush flush; - printf(" :::: CusolverSp Testing (N = %d, Blk = %d, vl = %d, n = 
%d)\n", N, - Blk, vector_length, n_rep_1); + printf(" :::: CusolverSp Testing (N = %d, Blk = %d, vl = %d, n = %d)\n", N, Blk, vector_length, n_rep_1); typedef Kokkos::LayoutRight LR; typedef Kokkos::LayoutLeft LL; @@ -460,10 +425,8 @@ int main(int argc, char *argv[]) { XYTypeLL xLL("values", N, Blk); XYTypeLL yLL("values", N, Blk); - if (layout_left) - printf(" :::: Testing left layout (team_size = %d)\n", team_size); - if (layout_right) - printf(" :::: Testing right layout (team_size = %d)\n", team_size); + if (layout_left) printf(" :::: Testing left layout (team_size = %d)\n", team_size); + if (layout_right) printf(" :::: Testing right layout (team_size = %d)\n", team_size); if (layout_left) { readCRSFromMM(name_A, valuesLL, rowOffsets, colIndices); @@ -490,34 +453,28 @@ int main(int argc, char *argv[]) { if (i_impl == 0) { if (layout_right) { - t_spmv = Functor_Test_SparseCuSolveQR( - valuesLR, rowOffsets, colIndices, xLR, yLR) + t_spmv = Functor_Test_SparseCuSolveQR(valuesLR, rowOffsets, + colIndices, xLR, yLR) .run(); } } if (i_impl == 1) { if (layout_right) { - t_spmv = - Functor_Test_SparseCuSolveChol( - valuesLR, rowOffsets, colIndices, xLR, yLR) - .run(); + t_spmv = Functor_Test_SparseCuSolveChol( + valuesLR, rowOffsets, colIndices, xLR, yLR) + .run(); } } if (i_impl == 2) { if (layout_right) { - t_spmv = - Functor_Test_Block_SparseCuSolveQR( - valuesLR, rowOffsets, colIndices, xLR, yLR) - .run(); + t_spmv = Functor_Test_Block_SparseCuSolveQR( + valuesLR, rowOffsets, colIndices, xLR, yLR) + .run(); } } if (i_impl == 3) { if (layout_right) { - t_spmv = Functor_Test_Block_SparseCuSolveChol< - exec_space, AMatrixValueViewLR, IntView, XYTypeLR>( + t_spmv = Functor_Test_Block_SparseCuSolveChol( valuesLR, rowOffsets, colIndices, xLR, yLR) .run(); } @@ -533,10 +490,8 @@ int main(int argc, char *argv[]) { { std::ofstream myfile; std::string name; - if (layout_left) - name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; - if (layout_right) - name = 
name_timer + "_" + std::to_string(i_impl) + "_right.txt"; + if (layout_left) name = name_timer + "_" + std::to_string(i_impl) + "_left.txt"; + if (layout_right) name = name_timer + "_" + std::to_string(i_impl) + "_right.txt"; myfile.open(name); @@ -549,15 +504,10 @@ int main(int argc, char *argv[]) { double average_time = 0.; - for (size_t i = 0; i < timers.size(); ++i) - average_time += timers[i] / timers.size(); + for (size_t i = 0; i < timers.size(); ++i) average_time += timers[i] / timers.size(); - if (layout_left) - printf("Left layout: Implementation %d: solve time = %f\n", i_impl, - average_time); - if (layout_right) - printf("Right layout: Implementation %d: solve time = %f\n", i_impl, - average_time); + if (layout_left) printf("Left layout: Implementation %d: solve time = %f\n", i_impl, average_time); + if (layout_right) printf("Right layout: Implementation %d: solve time = %f\n", i_impl, average_time); if (layout_left) { writeArrayToMM(name_X + std::to_string(i_impl) + "_l.mm", xLL); diff --git a/packages/kokkos-kernels/perf_test/blas/KokkosBlas_blas1.cpp b/packages/kokkos-kernels/perf_test/blas/KokkosBlas_blas1.cpp index 52d2cd4b42f8..b9471dee3734 100644 --- a/packages/kokkos-kernels/perf_test/blas/KokkosBlas_blas1.cpp +++ b/packages/kokkos-kernels/perf_test/blas/KokkosBlas_blas1.cpp @@ -40,8 +40,7 @@ RCP

  • entries( entry , i2 , i3 , ... );
  • ///
  • entries( col_map[i0] + i1 , i2 , i3 , ... );
  • /// -template ::size_type> +template ::size_type> class StaticCcsGraph { private: using traits = ViewTraits; @@ -82,12 +80,9 @@ class StaticCcsGraph { using memory_traits = typename traits::memory_traits; using size_type = SizeType; - using col_map_type = - View; - using entries_type = - View; - using row_block_type = - View; + using col_map_type = View; + using entries_type = View; + using row_block_type = View; entries_type entries; col_map_type col_map; @@ -98,21 +93,17 @@ class StaticCcsGraph { //! Copy constructor (shallow copy). KOKKOS_INLINE_FUNCTION - StaticCcsGraph(const StaticCcsGraph& rhs) - : entries(rhs.entries), col_map(rhs.col_map) {} + StaticCcsGraph(const StaticCcsGraph& rhs) : entries(rhs.entries), col_map(rhs.col_map) {} template - KOKKOS_INLINE_FUNCTION StaticCcsGraph(const EntriesType& entries_, - const ColMapType& col_map_) + KOKKOS_INLINE_FUNCTION StaticCcsGraph(const EntriesType& entries_, const ColMapType& col_map_) : entries(entries_), col_map(col_map_) {} /** \brief Return number of columns in the graph */ KOKKOS_INLINE_FUNCTION size_type numCols() const { - return (col_map.extent(0) != 0) - ? col_map.extent(0) - static_cast(1) - : static_cast(0); + return (col_map.extent(0) != 0) ? col_map.extent(0) - static_cast(1) : static_cast(0); } }; } // namespace Kokkos @@ -127,14 +118,10 @@ namespace KokkosSparse { /// accesses data. The default parameter suffices for most users. /// /// "Ccs" stands for "compressed column sparse." -template ::size_type> +template ::size_type> class CcsMatrix { - static_assert( - std::is_signed::value, - "CcsMatrix requires that OrdinalType is a signed integer type."); + static_assert(std::is_signed::value, "CcsMatrix requires that OrdinalType is a signed integer type."); public: //! Type of the matrix's execution space. @@ -155,15 +142,12 @@ class CcsMatrix { //! Type of each (column) index in the matrix. typedef OrdinalType ordinal_type; //! 
Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCcsGraph + typedef Kokkos::StaticCcsGraph staticccsgraph_type; //! Type of the "column map" (which contains the offset for each column's //! data). typedef typename staticccsgraph_type::col_map_type col_map_type; - typedef Kokkos::View - values_type; + typedef Kokkos::View values_type; //! Type of column indices in the sparse matrix. typedef typename staticccsgraph_type::entries_type index_type; @@ -202,16 +186,12 @@ class CcsMatrix { /// each column). /// \param rows [in] The row indices. // clang-format on - CcsMatrix(const std::string& /* label */, const OrdinalType nrows, - const OrdinalType ncols, const size_type annz, - const values_type& vals, const col_map_type& colmap, - const index_type& rows) + CcsMatrix(const std::string& /* label */, const OrdinalType nrows, const OrdinalType ncols, const size_type annz, + const values_type& vals, const col_map_type& colmap, const index_type& rows) : graph(rows, colmap), values(vals), numRows_(nrows) { - const ordinal_type actualNumRows = - (colmap.extent(0) != 0) - ? static_cast(colmap.extent(0) - - static_cast(1)) - : static_cast(0); + const ordinal_type actualNumRows = (colmap.extent(0) != 0) + ? static_cast(colmap.extent(0) - static_cast(1)) + : static_cast(0); if (ncols != actualNumRows) { std::ostringstream os; os << "Input argument ncols = " << ncols @@ -222,16 +202,13 @@ class CcsMatrix { } if (annz != nnz()) { std::ostringstream os; - os << "Input argument annz = " << annz << " != this->nnz () = " << nnz() - << "."; + os << "Input argument annz = " << annz << " != this->nnz () = " << nnz() << "."; throw std::invalid_argument(os.str()); } } //! The number of rows in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numCols() const { - return graph.numCols(); - } + KOKKOS_INLINE_FUNCTION ordinal_type numCols() const { return graph.numCols(); } //! The number of columns in the sparse matrix. 
KOKKOS_INLINE_FUNCTION ordinal_type numRows() const { return numRows_; } @@ -245,9 +222,7 @@ class CcsMatrix { KOKKOS_INLINE_FUNCTION ordinal_type numPointCols() const { return numCols(); } //! The number of stored entries in the sparse matrix. - KOKKOS_INLINE_FUNCTION size_type nnz() const { - return graph.entries.extent(0); - } + KOKKOS_INLINE_FUNCTION size_type nnz() const { return graph.entries.extent(0); } }; /// \class is_ccs_matrix diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_CooMatrix.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_CooMatrix.hpp index 30a41ba11c3f..996b3c29aaf2 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_CooMatrix.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_CooMatrix.hpp @@ -40,10 +40,8 @@ namespace KokkosSparse { /// \tparam MemoryTraits Traits describing how Kokkos manages and /// accesses data. The default parameter suffices for most users. /// "Coo" stands for "coordinate format". -template ::size_type> +template ::size_type> class CooMatrix { public: //! Type of each value in the matrix @@ -73,21 +71,16 @@ class CooMatrix { //! Type of all integral class members using size_type = SizeType; - static_assert(std::is_integral_v, - "OrdinalType must be an integral."); + static_assert(std::is_integral_v, "OrdinalType must be an integral."); //! The type of the row index view in the matrix - using row_view = - Kokkos::View; + using row_view = Kokkos::View; //! The type of the column index view in the matrix - using column_view = Kokkos::View; + using column_view = Kokkos::View; //! The type of the scalar values view in the matrix - using scalar_view = Kokkos::View; + using scalar_view = Kokkos::View; //! The type of a constant CooMatrix - using const_type = CooMatrix; + using const_type = CooMatrix; private: size_type m_num_rows, m_num_cols; @@ -113,15 +106,9 @@ class CooMatrix { /// \param col_in [in] The column indexes. /// \param data_in [in] The values. 
// clang-format on - CooMatrix(size_type nrows, size_type ncols, row_view row_in, - column_view col_in, scalar_view data_in) - : m_num_rows(nrows), - m_num_cols(ncols), - m_row(row_in), - m_col(col_in), - m_data(data_in) { - if (m_data.extent(0) != m_row.extent(0) || - m_row.extent(0) != m_col.extent(0)) { + CooMatrix(size_type nrows, size_type ncols, row_view row_in, column_view col_in, scalar_view data_in) + : m_num_rows(nrows), m_num_cols(ncols), m_row(row_in), m_col(col_in), m_data(data_in) { + if (m_data.extent(0) != m_row.extent(0) || m_row.extent(0) != m_col.extent(0)) { std::ostringstream os; os << "data.extent(0): " << m_data.extent(0) << " != " << "row.extent(0): " << m_row.extent(0) << " != " diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_CrsMatrix.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_CrsMatrix.hpp index ce9ec99e4e5b..86586401cd78 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_CrsMatrix.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_CrsMatrix.hpp @@ -78,8 +78,7 @@ struct DeviceConfig { struct Dim3 { size_t x, y, z; KOKKOS_INLINE_FUNCTION - Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) - : x(x_), y(y_), z(z_) {} + Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) : x(x_), y(y_), z(z_) {} }; Dim3 block_dim; @@ -87,12 +86,9 @@ struct DeviceConfig { size_t num_threads_per_block; KOKKOS_INLINE_FUNCTION - DeviceConfig(const size_t num_blocks_ = 0, - const size_t threads_per_block_x_ = 0, - const size_t threads_per_block_y_ = 0, - const size_t threads_per_block_z_ = 1) - : block_dim(threads_per_block_x_, threads_per_block_y_, - threads_per_block_z_), + DeviceConfig(const size_t num_blocks_ = 0, const size_t threads_per_block_x_ = 0, + const size_t threads_per_block_y_ = 0, const size_t threads_per_block_z_ = 1) + : block_dim(threads_per_block_x_, threads_per_block_y_, threads_per_block_z_), num_blocks(num_blocks_), num_threads_per_block(block_dim.x * block_dim.y * 
block_dim.z) {} }; @@ -165,8 +161,8 @@ struct SparseRowView { /// each of the above arrays. /// \param count [in] Number of entries in the row. KOKKOS_INLINE_FUNCTION - SparseRowView(value_type* const values, ordinal_type* const colidx__, - const ordinal_type& stride, const ordinal_type& count) + SparseRowView(value_type* const values, ordinal_type* const colidx__, const ordinal_type& stride, + const ordinal_type& count) : values_(values), colidx_(colidx__), stride_(stride), length(count) {} /// \brief Constructor with offset into \c colidx array @@ -183,16 +179,10 @@ struct SparseRowView { /// \param idx [in] Start offset into \c colidx array template KOKKOS_INLINE_FUNCTION SparseRowView( - const typename MatrixType::values_type& values, - const typename MatrixType::index_type& colidx__, - const ordinal_type& stride, const ordinal_type& count, - const OffsetType& idx, - const typename std::enable_if::value, - int>::type& = 0) - : values_(&values(idx)), - colidx_(&colidx__(idx)), - stride_(stride), - length(count) {} + const typename MatrixType::values_type& values, const typename MatrixType::index_type& colidx__, + const ordinal_type& stride, const ordinal_type& count, const OffsetType& idx, + const typename std::enable_if::value, int>::type& = 0) + : values_(&values(idx)), colidx_(&colidx__(idx)), stride_(stride), length(count) {} /// \brief Number of entries in the row. /// @@ -211,9 +201,7 @@ struct SparseRowView { /// "Entry i" is not necessarily the entry with column index i, nor /// does i necessarily correspond to the (local) row index. KOKKOS_INLINE_FUNCTION - value_type& value(const ordinal_type& i) const { - return values_[i * stride_]; - } + value_type& value(const ordinal_type& i) const { return values_[i * stride_]; } /// \brief Reference to the column index of entry i in this row of the sparse /// matrix. 
@@ -221,9 +209,7 @@ struct SparseRowView { /// "Entry i" is not necessarily the entry with column index i, nor /// does i necessarily correspond to the (local) row index. KOKKOS_INLINE_FUNCTION - ordinal_type& colidx(const ordinal_type& i) const { - return colidx_[i * stride_]; - } + ordinal_type& colidx(const ordinal_type& i) const { return colidx_[i * stride_]; } }; /// \class SparseRowViewConst @@ -264,8 +250,8 @@ struct SparseRowViewConst { /// each of the above arrays. /// \param count [in] Number of entries in the row. KOKKOS_INLINE_FUNCTION - SparseRowViewConst(value_type* const values, ordinal_type* const colidx__, - const ordinal_type& stride, const ordinal_type& count) + SparseRowViewConst(value_type* const values, ordinal_type* const colidx__, const ordinal_type& stride, + const ordinal_type& count) : values_(values), colidx_(colidx__), stride_(stride), length(count) {} /// \brief Constructor with offset into \c colidx array @@ -282,16 +268,10 @@ struct SparseRowViewConst { /// \param idx [in] Start offset into \c colidx array template KOKKOS_INLINE_FUNCTION SparseRowViewConst( - const typename MatrixType::values_type& values, - const typename MatrixType::index_type& colidx__, - const ordinal_type& stride, const ordinal_type& count, - const OffsetType& idx, - const typename std::enable_if::value, - int>::type& = 0) - : values_(&values(idx)), - colidx_(&colidx__(idx)), - stride_(stride), - length(count) {} + const typename MatrixType::values_type& values, const typename MatrixType::index_type& colidx__, + const ordinal_type& stride, const ordinal_type& count, const OffsetType& idx, + const typename std::enable_if::value, int>::type& = 0) + : values_(&values(idx)), colidx_(&colidx__(idx)), stride_(stride), length(count) {} /// \brief Number of entries in the row. /// @@ -311,9 +291,7 @@ struct SparseRowViewConst { /// "Entry i" is not necessarily the entry with column index i, nor /// does i necessarily correspond to the (local) row index. 
KOKKOS_INLINE_FUNCTION - value_type& value(const ordinal_type& i) const { - return values_[i * stride_]; - } + value_type& value(const ordinal_type& i) const { return values_[i * stride_]; } /// \brief (Const) reference to the column index of entry i in this /// row of the sparse matrix. @@ -321,9 +299,7 @@ struct SparseRowViewConst { /// "Entry i" is not necessarily the entry with column index i, nor /// does i necessarily correspond to the (local) row index. KOKKOS_INLINE_FUNCTION - ordinal_type& colidx(const ordinal_type& i) const { - return colidx_[i * stride_]; - } + ordinal_type& colidx(const ordinal_type& i) const { return colidx_[i * stride_]; } }; /// \class CrsMatrix @@ -338,19 +314,13 @@ struct SparseRowViewConst { /// Trilinos traditionally uses to describe compressed sparse row /// storage for sparse matrices, as described, for example, in Saad /// (2nd ed.). -template ::size_type> +template class CrsMatrix { - static_assert( - std::is_signed::value, - "CrsMatrix requires that OrdinalType is a signed integer type."); + static_assert(std::is_signed::value, "CrsMatrix requires that OrdinalType is a signed integer type."); private: - typedef typename Kokkos::ViewTraits::host_mirror_space - host_mirror_space; + typedef typename Kokkos::ViewTraits::host_mirror_space host_mirror_space; public: //! Type of the matrix's execution space. @@ -372,16 +342,12 @@ class CrsMatrix { typedef SizeType size_type; //! Type of a host-memory mirror of the sparse matrix. - typedef CrsMatrix - HostMirror; + typedef CrsMatrix HostMirror; //! Type of the graph structure of the sparse matrix. - typedef Kokkos::StaticCrsGraph + typedef Kokkos::StaticCrsGraph StaticCrsGraphType; //! Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCrsGraph + typedef Kokkos::StaticCrsGraph staticcrsgraph_type; //! Type of column indices in the sparse matrix. 
typedef typename staticcrsgraph_type::entries_type index_type; @@ -396,17 +362,13 @@ class CrsMatrix { //! Nonconst version of the type of row offsets in the sparse matrix. typedef typename row_map_type::non_const_value_type non_const_size_type; //! Kokkos Array type of the entries (values) in the sparse matrix. - typedef Kokkos::View - values_type; + typedef Kokkos::View values_type; //! Const version of the type of the entries in the sparse matrix. typedef typename values_type::const_value_type const_value_type; //! Nonconst version of the type of the entries in the sparse matrix. typedef typename values_type::non_const_value_type non_const_value_type; - typedef CrsMatrix - const_type; + typedef CrsMatrix const_type; /// \name Storage of the actual sparsity structure and values. /// @@ -448,15 +410,9 @@ class CrsMatrix { CrsMatrix() : numCols_(0) {} //! Copy constructor (shallow copy). - template - KOKKOS_INLINE_FUNCTION CrsMatrix( - const CrsMatrix& - B) - : graph(B.graph.entries, B.graph.row_map), - values(B.values), - numCols_(B.numCols()), - dev_config(B.dev_config) { + template + KOKKOS_INLINE_FUNCTION CrsMatrix(const CrsMatrix& B) + : graph(B.graph.entries, B.graph.row_map), values(B.values), numCols_(B.numCols()), dev_config(B.dev_config) { graph.row_block_offsets = B.graph.row_block_offsets; // TODO: MD 07/2017: Changed the copy constructor of graph // as the constructor of StaticCrsGraph does not allow copy from non const @@ -464,18 +420,12 @@ class CrsMatrix { } //! 
Deep copy constructor (can cross spaces) - template - CrsMatrix(const std::string&, - const CrsMatrix& mat_) { - typename row_map_type::non_const_type rowmap( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap"), - mat_.graph.row_map.extent(0)); - index_type cols(Kokkos::view_alloc(Kokkos::WithoutInitializing, "cols"), - mat_.nnz()); - values = values_type( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), mat_.nnz()); + template + CrsMatrix(const std::string&, const CrsMatrix& mat_) { + typename row_map_type::non_const_type rowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap"), + mat_.graph.row_map.extent(0)); + index_type cols(Kokkos::view_alloc(Kokkos::WithoutInitializing, "cols"), mat_.nnz()); + values = values_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), mat_.nnz()); Kokkos::deep_copy(rowmap, mat_.graph.row_map); Kokkos::deep_copy(cols, mat_.graph.entries); Kokkos::deep_copy(values, mat_.values); @@ -487,14 +437,12 @@ class CrsMatrix { /// \brief Construct with a graph that will be shared. /// /// Allocate the values array for subsquent fill. - template - [ - [deprecated("Use the constructor that accepts ncols as input " - "instead.")]] CrsMatrix(const std::string& label, - const Kokkos::StaticCrsGraph< - InOrdinal, InLayout, InDevice, - InMemTraits, InSizeType>& graph_) + template + [[deprecated( + "Use the constructor that accepts ncols as input " + "instead.")]] CrsMatrix(const std::string& label, + const Kokkos::StaticCrsGraph& + graph_) : graph(graph_.entries, graph_.row_map), values(label, graph_.entries.extent(0)), numCols_(maximum_entry(graph_) + 1) {} @@ -507,15 +455,11 @@ class CrsMatrix { /// \param label [in] The sparse matrix's label. /// \param graph_ [in] The graph for storing the rowmap and col ids. /// \param ncols [in] The number of columns. 
- template + template CrsMatrix(const std::string& label, - const Kokkos::StaticCrsGraph& graph_, + const Kokkos::StaticCrsGraph& graph_, const OrdinalType& ncols) - : graph(graph_.entries, graph_.row_map), - values(label, graph_.entries.extent(0)), - numCols_(ncols) {} + : graph(graph_.entries, graph_.row_map), values(label, graph_.entries.extent(0)), numCols_(ncols) {} /// \brief Constructor that accepts a a static graph, and values. /// @@ -525,12 +469,9 @@ class CrsMatrix { /// \param ncols [in] The number of columns. /// \param vals [in/out] The entries. /// \param graph_ The graph for storing the rowmap and col ids. - template - CrsMatrix(const std::string&, const OrdinalType& ncols, - const values_type& vals, - const Kokkos::StaticCrsGraph& graph_) + template + CrsMatrix(const std::string&, const OrdinalType& ncols, const values_type& vals, + const Kokkos::StaticCrsGraph& graph_) : graph(graph_.entries, graph_.row_map), values(vals), numCols_(ncols) {} /// \brief Constructor that copies raw arrays of host data in @@ -554,30 +495,23 @@ class CrsMatrix { /// array must have length \c nrows+1. /// \param cols [in] The column indices. \c cols[k] is the column /// index of entry k, with a corresponding value of \c val[k] . 
- CrsMatrix(const std::string& /*label*/, OrdinalType nrows, OrdinalType ncols, - size_type annz, ScalarType* val, OrdinalType* rowmap, - OrdinalType* cols) { + CrsMatrix(const std::string& /*label*/, OrdinalType nrows, OrdinalType ncols, size_type annz, ScalarType* val, + OrdinalType* rowmap, OrdinalType* cols) { using Kokkos::Unmanaged; using HostRowmap = Kokkos::View; - using UnmanagedRowmap = Kokkos::View>; - using UnmanagedEntries = Kokkos::View>; - using UnmanagedValues = Kokkos::View>; + using UnmanagedRowmap = Kokkos::View>; + using UnmanagedEntries = Kokkos::View>; + using UnmanagedValues = Kokkos::View>; // Allocate device rowmap, entries, values views - typename row_map_type::non_const_type rowmapDevice( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap"), nrows + 1); - index_type entriesDevice( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries"), annz); + typename row_map_type::non_const_type rowmapDevice(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap"), + nrows + 1); + index_type entriesDevice(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries"), annz); // given rowmap in ordinal_type, so may need to convert to size_type // explicitly HostRowmap rowmapConverted; UnmanagedRowmap rowmapRaw; if (!std::is_same::value) { - rowmapConverted = HostRowmap( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap raw"), - nrows + 1); + rowmapConverted = HostRowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap raw"), nrows + 1); for (OrdinalType i = 0; i <= nrows; i++) rowmapConverted(i) = rowmap[i]; rowmapRaw = rowmapConverted; } else { @@ -589,8 +523,7 @@ class CrsMatrix { // Construct graph and populate all members this->numCols_ = ncols; this->graph = StaticCrsGraphType(entriesDevice, rowmapDevice); - this->values = values_type( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), annz); + this->values = values_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), annz); UnmanagedValues 
valuesRaw(val, annz); Kokkos::deep_copy(this->values, valuesRaw); } @@ -608,16 +541,12 @@ class CrsMatrix { /// \param rowmap [in] The row map (containing the offsets to the /// data in each row). /// \param cols [in] The column indices. - CrsMatrix(const std::string& /* label */, const OrdinalType nrows, - const OrdinalType ncols, const size_type annz, - const values_type& vals, const row_map_type& rowmap, - const index_type& cols) + CrsMatrix(const std::string& /* label */, const OrdinalType nrows, const OrdinalType ncols, const size_type annz, + const values_type& vals, const row_map_type& rowmap, const index_type& cols) : graph(cols, rowmap), values(vals), numCols_(ncols) { - const ordinal_type actualNumRows = - (rowmap.extent(0) != 0) - ? static_cast(rowmap.extent(0) - - static_cast(1)) - : static_cast(0); + const ordinal_type actualNumRows = (rowmap.extent(0) != 0) + ? static_cast(rowmap.extent(0) - static_cast(1)) + : static_cast(0); if (nrows != actualNumRows) { std::ostringstream os; os << "Input argument nrows = " << nrows @@ -628,29 +557,26 @@ class CrsMatrix { } if (annz != nnz()) { std::ostringstream os; - os << "Input argument annz = " << annz << " != this->nnz () = " << nnz() - << "."; + os << "Input argument annz = " << annz << " != this->nnz () = " << nnz() << "."; throw std::invalid_argument(os.str()); } } KOKKOS_INLINE_FUNCTION - OrdinalType sumIntoValues(const OrdinalType rowi, const OrdinalType cols[], - const OrdinalType ncol, const ScalarType vals[], - const bool is_sorted = false, + OrdinalType sumIntoValues(const OrdinalType rowi, const OrdinalType cols[], const OrdinalType ncol, + const ScalarType vals[], const bool is_sorted = false, const bool force_atomic = false) const { SparseRowView row_view = this->row(rowi); const ordinal_type length = row_view.length; - ordinal_type hint = 0; // Guess for offset of current column index in row + ordinal_type hint = 0; // Guess for offset of current column index in row ordinal_type numValid = 0; // 
number of valid local column indices for (ordinal_type i = 0; i < ncol; ++i) { // NOTE (mfh 19 Sep 2017) This assumes that row_view stores // column indices contiguously. It does, but one could imagine // changing that at some point. - const ordinal_type offset = findRelOffset(&(row_view.colidx(0)), length, - cols[i], hint, is_sorted); + const ordinal_type offset = findRelOffset(&(row_view.colidx(0)), length, cols[i], hint, is_sorted); if (offset != length) { if (force_atomic) { Kokkos::atomic_add(&(row_view.value(offset)), vals[i]); @@ -669,22 +595,20 @@ class CrsMatrix { } KOKKOS_INLINE_FUNCTION - OrdinalType replaceValues(const OrdinalType rowi, const OrdinalType cols[], - const OrdinalType ncol, const ScalarType vals[], - const bool is_sorted = false, + OrdinalType replaceValues(const OrdinalType rowi, const OrdinalType cols[], const OrdinalType ncol, + const ScalarType vals[], const bool is_sorted = false, const bool force_atomic = false) const { SparseRowView row_view = this->row(rowi); const ordinal_type length = row_view.length; - ordinal_type hint = 0; // Guess for offset of current column index in row + ordinal_type hint = 0; // Guess for offset of current column index in row ordinal_type numValid = 0; // number of valid local column indices for (ordinal_type i = 0; i < ncol; ++i) { // NOTE (mfh 19 Sep 2017) This assumes that row_view stores // column indices contiguously. It does, but one could imagine // changing that at some point. - const ordinal_type offset = findRelOffset(&(row_view.colidx(0)), length, - cols[i], hint, is_sorted); + const ordinal_type offset = findRelOffset(&(row_view.colidx(0)), length, cols[i], hint, is_sorted); if (offset != length) { if (force_atomic) { Kokkos::atomic_assign(&(row_view.value(offset)), vals[i]); @@ -703,10 +627,8 @@ class CrsMatrix { } //! Attempt to assign the input matrix to \c *this. 
- template - CrsMatrix& operator=(const CrsMatrix& mtx) { + template + CrsMatrix& operator=(const CrsMatrix& mtx) { numCols_ = mtx.numCols(); graph = mtx.graph; values = mtx.values; @@ -715,9 +637,7 @@ class CrsMatrix { } //! The number of rows in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numRows() const { - return graph.numRows(); - } + KOKKOS_INLINE_FUNCTION ordinal_type numRows() const { return graph.numRows(); } //! The number of columns in the sparse matrix. KOKKOS_INLINE_FUNCTION ordinal_type numCols() const { return numCols_; } @@ -731,9 +651,7 @@ class CrsMatrix { KOKKOS_INLINE_FUNCTION ordinal_type numPointCols() const { return numCols(); } //! The number of stored entries in the sparse matrix. - KOKKOS_INLINE_FUNCTION size_type nnz() const { - return graph.entries.extent(0); - } + KOKKOS_INLINE_FUNCTION size_type nnz() const { return graph.entries.extent(0); } friend struct SparseRowView; @@ -786,8 +704,7 @@ class CrsMatrix { const size_type start = graph.row_map(i); // count is guaranteed to fit in ordinal_type, as long as no row // has duplicate entries. - const ordinal_type count = - static_cast(graph.row_map(i + 1) - start); + const ordinal_type count = static_cast(graph.row_map(i + 1) - start); if (count == 0) { return SparseRowView(NULL, NULL, 1, 0); @@ -845,14 +762,12 @@ class CrsMatrix { const size_type start = graph.row_map(i); // count is guaranteed to fit in ordinal_type, as long as no row // has duplicate entries. 
- const ordinal_type count = - static_cast(graph.row_map(i + 1) - start); + const ordinal_type count = static_cast(graph.row_map(i + 1) - start); if (count == 0) { return SparseRowViewConst(NULL, NULL, 1, 0); } else { - return SparseRowViewConst(values, graph.entries, 1, count, - start); + return SparseRowViewConst(values, graph.entries, 1, count, start); } } }; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_IOUtils.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_IOUtils.hpp index 4704a8724c23..588c9dbca970 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_IOUtils.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_IOUtils.hpp @@ -19,16 +19,16 @@ #include "KokkosKernels_IOUtils.hpp" #include "KokkosSparse_CrsMatrix.hpp" +#include + namespace KokkosSparse { namespace Impl { // MD: Bases on Christian's sparseMatrix_generate function in test_crsmatrix.cpp // file. template -void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, - SizeType &nnz, OrdinalType row_size_variance, - OrdinalType bandwidth, ScalarType *&values, - SizeType *&rowPtr, OrdinalType *&colInd, +void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, SizeType &nnz, OrdinalType row_size_variance, + OrdinalType bandwidth, ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd, OrdinalType block_elem_count = 1) { rowPtr = new SizeType[nrows + 1]; @@ -73,8 +73,7 @@ void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, } // Sample each value from uniform (-50, 50) for real types, or (-50 - 50i, 50 // + 50i) for complex types. 
- Kokkos::View valuesView( - values, nnz * block_elem_count); + Kokkos::View valuesView(values, nnz * block_elem_count); ScalarType randStart, randEnd; KokkosKernels::Impl::getRandomBounds(50.0, randStart, randEnd); Kokkos::Random_XorShift64_Pool pool(13718); @@ -82,10 +81,9 @@ void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, } template -void kk_sparseMatrix_generate_lower_upper_triangle( - char uplo, OrdinalType nrows, OrdinalType ncols, SizeType &nnz, - OrdinalType /*row_size_variance*/, OrdinalType /*bandwidth*/, - ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd) { +void kk_sparseMatrix_generate_lower_upper_triangle(char uplo, OrdinalType nrows, OrdinalType ncols, SizeType &nnz, + OrdinalType /*row_size_variance*/, OrdinalType /*bandwidth*/, + ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd) { rowPtr = new SizeType[nrows + 1]; // OrdinalType elements_per_row = nnz/nrows; @@ -112,29 +110,23 @@ void kk_sparseMatrix_generate_lower_upper_triangle( } template -void kk_diagonally_dominant_sparseMatrix_generate( - OrdinalType nrows, OrdinalType ncols, SizeType &nnz, - OrdinalType row_size_variance, OrdinalType bandwidth, ScalarType *&values, - SizeType *&rowPtr, OrdinalType *&colInd, - ScalarType diagDominance = 10 * Kokkos::ArithTraits::one()) { +void kk_diagonally_dominant_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, SizeType &nnz, + OrdinalType row_size_variance, OrdinalType bandwidth, + ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd, + ScalarType diagDominance = 10 * + Kokkos::ArithTraits::one()) { rowPtr = new SizeType[nrows + 1]; OrdinalType elements_per_row = nnz / nrows; // Set a hard limit to the actual entries in any one row, so that the // loop to find a column not already taken will terminate quickly. 
- OrdinalType max_elements_per_row = 0.7 * bandwidth; - OrdinalType requested_max_elements_per_row = - elements_per_row + 0.5 * row_size_variance; + OrdinalType max_elements_per_row = 0.7 * bandwidth; + OrdinalType requested_max_elements_per_row = elements_per_row + 0.5 * row_size_variance; if (requested_max_elements_per_row > max_elements_per_row) { - std::cerr - << "kk_diagonally_dominant_sparseMatrix_generate: given the bandwidth (" - << bandwidth << "),\n"; - std::cerr << " can insert a maximum of " << max_elements_per_row - << " entries per row (0.7*bandwidth).\n"; - std::cerr << " But given the requested average entries per row of " - << elements_per_row << " and variance of " << row_size_variance - << ",\n"; - std::cerr << " there should be up to " << requested_max_elements_per_row - << " entries per row.\n"; + std::cerr << "kk_diagonally_dominant_sparseMatrix_generate: given the bandwidth (" << bandwidth << "),\n"; + std::cerr << " can insert a maximum of " << max_elements_per_row << " entries per row (0.7*bandwidth).\n"; + std::cerr << " But given the requested average entries per row of " << elements_per_row << " and variance of " + << row_size_variance << ",\n"; + std::cerr << " there should be up to " << requested_max_elements_per_row << " entries per row.\n"; std::cerr << " Increase the bandwidth, or decrease nnz and/or " "row_size_variance.\n"; throw std::invalid_argument( @@ -146,12 +138,11 @@ void kk_diagonally_dominant_sparseMatrix_generate( for (int row = 0; row < nrows; row++) { // variance is how many more (or less) entries this row has compared to the // mean (elements_per_row). 
- OrdinalType variance = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; + OrdinalType variance = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; OrdinalType entries_in_row = elements_per_row + variance; // Always have at least one entry (for the diagonal) if (entries_in_row < 1) entries_in_row = 1; - if (entries_in_row > max_elements_per_row) - entries_in_row = max_elements_per_row; + if (entries_in_row > max_elements_per_row) entries_in_row = max_elements_per_row; rowPtr[row + 1] = rowPtr[row] + entries_in_row; if (rowPtr[row + 1] <= rowPtr[row]) // This makes sure that there is rowPtr[row + 1] = rowPtr[row] + 1; // at least one nonzero in the row @@ -192,8 +183,7 @@ void kk_diagonally_dominant_sparseMatrix_generate( // The elements on the diagonal are 1, 2, ..., n-1, n. // If "invert" is true, it will return the inverse of the above diagonal matrix. template -crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, - const bool invert = false) { +crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, const bool invert = false) { typedef typename crsMat_t::ordinal_type ot; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; @@ -209,12 +199,9 @@ crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, values_view_t values_view("values_view", n); { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); + typename row_map_view_t::HostMirror hr = Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = Kokkos::create_mirror_view(values_view); for (lno_t i = 0; i <= n; ++i) { hr(i) = size_type(i); @@ -240,13 +227,11 @@ crsMat_t 
kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, template crsMat_t kk_generate_diagonally_dominant_sparse_matrix( - typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type nrows, typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, typename crsMat_t::const_ordinal_type row_size_variance, typename crsMat_t::const_ordinal_type bandwidth, - typename crsMat_t::const_value_type diagDominance = - 10 * Kokkos::ArithTraits::one()) { + typename crsMat_t::const_value_type diagDominance = 10 * + Kokkos::ArithTraits::one()) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; typedef typename graph_t::entries_type::non_const_type cols_view_t; @@ -259,21 +244,17 @@ crsMat_t kk_generate_diagonally_dominant_sparse_matrix( size_type *xadj; //, nnzA; scalar_t *values; - kk_diagonally_dominant_sparseMatrix_generate( - nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj, - diagDominance); + kk_diagonally_dominant_sparseMatrix_generate(nrows, ncols, nnz, row_size_variance, + bandwidth, values, xadj, adj, diagDominance); row_map_view_t rowmap_view("rowmap_view", nrows + 1); cols_view_t columns_view("colsmap_view", nnz); values_view_t values_view("values_view", nnz); { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); + typename row_map_view_t::HostMirror hr = Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = Kokkos::create_mirror_view(values_view); 
for (lno_t i = 0; i <= nrows; ++i) { hr(i) = xadj[i]; @@ -297,12 +278,11 @@ crsMat_t kk_generate_diagonally_dominant_sparse_matrix( } template -crsMat_t kk_generate_triangular_sparse_matrix( - char uplo, typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth) { +crsMat_t kk_generate_triangular_sparse_matrix(char uplo, typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; typedef typename graph_t::entries_type::non_const_type cols_view_t; @@ -315,20 +295,17 @@ crsMat_t kk_generate_triangular_sparse_matrix( size_type *xadj; //, nnzA; scalar_t *values; - kk_sparseMatrix_generate_lower_upper_triangle( - uplo, nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); + kk_sparseMatrix_generate_lower_upper_triangle(uplo, nrows, ncols, nnz, row_size_variance, + bandwidth, values, xadj, adj); row_map_view_t rowmap_view("rowmap_view", nrows + 1); cols_view_t columns_view("colsmap_view", nnz); values_view_t values_view("values_view", nnz); { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); + typename row_map_view_t::HostMirror hr = Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = Kokkos::create_mirror_view(values_view); for (lno_t i = 0; i <= 
nrows; ++i) { hr(i) = xadj[i]; @@ -353,12 +330,11 @@ crsMat_t kk_generate_triangular_sparse_matrix( } template -crsMat_t kk_generate_sparse_matrix( - typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth) { +crsMat_t kk_generate_sparse_matrix(typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; typedef typename graph_t::entries_type::non_const_type cols_view_t; @@ -371,20 +347,17 @@ crsMat_t kk_generate_sparse_matrix( size_type *xadj; //, nnzA; scalar_t *values; - kk_sparseMatrix_generate( - nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); + kk_sparseMatrix_generate(nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, + adj); row_map_view_t rowmap_view("rowmap_view", nrows + 1); cols_view_t columns_view("colsmap_view", nnz); values_view_t values_view("values_view", nnz); { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); + typename row_map_view_t::HostMirror hr = Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = Kokkos::create_mirror_view(values_view); for (lno_t i = 0; i <= nrows; ++i) { hr(i) = xadj[i]; @@ -408,29 +381,26 @@ crsMat_t kk_generate_sparse_matrix( } template -bsrMat_t kk_generate_sparse_matrix( - 
typename bsrMat_t::const_ordinal_type block_dim, - typename bsrMat_t::const_ordinal_type nrows, - typename bsrMat_t::const_ordinal_type ncols, - typename bsrMat_t::non_const_size_type &nnz, - typename bsrMat_t::const_ordinal_type row_size_variance, - typename bsrMat_t::const_ordinal_type bandwidth) { - typedef KokkosSparse::CrsMatrix< - typename bsrMat_t::value_type, typename bsrMat_t::ordinal_type, - typename bsrMat_t::device_type, typename bsrMat_t::memory_traits, - typename bsrMat_t::size_type> +bsrMat_t kk_generate_sparse_matrix(typename bsrMat_t::const_ordinal_type block_dim, + typename bsrMat_t::const_ordinal_type nrows, + typename bsrMat_t::const_ordinal_type ncols, + typename bsrMat_t::non_const_size_type &nnz, + typename bsrMat_t::const_ordinal_type row_size_variance, + typename bsrMat_t::const_ordinal_type bandwidth) { + typedef KokkosSparse::CrsMatrix crsMat_t; - const auto crs_mtx = kk_generate_sparse_matrix( - nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth); + const auto crs_mtx = + kk_generate_sparse_matrix(nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth); bsrMat_t bsrmat(crs_mtx, block_dim); return bsrmat; } // TODO: need to fix the size_type. All over the reading inputs are lno_t. 
template -void convert_crs_to_lower_triangle_edge_list(idx nv, idx *xadj, idx *adj, - idx *lower_triangle_srcs, +void convert_crs_to_lower_triangle_edge_list(idx nv, idx *xadj, idx *adj, idx *lower_triangle_srcs, idx *lower_triangle_dests) { idx ind = 0; for (idx i = 0; i < nv; ++i) { @@ -458,8 +428,8 @@ void convert_crs_to_edge_list(idx nv, idx *xadj, idx *srcs) { } template -void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, - wt *ew, size_type *xadj, lno_t *adj, wt *crs_ew) { +void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, wt *ew, size_type *xadj, lno_t *adj, + wt *crs_ew) { std::vector> edges(ne); for (size_type i = 0; i < ne; ++i) { edges[i].src = srcs[i]; @@ -481,8 +451,7 @@ void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, } template -void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, - in_lno_t *dests, size_type *xadj, +void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, in_lno_t *dests, size_type *xadj, lno_t *adj) { std::vector> edges(ne * 2); for (size_type i = 0; i < ne; ++i) { @@ -497,10 +466,8 @@ void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, #include #include #include - __gnu_parallel::parallel_sort_mwms< - false, true, struct KokkosKernels::Impl::Edge *>( - &(edges[0]), &(edges[0]) + ne * 2, - std::less>(), 64); + __gnu_parallel::parallel_sort_mwms *>( + &(edges[0]), &(edges[0]) + ne * 2, std::less>(), 64); #else std::sort(edges.begin(), edges.begin() + ne * 2); #endif @@ -518,8 +485,7 @@ void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, } template -void write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, +void write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, const lno_t *adj, const scalar_t *ew, const char *filename) { std::ofstream myFile(filename, std::ios::out | 
std::ios::binary); myFile.write((char *)&nv, sizeof(lno_t)); @@ -534,8 +500,7 @@ void write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, } template -void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, +void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, const lno_t *adj, const scalar_t *ew, const char *filename) { std::ofstream myFile(filename, std::ios::out); myFile << nv << " " << ne << std::endl; @@ -562,8 +527,7 @@ void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, } template -void write_graph_ligra(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t * /*ew*/, +void write_graph_ligra(lno_t nv, size_type ne, const size_type *xadj, const lno_t *adj, const scalar_t * /*ew*/, const char *filename) { std::ofstream ff(filename); ff << "AdjacencyGraph" << std::endl; @@ -646,8 +610,7 @@ scalar_t symmetryFlip(scalar_t val, MtxSym symFlag) { } template <> -inline Kokkos::complex symmetryFlip(Kokkos::complex val, - MtxSym symFlag) { +inline Kokkos::complex symmetryFlip(Kokkos::complex val, MtxSym symFlag) { if (symFlag == HERMITIAN) return Kokkos::conj(val); else if (symFlag == SKEW_SYMMETRIC) @@ -656,8 +619,7 @@ inline Kokkos::complex symmetryFlip(Kokkos::complex val, } template <> -inline Kokkos::complex symmetryFlip(Kokkos::complex val, - MtxSym symFlag) { +inline Kokkos::complex symmetryFlip(Kokkos::complex val, MtxSym symFlag) { if (symFlag == HERMITIAN) return Kokkos::conj(val); else if (symFlag == SKEW_SYMMETRIC) @@ -667,13 +629,11 @@ inline Kokkos::complex symmetryFlip(Kokkos::complex val, } // namespace MM template -void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, - const size_type *xadj, const lno_t *adj, +void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, const size_type *xadj, const lno_t *adj, const scalar_t *vals, const char *filename) { std::ofstream myFile(filename); myFile << "%%MatrixMarket 
matrix coordinate "; - if (std::is_same>::value || - std::is_same>::value) + if (std::is_same>::value || std::is_same>::value) myFile << "complex"; else myFile << "real"; @@ -693,13 +653,11 @@ void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, } template -void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, +void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, const lno_t *adj, const scalar_t *ew, const char *filename) { std::ofstream myFile(filename); myFile << "%%MatrixMarket matrix coordinate "; - if (std::is_same>::value || - std::is_same>::value) + if (std::is_same>::value || std::is_same>::value) myFile << "complex"; else myFile << "real"; @@ -720,8 +678,7 @@ void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, } template -void read_graph_bin(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { +void read_graph_bin(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, scalar_t **ew, const char *filename) { std::ifstream myFile(filename, std::ios::in | std::ios::binary); myFile.read((char *)nv, sizeof(lno_t)); @@ -759,8 +716,7 @@ inline Kokkos::complex parseScalar(std::istream &is) { } template -void read_graph_crs(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { +void read_graph_crs(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, scalar_t **ew, const char *filename) { std::ifstream myFile(filename, std::ios::in); myFile >> *nv >> *ne; @@ -795,22 +751,17 @@ void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { size_type nnz = a_crsmat.nnz(); - auto a_rowmap_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), a_crsmat.graph.row_map); - auto a_entries_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), a_crsmat.graph.entries); - auto a_values_view = - 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.values); - offset_t *a_rowmap = const_cast(a_rowmap_view.data()); - lno_t *a_entries = a_entries_view.data(); - scalar_t *a_values = a_values_view.data(); + auto a_rowmap_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.graph.row_map); + auto a_entries_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.graph.entries); + auto a_values_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.values); + offset_t *a_rowmap = const_cast(a_rowmap_view.data()); + lno_t *a_entries = a_entries_view.data(); + scalar_t *a_values = a_values_view.data(); std::string strfilename(filename); - if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || - KokkosKernels::Impl::endswith(strfilename, ".mm")) { - write_matrix_mtx( - a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, - a_entries, a_values, filename); + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm")) { + write_matrix_mtx(a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, + a_entries, a_values, filename); return; } else if (a_crsmat.numRows() != a_crsmat.numCols()) { throw std::runtime_error( @@ -818,27 +769,21 @@ void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { "write_kokkos_crst_matrix only supports square matrices"); } if (KokkosKernels::Impl::endswith(strfilename, ".bin")) { - write_graph_bin( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + write_graph_bin(a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); } else if (KokkosKernels::Impl::endswith(strfilename, ".ligra")) { - write_graph_ligra( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + write_graph_ligra(a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); } else if (KokkosKernels::Impl::endswith(strfilename, ".crs")) { - write_graph_crs( - 
a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + write_graph_crs(a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); } else { - std::string errMsg = - std::string("write_kokkos_crst_matrix: File extension on ") + filename + - " does not correspond to a known format"; + std::string errMsg = std::string("write_kokkos_crst_matrix: File extension on ") + filename + + " does not correspond to a known format"; throw std::runtime_error(errMsg); } } template -int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, - size_type **xadj, lno_t **adj, scalar_t **ew, - bool symmetrize = false, bool remove_diagonal = true, - bool transpose = false) { +int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, bool symmetrize = false, bool remove_diagonal = true, bool transpose = false) { using namespace MM; std::ifstream mmf(fileName, std::ifstream::in); if (!mmf.is_open()) { @@ -863,8 +808,7 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, mtx_object = MATRIX; } else if (fline.find("vector") != std::string::npos) { mtx_object = VECTOR; - throw std::runtime_error( - "MatrixMarket \"vector\" is not supported by KokkosKernels read_mtx()"); + throw std::runtime_error("MatrixMarket \"vector\" is not supported by KokkosKernels read_mtx()"); } if (fline.find("coordinate") != std::string::npos) { @@ -875,8 +819,7 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, mtx_format = ARRAY; } - if (fline.find("real") != std::string::npos || - fline.find("double") != std::string::npos) { + if (fline.find("real") != std::string::npos || fline.find("double") != std::string::npos) { if (std::is_same::value || std::is_same::value) mtx_field = REAL; @@ -897,8 +840,7 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, else mtx_field = COMPLEX; } else if (fline.find("integer") != 
std::string::npos) { - if (std::is_integral::value || - std::is_floating_point::value || + if (std::is_integral::value || std::is_floating_point::value || std::is_same::value || std::is_same::value) mtx_field = INTEGER; @@ -919,8 +861,7 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, } else if (fline.find("symmetric") != std::string::npos) { // checking for "symmetric" after "skew-symmetric" because it's a substring mtx_sym = SYMMETRIC; - } else if (fline.find("hermitian") != std::string::npos || - fline.find("Hermitian") != std::string::npos) { + } else if (fline.find("hermitian") != std::string::npos || fline.find("Hermitian") != std::string::npos) { mtx_sym = HERMITIAN; } // Validate the matrix attributes @@ -931,17 +872,10 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, "array format MatrixMarket file must have general symmetry (optional " "to include \"general\")"); } - if (mtx_object == UNDEFINED_OBJECT) - throw std::runtime_error( - "MatrixMarket file header is missing the object type."); - if (mtx_format == UNDEFINED_FORMAT) - throw std::runtime_error("MatrixMarket file header is missing the format."); - if (mtx_field == UNDEFINED_FIELD) - throw std::runtime_error( - "MatrixMarket file header is missing the field type."); - if (mtx_sym == UNDEFINED_SYMMETRY) - throw std::runtime_error( - "MatrixMarket file header is missing the symmetry type."); + if (mtx_object == UNDEFINED_OBJECT) throw std::runtime_error("MatrixMarket file header is missing the object type."); + if (mtx_format == UNDEFINED_FORMAT) throw std::runtime_error("MatrixMarket file header is missing the format."); + if (mtx_field == UNDEFINED_FIELD) throw std::runtime_error("MatrixMarket file header is missing the field type."); + if (mtx_sym == UNDEFINED_SYMMETRY) throw std::runtime_error("MatrixMarket file header is missing the symmetry type."); while (1) { getline(mmf, fline); @@ -962,19 +896,15 @@ int read_mtx(const char 
*fileName, lno_t *nrows, lno_t *ncols, size_type *ne, } if (mtx_format == ARRAY) { // Array format only supports general symmetry and non-pattern - if (symmetrize) - throw std::runtime_error( - "array format MatrixMarket file cannot be symmetrized."); + if (symmetrize) throw std::runtime_error("array format MatrixMarket file cannot be symmetrized."); if (mtx_field == PATTERN) - throw std::runtime_error( - "array format MatrixMarket file can't have \"pattern\" field type."); + throw std::runtime_error("array format MatrixMarket file can't have \"pattern\" field type."); } if (symmetrize) { numEdges = 2 * nnz; } // numEdges is only an upper bound (diagonal entries may be removed) - std::vector> edges( - numEdges); + std::vector> edges(numEdges); size_type nE = 0; lno_t numDiagonal = 0; for (size_type i = 0; i < nnz; ++i) { @@ -1048,8 +978,7 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, (*xadj)[i] = actual; bool is_first = true; while (eind < nE && edges[eind].src == i) { - if (is_first || !symmetrize || eind == 0 || - (eind > 0 && edges[eind - 1].dst != edges[eind].dst)) { + if (is_first || !symmetrize || eind == 0 || (eind > 0 && edges[eind - 1].dst != edges[eind].dst)) { (*adj)[actual] = edges[eind].dst; (*ew)[actual] = edges[eind].ew; ++actual; @@ -1063,27 +992,235 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, return 0; } +/** + * Read a matrix from a file using the Harwell-Boeing Exchange Format + */ +template +int read_hb(const char *fileName, lno_t &nrows, lno_t &ncols, size_type &ne, size_type **xadj, lno_t **adj, + scalar_t **ew) { + using namespace MM; + + std::ifstream mmf(fileName, std::ifstream::in); + if (!mmf.is_open()) { + throw std::runtime_error("File cannot be opened\n"); + } + + // Get the title line, don't need to do anything with that data + std::string fline = ""; + getline(mmf, fline); + + // Get metadata, rhs_lines is optional + getline(mmf, fline); + std::istringstream 
ss(fline); + size_type total_lines = 0, ptr_lines = 0, col_lines = 0, val_lines = 0, rhs_lines = 0; + + ss >> total_lines >> ptr_lines >> col_lines >> val_lines >> rhs_lines; + ss.sync(); // This fixes tests on kokkos-dev/ipcp. + if (total_lines == 0 || ptr_lines == 0 || col_lines == 0) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + ", Line 2 did not have valid values"); + } + + if (rhs_lines > 0) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", reader does not support RHS info at this time."); + } + + // Get next line of metadata, neltvl is optional + getline(mmf, fline); + ss = std::istringstream(fline); + std::string matrix_info; + size_type nrow = 0, ncol = 0, nnz_raw = 0, neltvl = 0; + + ss >> matrix_info >> nrow >> ncol >> nnz_raw >> neltvl; + if (matrix_info.size() != 3 || nrow == 0 || ncol == 0 || nnz_raw == 0) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", Line 3 did not have valid values: " + fline); + } + + const char matrix_scalar = matrix_info[0]; + const char matrix_type_raw = matrix_info[1]; + const char matrix_assembly = matrix_info[2]; + + // check matrix_scalar matches scalar_t + if (matrix_scalar == 'R') { + if (!(std::is_same::value || + std::is_same::value || std::is_floating_point::value)) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", scalar_t in read_hb() incompatible with " + "float or double typed HB file."); + } + } else if (matrix_scalar == 'C') { + if (!(std::is_same>::value || + std::is_same>::value)) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", scalar_t in read_hb() incompatible with complex-typed HB file."); + } + } + if (matrix_assembly != 'A') { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", only assembled matrices are supported."); + } + + // Get next line of metadata + getline(mmf, fline); + ss = 
std::istringstream(fline); + std::string ptrfmt, indfmt, valfmt, rhsfmt; + ss >> ptrfmt >> indfmt >> valfmt >> rhsfmt; + + if (ptrfmt == "" || indfmt == "" || valfmt == "") { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + ", Line 4 did not have valid values"); + } + + // Examine mtx properties + const bool pattern_only = matrix_scalar == 'P'; + MtxSym matrix_type = MtxSym::GENERAL; + if (matrix_type_raw == 'S') matrix_type = MtxSym::SYMMETRIC; + if (matrix_type_raw == 'H') matrix_type = MtxSym::HERMITIAN; + if (matrix_type_raw == 'Z') matrix_type = MtxSym::SKEW_SYMMETRIC; + const bool symmetrize = matrix_type_raw == 'S' || matrix_type_raw == 'H' || matrix_type_raw == 'Z'; + + // Allocate temp storage + std::vector raw_rows(nrow + 1); + std::vector raw_cols(nnz_raw); + std::vector raw_vals(nnz_raw); + + // Read row_idx + size_type idx = 0; + for (size_type i = 0; i < ptr_lines; ++i) { + getline(mmf, fline); + ss = std::istringstream(fline); + size_type val; + while (ss >> val) { + raw_rows[idx++] = (val - 1); // HB uses 1-based indexing + } + } + if (idx != nrow + 1) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", did not find expected number of col ptrs"); + } + + // Read cols + idx = 0; + for (size_type i = 0; i < col_lines; ++i) { + getline(mmf, fline); + ss = std::istringstream(fline); + lno_t val; + while (ss >> val) { + raw_cols[idx++] = (val - 1); // HB uses 1-based indexing + } + } + if (idx != nnz_raw) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", did not find expected number of cols"); + } + + // Read vals if not pattern only + if (!pattern_only) { + idx = 0; + for (size_type i = 0; i < val_lines; ++i) { + getline(mmf, fline); + // The 'e' before the exponent is needed for the stringstream to read + // the value correctly + fline = std::regex_replace(fline, std::regex("([0-9])([+-])"), "$1e$2"); + ss = std::istringstream(fline); + while (ss) { + 
auto val = readScalar(ss); + // ss will be false if we read past the end + if (ss) { + raw_vals[idx++] = val; + } + } + } + if (idx != nnz_raw) { + throw std::runtime_error(std::string("Problem reading HB file ") + fileName + + ", did not find expected number of values"); + } + } else { + // Initialize to one + for (size_type i = 0; i < nnz_raw; ++i) { + raw_vals[i] = Kokkos::ArithTraits::one(); + } + } + + // Process raw data + size_type nnz = 0; // real nnz, differs from nnz_raw if symmetrize + if (symmetrize) { + const size_type numEdges = 2 * nnz_raw; + // numEdges is only an upper bound (diagonal entries may be removed) + std::vector> edges(numEdges); + for (size_type row_idx = 0; row_idx < nrow; ++row_idx) { + const size_type row_nnz_begin = raw_rows[row_idx]; + const size_type row_nnz_end = raw_rows[row_idx + 1]; + for (size_type row_nnz = row_nnz_begin; row_nnz < row_nnz_end; ++row_nnz) { + const lno_t col_idx = raw_cols[row_nnz]; + const scalar_t val = raw_vals[row_nnz]; + struct KokkosKernels::Impl::Edge tmp = {(lno_t)row_idx, col_idx, val}, tmp2 = { + col_idx, (lno_t)row_idx, symmetryFlip(val, matrix_type) + }; // symmetric edge + edges[nnz++] = tmp; + if (row_idx != (size_type)col_idx) { // non-diagonal + edges[nnz++] = tmp2; + } + } + } + std::sort(edges.begin(), edges.begin() + nnz); + + KokkosKernels::Impl::md_malloc(xadj, nrow + 1); + KokkosKernels::Impl::md_malloc(adj, nnz); + KokkosKernels::Impl::md_malloc(ew, nnz); + + size_type curr_nnz = 0; + for (size_type i = 0; i < nrow; ++i) { + (*xadj)[i] = curr_nnz; + while (curr_nnz < nnz && static_cast(edges[curr_nnz].src) == i) { + (*adj)[curr_nnz] = edges[curr_nnz].dst; + (*ew)[curr_nnz] = edges[curr_nnz].ew; + ++curr_nnz; + } + } + (*xadj)[nrow] = nnz; + } else { + KokkosKernels::Impl::md_malloc(xadj, nrow + 1); + KokkosKernels::Impl::md_malloc(adj, nnz_raw); + KokkosKernels::Impl::md_malloc(ew, nnz_raw); + + std::memcpy(*xadj, raw_rows.data(), raw_rows.size() * sizeof(size_type)); + 
std::memcpy(*adj, raw_cols.data(), raw_cols.size() * sizeof(lno_t)); + std::memcpy(*ew, raw_vals.data(), raw_vals.size() * sizeof(scalar_t)); + + nnz = nnz_raw; + } + + // Set outputs + nrows = nrow; + ncols = ncol; + ne = nnz; + + return 0; +} + // Version of read_mtx which does not capture the number of columns. // This is the old interface; it's kept for backwards compatibility. template -int read_mtx(const char *fileName, lno_t *nv, size_type *ne, size_type **xadj, - lno_t **adj, scalar_t **ew, bool symmetrize = false, - bool remove_diagonal = true, bool transpose = false) { +int read_mtx(const char *fileName, lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, scalar_t **ew, + bool symmetrize = false, bool remove_diagonal = true, bool transpose = false) { lno_t ncol; // will discard - return read_mtx(fileName, nv, &ncol, ne, xadj, - adj, ew, symmetrize, - remove_diagonal, transpose); + return read_mtx(fileName, nv, &ncol, ne, xadj, adj, ew, symmetrize, remove_diagonal, + transpose); } template -void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { +void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, scalar_t **ew, const char *filename) { std::string strfilename(filename); - if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || - KokkosKernels::Impl::endswith(strfilename, ".mm")) { + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm")) { read_mtx(filename, nv, ne, xadj, adj, ew, false, false, false); } + else if (KokkosKernels::Impl::endswith(strfilename, ".rsa") || KokkosKernels::Impl::endswith(strfilename, ".hb")) { + lno_t ncol; // will discard + read_hb(filename, *nv, ncol, *ne, xadj, adj, ew); + } + else if (KokkosKernels::Impl::endswith(strfilename, ".bin")) { read_graph_bin(nv, ne, xadj, adj, ew, filename); } @@ -1100,9 +1237,9 @@ void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, 
template crsMat_t read_kokkos_crst_matrix(const char *filename_) { std::string strfilename(filename_); - bool isMatrixMarket = KokkosKernels::Impl::endswith(strfilename, ".mtx") || - KokkosKernels::Impl::endswith(strfilename, ".mm"); - + bool isMatrixMarket = + KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm"); + bool isHB = KokkosKernels::Impl::endswith(strfilename, ".rsa") || KokkosKernels::Impl::endswith(strfilename, ".hb"); typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; typedef typename graph_t::entries_type::non_const_type cols_view_t; @@ -1117,14 +1254,14 @@ crsMat_t read_kokkos_crst_matrix(const char *filename_) { scalar_t *values; if (isMatrixMarket) { - // MatrixMarket file contains the exact number of columns - read_mtx(filename_, &nr, &nc, &nnzA, &xadj, - &adj, &values, false, false, false); + // MatrixMarket and HBE files contain the exact number of columns + read_mtx(filename_, &nr, &nc, &nnzA, &xadj, &adj, &values, false, false, false); + } else if (isHB) { + read_hb(filename_, nr, nc, nnzA, &xadj, &adj, &values); } else { //.crs and .bin files don't contain #cols, so will compute it later based on // the entries - read_matrix(&nr, &nnzA, &xadj, &adj, &values, - filename_); + read_matrix(&nr, &nnzA, &xadj, &adj, &values, filename_); } row_map_view_t rowmap_view("rowmap_view", nr + 1); @@ -1132,24 +1269,16 @@ crsMat_t read_kokkos_crst_matrix(const char *filename_) { values_view_t values_view("values_view", nnzA); { - Kokkos::View> - hr(xadj, nr + 1); - Kokkos::View> - hc(adj, nnzA); - Kokkos::View> - hv(values, nnzA); + Kokkos::View> hr(xadj, nr + 1); + Kokkos::View> hc(adj, nnzA); + Kokkos::View> hv(values, nnzA); Kokkos::deep_copy(rowmap_view, hr); Kokkos::deep_copy(columns_view, hc); Kokkos::deep_copy(values_view, hv); } - if (!isMatrixMarket) { - KokkosKernels::Impl::kk_view_reduce_max( - nnzA, columns_view, nc); + if 
(!(isMatrixMarket || isHB)) { + KokkosKernels::Impl::kk_view_reduce_max(nnzA, columns_view, nc); nc++; } @@ -1173,8 +1302,7 @@ crsGraph_t read_kokkos_crst_graph(const char *filename_) { lno_t nv, *adj; size_type *xadj, nnzA; scalar_t *values; - read_matrix(&nv, &nnzA, &xadj, &adj, &values, - filename_); + read_matrix(&nv, &nnzA, &xadj, &adj, &values, filename_); row_map_view_t rowmap_view("rowmap_view", nv + 1); cols_view_t columns_view("colsmap_view", nnzA); @@ -1193,9 +1321,8 @@ crsGraph_t read_kokkos_crst_graph(const char *filename_) { } template -inline void kk_sequential_create_incidence_matrix( - nnz_lno_t num_rows, const size_type *xadj, const nnz_lno_t *adj, - size_type *i_adj // output. preallocated +inline void kk_sequential_create_incidence_matrix(nnz_lno_t num_rows, const size_type *xadj, const nnz_lno_t *adj, + size_type *i_adj // output. preallocated ) { std::vector c_xadj(num_rows); for (nnz_lno_t i = 0; i < num_rows; i++) { @@ -1219,18 +1346,16 @@ inline void kk_sequential_create_incidence_matrix( for (nnz_lno_t i = 0; i < num_rows; i++) { if (c_xadj[i] != xadj[i + 1]) { - std::cout << "i:" << i << " c_xadj[i]:" << c_xadj[i] - << " xadj[i+1]:" << xadj[i + 1] << std::endl; + std::cout << "i:" << i << " c_xadj[i]:" << c_xadj[i] << " xadj[i+1]:" << xadj[i + 1] << std::endl; } } } template -inline void kk_sequential_create_incidence_matrix_transpose( - const nnz_lno_t num_rows, const size_type num_edges, const size_type *xadj, - const nnz_lno_t *adj, - size_type *i_xadj, // output. preallocated - nnz_lno_t *i_adj // output. preallocated +inline void kk_sequential_create_incidence_matrix_transpose(const nnz_lno_t num_rows, const size_type num_edges, + const size_type *xadj, const nnz_lno_t *adj, + size_type *i_xadj, // output. preallocated + nnz_lno_t *i_adj // output. 
preallocated ) { for (nnz_lno_t i = 0; i < num_edges / 2 + 1; i++) { i_xadj[i] = i * 2; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_LUPrec.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_LUPrec.hpp index d687c8dd4fb8..a4b62a28ba11 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_LUPrec.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_LUPrec.hpp @@ -44,6 +44,7 @@ template class LUPrec : public KokkosSparse::Experimental::Preconditioner { public: using ScalarType = typename std::remove_const::type; + using size_type = typename CRS::size_type; using EXSP = typename CRS::execution_space; using MEMSP = typename CRS::memory_space; using DEVICE = typename Kokkos::Device; @@ -60,20 +61,12 @@ class LUPrec : public KokkosSparse::Experimental::Preconditioner { public: //! Constructor: template - LUPrec(const CRSArg &L, const CRSArg &U) - : _L(L), - _U(U), - _tmp("LUPrec::_tmp", L.numPointRows()), - _tmp2("LUPrec::_tmp", L.numPointRows()), - _khL(), - _khU() { - KK_REQUIRE_MSG(L.numPointRows() == U.numPointRows(), - "LUPrec: L.numRows() != U.numRows()"); - - _khL.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, L.numRows(), - true); - _khU.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, U.numRows(), - false); + LUPrec(const CRSArg &L, const CRSArg &U, const size_type block_size = 0) + : _L(L), _U(U), _tmp("LUPrec::_tmp", L.numPointRows()), _tmp2("LUPrec::_tmp", L.numPointRows()), _khL(), _khU() { + KK_REQUIRE_MSG(L.numPointRows() == U.numPointRows(), "LUPrec: L.numRows() != U.numRows()"); + + _khL.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, L.numRows(), true, block_size); + _khU.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, U.numRows(), false, block_size); } //! Destructor. 
@@ -82,66 +75,6 @@ class LUPrec : public KokkosSparse::Experimental::Preconditioner { _khU.destroy_sptrsv_handle(); } - template < - typename Matrix, - typename std::enable_if::value>::type * = nullptr> - void apply_impl(const Kokkos::View &X, - const Kokkos::View &Y, - const char transM[] = "N", ScalarType alpha = karith::one(), - ScalarType beta = karith::zero()) const { - // tmp = trsv(L, x); //Apply L^inv to x - // y = trsv(U, tmp); //Apply U^inv to tmp - - KK_REQUIRE_MSG(transM[0] == NoTranspose[0], - "LUPrec::apply only supports 'N' for transM"); - - sptrsv_symbolic(&_khL, _L.graph.row_map, _L.graph.entries); - sptrsv_solve(&_khL, _L.graph.row_map, _L.graph.entries, _L.values, X, _tmp); - - sptrsv_symbolic(&_khU, _U.graph.row_map, _U.graph.entries); - sptrsv_solve(&_khU, _U.graph.row_map, _U.graph.entries, _U.values, _tmp, - _tmp2); - - KokkosBlas::axpby(alpha, _tmp2, beta, Y); - } - - template < - typename Matrix, - typename std::enable_if::value>::type * = nullptr> - void apply_impl(const Kokkos::View &X, - const Kokkos::View &Y, - const char transM[] = "N", ScalarType alpha = karith::one(), - ScalarType beta = karith::zero()) const { - // tmp = trsv(L, x); //Apply L^inv to x - // y = trsv(U, tmp); //Apply U^inv to tmp - - KK_REQUIRE_MSG(transM[0] == NoTranspose[0], - "LUPrec::apply only supports 'N' for transM"); - -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) - using Layout = Kokkos::LayoutLeft; -#else - using Layout = Kokkos::LayoutRight; -#endif - - // trsv is implemented for MV so we need to convert our views - using UView2d = typename Kokkos::View< - ScalarType **, Layout, DEVICE, - Kokkos::MemoryTraits >; - using UView2dc = typename Kokkos::View< - const ScalarType **, Layout, DEVICE, - Kokkos::MemoryTraits >; - UView2dc X2d(X.data(), X.extent(0), 1); - UView2d Y2d(Y.data(), Y.extent(0), 1), - tmp2d(_tmp.data(), _tmp.extent(0), 1), - tmp22d(_tmp2.data(), _tmp2.extent(0), 1); - - KokkosSparse::trsv("L", "N", "N", _L, X2d, tmp2d); - 
KokkosSparse::trsv("U", "N", "N", _U, tmp2d, tmp22d); - - KokkosBlas::axpby(alpha, _tmp2, beta, Y); - } - ///// \brief Apply the preconditioner to X, putting the result in Y. ///// ///// \tparam XViewType Input vector, as a 1-D Kokkos::View @@ -153,12 +86,18 @@ class LUPrec : public KokkosSparse::Experimental::Preconditioner { ///// ///// It takes L and U and the stores U^inv L^inv X in Y // - virtual void apply(const Kokkos::View &X, - const Kokkos::View &Y, - const char transM[] = "N", - ScalarType alpha = karith::one(), - ScalarType beta = karith::zero()) const { - apply_impl(X, Y, transM, alpha, beta); + virtual void apply(const Kokkos::View &X, const Kokkos::View &Y, + const char transM[] = "N", ScalarType alpha = karith::one(), + ScalarType beta = karith::zero()) const { + KK_REQUIRE_MSG(transM[0] == NoTranspose[0], "LUPrec::apply only supports 'N' for transM"); + + sptrsv_symbolic(&_khL, _L.graph.row_map, _L.graph.entries); + sptrsv_solve(&_khL, _L.graph.row_map, _L.graph.entries, _L.values, X, _tmp); + + sptrsv_symbolic(&_khU, _U.graph.row_map, _U.graph.entries); + sptrsv_solve(&_khU, _U.graph.row_map, _U.graph.entries, _U.values, _tmp, _tmp2); + + KokkosBlas::axpby(alpha, _tmp2, beta, Y); } //@} diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_MatrixPrec.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_MatrixPrec.hpp index 1e2e408063cd..6172dfad6f38 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_MatrixPrec.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_MatrixPrec.hpp @@ -76,11 +76,9 @@ class MatrixPrec : public KokkosSparse::Experimental::Preconditioner { ///\cdot X\f$. ///// The typical case is \f$\beta = 0\f$ and \f$\alpha = 1\f$. 
// - virtual void apply( - const Kokkos::View> &X, - const Kokkos::View> &Y, - const char transM[] = "N", ScalarType alpha = karith::one(), - ScalarType beta = karith::zero()) const { + virtual void apply(const Kokkos::View> &X, + const Kokkos::View> &Y, const char transM[] = "N", + ScalarType alpha = karith::one(), ScalarType beta = karith::zero()) const { KokkosSparse::spmv(transM, alpha, _A, X, beta, Y); } //@} diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_OrdinalTraits.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_OrdinalTraits.hpp index 8a487de03024..ef08eb89e2f2 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_OrdinalTraits.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_OrdinalTraits.hpp @@ -60,9 +60,7 @@ struct OrdinalTraits { template <> struct OrdinalTraits { - static constexpr KOKKOS_INLINE_FUNCTION unsigned short int invalid() { - return USHRT_MAX; - } + static constexpr KOKKOS_INLINE_FUNCTION unsigned short int invalid() { return USHRT_MAX; } }; template <> @@ -72,9 +70,7 @@ struct OrdinalTraits { template <> struct OrdinalTraits { - static constexpr KOKKOS_INLINE_FUNCTION unsigned int invalid() { - return UINT_MAX; - } + static constexpr KOKKOS_INLINE_FUNCTION unsigned int invalid() { return UINT_MAX; } }; template <> @@ -84,9 +80,7 @@ struct OrdinalTraits { template <> struct OrdinalTraits { - static constexpr KOKKOS_INLINE_FUNCTION unsigned long invalid() { - return ULONG_MAX; - } + static constexpr KOKKOS_INLINE_FUNCTION unsigned long invalid() { return ULONG_MAX; } }; template <> @@ -96,9 +90,7 @@ struct OrdinalTraits { template <> struct OrdinalTraits { - static constexpr KOKKOS_INLINE_FUNCTION unsigned long long invalid() { - return ULLONG_MAX; - } + static constexpr KOKKOS_INLINE_FUNCTION unsigned long long invalid() { return ULLONG_MAX; } }; } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Preconditioner.hpp 
b/packages/kokkos-kernels/sparse/src/KokkosSparse_Preconditioner.hpp index 99ce1a2f1a8d..7520afe671cc 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Preconditioner.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Preconditioner.hpp @@ -78,11 +78,9 @@ class Preconditioner { ///\cdot X\f$. ///// The typical case is \f$\beta = 0\f$ and \f$\alpha = 1\f$. // - virtual void apply( - const Kokkos::View> &X, - const Kokkos::View> &Y, - const char transM[] = "N", ScalarType alpha = karith::one(), - ScalarType beta = karith::zero()) const = 0; + virtual void apply(const Kokkos::View> &X, + const Kokkos::View> &Y, const char transM[] = "N", + ScalarType alpha = karith::one(), ScalarType beta = karith::zero()) const = 0; //@} //! Set this preconditioner's parameters. diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp index c26ace9c697c..455068b56f43 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp @@ -27,20 +27,16 @@ namespace KokkosSparse { // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. -template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values); +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values); // Sort a BRS matrix on the given execution space instance: within each row, // sort entries ascending by column and permute the values accordingly. 
-template -void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, - const rowmap_t& rowmap, const entries_t& entries, - const values_t& values); +void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values); // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. @@ -50,8 +46,7 @@ void sort_bsr_matrix(const bsrMat_t& A); // Sort a BRS matrix on the given execution space instance: within each row, // sort entries ascending by column and permute the values accordingly. template -void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, - const bsrMat_t& A); +void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMat_t& A); // ---------------------------------- // CRS matrix/graph sorting utilities @@ -70,8 +65,7 @@ void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, namespace Impl { -template +template struct SortCrsMatrixFunctor { using size_type = typename rowmap_t::non_const_value_type; using lno_t = typename entries_t::non_const_value_type; @@ -79,21 +73,14 @@ struct SortCrsMatrixFunctor { using team_mem = typename Kokkos::TeamPolicy::member_type; // The functor owns memory for entriesAux, so it can't have // MemoryTraits - using entries_managed_t = Kokkos::View; - using values_managed_t = Kokkos::View; + using entries_managed_t = Kokkos::View; + using values_managed_t = Kokkos::View; - SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, - const entries_t& entries_, const values_t& values_) + SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) : rowmap(rowmap_), entries(entries_), values(values_) { if (usingRangePol) { - entriesAux = entries_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), - entries.extent(0)); - valuesAux = values_managed_t( - 
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), - values.extent(0)); + entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); + valuesAux = values_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), values.extent(0)); } // otherwise, aux arrays won't be allocated (sorting in place) } @@ -105,9 +92,8 @@ struct SortCrsMatrixFunctor { // Radix sort requires unsigned keys for comparison using unsigned_lno_t = typename std::make_unsigned::type; KokkosKernels::SerialRadixSort2( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, values.data() + rowStart, - valuesAux.data() + rowStart, rowNum); + (unsigned_lno_t*)entries.data() + rowStart, (unsigned_lno_t*)entriesAux.data() + rowStart, + values.data() + rowStart, valuesAux.data() + rowStart, rowNum); } KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { @@ -115,8 +101,8 @@ struct SortCrsMatrixFunctor { size_type rowStart = rowmap(i); size_type rowEnd = rowmap(i + 1); lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort2( - entries.data() + rowStart, values.data() + rowStart, rowNum, t); + KokkosKernels::TeamBitonicSort2(entries.data() + rowStart, + values.data() + rowStart, rowNum, t); } rowmap_t rowmap; @@ -133,16 +119,12 @@ struct SortCrsGraphFunctor { using team_mem = typename Kokkos::TeamPolicy::member_type; // The functor owns memory for entriesAux, so it can't have // MemoryTraits - using entries_managed_t = Kokkos::View; + using entries_managed_t = Kokkos::View; - SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, - const entries_t& entries_) + SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, const entries_t& entries_) : rowmap(rowmap_), entries(entries_) { if (usingRangePol) { - entriesAux = entries_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), - entries.extent(0)); + entriesAux = 
entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); } // otherwise, aux arrays won't be allocated (sorting in place) } @@ -153,9 +135,8 @@ struct SortCrsGraphFunctor { lno_t rowNum = rowEnd - rowStart; // Radix sort requires unsigned keys for comparison using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); + KokkosKernels::SerialRadixSort((unsigned_lno_t*)entries.data() + rowStart, + (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); } KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { @@ -163,8 +144,7 @@ struct SortCrsGraphFunctor { size_type rowStart = rowmap(i); size_type rowEnd = rowmap(i + 1); lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort( - entries.data() + rowStart, rowNum, t); + KokkosKernels::TeamBitonicSort(entries.data() + rowStart, rowNum, t); } rowmap_t rowmap; @@ -179,8 +159,7 @@ struct MergedRowmapFunctor { using c_rowmap_t = typename rowmap_t::const_type; // Precondition: entries are sorted within each row - MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, - const entries_t& entries_) + MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, const entries_t& entries_) : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { @@ -213,10 +192,8 @@ struct MatrixMergedEntriesFunctor { using scalar_t = typename values_t::non_const_value_type; // Precondition: entries are sorted within each row - MatrixMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, - const entries_t& entries_, const values_t& values_, - const rowmap_t& mergedRowmap_, - const entries_t& mergedEntries_, + MatrixMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, + 
const values_t& values_, const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_, const values_t& mergedValues_) : rowmap(rowmap_), entries(entries_), @@ -268,14 +245,9 @@ struct GraphMergedEntriesFunctor { using lno_t = typename entries_t::non_const_value_type; // Precondition: entries are sorted within each row - GraphMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, - const entries_t& entries_, - const rowmap_t& mergedRowmap_, - const entries_t& mergedEntries_) - : rowmap(rowmap_), - entries(entries_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_) {} + GraphMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, + const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_) + : rowmap(rowmap_), entries(entries_), mergedRowmap(mergedRowmap_), mergedEntries(mergedEntries_) {} KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { size_type rowBegin = rowmap(row); @@ -321,12 +293,8 @@ struct sort_bsr_functor { values_type values; const lno_t blocksize; - sort_bsr_functor(row_map_type rowmap_, entries_type entries_, - values_type values_, const lno_t blocksize_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - blocksize(blocksize_) {} + sort_bsr_functor(row_map_type rowmap_, entries_type entries_, values_type values_, const lno_t blocksize_) + : rowmap(rowmap_), entries(entries_), values(values_), blocksize(blocksize_) {} KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { @@ -343,8 +311,7 @@ struct sort_bsr_functor { Impl::kk_swap(e[jp], e[j]); auto const vb = v + j * blocksize; auto const vbp = v + jp * blocksize; - for (lno_t k = 0; k < blocksize; - ++k) // std::swap_ranges(vb, vb + blocksize, vbp); + for (lno_t k = 0; k < blocksize; ++k) // std::swap_ranges(vb, vb + blocksize, vbp); Impl::kk_swap(vb[k], vbp[k]); done = false; } @@ -356,40 +323,29 @@ struct sort_bsr_functor { // Sort a CRS matrix: within each row, sort entries ascending by column. 
// At the same time, permute the values. -template -void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_crs_matrix: rowmap_t is not accessible from the given execution " - "space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_crs_matrix: entries_t is not accessible from the given execution " - "space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_crs_matrix: values_t is not accessible from the given execution " - "space"); +template +void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries, + const values_t& values) { + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_crs_matrix: rowmap_t is not accessible from the given execution " + "space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_crs_matrix: entries_t is not accessible from the given execution " + "space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_crs_matrix: values_t is not accessible from the given execution " + "space"); static_assert(!std::is_const_v, "sort_crs_matrix: entries_t must not be const-valued"); - static_assert(!std::is_const_v, - "sort_crs_matrix: value_t must not be const-valued"); + static_assert(!std::is_const_v, "sort_crs_matrix: value_t must not be const-valued"); using lno_t = typename entries_t::non_const_value_type; using team_pol = Kokkos::TeamPolicy; - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; if (numRows == 0) return; - Impl::SortCrsMatrixFunctor - funct(useRadix, rowmap, entries, values); + Impl::SortCrsMatrixFunctor funct(useRadix, rowmap, entries, values); if (useRadix) { - Kokkos::parallel_for("sort_crs_matrix", - Kokkos::RangePolicy(exec, 0, numRows), - funct); + Kokkos::parallel_for("sort_crs_matrix", Kokkos::RangePolicy(exec, 0, numRows), funct); } else { // Try to get teamsize to be largest power of 2 not greater than avg entries // per row @@ -403,44 +359,35 @@ void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, team_pol temp(exec, numRows, 1); lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_matrix", team_pol(exec, numRows, teamSize), - funct); + Kokkos::parallel_for("sort_crs_matrix", team_pol(exec, numRows, teamSize), funct); } } -template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { +template +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { sort_crs_matrix(execution_space(), rowmap, entries, values); } template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { - sort_crs_matrix(typename entries_t::execution_space(), rowmap, entries, - values); +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { + sort_crs_matrix(typename entries_t::execution_space(), rowmap, entries, values); } template -void sort_crs_matrix(const typename crsMat_t::execution_space& exec, - const crsMat_t& A) { +void sort_crs_matrix(const typename crsMat_t::execution_space& exec, const crsMat_t& A) { sort_crs_matrix(exec, A.graph.row_map, A.graph.entries, A.values); } template void sort_crs_matrix(const crsMat_t& A) { - sort_crs_matrix(typename crsMat_t::execution_space(), A.graph.row_map, - A.graph.entries, A.values); + 
sort_crs_matrix(typename crsMat_t::execution_space(), A.graph.row_map, A.graph.entries, A.values); } // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. -template -void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, - const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { +template +void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values) { // TODO: this is O(N^2) mock for debugging - do regular implementation based // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? @@ -449,31 +396,23 @@ void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const lno_t blocksize = blockdim * blockdim; assert(values.extent(0) == entries.extent(0) * blocksize); - Impl::sort_bsr_functor bsr_sorter( - rowmap, entries, values, blocksize); - Kokkos::parallel_for("sort_bsr_matrix", - Kokkos::RangePolicy(exec, 0, numRows), - bsr_sorter); + Impl::sort_bsr_functor bsr_sorter(rowmap, entries, values, blocksize); + Kokkos::parallel_for("sort_bsr_matrix", Kokkos::RangePolicy(exec, 0, numRows), bsr_sorter); } -template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { +template +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { sort_bsr_matrix(execution_space(), blockdim, rowmap, entries, values); } // Sort a BSR matrix (like CRS but single values are replaced with contignous // blocks) template -void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, - const bsrMat_t& A) { +void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMat_t& A) { // NOTE: unlike rowmap, entries and values are non-const, so we can sort them // directly 
- sort_bsr_matrix( + sort_bsr_matrix( exec, A.blockDim(), A.graph.row_map, A.graph.entries, A.values); } @@ -484,31 +423,22 @@ void sort_bsr_matrix(const bsrMat_t& A) { // Sort a CRS graph: within each row, sort entries ascending by column. template -void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, - const entries_t& entries) { +void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries) { using lno_t = typename entries_t::non_const_value_type; using team_pol = Kokkos::TeamPolicy; - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_crs_graph: rowmap_t is not accessible from the given execution " - "space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_crs_graph: entries_t is not accessible from the given execution " - "space"); - static_assert(!std::is_const_v, - "sort_crs_graph: entries_t must not be const-valued"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_crs_graph: rowmap_t is not accessible from the given execution " + "space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_crs_graph: entries_t is not accessible from the given execution " + "space"); + static_assert(!std::is_const_v, "sort_crs_graph: entries_t must not be const-valued"); bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; if (numRows == 0) return; - Impl::SortCrsGraphFunctor funct( - useRadix, rowmap, entries); + Impl::SortCrsGraphFunctor funct(useRadix, rowmap, entries); if (useRadix) { - Kokkos::parallel_for("sort_crs_graph", - Kokkos::RangePolicy(exec, 0, numRows), - funct); + Kokkos::parallel_for("sort_crs_graph", Kokkos::RangePolicy(exec, 0, numRows), funct); } else { // Try to get teamsize to be largest power of 2 less than or equal to // half the entries per row. 
0.5 * #entries is bitonic's parallelism within @@ -523,8 +453,7 @@ void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, team_pol temp(exec, numRows, 1); lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_graph", team_pol(exec, numRows, teamSize), - funct); + Kokkos::parallel_for("sort_crs_graph", team_pol(exec, numRows, teamSize), funct); } } @@ -556,44 +485,33 @@ void sort_crs_graph(const crsGraph_t& G) { sort_crs_graph(typename crsGraph_t::execution_space(), G); } -template -void sort_and_merge_matrix(const exec_space& exec, - const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, - const values_t& values_in, rowmap_t& rowmap_out, +template +void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, values_t& values_out) { using nc_rowmap_t = typename rowmap_t::non_const_type; using size_type = typename nc_rowmap_t::value_type; using ordinal_t = typename entries_t::value_type; using range_t = Kokkos::RangePolicy; - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_and_merge_matrix: rowmap_t is not accessible from the given " - "execution space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_and_merge_matrix: entries_t is not accessible from the given " - "execution space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_and_merge_matrix: values_t is not accessible from the given " - "execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_and_merge_matrix: rowmap_t is not accessible from the given " + "execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_and_merge_matrix: entries_t is not accessible from the given " + "execution space"); + 
static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_and_merge_matrix: values_t is not accessible from the given " + "execution space"); static_assert(!std::is_const_v, "sort_and_merge_matrix: entries_t must not be const-valued"); static_assert(!std::is_const_v, "sort_and_merge_matrix: value_t must not be const-valued"); - ordinal_t numRows = - rowmap_in.extent(0) ? ordinal_t(rowmap_in.extent(0) - 1) : ordinal_t(0); - size_type nnz = entries_in.extent(0); + ordinal_t numRows = rowmap_in.extent(0) ? ordinal_t(rowmap_in.extent(0) - 1) : ordinal_t(0); + size_type nnz = entries_in.extent(0); if (numRows == 0) { - rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", - rowmap_in.extent(0)); + rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", rowmap_in.extent(0)); entries_out = entries_t(); values_out = values_t(); return; @@ -603,14 +521,10 @@ void sort_and_merge_matrix(const exec_space& exec, // Count entries per row into a new rowmap, in terms of merges that can be // done - nc_rowmap_t nc_rowmap_out( - Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, - "SortedMerged rowmap"), - numRows + 1); + nc_rowmap_t nc_rowmap_out(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged rowmap"), numRows + 1); size_type numCompressedEntries = 0; Kokkos::parallel_reduce(range_t(exec, 0, numRows), - Impl::MergedRowmapFunctor( - nc_rowmap_out, rowmap_in, entries_in), + Impl::MergedRowmapFunctor(nc_rowmap_out, rowmap_in, entries_in), numCompressedEntries); if (nnz == numCompressedEntries) { // No merges to do, so just return A. 
Save the time of allocating and @@ -634,27 +548,21 @@ void sort_and_merge_matrix(const exec_space& exec, auto entries_orig = entries_in; auto values_orig = values_in; // Prefix sum to get rowmap - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - exec, numRows + 1, nc_rowmap_out); - rowmap_out = nc_rowmap_out; - entries_out = entries_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, - "SortedMerged entries"), - numCompressedEntries); - values_out = values_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, - "SortedMerged values"), - numCompressedEntries); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(exec, numRows + 1, nc_rowmap_out); + rowmap_out = nc_rowmap_out; + entries_out = + entries_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged entries"), numCompressedEntries); + values_out = + values_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged values"), numCompressedEntries); // Compute merged entries and values - Kokkos::parallel_for( - range_t(exec, 0, numRows), - Impl::MatrixMergedEntriesFunctor( - rowmap_orig, entries_orig, values_orig, rowmap_out, entries_out, - values_out)); + Kokkos::parallel_for(range_t(exec, 0, numRows), + Impl::MatrixMergedEntriesFunctor( + rowmap_orig, entries_orig, values_orig, rowmap_out, entries_out, values_out)); } // Sort the rows of matrix, and merge duplicate entries. 
template -crsMat_t sort_and_merge_matrix(const typename crsMat_t::execution_space& exec, - const crsMat_t& A) { +crsMat_t sort_and_merge_matrix(const typename crsMat_t::execution_space& exec, const crsMat_t& A) { using rowmap_t = typename crsMat_t::row_map_type; using entries_t = typename crsMat_t::index_type; using values_t = typename crsMat_t::values_type; @@ -663,11 +571,9 @@ crsMat_t sort_and_merge_matrix(const typename crsMat_t::execution_space& exec, entries_t entries_out; values_t values_out; - sort_and_merge_matrix(exec, A.graph.row_map, A.graph.entries, A.values, - rowmap_out, entries_out, values_out); + sort_and_merge_matrix(exec, A.graph.row_map, A.graph.entries, A.values, rowmap_out, entries_out, values_out); - return crsMat_t("SortedMerged", A.numRows(), A.numCols(), - values_out.extent(0), values_out, rowmap_out, entries_out); + return crsMat_t("SortedMerged", A.numRows(), A.numCols(), values_out.extent(0), values_out, rowmap_out, entries_out); } template @@ -675,52 +581,40 @@ crsMat_t sort_and_merge_matrix(const crsMat_t& A) { return sort_and_merge_matrix(typename crsMat_t::execution_space(), A); } -template -void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, - const values_t& values_in, rowmap_t& rowmap_out, - entries_t& entries_out, values_t& values_out) { - sort_and_merge_matrix(exec_space(), rowmap_in, entries_in, values_in, - rowmap_out, entries_out, values_out); +template +void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, + values_t& values_out) { + sort_and_merge_matrix(exec_space(), rowmap_in, entries_in, values_in, rowmap_out, entries_out, values_out); } template -void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, - const values_t& values_in, rowmap_t& rowmap_out, - entries_t& entries_out, values_t& 
values_out) { - sort_and_merge_matrix(typename entries_t::execution_space(), rowmap_in, - entries_in, values_in, rowmap_out, entries_out, - values_out); +void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, + values_t& values_out) { + sort_and_merge_matrix(typename entries_t::execution_space(), rowmap_in, entries_in, values_in, rowmap_out, + entries_out, values_out); } template -void sort_and_merge_graph(const exec_space& exec, - const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out) { +void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, rowmap_t& rowmap_out, entries_t& entries_out) { using size_type = typename rowmap_t::non_const_value_type; using lno_t = typename entries_t::value_type; using range_t = Kokkos::RangePolicy; using nc_rowmap_t = typename rowmap_t::non_const_type; - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_and_merge_graph: rowmap_t is not accessible from the given " - "execution space"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sort_and_merge_graph: entries_t is not accessible from the given " - "execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_and_merge_graph: rowmap_t is not accessible from the given " + "execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sort_and_merge_graph: entries_t is not accessible from the given " + "execution space"); static_assert(!std::is_const_v, "sort_and_merge_graph: entries_t must not be const-valued"); lno_t numRows = rowmap_in.extent(0) ? 
rowmap_in.extent(0) - 1 : 0; if (numRows == 0) { - rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", - rowmap_in.extent(0)); + rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", rowmap_in.extent(0)); entries_out = entries_t(); return; } @@ -728,14 +622,10 @@ void sort_and_merge_graph(const exec_space& exec, sort_crs_graph(exec, rowmap_in, entries_in); // Count entries per row into a new rowmap, in terms of merges that can be // done - nc_rowmap_t nc_rowmap_out( - Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, - "SortedMerged rowmap"), - numRows + 1); + nc_rowmap_t nc_rowmap_out(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged rowmap"), numRows + 1); size_type numCompressedEntries = 0; Kokkos::parallel_reduce(range_t(exec, 0, numRows), - Impl::MergedRowmapFunctor( - nc_rowmap_out, rowmap_in, entries_in), + Impl::MergedRowmapFunctor(nc_rowmap_out, rowmap_in, entries_in), numCompressedEntries); if (entries_in.extent(0) == size_t(numCompressedEntries)) { // No merges to perform, so the output rowmap is unchanged and we can just @@ -760,42 +650,33 @@ void sort_and_merge_graph(const exec_space& exec, // In the case where the output rowmap is the same as the input, we could just // assign "rowmap_out = rowmap_in" except that would break const-correctness. // Can skip filling the entries, however. 
- KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - exec, numRows + 1, nc_rowmap_out); - rowmap_out = nc_rowmap_out; - entries_out = entries_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, - "SortedMerged entries"), - numCompressedEntries); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(exec, numRows + 1, nc_rowmap_out); + rowmap_out = nc_rowmap_out; + entries_out = + entries_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged entries"), numCompressedEntries); // Compute merged entries and values - Kokkos::parallel_for(range_t(exec, 0, numRows), - Impl::GraphMergedEntriesFunctor( - rowmap_orig, entries_orig, rowmap_out, entries_out)); + Kokkos::parallel_for(range_t(exec, 0, numRows), Impl::GraphMergedEntriesFunctor( + rowmap_orig, entries_orig, rowmap_out, entries_out)); } template -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out) { - return sort_and_merge_graph(exec_space(), rowmap_in, entries_in, rowmap_out, - entries_out); +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + return sort_and_merge_graph(exec_space(), rowmap_in, entries_in, rowmap_out, entries_out); } template -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out) { - return sort_and_merge_graph(typename entries_t::execution_space(), rowmap_in, - entries_in, rowmap_out, entries_out); +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + return sort_and_merge_graph(typename entries_t::execution_space(), rowmap_in, entries_in, rowmap_out, entries_out); } template -crsGraph_t sort_and_merge_graph( - const typename crsGraph_t::execution_space& exec, const 
crsGraph_t& G) { +crsGraph_t sort_and_merge_graph(const typename crsGraph_t::execution_space& exec, const crsGraph_t& G) { using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; using entries_t = typename crsGraph_t::entries_type; - static_assert( - !std::is_const::value, - "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); + static_assert(!std::is_const::value, + "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); rowmap_t mergedRowmap; entries_t mergedEntries; sort_and_merge_graph(exec, G.row_map, G.entries, mergedRowmap, mergedEntries); @@ -817,12 +698,9 @@ namespace KokkosKernels { // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. -template -[[deprecated]] void sort_bsr_matrix(const lno_t blockdim, - const rowmap_t& rowmap, - const entries_t& entries, +[[deprecated]] void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { KokkosSparse::sort_bsr_matrix(blockdim, rowmap, entries, values); } @@ -839,13 +717,9 @@ template // The sort_crs* functions sort the adjacent column list for each row into // ascending order. 
-template -[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, - const entries_t& entries, - const values_t& values) { - KokkosSparse::sort_crs_matrix( - rowmap, entries, values); +template +[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { + KokkosSparse::sort_crs_matrix(rowmap, entries, values); } template @@ -854,10 +728,8 @@ template } template -[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, - const entries_t& entries) { - KokkosSparse::sort_crs_graph(rowmap, - entries); +[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { + KokkosSparse::sort_crs_graph(rowmap, entries); } template @@ -879,11 +751,9 @@ template } template -[[deprecated]] void sort_and_merge_graph( - const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, - entries_out); +[[deprecated]] void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, entries_out); } } // namespace KokkosKernels diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp index 2b89c1a2f74e..781857ef551f 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp @@ -42,14 +42,12 @@ namespace Impl { /* create a block-sparse version of a CrsMatrix */ -template -void kk_create_bsr_formated_point_crsmatrix( - int block_size, size_t num_rows, size_t num_cols, in_row_view_t in_xadj, - in_nnz_view_t in_adj, in_val_view_t in_vals, size_t &out_num_rows, - size_t &out_num_cols, out_row_view_t &out_xadj, out_nnz_view_t &out_adj, - out_val_view_t &out_vals) { +void 
kk_create_bsr_formated_point_crsmatrix(int block_size, size_t num_rows, size_t num_cols, in_row_view_t in_xadj, + in_nnz_view_t in_adj, in_val_view_t in_vals, size_t &out_num_rows, + size_t &out_num_cols, out_row_view_t &out_xadj, out_nnz_view_t &out_adj, + out_val_view_t &out_vals) { typedef typename in_nnz_view_t::non_const_value_type lno_t; typedef typename in_row_view_t::non_const_value_type size_type; typedef typename in_val_view_t::non_const_value_type scalar_t; @@ -165,11 +163,9 @@ void kk_create_bsr_formated_point_crsmatrix( out_adj = out_nnz_view_t("BlockedPointCRS ADJ", block_adj.size()); out_vals = out_val_view_t("BlockedPointCRS VALS", block_vals.size()); - typename out_row_view_t::HostMirror hor = - Kokkos::create_mirror_view(out_xadj); + typename out_row_view_t::HostMirror hor = Kokkos::create_mirror_view(out_xadj); typename out_nnz_view_t::HostMirror hoe = Kokkos::create_mirror_view(out_adj); - typename out_val_view_t::HostMirror hov = - Kokkos::create_mirror_view(out_vals); + typename out_val_view_t::HostMirror hov = Kokkos::create_mirror_view(out_vals); for (lno_t i = 0; i < lno_t(out_num_rows) + 1; ++i) { hor(i) = block_rows_xadj[i]; @@ -202,19 +198,16 @@ struct ViewConverter { for BSR-format data from CRS data consistent with BSR format */ -template -void kk_create_bsr_from_bsr_formatted_point_crs( - int block_size, size_t num_rows, size_t num_cols, - in_row_view_t in_xadj, // row pointer (CrsMatrix::graph.row_map) - in_nnz_view_t in_adj, // col index (CrsMatrix::graph.entries) - in_val_view_t in_vals, // values CrsMatrix::values - size_t &out_num_rows, // rows of blocks in output - size_t &out_num_cols, // cols of blocks in output - out_row_view_t &out_xadj, out_nnz_view_t &out_adj, - out_val_view_t &out_vals) { +template +void kk_create_bsr_from_bsr_formatted_point_crs(int block_size, size_t num_rows, size_t num_cols, + in_row_view_t in_xadj, // row pointer (CrsMatrix::graph.row_map) + in_nnz_view_t in_adj, // col index 
(CrsMatrix::graph.entries) + in_val_view_t in_vals, // values CrsMatrix::values + size_t &out_num_rows, // rows of blocks in output + size_t &out_num_cols, // cols of blocks in output + out_row_view_t &out_xadj, out_nnz_view_t &out_adj, + out_val_view_t &out_vals) { typedef typename in_nnz_view_t::non_const_value_type in_ordinal_type; typedef typename in_val_view_t::non_const_value_type in_scalar_type; typedef typename in_nnz_view_t::device_type in_device_type; @@ -224,33 +217,23 @@ void kk_create_bsr_from_bsr_formatted_point_crs( // in_row_view_t and out_row_view_t may not be the same, so use ViewConverter // to do the conversion - typedef KokkosSparse::CrsMatrix - InMatrix; - typedef KokkosSparse::Experimental::BsrMatrix< - out_scalar_type, out_ordinal_type, out_device_type> - OutMatrix; + typedef KokkosSparse::CrsMatrix InMatrix; + typedef KokkosSparse::Experimental::BsrMatrix OutMatrix; // in_rowmap <- in_xadj - Kokkos::View - in_rowmap("", in_xadj.size()); - Kokkos::parallel_for( - "", in_xadj.size(), - ViewConverter(in_rowmap, in_xadj)); + Kokkos::View in_rowmap("", in_xadj.size()); + Kokkos::parallel_for("", in_xadj.size(), ViewConverter(in_rowmap, in_xadj)); // reconstruct original CrsMatrix - InMatrix in("", num_rows, num_cols, in_vals.size(), in_vals, in_rowmap, - in_adj); + InMatrix in("", num_rows, num_cols, in_vals.size(), in_vals, in_rowmap, in_adj); // convert to BsrMatrix OutMatrix out(in, block_size); // out_xadj <- out.graph.row_map Kokkos::resize(out_xadj, out.graph.row_map.size()); - Kokkos::parallel_for( - "", out_xadj.size(), - ViewConverter( - out_xadj, out.graph.row_map)); + Kokkos::parallel_for("", out_xadj.size(), + ViewConverter(out_xadj, out.graph.row_map)); out_adj = out.graph.entries; out_vals = out.values; @@ -258,10 +241,8 @@ void kk_create_bsr_from_bsr_formatted_point_crs( out_num_cols = out.numCols(); } -template +template struct TransposeMatrix { struct CountTag {}; struct FillTag {}; @@ -287,11 +268,9 @@ struct TransposeMatrix { 
bool transpose_values; nnz_lno_t team_work_size; - TransposeMatrix(nnz_lno_t num_rows_, nnz_lno_t num_cols_, in_row_view_t xadj_, - in_nnz_view_t adj_, in_scalar_view_t vals_, - out_row_view_t t_xadj_, out_nnz_view_t t_adj_, - out_scalar_view_t t_vals_, tempwork_row_view_t tmp_txadj_, - bool transpose_values_, nnz_lno_t team_row_work_size_) + TransposeMatrix(nnz_lno_t num_rows_, nnz_lno_t num_cols_, in_row_view_t xadj_, in_nnz_view_t adj_, + in_scalar_view_t vals_, out_row_view_t t_xadj_, out_nnz_view_t t_adj_, out_scalar_view_t t_vals_, + tempwork_row_view_t tmp_txadj_, bool transpose_values_, nnz_lno_t team_row_work_size_) : num_rows(num_rows_), num_cols(num_cols_), xadj(xadj_), @@ -305,90 +284,69 @@ struct TransposeMatrix { team_work_size(team_row_work_size_) {} KOKKOS_INLINE_FUNCTION - void operator()(const CountTag &, - const team_count_member_t &teamMember) const { + void operator()(const CountTag &, const team_count_member_t &teamMember) const { const nnz_lno_t team_row_begin = teamMember.league_rank() * team_work_size; - const nnz_lno_t team_row_end = - KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); + const nnz_lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); // TODO we dont need to go over rows // just go over nonzeroes. 
- Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), - [&](const nnz_lno_t &row_index) { - const size_type col_begin = xadj[row_index]; - const size_type col_end = xadj[row_index + 1]; - const nnz_lno_t left_work = col_end - col_begin; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, left_work), - [&](nnz_lno_t i) { - const size_type adjind = i + col_begin; - const nnz_lno_t colIndex = adj[adjind]; - typedef - typename std::remove_reference::type - atomic_incr_type; - Kokkos::atomic_fetch_add(&(t_xadj(colIndex)), - atomic_incr_type(1)); - }); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), + [&](const nnz_lno_t &row_index) { + const size_type col_begin = xadj[row_index]; + const size_type col_end = xadj[row_index + 1]; + const nnz_lno_t left_work = col_end - col_begin; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, left_work), [&](nnz_lno_t i) { + const size_type adjind = i + col_begin; + const nnz_lno_t colIndex = adj[adjind]; + typedef typename std::remove_reference::type atomic_incr_type; + Kokkos::atomic_fetch_add(&(t_xadj(colIndex)), atomic_incr_type(1)); + }); + }); } KOKKOS_INLINE_FUNCTION void operator()(const FillTag &, const team_fill_member_t &teamMember) const { const nnz_lno_t team_row_begin = teamMember.league_rank() * team_work_size; - const nnz_lno_t team_row_end = - KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); + const nnz_lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), - [&](const nnz_lno_t &row_index) { + Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), [&](const nnz_lno_t &row_index) { // const nnz_lno_t teamsize = teamMember.team_size(); // for (nnz_lno_t row_index = team_row_begin + teamMember.team_rank(); // row_index < team_row_end; row_index += teamsize){ 
const size_type col_begin = xadj[row_index]; const size_type col_end = xadj[row_index + 1]; const nnz_lno_t left_work = col_end - col_begin; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, left_work), - [&](nnz_lno_t i) { - const size_type adjind = i + col_begin; - const nnz_lno_t colIndex = adj[adjind]; - typedef - typename std::remove_reference::type - atomic_incr_type; - const size_type pos = Kokkos::atomic_fetch_add( - &(tmp_txadj(colIndex)), atomic_incr_type(1)); - - t_adj(pos) = row_index; - if (transpose_values) { - t_vals(pos) = vals[adjind]; - } - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, left_work), [&](nnz_lno_t i) { + const size_type adjind = i + col_begin; + const nnz_lno_t colIndex = adj[adjind]; + typedef typename std::remove_reference::type atomic_incr_type; + const size_type pos = Kokkos::atomic_fetch_add(&(tmp_txadj(colIndex)), atomic_incr_type(1)); + + t_adj(pos) = row_index; + if (transpose_values) { + t_vals(pos) = vals[adjind]; + } + }); //} }); } }; -template -void transpose_matrix( - typename in_nnz_view_t::non_const_value_type num_rows, - typename in_nnz_view_t::non_const_value_type num_cols, in_row_view_t xadj, - in_nnz_view_t adj, in_scalar_view_t vals, - out_row_view_t t_xadj, // pre-allocated -- initialized with 0 - out_nnz_view_t t_adj, // pre-allocated -- no need for initialize - out_scalar_view_t t_vals // pre-allocated -- no need for initialize +template +void transpose_matrix(typename in_nnz_view_t::non_const_value_type num_rows, + typename in_nnz_view_t::non_const_value_type num_cols, in_row_view_t xadj, in_nnz_view_t adj, + in_scalar_view_t vals, + out_row_view_t t_xadj, // pre-allocated -- initialized with 0 + out_nnz_view_t t_adj, // pre-allocated -- no need for initialize + out_scalar_view_t t_vals // pre-allocated -- no need for initialize ) { // allocate some memory for work for row pointers - tempwork_row_view_t tmp_row_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing, 
"tmp_row_view"), - num_cols + 1); + tempwork_row_view_t tmp_row_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_row_view"), num_cols + 1); // create the functor for tranpose. - typedef TransposeMatrix + typedef TransposeMatrix TransposeFunctor_t; typedef typename TransposeFunctor_t::team_count_policy_t count_tp_t; @@ -397,31 +355,23 @@ void transpose_matrix( typename in_row_view_t::non_const_value_type nnz = adj.extent(0); // determine vector lanes per thread - int thread_size = kk_get_suggested_vector_size( - num_rows, nnz, - KokkosKernels::Impl::kk_get_exec_space_type()); + int thread_size = + kk_get_suggested_vector_size(num_rows, nnz, KokkosKernels::Impl::kk_get_exec_space_type()); // determine threads per team - int team_size = kk_get_suggested_team_size( - thread_size, KokkosKernels::Impl::kk_get_exec_space_type()); + int team_size = kk_get_suggested_team_size(thread_size, KokkosKernels::Impl::kk_get_exec_space_type()); - TransposeFunctor_t tm(num_rows, num_cols, xadj, adj, vals, t_xadj, t_adj, - t_vals, tmp_row_view, true, team_size); + TransposeFunctor_t tm(num_rows, num_cols, xadj, adj, vals, t_xadj, t_adj, t_vals, tmp_row_view, true, team_size); Kokkos::parallel_for("KokkosSparse::Impl::transpose_matrix::S0", - count_tp_t((num_rows + team_size - 1) / team_size, - team_size, thread_size), - tm); + count_tp_t((num_rows + team_size - 1) / team_size, team_size, thread_size), tm); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - num_cols + 1, t_xadj); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(num_cols + 1, t_xadj); Kokkos::deep_copy(tmp_row_view, t_xadj); - Kokkos::parallel_for( - "KokkosSparse::Impl::transpose_matrix::S1", - fill_tp_t((num_rows + team_size - 1) / team_size, team_size, thread_size), - tm); + Kokkos::parallel_for("KokkosSparse::Impl::transpose_matrix::S1", + fill_tp_t((num_rows + team_size - 1) / team_size, team_size, thread_size), tm); MyExecSpace().fence(); } @@ -436,42 +386,30 @@ crsMat_t 
transpose_matrix(const crsMat_t &A) { using entries_t = typename crsMat_t::index_type::non_const_type; using values_t = typename crsMat_t::values_type::non_const_type; rowmap_t AT_rowmap("Transpose rowmap", A.numCols() + 1); - entries_t AT_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose entries"), - A.nnz()); - values_t AT_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose values"), - A.nnz()); - transpose_matrix( - A.numRows(), A.numCols(), A.graph.row_map, A.graph.entries, A.values, - AT_rowmap, AT_entries, AT_values); + entries_t AT_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose entries"), A.nnz()); + values_t AT_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose values"), A.nnz()); + transpose_matrix(A.numRows(), A.numCols(), A.graph.row_map, A.graph.entries, + A.values, AT_rowmap, AT_entries, AT_values); // And construct the transpose crsMat_t - return crsMat_t("Transpose", A.numCols(), A.numRows(), A.nnz(), AT_values, - AT_rowmap, AT_entries); + return crsMat_t("Transpose", A.numCols(), A.numRows(), A.nnz(), AT_values, AT_rowmap, AT_entries); } -template -void transpose_graph( - typename in_nnz_view_t::non_const_value_type num_rows, - typename in_nnz_view_t::non_const_value_type num_cols, in_row_view_t xadj, - in_nnz_view_t adj, - out_row_view_t t_xadj, // pre-allocated -- initialized with 0 - out_nnz_view_t t_adj // pre-allocated -- no need for initialize +void transpose_graph(typename in_nnz_view_t::non_const_value_type num_rows, + typename in_nnz_view_t::non_const_value_type num_cols, in_row_view_t xadj, in_nnz_view_t adj, + out_row_view_t t_xadj, // pre-allocated -- initialized with 0 + out_nnz_view_t t_adj // pre-allocated -- no need for initialize ) { // allocate some memory for work for row pointers - tempwork_row_view_t tmp_row_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_row_view"), - num_cols + 1); + tempwork_row_view_t 
tmp_row_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_row_view"), num_cols + 1); in_nnz_view_t tmp1; out_nnz_view_t tmp2; // create the functor for tranpose. - typedef TransposeMatrix TransposeFunctor_t; @@ -481,37 +419,28 @@ void transpose_graph( typename in_row_view_t::non_const_value_type nnz = adj.extent(0); // determine vector lanes per thread - int thread_size = kk_get_suggested_vector_size( - num_rows, nnz, - KokkosKernels::Impl::kk_get_exec_space_type()); + int thread_size = + kk_get_suggested_vector_size(num_rows, nnz, KokkosKernels::Impl::kk_get_exec_space_type()); // determine threads per team - int team_size = kk_get_suggested_team_size( - thread_size, KokkosKernels::Impl::kk_get_exec_space_type()); + int team_size = kk_get_suggested_team_size(thread_size, KokkosKernels::Impl::kk_get_exec_space_type()); - TransposeFunctor_t tm(num_rows, num_cols, xadj, adj, tmp1, t_xadj, t_adj, - tmp2, tmp_row_view, false, team_size); + TransposeFunctor_t tm(num_rows, num_cols, xadj, adj, tmp1, t_xadj, t_adj, tmp2, tmp_row_view, false, team_size); Kokkos::parallel_for("KokkosKernels::Impl::transpose_graph::S0", - count_tp_t((num_rows + team_size - 1) / team_size, - team_size, thread_size), - tm); + count_tp_t((num_rows + team_size - 1) / team_size, team_size, thread_size), tm); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - num_cols + 1, t_xadj); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(num_cols + 1, t_xadj); Kokkos::deep_copy(tmp_row_view, t_xadj); - Kokkos::parallel_for( - "KokkosKernels::Impl::transpose_graph::S1", - fill_tp_t((num_rows + team_size - 1) / team_size, team_size, thread_size), - tm); + Kokkos::parallel_for("KokkosKernels::Impl::transpose_graph::S1", + fill_tp_t((num_rows + team_size - 1) / team_size, team_size, thread_size), tm); MyExecSpace().fence(); } -template struct TransposeBsrMatrix { using ordinal_type = typename in_nnz_view_t::non_const_value_type; @@ -525,10 +454,8 @@ struct TransposeBsrMatrix { 
out_nnz_view_t tAentries; // allocated out_scalar_view_t tAvalues; // allocated - TransposeBsrMatrix(const int blockSize, in_row_view_t row_mapA, - in_nnz_view_t entriesA, in_scalar_view_t valuesA, - out_row_view_t row_mapAt, out_nnz_view_t entriesAt, - out_scalar_view_t valuesAt) + TransposeBsrMatrix(const int blockSize, in_row_view_t row_mapA, in_nnz_view_t entriesA, in_scalar_view_t valuesA, + out_row_view_t row_mapAt, out_nnz_view_t entriesAt, out_scalar_view_t valuesAt) : block_size(blockSize), Arow_map(row_mapA), Aentries(entriesA), @@ -540,15 +467,13 @@ struct TransposeBsrMatrix { KOKKOS_INLINE_FUNCTION void operator()(const int tArowIdx) const { // Loop over entries in row - for (size_type tAentryIdx = tArow_map(tArowIdx); - tAentryIdx < tArow_map(tArowIdx + 1); ++tAentryIdx) { + for (size_type tAentryIdx = tArow_map(tArowIdx); tAentryIdx < tArow_map(tArowIdx + 1); ++tAentryIdx) { ordinal_type tAcolIdx = tAentries(tAentryIdx); // we have block tA(tArowIdx, tAcolIdx) starting at tAvalues(entryIdx) // we need to find AentryIdx corresponding to A(tAcolIdx, tArowIdx) size_type AentryIdx; - for (AentryIdx = Arow_map(tAcolIdx); AentryIdx < Arow_map(tAcolIdx + 1); - ++AentryIdx) { + for (AentryIdx = Arow_map(tAcolIdx); AentryIdx < Arow_map(tAcolIdx + 1); ++AentryIdx) { if (tArowIdx == Aentries(AentryIdx)) break; } @@ -564,31 +489,25 @@ struct TransposeBsrMatrix { } }; // TransposeBsrMatrix -template -void transpose_bsr_matrix( - typename in_nnz_view_t::non_const_value_type num_rows, - typename in_nnz_view_t::non_const_value_type num_cols, const int block_size, - in_row_view_t xadj, in_nnz_view_t adj, in_scalar_view_t vals, - out_row_view_t t_xadj, // pre-allocated -- initialized with 0 - out_nnz_view_t t_adj, // pre-allocated -- no need for initialize - out_scalar_view_t t_vals // pre-allocated -- no need for initialize +template +void transpose_bsr_matrix(typename in_nnz_view_t::non_const_value_type num_rows, + typename in_nnz_view_t::non_const_value_type 
num_cols, const int block_size, + in_row_view_t xadj, in_nnz_view_t adj, in_scalar_view_t vals, + out_row_view_t t_xadj, // pre-allocated -- initialized with 0 + out_nnz_view_t t_adj, // pre-allocated -- no need for initialize + out_scalar_view_t t_vals // pre-allocated -- no need for initialize ) { - using TransposeBsrFunctor_type = - TransposeBsrMatrix; + using TransposeBsrFunctor_type = TransposeBsrMatrix; // Step 1: call transpose_graph of bsr matrix - transpose_graph(num_rows, num_cols, xadj, adj, - t_xadj, t_adj); + transpose_graph( + num_rows, num_cols, xadj, adj, t_xadj, t_adj); // Step 2: transpose the values of A Kokkos::RangePolicy my_policy(0, num_cols); - TransposeBsrFunctor_type my_functor(block_size, xadj, adj, vals, t_xadj, - t_adj, t_vals); + TransposeBsrFunctor_type my_functor(block_size, xadj, adj, vals, t_xadj, t_adj, t_vals); Kokkos::parallel_for(my_policy, my_functor); MyExecSpace().fence(); @@ -605,19 +524,14 @@ bsrMat_t transpose_bsr_matrix(const bsrMat_t &A) { using values_t = typename bsrMat_t::values_type::non_const_type; rowmap_t AT_rowmap("Transpose rowmap", A.numCols() + 1); - entries_t AT_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose entries"), - A.nnz()); - values_t AT_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose values"), - A.nnz() * A.blockDim() * A.blockDim()); - transpose_bsr_matrix( - A.numRows(), A.numCols(), A.blockDim(), A.graph.row_map, A.graph.entries, - A.values, AT_rowmap, AT_entries, AT_values); + entries_t AT_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose entries"), A.nnz()); + values_t AT_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Transpose values"), + A.nnz() * A.blockDim() * A.blockDim()); + transpose_bsr_matrix(A.numRows(), A.numCols(), A.blockDim(), A.graph.row_map, + A.graph.entries, A.values, AT_rowmap, AT_entries, AT_values); // And construct the transpose crsMat_t - return bsrMat_t("Transpose", A.numCols(), A.numRows(), A.nnz(), 
AT_values, - AT_rowmap, AT_entries, A.blockDim()); + return bsrMat_t("Transpose", A.numCols(), A.numRows(), A.nnz(), AT_values, AT_rowmap, AT_entries, A.blockDim()); } template @@ -637,10 +551,8 @@ struct Fill_Reverse_Scale_Functor { const reverse_type multiply_shift_for_scale; const reverse_type division_shift_for_bucket; - Fill_Reverse_Scale_Functor(forward_map_type forward_map_, - reverse_map_type reverse_map_xadj_, - reverse_map_type reverse_map_adj_, - reverse_type multiply_shift_for_scale_, + Fill_Reverse_Scale_Functor(forward_map_type forward_map_, reverse_map_type reverse_map_xadj_, + reverse_map_type reverse_map_adj_, reverse_type multiply_shift_for_scale_, reverse_type division_shift_for_bucket_) : forward_map(forward_map_), reverse_map_xadj(reverse_map_xadj_), @@ -653,8 +565,7 @@ struct Fill_Reverse_Scale_Functor { forward_type fm = forward_map[ii]; fm = fm << multiply_shift_for_scale; fm += ii >> division_shift_for_bucket; - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; Kokkos::atomic_fetch_add(&(reverse_map_xadj(fm)), atomic_incr_type(1)); } @@ -664,11 +575,9 @@ struct Fill_Reverse_Scale_Functor { fm = fm << multiply_shift_for_scale; fm += ii >> division_shift_for_bucket; - typedef typename std::remove_reference::type - atomic_incr_type; - const reverse_type future_index = - Kokkos::atomic_fetch_add(&(reverse_map_xadj(fm)), atomic_incr_type(1)); - reverse_map_adj(future_index) = ii; + typedef typename std::remove_reference::type atomic_incr_type; + const reverse_type future_index = Kokkos::atomic_fetch_add(&(reverse_map_xadj(fm)), atomic_incr_type(1)); + reverse_map_adj(future_index) = ii; } }; @@ -677,8 +586,7 @@ struct StridedCopy1 { const from_view_t from; to_view_t to; const size_t stride; - StridedCopy1(const from_view_t from_, to_view_t to_, size_t stride_) - : from(from_), to(to_), stride(stride_) {} + StridedCopy1(const from_view_t from_, to_view_t to_, size_t 
stride_) : from(from_), to(to_), stride(stride_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t &ii) const { to[ii] = from[(ii)*stride]; } @@ -698,29 +606,23 @@ struct Reverse_Map_Functor { reverse_map_type reverse_map_xadj; reverse_map_type reverse_map_adj; - Reverse_Map_Functor(forward_map_type forward_map_, - reverse_map_type reverse_map_xadj_, + Reverse_Map_Functor(forward_map_type forward_map_, reverse_map_type reverse_map_xadj_, reverse_map_type reverse_map_adj_) - : forward_map(forward_map_), - reverse_map_xadj(reverse_map_xadj_), - reverse_map_adj(reverse_map_adj_) {} + : forward_map(forward_map_), reverse_map_xadj(reverse_map_xadj_), reverse_map_adj(reverse_map_adj_) {} KOKKOS_INLINE_FUNCTION void operator()(const CountTag &, const size_t &ii) const { forward_type fm = forward_map[ii]; - typedef typename std::remove_reference::type - atomic_incr_type; + typedef typename std::remove_reference::type atomic_incr_type; Kokkos::atomic_fetch_add(&(reverse_map_xadj(fm)), atomic_incr_type(1)); } KOKKOS_INLINE_FUNCTION void operator()(const FillTag &, const size_t &ii) const { forward_type c = forward_map[ii]; - typedef typename std::remove_reference::type - atomic_incr_type; - const reverse_type future_index = - Kokkos::atomic_fetch_add(&(reverse_map_xadj(c)), atomic_incr_type(1)); - reverse_map_adj(future_index) = ii; + typedef typename std::remove_reference::type atomic_incr_type; + const reverse_type future_index = Kokkos::atomic_fetch_add(&(reverse_map_xadj(c)), atomic_incr_type(1)); + reverse_map_adj(future_index) = ii; } }; @@ -747,15 +649,12 @@ struct Reverse_Map_Functor { /// maps. Its size is num_forward_elements. 
template -void kk_create_reverse_map( - const typename reverse_array_type::value_type - &num_forward_elements, // num_vertices - const typename forward_array_type::value_type - &num_reverse_elements, // num_colors +void kk_create_reverse_map(const typename reverse_array_type::value_type &num_forward_elements, // num_vertices + const typename forward_array_type::value_type &num_reverse_elements, // num_colors - const forward_array_type &forward_map, // vertex to colors - const reverse_array_type &reverse_map_xadj, // colors to vertex xadj - const reverse_array_type &reverse_map_adj) { // colros to vertex adj + const forward_array_type &forward_map, // vertex to colors + const reverse_array_type &reverse_map_xadj, // colors to vertex xadj + const reverse_array_type &reverse_map_adj) { // colros to vertex adj typedef typename reverse_array_type::value_type lno_t; typedef typename forward_array_type::value_type reverse_lno_t; @@ -774,55 +673,43 @@ void kk_create_reverse_map( const lno_t multiply_shift_for_scale = 10; // there will be 1024 buckets - const lno_t division_shift_for_bucket = - lno_t(ceil(log(double(num_forward_elements) / scale_size) / log(2))); + const lno_t division_shift_for_bucket = lno_t(ceil(log(double(num_forward_elements) / scale_size) / log(2))); // coloring indices are base-1. we end up using not using element 1. 
- const reverse_lno_t tmp_reverse_size = (num_reverse_elements + 1) - << multiply_shift_for_scale; + const reverse_lno_t tmp_reverse_size = (num_reverse_elements + 1) << multiply_shift_for_scale; - typename reverse_array_type::non_const_type tmp_color_xadj( - "TMP_REVERSE_XADJ", tmp_reverse_size + 1); + typename reverse_array_type::non_const_type tmp_color_xadj("TMP_REVERSE_XADJ", tmp_reverse_size + 1); - typedef Fill_Reverse_Scale_Functor - frsf; + typedef Fill_Reverse_Scale_Functor frsf; typedef typename frsf::CountTag cnt_tag; typedef typename frsf::FillTag fill_tag; typedef Kokkos::RangePolicy my_cnt_exec_space; typedef Kokkos::RangePolicy my_fill_exec_space; - frsf frm(forward_map, tmp_color_xadj, reverse_map_adj, - multiply_shift_for_scale, division_shift_for_bucket); + frsf frm(forward_map, tmp_color_xadj, reverse_map_adj, multiply_shift_for_scale, division_shift_for_bucket); - Kokkos::parallel_for( - "KokkosKernels::Common::CreateReverseMap::NonAtomic::S0", - my_cnt_exec_space(0, num_forward_elements), frm); + Kokkos::parallel_for("KokkosKernels::Common::CreateReverseMap::NonAtomic::S0", + my_cnt_exec_space(0, num_forward_elements), frm); MyExecSpace().fence(); // kk_inclusive_parallel_prefix_sum(tmp_reverse_size + 1, tmp_color_xadj); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - tmp_reverse_size + 1, tmp_color_xadj); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(tmp_reverse_size + 1, tmp_color_xadj); MyExecSpace().fence(); Kokkos::parallel_for( - "KokkosKernels::Common::CreateReverseMap::NonAtomic::S1", - my_exec_space(0, num_reverse_elements + 1), - StridedCopy1( - tmp_color_xadj, reverse_map_xadj, scale_size)); + "KokkosKernels::Common::CreateReverseMap::NonAtomic::S1", my_exec_space(0, num_reverse_elements + 1), + StridedCopy1(tmp_color_xadj, reverse_map_xadj, scale_size)); MyExecSpace().fence(); - Kokkos::parallel_for( - "KokkosKernels::Common::CreateReverseMap::NonAtomic::S2", - my_fill_exec_space(0, num_forward_elements), frm); 
+ Kokkos::parallel_for("KokkosKernels::Common::CreateReverseMap::NonAtomic::S2", + my_fill_exec_space(0, num_forward_elements), frm); MyExecSpace().fence(); } else // atomic implementation. { - reverse_array_type tmp_color_xadj("TMP_REVERSE_XADJ", - num_reverse_elements + 1); + reverse_array_type tmp_color_xadj("TMP_REVERSE_XADJ", num_reverse_elements + 1); - typedef Reverse_Map_Functor - rmp_functor_type; + typedef Reverse_Map_Functor rmp_functor_type; typedef typename rmp_functor_type::CountTag cnt_tag; typedef typename rmp_functor_type::FillTag fill_tag; typedef Kokkos::RangePolicy my_cnt_exec_space; @@ -836,8 +723,7 @@ void kk_create_reverse_map( // kk_inclusive_parallel_prefix_sum(num_reverse_elements + 1, reverse_map_xadj); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - num_reverse_elements + 1, tmp_color_xadj); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(num_reverse_elements + 1, tmp_color_xadj); MyExecSpace().fence(); Kokkos::deep_copy(reverse_map_xadj, tmp_color_xadj); @@ -849,8 +735,7 @@ void kk_create_reverse_map( } } -template +template struct ColorChecker { typedef typename in_row_view_t::value_type size_type; typedef typename in_nnz_view_t::value_type lno_t; @@ -861,20 +746,14 @@ struct ColorChecker { in_color_view_t color_view; lno_t team_row_chunk_size; - ColorChecker(lno_t num_rows_, in_row_view_t xadj_, in_nnz_view_t adj_, - in_color_view_t color_view_, lno_t chunk_size) - : num_rows(num_rows_), - xadj(xadj_), - adj(adj_), - color_view(color_view_), - team_row_chunk_size(chunk_size) {} + ColorChecker(lno_t num_rows_, in_row_view_t xadj_, in_nnz_view_t adj_, in_color_view_t color_view_, lno_t chunk_size) + : num_rows(num_rows_), xadj(xadj_), adj(adj_), color_view(color_view_), team_row_chunk_size(chunk_size) {} KOKKOS_INLINE_FUNCTION void operator()(const team_member &teamMember, size_t &num_conflicts) const { // get the range of rows for team. 
const lno_t team_row_begin = teamMember.league_rank() * team_row_chunk_size; - const lno_t team_row_end = - KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_row_chunk_size, num_rows); + const lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_row_chunk_size, num_rows); size_t nf = 0; Kokkos::parallel_reduce( @@ -911,34 +790,24 @@ struct ColorChecker { /// \param xadj row pointers of the input graph /// \param adj column indices of the input graphw /// \param v_colors The colors at each vertex in the graph. -template -inline size_t kk_is_d1_coloring_valid( - typename in_nnz_view_t::non_const_value_type num_rows, - typename in_nnz_view_t::non_const_value_type /*num_cols*/, - in_row_view_t xadj, in_nnz_view_t adj, in_color_view_t v_colors) { - KokkosKernels::Impl::ExecSpaceType my_exec_space = - KokkosKernels::Impl::kk_get_exec_space_type(); - int vector_size = - kk_get_suggested_vector_size(num_rows, adj.extent(0), my_exec_space); - int suggested_team_size = - kk_get_suggested_team_size(vector_size, my_exec_space); +template +inline size_t kk_is_d1_coloring_valid(typename in_nnz_view_t::non_const_value_type num_rows, + typename in_nnz_view_t::non_const_value_type /*num_cols*/, in_row_view_t xadj, + in_nnz_view_t adj, in_color_view_t v_colors) { + KokkosKernels::Impl::ExecSpaceType my_exec_space = KokkosKernels::Impl::kk_get_exec_space_type(); + int vector_size = kk_get_suggested_vector_size(num_rows, adj.extent(0), my_exec_space); + int suggested_team_size = kk_get_suggested_team_size(vector_size, my_exec_space); ; - typename in_nnz_view_t::non_const_value_type team_work_chunk_size = - suggested_team_size; - typedef Kokkos::TeamPolicy> - dynamic_team_policy; + typename in_nnz_view_t::non_const_value_type team_work_chunk_size = suggested_team_size; + typedef Kokkos::TeamPolicy> dynamic_team_policy; typedef typename dynamic_team_policy::member_type team_member_t; - struct ColorChecker - cc(num_rows, xadj, adj, v_colors, team_work_chunk_size); + struct 
ColorChecker cc(num_rows, xadj, adj, v_colors, + team_work_chunk_size); size_t num_conf = 0; - Kokkos::parallel_reduce( - "KokkosKernels::Common::IsD1ColoringValid", - dynamic_team_policy(num_rows / team_work_chunk_size + 1, - suggested_team_size, vector_size), - cc, num_conf); + Kokkos::parallel_reduce("KokkosKernels::Common::IsD1ColoringValid", + dynamic_team_policy(num_rows / team_work_chunk_size + 1, suggested_team_size, vector_size), + cc, num_conf); MyExecSpace().fence(); return num_conf; @@ -948,8 +817,7 @@ template struct MinMaxDegreeFunctor { using ReducerVal = typename Reducer::value_type; MinMaxDegreeFunctor(const rowmap_t &rowmap_) : rowmap(rowmap_) {} - KOKKOS_INLINE_FUNCTION void operator()(ordinal_t i, - ReducerVal &lminmax) const { + KOKKOS_INLINE_FUNCTION void operator()(ordinal_t i, ReducerVal &lminmax) const { ordinal_t deg = rowmap(i + 1) - rowmap(i); if (deg < lminmax.min_val) lminmax.min_val = deg; if (deg > lminmax.max_val) lminmax.max_val = deg; @@ -975,15 +843,13 @@ ordinal_t graph_max_degree(const rowmap_t &rowmap) { if (nrows) nrows--; if (nrows == 0) return 0; ordinal_t val; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, nrows), - MaxDegreeFunctor(rowmap), Reducer(val)); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, nrows), + MaxDegreeFunctor(rowmap), Reducer(val)); return val; } template -void graph_min_max_degree(const rowmap_t &rowmap, ordinal_t &min_degree, - ordinal_t &max_degree) { +void graph_min_max_degree(const rowmap_t &rowmap, ordinal_t &min_degree, ordinal_t &max_degree) { using Reducer = Kokkos::MinMax; ordinal_t nrows = rowmap.extent(0); if (nrows) nrows--; @@ -993,43 +859,13 @@ void graph_min_max_degree(const rowmap_t &rowmap, ordinal_t &min_degree, return; } typename Reducer::value_type result; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, nrows), - MinMaxDegreeFunctor(rowmap), - Reducer(result)); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, nrows), + MinMaxDegreeFunctor(rowmap), Reducer(result)); 
min_degree = result.min_val; max_degree = result.max_val; } -template -void kk_get_lower_triangle_count_sequential(const lno_t nv, - const size_type *in_xadj, - const lno_t *in_adj, - size_type *out_xadj, - const lno_t *new_indices = NULL) { - for (lno_t i = 0; i < nv; ++i) { - lno_t row_index = i; - - if (new_indices) row_index = new_indices[i]; - - out_xadj[i] = 0; - size_type begin = in_xadj[i]; - lno_t rowsize = in_xadj[i + 1] - begin; - - for (lno_t j = 0; j < rowsize; ++j) { - lno_t col = in_adj[j + begin]; - lno_t col_index = col; - if (new_indices) col_index = new_indices[col]; - - if (row_index > col_index) { - ++out_xadj[i]; - } - } - } -} - -template +template struct LowerTriangularMatrix { struct CountTag {}; struct FillTag {}; @@ -1040,12 +876,8 @@ struct LowerTriangularMatrix { typedef Kokkos::TeamPolicy team_count_policy_t; typedef Kokkos::TeamPolicy team_fill_policy_t; - typedef Kokkos::TeamPolicy> - dynamic_team_count_policy_t; - typedef Kokkos::TeamPolicy> - dynamic_team_fill_policy_t; + typedef Kokkos::TeamPolicy> dynamic_team_count_policy_t; + typedef Kokkos::TeamPolicy> dynamic_team_fill_policy_t; typedef typename team_count_policy_t::member_type team_count_member_t; typedef typename team_fill_policy_t::member_type team_fill_member_t; @@ -1063,12 +895,11 @@ struct LowerTriangularMatrix { const lno_t team_work_size; const KokkosKernels::Impl::ExecSpaceType exec_space; const bool is_lower; + const bool incl_diag; - LowerTriangularMatrix(const lno_t num_rows_, const size_type *xadj_, - const lno_t *adj_, const scalar_t *in_vals_, - const lno_t *permutation_, size_type *t_xadj_, - lno_t *t_adj_, scalar_t *out_vals_, - const lno_t team_row_work_size_, bool is_lower_ = true) + LowerTriangularMatrix(const lno_t num_rows_, const size_type *xadj_, const lno_t *adj_, const scalar_t *in_vals_, + const lno_t *permutation_, size_type *t_xadj_, lno_t *t_adj_, scalar_t *out_vals_, + const lno_t team_row_work_size_, bool is_lower_ = true, bool incl_diag_ = 
false) : num_rows(num_rows_), xadj(xadj_), adj(adj_), @@ -1078,156 +909,127 @@ struct LowerTriangularMatrix { t_adj(t_adj_), t_vals(out_vals_), team_work_size(team_row_work_size_), - exec_space( - KokkosKernels::Impl::kk_get_exec_space_type()), - is_lower(is_lower_) {} + exec_space(KokkosKernels::Impl::kk_get_exec_space_type()), + is_lower(is_lower_), + incl_diag(incl_diag_) {} KOKKOS_INLINE_FUNCTION - void operator()(const CountTag &, - const team_count_member_t &teamMember) const { + void operator()(const CountTag &, const team_count_member_t &teamMember) const { const lno_t team_row_begin = teamMember.league_rank() * team_work_size; - const lno_t team_row_end = - KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), - [&](const lno_t &row_index) { - lno_t row_perm = row_index; - if (permutation != NULL) { - row_perm = permutation[row_perm]; - } - - const size_type col_begin = xadj[row_index]; - const size_type col_end = xadj[row_index + 1]; - const lno_t left_work = col_end - col_begin; - lno_t lower_row_size = 0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(teamMember, left_work), - [&](lno_t i, lno_t &rowsize_) { - const size_type adjind = i + col_begin; - lno_t colIndex = adj[adjind]; - if (permutation != NULL) { - colIndex = permutation[colIndex]; - } - if (is_lower) { - if (row_perm > colIndex) { - rowsize_ += 1; - } - } else { - if (row_perm < colIndex) { - rowsize_ += 1; - } - } - }, - lower_row_size); - - t_xadj[row_index] = lower_row_size; - }); + const lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), + [&](const lno_t &row_index) { + lno_t row_perm = row_index; + if (permutation != NULL) { + row_perm = permutation[row_perm]; + } + + const size_type col_begin = xadj[row_index]; + const size_type col_end = 
xadj[row_index + 1]; + const lno_t left_work = col_end - col_begin; + lno_t lower_row_size = 0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(teamMember, left_work), + [&](lno_t i, lno_t &rowsize_) { + const size_type adjind = i + col_begin; + lno_t colIndex = adj[adjind]; + if (permutation != NULL) { + colIndex = permutation[colIndex]; + } + if ((is_lower && row_perm > colIndex) || (!is_lower && row_perm < colIndex) || + (incl_diag && row_perm == colIndex)) { + rowsize_ += 1; + } + }, + lower_row_size); + + t_xadj[row_index] = lower_row_size; + }); } KOKKOS_INLINE_FUNCTION void operator()(const FillTag &, const team_fill_member_t &teamMember) const { const lno_t team_row_begin = teamMember.league_rank() * team_work_size; - const lno_t team_row_end = - KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), - [&](const lno_t &row_index) { - lno_t row_perm = row_index; - if (permutation != NULL) { - row_perm = permutation[row_perm]; - } - - const size_type col_begin = xadj[row_index]; - const size_type col_end = xadj[row_index + 1]; - const lno_t read_left_work = col_end - col_begin; - - const size_type write_begin = t_xadj[row_index]; - const size_type write_end = t_xadj[row_index + 1]; - const lno_t write_left_work = write_end - write_begin; - - // TODO: Write GPU (vector-level) version here: - /* - if(kk_is_gpu_exec_space()) - { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, read_left_work), - [&] (lno_t i) { - const size_type adjind = i + col_begin; - const lno_t colIndex = adj[adjind]; - }); - } - else - ... 
- */ - - for (lno_t r = 0, w = 0; r < read_left_work && w < write_left_work; - ++r) { - const size_type adjind = r + col_begin; - const lno_t colIndex = adj[adjind]; - lno_t colperm = colIndex; - if (permutation != NULL) { - colperm = permutation[colIndex]; - } - if (is_lower) { - if (row_perm > colperm) { - if (in_vals != NULL) { - t_vals[write_begin + w] = in_vals[adjind]; - } - t_adj[write_begin + w++] = colIndex; - } - } else { - if (row_perm < colperm) { - if (in_vals != NULL) { - t_vals[write_begin + w] = in_vals[adjind]; - } - t_adj[write_begin + w++] = colIndex; - } - } - } - }); + const lno_t team_row_end = KOKKOSKERNELS_MACRO_MIN(team_row_begin + team_work_size, num_rows); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end), + [&](const lno_t &row_index) { + lno_t row_perm = row_index; + if (permutation != NULL) { + row_perm = permutation[row_perm]; + } + + const size_type col_begin = xadj[row_index]; + const size_type col_end = xadj[row_index + 1]; + const lno_t read_left_work = col_end - col_begin; + + const size_type write_begin = t_xadj[row_index]; + const size_type write_end = t_xadj[row_index + 1]; + const lno_t write_left_work = write_end - write_begin; + + // TODO: Write GPU (vector-level) version here: + /* + if(kk_is_gpu_exec_space()) + { + Kokkos::parallel_for( + Kokkos::ThreadVectorRange(teamMember, read_left_work), + [&] (lno_t i) { + const size_type adjind = i + col_begin; + const lno_t colIndex = adj[adjind]; + }); + } + else + ... 
+ */ + + for (lno_t r = 0, w = 0; r < read_left_work && w < write_left_work; ++r) { + const size_type adjind = r + col_begin; + const lno_t colIndex = adj[adjind]; + lno_t colperm = colIndex; + if (permutation != NULL) { + colperm = permutation[colIndex]; + } + if ((is_lower && row_perm > colperm) || (!is_lower && row_perm < colperm) || + (incl_diag && row_perm == colperm)) { + if (in_vals != NULL) { + t_vals[write_begin + w] = in_vals[adjind]; + } + t_adj[write_begin + w++] = colIndex; + } + } + }); } }; template -void kk_get_lower_triangle_count_parallel( - const lno_t nv, const size_type ne, const size_type *in_xadj, - const lno_t *in_adj, size_type *out_xadj, const lno_t *new_indices = NULL, - bool use_dynamic_scheduling = false, int chunksize = 4, - bool is_lower = true) { - const int vector_size = kk_get_suggested_vector_size( - nv, ne, KokkosKernels::Impl::kk_get_exec_space_type()); - const int suggested_team_size = kk_get_suggested_team_size( - vector_size, - KokkosKernels::Impl::kk_get_exec_space_type()); +void kk_get_lower_triangle_count_parallel(const lno_t nv, const size_type ne, const size_type *in_xadj, + const lno_t *in_adj, size_type *out_xadj, const lno_t *new_indices = NULL, + bool use_dynamic_scheduling = false, int chunksize = 4, bool is_lower = true, + bool incl_diag = false) { + const int vector_size = + kk_get_suggested_vector_size(nv, ne, KokkosKernels::Impl::kk_get_exec_space_type()); + const int suggested_team_size = + kk_get_suggested_team_size(vector_size, KokkosKernels::Impl::kk_get_exec_space_type()); const int team_work_chunk_size = suggested_team_size * chunksize; typedef LowerTriangularMatrix ltm_t; - ltm_t ltm(nv, in_xadj, in_adj, NULL, new_indices, out_xadj, NULL, NULL, - team_work_chunk_size, is_lower); + ltm_t ltm(nv, in_xadj, in_adj, NULL, new_indices, out_xadj, NULL, NULL, team_work_chunk_size, is_lower, incl_diag); typedef typename ltm_t::team_count_policy_t count_tp_t; typedef typename ltm_t::dynamic_team_count_policy_t 
d_count_tp_t; if (use_dynamic_scheduling) { - Kokkos::parallel_for( - "KokkosKernels::Common::GetLowerTriangleCount::DynamicSchedule", - d_count_tp_t(nv / team_work_chunk_size + 1, suggested_team_size, - vector_size), - ltm); + Kokkos::parallel_for("KokkosKernels::Common::GetLowerTriangleCount::DynamicSchedule", + d_count_tp_t(nv / team_work_chunk_size + 1, suggested_team_size, vector_size), ltm); } else { - Kokkos::parallel_for( - "KokkosKernels::Common::GetLowerTriangleCount::StaticSchedule", - count_tp_t(nv / team_work_chunk_size + 1, suggested_team_size, - vector_size), - ltm); + Kokkos::parallel_for("KokkosKernels::Common::GetLowerTriangleCount::StaticSchedule", + count_tp_t(nv / team_work_chunk_size + 1, suggested_team_size, vector_size), ltm); } ExecutionSpace().fence(); } template -void kk_sort_by_row_size_sequential(const lno_t nv, const size_type *in_xadj, - lno_t *new_indices, +void kk_sort_by_row_size_sequential(const lno_t nv, const size_type *in_xadj, lno_t *new_indices, int sort_decreasing_order = 1) { std::vector begins(nv); std::vector nexts(nv); @@ -1282,10 +1084,8 @@ void kk_sort_by_row_size_sequential(const lno_t nv, const size_type *in_xadj, } #ifdef KOKKOSKERNELS_HAVE_PARALLEL_GNUSORT template -void kk_sort_by_row_size_parallel(const lno_t nv, const size_type *in_xadj, - lno_t *new_indices, - int sort_decreasing_order = 1, - int num_threads = 1) { +void kk_sort_by_row_size_parallel(const lno_t nv, const size_type *in_xadj, lno_t *new_indices, + int sort_decreasing_order = 1, int num_threads = 1) { typedef Kokkos::RangePolicy my_exec_space; struct SortItem { @@ -1298,31 +1098,24 @@ void kk_sort_by_row_size_parallel(const lno_t nv, const size_type *in_xadj, SortItem *num_elements = &(vnum_elements[0]); Kokkos::parallel_for( - "KokkosKernels::Common::SortByRowSize::S0", my_exec_space(0, nv), - KOKKOS_LAMBDA(const lno_t &row) { + "KokkosKernels::Common::SortByRowSize::S0", my_exec_space(0, nv), KOKKOS_LAMBDA(const lno_t &row) { lno_t row_size = 
in_xadj[row + 1] - in_xadj[row]; num_elements[row].size = row_size; num_elements[row].id = row; }); - __gnu_parallel::sort(&(num_elements[0]), &(num_elements[0]) + nv, - std::less()); + __gnu_parallel::sort(&(num_elements[0]), &(num_elements[0]) + nv, std::less()); if (sort_decreasing_order == 1) { Kokkos::parallel_for( "KokkosKernels::Common::SortByRowSize::S1", my_exec_space(0, nv), - KOKKOS_LAMBDA(const lno_t &row) { - new_indices[num_elements[row].id] = row; - }); + KOKKOS_LAMBDA(const lno_t &row) { new_indices[num_elements[row].id] = row; }); } else if (sort_decreasing_order == 0) { Kokkos::parallel_for( "KokkosKernels::Common::SortByRowSize::S2", my_exec_space(0, nv), - KOKKOS_LAMBDA(const lno_t &row) { - new_indices[num_elements[row].id] = nv - row - 1; - }); + KOKKOS_LAMBDA(const lno_t &row) { new_indices[num_elements[row].id] = nv - row - 1; }); } else { Kokkos::parallel_for( - "KokkosKernels::Common::SortByRowSize::S3", my_exec_space(0, nv), - KOKKOS_LAMBDA(const lno_t &row) { + "KokkosKernels::Common::SortByRowSize::S3", my_exec_space(0, nv), KOKKOS_LAMBDA(const lno_t &row) { if (row & 1) { new_indices[num_elements[row].id] = nv - (row + 1) / 2; } else { @@ -1335,199 +1128,81 @@ void kk_sort_by_row_size_parallel(const lno_t nv, const size_type *in_xadj, #ifdef KOKKOSKERNELS_HAVE_PARALLEL_GNUSORT template -void kk_sort_by_row_size(const lno_t nv, const size_type *in_xadj, - lno_t *new_indices, int sort_decreasing_order = 1, +void kk_sort_by_row_size(const lno_t nv, const size_type *in_xadj, lno_t *new_indices, int sort_decreasing_order = 1, int num_threads = 64) { std::cout << "Parallel Sort" << std::endl; - kk_sort_by_row_size_parallel( - nv, in_xadj, new_indices, sort_decreasing_order, num_threads); + kk_sort_by_row_size_parallel(nv, in_xadj, new_indices, sort_decreasing_order, + num_threads); } #else template -void kk_sort_by_row_size(const lno_t nv, const size_type *in_xadj, - lno_t *new_indices, int sort_decreasing_order = 1, +void 
kk_sort_by_row_size(const lno_t nv, const size_type *in_xadj, lno_t *new_indices, int sort_decreasing_order = 1, int /*num_threads*/ = 64) { std::cout << "Sequential Sort" << std::endl; - kk_sort_by_row_size_sequential(nv, in_xadj, new_indices, - sort_decreasing_order); + kk_sort_by_row_size_sequential(nv, in_xadj, new_indices, sort_decreasing_order); } #endif -template -void kk_get_lower_triangle_fill_parallel( - const lno_t nv, const size_type ne, const size_type *in_xadj, - const lno_t *in_adj, const scalar_t *in_vals, size_type *out_xadj, - lno_t *out_adj, scalar_t *out_vals, const lno_t *new_indices = NULL, - bool use_dynamic_scheduling = false, bool chunksize = 4, - bool is_lower = true) { - const int vector_size = kk_get_suggested_vector_size( - nv, ne, KokkosKernels::Impl::kk_get_exec_space_type()); - const int suggested_team_size = kk_get_suggested_team_size( - vector_size, - KokkosKernels::Impl::kk_get_exec_space_type()); +template +void kk_get_lower_triangle_fill_parallel(const lno_t nv, const size_type ne, const size_type *in_xadj, + const lno_t *in_adj, const scalar_t *in_vals, size_type *out_xadj, + lno_t *out_adj, scalar_t *out_vals, const lno_t *new_indices = NULL, + bool use_dynamic_scheduling = false, bool chunksize = 4, bool is_lower = true, + bool incl_diag = false) { + const int vector_size = + kk_get_suggested_vector_size(nv, ne, KokkosKernels::Impl::kk_get_exec_space_type()); + const int suggested_team_size = + kk_get_suggested_team_size(vector_size, KokkosKernels::Impl::kk_get_exec_space_type()); const int team_work_chunk_size = suggested_team_size * chunksize; - typedef LowerTriangularMatrix - ltm_t; - ltm_t ltm(nv, in_xadj, in_adj, in_vals, new_indices, out_xadj, out_adj, - out_vals, team_work_chunk_size, is_lower); + typedef LowerTriangularMatrix ltm_t; + ltm_t ltm(nv, in_xadj, in_adj, in_vals, new_indices, out_xadj, out_adj, out_vals, team_work_chunk_size, is_lower, + incl_diag); typedef typename ltm_t::team_fill_policy_t fill_p_t; 
typedef typename ltm_t::dynamic_team_fill_policy_t d_fill_p_t; if (use_dynamic_scheduling) { - Kokkos::parallel_for( - "KokkosKernels::Common::GetLowerTriangleFill::DynamicSchedule", - d_fill_p_t(nv / team_work_chunk_size + 1, suggested_team_size, - vector_size), - ltm); + Kokkos::parallel_for("KokkosKernels::Common::GetLowerTriangleFill::DynamicSchedule", + d_fill_p_t(nv / team_work_chunk_size + 1, suggested_team_size, vector_size), ltm); } else { - Kokkos::parallel_for( - "KokkosKernels::Common::GetLowerTriangleFill::StaticSchedule", - fill_p_t(nv / team_work_chunk_size + 1, suggested_team_size, - vector_size), - ltm); + Kokkos::parallel_for("KokkosKernels::Common::GetLowerTriangleFill::StaticSchedule", + fill_p_t(nv / team_work_chunk_size + 1, suggested_team_size, vector_size), ltm); } ExecutionSpace().fence(); } -template -void kk_get_lower_triangle_fill_sequential(lno_t nv, const size_type *in_xadj, - const lno_t *in_adj, - const scalar_t *in_vals, - const size_type *out_xadj, - lno_t *out_adj, scalar_t *out_vals, - const lno_t *new_indices = NULL) { - for (lno_t i = 0; i < nv; ++i) { - lno_t row_index = i; - - if (new_indices) row_index = new_indices[i]; - size_type write_index = out_xadj[i]; - size_type begin = in_xadj[i]; - lno_t rowsize = in_xadj[i + 1] - begin; - for (lno_t j = 0; j < rowsize; ++j) { - lno_t col = in_adj[j + begin]; - lno_t col_index = col; - if (new_indices) col_index = new_indices[col]; - - if (row_index > col_index) { - if (in_vals != NULL && out_vals != NULL) { - out_vals[write_index] = in_vals[j + begin]; - } - out_adj[write_index++] = col; - } - } - } -} + template -void kk_get_lower_triangle_count(const lno_t nv, const size_type ne, - const size_type *in_xadj, const lno_t *in_adj, - size_type *out_xadj, - const lno_t *new_indices = NULL, - bool use_dynamic_scheduling = false, - bool chunksize = 4, bool is_lower = true) { +void kk_get_lower_triangle_count(const lno_t nv, const size_type ne, const size_type *in_xadj, const lno_t 
*in_adj, + size_type *out_xadj, const lno_t *new_indices = NULL, + bool use_dynamic_scheduling = false, bool chunksize = 4, bool is_lower = true, + bool incl_diag = false) { // Kokkos::Timer timer1; - // kk_get_lower_triangle_count_sequential(nv, in_xadj, in_adj, out_xadj, - // new_indices); kk_get_lower_triangle_count_parallel( - nv, ne, in_xadj, in_adj, out_xadj, new_indices, use_dynamic_scheduling, - chunksize, is_lower); + nv, ne, in_xadj, in_adj, out_xadj, new_indices, use_dynamic_scheduling, chunksize, is_lower, incl_diag); // double count = timer1.seconds(); // std::cout << "lower count time:" << count<< std::endl; } -template -void kk_get_lower_triangle_fill(lno_t nv, size_type ne, - const size_type *in_xadj, const lno_t *in_adj, - const scalar_t *in_vals, size_type *out_xadj, - lno_t *out_adj, scalar_t *out_vals, - const lno_t *new_indices = NULL, - bool use_dynamic_scheduling = false, - bool chunksize = 4, bool is_lower = true) { +template +void kk_get_lower_triangle_fill(lno_t nv, size_type ne, const size_type *in_xadj, const lno_t *in_adj, + const scalar_t *in_vals, size_type *out_xadj, lno_t *out_adj, scalar_t *out_vals, + const lno_t *new_indices = NULL, bool use_dynamic_scheduling = false, + bool chunksize = 4, bool is_lower = true, bool incl_diag = false) { // Kokkos::Timer timer1; - /* - kk_get_lower_triangle_fill_sequential( - nv, in_xadj, in_adj, - in_vals, - out_xadj, - out_adj, - out_vals, - new_indices - ); - */ - - kk_get_lower_triangle_fill_parallel( - nv, ne, in_xadj, in_adj, in_vals, out_xadj, out_adj, out_vals, - new_indices, use_dynamic_scheduling, chunksize, is_lower); + + kk_get_lower_triangle_fill_parallel( + nv, ne, in_xadj, in_adj, in_vals, out_xadj, out_adj, out_vals, new_indices, use_dynamic_scheduling, chunksize, + is_lower, incl_diag); // double fill = timer1.seconds(); // std::cout << "lower fill time:" << fill<< std::endl; } template -crstmat_t kk_get_lower_triangle( - crstmat_t in_crs_matrix, - typename 
crstmat_t::index_type::value_type *new_indices = NULL, - bool use_dynamic_scheduling = false, bool chunksize = 4) { - typedef typename crstmat_t::execution_space exec_space; - typedef typename crstmat_t::StaticCrsGraphType graph_t; - typedef typename crstmat_t::row_map_type::non_const_type row_map_view_t; - typedef typename crstmat_t::index_type::non_const_type cols_view_t; - typedef typename crstmat_t::values_type::non_const_type values_view_t; - // typedef typename crstmat_t::row_map_type::const_type const_row_map_view_t; - // typedef typename crstmat_t::index_type::const_type const_cols_view_t; - // typedef typename crstmat_t::values_type::const_type const_values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - - lno_t nr = in_crs_matrix.numRows(); - - const scalar_t *vals = in_crs_matrix.values.data(); - const size_type *rowmap = in_crs_matrix.graph.row_map.data(); - const lno_t *entries = in_crs_matrix.graph.entries.data(); - const size_type ne = in_crs_matrix.graph.entries.extent(0); - - row_map_view_t new_row_map( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), nr + 1); - kk_get_lower_triangle_count( - nr, ne, rowmap, entries, new_row_map.data(), new_indices, - use_dynamic_scheduling, chunksize); - - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - nr + 1, new_row_map); - exec_space().fence(); - - auto ll_size = Kokkos::subview(new_row_map, nr); - auto h_ll_size = Kokkos::create_mirror_view(ll_size); - Kokkos::deep_copy(h_ll_size, ll_size); - size_type ll_nnz_size = h_ll_size(); - - // cols_view_t new_entries ("LL", ll_nnz_size); - // values_view_t new_values ("LL", ll_nnz_size); - cols_view_t new_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), - ll_nnz_size); - values_view_t new_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); - - 
kk_get_lower_triangle_fill( - nr, ne, rowmap, entries, vals, new_row_map.data(), new_entries.data(), - new_values.data(), new_indices, use_dynamic_scheduling, chunksize); - - graph_t g(new_entries, new_row_map); - crstmat_t new_ll_mtx("lower triangle", in_crs_matrix.numCols(), new_values, - g); - return new_ll_mtx; -} - -template -crstmat_t kk_get_lower_crs_matrix( - crstmat_t in_crs_matrix, - typename crstmat_t::index_type::value_type *new_indices = NULL, - bool use_dynamic_scheduling = false, bool chunksize = 4) { +crstmat_t kk_get_lower_triangle(crstmat_t in_crs_matrix, typename crstmat_t::index_type::value_type *new_indices = NULL, + bool use_dynamic_scheduling = false, bool chunksize = 4, bool is_lower = true, + bool incl_diag = false) { typedef typename crstmat_t::execution_space exec_space; typedef typename crstmat_t::StaticCrsGraphType graph_t; typedef typename crstmat_t::row_map_type::non_const_type row_map_view_t; @@ -1548,14 +1223,11 @@ crstmat_t kk_get_lower_crs_matrix( const lno_t *entries = in_crs_matrix.graph.entries.data(); const size_type ne = in_crs_matrix.graph.entries.extent(0); - row_map_view_t new_row_map( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), nr + 1); - kk_get_lower_triangle_count( - nr, ne, rowmap, entries, new_row_map.data(), new_indices, - use_dynamic_scheduling, chunksize); + row_map_view_t new_row_map(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), nr + 1); + kk_get_lower_triangle_count(nr, ne, rowmap, entries, new_row_map.data(), new_indices, + use_dynamic_scheduling, chunksize, is_lower, incl_diag); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - nr + 1, new_row_map); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(nr + 1, new_row_map); exec_space().fence(); auto ll_size = Kokkos::subview(new_row_map, nr); @@ -1565,82 +1237,25 @@ crstmat_t kk_get_lower_crs_matrix( // cols_view_t new_entries ("LL", ll_nnz_size); // values_view_t new_values ("LL", ll_nnz_size); - cols_view_t 
new_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), - ll_nnz_size); - values_view_t new_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); + cols_view_t new_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); + values_view_t new_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); kk_get_lower_triangle_fill( - nr, ne, rowmap, entries, vals, new_row_map.data(), new_entries.data(), - new_values.data(), new_indices, use_dynamic_scheduling, chunksize); + nr, ne, rowmap, entries, vals, new_row_map.data(), new_entries.data(), new_values.data(), new_indices, + use_dynamic_scheduling, chunksize, is_lower, incl_diag); graph_t g(new_entries, new_row_map); - crstmat_t new_ll_mtx("lower triangle", in_crs_matrix.numCols(), new_values, - g); + crstmat_t new_ll_mtx("lower triangle", in_crs_matrix.numCols(), new_values, g); return new_ll_mtx; } -template -graph_t kk_get_lower_crs_graph(graph_t in_crs_matrix, - typename graph_t::data_type *new_indices = NULL, - bool /*use_dynamic_scheduling*/ = false, - bool /*chunksize*/ = 4) { - typedef typename graph_t::execution_space exec_space; - - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - - // typedef typename graph_t::row_map_type::const_type const_row_map_view_t; - // typedef typename graph_t::entries_type::const_type const_cols_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - - lno_t nr = in_crs_matrix.numRows(); - const size_type *rowmap = in_crs_matrix.row_map.data(); - const lno_t *entries = in_crs_matrix.entries.data(); - - const size_type ne = in_crs_matrix.graph.entries.extent(0); - - row_map_view_t new_row_map( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), nr + 1); - kk_get_lower_triangle_count( - nr, ne, rowmap, entries, new_row_map.data(), 
new_indices); - - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( - nr + 1, new_row_map); - exec_space().fence(); - - auto ll_size = Kokkos::subview(new_row_map, nr); - auto h_ll_size = Kokkos::create_mirror_view(ll_size); - Kokkos::deep_copy(h_ll_size, ll_size); - size_type ll_nnz_size = h_ll_size(); - - cols_view_t new_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), - ll_nnz_size); - - kk_get_lower_triangle_fill( - nr, ne, rowmap, entries, NULL, new_row_map.data(), new_entries.data(), - NULL, new_indices); - - graph_t g(new_entries, new_row_map); - - return g; -} - -template -void kk_get_lower_triangle(typename cols_view_t::non_const_value_type nr, - row_map_view_t in_rowmap, cols_view_t in_entries, - values_view_t in_values, - out_row_map_view_t &out_rowmap, - out_cols_view_t &out_entries, - out_values_view_t &out_values, - new_indices_t &new_indices, - bool use_dynamic_scheduling = false, - bool chunksize = 4, bool is_lower = true) { +template +void kk_get_lower_triangle(typename cols_view_t::non_const_value_type nr, row_map_view_t in_rowmap, + cols_view_t in_entries, values_view_t in_values, out_row_map_view_t &out_rowmap, + out_cols_view_t &out_entries, out_values_view_t &out_values, new_indices_t &new_indices, + bool use_dynamic_scheduling = false, bool chunksize = 4, bool is_lower = true, + bool incl_diag = false) { // typedef typename row_map_view_t::const_type const_row_map_view_t; // typedef typename cols_view_t::const_type const_cols_view_t; // typedef typename values_view_t::const_type const_values_view_t; @@ -1654,14 +1269,12 @@ void kk_get_lower_triangle(typename cols_view_t::non_const_value_type nr, const lno_t *entries = in_entries.data(); const size_type ne = in_entries.extent(0); - out_rowmap = out_row_map_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), nr + 1); - kk_get_lower_triangle_count( - nr, ne, rowmap, entries, out_rowmap.data(), new_indices.data(), - use_dynamic_scheduling, chunksize, is_lower); + 
out_rowmap = out_row_map_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), nr + 1); + kk_get_lower_triangle_count(nr, ne, rowmap, entries, out_rowmap.data(), + new_indices.data(), use_dynamic_scheduling, chunksize, + is_lower, incl_diag); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(nr + 1, - out_rowmap); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(nr + 1, out_rowmap); exec_space().fence(); auto ll_size = Kokkos::subview(out_rowmap, nr); @@ -1671,27 +1284,24 @@ void kk_get_lower_triangle(typename cols_view_t::non_const_value_type nr, // cols_view_t new_entries ("LL", ll_nnz_size); // values_view_t new_values ("LL", ll_nnz_size); - out_entries = out_cols_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); + out_entries = out_cols_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); if (in_values.data() != NULL) - out_values = out_values_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); + out_values = out_values_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ll_nnz_size); kk_get_lower_triangle_fill( - nr, ne, rowmap, entries, vals, out_rowmap.data(), out_entries.data(), - out_values.data(), new_indices.data(), use_dynamic_scheduling, chunksize, - is_lower); + nr, ne, rowmap, entries, vals, out_rowmap.data(), out_entries.data(), out_values.data(), new_indices.data(), + use_dynamic_scheduling, chunksize, is_lower, incl_diag); } -template -void kk_create_incidence_tranpose_matrix_from_lower_triangle( - typename cols_view_t::non_const_value_type nr, row_map_view_t in_rowmap, - cols_view_t in_entries, out_row_map_view_t &out_rowmap, - out_cols_view_t &out_entries, bool /*use_dynamic_scheduling */ = false, - bool /*chunksize*/ = 4) { +void kk_create_incidence_tranpose_matrix_from_lower_triangle(typename cols_view_t::non_const_value_type nr, + row_map_view_t in_rowmap, cols_view_t in_entries, + out_row_map_view_t &out_rowmap, + out_cols_view_t 
&out_entries, + bool /*use_dynamic_scheduling */ = false, + bool /*chunksize*/ = 4) { // typedef typename row_map_view_t::const_type const_row_map_view_t; // typedef typename cols_view_t::const_type const_cols_view_t; @@ -1701,24 +1311,21 @@ void kk_create_incidence_tranpose_matrix_from_lower_triangle( // const size_type *rowmap = in_rowmap.data(); // const lno_t *entries= in_entries.data(); const size_type ne = in_entries.extent(0); - out_rowmap = out_row_map_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ne + 1); + out_rowmap = out_row_map_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), ne + 1); // const lno_t nr = in_rowmap.extent(0) - 1; typedef Kokkos::RangePolicy my_exec_space; Kokkos::parallel_for( "KokkosKernels::Common::CreateIncidenceTransposeMatrixFromLowerTriangle::" "S0", - my_exec_space(0, ne + 1), - KOKKOS_LAMBDA(const lno_t &i) { out_rowmap[i] = i * 2; }); + my_exec_space(0, ne + 1), KOKKOS_LAMBDA(const lno_t &i) { out_rowmap[i] = i * 2; }); // typedef Kokkos::TeamPolicy team_policy_t; // int vector_size = 2; // team_policy_t(ne) // nv / team_work_chunk_size + 1 , suggested_team_size, vector_size - out_entries = out_cols_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), 2 * ne); + out_entries = out_cols_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "LL"), 2 * ne); // TODO MAKE IT WITH TEAMS. 
Kokkos::parallel_for( @@ -1737,14 +1344,13 @@ void kk_create_incidence_tranpose_matrix_from_lower_triangle( }); } -template -void kk_create_incidence_matrix_from_original_matrix( - typename cols_view_t::non_const_value_type nr, row_map_view_t in_rowmap, - cols_view_t in_entries, out_row_map_view_t &out_rowmap, - out_cols_view_t &out_entries, permutation_view_t permutation, - bool use_dynamic_scheduling = false, bool chunksize = 4) { +void kk_create_incidence_matrix_from_original_matrix(typename cols_view_t::non_const_value_type nr, + row_map_view_t in_rowmap, cols_view_t in_entries, + out_row_map_view_t &out_rowmap, out_cols_view_t &out_entries, + permutation_view_t permutation, + bool use_dynamic_scheduling = false, bool chunksize = 4) { // typedef typename row_map_view_t::const_type const_row_map_view_t; // typedef typename cols_view_t::const_type const_cols_view_t; @@ -1754,33 +1360,27 @@ void kk_create_incidence_matrix_from_original_matrix( lno_t *perm = permutation.data(); const size_type ne = in_entries.extent(0); - out_rowmap = out_row_map_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "out_rowmap"), nr + 1); - out_entries = out_cols_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "out_cols_view"), ne); + out_rowmap = out_row_map_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "out_rowmap"), nr + 1); + out_entries = out_cols_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "out_cols_view"), ne); // todo: need to try both true and false bool sort_decreasing_order = true; // find the size of rows at upper triangular. // this gives the size of each column in lower triangluar. 
- kk_get_lower_triangle_count( - nr, ne, in_rowmap.data(), in_entries.data(), out_rowmap.data(), - permutation.data(), use_dynamic_scheduling, chunksize, - sort_decreasing_order); + kk_get_lower_triangle_count(nr, ne, in_rowmap.data(), in_entries.data(), + out_rowmap.data(), permutation.data(), + use_dynamic_scheduling, chunksize, sort_decreasing_order); exec_space().fence(); - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(nr + 1, - out_rowmap); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(nr + 1, out_rowmap); // kk_print_1Dview(out_rowmap, false, 20); - out_row_map_view_t out_rowmap_copy( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp"), nr + 1); + out_row_map_view_t out_rowmap_copy(Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp"), nr + 1); // out_rowmap = out_row_map_view_t("LL", nr+1); Kokkos::parallel_for( "KokkosKernels::Common::" "CreateIncidenceTransposeMatrixFromOriginalTriangle::S0", - my_exec_space(0, nr + 1), - KOKKOS_LAMBDA(const lno_t &i) { out_rowmap_copy[i] = in_rowmap[i]; }); + my_exec_space(0, nr + 1), KOKKOS_LAMBDA(const lno_t &i) { out_rowmap_copy[i] = in_rowmap[i]; }); if (sort_decreasing_order) { Kokkos::parallel_for( @@ -1802,12 +1402,9 @@ void kk_create_incidence_matrix_from_original_matrix( lno_t col_perm = col; if (perm) col_perm = perm[col]; if (row_perm > col_perm) { - typedef typename std::remove_reference::type atomic_incr_type; - size_type row_write_index = Kokkos::atomic_fetch_add( - &(out_rowmap_copy[row]), atomic_incr_type(1)); - size_type col_write_index = Kokkos::atomic_fetch_add( - &(out_rowmap_copy[col]), atomic_incr_type(1)); + typedef typename std::remove_reference::type atomic_incr_type; + size_type row_write_index = Kokkos::atomic_fetch_add(&(out_rowmap_copy[row]), atomic_incr_type(1)); + size_type col_write_index = Kokkos::atomic_fetch_add(&(out_rowmap_copy[col]), atomic_incr_type(1)); out_entries[row_write_index] = used_edge_index + used_count; out_entries[col_write_index] = used_edge_index 
+ used_count; ++used_count; @@ -1835,12 +1432,9 @@ void kk_create_incidence_matrix_from_original_matrix( lno_t col_perm = col; if (perm) col_perm = perm[col]; if (row_perm < col_perm) { - typedef typename std::remove_reference::type atomic_incr_type; - size_type row_write_index = Kokkos::atomic_fetch_add( - &(out_rowmap_copy[row]), atomic_incr_type(1)); - size_type col_write_index = Kokkos::atomic_fetch_add( - &(out_rowmap_copy[col]), atomic_incr_type(1)); + typedef typename std::remove_reference::type atomic_incr_type; + size_type row_write_index = Kokkos::atomic_fetch_add(&(out_rowmap_copy[row]), atomic_incr_type(1)); + size_type col_write_index = Kokkos::atomic_fetch_add(&(out_rowmap_copy[col]), atomic_incr_type(1)); out_entries[row_write_index] = used_edge_index + used_count; out_entries[col_write_index] = used_edge_index + used_count; ++used_count; @@ -1853,8 +1447,7 @@ void kk_create_incidence_matrix_from_original_matrix( Kokkos::parallel_for( "KokkosKernels::Common::" "CreateIncidenceTransposeMatrixFromOriginalTriangle::S3", - my_exec_space(0, nr + 1), - KOKKOS_LAMBDA(const lno_t &i) { out_rowmap[i] = in_rowmap[i]; }); + my_exec_space(0, nr + 1), KOKKOS_LAMBDA(const lno_t &i) { out_rowmap[i] = in_rowmap[i]; }); } template @@ -1862,13 +1455,10 @@ struct ReduceLargerRowCount { view_type rowmap; typename view_type::const_value_type threshold; - ReduceLargerRowCount(view_type view_to_reduce_, - typename view_type::const_value_type threshold_) + ReduceLargerRowCount(view_type view_to_reduce_, typename view_type::const_value_type threshold_) : rowmap(view_to_reduce_), threshold(threshold_) {} KOKKOS_INLINE_FUNCTION - void operator()( - const size_t &i, - typename view_type::non_const_value_type &sum_reduction) const { + void operator()(const size_t &i, typename view_type::non_const_value_type &sum_reduction) const { if (rowmap(i + 1) - rowmap(i) > threshold) { sum_reduction += 1; } @@ -1876,26 +1466,21 @@ struct ReduceLargerRowCount { }; template -void 
kk_reduce_numrows_larger_than_threshold( - const MyExecSpace &my_exec_space, size_t num_elements, - view_type view_to_reduce, typename view_type::const_value_type threshold, - typename view_type::non_const_value_type &sum_reduction) { +void kk_reduce_numrows_larger_than_threshold(const MyExecSpace &my_exec_space, size_t num_elements, + view_type view_to_reduce, typename view_type::const_value_type threshold, + typename view_type::non_const_value_type &sum_reduction) { typedef Kokkos::RangePolicy range_policy_t; - Kokkos::parallel_reduce( - "KokkosKernels::Common::ReduceNumRowsLargerThanThreshold", - range_policy_t(my_exec_space, 0, num_elements), - ReduceLargerRowCount(view_to_reduce, threshold), - sum_reduction); + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceNumRowsLargerThanThreshold", + range_policy_t(my_exec_space, 0, num_elements), + ReduceLargerRowCount(view_to_reduce, threshold), sum_reduction); } template -void kk_reduce_numrows_larger_than_threshold( - size_t num_elements, view_type view_to_reduce, - typename view_type::const_value_type threshold, - typename view_type::non_const_value_type &sum_reduction) { +void kk_reduce_numrows_larger_than_threshold(size_t num_elements, view_type view_to_reduce, + typename view_type::const_value_type threshold, + typename view_type::non_const_value_type &sum_reduction) { MyExecSpace my_exec_space; - kk_reduce_numrows_larger_than_threshold( - my_exec_space, num_elements, view_to_reduce, threshold, sum_reduction); + kk_reduce_numrows_larger_than_threshold(my_exec_space, num_elements, view_to_reduce, threshold, sum_reduction); } // Note: "block" in member name means it's block internal - otherwise it @@ -1904,12 +1489,8 @@ template class RowIndexBase { public: KOKKOS_INLINE_FUNCTION - RowIndexBase(const lno_t block_size_, const lno_t row_begin_, - const lno_t row_end_) - : block_size(block_size_), - row_begin(row_begin_), - row_end(row_end_), - row_size(row_end_ - row_begin_) { + RowIndexBase(const lno_t 
block_size_, const lno_t row_begin_, const lno_t row_end_) + : block_size(block_size_), row_begin(row_begin_), row_end(row_end_), row_size(row_end_ - row_begin_) { row_off = row_begin_ * block_mtx_size(); } @@ -1923,10 +1504,7 @@ class RowIndexBase { lno_t size() { return row_size; } KOKKOS_INLINE_FUNCTION - size_type block_mtx_size() { - return static_cast(block_size) * - static_cast(block_size); - } + size_type block_mtx_size() { return static_cast(block_size) * static_cast(block_size); } KOKKOS_INLINE_FUNCTION size_type row_offset() { return row_off; } @@ -1954,53 +1532,43 @@ class MatrixRowIndex; size is 1 */ template -class MatrixRowIndex - : public RowIndexBase { +class MatrixRowIndex : public RowIndexBase { public: using Base = RowIndexBase; KOKKOS_INLINE_FUNCTION - MatrixRowIndex(const lno_t block_size_, const lno_t row_begin_, - const lno_t row_end_) + MatrixRowIndex(const lno_t block_size_, const lno_t row_begin_, const lno_t row_end_) : Base(block_size_, row_begin_, row_end_) {} KOKKOS_INLINE_FUNCTION - size_type block(const lno_t col_idx) { - return Base::row_offset() + col_idx * Base::block_size; - } + size_type block(const lno_t col_idx) { return Base::row_offset() + col_idx * Base::block_size; } KOKKOS_INLINE_FUNCTION size_type block_stride() { return Base::size() * Base::block_size; } KOKKOS_INLINE_FUNCTION - size_type value(const lno_t col_idx, const lno_t block_row, - const lno_t block_col) { + size_type value(const lno_t col_idx, const lno_t block_row, const lno_t block_col) { return block(col_idx) + block_row * block_stride() + block_col; } }; template -class MatrixRowIndex - : public RowIndexBase { +class MatrixRowIndex : public RowIndexBase { public: using Base = RowIndexBase; KOKKOS_INLINE_FUNCTION - MatrixRowIndex(const lno_t block_size_, const lno_t row_begin_, - const lno_t row_end_) + MatrixRowIndex(const lno_t block_size_, const lno_t row_begin_, const lno_t row_end_) : Base(block_size_, row_begin_, row_end_) {} KOKKOS_INLINE_FUNCTION - 
size_type block(const lno_t col_idx) { - return Base::row_offset() + col_idx * Base::block_mtx_size(); - } + size_type block(const lno_t col_idx) { return Base::row_offset() + col_idx * Base::block_mtx_size(); } KOKKOS_INLINE_FUNCTION size_type block_stride() { return Base::block_size; } KOKKOS_INLINE_FUNCTION - size_type value(const lno_t col_idx, const lno_t block_row, - const lno_t block_col) { + size_type value(const lno_t col_idx, const lno_t block_row, const lno_t block_col) { return block(col_idx) + block_row * block_stride() + block_col; } }; @@ -2008,17 +1576,13 @@ class MatrixRowIndex template struct MatrixTraits; -template -struct MatrixTraits< - KokkosSparse::CrsMatrix> { +template +struct MatrixTraits> { static constexpr auto format = KokkosSparse::SparseMatrixFormat::CRS; }; -template -struct MatrixTraits> { +template +struct MatrixTraits> { static constexpr auto format = KokkosSparse::SparseMatrixFormat::BSR; }; @@ -2027,14 +1591,10 @@ struct MatrixConverter; template <> struct MatrixConverter { - template > + template > static bsrMtx_t from_bsr_formated_point_crsmatrix( - const KokkosSparse::CrsMatrix - &mtx, - lno_t block_size) { + const KokkosSparse::CrsMatrix &mtx, lno_t block_size) { return bsrMtx_t(mtx, block_size); } }; @@ -2045,8 +1605,7 @@ struct CountEntriesFallingEdges { CountEntriesFallingEdges(const Entries &entries_) : entries(entries_) {} - KOKKOS_INLINE_FUNCTION void operator()(size_type i, - size_type &numFallingEdges) const { + KOKKOS_INLINE_FUNCTION void operator()(size_type i, size_type &numFallingEdges) const { if (entries(i) > entries(i + 1)) numFallingEdges++; } @@ -2058,11 +1617,9 @@ struct CountRowBoundaryFallingEdges { using size_type = typename Rowmap::non_const_value_type; using ordinal_type = typename Entries::non_const_value_type; - CountRowBoundaryFallingEdges(const Rowmap &rowmap_, const Entries &entries_) - : rowmap(rowmap_), entries(entries_) {} + CountRowBoundaryFallingEdges(const Rowmap &rowmap_, const Entries 
&entries_) : rowmap(rowmap_), entries(entries_) {} - KOKKOS_INLINE_FUNCTION void operator()( - ordinal_type i, size_type &numBoundaryFallingEdges) const { + KOKKOS_INLINE_FUNCTION void operator()(ordinal_type i, size_type &numBoundaryFallingEdges) const { // Comparing the entries at end of row i, and beginning of row i+1 size_type rowBegin = rowmap(i); size_type rowEnd = rowmap(i + 1); @@ -2101,21 +1658,17 @@ bool isCrsGraphSorted(const Rowmap &rowmap, const Entries &entries) { // falling edges which cross row boundaries. size_type totalFallingEdges = 0; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, nnz - 1), - CountEntriesFallingEdges(entries), - totalFallingEdges); + CountEntriesFallingEdges(entries), totalFallingEdges); size_type rowBoundaryFallingEdges = 0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, numRows - 1), - CountRowBoundaryFallingEdges(rowmap, entries), - rowBoundaryFallingEdges); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numRows - 1), + CountRowBoundaryFallingEdges(rowmap, entries), rowBoundaryFallingEdges); return totalFallingEdges == rowBoundaryFallingEdges; } template struct CountDroppedEntriesFunctor { using Scalar = typename Values::non_const_value_type; - CountDroppedEntriesFunctor(const Values &values_, Mag tol_) - : values(values_), tol(tol_) {} + CountDroppedEntriesFunctor(const Values &values_, Mag tol_) : values(values_), tol(tol_) {} KOKKOS_INLINE_FUNCTION void operator()(int64_t i, Offset &lcount) const { if (Kokkos::ArithTraits::abs(values(i)) <= tol) lcount++; @@ -2142,11 +1695,9 @@ struct MarkFinalRowEntries { template struct DropEntriesScanner { KOKKOS_DEFAULTED_FUNCTION DropEntriesScanner() = default; - KOKKOS_INLINE_FUNCTION DropEntriesScanner(Offset i_out_, Offset row_) - : i_out(i_out_), row(row_) {} + KOKKOS_INLINE_FUNCTION DropEntriesScanner(Offset i_out_, Offset row_) : i_out(i_out_), row(row_) {} - KOKKOS_INLINE_FUNCTION void operator+=( - const DropEntriesScanner &rhs) { + KOKKOS_INLINE_FUNCTION void 
operator+=(const DropEntriesScanner &rhs) { i_out += rhs.i_out; row += rhs.row; } @@ -2155,18 +1706,15 @@ struct DropEntriesScanner { Offset row; // The row index (ignoring rows which were empty in input) }; -template +template struct DropEntriesFunctor { using Offset = typename RowmapIn::non_const_value_type; using Scalar = typename ValuesIn::non_const_value_type; - DropEntriesFunctor(const Bitset &rowEndMarkers_, const RowmapIn &rowmapIn_, - const EntriesIn &entriesIn_, const ValuesIn &valuesIn_, - const RowmapOut &compactRowmapOut_, - const EntriesOut &entriesOut_, const ValuesOut &valuesOut_, - Mag tol_) + DropEntriesFunctor(const Bitset &rowEndMarkers_, const RowmapIn &rowmapIn_, const EntriesIn &entriesIn_, + const ValuesIn &valuesIn_, const RowmapOut &compactRowmapOut_, const EntriesOut &entriesOut_, + const ValuesOut &valuesOut_, Mag tol_) : rowEndMarkers(rowEndMarkers_), rowmapIn(rowmapIn_), entriesIn(entriesIn_), @@ -2176,9 +1724,7 @@ struct DropEntriesFunctor { valuesOut(valuesOut_), tol(tol_) {} - KOKKOS_INLINE_FUNCTION void operator()(int64_t i_in, - DropEntriesScanner &scanval, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(int64_t i_in, DropEntriesScanner &scanval, bool finalPass) const { // i_in is the index of the input entry being processed // i_out (if finalPass == true) is the index of where that same entry goes // in the filtered matrix @@ -2218,20 +1764,14 @@ template struct ExpandRowmapFunctor { using Offset = typename RowmapIn::non_const_value_type; - ExpandRowmapFunctor(const RowmapIn &rowmapIn_, - const RowmapOut &compactRowmapOut_, - const RowmapOut &rowmapOut_) - : rowmapIn(rowmapIn_), - compactRowmapOut(compactRowmapOut_), - rowmapOut(rowmapOut_) {} + ExpandRowmapFunctor(const RowmapIn &rowmapIn_, const RowmapOut &compactRowmapOut_, const RowmapOut &rowmapOut_) + : rowmapIn(rowmapIn_), compactRowmapOut(compactRowmapOut_), rowmapOut(rowmapOut_) {} - KOKKOS_INLINE_FUNCTION void operator()(Ordinal row, Ordinal 
&compactRow, - bool finalPass) const { + KOKKOS_INLINE_FUNCTION void operator()(Ordinal row, Ordinal &compactRow, bool finalPass) const { if (finalPass) { rowmapOut(row) = compactRowmapOut(compactRow); } - if (row + 1 < rowmapIn.extent_int(0) && rowmapIn(row + 1) != rowmapIn(row)) - compactRow++; + if (row + 1 < rowmapIn.extent_int(0) && rowmapIn(row + 1) != rowmapIn(row)) compactRow++; } RowmapIn rowmapIn; @@ -2243,10 +1783,8 @@ struct ExpandRowmapFunctor { // If there are no entries to remove, A is returned. // Otherwise a new matrix is returned. template -Matrix removeCrsMatrixZeros( - const Matrix &A, - typename Kokkos::ArithTraits::mag_type tol = - 0) { +Matrix removeCrsMatrixZeros(const Matrix &A, + typename Kokkos::ArithTraits::mag_type tol = 0) { using Ordinal = typename Matrix::non_const_ordinal_type; using Offset = typename Matrix::non_const_size_type; using Device = typename Matrix::device_type; @@ -2255,11 +1793,9 @@ Matrix removeCrsMatrixZeros( using RangePol = Kokkos::RangePolicy; // First, count the number of entries to remove Offset entriesToRemove; - Kokkos::parallel_reduce( - RangePol(0, A.nnz()), - CountDroppedEntriesFunctor( - A.values, tol), - entriesToRemove); + Kokkos::parallel_reduce(RangePol(0, A.nnz()), + CountDroppedEntriesFunctor(A.values, tol), + entriesToRemove); if (entriesToRemove == Offset(0)) { // The matrix has no zeros to remove, so just return it as-is return A; @@ -2268,64 +1804,44 @@ Matrix removeCrsMatrixZeros( // To help construct the new rowmap, for each original entry record whether // it's at the end of its row. 
Kokkos::Bitset rowEndMarkersNonconst(A.nnz()); - Kokkos::parallel_for( - RangePol(0, A.graph.row_map.extent(0)), - MarkFinalRowEntries(rowEndMarkersNonconst, A.graph.row_map)); + Kokkos::parallel_for(RangePol(0, A.graph.row_map.extent(0)), + MarkFinalRowEntries(rowEndMarkersNonconst, A.graph.row_map)); Offset filteredNNZ = A.nnz() - entriesToRemove; typename Matrix::values_type::non_const_type filteredValues( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered values"), - filteredNNZ); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered values"), filteredNNZ); typename Matrix::index_type::non_const_type filteredEntries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered entries"), - filteredNNZ); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered entries"), filteredNNZ); typename Matrix::row_map_type::non_const_type compactFilteredRowmap( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Afiltered rowmap (compact)"), - A.numRows() + 1); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered rowmap (compact)"), A.numRows() + 1); typename Matrix::row_map_type::non_const_type filteredRowmap( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered rowmap"), - A.numRows() + 1); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Afiltered rowmap"), A.numRows() + 1); // Using a parallel scan, compact the non-filtered entries and partially fill // in the rowmap (only marking row begins for rows which were originally // non-empty) The rest can be filled in with a max-scan. 
Kokkos::ConstBitset rowEndMarkers(rowEndMarkersNonconst); - Kokkos::parallel_scan( - RangePol(0, A.nnz()), - DropEntriesFunctor(rowEndMarkers, A.graph.row_map, A.graph.entries, - A.values, compactFilteredRowmap, filteredEntries, - filteredValues, tol)); + Kokkos::parallel_scan(RangePol(0, A.nnz()), + DropEntriesFunctor(rowEndMarkers, A.graph.row_map, A.graph.entries, A.values, + compactFilteredRowmap, filteredEntries, filteredValues, tol)); Kokkos::parallel_scan( RangePol(0, A.numRows() + 1), - ExpandRowmapFunctor(A.graph.row_map, compactFilteredRowmap, - filteredRowmap)); + ExpandRowmapFunctor( + A.graph.row_map, compactFilteredRowmap, filteredRowmap)); ExecSpace().fence(); - return Matrix("A filtered", A.numRows(), A.numCols(), filteredNNZ, - filteredValues, filteredRowmap, filteredEntries); + return Matrix("A filtered", A.numRows(), A.numCols(), filteredNNZ, filteredValues, filteredRowmap, filteredEntries); } template -void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, - const Entries &entriesIn, const Values &valuesIn) { - auto rowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmapIn); - auto entries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entriesIn); - auto values = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), valuesIn); - size_t nnz = entries.extent(0); - if (nnz != values.extent(0)) - throw std::runtime_error( - "Matrix entries/values views have different lengths"); - if ((m == 0 && rowmap.extent(0) > size_t(1)) || - (rowmap.extent(0) != size_t(m + 1))) +void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, const Entries &entriesIn, const Values &valuesIn) { + auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowmapIn); + auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entriesIn); + auto values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), valuesIn); + size_t nnz = entries.extent(0); + if (nnz != values.extent(0)) throw 
std::runtime_error("Matrix entries/values views have different lengths"); + if ((m == 0 && rowmap.extent(0) > size_t(1)) || (rowmap.extent(0) != size_t(m + 1))) throw std::runtime_error("Matrix rowmap has wrong length"); - if (m && nnz != rowmap(m)) - throw std::runtime_error("Matrix rowmap final entry doesn't match nnz"); + if (m && nnz != rowmap(m)) throw std::runtime_error("Matrix rowmap final entry doesn't match nnz"); for (int i = 0; i < m; i++) { - if (rowmap(i) > rowmap(i + 1)) - throw std::runtime_error("Matrix rowmap not ascending"); + if (rowmap(i) > rowmap(i + 1)) throw std::runtime_error("Matrix rowmap not ascending"); } for (size_t i = 0; i < size_t(nnz); i++) { if (entries(i) >= n) throw std::runtime_error("Matrix entry out of bounds"); @@ -2337,18 +1853,14 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, * and last column indices at each row of the sub-block. This is a host function * used by the kk_extract_diagonal_blocks_crsmatrix_sequential() */ -template +template void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( - const row_map_type &A_row_map, const entries_type &A_entries, - const ordinal_type &blk_row_start, const ordinal_type &blk_col_start, - const ordinal_type &blk_nrows, const ordinal_type &blk_ncols, - size_type &blk_nnz, offset_view1d_type &first_indices, - offset_view1d_type &last_indices) { + const row_map_type &A_row_map, const entries_type &A_entries, const ordinal_type &blk_row_start, + const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, const ordinal_type &blk_ncols, size_type &blk_nnz, + offset_view1d_type &first_indices, offset_view1d_type &last_indices) { // Rowmap of i-th row-oriented sub-matrix - auto A_row_map_sub = Kokkos::subview( - A_row_map, - Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1)); + auto A_row_map_sub = Kokkos::subview(A_row_map, Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1)); blk_nnz = 0; @@ -2382,16 +1894,13 @@ void 
kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( * This is a host function used by the * kk_extract_diagonal_blocks_crsmatrix_sequential() */ -template -void kk_extract_subblock_crsmatrix_sequential( - const entries_type &A_entries, const values_type &A_values, - const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, - const size_type &blk_nnz, const offset_view1d_type &first_indices, - const offset_view1d_type &last_indices, out_row_map_type &blk_row_map, - out_entries_type &blk_entries, out_values_type &blk_values) { +template +void kk_extract_subblock_crsmatrix_sequential(const entries_type &A_entries, const values_type &A_values, + const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, + const size_type &blk_nnz, const offset_view1d_type &first_indices, + const offset_view1d_type &last_indices, out_row_map_type &blk_row_map, + out_entries_type &blk_entries, out_values_type &blk_values) { // - create out_row_map // - copy A_entries to out_entries and update out_entries with local column // indices @@ -2430,36 +1939,31 @@ void kk_extract_subblock_crsmatrix_sequential( */ template std::vector -kk_extract_diagonal_blocks_crsmatrix_sequential( - const crsMat_t &A, std::vector &DiagBlk_v, - bool UseRCMReordering = false) { - using row_map_type = typename crsMat_t::row_map_type; - using entries_type = typename crsMat_t::index_type; - using values_type = typename crsMat_t::values_type; - using graph_t = typename crsMat_t::StaticCrsGraphType; - using out_row_map_type = typename graph_t::row_map_type::non_const_type; - using out_entries_type = typename graph_t::entries_type::non_const_type; - using out_values_type = typename crsMat_t::values_type::non_const_type; +kk_extract_diagonal_blocks_crsmatrix_sequential(const crsMat_t &A, std::vector &DiagBlk_v, + bool UseRCMReordering = false) { + using row_map_type = typename crsMat_t::row_map_type; + using entries_type = typename crsMat_t::index_type; + using values_type = typename 
crsMat_t::values_type; + using graph_t = typename crsMat_t::StaticCrsGraphType; + using out_row_map_type = typename graph_t::row_map_type::non_const_type; + using out_entries_type = typename graph_t::entries_type::non_const_type; + using out_values_type = typename crsMat_t::values_type::non_const_type; using out_row_map_hostmirror_type = typename out_row_map_type::HostMirror; using out_entries_hostmirror_type = typename out_entries_type::HostMirror; using out_values_hostmirror_type = typename out_values_type::HostMirror; - using ordinal_type = typename crsMat_t::non_const_ordinal_type; - using size_type = typename crsMat_t::non_const_size_type; - using value_type = typename crsMat_t::non_const_value_type; - using offset_view1d_type = - Kokkos::View; + using ordinal_type = typename crsMat_t::non_const_ordinal_type; + using size_type = typename crsMat_t::non_const_size_type; + using value_type = typename crsMat_t::non_const_value_type; + using offset_view1d_type = Kokkos::View; row_map_type A_row_map = A.graph.row_map; entries_type A_entries = A.graph.entries; values_type A_values = A.values; - auto A_row_map_h = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); - auto A_entries_h = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); - auto A_values_h = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); + auto A_row_map_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); + auto A_entries_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); + auto A_values_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); ordinal_type A_nrows = static_cast(A.numRows()); ordinal_type A_ncols = static_cast(A.numCols()); @@ -2492,22 +1996,19 @@ kk_extract_diagonal_blocks_crsmatrix_sequential( if ((n_blocks < 1) || (A_nrows < n_blocks)) { std::ostringstream os; os << "The number of diagonal blocks (" << n_blocks - << ") should be >=1 and <= the number of rows of the 
matrix A (" - << A_nrows << ")"; + << ") should be >=1 and <= the number of rows of the matrix A (" << A_nrows << ")"; throw std::runtime_error(os.str()); } - ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) - ? (A_nrows / n_blocks) - : (A_nrows / n_blocks + 1); + ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) : (A_nrows / n_blocks + 1); if (UseRCMReordering) { perm_v.resize(n_blocks); perm_h_v.resize(n_blocks); } - ordinal_type blk_row_start = 0; // first row index of i-th diagonal block - ordinal_type blk_col_start = 0; // first col index of i-th diagonal block + ordinal_type blk_row_start = 0; // first row index of i-th diagonal block + ordinal_type blk_col_start = 0; // first col index of i-th diagonal block ordinal_type blk_nrows, blk_ncols; // Nrows, Ncols of i-th diagonal block for (ordinal_type i = 0; i < n_blocks; i++) { @@ -2521,66 +2022,45 @@ kk_extract_diagonal_blocks_crsmatrix_sequential( // First round: count i-th non-zeros or size of entries_v[i] and find // the first and last column indices at each row size_type blk_nnz = 0; - offset_view1d_type first( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "first"), - blk_nrows); // first position per row - offset_view1d_type last( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "last"), - blk_nrows); // last position per row + offset_view1d_type first(Kokkos::view_alloc(Kokkos::WithoutInitializing, "first"), + blk_nrows); // first position per row + offset_view1d_type last(Kokkos::view_alloc(Kokkos::WithoutInitializing, "last"), + blk_nrows); // last position per row kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( - A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, - blk_ncols, blk_nnz, first, last); + A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, blk_ncols, blk_nnz, first, last); // Second round: extract - out_row_map_type row_map( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map"), - blk_nrows + 
1); - out_entries_type entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries"), - blk_nnz); - out_values_type values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), blk_nnz); - out_row_map_hostmirror_type row_map_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_h"), - blk_nrows + 1); - out_entries_hostmirror_type entries_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries_h"), - blk_nnz); - out_values_hostmirror_type values_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_h"), - blk_nnz); - - kk_extract_subblock_crsmatrix_sequential( - A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, - last, row_map_h, entries_h, values_h); + out_row_map_type row_map(Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map"), blk_nrows + 1); + out_entries_type entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries"), blk_nnz); + out_values_type values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), blk_nnz); + out_row_map_hostmirror_type row_map_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_h"), + blk_nrows + 1); + out_entries_hostmirror_type entries_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries_h"), blk_nnz); + out_values_hostmirror_type values_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_h"), blk_nnz); + + kk_extract_subblock_crsmatrix_sequential(A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, + last, row_map_h, entries_h, values_h); if (!UseRCMReordering) { Kokkos::deep_copy(row_map, row_map_h); Kokkos::deep_copy(entries, entries_h); Kokkos::deep_copy(values, values_h); } else { - perm_h_v[i] = KokkosGraph::Experimental::graph_rcm< - Kokkos::DefaultHostExecutionSpace>(row_map_h, entries_h); - perm_v[i] = out_entries_type( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_v"), - perm_h_v[i].extent(0)); - - out_row_map_hostmirror_type row_map_perm_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_perm_h"), 
- blk_nrows + 1); - out_entries_hostmirror_type entries_perm_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries_perm_h"), - blk_nnz); - out_values_hostmirror_type values_perm_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_perm_h"), - blk_nnz); - - out_entries_hostmirror_type reverseperm_h( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_h"), - blk_nrows); - for (ordinal_type ii = 0; ii < blk_nrows; ii++) - reverseperm_h(perm_h_v[i](ii)) = ii; + perm_h_v[i] = KokkosGraph::Experimental::graph_rcm(row_map_h, entries_h); + perm_v[i] = + out_entries_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_v"), perm_h_v[i].extent(0)); + + out_row_map_hostmirror_type row_map_perm_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_perm_h"), + blk_nrows + 1); + out_entries_hostmirror_type entries_perm_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries_perm_h"), + blk_nnz); + out_values_hostmirror_type values_perm_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_perm_h"), + blk_nnz); + + out_entries_hostmirror_type reverseperm_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_h"), + blk_nrows); + for (ordinal_type ii = 0; ii < blk_nrows; ii++) reverseperm_h(perm_h_v[i](ii)) = ii; std::map colIdx_Value_rcm; @@ -2589,20 +2069,16 @@ kk_extract_diagonal_blocks_crsmatrix_sequential( for (ordinal_type ii = 0; ii < blk_nrows; ii++) { colIdx_Value_rcm.clear(); // ii: reordered index - ordinal_type origRow = reverseperm_h( - ii); // get the original row idx of the reordered row idx, ii - for (size_type j = row_map_h(origRow); j < row_map_h(origRow + 1); - j++) { + ordinal_type origRow = reverseperm_h(ii); // get the original row idx of the reordered row idx, ii + for (size_type j = row_map_h(origRow); j < row_map_h(origRow + 1); j++) { ordinal_type origEi = entries_h(j); value_type origV = values_h(j); - ordinal_type Ei = - perm_h_v[i](origEi); // get the reordered col idx of the - // original col idx, 
origEi + ordinal_type Ei = perm_h_v[i](origEi); // get the reordered col idx of the + // original col idx, origEi colIdx_Value_rcm[Ei] = origV; } row_map_perm_h(ii) = cnt; - for (typename std::map::iterator it = - colIdx_Value_rcm.begin(); + for (typename std::map::iterator it = colIdx_Value_rcm.begin(); it != colIdx_Value_rcm.end(); ++it) { entries_perm_h(cnt) = it->first; values_perm_h(cnt) = it->second; @@ -2617,8 +2093,7 @@ kk_extract_diagonal_blocks_crsmatrix_sequential( Kokkos::deep_copy(perm_v[i], perm_h_v[i]); } - DiagBlk_v[i] = crsMat_t("CrsMatrix", blk_nrows, blk_ncols, blk_nnz, - values, row_map, entries); + DiagBlk_v[i] = crsMat_t("CrsMatrix", blk_nrows, blk_ncols, blk_nnz, values, row_map, entries); blk_row_start += blk_nrows; } // for (ordinal_type i = 0; i < n_blocks; i++) diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_cusparse.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_cusparse.hpp index 55e7144dba62..07681cb40930 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_cusparse.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_cusparse.hpp @@ -25,13 +25,11 @@ namespace KokkosSparse { namespace Impl { -inline void cusparse_internal_error_throw(cusparseStatus_t cusparseStatus, - const char* name, const char* file, +inline void cusparse_internal_error_throw(cusparseStatus_t cusparseStatus, const char* name, const char* file, const int line) { std::ostringstream out; #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) - out << name << " error( " << cusparseGetErrorName(cusparseStatus) - << "): " << cusparseGetErrorString(cusparseStatus); + out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus); #else out << name << " error( "; switch (cusparseStatus) { @@ -43,27 +41,13 @@ inline void cusparse_internal_error_throw(cusparseStatus_t cusparseStatus, out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too " "much 
memory"; break; - case CUSPARSE_STATUS_INVALID_VALUE: - out << "CUSPARSE_STATUS_INVALID_VALUE)"; - break; - case CUSPARSE_STATUS_ARCH_MISMATCH: - out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; - break; - case CUSPARSE_STATUS_MAPPING_ERROR: - out << "CUSPARSE_STATUS_MAPPING_ERROR)"; - break; - case CUSPARSE_STATUS_EXECUTION_FAILED: - out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; - break; - case CUSPARSE_STATUS_INTERNAL_ERROR: - out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; - break; - case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; - break; - case CUSPARSE_STATUS_ZERO_PIVOT: - out << "CUSPARSE_STATUS_ZERO_PIVOT)"; - break; + case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break; + case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break; + case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break; + case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break; + case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break; + case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break; + case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break; default: out << "unrecognized error code): this is bad!"; break; } #endif // CUSPARSE_VERSION @@ -73,10 +57,8 @@ inline void cusparse_internal_error_throw(cusparseStatus_t cusparseStatus, throw std::runtime_error(out.str()); } -inline void cusparse_internal_safe_call(cusparseStatus_t cusparseStatus, - const char* name, - const char* file = nullptr, - const int line = 0) { +inline void cusparse_internal_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr, + const int line = 0) { if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) { cusparse_internal_error_throw(cusparseStatus, name, file, line); } @@ -84,9 +66,7 @@ inline void 
cusparse_internal_safe_call(cusparseStatus_t cusparseStatus, // The macro below defines is the public interface for the safe cusparse calls. // The functions themselves are protected by impl namespace. -#define KOKKOS_CUSPARSE_SAFE_CALL(call) \ - KokkosSparse::Impl::cusparse_internal_safe_call(call, #call, __FILE__, \ - __LINE__) +#define KOKKOS_CUSPARSE_SAFE_CALL(call) KokkosSparse::Impl::cusparse_internal_safe_call(call, #call, __FILE__, __LINE__) template cudaDataType cuda_data_type_from() { @@ -138,8 +118,7 @@ cusparseIndexType_t cusparse_index_type_t_from() { #define AS_STR_LITERAL_IMPL_(x) #x #define AS_STR_LITERAL(x) AS_STR_LITERAL_IMPL_(x) static_assert(!std::is_same::value, - "cuSparse " AS_STR_LITERAL( - CUSPARSE_VERSION) " TPL does not support index type"); + "cuSparse " AS_STR_LITERAL(CUSPARSE_VERSION) " TPL does not support index type"); // static_assert(false, ...) is allowed to error even if the code is not // instantiated. obfuscate the predicate Despite this function being // uncompilable, the compiler may decide that a return statement is missing, @@ -172,15 +151,11 @@ inline cusparseIndexType_t cusparse_index_type_t_from() { // is constructed, and reset to the default stream when this object is // destructed. 
struct TemporarySetCusparseStream { - TemporarySetCusparseStream(cusparseHandle_t handle_, - const Kokkos::Cuda& exec_) - : handle(handle_) { + TemporarySetCusparseStream(cusparseHandle_t handle_, const Kokkos::Cuda& exec_) : handle(handle_) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, exec_.cuda_stream())); } - ~TemporarySetCusparseStream() { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, NULL)); - } + ~TemporarySetCusparseStream() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, NULL)); } cusparseHandle_t handle; }; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_mkl.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_mkl.hpp index a14e19f3cf65..37e5750df68a 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_mkl.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_mkl.hpp @@ -26,31 +26,23 @@ namespace KokkosSparse { namespace Impl { -inline void mkl_internal_safe_call(sparse_status_t mkl_status, const char *name, - const char *file = nullptr, - const int line = 0) { +inline void mkl_internal_safe_call(sparse_status_t mkl_status, const char *name, const char *file = nullptr, + const int line = 0) { if (SPARSE_STATUS_SUCCESS != mkl_status) { std::ostringstream oss; - oss << "MKL call \"" << name << "\" at " << file << ":" << line - << " encountered error: "; + oss << "MKL call \"" << name << "\" at " << file << ":" << line << " encountered error: "; switch (mkl_status) { - case SPARSE_STATUS_NOT_INITIALIZED: - oss << "SPARSE_STATUS_NOT_INITIALIZED (empty handle or matrix arrays)"; - break; + case SPARSE_STATUS_NOT_INITIALIZED: oss << "SPARSE_STATUS_NOT_INITIALIZED (empty handle or matrix arrays)"; break; case SPARSE_STATUS_ALLOC_FAILED: oss << "SPARSE_STATUS_ALLOC_FAILED (internal error: memory allocation " "failed)"; break; - case SPARSE_STATUS_INVALID_VALUE: - oss << "SPARSE_STATUS_INVALID_VALUE (invalid input value)"; - break; + case SPARSE_STATUS_INVALID_VALUE: oss << 
"SPARSE_STATUS_INVALID_VALUE (invalid input value)"; break; case SPARSE_STATUS_EXECUTION_FAILED: oss << "SPARSE_STATUS_EXECUTION_FAILED (e.g. 0-diagonal element for " "triangular solver)"; break; - case SPARSE_STATUS_INTERNAL_ERROR: - oss << "SPARSE_STATUS_INTERNAL_ERROR"; - break; + case SPARSE_STATUS_INTERNAL_ERROR: oss << "SPARSE_STATUS_INTERNAL_ERROR"; break; case SPARSE_STATUS_NOT_SUPPORTED: oss << "SPARSE_STATUS_NOT_SUPPORTED (e.g. operation for double " "precision doesn't support other types)"; @@ -65,8 +57,7 @@ inline void mkl_internal_safe_call(sparse_status_t mkl_status, const char *name, } // namespace Impl } // namespace KokkosSparse -#define KOKKOSKERNELS_MKL_SAFE_CALL(call) \ - KokkosSparse::Impl::mkl_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOSKERNELS_MKL_SAFE_CALL(call) KokkosSparse::Impl::mkl_internal_safe_call(call, #call, __FILE__, __LINE__) namespace KokkosSparse { namespace Impl { @@ -78,8 +69,7 @@ inline sparse_operation_t mode_kk_to_mkl(char mode_kk) { case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE; default:; } - throw std::invalid_argument( - "Invalid mode for MKL (should be one of N, T, H)"); + throw std::invalid_argument("Invalid mode for MKL (should be one of N, T, H)"); } template @@ -99,8 +89,7 @@ struct mkl_is_supported_value_type> : std::true_type {}; // - provide an easy implicit conversion to that MKL type template struct KokkosToMKLScalar { - static_assert(mkl_is_supported_value_type::value, - "Scalar type not supported by MKL"); + static_assert(mkl_is_supported_value_type::value, "Scalar type not supported by MKL"); using type = Scalar; KokkosToMKLScalar(Scalar val_) : val(val_) {} operator Scalar() const { return val; } @@ -127,15 +116,13 @@ template struct KokkosToOneMKLScalar { // Note: we happen to use the same set of types in classic MKL and OneMKL. // If that changes, update this logic. 
- static_assert(mkl_is_supported_value_type::value, - "Scalar type not supported by OneMKL"); + static_assert(mkl_is_supported_value_type::value, "Scalar type not supported by OneMKL"); using type = Scalar; }; template struct KokkosToOneMKLScalar> { - static_assert(mkl_is_supported_value_type>::value, - "Scalar type not supported by OneMKL"); + static_assert(mkl_is_supported_value_type>::value, "Scalar type not supported by OneMKL"); using type = std::complex; }; @@ -144,130 +131,104 @@ struct KokkosToOneMKLScalar> { // like value_type, allowing simple client code in kernels. template class MKLSparseMatrix { - static_assert(mkl_is_supported_value_type::value, - "Provided value_type type not supported by MKL"); + static_assert(mkl_is_supported_value_type::value, "Provided value_type type not supported by MKL"); sparse_matrix_t mtx; public: inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} // Constructs MKL sparse matrix from KK sparse views (m rows x n cols) - inline MKLSparseMatrix(const MKL_INT num_rows, const MKL_INT num_cols, - MKL_INT *xadj, MKL_INT *adj, value_type *values) {} + inline MKLSparseMatrix(const MKL_INT num_rows, const MKL_INT num_cols, MKL_INT *xadj, MKL_INT *adj, + value_type *values) {} // Allows using MKLSparseMatrix directly in MKL calls inline operator sparse_matrix_t() const { return mtx; } // Exports MKL sparse matrix contents into KK views - inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols, - MKL_INT *&rows_start, MKL_INT *&columns, + inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, MKL_INT *&columns, value_type *&values) {} - inline void destroy() { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); - } + inline void destroy() { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); } }; template <> -inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, - const MKL_INT cols, - MKL_INT *xadj, MKL_INT *adj, +inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, const 
MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, float *values) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_create_csr( - &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, values)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, values)); } template <> -inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, - const MKL_INT cols, - MKL_INT *xadj, MKL_INT *adj, +inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, double *values) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_create_csr( - &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, values)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, values)); } template <> -inline MKLSparseMatrix>::MKLSparseMatrix( - const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, - Kokkos::complex *values) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_create_csr( - &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, - reinterpret_cast(values))); +inline MKLSparseMatrix>::MKLSparseMatrix(const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, + MKL_INT *adj, Kokkos::complex *values) { + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, + reinterpret_cast(values))); } template <> -inline MKLSparseMatrix>::MKLSparseMatrix( - const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, - Kokkos::complex *values) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_create_csr( - &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, - reinterpret_cast(values))); +inline MKLSparseMatrix>::MKLSparseMatrix(const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, + MKL_INT *adj, Kokkos::complex *values) { + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, + 
reinterpret_cast(values))); } template <> -inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, - MKL_INT &num_cols, - MKL_INT *&rows_start, - MKL_INT *&columns, - float *&values) { +inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, + MKL_INT *&columns, float *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; KOKKOSKERNELS_MKL_SAFE_CALL( - mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, &rows_start, - &rows_end, &columns, &values)); + mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns, &values)); if (SPARSE_INDEX_BASE_ZERO != indexing) { - throw std::runtime_error( - "Expected zero based indexing in exported MKL sparse matrix\n"); + throw std::runtime_error("Expected zero based indexing in exported MKL sparse matrix\n"); return; } } template <> -inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, - MKL_INT &num_cols, - MKL_INT *&rows_start, - MKL_INT *&columns, - double *&values) { +inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, + MKL_INT *&columns, double *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; KOKKOSKERNELS_MKL_SAFE_CALL( - mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, &rows_start, - &rows_end, &columns, &values)); + mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns, &values)); if (SPARSE_INDEX_BASE_ZERO != indexing) { - throw std::runtime_error( - "Expected zero based indexing in exported MKL sparse matrix\n"); + throw std::runtime_error("Expected zero based indexing in exported MKL sparse matrix\n"); return; } } template <> -inline void MKLSparseMatrix>::export_data( - MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, - MKL_INT *&columns, Kokkos::complex *&values) { +inline void MKLSparseMatrix>::export_data(MKL_INT &num_rows, MKL_INT &num_cols, + MKL_INT *&rows_start, MKL_INT 
*&columns, + Kokkos::complex *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_export_csr( - mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns, - reinterpret_cast(&values))); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_export_csr(mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, + &columns, reinterpret_cast(&values))); if (SPARSE_INDEX_BASE_ZERO != indexing) { - throw std::runtime_error( - "Expected zero based indexing in exported MKL sparse matrix\n"); + throw std::runtime_error("Expected zero based indexing in exported MKL sparse matrix\n"); return; } } template <> -inline void MKLSparseMatrix>::export_data( - MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, - MKL_INT *&columns, Kokkos::complex *&values) { +inline void MKLSparseMatrix>::export_data(MKL_INT &num_rows, MKL_INT &num_cols, + MKL_INT *&rows_start, MKL_INT *&columns, + Kokkos::complex *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_export_csr( - mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns, - reinterpret_cast(&values))); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_export_csr(mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, + &columns, reinterpret_cast(&values))); if (SPARSE_INDEX_BASE_ZERO != indexing) { - throw std::runtime_error( - "Expected zero based indexing in exported MKL sparse matrix\n"); + throw std::runtime_error("Expected zero based indexing in exported MKL sparse matrix\n"); return; } } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_rocsparse.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_rocsparse.hpp index baf2d3a8222c..4b99c96c8121 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_rocsparse.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils_rocsparse.hpp @@ -31,8 +31,7 @@ namespace KokkosSparse { namespace Impl { -inline void 
rocsparse_internal_error_throw(rocsparse_status rocsparseStatus, - const char* name, const char* file, +inline void rocsparse_internal_error_throw(rocsparse_status rocsparseStatus, const char* name, const char* file, const int line) { std::ostringstream out; out << name << " error( "; @@ -47,9 +46,7 @@ inline void rocsparse_internal_error_throw(rocsparse_status rocsparseStatus, case rocsparse_status_invalid_pointer: out << "rocsparse_status_invalid_pointer): invalid pointer parameter."; break; - case rocsparse_status_invalid_size: - out << "rocsparse_status_invalid_size): invalid size parameter."; - break; + case rocsparse_status_invalid_size: out << "rocsparse_status_invalid_size): invalid size parameter."; break; case rocsparse_status_memory_error: out << "rocsparse_status_memory_error): failed memory allocation, copy, " "dealloc."; @@ -58,22 +55,14 @@ inline void rocsparse_internal_error_throw(rocsparse_status rocsparseStatus, out << "rocsparse_status_internal_error): other internal library " "failure."; break; - case rocsparse_status_invalid_value: - out << "rocsparse_status_invalid_value): invalid value parameter."; - break; - case rocsparse_status_arch_mismatch: - out << "rocsparse_status_arch_mismatch): device arch is not supported."; - break; - case rocsparse_status_zero_pivot: - out << "rocsparse_status_zero_pivot): encountered zero pivot."; - break; + case rocsparse_status_invalid_value: out << "rocsparse_status_invalid_value): invalid value parameter."; break; + case rocsparse_status_arch_mismatch: out << "rocsparse_status_arch_mismatch): device arch is not supported."; break; + case rocsparse_status_zero_pivot: out << "rocsparse_status_zero_pivot): encountered zero pivot."; break; case rocsparse_status_not_initialized: out << "rocsparse_status_not_initialized): descriptor has not been " "initialized."; break; - case rocsparse_status_type_mismatch: - out << "rocsparse_status_type_mismatch): index types do not match."; - break; + case 
rocsparse_status_type_mismatch: out << "rocsparse_status_type_mismatch): index types do not match."; break; default: out << "unrecognized error code): this is bad!"; break; } if (file) { @@ -82,10 +71,8 @@ inline void rocsparse_internal_error_throw(rocsparse_status rocsparseStatus, throw std::runtime_error(out.str()); } -inline void rocsparse_internal_safe_call(rocsparse_status rocsparseStatus, - const char* name, - const char* file = nullptr, - const int line = 0) { +inline void rocsparse_internal_safe_call(rocsparse_status rocsparseStatus, const char* name, const char* file = nullptr, + const int line = 0) { if (rocsparse_status_success != rocsparseStatus) { rocsparse_internal_error_throw(rocsparseStatus, name, file, line); } @@ -93,18 +80,15 @@ inline void rocsparse_internal_safe_call(rocsparse_status rocsparseStatus, // The macro below defines is the public interface for the safe cusparse calls. // The functions themselves are protected by impl namespace. -#define KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(call) \ - KokkosSparse::Impl::rocsparse_internal_safe_call(call, #call, __FILE__, \ - __LINE__) +#define KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(call) \ + KokkosSparse::Impl::rocsparse_internal_safe_call(call, #call, __FILE__, __LINE__) inline rocsparse_operation mode_kk_to_rocsparse(const char kk_mode[]) { rocsparse_operation myRocsparseOperation; switch (toupper(kk_mode[0])) { case 'N': myRocsparseOperation = rocsparse_operation_none; break; case 'T': myRocsparseOperation = rocsparse_operation_transpose; break; - case 'H': - myRocsparseOperation = rocsparse_operation_conjugate_transpose; - break; + case 'H': myRocsparseOperation = rocsparse_operation_conjugate_transpose; break; default: { std::ostringstream out; out << "Mode " << kk_mode[0] << " invalid for rocSPARSE SpMV.\n"; @@ -124,8 +108,7 @@ inline rocsparse_indextype rocsparse_index_type() { return rocsparse_indextype_i64; } else { std::ostringstream out; - out << "Trying to call rocSPARSE SpMV with unsupported index 
type: " - << typeid(index_type).name(); + out << "Trying to call rocSPARSE SpMV with unsupported index type: " << typeid(index_type).name(); throw std::logic_error(out.str()); } } @@ -133,8 +116,7 @@ inline rocsparse_indextype rocsparse_index_type() { template inline rocsparse_datatype rocsparse_compute_type() { std::ostringstream out; - out << "Trying to call rocSPARSE SpMV with unsupported compute type: " - << typeid(data_type).name(); + out << "Trying to call rocSPARSE SpMV with unsupported compute type: " << typeid(data_type).name(); throw std::logic_error(out.str()); } @@ -163,8 +145,7 @@ struct kokkos_to_rocsparse_type; // for floats, rocsparse uses c++ builtin types template -struct kokkos_to_rocsparse_type>> { +struct kokkos_to_rocsparse_type>> { using type = T; }; @@ -181,23 +162,17 @@ struct kokkos_to_rocsparse_type> { }; // e.g. 5.4 -> 50400 -#define KOKKOSSPARSE_IMPL_ROCM_VERSION \ - ROCM_VERSION_MAJOR * 10000 + ROCM_VERSION_MINOR * 100 + ROCM_VERSION_PATCH +#define KOKKOSSPARSE_IMPL_ROCM_VERSION ROCM_VERSION_MAJOR * 10000 + ROCM_VERSION_MINOR * 100 + ROCM_VERSION_PATCH // Set the stream on the given rocSPARSE handle when this object // is constructed, and reset to the default stream when this object is // destructed. 
struct TemporarySetRocsparseStream { - TemporarySetRocsparseStream(rocsparse_handle handle_, - const Kokkos::HIP& exec_) - : handle(handle_) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_set_stream(handle, exec_.hip_stream())); + TemporarySetRocsparseStream(rocsparse_handle handle_, const Kokkos::HIP& exec_) : handle(handle_) { + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(handle, exec_.hip_stream())); } - ~TemporarySetRocsparseStream() { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(handle, NULL)); - } + ~TemporarySetRocsparseStream() { KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(handle, NULL)); } rocsparse_handle handle; }; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_ccs2crs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_ccs2crs.hpp index 9b4bae2134ac..3af4263c2ac5 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_ccs2crs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_ccs2crs.hpp @@ -22,8 +22,7 @@ #define _KOKKOSSPARSE_CCS2CRS_HPP namespace KokkosSparse { namespace Impl { -template +template class Ccs2Crs { private: using CrsST = typename ValViewType::value_type; @@ -50,32 +49,19 @@ class Ccs2Crs { CrsColIdViewType __crs_col_ids; public: - Ccs2Crs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, - ColMapViewType col_map, RowIdViewType row_ids) - : __nrows(nrows), - __ncols(ncols), - __nnz(nnz), - __vals(vals), - __col_map(col_map), - __row_ids(row_ids) { - __crs_vals = CrsValsViewType( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "__crs_vals"), nnz); - __crs_row_map = - CrsRowMapViewType(Kokkos::view_alloc("__crs_row_map"), nrows + 1); - __crs_col_ids = CrsColIdViewType( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "__crs_col_ids"), nnz); + Ccs2Crs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, ColMapViewType col_map, + RowIdViewType row_ids) + : __nrows(nrows), __ncols(ncols), __nnz(nnz), __vals(vals), __col_map(col_map), 
__row_ids(row_ids) { + __crs_vals = CrsValsViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "__crs_vals"), nnz); + __crs_row_map = CrsRowMapViewType(Kokkos::view_alloc("__crs_row_map"), nrows + 1); + __crs_col_ids = CrsColIdViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "__crs_col_ids"), nnz); - KokkosSparse::Impl::transpose_matrix< - ColMapViewType, RowIdViewType, ValViewType, CrsRowMapViewType, - CrsColIdViewType, CrsValsViewType, CrsRowMapViewType, CrsET>( - __ncols, __nrows, __col_map, __row_ids, __vals, __crs_row_map, - __crs_col_ids, __crs_vals); + KokkosSparse::Impl::transpose_matrix( + __ncols, __nrows, __col_map, __row_ids, __vals, __crs_row_map, __crs_col_ids, __crs_vals); } - CrsType get_crsMat() { - return CrsType("ccs2crs", __nrows, __ncols, __nnz, __crs_vals, - __crs_row_map, __crs_col_ids); - } + CrsType get_crsMat() { return CrsType("ccs2crs", __nrows, __ncols, __nnz, __crs_vals, __crs_row_map, __crs_col_ids); } }; } // namespace Impl // clang-format off @@ -98,12 +84,16 @@ class Ccs2Crs { /// \note In KokkosKernels sparse code, adj stands for adjacency list /// and here we're passing in a ccs matrix with xadj=col_map and adj=row_ids. 
// clang-format on -template -auto ccs2crs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, - ValViewType vals, ColMapViewType col_map, RowIdViewType row_ids) { - using Ccs2crsType = Impl::Ccs2Crs; +template +auto ccs2crs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, ColMapViewType col_map, + RowIdViewType row_ids) { + static_assert(std::is_same_v, + "ccs2crs: SizeType (type of nnz) must match the element type of " + "ColMapViewType"); + static_assert(std::is_same_v, + "ccs2crs: OrdinalType (type of nrows, ncols) must match the element type " + "of RowIdViewType"); + using Ccs2crsType = Impl::Ccs2Crs; Ccs2crsType ccs2Crs(nrows, ncols, nnz, vals, col_map, row_ids); return ccs2Crs.get_crsMat(); } @@ -120,12 +110,9 @@ auto ccs2crs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, /// \tparam SizeType The ccsMatrix::size_type /// \param ccsMatrix The KokkosSparse::CcsMatrix. /// \return A KokkosSparse::CrsMatrix. -template -auto ccs2crs(KokkosSparse::CcsMatrix &ccsMatrix) { - return ccs2crs(ccsMatrix.numRows(), ccsMatrix.numCols(), ccsMatrix.nnz(), - ccsMatrix.values, ccsMatrix.graph.col_map, +template +auto ccs2crs(KokkosSparse::CcsMatrix &ccsMatrix) { + return ccs2crs(ccsMatrix.numRows(), ccsMatrix.numCols(), ccsMatrix.nnz(), ccsMatrix.values, ccsMatrix.graph.col_map, ccsMatrix.graph.entries); } } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_coo2crs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_coo2crs.hpp index a29d818cb1ba..d10ef9974c19 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_coo2crs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_coo2crs.hpp @@ -38,23 +38,15 @@ namespace KokkosSparse { /// \param data the array of data /// \return A KokkosSparse::CrsMatrix. 
// clang-format on -template -auto coo2crs(DimType m, DimType n, RowViewType row, ColViewType col, - DataViewType data) { +template +auto coo2crs(DimType m, DimType n, RowViewType row, ColViewType col, DataViewType data) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "RowViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "CalViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "DataViewType must be a Kokkos::View."); - static_assert(static_cast(RowViewType::rank) == 1, - "RowViewType must have rank 1."); - static_assert(static_cast(ColViewType::rank) == 1, - "ColViewType must have rank 1."); - static_assert(static_cast(DataViewType::rank) == 1, - "DataViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "RowViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "CalViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "DataViewType must be a Kokkos::View."); + static_assert(static_cast(RowViewType::rank) == 1, "RowViewType must have rank 1."); + static_assert(static_cast(ColViewType::rank) == 1, "ColViewType must have rank 1."); + static_assert(static_cast(DataViewType::rank) == 1, "DataViewType must have rank 1."); #endif static_assert(std::is_integral::value, @@ -69,8 +61,7 @@ auto coo2crs(DimType m, DimType n, RowViewType row, ColViewType col, if (m < 0 || n < 0) Kokkos::abort("m >= 0 and n >= 0 required."); } - using Coo2crsType = - Impl::Coo2Crs; + using Coo2crsType = Impl::Coo2Crs; Coo2crsType Coo2Crs(m, n, row, col, data); return Coo2Crs.get_crsMat(); } @@ -86,12 +77,9 @@ auto coo2crs(DimType m, DimType n, RowViewType row, ColViewType col, /// \param cooMatrix The sparse matrix stored in coordinate ("Coo") format. /// \return A KokkosSparse::CrsMatrix. 
// clang-format on -template -auto coo2crs(KokkosSparse::CooMatrix &cooMatrix) { - return coo2crs(cooMatrix.numRows(), cooMatrix.numCols(), cooMatrix.row, - cooMatrix.col, cooMatrix.data); +template +auto coo2crs(KokkosSparse::CooMatrix &cooMatrix) { + return coo2crs(cooMatrix.numRows(), cooMatrix.numCols(), cooMatrix.row, cooMatrix.col, cooMatrix.data); } } // namespace KokkosSparse #endif // _KOKKOSSPARSE_COO2CRS_HPP diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2ccs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2ccs.hpp index c9265842cb2e..6760a4306035 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2ccs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2ccs.hpp @@ -22,8 +22,7 @@ #define _KOKKOSSPARSE_CRS2CCS_HPP namespace KokkosSparse { namespace Impl { -template +template class Crs2Ccs { private: using CcsST = typename ValViewType::value_type; @@ -48,32 +47,19 @@ class Crs2Ccs { CcsRowIdViewType __ccs_row_ids; public: - Crs2Ccs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, - RowMapViewType row_map, ColIdViewType col_ids) - : __nrows(nrows), - __ncols(ncols), - __nnz(nnz), - __vals(vals), - __row_map(row_map), - __col_ids(col_ids) { - __ccs_vals = CcsValsViewType( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "__ccs_vals"), nnz); - __ccs_col_map = - CcsColMapViewType(Kokkos::view_alloc("__ccs_col_map"), ncols + 1); - __ccs_row_ids = CcsRowIdViewType( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "__ccs_row_ids"), nnz); + Crs2Ccs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, RowMapViewType row_map, + ColIdViewType col_ids) + : __nrows(nrows), __ncols(ncols), __nnz(nnz), __vals(vals), __row_map(row_map), __col_ids(col_ids) { + __ccs_vals = CcsValsViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "__ccs_vals"), nnz); + __ccs_col_map = CcsColMapViewType(Kokkos::view_alloc("__ccs_col_map"), ncols + 1); + __ccs_row_ids = 
CcsRowIdViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "__ccs_row_ids"), nnz); - KokkosSparse::Impl::transpose_matrix< - RowMapViewType, ColIdViewType, ValViewType, CcsColMapViewType, - CcsRowIdViewType, CcsValsViewType, CcsColMapViewType, CcsET>( - __nrows, __ncols, __row_map, __col_ids, __vals, __ccs_col_map, - __ccs_row_ids, __ccs_vals); + KokkosSparse::Impl::transpose_matrix( + __nrows, __ncols, __row_map, __col_ids, __vals, __ccs_col_map, __ccs_row_ids, __ccs_vals); } - CcsType get_ccsMat() { - return CcsType("crs2ccs", __nrows, __ncols, __nnz, __ccs_vals, - __ccs_col_map, __ccs_row_ids); - } + CcsType get_ccsMat() { return CcsType("crs2ccs", __nrows, __ncols, __nnz, __ccs_vals, __ccs_col_map, __ccs_row_ids); } }; } // namespace Impl // clang-format off @@ -96,12 +82,16 @@ class Crs2Ccs { /// \note In KokkosKernels sparse code, adj stands for adjacency list /// and here we're passing in a crs matrix with xadj=row_map and adj=col_ids. // clang-format on -template -auto crs2ccs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, - ValViewType vals, RowMapViewType row_map, ColIdViewType col_ids) { - using Crs2ccsType = Impl::Crs2Ccs; +template +auto crs2ccs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, RowMapViewType row_map, + ColIdViewType col_ids) { + static_assert(std::is_same_v, + "crs2ccs: SizeType (type of nnz) must match the element type of " + "RowMapViewType"); + static_assert(std::is_same_v, + "crs2ccs: OrdinalType (type of nrows, ncols) must match the element type " + "of ColIdViewType"); + using Crs2ccsType = Impl::Crs2Ccs; Crs2ccsType crs2Ccs(nrows, ncols, nnz, vals, row_map, col_ids); return crs2Ccs.get_ccsMat(); } @@ -118,14 +108,11 @@ auto crs2ccs(OrdinalType nrows, OrdinalType ncols, SizeType nnz, /// \tparam SizeType The crsMatrix::size_type /// \param crsMatrix The KokkosSparse::CrsMatrix. /// \return A KokkosSparse::CcsMatrix. 
-template -auto crs2ccs(KokkosSparse::CrsMatrix &crsMatrix) { - return crs2ccs(crsMatrix.numRows(), crsMatrix.numCols(), crsMatrix.nnz(), - crsMatrix.values, crsMatrix.graph.row_map, +template +auto crs2ccs(KokkosSparse::CrsMatrix &crsMatrix) { + return crs2ccs(crsMatrix.numRows(), crsMatrix.numCols(), crsMatrix.nnz(), crsMatrix.values, crsMatrix.graph.row_map, crsMatrix.graph.entries); } } // namespace KokkosSparse -#endif // _KOKKOSSPARSE_CRS2CCS_HPP \ No newline at end of file +#endif // _KOKKOSSPARSE_CRS2CCS_HPP diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2coo.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2coo.hpp index 8292b26250f6..8a1da8c9ec7d 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2coo.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_crs2coo.hpp @@ -22,8 +22,7 @@ #define _KOKKOSSPARSE_CRS2COO_HPP namespace KokkosSparse { namespace Impl { -template class Crs2Coo { private: @@ -41,10 +40,10 @@ class Crs2Coo { using device_type = DeviceType; - using row_view = typename Kokkos::View; - using col_view = row_view; + using row_view = typename Kokkos::View; + using col_view = row_view; using non_const_coo_data_view = typename ValViewType::non_const_type; - using coo_type = CooMatrix; + using coo_type = CooMatrix; non_const_ordinal_type m_nrows; non_const_ordinal_type m_ncols; @@ -62,29 +61,18 @@ class Crs2Coo { using copy_tp1_member_type = typename copy_tp1_pt::member_type; public: - Crs2Coo(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, - RowMapViewType row_map, ColIdViewType col_ids) - : m_nrows(nrows), - m_ncols(ncols), - m_nnz(nnz), - m_vals(vals), - m_row_map(row_map), - m_col_ids(col_ids) { - m_data = non_const_coo_data_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "m_data"), nnz); - m_col = - col_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "m_col"), nnz); - m_row = - row_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "m_row"), nnz); + 
Crs2Coo(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, RowMapViewType row_map, + ColIdViewType col_ids) + : m_nrows(nrows), m_ncols(ncols), m_nnz(nnz), m_vals(vals), m_row_map(row_map), m_col_ids(col_ids) { + m_data = non_const_coo_data_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "m_data"), nnz); + m_col = col_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "m_col"), nnz); + m_row = row_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "m_row"), nnz); copy_tp1_pt policy(m_nrows, 1, 1); { auto vec_len_max = policy.vector_length_max(); copy_tp1_pt query_policy(m_nrows, 1, vec_len_max); - policy = copy_tp1_pt( - m_nrows, - query_policy.team_size_recommended(*this, Kokkos::ParallelForTag()), - vec_len_max); + policy = copy_tp1_pt(m_nrows, query_policy.team_size_recommended(*this, Kokkos::ParallelForTag()), vec_len_max); } Kokkos::parallel_for("Crs2Coo", policy, *this); @@ -98,17 +86,14 @@ class Crs2Coo { auto row_len = m_row_map(i + 1) - row_start; auto row_end = row_start + row_len; - Kokkos::parallel_for(Kokkos::TeamVectorRange(member, row_start, row_end), - [&](const size_type &id) { - m_data(id) = m_vals(id); - m_col(id) = m_col_ids(id); - m_row(id) = i; - }); + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, row_start, row_end), [&](const size_type &id) { + m_data(id) = m_vals(id); + m_col(id) = m_col_ids(id); + m_row(id) = i; + }); } - coo_type get_cooMat() { - return coo_type(m_nrows, m_ncols, m_row, m_col, m_data); - } + coo_type get_cooMat() { return coo_type(m_nrows, m_ncols, m_row, m_col, m_data); } }; } // namespace Impl // clang-format off @@ -130,12 +115,16 @@ class Crs2Coo { /// \return A KokkosSparse::CooMatrix. 
/// // clang-format on -template -auto crs2coo(OrdinalType nrows, OrdinalType ncols, SizeType nnz, - ValViewType vals, RowMapViewType row_map, ColIdViewType col_ids) { - using Crs2cooType = Impl::Crs2Coo; +template +auto crs2coo(OrdinalType nrows, OrdinalType ncols, SizeType nnz, ValViewType vals, RowMapViewType row_map, + ColIdViewType col_ids) { + static_assert(std::is_same_v, + "crs2coo: SizeType (type of nnz) must match the element type of " + "RowMapViewType"); + static_assert(std::is_same_v, + "crs2coo: OrdinalType (type of nrows, ncols) must match the element type " + "of ColIdViewType"); + using Crs2cooType = Impl::Crs2Coo; Crs2cooType crs2Coo(nrows, ncols, nnz, vals, row_map, col_ids); return crs2Coo.get_cooMat(); } @@ -152,13 +141,10 @@ auto crs2coo(OrdinalType nrows, OrdinalType ncols, SizeType nnz, /// \tparam SizeType The crsMatrix::size_type /// \param crsMatrix The KokkosSparse::CrsMatrix. /// \return A KokkosSparse::CooMatrix. -template -auto crs2coo(KokkosSparse::CrsMatrix &crsMatrix) { - return crs2coo(crsMatrix.numRows(), crsMatrix.numCols(), crsMatrix.nnz(), - crsMatrix.values, crsMatrix.graph.row_map, +template +auto crs2coo(KokkosSparse::CrsMatrix &crsMatrix) { + return crs2coo(crsMatrix.numRows(), crsMatrix.numCols(), crsMatrix.nnz(), crsMatrix.values, crsMatrix.graph.row_map, crsMatrix.graph.entries); } } // namespace KokkosSparse -#endif // _KOKKOSSPARSE_CRS2COO_HPP \ No newline at end of file +#endif // _KOKKOSSPARSE_CRS2COO_HPP diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_findRelOffset.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_findRelOffset.hpp index f4c702a610f6..6dffcdd3d705 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_findRelOffset.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_findRelOffset.hpp @@ -83,11 +83,10 @@ namespace KokkosSparse { /// to be a Kokkos::View. Thankfully, arguments to a C++ function /// behave more like LET* than LET (in ANSI Common Lisp terms). 
template -KOKKOS_FUNCTION OffsetType findRelOffset( - const IndexViewType& indsToSearch, const OffsetType numEnt, - /* typename IndexViewType::const_value_type */ - const typename std::decay::type indToFind, - const OffsetType hint, const bool isSorted) { +KOKKOS_FUNCTION OffsetType findRelOffset(const IndexViewType& indsToSearch, const OffsetType numEnt, + /* typename IndexViewType::const_value_type */ + const typename std::decay::type indToFind, + const OffsetType hint, const bool isSorted) { // IndexViewType doesn't have to be a Kokkos::View; it just has to // implement operator[] like a 1-D array. // @@ -95,8 +94,7 @@ KOKKOS_FUNCTION OffsetType findRelOffset( // "IndexViewType must be a Kokkos::View"); // static_assert (static_cast (IndexViewType::rank) == 1, // "IndexViewType must be a rank-1 Kokkos::View"); - static_assert(std::is_integral::value, - "OffsetType must be an integer."); + static_assert(std::is_integral::value, "OffsetType must be an integer."); if (hint < numEnt && indsToSearch[hint] == indToFind) { return hint; // hint was correct diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel.hpp index 036fe1b11993..647252e19197 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel.hpp @@ -44,20 +44,16 @@ namespace Experimental { /// num_rows submatrix of A is structurally symmetric /// @pre handle->create_gs_handle(...) 
has been called previously /// -template +template void gauss_seidel_symbolic(const ExecutionSpace &space, KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - bool is_graph_symmetric = true) { - static_assert(std::is_same::value, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, bool is_graph_symmetric = true) { + static_assert(std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: Size type of the matrix " "should be same as kernelHandle sizetype."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: lno type of the matrix " "should be same as kernelHandle lno_t."); @@ -69,24 +65,20 @@ void gauss_seidel_symbolic(const ExecutionSpace &space, KernelHandle *handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; // const_handle_type tmp_handle = *handle; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; // Internal_alno_row_view_t_ const_a_r = row_map; @@ -96,12 +88,9 @@ void gauss_seidel_symbolic(const ExecutionSpace &space, 
KernelHandle *handle, using namespace KokkosSparse::Impl; - GAUSS_SEIDEL_SYMBOLIC< - ExecutionSpace, const_handle_type, Internal_alno_row_view_t_, - Internal_alno_nnz_view_t_>::gauss_seidel_symbolic(space, &tmp_handle, - num_rows, num_cols, - const_a_r, const_a_l, - is_graph_symmetric); + GAUSS_SEIDEL_SYMBOLIC::gauss_seidel_symbolic(space, &tmp_handle, num_rows, num_cols, + const_a_r, const_a_l, is_graph_symmetric); } /// @@ -121,16 +110,12 @@ void gauss_seidel_symbolic(const ExecutionSpace &space, KernelHandle *handle, /// num_rows submatrix of A is structurally symmetric /// @pre handle->create_gs_handle(...) has been called previously /// -template -void gauss_seidel_symbolic(KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - bool is_graph_symmetric = true) { +template +void gauss_seidel_symbolic(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, bool is_graph_symmetric = true) { auto my_exec_space = handle->get_gs_handle()->get_execution_space(); - gauss_seidel_symbolic(my_exec_space, handle, num_rows, num_cols, row_map, - entries, is_graph_symmetric); + gauss_seidel_symbolic(my_exec_space, handle, num_rows, num_cols, row_map, entries, is_graph_symmetric); } /// @@ -150,13 +135,11 @@ void gauss_seidel_symbolic(KernelHandle *handle, /// @param is_graph_symmetric Whether the upper-left num_rows x /// num_rows submatrix of A is structurally symmetric /// @pre handle->create_gs_handle(...) 
has been called previously -template -void block_gauss_seidel_symbolic( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - typename KernelHandle::const_nnz_lno_t block_size, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, bool is_graph_symmetric = true) { +template +void block_gauss_seidel_symbolic(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, + typename KernelHandle::const_nnz_lno_t block_size, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, bool is_graph_symmetric = true) { auto gsHandle = handle->get_point_gs_handle(); if (gsHandle->get_algorithm_type() == GS_CLUSTER) { throw std::runtime_error( @@ -165,8 +148,7 @@ void block_gauss_seidel_symbolic( } gsHandle->set_block_size(block_size); - gauss_seidel_symbolic(handle, num_rows, num_cols, row_map, entries, - is_graph_symmetric); + gauss_seidel_symbolic(handle, num_rows, num_cols, row_map, entries, is_graph_symmetric); } /// @@ -190,30 +172,22 @@ void block_gauss_seidel_symbolic( /// @param is_graph_symmetric Whether the upper-left num_rows x /// num_rows submatrix of A is structurally symmetric /// -template +template void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - scalar_nnz_view_t_ values, - bool is_graph_symmetric = true) { - static_assert(std::is_same::value, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, bool is_graph_symmetric = true) { + static_assert(std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: Size type of the matrix " "should be same as kernelHandle sizetype."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: 
lno type of the matrix " "should be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: scalar type of the matrix should " "be same as kernelHandle scalar_t."); @@ -225,31 +199,25 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; // const_handle_type tmp_handle = *handle; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename scalar_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; Internal_alno_row_view_t_ const_a_r(row_map.data(), row_map.extent(0)); @@ -258,14 +226,10 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, using namespace KokkosSparse::Impl; - GAUSS_SEIDEL_NUMERIC< - ExecutionSpace, const_handle_type, format, Internal_alno_row_view_t_, - Internal_alno_nnz_view_t_, - Internal_ascalar_nnz_view_t_>::gauss_seidel_numeric(space, &tmp_handle, - num_rows, num_cols, - const_a_r, 
const_a_l, - const_a_v, - is_graph_symmetric); + GAUSS_SEIDEL_NUMERIC::gauss_seidel_numeric(space, &tmp_handle, num_rows, num_cols, + const_a_r, const_a_l, const_a_v, + is_graph_symmetric); } /// @@ -292,20 +256,14 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, /// the version of gauss_seidel_numeric that /// doesn't take it as an argument. The inverse diagonal will be /// computed internally. -template -void gauss_seidel_numeric(KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - scalar_nnz_view_t_ values, - bool is_graph_symmetric = true) { +template +void gauss_seidel_numeric(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, bool is_graph_symmetric = true) { auto my_exec_space = handle->get_gs_handle()->get_execution_space(); - gauss_seidel_numeric( - my_exec_space, handle, num_rows, num_cols, row_map, entries, values, - is_graph_symmetric); + gauss_seidel_numeric(my_exec_space, handle, num_rows, num_cols, row_map, entries, + values, is_graph_symmetric); } /// @@ -335,31 +293,23 @@ void gauss_seidel_numeric(KernelHandle *handle, /// the version of gauss_seidel_numeric that /// doesn't take it as an argument. The inverse diagonal will be /// computed internally. 
-template +template void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - scalar_nnz_view_t_ values, - scalar_nnz_view_t_ given_inverse_diagonal, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, scalar_nnz_view_t_ given_inverse_diagonal, bool is_graph_symmetric = true) { - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: Size type of the matrix " "should be same as kernelHandle sizetype."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: lno type of the matrix " "should be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::gauss_seidel_symbolic: scalar type of the matrix should " "be same as kernelHandle scalar_t."); @@ -371,49 +321,38 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; // const_handle_type tmp_handle = *handle; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename 
KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename scalar_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; Internal_alno_row_view_t_ const_a_r(row_map.data(), row_map.extent(0)); Internal_alno_nnz_view_t_ const_a_l(entries.data(), entries.extent(0)); Internal_ascalar_nnz_view_t_ const_a_v(values.data(), values.extent(0)); - Internal_ascalar_nnz_view_t_ const_a_d(given_inverse_diagonal.data(), - given_inverse_diagonal.extent(0)); + Internal_ascalar_nnz_view_t_ const_a_d(given_inverse_diagonal.data(), given_inverse_diagonal.extent(0)); using namespace KokkosSparse::Impl; - GAUSS_SEIDEL_NUMERIC< - ExecutionSpace, const_handle_type, format, Internal_alno_row_view_t_, - Internal_alno_nnz_view_t_, - Internal_ascalar_nnz_view_t_>::gauss_seidel_numeric(space, &tmp_handle, - num_rows, num_cols, - const_a_r, const_a_l, - const_a_v, const_a_d, - is_graph_symmetric); + GAUSS_SEIDEL_NUMERIC::gauss_seidel_numeric(space, &tmp_handle, num_rows, num_cols, + const_a_r, const_a_l, const_a_v, const_a_d, + is_graph_symmetric); } /// @@ -441,21 +380,15 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, /// the version of gauss_seidel_numeric that /// doesn't take it as an argument. The inverse diagonal will be /// computed internally. 
-template -void gauss_seidel_numeric(KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, - scalar_nnz_view_t_ values, - scalar_nnz_view_t_ given_inverse_diagonal, +template +void gauss_seidel_numeric(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, scalar_nnz_view_t_ given_inverse_diagonal, bool is_graph_symmetric = true) { auto my_exec_space = handle->get_gs_handle()->get_execution_space(); - gauss_seidel_numeric( - my_exec_space, handle, num_rows, num_cols, row_map, entries, values, - given_inverse_diagonal, is_graph_symmetric); + gauss_seidel_numeric(my_exec_space, handle, num_rows, num_cols, row_map, entries, + values, given_inverse_diagonal, is_graph_symmetric); } /// @@ -478,16 +411,12 @@ void gauss_seidel_numeric(KernelHandle *handle, /// @param is_graph_symmetric Whether the upper-left num_rows x /// num_rows submatrix of A is structurally symmetric /// -template -void block_gauss_seidel_numeric( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - typename KernelHandle::const_nnz_lno_t block_size, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - bool is_graph_symmetric = true) { +template +void block_gauss_seidel_numeric(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, + typename KernelHandle::const_nnz_lno_t block_size, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, bool is_graph_symmetric = true) { auto gsHandle = handle->get_point_gs_handle(); if (gsHandle->get_algorithm_type() == GS_CLUSTER) { throw std::runtime_error( @@ -496,8 +425,7 @@ void block_gauss_seidel_numeric( } 
gsHandle->set_block_size(block_size); - gauss_seidel_numeric(handle, num_rows, num_cols, row_map, entries, - values, is_graph_symmetric); + gauss_seidel_numeric(handle, num_rows, num_cols, row_map, entries, values, is_graph_symmetric); } /// @@ -533,57 +461,44 @@ void block_gauss_seidel_numeric( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void symmetric_gauss_seidel_apply( - const ExecutionSpace &space, KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { - static_assert(std::is_same::value, +void symmetric_gauss_seidel_apply(const ExecutionSpace &space, KernelHandle *handle, + typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, x_scalar_view_t x_lhs_output_vec, + y_scalar_view_t y_rhs_input_vec, bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { + static_assert(std::is_same::value, "KokkosSparse::symmetric_gauss_seidel_apply: Size type of the " "matrix should be same as kernelHandle sizetype."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::symmetric_gauss_seidel_apply: lno type of the " "matrix should be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::symmetric_gauss_seidel_apply: scalar type of the matrix " "should be same as kernelHandle scalar_t."); - static_assert(std::is_same::value, - "KokkosSparse::symmetric_gauss_seidel_apply: scalar type of " - "the y-vector should be 
same as kernelHandle scalar_t."); - static_assert( - std::is_same::value, - "KokkosSparse::symmetric_gauss_seidel_apply: scalar type of the x-vector " - "should be same as kernelHandle non-const scalar_t."); + std::is_same::value, + "KokkosSparse::symmetric_gauss_seidel_apply: scalar type of " + "the y-vector should be same as kernelHandle scalar_t."); + + static_assert(std::is_same::value, + "KokkosSparse::symmetric_gauss_seidel_apply: scalar type of the x-vector " + "should be same as kernelHandle non-const scalar_t."); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::symmetric_gauss_seidel_apply: row_map must have " "a contiguous layout (Left or Right, not Stride)"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::symmetric_gauss_seidel_apply: entries must have " "a contiguous layout (Left or Right, not Stride)"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::symmetric_gauss_seidel_apply: values must have " "a contiguous layout (Left or Right, not Stride)"); @@ -591,8 +506,7 @@ void symmetric_gauss_seidel_apply( if (x_lhs_output_vec.extent(1) != y_rhs_input_vec.extent(1)) { std::ostringstream os; os << "KokkosSparse::symmetric_gauss_seidel_apply: " - << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " - << y_rhs_input_vec.extent(1) << " columns."; + << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " << y_rhs_input_vec.extent(1) << " columns."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -604,67 +518,52 @@ void symmetric_gauss_seidel_apply( typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; 
const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename scalar_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; typedef Kokkos::View::array_layout, - typename y_scalar_view_t::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename y_scalar_view_t::device_type, Kokkos::MemoryTraits > Internal_yscalar_nnz_view_t_; typedef Kokkos::View::array_layout, - typename x_scalar_view_t::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename x_scalar_view_t::device_type, Kokkos::MemoryTraits > Internal_xscalar_nnz_view_t_; Internal_alno_row_view_t_ const_a_r(row_map.data(), row_map.extent(0)); Internal_alno_nnz_view_t_ const_a_l(entries.data(), entries.extent(0)); Internal_ascalar_nnz_view_t_ const_a_v(values.data(), values.extent(0)); - Internal_xscalar_nnz_view_t_ nonconst_x_v(x_lhs_output_vec.data(), - x_lhs_output_vec.extent(0), + Internal_xscalar_nnz_view_t_ nonconst_x_v(x_lhs_output_vec.data(), x_lhs_output_vec.extent(0), x_lhs_output_vec.extent(1)); - Internal_yscalar_nnz_view_t_ const_y_v(y_rhs_input_vec.data(), - y_rhs_input_vec.extent(0), - y_rhs_input_vec.extent(1)); + Internal_yscalar_nnz_view_t_ const_y_v(y_rhs_input_vec.data(), y_rhs_input_vec.extent(0), 
y_rhs_input_vec.extent(1)); using namespace KokkosSparse::Impl; - GAUSS_SEIDEL_APPLY:: - gauss_seidel_apply(space, &tmp_handle, num_rows, num_cols, const_a_r, - const_a_l, const_a_v, nonconst_x_v, const_y_v, - init_zero_x_vector, update_y_vector, omega, numIter, - true, true); + Internal_yscalar_nnz_view_t_>::gauss_seidel_apply(space, &tmp_handle, num_rows, num_cols, + const_a_r, const_a_l, const_a_v, nonconst_x_v, + const_y_v, init_zero_x_vector, update_y_vector, + omega, numIter, true, true); } /// @@ -697,23 +596,18 @@ void symmetric_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void symmetric_gauss_seidel_apply( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { +template +void symmetric_gauss_seidel_apply(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, x_scalar_view_t x_lhs_output_vec, + y_scalar_view_t y_rhs_input_vec, bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { auto my_exec_space = handle->get_gs_handle()->get_execution_space(); - symmetric_gauss_seidel_apply( - my_exec_space, handle, num_rows, num_cols, row_map, entries, values, - x_lhs_output_vec, y_rhs_input_vec, init_zero_x_vector, update_y_vector, - omega, numIter); + symmetric_gauss_seidel_apply(my_exec_space, handle, num_rows, num_cols, row_map, + entries, values, x_lhs_output_vec, y_rhs_input_vec, + init_zero_x_vector, update_y_vector, omega, numIter); } /// @@ 
-747,27 +641,23 @@ void symmetric_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void symmetric_block_gauss_seidel_apply( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - typename KernelHandle::const_nnz_lno_t block_size, - - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { +template +void symmetric_block_gauss_seidel_apply(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, + typename KernelHandle::const_nnz_lno_t block_size, + + lno_row_view_t_ row_map, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, + bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { // Check compatibility of dimensions at run time. 
if (x_lhs_output_vec.extent(1) != y_rhs_input_vec.extent(1)) { std::ostringstream os; os << "KokkosSparse::symmetric_block_gauss_seidel_apply: Dimensions of X " "and Y do not match: " - << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " - << y_rhs_input_vec.extent(1) << " columns."; + << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " << y_rhs_input_vec.extent(1) << " columns."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } auto gsHandle = handle->get_point_gs_handle(); @@ -778,9 +668,8 @@ void symmetric_block_gauss_seidel_apply( } gsHandle->set_block_size(block_size); - symmetric_gauss_seidel_apply( - handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, - y_rhs_input_vec, init_zero_x_vector, update_y_vector, omega, numIter); + symmetric_gauss_seidel_apply(handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, + y_rhs_input_vec, init_zero_x_vector, update_y_vector, omega, numIter); } /// @@ -814,57 +703,45 @@ void symmetric_block_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void forward_sweep_gauss_seidel_apply( - const ExecutionSpace &space, KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { - static_assert(std::is_same::value, +void forward_sweep_gauss_seidel_apply(const ExecutionSpace &space, KernelHandle *handle, + typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, + bool 
init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { + static_assert(std::is_same::value, "KokkosSparse::forward_sweep_gauss_seidel_apply: Size type of " "the matrix should be same as kernelHandle sizetype."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::forward_sweep_gauss_seidel_apply: lno type of " "the matrix should be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::forward_sweep_gauss_seidel_apply: scalar type of the " "matrix should be same as kernelHandle scalar_t."); - static_assert(std::is_same::value, - "KokkosSparse::forward_sweep_gauss_seidel_apply: scalar type " - "of the y-vector should be same as kernelHandle scalar_t."); - static_assert( - std::is_same::value, - "KokkosSparse::forward_sweep_gauss_seidel_apply: scalar type of the " - "x-vector should be same as kernelHandle non-const scalar_t."); + std::is_same::value, + "KokkosSparse::forward_sweep_gauss_seidel_apply: scalar type " + "of the y-vector should be same as kernelHandle scalar_t."); + + static_assert(std::is_same::value, + "KokkosSparse::forward_sweep_gauss_seidel_apply: scalar type of the " + "x-vector should be same as kernelHandle non-const scalar_t."); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::forward_sweep_gauss_seidel_apply: row_map must " "have a contiguous layout (Left or Right, not Stride)"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::forward_sweep_gauss_seidel_apply: entries must " "have a contiguous layout (Left or Right, not Stride)"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::forward_sweep_gauss_seidel_apply: values must " "have a contiguous layout (Left or Right, not Stride)"); @@ -873,8 +750,7 @@ void forward_sweep_gauss_seidel_apply( std::ostringstream os; os << 
"KokkosSparse::forward_sweep_gauss_seidel_apply: Dimensions of X and " "Y do not match: " - << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " - << y_rhs_input_vec.extent(1) << " columns."; + << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " << y_rhs_input_vec.extent(1) << " columns."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -886,68 +762,53 @@ void forward_sweep_gauss_seidel_apply( typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; // const_handle_type tmp_handle = *handle; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename scalar_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; typedef Kokkos::View::array_layout, - typename y_scalar_view_t::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename y_scalar_view_t::device_type, Kokkos::MemoryTraits > Internal_yscalar_nnz_view_t_; typedef Kokkos::View::array_layout, - typename x_scalar_view_t::device_type, - 
Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename x_scalar_view_t::device_type, Kokkos::MemoryTraits > Internal_xscalar_nnz_view_t_; Internal_alno_row_view_t_ const_a_r(row_map.data(), row_map.extent(0)); Internal_alno_nnz_view_t_ const_a_l(entries.data(), entries.extent(0)); Internal_ascalar_nnz_view_t_ const_a_v(values.data(), values.extent(0)); - Internal_xscalar_nnz_view_t_ nonconst_x_v(x_lhs_output_vec.data(), - x_lhs_output_vec.extent(0), + Internal_xscalar_nnz_view_t_ nonconst_x_v(x_lhs_output_vec.data(), x_lhs_output_vec.extent(0), x_lhs_output_vec.extent(1)); - Internal_yscalar_nnz_view_t_ const_y_v(y_rhs_input_vec.data(), - y_rhs_input_vec.extent(0), - y_rhs_input_vec.extent(1)); + Internal_yscalar_nnz_view_t_ const_y_v(y_rhs_input_vec.data(), y_rhs_input_vec.extent(0), y_rhs_input_vec.extent(1)); using namespace KokkosSparse::Impl; - GAUSS_SEIDEL_APPLY:: - gauss_seidel_apply(space, &tmp_handle, num_rows, num_cols, const_a_r, - const_a_l, const_a_v, nonconst_x_v, const_y_v, - init_zero_x_vector, update_y_vector, omega, numIter, - true, false); + Internal_yscalar_nnz_view_t_>::gauss_seidel_apply(space, &tmp_handle, num_rows, num_cols, + const_a_r, const_a_l, const_a_v, nonconst_x_v, + const_y_v, init_zero_x_vector, update_y_vector, + omega, numIter, true, false); } /// @@ -979,23 +840,19 @@ void forward_sweep_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void forward_sweep_gauss_seidel_apply( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { +template +void 
forward_sweep_gauss_seidel_apply(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, + bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { auto my_exec_space = handle->get_gs_handle()->get_execution_space(); forward_sweep_gauss_seidel_apply( - my_exec_space, handle, num_rows, num_cols, row_map, entries, values, - x_lhs_output_vec, y_rhs_input_vec, init_zero_x_vector, update_y_vector, - omega, numIter); + my_exec_space, handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, y_rhs_input_vec, + init_zero_x_vector, update_y_vector, omega, numIter); } /// @@ -1028,27 +885,23 @@ void forward_sweep_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void forward_sweep_block_gauss_seidel_apply( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - typename KernelHandle::const_nnz_lno_t block_size, - - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { +template +void forward_sweep_block_gauss_seidel_apply(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, + typename KernelHandle::const_nnz_lno_t block_size, + + lno_row_view_t_ row_map, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, + bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int 
numIter) { // Check compatibility of dimensions at run time. if (x_lhs_output_vec.extent(1) != y_rhs_input_vec.extent(1)) { std::ostringstream os; os << "KokkosSparse::forward_sweep_block_gauss_seidel_apply: Dimensions of " "X and Y do not match: " - << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " - << y_rhs_input_vec.extent(1) << " columns."; + << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " << y_rhs_input_vec.extent(1) << " columns."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -1059,9 +912,8 @@ void forward_sweep_block_gauss_seidel_apply( "GS_CLUSTER"); } gsHandle->set_block_size(block_size); - forward_sweep_gauss_seidel_apply( - handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, - y_rhs_input_vec, init_zero_x_vector, update_y_vector, omega, numIter); + forward_sweep_gauss_seidel_apply(handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, + y_rhs_input_vec, init_zero_x_vector, update_y_vector, omega, numIter); } /// @@ -1096,57 +948,45 @@ void forward_sweep_block_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void backward_sweep_gauss_seidel_apply( - const ExecutionSpace &space, KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { - static_assert(std::is_same::value, +void backward_sweep_gauss_seidel_apply(const ExecutionSpace &space, KernelHandle *handle, + typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + x_scalar_view_t 
x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, + bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { + static_assert(std::is_same::value, "KokkosSparse::backward_sweep_gauss_seidel_apply: Size type of " "the matrix should be same as kernelHandle sizetype."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "KokkosSparse::backward_sweep_gauss_seidel_apply: lno type of " "the matrix should be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::backward_sweep_gauss_seidel_apply: scalar type of the " "matrix should be same as kernelHandle scalar_t."); - static_assert(std::is_same::value, - "KokkosSparse::backward_sweep_gauss_seidel_apply: scalar type " - "of the y-vector should be same as kernelHandle scalar_t."); - static_assert( - std::is_same::value, - "KokkosSparse::backward_sweep_gauss_seidel_apply: scalar type of the " - "x-vector should be same as kernelHandle non-const scalar_t."); + std::is_same::value, + "KokkosSparse::backward_sweep_gauss_seidel_apply: scalar type " + "of the y-vector should be same as kernelHandle scalar_t."); + + static_assert(std::is_same::value, + "KokkosSparse::backward_sweep_gauss_seidel_apply: scalar type of the " + "x-vector should be same as kernelHandle non-const scalar_t."); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::backward_sweep_gauss_seidel_apply: row_map must " "have a contiguous layout (Left or Right, not Stride)"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::backward_sweep_gauss_seidel_apply: entries must " "have a contiguous layout (Left or Right, not Stride)"); - static_assert(!std::is_same::value, + static_assert(!std::is_same::value, "KokkosSparse::backward_sweep_gauss_seidel_apply: values must " "have a contiguous layout (Left or Right, not Stride)"); @@ -1155,8 +995,7 @@ void 
backward_sweep_gauss_seidel_apply( std::ostringstream os; os << "KokkosSparse::backward_sweep_gauss_seidel_apply: Dimensions of X " "and Y do not match: " - << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " - << y_rhs_input_vec.extent(1) << " columns."; + << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " << y_rhs_input_vec.extent(1) << " columns."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -1168,68 +1007,53 @@ void backward_sweep_gauss_seidel_apply( typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; // const_handle_type tmp_handle = *handle; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename lno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename scalar_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; typedef Kokkos::View::array_layout, - typename y_scalar_view_t::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename y_scalar_view_t::device_type, Kokkos::MemoryTraits > Internal_yscalar_nnz_view_t_; typedef 
Kokkos::View::array_layout, - typename x_scalar_view_t::device_type, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename x_scalar_view_t::device_type, Kokkos::MemoryTraits > Internal_xscalar_nnz_view_t_; Internal_alno_row_view_t_ const_a_r(row_map.data(), row_map.extent(0)); Internal_alno_nnz_view_t_ const_a_l(entries.data(), entries.extent(0)); Internal_ascalar_nnz_view_t_ const_a_v(values.data(), values.extent(0)); - Internal_xscalar_nnz_view_t_ nonconst_x_v(x_lhs_output_vec.data(), - x_lhs_output_vec.extent(0), + Internal_xscalar_nnz_view_t_ nonconst_x_v(x_lhs_output_vec.data(), x_lhs_output_vec.extent(0), x_lhs_output_vec.extent(1)); - Internal_yscalar_nnz_view_t_ const_y_v(y_rhs_input_vec.data(), - y_rhs_input_vec.extent(0), - y_rhs_input_vec.extent(1)); + Internal_yscalar_nnz_view_t_ const_y_v(y_rhs_input_vec.data(), y_rhs_input_vec.extent(0), y_rhs_input_vec.extent(1)); using namespace KokkosSparse::Impl; - GAUSS_SEIDEL_APPLY:: - gauss_seidel_apply(space, &tmp_handle, num_rows, num_cols, const_a_r, - const_a_l, const_a_v, nonconst_x_v, const_y_v, - init_zero_x_vector, update_y_vector, omega, numIter, - false, true); + Internal_yscalar_nnz_view_t_>::gauss_seidel_apply(space, &tmp_handle, num_rows, num_cols, + const_a_r, const_a_l, const_a_v, nonconst_x_v, + const_y_v, init_zero_x_vector, update_y_vector, + omega, numIter, false, true); } /// @@ -1261,23 +1085,19 @@ void backward_sweep_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void backward_sweep_gauss_seidel_apply( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename 
KernelHandle::nnz_scalar_t omega, int numIter) { +template +void backward_sweep_gauss_seidel_apply(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, lno_row_view_t_ row_map, + lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, + bool init_zero_x_vector, bool update_y_vector, + typename KernelHandle::nnz_scalar_t omega, int numIter) { auto my_exec_space = handle->get_gs_handle()->get_execution_space(); backward_sweep_gauss_seidel_apply( - my_exec_space, handle, num_rows, num_cols, row_map, entries, values, - x_lhs_output_vec, y_rhs_input_vec, init_zero_x_vector, update_y_vector, - omega, numIter); + my_exec_space, handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, y_rhs_input_vec, + init_zero_x_vector, update_y_vector, omega, numIter); } /// @@ -1310,27 +1130,24 @@ void backward_sweep_gauss_seidel_apply( /// @pre y_rhs_input_vec.extent(0) == num_rows /// @pre x_lhs_output_vec.extent(1) == y_rhs_input_vec.extent(1) /// -template -void backward_sweep_block_gauss_seidel_apply( - KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, - typename KernelHandle::const_nnz_lno_t num_cols, - typename KernelHandle::const_nnz_lno_t block_size, - - lno_row_view_t_ row_map, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, - x_scalar_view_t x_lhs_output_vec, y_scalar_view_t y_rhs_input_vec, - bool init_zero_x_vector, bool update_y_vector, - typename KernelHandle::nnz_scalar_t omega, int numIter) { +template +void backward_sweep_block_gauss_seidel_apply(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t num_rows, + typename KernelHandle::const_nnz_lno_t num_cols, + typename KernelHandle::const_nnz_lno_t block_size, + + lno_row_view_t_ row_map, lno_nnz_view_t_ entries, + scalar_nnz_view_t_ values, x_scalar_view_t x_lhs_output_vec, + y_scalar_view_t y_rhs_input_vec, bool init_zero_x_vector, + 
bool update_y_vector, typename KernelHandle::nnz_scalar_t omega, + int numIter) { // Check compatibility of dimensions at run time. if (x_lhs_output_vec.extent(1) != y_rhs_input_vec.extent(1)) { std::ostringstream os; os << "KokkosSparse::backward_sweep_block_gauss_seidel_apply: Dimensions " "of X and Y do not match: " - << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " - << y_rhs_input_vec.extent(1) << " columns."; + << "X has " << x_lhs_output_vec.extent(1) << "columns, Y has " << y_rhs_input_vec.extent(1) << " columns."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } auto gsHandle = handle->get_point_gs_handle(); @@ -1340,9 +1157,8 @@ void backward_sweep_block_gauss_seidel_apply( "GS_CLUSTER"); } gsHandle->set_block_size(block_size); - backward_sweep_gauss_seidel_apply( - handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, - y_rhs_input_vec, init_zero_x_vector, update_y_vector, omega, numIter); + backward_sweep_gauss_seidel_apply(handle, num_rows, num_cols, row_map, entries, values, x_lhs_output_vec, + y_rhs_input_vec, init_zero_x_vector, update_y_vector, omega, numIter); } } // namespace Experimental } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel_handle.hpp index 624382ec5b3d..bf5ee8633b14 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_gauss_seidel_handle.hpp @@ -23,27 +23,18 @@ #ifndef _GAUSSSEIDELHANDLE_HPP #define _GAUSSSEIDELHANDLE_HPP -//#define VERBOSE +// #define VERBOSE namespace KokkosSparse { enum GSAlgorithm { GS_DEFAULT, GS_PERMUTED, GS_TEAM, GS_CLUSTER, GS_TWOSTAGE }; enum GSDirection { GS_FORWARD, GS_BACKWARD, GS_SYMMETRIC }; -enum struct ClusteringAlgorithm { - CLUSTER_DEFAULT, - CLUSTER_MIS2, - CLUSTER_BALLOON, - NUM_CLUSTERING_ALGORITHMS -}; +enum struct ClusteringAlgorithm { 
CLUSTER_DEFAULT, CLUSTER_MIS2, CLUSTER_BALLOON, NUM_CLUSTERING_ALGORITHMS }; -static constexpr ClusteringAlgorithm CLUSTER_DEFAULT = - ClusteringAlgorithm::CLUSTER_DEFAULT; -static constexpr ClusteringAlgorithm CLUSTER_MIS2 = - ClusteringAlgorithm::CLUSTER_MIS2; -static constexpr ClusteringAlgorithm CLUSTER_BALLOON = - ClusteringAlgorithm::CLUSTER_BALLOON; -static constexpr ClusteringAlgorithm NUM_CLUSTERING_ALGORITHMS = - ClusteringAlgorithm::NUM_CLUSTERING_ALGORITHMS; +static constexpr ClusteringAlgorithm CLUSTER_DEFAULT = ClusteringAlgorithm::CLUSTER_DEFAULT; +static constexpr ClusteringAlgorithm CLUSTER_MIS2 = ClusteringAlgorithm::CLUSTER_MIS2; +static constexpr ClusteringAlgorithm CLUSTER_BALLOON = ClusteringAlgorithm::CLUSTER_BALLOON; +static constexpr ClusteringAlgorithm NUM_CLUSTERING_ALGORITHMS = ClusteringAlgorithm::NUM_CLUSTERING_ALGORITHMS; inline const char *getClusterAlgoName(ClusteringAlgorithm ca) { switch (ca) { @@ -54,8 +45,8 @@ inline const char *getClusterAlgoName(ClusteringAlgorithm ca) { return "INVALID CLUSTERING ALGORITHM"; } -template +template class GaussSeidelHandle { public: typedef ExecutionSpace HandleExecSpace; @@ -71,26 +62,17 @@ class GaussSeidelHandle { typedef typename std::remove_const::type nnz_scalar_t; typedef const nnz_scalar_t const_nnz_scalar_t; - typedef typename Kokkos::View - row_lno_temp_work_view_t; - typedef typename Kokkos::View - row_lno_persistent_work_view_t; - typedef typename row_lno_persistent_work_view_t::HostMirror - row_lno_persistent_work_host_view_t; // Host view type - - typedef typename Kokkos::View - scalar_temp_work_view_t; - typedef typename Kokkos::View - scalar_persistent_work_view_t; - typedef typename scalar_persistent_work_view_t::HostMirror - scalar_persistent_work_host_view_t; // Host view type - - typedef typename Kokkos::View - nnz_lno_temp_work_view_t; - typedef typename Kokkos::View - nnz_lno_persistent_work_view_t; - typedef typename nnz_lno_persistent_work_view_t::HostMirror - 
nnz_lno_persistent_work_host_view_t; // Host view type + typedef typename Kokkos::View row_lno_temp_work_view_t; + typedef typename Kokkos::View row_lno_persistent_work_view_t; + typedef typename row_lno_persistent_work_view_t::HostMirror row_lno_persistent_work_host_view_t; // Host view type + + typedef typename Kokkos::View scalar_temp_work_view_t; + typedef typename Kokkos::View scalar_persistent_work_view_t; + typedef typename scalar_persistent_work_view_t::HostMirror scalar_persistent_work_host_view_t; // Host view type + + typedef typename Kokkos::View nnz_lno_temp_work_view_t; + typedef typename Kokkos::View nnz_lno_persistent_work_view_t; + typedef typename nnz_lno_persistent_work_view_t::HostMirror nnz_lno_persistent_work_host_view_t; // Host view type protected: HandleExecSpace execution_space; @@ -124,8 +106,7 @@ class GaussSeidelHandle { suggested_vector_size(0), suggested_team_size(0) {} - GaussSeidelHandle(HandleExecSpace handle_exec_space, int n_streams, - GSAlgorithm gs) + GaussSeidelHandle(HandleExecSpace handle_exec_space, int n_streams, GSAlgorithm gs) : execution_space(handle_exec_space), num_streams(n_streams), algorithm_type(gs), @@ -146,12 +127,8 @@ class GaussSeidelHandle { GSAlgorithm get_algorithm_type() const { return this->algorithm_type; } - nnz_lno_persistent_work_host_view_t get_color_xadj() const { - return this->color_xadj; - } - nnz_lno_persistent_work_view_t get_color_adj() const { - return this->color_adj; - } + nnz_lno_persistent_work_host_view_t get_color_xadj() const { return this->color_xadj; } + nnz_lno_persistent_work_view_t get_color_adj() const { return this->color_adj; } nnz_lno_t get_num_colors() const { return this->numColors; } bool is_symbolic_called() const { return this->called_symbolic; } @@ -183,15 +160,9 @@ class GaussSeidelHandle { void set_call_symbolic(bool call = true) { this->called_symbolic = call; } void set_call_numeric(bool call = true) { this->called_numeric = call; } - void set_color_xadj(const 
nnz_lno_persistent_work_host_view_t &color_xadj_) { - this->color_xadj = color_xadj_; - } - void set_color_adj(const nnz_lno_persistent_work_view_t &color_adj_) { - this->color_adj = color_adj_; - } - void set_num_colors(const nnz_lno_t &numColors_) { - this->numColors = numColors_; - } + void set_color_xadj(const nnz_lno_persistent_work_host_view_t &color_xadj_) { this->color_xadj = color_xadj_; } + void set_color_adj(const nnz_lno_persistent_work_view_t &color_adj_) { this->color_adj = color_adj_; } + void set_num_colors(const nnz_lno_t &numColors_) { this->numColors = numColors_; } void vector_team_size(int max_allowed_team_size, int &suggested_vector_size_, // output @@ -202,24 +173,21 @@ class GaussSeidelHandle { suggested_team_size_ = this->suggested_team_size; return; } else { - KokkosKernels::Impl::get_suggested_vector_size( - suggested_vector_size_, nr, nnz); - KokkosKernels::Impl::get_suggested_team_size( - max_allowed_team_size, suggested_vector_size_, suggested_team_size_); + KokkosKernels::Impl::get_suggested_vector_size(suggested_vector_size_, nr, nnz); + KokkosKernels::Impl::get_suggested_team_size(max_allowed_team_size, suggested_vector_size_, + suggested_team_size_); this->suggested_team_size = suggested_vector_size_; this->suggested_vector_size = suggested_vector_size_; } } }; -template -class PointGaussSeidelHandle - : public GaussSeidelHandle { +template +class PointGaussSeidelHandle : public GaussSeidelHandle { public: - typedef GaussSeidelHandle + typedef GaussSeidelHandle GSHandle; typedef ExecutionSpace HandleExecSpace; typedef TemporaryMemorySpace HandleTempMemorySpace; @@ -234,29 +202,19 @@ class PointGaussSeidelHandle typedef typename std::remove_const::type nnz_scalar_t; typedef const nnz_scalar_t const_nnz_scalar_t; - typedef typename Kokkos::View - row_lno_temp_work_view_t; - typedef typename Kokkos::View - row_lno_persistent_work_view_t; - typedef typename row_lno_persistent_work_view_t::HostMirror - 
row_lno_persistent_work_host_view_t; // Host view type - - typedef typename Kokkos::View - scalar_temp_work_view_t; - typedef typename Kokkos::View - scalar_persistent_work_view_t; - typedef typename Kokkos::View + typedef typename Kokkos::View row_lno_temp_work_view_t; + typedef typename Kokkos::View row_lno_persistent_work_view_t; + typedef typename row_lno_persistent_work_view_t::HostMirror row_lno_persistent_work_host_view_t; // Host view type + + typedef typename Kokkos::View scalar_temp_work_view_t; + typedef typename Kokkos::View scalar_persistent_work_view_t; + typedef typename Kokkos::View scalar_persistent_work_view2d_t; - typedef typename scalar_persistent_work_view_t::HostMirror - scalar_persistent_work_host_view_t; // Host view type + typedef typename scalar_persistent_work_view_t::HostMirror scalar_persistent_work_host_view_t; // Host view type - typedef typename Kokkos::View - nnz_lno_temp_work_view_t; - typedef typename Kokkos::View - nnz_lno_persistent_work_view_t; - typedef typename nnz_lno_persistent_work_view_t::HostMirror - nnz_lno_persistent_work_host_view_t; // Host view type + typedef typename Kokkos::View nnz_lno_temp_work_view_t; + typedef typename Kokkos::View nnz_lno_persistent_work_view_t; + typedef typename nnz_lno_persistent_work_view_t::HostMirror nnz_lno_persistent_work_host_view_t; // Host view type private: row_lno_persistent_work_view_t permuted_xadj; @@ -293,8 +251,7 @@ class PointGaussSeidelHandle * \brief Default constructor. 
*/ PointGaussSeidelHandle(GSHandle gs_handle, - KokkosGraph::ColoringAlgorithm coloring_algo_ = - KokkosGraph::COLORING_DEFAULT) + KokkosGraph::ColoringAlgorithm coloring_algo_ = KokkosGraph::COLORING_DEFAULT) : GSHandle(gs_handle), permuted_xadj(), permuted_adj(), @@ -311,21 +268,16 @@ class PointGaussSeidelHandle level_2_mem(0), long_row_threshold(0), coloring_algo(coloring_algo_) { - if (gs_handle.get_algorithm_type() == GS_DEFAULT) - this->choose_default_algorithm(); + if (gs_handle.get_algorithm_type() == GS_DEFAULT) this->choose_default_algorithm(); } - PointGaussSeidelHandle(GSAlgorithm gs = GS_DEFAULT, - KokkosGraph::ColoringAlgorithm coloring_algo_ = - KokkosGraph::COLORING_DEFAULT) + PointGaussSeidelHandle(GSAlgorithm gs = GS_DEFAULT, + KokkosGraph::ColoringAlgorithm coloring_algo_ = KokkosGraph::COLORING_DEFAULT) : PointGaussSeidelHandle(GSHandle(gs), coloring_algo_) {} - PointGaussSeidelHandle(HandleExecSpace handle_exec_space, int n_streams, - GSAlgorithm gs = GS_DEFAULT, - KokkosGraph::ColoringAlgorithm coloring_algo_ = - KokkosGraph::COLORING_DEFAULT) - : PointGaussSeidelHandle(GSHandle(handle_exec_space, n_streams, gs), - coloring_algo_) {} + PointGaussSeidelHandle(HandleExecSpace handle_exec_space, int n_streams, GSAlgorithm gs = GS_DEFAULT, + KokkosGraph::ColoringAlgorithm coloring_algo_ = KokkosGraph::COLORING_DEFAULT) + : PointGaussSeidelHandle(GSHandle(handle_exec_space, n_streams, gs), coloring_algo_) {} void set_block_size(nnz_lno_t bs) { this->block_size = bs; } nnz_lno_t get_block_size() const { return this->block_size; } @@ -337,76 +289,42 @@ class PointGaussSeidelHandle this->algorithm_type = GS_PERMUTED; } - KokkosGraph::ColoringAlgorithm get_coloring_algorithm() const { - return this->coloring_algo; - } - void set_coloring_algorithm(KokkosGraph::ColoringAlgorithm algo) { - this->coloring_algo = algo; - } + KokkosGraph::ColoringAlgorithm get_coloring_algorithm() const { return this->coloring_algo; } + void 
set_coloring_algorithm(KokkosGraph::ColoringAlgorithm algo) { this->coloring_algo = algo; } ~PointGaussSeidelHandle() = default; // getters - row_lno_persistent_work_view_t get_new_xadj() const { - return this->permuted_xadj; - } - nnz_lno_persistent_work_view_t get_new_adj() const { - return this->permuted_adj; - } - scalar_persistent_work_view_t get_new_adj_val() const { - return this->permuted_adj_vals; - } - nnz_lno_persistent_work_view_t get_old_to_new_map() const { - return this->old_to_new_map; - } + row_lno_persistent_work_view_t get_new_xadj() const { return this->permuted_xadj; } + nnz_lno_persistent_work_view_t get_new_adj() const { return this->permuted_adj; } + scalar_persistent_work_view_t get_new_adj_val() const { return this->permuted_adj_vals; } + nnz_lno_persistent_work_view_t get_old_to_new_map() const { return this->old_to_new_map; } // setters - void set_algorithm_type(const GSAlgorithm &sgs_algo) { - this->algorithm_type = sgs_algo; - } + void set_algorithm_type(const GSAlgorithm &sgs_algo) { this->algorithm_type = sgs_algo; } void set_call_symbolic(bool call = true) { this->called_symbolic = call; } void set_call_numeric(bool call = true) { this->called_numeric = call; } - void set_num_colors(const nnz_lno_t &numColors_) { - this->numColors = numColors_; - } + void set_num_colors(const nnz_lno_t &numColors_) { this->numColors = numColors_; } - void set_new_xadj(const row_lno_persistent_work_view_t &xadj_) { - this->permuted_xadj = xadj_; - } - void set_new_adj(const nnz_lno_persistent_work_view_t &adj_) { - this->permuted_adj = adj_; - } - void set_new_adj_val(const scalar_persistent_work_view_t &adj_vals_) { - this->permuted_adj_vals = adj_vals_; - } - void set_old_to_new_map( - const nnz_lno_persistent_work_view_t &old_to_new_map_) { + void set_new_xadj(const row_lno_persistent_work_view_t &xadj_) { this->permuted_xadj = xadj_; } + void set_new_adj(const nnz_lno_persistent_work_view_t &adj_) { this->permuted_adj = adj_; } + void 
set_new_adj_val(const scalar_persistent_work_view_t &adj_vals_) { this->permuted_adj_vals = adj_vals_; } + void set_old_to_new_map(const nnz_lno_persistent_work_view_t &old_to_new_map_) { this->old_to_new_map = old_to_new_map_; } - void set_permuted_inverse_diagonal( - const scalar_persistent_work_view_t permuted_inverse_diagonal_) { + void set_permuted_inverse_diagonal(const scalar_persistent_work_view_t permuted_inverse_diagonal_) { this->permuted_inverse_diagonal = permuted_inverse_diagonal_; } - scalar_persistent_work_view_t get_permuted_inverse_diagonal() const { - return this->permuted_inverse_diagonal; - } + scalar_persistent_work_view_t get_permuted_inverse_diagonal() const { return this->permuted_inverse_diagonal; } - void set_level_1_mem(size_t _level_1_mem) { - this->level_1_mem = _level_1_mem; - } - void set_level_2_mem(size_t _level_2_mem) { - this->level_2_mem = _level_2_mem; - } + void set_level_1_mem(size_t _level_1_mem) { this->level_1_mem = _level_1_mem; } + void set_level_2_mem(size_t _level_2_mem) { this->level_2_mem = _level_2_mem; } - void set_num_values_in_l1(nnz_lno_t _num_values_in_l1) { - this->num_values_in_l1 = _num_values_in_l1; - } - void set_num_values_in_l2(nnz_lno_t _num_values_in_l2) { - this->num_values_in_l2 = _num_values_in_l2; - } + void set_num_values_in_l1(nnz_lno_t _num_values_in_l1) { this->num_values_in_l1 = _num_values_in_l1; } + void set_num_values_in_l2(nnz_lno_t _num_values_in_l2) { this->num_values_in_l2 = _num_values_in_l2; } void set_num_big_rows(nnz_lno_t _big_rows) { this->num_big_rows = _big_rows; } @@ -421,53 +339,35 @@ class PointGaussSeidelHandle void set_long_row_threshold(nnz_lno_t lrt) { long_row_threshold = lrt; } - nnz_lno_persistent_work_host_view_t get_long_rows_per_color() const { - return long_rows_per_color; - } + nnz_lno_persistent_work_host_view_t get_long_rows_per_color() const { return long_rows_per_color; } - void set_long_rows_per_color( - const nnz_lno_persistent_work_host_view_t 
&long_rows_per_color_) { + void set_long_rows_per_color(const nnz_lno_persistent_work_host_view_t &long_rows_per_color_) { long_rows_per_color = long_rows_per_color_; } - nnz_lno_persistent_work_host_view_t get_max_row_length_per_color() const { - return max_row_length_per_color; - } + nnz_lno_persistent_work_host_view_t get_max_row_length_per_color() const { return max_row_length_per_color; } - void set_max_row_length_per_color( - const nnz_lno_persistent_work_host_view_t &max_row_length_per_color_) { + void set_max_row_length_per_color(const nnz_lno_persistent_work_host_view_t &max_row_length_per_color_) { max_row_length_per_color = max_row_length_per_color_; } scalar_persistent_work_view_t get_long_row_x() const { return long_row_x; } - void set_long_row_x(const scalar_persistent_work_view_t &long_row_x_) { - long_row_x = long_row_x_; - } + void set_long_row_x(const scalar_persistent_work_view_t &long_row_x_) { long_row_x = long_row_x_; } - void allocate_x_y_vectors(nnz_lno_t num_rows, nnz_lno_t num_cols, - nnz_lno_t num_vecs) { - if (permuted_y_vector.extent(0) != size_t(num_rows) || - permuted_y_vector.extent(1) != size_t(num_vecs)) { - permuted_y_vector = scalar_persistent_work_view2d_t("PERMUTED Y VECTOR", - num_rows, num_vecs); + void allocate_x_y_vectors(nnz_lno_t num_rows, nnz_lno_t num_cols, nnz_lno_t num_vecs) { + if (permuted_y_vector.extent(0) != size_t(num_rows) || permuted_y_vector.extent(1) != size_t(num_vecs)) { + permuted_y_vector = scalar_persistent_work_view2d_t("PERMUTED Y VECTOR", num_rows, num_vecs); } - if (permuted_x_vector.extent(0) != size_t(num_cols) || - permuted_x_vector.extent(1) != size_t(num_vecs)) { - permuted_x_vector = scalar_persistent_work_view2d_t("PERMUTED X VECTOR", - num_cols, num_vecs); + if (permuted_x_vector.extent(0) != size_t(num_cols) || permuted_x_vector.extent(1) != size_t(num_vecs)) { + permuted_x_vector = scalar_persistent_work_view2d_t("PERMUTED X VECTOR", num_cols, num_vecs); } } - 
scalar_persistent_work_view2d_t get_permuted_y_vector() const { - return this->permuted_y_vector; - } - scalar_persistent_work_view2d_t get_permuted_x_vector() const { - return this->permuted_x_vector; - } + scalar_persistent_work_view2d_t get_permuted_y_vector() const { return this->permuted_y_vector; } + scalar_persistent_work_view2d_t get_permuted_x_vector() const { return this->permuted_x_vector; } - void vector_team_size(int max_allowed_team_size, int &suggested_vector_size_, - int &suggested_team_size_, size_type nr, + void vector_team_size(int max_allowed_team_size, int &suggested_vector_size_, int &suggested_team_size_, size_type nr, size_type nnz) { // suggested_team_size_ = this->suggested_team_size = 1; // suggested_vector_size_=this->suggested_vector_size = 1; @@ -477,24 +377,21 @@ class PointGaussSeidelHandle suggested_team_size_ = this->suggested_team_size; return; } else { - KokkosKernels::Impl::get_suggested_vector_size( - suggested_vector_size_, nr, nnz); - KokkosKernels::Impl::get_suggested_team_size( - max_allowed_team_size, suggested_vector_size_, suggested_team_size_); + KokkosKernels::Impl::get_suggested_vector_size(suggested_vector_size_, nr, nnz); + KokkosKernels::Impl::get_suggested_team_size(max_allowed_team_size, suggested_vector_size_, + suggested_team_size_); this->suggested_team_size = suggested_vector_size_; this->suggested_vector_size = suggested_vector_size_; } } }; -template -class ClusterGaussSeidelHandle - : public GaussSeidelHandle { +template +class ClusterGaussSeidelHandle : public GaussSeidelHandle { public: - typedef GaussSeidelHandle + typedef GaussSeidelHandle GSHandle; typedef ExecutionSpace HandleExecSpace; typedef TemporaryMemorySpace HandleTempMemorySpace; @@ -509,26 +406,17 @@ class ClusterGaussSeidelHandle typedef typename std::remove_const::type nnz_scalar_t; typedef const nnz_scalar_t const_nnz_scalar_t; - typedef typename Kokkos::View - row_lno_temp_work_view_t; - typedef typename Kokkos::View - 
row_lno_persistent_work_view_t; - typedef typename row_lno_persistent_work_view_t::HostMirror - row_lno_persistent_work_host_view_t; // Host view type - - typedef typename Kokkos::View - scalar_temp_work_view_t; - typedef typename Kokkos::View - scalar_persistent_work_view_t; - typedef typename scalar_persistent_work_view_t::HostMirror - scalar_persistent_work_host_view_t; // Host view type - - typedef typename Kokkos::View - nnz_lno_temp_work_view_t; - typedef typename Kokkos::View - nnz_lno_persistent_work_view_t; - typedef typename nnz_lno_persistent_work_view_t::HostMirror - nnz_lno_persistent_work_host_view_t; // Host view type + typedef typename Kokkos::View row_lno_temp_work_view_t; + typedef typename Kokkos::View row_lno_persistent_work_view_t; + typedef typename row_lno_persistent_work_view_t::HostMirror row_lno_persistent_work_host_view_t; // Host view type + + typedef typename Kokkos::View scalar_temp_work_view_t; + typedef typename Kokkos::View scalar_persistent_work_view_t; + typedef typename scalar_persistent_work_view_t::HostMirror scalar_persistent_work_host_view_t; // Host view type + + typedef typename Kokkos::View nnz_lno_temp_work_view_t; + typedef typename Kokkos::View nnz_lno_persistent_work_view_t; + typedef typename nnz_lno_persistent_work_view_t::HostMirror nnz_lno_persistent_work_host_view_t; // Host view type private: ClusteringAlgorithm cluster_algo; @@ -558,8 +446,7 @@ class ClusterGaussSeidelHandle */ // Constructor for cluster-coloring based GS and SGS - ClusterGaussSeidelHandle(ClusteringAlgorithm cluster_algo_, - nnz_lno_t cluster_size_, + ClusterGaussSeidelHandle(ClusteringAlgorithm cluster_algo_, nnz_lno_t cluster_size_, KokkosGraph::ColoringAlgorithm coloring_algo_) : GSHandle(GS_CLUSTER), cluster_algo(cluster_algo_), @@ -573,58 +460,39 @@ class ClusterGaussSeidelHandle void set_cluster_size(nnz_lno_t cs) { this->cluster_size = cs; } nnz_lno_t get_cluster_size() const { return this->cluster_size; } - 
KokkosGraph::ColoringAlgorithm get_coloring_algorithm() const { - return this->coloring_algo; - } - void set_coloring_algorithm(KokkosGraph::ColoringAlgorithm algo) { - this->coloring_algo = algo; - } + KokkosGraph::ColoringAlgorithm get_coloring_algorithm() const { return this->coloring_algo; } + void set_coloring_algorithm(KokkosGraph::ColoringAlgorithm algo) { this->coloring_algo = algo; } - void set_vert_clusters(nnz_lno_persistent_work_view_t &vert_clusters_) { - this->vert_clusters = vert_clusters_; - } - void set_cluster_xadj(nnz_lno_persistent_work_view_t &cluster_xadj_) { - this->cluster_xadj = cluster_xadj_; - } - void set_cluster_adj(nnz_lno_persistent_work_view_t &cluster_adj_) { - this->cluster_adj = cluster_adj_; - } + void set_vert_clusters(nnz_lno_persistent_work_view_t &vert_clusters_) { this->vert_clusters = vert_clusters_; } + void set_cluster_xadj(nnz_lno_persistent_work_view_t &cluster_xadj_) { this->cluster_xadj = cluster_xadj_; } + void set_cluster_adj(nnz_lno_persistent_work_view_t &cluster_adj_) { this->cluster_adj = cluster_adj_; } nnz_lno_persistent_work_view_t get_vert_clusters() const { if (!this->is_symbolic_called()) - throw std::runtime_error( - "vert_clusters does not exist until after symbolic setup."); + throw std::runtime_error("vert_clusters does not exist until after symbolic setup."); return vert_clusters; } nnz_lno_persistent_work_view_t get_cluster_xadj() const { if (!this->is_symbolic_called()) - throw std::runtime_error( - "cluster_xadj does not exist until after symbolic setup."); + throw std::runtime_error("cluster_xadj does not exist until after symbolic setup."); return cluster_xadj; } nnz_lno_persistent_work_view_t get_cluster_adj() const { - if (!this->is_symbolic_called()) - throw std::runtime_error( - "cluster_adj does not exist until after symbolic setup."); + if (!this->is_symbolic_called()) throw std::runtime_error("cluster_adj does not exist until after symbolic setup."); return cluster_adj; } - void 
set_inverse_diagonal(scalar_persistent_work_view_t &inv_diag) { - this->inverse_diagonal = inv_diag; - } + void set_inverse_diagonal(scalar_persistent_work_view_t &inv_diag) { this->inverse_diagonal = inv_diag; } scalar_persistent_work_view_t get_inverse_diagonal() const { if (!this->is_symbolic_called()) - throw std::runtime_error( - "inverse diagonal does not exist until after numeric setup."); + throw std::runtime_error("inverse diagonal does not exist until after numeric setup."); return inverse_diagonal; } - bool use_teams() const { - return KokkosKernels::Impl::kk_is_gpu_exec_space(); - } + bool use_teams() const { return KokkosKernels::Impl::kk_is_gpu_exec_space(); } ~ClusterGaussSeidelHandle() = default; @@ -633,12 +501,10 @@ class ClusterGaussSeidelHandle // ------------------------------------- // Handle for Two-stage/Classical GS -template class TwoStageGaussSeidelHandle - : public GaussSeidelHandle { public: using memory_space = typename ExecutionSpace::memory_space; @@ -647,9 +513,8 @@ class TwoStageGaussSeidelHandle using size_type = typename std::remove_const::type; using device_t = Kokkos::Device; - using crsmat_t = - KokkosSparse::CrsMatrix; - using graph_t = typename crsmat_t::StaticCrsGraphType; + using crsmat_t = KokkosSparse::CrsMatrix; + using graph_t = typename crsmat_t::StaticCrsGraphType; using input_row_map_view_t = typename graph_t::row_map_type; using input_entries_view_t = typename graph_t::entries_type; @@ -669,10 +534,8 @@ class TwoStageGaussSeidelHandle using vector_view_t = Kokkos::View; - using GSHandle = - GaussSeidelHandle; + using GSHandle = GaussSeidelHandle; using HandleExecSpace = typename GSHandle::HandleExecSpace; @@ -698,8 +561,7 @@ class TwoStageGaussSeidelHandle * @brief Construct a new Two Stage Gauss Seidel Handle object * */ - TwoStageGaussSeidelHandle() - : TwoStageGaussSeidelHandle(GSHandle(GS_TWOSTAGE)) {} + TwoStageGaussSeidelHandle() : TwoStageGaussSeidelHandle(GSHandle(GS_TWOSTAGE)) {} /** * @brief Construct a 
new Two Stage Gauss Seidel Handle object @@ -708,13 +570,10 @@ class TwoStageGaussSeidelHandle * @param n_streams the number of streams */ TwoStageGaussSeidelHandle(HandleExecSpace handle_exec_space, int n_streams) - : TwoStageGaussSeidelHandle( - GSHandle(handle_exec_space, n_streams, GS_TWOSTAGE)) {} + : TwoStageGaussSeidelHandle(GSHandle(handle_exec_space, n_streams, GS_TWOSTAGE)) {} // Sweep direction - void setSweepDirection(GSDirection direction_) { - this->direction = direction_; - } + void setSweepDirection(GSDirection direction_) { this->direction = direction_; } GSDirection getSweepDirection() { return this->direction; } // specify whether to perform inner sweeps @@ -722,27 +581,19 @@ class TwoStageGaussSeidelHandle bool isTwoStage() { return this->two_stage; } // specify whether to use compact form of recurrence - void setCompactForm(bool compact_form_) { - this->compact_form = compact_form_; - } + void setCompactForm(bool compact_form_) { this->compact_form = compact_form_; } bool isCompactForm() { return this->compact_form; } // Number of outer sweeps - void setNumOuterSweeps(int num_outer_sweeps_) { - this->num_outer_sweeps = num_outer_sweeps_; - } + void setNumOuterSweeps(int num_outer_sweeps_) { this->num_outer_sweeps = num_outer_sweeps_; } int getNumOuterSweeps() { return this->num_outer_sweeps; } // Number of inner sweeps - void setNumInnerSweeps(int num_inner_sweeps_) { - this->num_inner_sweeps = num_inner_sweeps_; - } + void setNumInnerSweeps(int num_inner_sweeps_) { this->num_inner_sweeps = num_inner_sweeps_; } int getNumInnerSweeps() { return this->num_inner_sweeps; } // Inner damping factor - void setInnerDampFactor(scalar_t inner_omega_) { - this->inner_omega = inner_omega_; - } + void setInnerDampFactor(scalar_t inner_omega_) { this->inner_omega = inner_omega_; } scalar_t getInnerDampFactor() { return this->inner_omega; } // Workspaces diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_getDiagCopy.hpp 
b/packages/kokkos-kernels/sparse/src/KokkosSparse_getDiagCopy.hpp index debc3bb46347..8f67d8a1c6f5 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_getDiagCopy.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_getDiagCopy.hpp @@ -26,21 +26,14 @@ namespace KokkosSparse { template -void getDiagCopy(const DiagType& D, const OffsetsType& offsets, - const CrsMatrixType& A) { - static_assert(Kokkos::is_view::value, - "The DiagType template parameter must be a Kokkos::View."); - static_assert(static_cast(DiagType::rank) == 1, - "The DiagType template parameter must be a 1-D Kokkos::View."); - static_assert( - std::is_same::value, - "The DiagType template parameter must be a nonconst Kokkos::View."); - static_assert(Kokkos::is_view::value, - "The OffsetsType template parameter must be a Kokkos::View."); - static_assert( - static_cast(OffsetsType::rank) == 1, - "The OffsetsType template parameter must be a 1-D Kokkos::View."); +void getDiagCopy(const DiagType& D, const OffsetsType& offsets, const CrsMatrixType& A) { + static_assert(Kokkos::is_view::value, "The DiagType template parameter must be a Kokkos::View."); + static_assert(static_cast(DiagType::rank) == 1, "The DiagType template parameter must be a 1-D Kokkos::View."); + static_assert(std::is_same::value, + "The DiagType template parameter must be a nonconst Kokkos::View."); + static_assert(Kokkos::is_view::value, "The OffsetsType template parameter must be a Kokkos::View."); + static_assert(static_cast(OffsetsType::rank) == 1, + "The OffsetsType template parameter must be a 1-D Kokkos::View."); typedef typename CrsMatrixType::value_type scalar_type; typedef typename CrsMatrixType::ordinal_type ordinal_type; @@ -49,18 +42,14 @@ void getDiagCopy(const DiagType& D, const OffsetsType& offsets, // Standardize on unmanaged Views, in order to avoid proliferation // of instantiations of the implementation type. 
- Kokkos::View + Kokkos::View D_internal = D; - Kokkos::View offsets_internal = offsets; - typedef Impl::CrsMatrixGetDiagCopyWithOffsets - impl_type; + typedef Impl::CrsMatrixGetDiagCopyWithOffsets impl_type; impl_type::getDiagCopy(D_internal, offsets_internal, A); } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres.hpp index b0b708a33046..d01193307d0c 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres.hpp @@ -42,9 +42,8 @@ namespace KokkosSparse { namespace Experimental { -#define KOKKOSKERNELS_GMRES_SAME_TYPE(A, B) \ - std::is_same::type, \ - typename std::remove_const::type>::value +#define KOKKOSKERNELS_GMRES_SAME_TYPE(A, B) \ + std::is_same::type, typename std::remove_const::type>::value /// @brief /// @tparam KernelHandle @@ -56,60 +55,48 @@ namespace Experimental { /// @param B /// @param X /// @param precond -template -void gmres(KernelHandle* handle, AMatrix& A, BType& B, XType& X, - Preconditioner* precond = nullptr) { +template +void gmres(KernelHandle* handle, AMatrix& A, BType& B, XType& X, Preconditioner* precond = nullptr) { using scalar_type = typename KernelHandle::nnz_scalar_t; using size_type = typename KernelHandle::size_type; using ordinal_type = typename KernelHandle::nnz_lno_t; - static_assert( - KOKKOSKERNELS_GMRES_SAME_TYPE(typename BType::value_type, scalar_type), - "gmres: B scalar type must match KernelHandle entry " - "type (aka nnz_scalar_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_GMRES_SAME_TYPE(typename BType::value_type, scalar_type), + "gmres: B scalar type must match KernelHandle entry " + "type (aka nnz_scalar_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_GMRES_SAME_TYPE(typename XType::value_type, scalar_type), - "gmres: X scalar type must match KernelHandle entry " - "type (aka nnz_scalar_t, and const doesn't matter)"); + 
static_assert(KOKKOSKERNELS_GMRES_SAME_TYPE(typename XType::value_type, scalar_type), + "gmres: X scalar type must match KernelHandle entry " + "type (aka nnz_scalar_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_GMRES_SAME_TYPE(typename AMatrix::value_type, scalar_type), - "gmres: A scalar type must match KernelHandle entry " - "type (aka nnz_scalar_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_GMRES_SAME_TYPE(typename AMatrix::value_type, scalar_type), + "gmres: A scalar type must match KernelHandle entry " + "type (aka nnz_scalar_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_GMRES_SAME_TYPE(typename AMatrix::ordinal_type, - ordinal_type), + static_assert(KOKKOSKERNELS_GMRES_SAME_TYPE(typename AMatrix::ordinal_type, ordinal_type), "gmres: A ordinal type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_GMRES_SAME_TYPE(typename AMatrix::size_type, size_type), + "gmres: A size type must match KernelHandle entry " + "type (aka size_type, and const doesn't matter)"); + static_assert( - KOKKOSKERNELS_GMRES_SAME_TYPE(typename AMatrix::size_type, size_type), - "gmres: A size type must match KernelHandle entry " - "type (aka size_type, and const doesn't matter)"); - - static_assert(KokkosSparse::is_crs_matrix::value || - KokkosSparse::Experimental::is_bsr_matrix::value, - "gmres: A is not a CRS or BSR matrix."); - static_assert(Kokkos::is_view::value, - "gmres: B is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "gmres: X is not a Kokkos::View."); + KokkosSparse::is_crs_matrix::value || KokkosSparse::Experimental::is_bsr_matrix::value, + "gmres: A is not a CRS or BSR matrix."); + static_assert(Kokkos::is_view::value, "gmres: B is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "gmres: X is not a Kokkos::View."); static_assert(BType::rank == 1, "gmres: B must have rank 1"); static_assert(XType::rank == 1, "gmres: X must have 
rank 1"); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "gmres: The output X must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "gmres: X and B have different device types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "gmres: A and B have different device types."); using c_size_t = typename KernelHandle::const_size_type; @@ -120,49 +107,40 @@ void gmres(KernelHandle* handle, AMatrix& A, BType& B, XType& X, using c_temp_t = typename KernelHandle::HandleTempMemorySpace; using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - if ((X.extent(0) != B.extent(0)) || - (static_cast(A.numPointCols()) != - static_cast(X.extent(0))) || - (static_cast(A.numPointRows()) != - static_cast(B.extent(0)))) { + if ((X.extent(0) != B.extent(0)) || (static_cast(A.numPointCols()) != static_cast(X.extent(0))) || + (static_cast(A.numPointRows()) != static_cast(B.extent(0)))) { std::ostringstream os; os << "KokkosSparse::gmres: Dimensions do not match: " - << ", A: " << A.numRows() << " x " << A.numCols() - << ", x: " << X.extent(0) << ", b: " << B.extent(0); + << ", A: " << A.numRows() << " x " << A.numCols() << ", x: " << X.extent(0) << ", b: " << B.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using const_handle_type = - typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle; const_handle_type tmp_handle(*handle); - using AMatrix_Bsr_Internal = KokkosSparse::Experimental::BsrMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type>; + using AMatrix_Bsr_Internal = + KokkosSparse::Experimental::BsrMatrix, + typename AMatrix::const_size_type>; using AMatrix_Internal = 
std::conditional_t< KokkosSparse::is_crs_matrix::value, - KokkosSparse::CrsMatrix, + KokkosSparse::CrsMatrix, typename AMatrix::const_size_type>, AMatrix_Bsr_Internal>; - using B_Internal = Kokkos::View< - typename BType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename BType::device_type, - Kokkos::MemoryTraits >; + using B_Internal = + Kokkos::View::array_layout, typename BType::device_type, + Kokkos::MemoryTraits >; - using X_Internal = Kokkos::View< - typename XType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XType::device_type, - Kokkos::MemoryTraits >; + using X_Internal = + Kokkos::View::array_layout, typename XType::device_type, + Kokkos::MemoryTraits >; using Precond_Internal = Preconditioner; @@ -172,14 +150,10 @@ void gmres(KernelHandle* handle, AMatrix& A, BType& B, XType& X, Precond_Internal* precond_i = reinterpret_cast(precond); - KokkosSparse::Impl::GMRES::gmres(&tmp_handle, A_i, b_i, x_i, - precond_i); + KokkosSparse::Impl::GMRES::gmres(&tmp_handle, A_i, b_i, x_i, precond_i); } // gmres diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres_handle.hpp index 040e98bc0021..e0bf3c1c1a49 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_gmres_handle.hpp @@ -33,8 +33,8 @@ namespace Experimental { * * For more info, see KokkosSparse_gmres.hpp doxygen */ -template +template class GMRESHandle { public: using HandleExecSpace = ExecutionSpace; @@ -62,15 +62,12 @@ class GMRESHandle { using nnz_value_view_t = typename Kokkos::View; - using nnz_value_view2d_t = - typename Kokkos::View; + using nnz_value_view2d_t = typename Kokkos::View; - using signed_integral_t = typename std::make_signed< - typename nnz_row_view_t::non_const_value_type>::type; + using signed_integral_t = typename std::make_signed::type; 
using signed_nnz_lno_view_t = - Kokkos::View; /** @@ -107,8 +104,7 @@ class GMRESHandle { public: // Use set methods to control ortho, and verbose - GMRESHandle(const size_type m_ = 50, const float_t tol_ = 1e-8, - const size_type max_restart_ = 50) + GMRESHandle(const size_type m_ = 50, const float_t tol_ = 1e-8, const size_type max_restart_ = 50) : m(m_), tol(tol_), max_restart(max_restart_), @@ -118,13 +114,11 @@ class GMRESHandle { end_rel_res(-1), conv_flag_val(NotRun) { if (m <= 0) { - throw std::invalid_argument( - "gmres: Please choose restart size m greater than zero."); + throw std::invalid_argument("gmres: Please choose restart size m greater than zero."); } } - void reset_handle(const size_type m_ = 50, const float_t tol_ = 1e-8, - const size_type max_restart_ = 50) { + void reset_handle(const size_type m_ = 50, const float_t tol_ = 1e-8, const size_type max_restart_ = 50) { set_m(m_); set_tol(tol_); set_max_restart(max_restart_); @@ -148,9 +142,7 @@ class GMRESHandle { size_type get_max_restart() const { return max_restart; } KOKKOS_INLINE_FUNCTION - void set_max_restart(const size_type max_restart_) { - this->max_restart = max_restart_; - } + void set_max_restart(const size_type max_restart_) { this->max_restart = max_restart_; } KOKKOS_INLINE_FUNCTION float_t get_tol() const { return tol; } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf.hpp index ee8139d6ac6a..a3785c08c1c2 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf.hpp @@ -47,30 +47,24 @@ void mdf_symbolic(const crs_matrix_type& A, MDF_handle& handle) { // allocate L and U size_type nnzL = 0, nnzU = 0; team_range_policy_type setupPolicy(A.numRows(), Kokkos::AUTO); - KokkosSparse::Impl::MDF_count_lower compute_nnzL( - A, handle.permutation, handle.permutation_inv); + KokkosSparse::Impl::MDF_count_lower compute_nnzL(A, handle.permutation, 
handle.permutation_inv); Kokkos::parallel_reduce(setupPolicy, compute_nnzL, nnzL); nnzU = A.nnz() - nnzL + A.numRows(); handle.allocate_data(nnzL, nnzU); if (handle.verbosity > 0) { - printf("MDF symbolic: nnzL = %d, nnzU = %d\n", static_cast(nnzL), - static_cast(nnzU)); + printf("MDF symbolic: nnzL = %d, nnzU = %d\n", static_cast(nnzL), static_cast(nnzU)); } return; } // mdf_symbolic template -void mdf_print_joined_view( - const view_t& dev_view, const char* sep, - ordinal_t max_count = Kokkos::ArithTraits::max()) { - const auto host_view = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), dev_view); - - max_count = max_count > (ordinal_t)host_view.extent(0) - ? (ordinal_t)host_view.extent(0) - : max_count; +void mdf_print_joined_view(const view_t& dev_view, const char* sep, + ordinal_t max_count = Kokkos::ArithTraits::max()) { + const auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), dev_view); + + max_count = max_count > (ordinal_t)host_view.extent(0) ? 
(ordinal_t)host_view.extent(0) : max_count; for (ordinal_t i = 0; i < max_count; ++i) { if (i) printf("%s", sep); printf("%g", static_cast(host_view[i])); @@ -79,22 +73,18 @@ void mdf_print_joined_view( template void mdf_numeric(const crs_matrix_type& A, MDF_handle& handle) { - using col_ind_type = typename crs_matrix_type::StaticCrsGraphType:: - entries_type::non_const_type; - using scalar_mag_type = - typename KokkosSparse::Impl::MDF_types::scalar_mag_type; - using values_mag_type = - typename KokkosSparse::Impl::MDF_types::values_mag_type; - using ordinal_type = typename crs_matrix_type::ordinal_type; - using value_mag_type = typename values_mag_type::value_type; - - using device_type = typename crs_matrix_type::device_type; - using execution_space = typename crs_matrix_type::execution_space; - using range_policy_type = Kokkos::RangePolicy; + using col_ind_type = typename crs_matrix_type::StaticCrsGraphType::entries_type::non_const_type; + using scalar_mag_type = typename KokkosSparse::Impl::MDF_types::scalar_mag_type; + using values_mag_type = typename KokkosSparse::Impl::MDF_types::values_mag_type; + using ordinal_type = typename crs_matrix_type::ordinal_type; + using value_mag_type = typename values_mag_type::value_type; + + using device_type = typename crs_matrix_type::device_type; + using execution_space = typename crs_matrix_type::execution_space; + using range_policy_type = Kokkos::RangePolicy; using team_range_policy_type = Kokkos::TeamPolicy; - using permutation_set_type = - Kokkos::UnorderedMap; + using permutation_set_type = Kokkos::UnorderedMap; // Numerical phase: // loop over rows @@ -103,7 +93,7 @@ void mdf_numeric(const crs_matrix_type& A, MDF_handle& handle) { // factorize pivot row of A const int verbosity_level = handle.verbosity; crs_matrix_type Atmp = crs_matrix_type("A fill", A); - crs_matrix_type At = KokkosSparse::Impl::transpose_matrix(A); + crs_matrix_type At = KokkosSparse::Impl::transpose_matrix(A); KokkosSparse::sort_crs_matrix(At); 
values_mag_type discarded_fill("discarded fill", A.numRows()); col_ind_type deficiency("deficiency", A.numRows()); @@ -114,30 +104,22 @@ void mdf_numeric(const crs_matrix_type& A, MDF_handle& handle) { Kokkos::deep_copy(deficiency, Kokkos::ArithTraits::max()); permutation_set_type permutation_set(A.numRows()); - KokkosSparse::Impl::MDF_discarded_fill_norm - MDF_df_norm(Atmp, At, 0, handle.permutation, permutation_set, - discarded_fill, deficiency, verbosity_level); - Kokkos::parallel_for( - "MDF: initial fill computation", - team_range_policy_type(Atmp.numRows(), Kokkos::AUTO, Kokkos::AUTO), - MDF_df_norm); + KokkosSparse::Impl::MDF_discarded_fill_norm MDF_df_norm( + Atmp, At, 0, handle.permutation, permutation_set, discarded_fill, deficiency, verbosity_level); + Kokkos::parallel_for("MDF: initial fill computation", + team_range_policy_type(Atmp.numRows(), Kokkos::AUTO, Kokkos::AUTO), MDF_df_norm); - for (ordinal_type factorization_step = 0; factorization_step < A.numRows(); - ++factorization_step) { + for (ordinal_type factorization_step = 0; factorization_step < A.numRows(); ++factorization_step) { if (verbosity_level > 0) { - printf("\n\nFactorization step %d\n", - static_cast(factorization_step)); + printf("\n\nFactorization step %d\n", static_cast(factorization_step)); } if (update_list_len > 0) { - team_range_policy_type updatePolicy(update_list_len, Kokkos::AUTO, - Kokkos::AUTO); - KokkosSparse::Impl::MDF_discarded_fill_norm - MDF_update_df_norm(Atmp, At, factorization_step, handle.permutation, - permutation_set, discarded_fill, deficiency, - verbosity_level, update_list); - Kokkos::parallel_for("MDF: updating fill norms", updatePolicy, - MDF_update_df_norm); + team_range_policy_type updatePolicy(update_list_len, Kokkos::AUTO, Kokkos::AUTO); + KokkosSparse::Impl::MDF_discarded_fill_norm MDF_update_df_norm( + Atmp, At, factorization_step, handle.permutation, permutation_set, discarded_fill, deficiency, + verbosity_level, update_list); + 
Kokkos::parallel_for("MDF: updating fill norms", updatePolicy, MDF_update_df_norm); } if (verbosity_level > 1) { @@ -155,10 +137,8 @@ void mdf_numeric(const crs_matrix_type& A, MDF_handle& handle) { { range_policy_type stepPolicy(factorization_step, Atmp.numRows()); KokkosSparse::Impl::MDF_select_row MDF_row_selector( - factorization_step, discarded_fill, deficiency, Atmp.graph.row_map, - handle.permutation); - Kokkos::parallel_reduce("MDF: select pivot", stepPolicy, MDF_row_selector, - selected_row_idx); + factorization_step, discarded_fill, deficiency, Atmp.graph.row_map, handle.permutation); + Kokkos::parallel_reduce("MDF: select pivot", stepPolicy, MDF_row_selector, selected_row_idx); } ordinal_type selected_row_len = 0; @@ -167,13 +147,12 @@ void mdf_numeric(const crs_matrix_type& A, MDF_handle& handle) { // provided by kokkos (https://github.com/kokkos/kokkos/issues/6259) team_range_policy_type updateListPolicy(1, Kokkos::AUTO); KokkosSparse::Impl::MDF_compute_list_length updateList( - Atmp, At, handle.row_mapL, handle.entriesL, handle.valuesL, - handle.row_mapU, handle.entriesU, handle.valuesU, handle.permutation, - handle.permutation_inv, permutation_set, discarded_fill, factored, - selected_row_idx, factorization_step, update_list, verbosity_level); + Atmp, At, handle.row_mapL, handle.entriesL, handle.valuesL, handle.row_mapU, handle.entriesU, handle.valuesU, + handle.permutation, handle.permutation_inv, permutation_set, discarded_fill, factored, selected_row_idx, + factorization_step, update_list, verbosity_level); update_list_len = 0; - Kokkos::parallel_reduce("MDF: compute update list", updateListPolicy, - updateList, update_list_len, selected_row_len); + Kokkos::parallel_reduce("MDF: compute update list", updateListPolicy, updateList, update_list_len, + selected_row_len); } if (verbosity_level > 1) { @@ -189,36 +168,25 @@ void mdf_numeric(const crs_matrix_type& A, MDF_handle& handle) { printf( " Selected row idx %d with length %d. 
Requires update of %d fill " "norms.\n", - static_cast(selected_row_idx), - static_cast(selected_row_len), - static_cast(update_list_len)); + static_cast(selected_row_idx), static_cast(selected_row_len), static_cast(update_list_len)); } // If this was the last row no need to update A and At! if (factorization_step < A.numRows() - 1) { - team_range_policy_type factorizePolicy(selected_row_len, Kokkos::AUTO, - Kokkos::AUTO); + team_range_policy_type factorizePolicy(selected_row_len, Kokkos::AUTO, Kokkos::AUTO); KokkosSparse::Impl::MDF_factorize_row factorize_row( - Atmp, At, handle.row_mapL, handle.entriesL, handle.valuesL, - handle.row_mapU, handle.entriesU, handle.valuesU, handle.permutation, - handle.permutation_inv, permutation_set, discarded_fill, factored, - selected_row_idx, factorization_step, update_list, verbosity_level); - Kokkos::parallel_for("MDF: factorize row", factorizePolicy, - factorize_row); + Atmp, At, handle.row_mapL, handle.entriesL, handle.valuesL, handle.row_mapU, handle.entriesU, handle.valuesU, + handle.permutation, handle.permutation_inv, permutation_set, discarded_fill, factored, selected_row_idx, + factorization_step, update_list, verbosity_level); + Kokkos::parallel_for("MDF: factorize row", factorizePolicy, factorize_row); } } // Loop over factorization steps - KokkosSparse::Impl::MDF_reindex_matrix reindex_U( - handle.permutation_inv, handle.entriesU); - Kokkos::parallel_for("MDF: re-index U", - range_policy_type(0, handle.entriesU.extent(0)), - reindex_U); - - KokkosSparse::Impl::MDF_reindex_matrix reindex_L( - handle.permutation_inv, handle.entriesL); - Kokkos::parallel_for("MDF: re-index L", - range_policy_type(0, handle.entriesL.extent(0)), - reindex_L); + KokkosSparse::Impl::MDF_reindex_matrix reindex_U(handle.permutation_inv, handle.entriesU); + Kokkos::parallel_for("MDF: re-index U", range_policy_type(0, handle.entriesU.extent(0)), reindex_U); + + KokkosSparse::Impl::MDF_reindex_matrix reindex_L(handle.permutation_inv, 
handle.entriesL); + Kokkos::parallel_for("MDF: re-index L", range_policy_type(0, handle.entriesL.extent(0)), reindex_L); handle.L = KokkosSparse::Impl::transpose_matrix(handle.L); diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf_handle.hpp index c6005bee12a3..3a41c7942a02 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_mdf_handle.hpp @@ -37,13 +37,11 @@ template struct MDF_handle { using crs_matrix_type = matrix_type; using execution_space = typename matrix_type::execution_space; - using row_map_type = typename crs_matrix_type::StaticCrsGraphType:: - row_map_type::non_const_type; - using col_ind_type = typename crs_matrix_type::StaticCrsGraphType:: - entries_type::non_const_type; - using values_type = typename crs_matrix_type::values_type::non_const_type; - using size_type = typename crs_matrix_type::size_type; - using ordinal_type = typename crs_matrix_type::ordinal_type; + using row_map_type = typename crs_matrix_type::StaticCrsGraphType::row_map_type::non_const_type; + using col_ind_type = typename crs_matrix_type::StaticCrsGraphType::entries_type::non_const_type; + using values_type = typename crs_matrix_type::values_type::non_const_type; + using size_type = typename crs_matrix_type::size_type; + using ordinal_type = typename crs_matrix_type::ordinal_type; ordinal_type numRows; @@ -76,16 +74,14 @@ struct MDF_handle { entriesL = col_ind_type("entries L", nnzL); valuesL = values_type("values L", nnzL); - L = crs_matrix_type("L", numRows, numRows, nnzL, valuesL, row_mapL, - entriesL); + L = crs_matrix_type("L", numRows, numRows, nnzL, valuesL, row_mapL, entriesL); // Allocate U row_mapU = row_map_type("row map U", numRows + 1); entriesU = col_ind_type("entries U", nnzU); valuesU = values_type("values U", nnzU); - U = crs_matrix_type("U", numRows, numRows, nnzU, valuesU, row_mapU, - entriesU); + U = 
crs_matrix_type("U", numRows, numRows, nnzU, valuesU, row_mapU, entriesU); } col_ind_type get_permutation() { return permutation; } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut.hpp index 8ded6209ec89..b4e1afb430e2 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut.hpp @@ -44,9 +44,9 @@ namespace KokkosSparse { namespace Experimental { -#define KOKKOSKERNELS_PAR_ILUT_SAME_TYPE(A, B) \ - std::is_same::type, \ - typename std::remove_const::type>::value +// Two types are the same (ignoring const) +template +constexpr bool parilut_same_type = std::is_same_v, typename std::remove_const_t>; /// @brief Performs the symbolic phase of par_ilut. /// This is a non-blocking function. @@ -70,81 +70,62 @@ namespace Experimental { /// (numRows+1) (Output) /// @param U_rowmap The row map for the U CSR, should already be sized correctly /// (numRows+1) (Output) -template -void par_ilut_symbolic(KernelHandle* handle, ARowMapType& A_rowmap, - AEntriesType& A_entries, LRowMapType& L_rowmap, +template +void par_ilut_symbolic(KernelHandle* handle, ARowMapType& A_rowmap, AEntriesType& A_entries, LRowMapType& L_rowmap, URowMapType& U_rowmap) { using size_type = typename KernelHandle::size_type; using ordinal_type = typename KernelHandle::nnz_lno_t; - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename ARowMapType::non_const_value_type, size_type), + static_assert(parilut_same_type, "par_ilut_symbolic: A size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename AEntriesType::non_const_value_type, ordinal_type), + static_assert(parilut_same_type, "par_ilut_symbolic: A entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename 
LRowMapType::non_const_value_type, size_type), + static_assert(parilut_same_type, "par_ilut_symbolic: L size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename URowMapType::non_const_value_type, size_type), + static_assert(parilut_same_type, "par_ilut_symbolic: U size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert(Kokkos::is_view::value, - "par_ilut_symbolic: A_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_symbolic: A_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_symbolic: L_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_symbolic: U_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_symbolic: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_symbolic: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_symbolic: L_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_symbolic: U_rowmap is not a Kokkos::View."); - static_assert( - (int)LRowMapType::rank == (int)ARowMapType::rank, - "par_ilut_symbolic: The ranks of L_rowmap and A_rowmap do not match."); + static_assert((int)LRowMapType::rank == (int)ARowMapType::rank, + "par_ilut_symbolic: The ranks of L_rowmap and A_rowmap do not match."); - static_assert( - (int)LRowMapType::rank == (int)URowMapType::rank, - "par_ilut_symbolic: The ranks of L_rowmap and U_rowmap do not match."); + static_assert((int)LRowMapType::rank == (int)URowMapType::rank, + "par_ilut_symbolic: The ranks of L_rowmap and U_rowmap do not match."); static_assert(LRowMapType::rank == 1, "par_ilut_symbolic: A_rowmap, L_rowmap and U_rowmap must all " "have rank 1."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_symbolic: The output L_rowmap must be nonconst."); 
- static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_symbolic: The output U_rowmap must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_symbolic: Views LRowMapType and ARowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_symbolic: Views LRowMapType and URowMapType have " "different device_types."); - static_assert( - std::is_same< - typename LRowMapType::device_type::execution_space, - typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, - "par_ilut_symbolic: KernelHandle and Views have different execution " - "spaces."); + static_assert(std::is_same_v, + "par_ilut_symbolic: KernelHandle and Views have different execution " + "spaces."); if (A_rowmap.extent(0) != 0) { - KK_REQUIRE_MSG(A_rowmap.extent(0) == L_rowmap.extent(0), - "L row map size does not match A row map"); - KK_REQUIRE_MSG(A_rowmap.extent(0) == U_rowmap.extent(0), - "U row map size does not match A row map"); + KK_REQUIRE_MSG(A_rowmap.extent(0) == L_rowmap.extent(0), "L row map size does not match A row map"); + KK_REQUIRE_MSG(A_rowmap.extent(0) == U_rowmap.extent(0), "U row map size does not match A row map"); } using c_size_t = typename KernelHandle::const_size_type; @@ -155,46 +136,39 @@ void par_ilut_symbolic(KernelHandle* handle, ARowMapType& A_rowmap, using c_temp_t = typename KernelHandle::HandleTempMemorySpace; using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - using const_handle_type = - typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle; const_handle_type tmp_handle(*handle); - using ARowMap_Internal = Kokkos::View< - typename ARowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename 
ARowMapType::device_type, - Kokkos::MemoryTraits >; - - using AEntries_Internal = Kokkos::View< - typename AEntriesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - AEntriesType>::array_layout, - typename AEntriesType::device_type, - Kokkos::MemoryTraits >; - - using LRowMap_Internal = Kokkos::View< - typename LRowMapType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LRowMapType::device_type, - Kokkos::MemoryTraits >; - - using URowMap_Internal = Kokkos::View< - typename URowMapType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename URowMapType::device_type, - Kokkos::MemoryTraits >; + using ARowMap_Internal = + Kokkos::View::array_layout, + typename ARowMapType::device_type, Kokkos::MemoryTraits>; + + using AEntries_Internal = + Kokkos::View::array_layout, + typename AEntriesType::device_type, Kokkos::MemoryTraits>; + + using LRowMap_Internal = + Kokkos::View::array_layout, + typename LRowMapType::device_type, Kokkos::MemoryTraits>; + + using URowMap_Internal = + Kokkos::View::array_layout, + typename URowMapType::device_type, Kokkos::MemoryTraits>; ARowMap_Internal A_rowmap_i = A_rowmap; AEntries_Internal A_entries_i = A_entries; LRowMap_Internal L_rowmap_i = L_rowmap; URowMap_Internal U_rowmap_i = U_rowmap; - KokkosSparse::Impl::PAR_ILUT_SYMBOLIC< - const_handle_type, ARowMap_Internal, AEntries_Internal, LRowMap_Internal, - URowMap_Internal>::par_ilut_symbolic(&tmp_handle, A_rowmap_i, A_entries_i, - L_rowmap_i, U_rowmap_i); + KokkosSparse::Impl::PAR_ILUT_SYMBOLIC::par_ilut_symbolic(&tmp_handle, A_rowmap_i, A_entries_i, + L_rowmap_i, U_rowmap_i); } // par_ilut_symbolic @@ -226,99 +200,69 @@ void par_ilut_symbolic(KernelHandle* handle, ARowMapType& A_rowmap, /// @param U_rowmap The row map (row nnz offsets) for the U CSR (Input/Output) /// @param U_entries The entries (column ids) for the U CSR (Output) /// @param U_values The values 
(non-zero matrix values) for the U CSR (Output) -template -void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, - AEntriesType& A_entries, AValuesType& A_values, - LRowMapType& L_rowmap, LEntriesType& L_entries, - LValuesType& L_values, URowMapType& U_rowmap, +template +void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, AEntriesType& A_entries, AValuesType& A_values, + LRowMapType& L_rowmap, LEntriesType& L_entries, LValuesType& L_values, URowMapType& U_rowmap, UEntriesType& U_entries, UValuesType& U_values) { using size_type = typename KernelHandle::size_type; using ordinal_type = typename KernelHandle::nnz_lno_t; using scalar_type = typename KernelHandle::nnz_scalar_t; - static_assert( - KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename ARowMapType::non_const_value_type, size_type), - "par_ilut_numeric: A size_type must match KernelHandle size_type " - "(const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename AEntriesType::non_const_value_type, ordinal_type), + static_assert(parilut_same_type, + "par_ilut_numeric: A size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(parilut_same_type, "par_ilut_numeric: A entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename AValuesType::value_type, scalar_type), + static_assert(parilut_same_type, "par_ilut_numeric: A scalar type must match KernelHandle entry " "type (aka nnz_scalar_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename LRowMapType::non_const_value_type, size_type), - "par_ilut_numeric: L size_type must match KernelHandle size_type " - "(const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename LEntriesType::non_const_value_type, ordinal_type), + static_assert(parilut_same_type, + "par_ilut_numeric: L size_type must match KernelHandle size_type " + 
"(const doesn't matter)"); + static_assert(parilut_same_type, "par_ilut_numeric: L entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename LValuesType::value_type, scalar_type), + static_assert(parilut_same_type, "par_ilut_numeric: L scalar type must match KernelHandle entry " "type (aka nnz_scalar_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename URowMapType::non_const_value_type, size_type), - "par_ilut_numeric: U size_type must match KernelHandle size_type " - "(const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename UEntriesType::non_const_value_type, ordinal_type), + static_assert(parilut_same_type, + "par_ilut_numeric: U size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(parilut_same_type, "par_ilut_numeric: U entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( - typename UValuesType::value_type, scalar_type), + static_assert(parilut_same_type, "par_ilut_numeric: U scalar type must match KernelHandle entry " "type (aka nnz_scalar_t, and const doesn't matter)"); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: A_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: A_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: A_values is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: L_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: L_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: L_values is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: U_rowmap is not a Kokkos::View."); - 
static_assert(Kokkos::is_view::value, - "par_ilut_numeric: U_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "par_ilut_numeric: U_values is not a Kokkos::View."); - - static_assert( - (int)LRowMapType::rank == (int)ARowMapType::rank, - "par_ilut_numeric: The ranks of L_rowmap and A_rowmap do not match."); - static_assert( - (int)LEntriesType::rank == (int)AEntriesType::rank, - "par_ilut_numeric: The ranks of L_entries and A_entries do not match."); - static_assert( - (int)LValuesType::rank == (int)AValuesType::rank, - "par_ilut_numeric: The ranks of L_values and A_values do not match."); - - static_assert( - (int)LRowMapType::rank == (int)URowMapType::rank, - "par_ilut_numeric: The ranks of L_rowmap and U_rowmap do not match."); - static_assert( - (int)LEntriesType::rank == (int)UEntriesType::rank, - "par_ilut_numeric: The ranks of L_entries and U_entries do not match."); - static_assert( - (int)LValuesType::rank == (int)UValuesType::rank, - "par_ilut_numeric: The ranks of L_values and U_values do not match."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: A_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: L_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: L_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: L_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: U_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: U_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "par_ilut_numeric: U_values is not a Kokkos::View."); + + static_assert((int)LRowMapType::rank == (int)ARowMapType::rank, + "par_ilut_numeric: The 
ranks of L_rowmap and A_rowmap do not match."); + static_assert((int)LEntriesType::rank == (int)AEntriesType::rank, + "par_ilut_numeric: The ranks of L_entries and A_entries do not match."); + static_assert((int)LValuesType::rank == (int)AValuesType::rank, + "par_ilut_numeric: The ranks of L_values and A_values do not match."); + + static_assert((int)LRowMapType::rank == (int)URowMapType::rank, + "par_ilut_numeric: The ranks of L_rowmap and U_rowmap do not match."); + static_assert((int)LEntriesType::rank == (int)UEntriesType::rank, + "par_ilut_numeric: The ranks of L_entries and U_entries do not match."); + static_assert((int)LValuesType::rank == (int)UValuesType::rank, + "par_ilut_numeric: The ranks of L_values and U_values do not match."); static_assert(LRowMapType::rank == 1, "par_ilut_numeric: A_rowmap, L_rowmap and U_rowmap must all " @@ -330,74 +274,52 @@ void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, "par_ilut_numeric: A_values, L_values and U_values must all " "have rank 1."); - static_assert( - std::is_same::value, - "par_ilut_numeric: The output L_entries must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, + "par_ilut_numeric: The output L_entries must be nonconst."); + static_assert(std::is_same_v, "par_ilut_numeric: The output L_values must be nonconst."); - static_assert( - std::is_same::value, - "par_ilut_numeric: The output U_entries must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, + "par_ilut_numeric: The output U_entries must be nonconst."); + static_assert(std::is_same_v, "par_ilut_numeric: The output U_values must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_numeric: Views LRowMapType and ARowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_numeric: Views LEntriesType and AEntriesType have " "different device_types."); - 
static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_numeric: Views LValuesType and AValuesType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_numeric: Views LRowMapType and URowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_numeric: Views LEntriesType and UEntriesType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "par_ilut_numeric: Views LValuesType and UValuesType have " "different device_types."); - static_assert( - std::is_same< - typename LRowMapType::device_type::execution_space, - typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, - "par_ilut_numeric: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LEntriesType::device_type::execution_space, - typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, - "par_ilut_numeric: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LValuesType::device_type::execution_space, - typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, - "par_ilut_numeric: KernelHandle and Views have different execution " - "spaces."); - - static_assert( - std::is_same::value, - "par_ilut_numeric: rowmap and entries have different device types."); - static_assert( - std::is_same::value, - "par_ilut_numeric: rowmap and values have different device types."); + static_assert(std::is_same_v, + "par_ilut_numeric: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same_v, + "par_ilut_numeric: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same_v, + "par_ilut_numeric: KernelHandle and Views have different execution " + "spaces."); + + static_assert(std::is_same_v, + "par_ilut_numeric: rowmap and 
entries have different device types."); + static_assert(std::is_same_v, + "par_ilut_numeric: rowmap and values have different device types."); // Check if symbolic has been called if (handle->get_par_ilut_handle()->is_symbolic_complete() == false) { @@ -408,10 +330,8 @@ void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, KokkosKernels::Impl::throw_runtime_exception(os.str()); } - KK_REQUIRE_MSG(KokkosSparse::Impl::isCrsGraphSorted(L_rowmap, L_entries), - "L is not sorted"); - KK_REQUIRE_MSG(KokkosSparse::Impl::isCrsGraphSorted(U_rowmap, U_entries), - "U is not sorted"); + KK_REQUIRE_MSG(KokkosSparse::Impl::isCrsGraphSorted(L_rowmap, L_entries), "L is not sorted"); + KK_REQUIRE_MSG(KokkosSparse::Impl::isCrsGraphSorted(U_rowmap, U_entries), "U is not sorted"); using c_size_t = typename KernelHandle::const_size_type; using c_lno_t = typename KernelHandle::const_nnz_lno_t; @@ -421,68 +341,53 @@ void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, using c_temp_t = typename KernelHandle::HandleTempMemorySpace; using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - using const_handle_type = - typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle; const_handle_type tmp_handle(*handle); - using ARowMap_Internal = Kokkos::View< - typename ARowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename ARowMapType::device_type, - Kokkos::MemoryTraits >; - - using AEntries_Internal = Kokkos::View< - typename AEntriesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - AEntriesType>::array_layout, - typename AEntriesType::device_type, - Kokkos::MemoryTraits >; - - using AValues_Internal = Kokkos::View< - typename AValuesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - 
typename AValuesType::device_type, - Kokkos::MemoryTraits >; - - using LRowMap_Internal = Kokkos::View< - typename LRowMapType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LRowMapType::device_type, - Kokkos::MemoryTraits >; + using ARowMap_Internal = + Kokkos::View::array_layout, + typename ARowMapType::device_type, Kokkos::MemoryTraits>; + + using AEntries_Internal = + Kokkos::View::array_layout, + typename AEntriesType::device_type, Kokkos::MemoryTraits>; + + using AValues_Internal = + Kokkos::View::array_layout, + typename AValuesType::device_type, Kokkos::MemoryTraits>; + + using LRowMap_Internal = + Kokkos::View::array_layout, + typename LRowMapType::device_type, Kokkos::MemoryTraits>; using LEntries_Internal = Kokkos::View::array_layout, - typename LEntriesType::device_type, - Kokkos::MemoryTraits >; - - using LValues_Internal = Kokkos::View< - typename LValuesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LValuesType::device_type, - Kokkos::MemoryTraits >; - - using URowMap_Internal = Kokkos::View< - typename URowMapType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename URowMapType::device_type, - Kokkos::MemoryTraits >; + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename LEntriesType::device_type, Kokkos::MemoryTraits>; + + using LValues_Internal = Kokkos::View::array_layout, + typename LValuesType::device_type, Kokkos::MemoryTraits>; + + using URowMap_Internal = + Kokkos::View::array_layout, + typename URowMapType::device_type, Kokkos::MemoryTraits>; using UEntries_Internal = Kokkos::View::array_layout, - typename UEntriesType::device_type, - Kokkos::MemoryTraits >; + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename UEntriesType::device_type, Kokkos::MemoryTraits>; - using UValues_Internal = Kokkos::View< - typename UValuesType::non_const_value_type*, - 
typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename UValuesType::device_type, - Kokkos::MemoryTraits >; + using UValues_Internal = Kokkos::View::array_layout, + typename UValuesType::device_type, Kokkos::MemoryTraits>; ARowMap_Internal A_rowmap_i = A_rowmap; AEntries_Internal A_entries_i = A_entries; @@ -494,19 +399,13 @@ void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, UEntries_Internal U_entries_i = U_entries; UValues_Internal U_values_i = U_values; - KokkosSparse::Impl::PAR_ILUT_NUMERIC< - const_handle_type, ARowMap_Internal, AEntries_Internal, AValues_Internal, - LRowMap_Internal, LEntries_Internal, LValues_Internal, URowMap_Internal, - UEntries_Internal, UValues_Internal>::par_ilut_numeric(&tmp_handle, - A_rowmap_i, - A_entries_i, - A_values_i, - L_rowmap_i, - L_entries_i, - L_values_i, - U_rowmap_i, - U_entries_i, - U_values_i); + KokkosSparse::Impl::PAR_ILUT_NUMERIC::par_ilut_numeric(&tmp_handle, A_rowmap_i, + A_entries_i, A_values_i, + L_rowmap_i, L_entries_i, + L_values_i, U_rowmap_i, + U_entries_i, U_values_i); // These may have been resized L_entries = L_entries_i; @@ -519,6 +418,4 @@ void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, } // namespace Experimental } // namespace KokkosSparse -#undef KOKKOSKERNELS_PAR_ILUT_SAME_TYPE - #endif // KOKKOSSPARSE_PAR_ILUT_HPP_ diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut_handle.hpp index 5ea4b3c436d6..5f4730241d94 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_par_ilut_handle.hpp @@ -30,8 +30,8 @@ namespace Experimental { * * For more info, see KokkosSparse_par_ilut.hpp doxygen */ -template +template class PAR_ILUTHandle { public: using HandleExecSpace = ExecutionSpace; @@ -54,41 +54,36 @@ class PAR_ILUTHandle { using float_t = typename Kokkos::ArithTraits::mag_type; - using 
nnz_row_view_t = - typename Kokkos::View; + using nnz_row_view_t = typename Kokkos::View; - using nnz_lno_view_t = - typename Kokkos::View; + using nnz_lno_view_t = typename Kokkos::View; - using nnz_value_view_t = - typename Kokkos::View; + using nnz_value_view_t = typename Kokkos::View; - using signed_integral_t = typename std::make_signed< - typename nnz_row_view_t::non_const_value_type>::type; + using signed_integral_t = typename std::make_signed::type; using signed_nnz_lno_view_t = - Kokkos::View; private: // User inputs - size_type max_iter; /// Hard cap on the number of par_ilut iterations + size_type max_iter; /// Hard cap on the number of par_ilut iterations float_t residual_norm_delta_stop; /// When the change in residual from /// iteration to iteration drops below /// this, the algorithm will stop (even if /// max_iters has not been hit) float_t fill_in_limit; /// The threshold for removing candidates - /// from the intermediate L and U is set such - /// that the resulting sparsity pattern has - /// at most `fill_in_limit` times the number - /// of non-zeros of the ILU(0) - /// factorization. This selection is executed - /// separately for both factors L and U. - bool async_update; /// Whether compute LU factors should do asychronous - /// updates. When ON, the algorithm will usually converge - /// faster but it makes the algorithm non-deterministic. - bool verbose; /// Print information while executing par_ilut + /// from the intermediate L and U is set such + /// that the resulting sparsity pattern has + /// at most `fill_in_limit` times the number + /// of non-zeros of the ILU(0) + /// factorization. This selection is executed + /// separately for both factors L and U. + bool async_update; /// Whether compute LU factors should do asychronous + /// updates. When ON, the algorithm will usually converge + /// faster but it makes the algorithm non-deterministic. 
+ bool verbose; /// Print information while executing par_ilut // Stored by parent KokkosKernelsHandle int team_size; /// Kokkos team size. Set by the parent handle. -1 implies @@ -96,26 +91,23 @@ class PAR_ILUTHandle { int vector_size; /// Kokkos vector size. Set by the parent handle. // Stored by symbolic phase - size_type - nrows; /// Number of rows in the CSRs given to the symbolic par_ilut - size_type nnzL; /// Number of non-zero entries in the L part of A in the CSRs - /// given to the symbolic par_ilut - size_type nnzU; /// Number of non-zero entries in the U part of A in the CSRs - /// given to the symbolic par_ilut + size_type nrows; /// Number of rows in the CSRs given to the symbolic par_ilut + size_type nnzL; /// Number of non-zero entries in the L part of A in the CSRs + /// given to the symbolic par_ilut + size_type nnzU; /// Number of non-zero entries in the U part of A in the CSRs + /// given to the symbolic par_ilut bool symbolic_complete; /// Whether symbolic par_ilut has been called // Outputs - int num_iters; /// The number of iterations par_ilut took to finish + int num_iters; /// The number of iterations par_ilut took to finish nnz_scalar_t end_rel_res; /// The A - LU residual norm at the time the /// algorithm finished public: // See KokkosKernelsHandle::create_par_ilut_handle for default user input // values - PAR_ILUTHandle(const size_type max_iter_, - const float_t residual_norm_delta_stop_, - const float_t fill_in_limit_, const bool async_update_, - const bool verbose_) + PAR_ILUTHandle(const size_type max_iter_, const float_t residual_norm_delta_stop_, const float_t fill_in_limit_, + const bool async_update_, const bool verbose_) : max_iter(max_iter_), residual_norm_delta_stop(residual_norm_delta_stop_), fill_in_limit(fill_in_limit_), @@ -168,13 +160,9 @@ class PAR_ILUTHandle { void set_residual_norm_delta_stop(const float_t residual_norm_delta_stop_) { this->residual_norm_delta_stop = residual_norm_delta_stop_; } - float_t 
get_residual_norm_delta_stop() const { - return this->residual_norm_delta_stop; - } + float_t get_residual_norm_delta_stop() const { return this->residual_norm_delta_stop; } - void set_fill_in_limit(const float_t fill_in_limit_) { - this->fill_in_limit = fill_in_limit_; - } + void set_fill_in_limit(const float_t fill_in_limit_) { this->fill_in_limit = fill_in_limit_; } float_t get_fill_in_limit() const { return this->fill_in_limit; } bool get_verbose() const { return verbose; } @@ -183,9 +171,7 @@ class PAR_ILUTHandle { bool get_async_update() const { return async_update; } - void set_async_update(const bool async_update_) { - this->async_update = async_update_; - } + void set_async_update(const bool async_update_) { this->async_update = async_update_; } TeamPolicy get_default_team_policy() const { if (team_size == -1) { diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd.hpp index 127400c752c5..c82667c85a02 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd.hpp @@ -28,17 +28,14 @@ namespace Experimental { // Symbolic: count entries in each row in C to produce rowmap // kernel handle has information about whether it is sorted add or not. 
-template -void spadd_symbolic( - const ExecSpace &exec, KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t m, // same type as column indices - typename KernelHandle::const_nnz_lno_t n, const alno_row_view_t_ a_rowmap, - const alno_nnz_view_t_ a_entries, const blno_row_view_t_ b_rowmap, - const blno_nnz_view_t_ b_entries, - clno_row_view_t_ c_rowmap) // c_rowmap must already be allocated (doesn't - // need to be initialized) +template +void spadd_symbolic(const ExecSpace &exec, KernelHandle *handle, + typename KernelHandle::const_nnz_lno_t m, // same type as column indices + typename KernelHandle::const_nnz_lno_t n, const alno_row_view_t_ a_rowmap, + const alno_nnz_view_t_ a_entries, const blno_row_view_t_ b_rowmap, const blno_nnz_view_t_ b_entries, + clno_row_view_t_ c_rowmap) // c_rowmap must already be allocated (doesn't + // need to be initialized) { typedef typename KernelHandle::HandleTempMemorySpace MemSpace; typedef typename KernelHandle::HandlePersistentMemorySpace PersistentMemSpace; @@ -48,61 +45,52 @@ void spadd_symbolic( typedef typename KernelHandle::const_nnz_lno_t c_lno_t; typedef typename KernelHandle::const_nnz_scalar_t c_scalar_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, ExecSpace, MemSpace, PersistentMemSpace> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle ConstKernelHandle; ConstKernelHandle tmp_handle(*handle); typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_a_rowmap; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_a_entries; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + 
Kokkos::MemoryTraits> Internal_b_rowmap; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_b_entries; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_c_rowmap; auto addHandle = handle->get_spadd_handle(); bool useFallback = !addHandle->is_input_strict_crs(); if (useFallback) { - KokkosSparse::Impl::SPADD_SYMBOLIC< - ExecSpace, ConstKernelHandle, Internal_a_rowmap, Internal_a_entries, - Internal_b_rowmap, Internal_b_entries, Internal_c_rowmap, false>:: - spadd_symbolic( - exec, &tmp_handle, m, n, - Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), - Internal_a_entries(a_entries.data(), a_entries.extent(0)), - Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), - Internal_b_entries(b_entries.data(), b_entries.extent(0)), - Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0))); + KokkosSparse::Impl::SPADD_SYMBOLIC::spadd_symbolic(exec, &tmp_handle, m, n, + Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), + Internal_a_entries(a_entries.data(), a_entries.extent(0)), + Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), + Internal_b_entries(b_entries.data(), b_entries.extent(0)), + Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0))); } else { KokkosSparse::Impl::SPADD_SYMBOLIC< - ExecSpace, ConstKernelHandle, Internal_a_rowmap, Internal_a_entries, - Internal_b_rowmap, Internal_b_entries, Internal_c_rowmap>:: - spadd_symbolic( - exec, &tmp_handle, m, n, - Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), - Internal_a_entries(a_entries.data(), a_entries.extent(0)), - Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), - Internal_b_entries(b_entries.data(), b_entries.extent(0)), - Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0))); + ExecSpace, ConstKernelHandle, 
Internal_a_rowmap, Internal_a_entries, Internal_b_rowmap, Internal_b_entries, + Internal_c_rowmap>::spadd_symbolic(exec, &tmp_handle, m, n, + Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), + Internal_a_entries(a_entries.data(), a_entries.extent(0)), + Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), + Internal_b_entries(b_entries.data(), b_entries.extent(0)), + Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0))); } } @@ -112,23 +100,16 @@ void spadd_symbolic(KernelHandle *handle, Args... args) { spadd_symbolic(typename KernelHandle::HandleExecSpace{}, handle, args...); } -template -void spadd_numeric(const ExecSpace &exec, KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t m, - typename KernelHandle::const_nnz_lno_t n, - const alno_row_view_t_ a_rowmap, - const alno_nnz_view_t_ a_entries, - const ascalar_nnz_view_t_ a_values, const ascalar_t_ alpha, - const blno_row_view_t_ b_rowmap, - const blno_nnz_view_t_ b_entries, - const bscalar_nnz_view_t_ b_values, const bscalar_t_ beta, - const clno_row_view_t_ c_rowmap, clno_nnz_view_t_ c_entries, - cscalar_nnz_view_t_ c_values) { +template +void spadd_numeric(const ExecSpace &exec, KernelHandle *handle, typename KernelHandle::const_nnz_lno_t m, + typename KernelHandle::const_nnz_lno_t n, const alno_row_view_t_ a_rowmap, + const alno_nnz_view_t_ a_entries, const ascalar_nnz_view_t_ a_values, const ascalar_t_ alpha, + const blno_row_view_t_ b_rowmap, const blno_nnz_view_t_ b_entries, + const bscalar_nnz_view_t_ b_values, const bscalar_t_ beta, const clno_row_view_t_ c_rowmap, + clno_nnz_view_t_ c_entries, cscalar_nnz_view_t_ c_values) { typedef typename KernelHandle::HandleTempMemorySpace MemSpace; typedef typename KernelHandle::HandlePersistentMemorySpace PersistentMemSpace; typedef typename Kokkos::Device DeviceType; @@ -137,93 +118,79 @@ void spadd_numeric(const ExecSpace &exec, KernelHandle *handle, typedef typename KernelHandle::const_nnz_lno_t c_lno_t; typedef typename 
KernelHandle::const_nnz_scalar_t c_scalar_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, ExecSpace, MemSpace, PersistentMemSpace> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle ConstKernelHandle; ConstKernelHandle tmp_handle(*handle); // handle->exec_space is also copied typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_a_rowmap; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_a_entries; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_a_values; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_b_rowmap; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_b_entries; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_b_values; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_c_rowmap; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_c_entries; typedef Kokkos::View::array_layout, - DeviceType, Kokkos::MemoryTraits> + typename 
KokkosKernels::Impl::GetUnifiedLayout::array_layout, DeviceType, + Kokkos::MemoryTraits> Internal_c_values; auto addHandle = handle->get_spadd_handle(); bool useFallback = !addHandle->is_input_strict_crs(); if (useFallback) { - KokkosSparse::Impl::SPADD_NUMERIC< - ExecSpace, ConstKernelHandle, Internal_a_rowmap, Internal_a_entries, - Internal_a_values, Internal_b_rowmap, Internal_b_entries, - Internal_b_values, Internal_c_rowmap, Internal_c_entries, - Internal_c_values, false>:: - spadd_numeric(exec, &tmp_handle, m, n, alpha, - Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), - Internal_a_entries(a_entries.data(), a_entries.extent(0)), - Internal_a_values(a_values.data(), a_values.extent(0)), - beta, - Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), - Internal_b_entries(b_entries.data(), b_entries.extent(0)), - Internal_b_values(b_values.data(), b_values.extent(0)), - Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0)), - Internal_c_entries(c_entries.data(), c_entries.extent(0)), - Internal_c_values(c_values.data(), c_values.extent(0))); + KokkosSparse::Impl::SPADD_NUMERIC::spadd_numeric(exec, &tmp_handle, m, n, alpha, + Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), + Internal_a_entries(a_entries.data(), a_entries.extent(0)), + Internal_a_values(a_values.data(), a_values.extent(0)), + beta, + Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), + Internal_b_entries(b_entries.data(), b_entries.extent(0)), + Internal_b_values(b_values.data(), b_values.extent(0)), + Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0)), + Internal_c_entries(c_entries.data(), c_entries.extent(0)), + Internal_c_values(c_values.data(), c_values.extent(0))); } else { KokkosSparse::Impl::SPADD_NUMERIC< - ExecSpace, ConstKernelHandle, Internal_a_rowmap, Internal_a_entries, - Internal_a_values, Internal_b_rowmap, Internal_b_entries, - Internal_b_values, Internal_c_rowmap, Internal_c_entries, - Internal_c_values>:: - spadd_numeric(exec, &tmp_handle, m, n, 
alpha, - Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), - Internal_a_entries(a_entries.data(), a_entries.extent(0)), - Internal_a_values(a_values.data(), a_values.extent(0)), - beta, - Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), - Internal_b_entries(b_entries.data(), b_entries.extent(0)), - Internal_b_values(b_values.data(), b_values.extent(0)), - Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0)), - Internal_c_entries(c_entries.data(), c_entries.extent(0)), - Internal_c_values(c_values.data(), c_values.extent(0))); + ExecSpace, ConstKernelHandle, Internal_a_rowmap, Internal_a_entries, Internal_a_values, Internal_b_rowmap, + Internal_b_entries, Internal_b_values, Internal_c_rowmap, Internal_c_entries, + Internal_c_values>::spadd_numeric(exec, &tmp_handle, m, n, alpha, + Internal_a_rowmap(a_rowmap.data(), a_rowmap.extent(0)), + Internal_a_entries(a_entries.data(), a_entries.extent(0)), + Internal_a_values(a_values.data(), a_values.extent(0)), beta, + Internal_b_rowmap(b_rowmap.data(), b_rowmap.extent(0)), + Internal_b_entries(b_entries.data(), b_entries.extent(0)), + Internal_b_values(b_values.data(), b_values.extent(0)), + Internal_c_rowmap(c_rowmap.data(), c_rowmap.extent(0)), + Internal_c_entries(c_entries.data(), c_entries.extent(0)), + Internal_c_values(c_values.data(), c_values.extent(0))); } } @@ -236,10 +203,8 @@ void spadd_numeric(KernelHandle *handle, Args... args) { // Symbolic: count entries in each row in C to produce rowmap // kernel handle has information about whether it is sorted add or not. 
-template -void spadd_symbolic(const ExecSpace &exec, KernelHandle *handle, - const AMatrix &A, const BMatrix &B, CMatrix &C) { +template +void spadd_symbolic(const ExecSpace &exec, KernelHandle *handle, const AMatrix &A, const BMatrix &B, CMatrix &C) { using row_map_type = typename CMatrix::row_map_type::non_const_type; using entries_type = typename CMatrix::index_type::non_const_type; using values_type = typename CMatrix::values_type::non_const_type; @@ -247,9 +212,7 @@ void spadd_symbolic(const ExecSpace &exec, KernelHandle *handle, auto addHandle = handle->get_spadd_handle(); // Create the row_map of C, no need to initialize it - row_map_type row_mapC( - Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "row map"), - A.numRows() + 1); + row_map_type row_mapC(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "row map"), A.numRows() + 1); // Shortcuts for special cases as they cause errors in some TPL // implementations (e.g., cusparse and hipsparse) @@ -260,35 +223,27 @@ void spadd_symbolic(const ExecSpace &exec, KernelHandle *handle, Kokkos::deep_copy(exec, row_mapC, A.graph.row_map); addHandle->set_c_nnz(A.graph.entries.extent(0)); } else { - KokkosSparse::Experimental::spadd_symbolic( - exec, handle, A.numRows(), A.numCols(), A.graph.row_map, - A.graph.entries, B.graph.row_map, B.graph.entries, row_mapC); + KokkosSparse::Experimental::spadd_symbolic(exec, handle, A.numRows(), A.numCols(), A.graph.row_map, A.graph.entries, + B.graph.row_map, B.graph.entries, row_mapC); } // Now create and allocate the entries and values // views so we can build a graph and then matrix C // and subsequently construct C. - entries_type entriesC( - Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "entries"), - addHandle->get_c_nnz()); + entries_type entriesC(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "entries"), addHandle->get_c_nnz()); // Finally since we already have the number of nnz handy // we can go ahead and allocate C's values and set them. 
- values_type valuesC( - Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "values"), - addHandle->get_c_nnz()); + values_type valuesC(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "values"), addHandle->get_c_nnz()); - C = CMatrix("matrix", A.numRows(), A.numCols(), addHandle->get_c_nnz(), - valuesC, row_mapC, entriesC); + C = CMatrix("matrix", A.numRows(), A.numCols(), addHandle->get_c_nnz(), valuesC, row_mapC, entriesC); } // Numeric: fill the column indices and values // kernel handle has information about whether it is sorted add or not. -template -void spadd_numeric(const ExecSpace &exec, KernelHandle *handle, - const AScalar alpha, const AMatrix &A, const BScalar beta, - const BMatrix &B, CMatrix &C) { +template +void spadd_numeric(const ExecSpace &exec, KernelHandle *handle, const AScalar alpha, const AMatrix &A, + const BScalar beta, const BMatrix &B, CMatrix &C) { if (!A.nnz()) { Kokkos::deep_copy(exec, C.graph.entries, B.graph.entries); KokkosBlas::scal(exec, C.values, beta, B.values); @@ -296,27 +251,23 @@ void spadd_numeric(const ExecSpace &exec, KernelHandle *handle, Kokkos::deep_copy(exec, C.graph.entries, A.graph.entries); KokkosBlas::scal(exec, C.values, alpha, A.values); } else { - KokkosSparse::Experimental::spadd_numeric( - exec, handle, A.numRows(), A.numCols(), A.graph.row_map, - A.graph.entries, A.values, alpha, B.graph.row_map, B.graph.entries, - B.values, beta, C.graph.row_map, C.graph.entries, C.values); + KokkosSparse::Experimental::spadd_numeric(exec, handle, A.numRows(), A.numCols(), A.graph.row_map, A.graph.entries, + A.values, alpha, B.graph.row_map, B.graph.entries, B.values, beta, + C.graph.row_map, C.graph.entries, C.values); } } // One without an explicit execution space argument -template -void spadd_symbolic(KernelHandle *handle, const AMatrix &A, const BMatrix &B, - CMatrix &C) { +template +void spadd_symbolic(KernelHandle *handle, const AMatrix &A, const BMatrix &B, CMatrix &C) { spadd_symbolic(typename 
AMatrix::execution_space{}, handle, A, B, C); } -template -void spadd_numeric(KernelHandle *handle, const AScalar alpha, const AMatrix &A, - const BScalar beta, const BMatrix &B, CMatrix &C) { - spadd_numeric(typename AMatrix::execution_space{}, handle, alpha, A, beta, B, - C); +template +void spadd_numeric(KernelHandle *handle, const AScalar alpha, const AMatrix &A, const BScalar beta, const BMatrix &B, + CMatrix &C) { + spadd_numeric(typename AMatrix::execution_space{}, handle, alpha, A, beta, B, C); } } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp index 760f912c6d3c..ea9594ca3e2f 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp @@ -23,8 +23,8 @@ namespace KokkosSparse { -template +template class SPADDHandle { public: typedef typename lno_nnz_view_t_::non_const_type nnz_lno_view_t; @@ -38,12 +38,7 @@ class SPADDHandle { void* workspace; cusparseMatDescr_t descrA, descrB, descrC; - SpaddCusparseData() - : nbytes(0), - workspace(nullptr), - descrA(nullptr), - descrB(nullptr), - descrC(nullptr) {} + SpaddCusparseData() : nbytes(0), workspace(nullptr), descrA(nullptr), descrB(nullptr), descrC(nullptr) {} ~SpaddCusparseData() { Kokkos::kokkos_free(workspace); @@ -89,8 +84,7 @@ class SPADDHandle { /// \brief sets the result nnz size. /// \param a_pos_in The offset into a. /// \param b_pos_in The offset into b. - void set_a_b_pos(const nnz_lno_view_t& a_pos_in, - const nnz_lno_view_t& b_pos_in) { + void set_a_b_pos(const nnz_lno_view_t& a_pos_in, const nnz_lno_view_t& b_pos_in) { a_pos = a_pos_in; b_pos = b_pos_in; } @@ -101,9 +95,7 @@ class SPADDHandle { /// \brief sets the result nnz size. /// \param result_nnz_size_ size of the output matrix. 
- void set_c_nnz(size_type result_nnz_size_) { - this->result_nnz_size = result_nnz_size_; - } + void set_c_nnz(size_type result_nnz_size_) { this->result_nnz_size = result_nnz_size_; } /** * \brief returns the result nnz size. diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm.hpp index b2737a9e2cbc..5cc052696774 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm.hpp @@ -38,34 +38,27 @@ namespace KokkosSparse { /// @param C //// template -void spgemm_symbolic(KernelHandle& kh, const AMatrix& A, const bool Amode, - const BMatrix& B, const bool Bmode, CMatrix& C) { +void spgemm_symbolic(KernelHandle& kh, const AMatrix& A, const bool Amode, const BMatrix& B, const bool Bmode, + CMatrix& C) { using row_map_type = typename CMatrix::row_map_type::non_const_type; using entries_type = typename CMatrix::index_type::non_const_type; using values_type = typename CMatrix::values_type::non_const_type; - row_map_type row_mapC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "non_const_lnow_row"), - A.numRows() + 1); + row_map_type row_mapC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "non_const_lnow_row"), A.numRows() + 1); entries_type entriesC; values_type valuesC; - KokkosSparse::Experimental::spgemm_symbolic( - &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, - A.graph.entries, Amode, B.graph.row_map, B.graph.entries, Bmode, - row_mapC); + KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, + A.graph.entries, Amode, B.graph.row_map, B.graph.entries, Bmode, + row_mapC); const size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); if (c_nnz_size) { - entriesC = entries_type( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), - c_nnz_size); - valuesC = values_type( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); 
+ entriesC = entries_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), c_nnz_size); + valuesC = values_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); } - C = CMatrix("C=AB", A.numRows(), B.numCols(), c_nnz_size, valuesC, row_mapC, - entriesC); + C = CMatrix("C=AB", A.numRows(), B.numCols(), c_nnz_size, valuesC, row_mapC, entriesC); } /// @@ -82,10 +75,8 @@ void spgemm_symbolic(KernelHandle& kh, const AMatrix& A, const bool Amode, /// @param transposeB /// @param C /// -template -void block_spgemm_symbolic(KernelHandle& kh, const AMatrixType& A, - const bool transposeA, const BMatrixType& B, +template +void block_spgemm_symbolic(KernelHandle& kh, const AMatrixType& A, const bool transposeA, const BMatrixType& B, const bool transposeB, CMatrixType& C) { using row_map_type = typename CMatrixType::row_map_type::non_const_type; using entries_type = typename CMatrixType::index_type::non_const_type; @@ -93,33 +84,24 @@ void block_spgemm_symbolic(KernelHandle& kh, const AMatrixType& A, auto blockDim = A.blockDim(); if (blockDim != B.blockDim()) { - throw std::invalid_argument( - "Block SpGEMM must be called for matrices with the same block size"); + throw std::invalid_argument("Block SpGEMM must be called for matrices with the same block size"); } - row_map_type row_mapC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "non_const_lnow_row"), - A.numRows() + 1); + row_map_type row_mapC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "non_const_lnow_row"), A.numRows() + 1); - KokkosSparse::Experimental::spgemm_symbolic( - &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, - A.graph.entries, transposeA, B.graph.row_map, B.graph.entries, transposeB, - row_mapC); + KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, + A.graph.entries, transposeA, B.graph.row_map, B.graph.entries, transposeB, + row_mapC); entries_type entriesC; values_type valuesC; const size_t 
c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); if (c_nnz_size) { - entriesC = entries_type( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), - c_nnz_size); - valuesC = - values_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), - c_nnz_size * blockDim * blockDim); + entriesC = entries_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), c_nnz_size); + valuesC = values_type(Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size * blockDim * blockDim); } - C = CMatrixType("C=AB", A.numRows(), B.numCols(), c_nnz_size, valuesC, - row_mapC, entriesC, blockDim); + C = CMatrixType("C=AB", A.numRows(), B.numCols(), c_nnz_size, valuesC, row_mapC, entriesC, blockDim); } /// @@ -137,16 +119,15 @@ void block_spgemm_symbolic(KernelHandle& kh, const AMatrixType& A, /// @param C /// template -void spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode, - const BMatrix& B, const bool Bmode, CMatrix& C) { +void spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode, const BMatrix& B, const bool Bmode, + CMatrix& C) { // using row_map_type = typename CMatrix::index_type::non_const_type; // using entries_type = typename CMatrix::row_map_type::non_const_type; // using values_type = typename CMatrix::values_type::non_const_type; - KokkosSparse::Experimental::spgemm_numeric( - &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, - A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries, - B.values, Bmode, C.graph.row_map, C.graph.entries, C.values); + KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, + A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries, + B.values, Bmode, C.graph.row_map, C.graph.entries, C.values); } /// @@ -164,18 +145,16 @@ void spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode, /// @param C /// template -void block_spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool 
Amode, - const BMatrix& B, const bool Bmode, CMatrix& C) { +void block_spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode, const BMatrix& B, const bool Bmode, + CMatrix& C) { auto blockDim = A.blockDim(); if (blockDim != B.blockDim() || blockDim != C.blockDim()) { - throw std::invalid_argument( - "Block SpGEMM must be called for matrices with the same block size"); + throw std::invalid_argument("Block SpGEMM must be called for matrices with the same block size"); } - KokkosSparse::Experimental::spgemm_numeric( - &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, - A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries, - B.values, Bmode, C.graph.row_map, C.graph.entries, C.values, blockDim); + KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, + A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries, + B.values, Bmode, C.graph.row_map, C.graph.entries, C.values, blockDim); } /// @@ -191,32 +170,27 @@ void block_spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode, /// @return CMatrix /// template -CMatrix spgemm(const AMatrix& A, const bool Amode, const BMatrix& B, - const bool Bmode) { +CMatrix spgemm(const AMatrix& A, const bool Amode, const BMatrix& B, const bool Bmode) { // Canonicalize the matrix types: // - Make A,B have const values and entries. 
// - Make all views in A,B unmanaged, but otherwise default memory traits // - C must have managed memory since its views are allocated in this // function - using AMatrix_Internal = KokkosSparse::CrsMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type>; - using BMatrix_Internal = KokkosSparse::CrsMatrix< - typename BMatrix::const_value_type, typename BMatrix::const_ordinal_type, - typename BMatrix::device_type, Kokkos::MemoryTraits, - typename BMatrix::const_size_type>; + using AMatrix_Internal = + KokkosSparse::CrsMatrix, + typename AMatrix::const_size_type>; + using BMatrix_Internal = + KokkosSparse::CrsMatrix, + typename BMatrix::const_size_type>; using CMatrix_Internal = - KokkosSparse::CrsMatrix; + KokkosSparse::CrsMatrix; // Check now that A, B dimensions are compatible to multiply auto opACols = Amode ? A.numRows() : A.numCols(); auto opBRows = Bmode ? B.numCols() : B.numRows(); - if (Amode || Bmode) - throw std::invalid_argument( - "KokkosSparse::spgemm: transposing A and/or B is not yet supported"); + if (Amode || Bmode) throw std::invalid_argument("KokkosSparse::spgemm: transposing A and/or B is not yet supported"); if (opACols != opBRows) throw std::invalid_argument( "KokkosSparse::spgemm: op(A) and op(B) have incompatible dimensions " @@ -235,16 +209,14 @@ CMatrix spgemm(const AMatrix& A, const bool Amode, const BMatrix& B, if (!A.numRows() || !A.numCols() || !B.numCols() || !A.nnz() || !B.nnz()) { auto Crows = Amode ? A.numCols() : A.numRows(); auto Ccols = Bmode ? 
B.numRows() : B.numCols(); - typename CMatrix::row_map_type::non_const_type row_mapC("C rowmap", - Crows + 1); + typename CMatrix::row_map_type::non_const_type row_mapC("C rowmap", Crows + 1); typename CMatrix::index_type entriesC; typename CMatrix::values_type valuesC; return CMatrix("C", Crows, Ccols, 0, valuesC, row_mapC, entriesC); } - return CMatrix(KokkosSparse::Impl::SPGEMM_NOREUSE< - CMatrix_Internal, AMatrix_Internal, - BMatrix_Internal>::spgemm_noreuse(A_internal, Amode, - B_internal, Bmode)); + return CMatrix( + KokkosSparse::Impl::SPGEMM_NOREUSE::spgemm_noreuse( + A_internal, Amode, B_internal, Bmode)); } } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_handle.hpp index a95c828c963c..9e7679a3a92e 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_handle.hpp @@ -22,7 +22,7 @@ #include #include #include -//#define VERBOSE +// #define VERBOSE #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE #include "KokkosSparse_Utils_rocsparse.hpp" @@ -56,8 +56,8 @@ enum SPGEMMAlgorithm { "supported SpGEMM calls, if enabled.")]], // TRIANGLE COUNTING SPECIALIZED - SPGEMM_KK_TRIANGLE_AI, // SPGEMM_KK_TRIANGLE_DEFAULT, SPGEMM_KK_TRIANGLE_MEM, - // SPGEMM_KK_TRIANGLE_DENSE, + SPGEMM_KK_TRIANGLE_AI, // SPGEMM_KK_TRIANGLE_DEFAULT, SPGEMM_KK_TRIANGLE_MEM, + // SPGEMM_KK_TRIANGLE_DENSE, SPGEMM_KK_TRIANGLE_IA_UNION, // SPGEMM_KK_TRIANGLE_DEFAULT_IA_UNION, // SPGEMM_KK_TRIANGLE_MEM_IA_UNION, // SPGEMM_KK_TRIANGLE_DENSE_IA_UNION, @@ -91,8 +91,8 @@ enum SPGEMMAccumulator { SPGEMM_ACC_DENSE, SPGEMM_ACC_SPARSE, }; -template +template class SPGEMMHandle { public: typedef ExecutionSpace HandleExecSpace; @@ -108,29 +108,20 @@ class SPGEMMHandle { typedef typename std::remove_const::type nnz_scalar_t; typedef const nnz_scalar_t const_nnz_scalar_t; - typedef typename Kokkos::View - 
row_lno_temp_work_view_t; - typedef typename Kokkos::View - row_lno_persistent_work_view_t; - typedef typename row_lno_persistent_work_view_t::HostMirror - row_lno_persistent_work_host_view_t; // Host view type - - typedef typename Kokkos::View - scalar_temp_work_view_t; - typedef typename Kokkos::View - scalar_persistent_work_view_t; - - typedef typename Kokkos::View - nnz_lno_temp_work_view_t; - typedef typename Kokkos::View - nnz_lno_persistent_work_view_t; - typedef typename nnz_lno_persistent_work_view_t::HostMirror - nnz_lno_persistent_work_host_view_t; // Host view type + typedef typename Kokkos::View row_lno_temp_work_view_t; + typedef typename Kokkos::View row_lno_persistent_work_view_t; + typedef typename row_lno_persistent_work_view_t::HostMirror row_lno_persistent_work_host_view_t; // Host view type + + typedef typename Kokkos::View scalar_temp_work_view_t; + typedef typename Kokkos::View scalar_persistent_work_view_t; + + typedef typename Kokkos::View nnz_lno_temp_work_view_t; + typedef typename Kokkos::View nnz_lno_persistent_work_view_t; + typedef typename nnz_lno_persistent_work_view_t::HostMirror nnz_lno_persistent_work_host_view_t; // Host view type #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE struct rocSparseSpgemmHandleType { - KokkosKernels::Experimental::Controls - kkControls; // give a singleton rocsparse handle + KokkosKernels::Experimental::Controls kkControls; // give a singleton rocsparse handle rocsparse_handle rocsparseHandle; rocsparse_operation opA, opB; rocsparse_mat_descr descr_A, descr_B, descr_C, descr_D; @@ -139,10 +130,8 @@ class SPGEMMHandle { void *buffer; rocSparseSpgemmHandleType(bool transposeA, bool transposeB) { - opA = - transposeA ? rocsparse_operation_transpose : rocsparse_operation_none; - opB = - transposeB ? rocsparse_operation_transpose : rocsparse_operation_none; + opA = transposeA ? rocsparse_operation_transpose : rocsparse_operation_none; + opB = transposeB ? 
rocsparse_operation_transpose : rocsparse_operation_none; bufferSize = 0; buffer = nullptr; @@ -183,10 +172,8 @@ class SPGEMMHandle { void *buffer3, *buffer4, *buffer5; cuSparseSpgemmHandleType(bool transposeA, bool transposeB) { - opA = transposeA ? CUSPARSE_OPERATION_TRANSPOSE - : CUSPARSE_OPERATION_NON_TRANSPOSE; - opB = transposeB ? CUSPARSE_OPERATION_TRANSPOSE - : CUSPARSE_OPERATION_NON_TRANSPOSE; + opA = transposeA ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE; + opB = transposeB ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE; scalarType = Impl::cuda_data_type_from(); alg = CUSPARSE_SPGEMM_DEFAULT; @@ -219,14 +206,10 @@ class SPGEMMHandle { cusparseHandle = kkControls.getCusparseHandle(); KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&generalDescr)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(generalDescr, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(generalDescr, CUSPARSE_INDEX_BASE_ZERO)); - } - ~cuSparseSpgemmHandleType() { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(generalDescr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(generalDescr, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(generalDescr, CUSPARSE_INDEX_BASE_ZERO)); } + ~cuSparseSpgemmHandleType() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(generalDescr)); } }; #endif #endif @@ -237,9 +220,7 @@ class SPGEMMHandle { // Then this assumes ownership of it and will destroy it later. 
mklSpgemmHandleType(const sparse_matrix_t &C_) : C(C_) {} - ~mklSpgemmHandleType() { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_destroy(C)); - } + ~mklSpgemmHandleType() { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_destroy(C)); } sparse_matrix_t C; }; @@ -263,8 +244,7 @@ class SPGEMMHandle { nnz_lno_t max_nnz_compressed_result; size_type compressed_b_size; - row_lno_temp_work_view_t - compressed_b_rowmap; // compressed_b_set_begins, compressed_b_set_nexts; + row_lno_temp_work_view_t compressed_b_rowmap; // compressed_b_set_begins, compressed_b_set_nexts; nnz_lno_temp_work_view_t compressed_b_set_indices, compressed_b_sets; row_lno_temp_work_view_t compressed_c_rowmap; @@ -314,32 +294,24 @@ class SPGEMMHandle { this->first_level_hash_cut_off = first_level_hash_cut_off_; } - double get_first_level_hash_cut_off() { - return this->first_level_hash_cut_off; - } + double get_first_level_hash_cut_off() { return this->first_level_hash_cut_off; } - void set_compression_cut_off(double compression_cut_off_) { - this->compression_cut_off = compression_cut_off_; - } + void set_compression_cut_off(double compression_cut_off_) { this->compression_cut_off = compression_cut_off_; } double get_compression_cut_off() { return this->compression_cut_off; } void set_min_hash_size_scale(int scale) { min_hash_size_scale = scale; } int get_min_hash_size_scale() { return min_hash_size_scale; } - void set_read_write_cost_calc(bool read_write_cost_cal) { - this->calculate_read_write_cost = read_write_cost_cal; - } + void set_read_write_cost_calc(bool read_write_cost_cal) { this->calculate_read_write_cost = read_write_cost_cal; } int get_read_write_cost_calc() { return this->calculate_read_write_cost; } - typename Kokkos::View persistent_c_xadj, - persistent_a_xadj, persistent_b_xadj, persistent_a_adj, persistent_b_adj; + typename Kokkos::View persistent_c_xadj, persistent_a_xadj, persistent_b_xadj, + persistent_a_adj, persistent_b_adj; size_t MaxColDenseAcc; bool mkl_keep_output; bool 
mkl_convert_to_1base; bool is_compression_single_step; - void set_mkl_sort_option(int mkl_sort_option_) { - this->mkl_sort_option = mkl_sort_option_; - } + void set_mkl_sort_option(int mkl_sort_option_) { this->mkl_sort_option = mkl_sort_option_; } int get_mkl_sort_option() { return this->mkl_sort_option; } #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE @@ -363,18 +335,12 @@ class SPGEMMHandle { public: #endif - void set_c_column_indices(nnz_lno_temp_work_view_t c_col_indices_) { - this->c_column_indices = c_col_indices_; - } + void set_c_column_indices(nnz_lno_temp_work_view_t c_col_indices_) { this->c_column_indices = c_col_indices_; } - nnz_lno_temp_work_view_t get_c_column_indices() { - return this->c_column_indices; - } + nnz_lno_temp_work_view_t get_c_column_indices() { return this->c_column_indices; } - void set_color_xadj(nnz_lno_t num_colors_, - nnz_lno_persistent_work_host_view_t color_xadj_, - nnz_lno_persistent_work_view_t color_adj_, - nnz_lno_persistent_work_view_t vertex_colors_, + void set_color_xadj(nnz_lno_t num_colors_, nnz_lno_persistent_work_host_view_t color_xadj_, + nnz_lno_persistent_work_view_t color_adj_, nnz_lno_persistent_work_view_t vertex_colors_, nnz_lno_t num_multi_colors_, nnz_lno_t num_used_colors_) { num_colors = num_colors_; color_xadj = color_xadj_; @@ -387,26 +353,19 @@ class SPGEMMHandle { /// \brief sets the result nnz size. /// \param result_nnz_size_ size of the output matrix. - void set_c_nnz(size_type result_nnz_size_) { - this->result_nnz_size = result_nnz_size_; - } + void set_c_nnz(size_type result_nnz_size_) { this->result_nnz_size = result_nnz_size_; } /** * \brief returns the result nnz size. 
*/ size_type get_c_nnz() { return this->result_nnz_size; } - void set_multi_color_scale(double multi_color_scale_) { - this->multi_color_scale = multi_color_scale_; - } + void set_multi_color_scale(double multi_color_scale_) { this->multi_color_scale = multi_color_scale_; } double get_multi_color_scale() { return this->multi_color_scale; } - void get_color_xadj(nnz_lno_t &num_colors_, - nnz_lno_persistent_work_host_view_t &color_xadj_, - nnz_lno_persistent_work_view_t &color_adj_, - nnz_lno_persistent_work_view_t &vertex_colors_, - nnz_lno_t &num_multi_colors_, - nnz_lno_t &num_used_colors_) { + void get_color_xadj(nnz_lno_t &num_colors_, nnz_lno_persistent_work_host_view_t &color_xadj_, + nnz_lno_persistent_work_view_t &color_adj_, nnz_lno_persistent_work_view_t &vertex_colors_, + nnz_lno_t &num_multi_colors_, nnz_lno_t &num_used_colors_) { num_colors_ = num_colors; color_xadj_ = color_xadj; color_adj_ = color_adj; @@ -415,47 +374,34 @@ class SPGEMMHandle { vertex_colors_ = vertex_colors; } - void set_compressed_c(row_lno_temp_work_view_t compressed_c_rowmap_) { - compressed_c_rowmap = compressed_c_rowmap_; - } + void set_compressed_c(row_lno_temp_work_view_t compressed_c_rowmap_) { compressed_c_rowmap = compressed_c_rowmap_; } - void get_compressed_c(row_lno_temp_work_view_t &compressed_c_rowmap_) { - compressed_c_rowmap_ = compressed_c_rowmap; - } + void get_compressed_c(row_lno_temp_work_view_t &compressed_c_rowmap_) { compressed_c_rowmap_ = compressed_c_rowmap; } - void set_sort_lower_triangular(int option) { - this->sort_lower_triangular = option; - } + void set_sort_lower_triangular(int option) { this->sort_lower_triangular = option; } int get_sort_lower_triangular() { return this->sort_lower_triangular; } - void set_create_lower_triangular(bool option) { - this->create_lower_triangular = option; - } + void set_create_lower_triangular(bool option) { this->create_lower_triangular = option; } bool get_create_lower_triangular() { return 
this->create_lower_triangular; } void set_lower_triangular_permutation(nnz_lno_persistent_work_view_t ltp_) { this->lower_triangular_permutation = ltp_; } - nnz_lno_persistent_work_view_t get_lower_triangular_permutation() { - return this->lower_triangular_permutation; - } + nnz_lno_persistent_work_view_t get_lower_triangular_permutation() { return this->lower_triangular_permutation; } - void set_lower_triangular_matrix( - row_lno_persistent_work_view_t lower_triangular_matrix_rowmap_, - nnz_lno_persistent_work_view_t lower_triangular_matrix_entries_) { + void set_lower_triangular_matrix(row_lno_persistent_work_view_t lower_triangular_matrix_rowmap_, + nnz_lno_persistent_work_view_t lower_triangular_matrix_entries_) { this->lower_triangular_matrix_rowmap = lower_triangular_matrix_rowmap_; this->lower_triangular_matrix_entries = lower_triangular_matrix_entries_; } - void get_lower_triangular_matrix( - row_lno_persistent_work_view_t &lower_triangular_matrix_rowmap_, - nnz_lno_persistent_work_view_t &lower_triangular_matrix_entries_) { + void get_lower_triangular_matrix(row_lno_persistent_work_view_t &lower_triangular_matrix_rowmap_, + nnz_lno_persistent_work_view_t &lower_triangular_matrix_entries_) { lower_triangular_matrix_rowmap_ = this->lower_triangular_matrix_rowmap; lower_triangular_matrix_entries_ = this->lower_triangular_matrix_entries; } - void set_compressed_b(size_type b_nnz_size, - row_lno_temp_work_view_t compressed_b_rowmap_, + void set_compressed_b(size_type b_nnz_size, row_lno_temp_work_view_t compressed_b_rowmap_, nnz_lno_temp_work_view_t compressed_b_set_indices_, nnz_lno_temp_work_view_t compressed_b_sets_) { compressed_b_size = b_nnz_size; @@ -464,8 +410,7 @@ class SPGEMMHandle { compressed_b_sets = compressed_b_sets_; } - void get_compressed_b(size_type &b_nnz_size, - row_lno_temp_work_view_t &compressed_b_rowmap_, + void get_compressed_b(size_type &b_nnz_size, row_lno_temp_work_view_t &compressed_b_rowmap_, nnz_lno_temp_work_view_t 
&compressed_b_set_indices_, nnz_lno_temp_work_view_t &compressed_b_sets_) { b_nnz_size = compressed_b_size; @@ -568,13 +513,10 @@ class SPGEMMHandle { #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE void create_rocsparse_spgemm_handle(bool transA, bool transB) { this->destroy_rocsparse_spgemm_handle(); - this->rocsparse_spgemm_handle = - new rocSparseSpgemmHandleType(transA, transB); + this->rocsparse_spgemm_handle = new rocSparseSpgemmHandleType(transA, transB); } - rocSparseSpgemmHandleType *get_rocsparse_spgemm_handle() { - return this->rocsparse_spgemm_handle; - } + rocSparseSpgemmHandleType *get_rocsparse_spgemm_handle() { return this->rocsparse_spgemm_handle; } void destroy_rocsparse_spgemm_handle() { if (this->rocsparse_spgemm_handle != nullptr) { @@ -597,9 +539,7 @@ class SPGEMMHandle { } } - cuSparseSpgemmHandleType *get_cusparse_spgemm_handle() { - return this->cusparse_spgemm_handle; - } + cuSparseSpgemmHandleType *get_cusparse_spgemm_handle() { return this->cusparse_spgemm_handle; } #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL @@ -614,9 +554,7 @@ class SPGEMMHandle { } } - mklSpgemmHandleType *get_mkl_spgemm_handle() { - return this->mkl_spgemm_handle; - } + mklSpgemmHandleType *get_mkl_spgemm_handle() { return this->mkl_spgemm_handle; } #endif void choose_default_algorithm() { @@ -624,8 +562,7 @@ class SPGEMMHandle { if (std::is_same::value) { this->algorithm_type = SPGEMM_SERIAL; #ifdef VERBOSE - std::cout << "Serial Execution Space, Default Algorithm: SPGEMM_SERIAL" - << std::endl; + std::cout << "Serial Execution Space, Default Algorithm: SPGEMM_SERIAL" << std::endl; #endif } #endif @@ -634,8 +571,7 @@ class SPGEMMHandle { if (std::is_same::value) { this->algorithm_type = SPGEMM_SERIAL; #ifdef VERBOSE - std::cout << "THREADS Execution Space, Default Algorithm: SPGEMM_SERIAL" - << std::endl; + std::cout << "THREADS Execution Space, Default Algorithm: SPGEMM_SERIAL" << std::endl; #endif } #endif @@ -644,8 +580,7 @@ class SPGEMMHandle { if 
(std::is_same::value) { this->algorithm_type = SPGEMM_SERIAL; #ifdef VERBOSE - std::cout << "OpenMP Execution Space, Default Algorithm: SPGEMM_SERIAL" - << std::endl; + std::cout << "OpenMP Execution Space, Default Algorithm: SPGEMM_SERIAL" << std::endl; #endif } #endif @@ -654,8 +589,7 @@ class SPGEMMHandle { if (std::is_same::value) { this->algorithm_type = SPGEMM_KK; #ifdef VERBOSE - std::cout << "Cuda Execution Space, Default Algorithm: SPGEMM_KK" - << std::endl; + std::cout << "Cuda Execution Space, Default Algorithm: SPGEMM_KK" << std::endl; #endif } #endif @@ -664,8 +598,7 @@ class SPGEMMHandle { if (std::is_same::value) { this->algorithm_type = SPGEMM_KK; #ifdef VERBOSE - std::cout << "HIP Execution Space, Default Algorithm: SPGEMM_KK" - << std::endl; + std::cout << "HIP Execution Space, Default Algorithm: SPGEMM_KK" << std::endl; #endif } #endif @@ -674,25 +607,18 @@ class SPGEMMHandle { if (std::is_same::value) { this->algorithm_type = SPGEMM_KK; #ifdef VERBOSE - std::cout << "SYCL Execution Space, Default Algorithm: SPGEMM_KK" - << std::endl; + std::cout << "SYCL Execution Space, Default Algorithm: SPGEMM_KK" << std::endl; #endif } #endif } - void set_compression(bool compress_second_matrix_) { - this->compress_second_matrix = compress_second_matrix_; - } + void set_compression(bool compress_second_matrix_) { this->compress_second_matrix = compress_second_matrix_; } bool get_compression() { return this->compress_second_matrix; } - SPGEMMAccumulator get_accumulator_type() const { - return this->accumulator_type; - } - void set_accumulator_type(const SPGEMMAccumulator &acc_type) { - this->accumulator_type = acc_type; - } + SPGEMMAccumulator get_accumulator_type() const { return this->accumulator_type; } + void set_accumulator_type(const SPGEMMAccumulator &acc_type) { this->accumulator_type = acc_type; } // getters SPGEMMAlgorithm get_algorithm_type() const { return this->algorithm_type; } @@ -706,22 +632,16 @@ class SPGEMMHandle { template nnz_lno_t 
get_max_result_nnz(const c_row_view_t &row_mapC) { if (!this->computed_max_nnz_inresult) { - this->max_nnz_inresult = - KokkosSparse::Impl::graph_max_degree(row_mapC); + this->max_nnz_inresult = KokkosSparse::Impl::graph_max_degree(row_mapC); this->computed_max_nnz_inresult = true; } return this->max_nnz_inresult; } - nnz_lno_t get_max_compresed_result_nnz() const { - return this->max_nnz_compressed_result; - } + nnz_lno_t get_max_compresed_result_nnz() const { return this->max_nnz_compressed_result; } // setters - void set_algorithm_type(const SPGEMMAlgorithm &sgs_algo) { - this->algorithm_type = sgs_algo; - } + void set_algorithm_type(const SPGEMMAlgorithm &sgs_algo) { this->algorithm_type = sgs_algo; } void set_call_symbolic(bool call = true) { this->called_symbolic = call; } void set_computed_rowptrs() { this->computed_rowptrs = true; } void set_computed_rowflops() { this->computed_rowflops = true; } @@ -733,12 +653,9 @@ class SPGEMMHandle { this->computed_max_nnz_inresult = true; } - void set_max_compresed_result_nnz(nnz_lno_t num_result_nnz_) { - this->max_nnz_compressed_result = num_result_nnz_; - } + void set_max_compresed_result_nnz(nnz_lno_t num_result_nnz_) { this->max_nnz_compressed_result = num_result_nnz_; } - void vector_team_size(int max_allowed_team_size, int &suggested_vector_size_, - int &suggested_team_size_, size_type nr, + void vector_team_size(int max_allowed_team_size, int &suggested_vector_size_, int &suggested_team_size_, size_type nr, size_type nnz) { // suggested_team_size_ = this->suggested_team_size = 1; // suggested_vector_size_=this->suggested_vector_size = 1; @@ -766,14 +683,11 @@ class SPGEMMHandle { this->is_compression_single_step = isCompressionSingleStep; } - void set_min_col_of_row( - nnz_lno_persistent_work_view_t min_result_row_for_each_row_) { + void set_min_col_of_row(nnz_lno_persistent_work_view_t min_result_row_for_each_row_) { this->min_result_row_for_each_row = min_result_row_for_each_row_; } - 
nnz_lno_persistent_work_view_t get_min_col_of_row() { - return this->min_result_row_for_each_row; - } + nnz_lno_persistent_work_view_t get_min_col_of_row() { return this->min_result_row_for_each_row; } bool get_compression_step() { return is_compression_single_step; } @@ -787,27 +701,20 @@ class SPGEMMHandle { uint32_t b_graph_hash = 0U; public: - template - bool checkMatrixIdentitiesSymbolic(const a_rowptrs_t &a_rowptrsIn, - const a_entries_t &a_entriesIn, - const b_rowptrs_t &b_rowptrsIn, - const b_entries_t &b_entriesIn) { + template + bool checkMatrixIdentitiesSymbolic(const a_rowptrs_t &a_rowptrsIn, const a_entries_t &a_entriesIn, + const b_rowptrs_t &b_rowptrsIn, const b_entries_t &b_entriesIn) { #ifndef NDEBUG // If this is the first symbolic call, assign the handle's CRS pointers to // check against later if (!computedInputHashes) { - a_graph_hash = KokkosKernels::Impl::hashView(a_rowptrsIn) ^ - KokkosKernels::Impl::hashView(a_entriesIn); - b_graph_hash = KokkosKernels::Impl::hashView(b_rowptrsIn) ^ - KokkosKernels::Impl::hashView(b_entriesIn); + a_graph_hash = KokkosKernels::Impl::hashView(a_rowptrsIn) ^ KokkosKernels::Impl::hashView(a_entriesIn); + b_graph_hash = KokkosKernels::Impl::hashView(b_rowptrsIn) ^ KokkosKernels::Impl::hashView(b_entriesIn); computedInputHashes = true; } else { - if (a_graph_hash != (KokkosKernels::Impl::hashView(a_rowptrsIn) ^ - KokkosKernels::Impl::hashView(a_entriesIn))) + if (a_graph_hash != (KokkosKernels::Impl::hashView(a_rowptrsIn) ^ KokkosKernels::Impl::hashView(a_entriesIn))) return false; - if (b_graph_hash != (KokkosKernels::Impl::hashView(b_rowptrsIn) ^ - KokkosKernels::Impl::hashView(b_entriesIn))) + if (b_graph_hash != (KokkosKernels::Impl::hashView(b_rowptrsIn) ^ KokkosKernels::Impl::hashView(b_entriesIn))) return false; } #else @@ -819,18 +726,13 @@ class SPGEMMHandle { return true; } - template - bool checkMatrixIdentitiesNumeric(const a_rowptrs_t &a_rowptrsIn, - const a_entries_t &a_entriesIn, - const 
b_rowptrs_t &b_rowptrsIn, - const b_entries_t &b_entriesIn) { + template + bool checkMatrixIdentitiesNumeric(const a_rowptrs_t &a_rowptrsIn, const a_entries_t &a_entriesIn, + const b_rowptrs_t &b_rowptrsIn, const b_entries_t &b_entriesIn) { #ifndef NDEBUG - if (a_graph_hash != (KokkosKernels::Impl::hashView(a_rowptrsIn) ^ - KokkosKernels::Impl::hashView(a_entriesIn))) + if (a_graph_hash != (KokkosKernels::Impl::hashView(a_rowptrsIn) ^ KokkosKernels::Impl::hashView(a_entriesIn))) return false; - if (b_graph_hash != (KokkosKernels::Impl::hashView(b_rowptrsIn) ^ - KokkosKernels::Impl::hashView(b_entriesIn))) + if (b_graph_hash != (KokkosKernels::Impl::hashView(b_rowptrsIn) ^ KokkosKernels::Impl::hashView(b_entriesIn))) return false; #else (void)a_rowptrsIn; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_jacobi.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_jacobi.hpp index a33254dc24b8..e6aea1ce1873 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_jacobi.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_jacobi.hpp @@ -23,96 +23,75 @@ namespace KokkosSparse { namespace Experimental { -template -void spgemm_jacobi(KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t m, - typename KernelHandle::const_nnz_lno_t n, - typename KernelHandle::const_nnz_lno_t k, - - alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, - ascalar_nnz_view_t_ valuesA, - - bool transposeA, blno_row_view_t_ row_mapB, - blno_nnz_view_t_ entriesB, bscalar_nnz_view_t_ valuesB, - - bool transposeB, clno_row_view_t_ row_mapC, - clno_nnz_view_t_ &entriesC, cscalar_nnz_view_t_ &valuesC, - - typename cscalar_nnz_view_t_::const_value_type omega, - dinv_view_t_ dinv) { +template +void spgemm_jacobi(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t m, + typename KernelHandle::const_nnz_lno_t n, typename KernelHandle::const_nnz_lno_t k, + + alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, ascalar_nnz_view_t_ 
valuesA, + + bool transposeA, blno_row_view_t_ row_mapB, blno_nnz_view_t_ entriesB, bscalar_nnz_view_t_ valuesB, + + bool transposeB, clno_row_view_t_ row_mapC, clno_nnz_view_t_ &entriesC, cscalar_nnz_view_t_ &valuesC, + + typename cscalar_nnz_view_t_::const_value_type omega, dinv_view_t_ dinv) { static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: Output matrix rowmap must be non-const."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: Output matrix entriesView must be " "non-const."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: Output matrix scalar view must be " "non-const."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: Size type of left handside matrix should " "be same as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: Size type of right handside matrix should " "be same as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: Size type of output matrix should be same " "as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: lno type of left handside matrix should be " "same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: lno type of right handside matrix should " "be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: lno type of output matrix should be same " "as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: scalar type of left handside matrix should " "be same as kernelHandle scalar."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: scalar type 
of right handside matrix " "should be same as kernelHandle scalar."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_jacobi: scalar type of output matrix should be " "same as kernelHandle scalar."); @@ -131,79 +110,59 @@ void spgemm_jacobi(KernelHandle *handle, typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; typedef typename Kokkos::Device UniformDevice_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UniformDevice_t, Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, Kokkos::MemoryTraits > Internal_blno_row_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, Kokkos::MemoryTraits > Internal_blno_nnz_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UniformDevice_t, Kokkos::MemoryTraits > Internal_bscalar_nnz_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, 
Kokkos::MemoryTraits > Internal_clno_row_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, Kokkos::MemoryTraits > Internal_clno_nnz_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, - Kokkos::MemoryTraits > + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UniformDevice_t, Kokkos::MemoryTraits > Internal_cscalar_nnz_view_t_; typedef Kokkos::View::array_layout, - UniformDevice_t, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, Kokkos::MemoryTraits > Internal_dinv_view_t_; @@ -218,18 +177,14 @@ void spgemm_jacobi(KernelHandle *handle, Internal_cscalar_nnz_view_t_ nonconst_c_s(valuesC.data(), valuesC.extent(0)); Internal_dinv_view_t_ const_d_s(dinv.data(), dinv.extent(0), dinv.extent(1)); - KokkosSparse::Impl::SPGEMM_JACOBI< - const_handle_type, Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, - Internal_ascalar_nnz_view_t_, Internal_blno_row_view_t_, - Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, - Internal_clno_row_view_t_, Internal_clno_nnz_view_t_, - Internal_cscalar_nnz_view_t_, - Internal_dinv_view_t_>::spgemm_jacobi(&tmp_handle, m, n, k, const_a_r, - const_a_l, const_a_s, transposeA, - const_b_r, const_b_l, const_b_s, - transposeB, nonconst_c_r, - nonconst_c_l, nonconst_c_s, omega, - const_d_s); + KokkosSparse::Impl::SPGEMM_JACOBI::spgemm_jacobi(&tmp_handle, m, n, k, const_a_r, const_a_l, + const_a_s, transposeA, const_b_r, const_b_l, + const_b_s, transposeB, nonconst_c_r, + nonconst_c_l, nonconst_c_s, omega, const_d_s); } } // namespace Experimental diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_numeric.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_numeric.hpp index e0930c04eea1..ff22f505a194 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_numeric.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_numeric.hpp @@ -30,87 
+30,70 @@ namespace Experimental { // // NOTE: Block CRS format is not yet supported ! // -template -void spgemm_numeric(KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t m, - typename KernelHandle::const_nnz_lno_t n, - typename KernelHandle::const_nnz_lno_t k, - alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, - ascalar_nnz_view_t_ valuesA, - - bool transposeA, blno_row_view_t_ row_mapB, - blno_nnz_view_t_ entriesB, bscalar_nnz_view_t_ valuesB, - bool transposeB, clno_row_view_t_ row_mapC, - clno_nnz_view_t_ &entriesC, cscalar_nnz_view_t_ &valuesC, +void spgemm_numeric(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t m, + typename KernelHandle::const_nnz_lno_t n, typename KernelHandle::const_nnz_lno_t k, + alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, ascalar_nnz_view_t_ valuesA, + + bool transposeA, blno_row_view_t_ row_mapB, blno_nnz_view_t_ entriesB, bscalar_nnz_view_t_ valuesB, + bool transposeB, clno_row_view_t_ row_mapC, clno_nnz_view_t_ &entriesC, + cscalar_nnz_view_t_ &valuesC, typename KernelHandle::const_nnz_lno_t block_dim = 1) { static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: Output matrix entriesView must be " "non-const."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: Output matrix scalar view must be " "non-const."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: Size type of left handside matrix should " "be same as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: Size type of right handside matrix should " "be same as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: Size type of output matrix should be same " "as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: lno type of 
left handside matrix should " "be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: lno type of right handside matrix should " "be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: lno type of output matrix should be same " "as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: scalar type of left handside matrix " "should be same as kernelHandle scalar."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: scalar type of right handside matrix " "should be same as kernelHandle scalar."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_numeric: scalar type of output matrix should be " "same as kernelHandle scalar."); @@ -129,84 +112,72 @@ void spgemm_numeric(KernelHandle *handle, typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; typedef typename Kokkos::Device UniformDevice_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // alno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // alno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; - typedef Kokkos::View< - typename ascalar_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - ascalar_nnz_view_t_>::array_layout, - UniformDevice_t, // typename 
ascalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + UniformDevice_t, // typename ascalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits > Internal_ascalar_nnz_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // blno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_blno_row_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // blno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_blno_nnz_view_t_; - typedef Kokkos::View< - typename bscalar_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - bscalar_nnz_view_t_>::array_layout, - UniformDevice_t, // typename bscalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + UniformDevice_t, // typename bscalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits > Internal_bscalar_nnz_view_t_; // static assert clno_row_view_t_ can be const type (row map is fixed after // symbolic phase). 
typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // clno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_clno_row_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // clno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_clno_nnz_view_t_; - typedef Kokkos::View< - typename cscalar_nnz_view_t_::non_const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - cscalar_nnz_view_t_>::array_layout, - UniformDevice_t, // typename cscalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + UniformDevice_t, // typename cscalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits > Internal_cscalar_nnz_view_t_; Internal_alno_row_view_t_ const_a_r(row_mapA.data(), row_mapA.extent(0)); @@ -221,18 +192,13 @@ void spgemm_numeric(KernelHandle *handle, if (block_dim > 1) { KokkosSparse::Impl::BSPGEMM_NUMERIC< - const_handle_type, Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, - Internal_ascalar_nnz_view_t_, Internal_blno_row_view_t_, - Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, - Internal_clno_row_view_t_, Internal_clno_nnz_view_t_, - Internal_cscalar_nnz_view_t_>::bspgemm_numeric(&tmp_handle, m, n, k, - block_dim, const_a_r, - const_a_l, const_a_s, - transposeA, const_b_r, - const_b_l, const_b_s, - transposeB, const_c_r, - nonconst_c_l, - nonconst_c_s); + const_handle_type, Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, Internal_ascalar_nnz_view_t_, + Internal_blno_row_view_t_, Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, Internal_clno_row_view_t_, + Internal_clno_nnz_view_t_, Internal_cscalar_nnz_view_t_>::bspgemm_numeric(&tmp_handle, m, n, k, block_dim, + const_a_r, const_a_l, const_a_s, + transposeA, const_b_r, const_b_l, + const_b_s, transposeB, const_c_r, + nonconst_c_l, nonconst_c_s); return; } @@ -244,8 
+210,7 @@ void spgemm_numeric(KernelHandle *handle, "an SpGEMM handle associated with it."); } - if (!spgemmHandle->checkMatrixIdentitiesNumeric(const_a_r, const_a_l, - const_b_r, const_b_l)) { + if (!spgemmHandle->checkMatrixIdentitiesNumeric(const_a_r, const_a_l, const_b_r, const_b_l)) { throw std::invalid_argument( "KokkosSparse::spgemm_numeric: once used, an spgemm handle cannot be " "reused for a product with a different sparsity pattern.\n" @@ -260,30 +225,20 @@ void spgemm_numeric(KernelHandle *handle, // testing) KokkosSparse::Impl::SPGEMM_NUMERIC< const_handle_type, // KernelHandle, - Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, - Internal_ascalar_nnz_view_t_, Internal_blno_row_view_t_, - Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, - Internal_clno_row_view_t_, Internal_clno_nnz_view_t_, + Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, Internal_ascalar_nnz_view_t_, Internal_blno_row_view_t_, + Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, Internal_clno_row_view_t_, Internal_clno_nnz_view_t_, Internal_cscalar_nnz_view_t_, false>::spgemm_numeric(&tmp_handle, // handle, - m, n, k, const_a_r, const_a_l, const_a_s, - transposeA, const_b_r, const_b_l, const_b_s, - transposeB, const_c_r, nonconst_c_l, - nonconst_c_s); + m, n, k, const_a_r, const_a_l, const_a_s, transposeA, const_b_r, const_b_l, const_b_s, + transposeB, const_c_r, nonconst_c_l, nonconst_c_s); } else { KokkosSparse::Impl::SPGEMM_NUMERIC< const_handle_type, // KernelHandle, - Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, - Internal_ascalar_nnz_view_t_, Internal_blno_row_view_t_, - Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, - Internal_clno_row_view_t_, Internal_clno_nnz_view_t_, + Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, Internal_ascalar_nnz_view_t_, Internal_blno_row_view_t_, + Internal_blno_nnz_view_t_, Internal_bscalar_nnz_view_t_, Internal_clno_row_view_t_, Internal_clno_nnz_view_t_, 
Internal_cscalar_nnz_view_t_>::spgemm_numeric(&tmp_handle, // handle, - m, n, k, const_a_r, - const_a_l, const_a_s, - transposeA, const_b_r, - const_b_l, const_b_s, - transposeB, const_c_r, - nonconst_c_l, + m, n, k, const_a_r, const_a_l, const_a_s, transposeA, const_b_r, + const_b_l, const_b_s, transposeB, const_c_r, nonconst_c_l, nonconst_c_s); } } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_symbolic.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_symbolic.hpp index 2bde5f6e2065..049bb8202981 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_symbolic.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spgemm_symbolic.hpp @@ -24,49 +24,39 @@ namespace KokkosSparse { namespace Experimental { -template -void spgemm_symbolic(KernelHandle *handle, - typename KernelHandle::const_nnz_lno_t m, - typename KernelHandle::const_nnz_lno_t n, - typename KernelHandle::const_nnz_lno_t k, - alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, - bool transposeA, blno_row_view_t_ row_mapB, - blno_nnz_view_t_ entriesB, bool transposeB, - clno_row_view_t_ row_mapC, bool computeRowptrs = false) { +void spgemm_symbolic(KernelHandle *handle, typename KernelHandle::const_nnz_lno_t m, + typename KernelHandle::const_nnz_lno_t n, typename KernelHandle::const_nnz_lno_t k, + alno_row_view_t_ row_mapA, alno_nnz_view_t_ entriesA, bool transposeA, blno_row_view_t_ row_mapB, + blno_nnz_view_t_ entriesB, bool transposeB, clno_row_view_t_ row_mapC, + bool computeRowptrs = false) { static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_symbolic: Output matrix rowmap must be non-const."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_symbolic: Size type of left handside matrix should " "be same as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_symbolic: Size type of right handside matrix " "should be same as 
kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_symbolic: Size type of output matrix should be " "same as kernelHandle sizetype."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_symbolic: lno type of left handside matrix should " "be same as kernelHandle lno_t."); static_assert( - std::is_same::value, + std::is_same::value, "KokkosSparse::spgemm_symbolic: lno type of right handside matrix should " "be same as kernelHandle lno_t."); @@ -85,38 +75,34 @@ void spgemm_symbolic(KernelHandle *handle, typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; typedef typename Kokkos::Device UniformDevice_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // alno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_row_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // alno_nnz_view_t_::device_type, Kokkos::MemoryTraits > Internal_alno_nnz_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // blno_row_view_t_::device_type, Kokkos::MemoryTraits > Internal_blno_row_view_t_; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // blno_nnz_view_t_::device_type, Kokkos::MemoryTraits > @@ -124,8 +110,7 @@ void spgemm_symbolic(KernelHandle *handle, // static assert clno_row_view_t_ cannot be const type. 
typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, UniformDevice_t, // typename // clno_row_view_t_::device_type, Kokkos::MemoryTraits > @@ -147,10 +132,8 @@ void spgemm_symbolic(KernelHandle *handle, // it will (currently) not be calling a TPL path. #ifndef NDEBUG if constexpr (KokkosSparse::Impl::spgemm_symbolic_tpl_spec_avail< - const_handle_type, Internal_alno_row_view_t_, - Internal_alno_nnz_view_t_, Internal_blno_row_view_t_, - Internal_blno_nnz_view_t_, - Internal_clno_row_view_t_>::value) { + const_handle_type, Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, Internal_blno_row_view_t_, + Internal_blno_nnz_view_t_, Internal_clno_row_view_t_>::value) { if (!KokkosSparse::Impl::isCrsGraphSorted(const_a_r, const_a_l)) throw std::runtime_error( "KokkosSparse::spgemm_symbolic: entries of A are not sorted within " @@ -170,8 +153,7 @@ void spgemm_symbolic(KernelHandle *handle, "an SpGEMM handle associated with it."); } - if (!spgemmHandle->checkMatrixIdentitiesSymbolic(const_a_r, const_a_l, - const_b_r, const_b_l)) { + if (!spgemmHandle->checkMatrixIdentitiesSymbolic(const_a_r, const_a_l, const_b_r, const_b_l)) { throw std::invalid_argument( "KokkosSparse::spgemm_symbolic: once used, an spgemm handle cannot be " "reused for a product with a different sparsity pattern.\n" @@ -184,26 +166,20 @@ void spgemm_symbolic(KernelHandle *handle, if (algo == SPGEMM_DEBUG || algo == SPGEMM_SERIAL) { // Never call a TPL if serial/debug is requested (this is needed for // testing) - KokkosSparse::Impl::SPGEMM_SYMBOLIC< - const_handle_type, // KernelHandle, - Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, - Internal_blno_row_view_t_, Internal_blno_nnz_view_t_, - Internal_clno_row_view_t_, - false>::spgemm_symbolic(&tmp_handle, // handle, - m, n, k, const_a_r, const_a_l, transposeA, - const_b_r, const_b_l, transposeB, c_r, - computeRowptrs); + KokkosSparse::Impl::SPGEMM_SYMBOLIC::spgemm_symbolic(&tmp_handle, // handle, + 
m, n, k, const_a_r, const_a_l, transposeA, const_b_r, + const_b_l, transposeB, c_r, computeRowptrs); } else { - KokkosSparse::Impl::SPGEMM_SYMBOLIC< - const_handle_type, // KernelHandle, - Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, - Internal_blno_row_view_t_, Internal_blno_nnz_view_t_, - Internal_clno_row_view_t_>::spgemm_symbolic(&tmp_handle, // handle, - m, n, k, const_a_r, - const_a_l, transposeA, - const_b_r, const_b_l, - transposeB, c_r, - computeRowptrs); + KokkosSparse::Impl::SPGEMM_SYMBOLIC::spgemm_symbolic(&tmp_handle, // handle, + m, n, k, const_a_r, const_a_l, + transposeA, const_b_r, const_b_l, + transposeB, c_r, computeRowptrs); } } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk.hpp index b3644a8709f5..fb0326def560 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk.hpp @@ -26,7 +26,7 @@ #include -//#include "KokkosSparse_spiluk_handle.hpp" +// #include "KokkosSparse_spiluk_handle.hpp" #include "KokkosKernels_helpers.hpp" #include "KokkosKernels_Error.hpp" #include "KokkosSparse_spiluk_symbolic_spec.hpp" @@ -35,139 +35,99 @@ namespace KokkosSparse { namespace Experimental { -#define KOKKOSKERNELS_SPILUK_SAME_TYPE(A, B) \ - std::is_same::type, \ - typename std::remove_const::type>::value - -template -void spiluk_symbolic(KernelHandle* handle, - typename KernelHandle::const_nnz_lno_t fill_lev, - ARowMapType& A_rowmap, AEntriesType& A_entries, - LRowMapType& L_rowmap, LEntriesType& L_entries, - URowMapType& U_rowmap, UEntriesType& U_entries, - int nstreams = 1) { +#define KOKKOSKERNELS_SPILUK_SAME_TYPE(A, B) \ + std::is_same::type, typename std::remove_const::type>::value + +template +void spiluk_symbolic(KernelHandle* handle, typename KernelHandle::const_nnz_lno_t fill_lev, ARowMapType& A_rowmap, + AEntriesType& A_entries, LRowMapType& L_rowmap, LEntriesType& L_entries, URowMapType& 
U_rowmap, + UEntriesType& U_entries, int nstreams = 1) { typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::nnz_lno_t ordinal_type; - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename ARowMapType::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename ARowMapType::non_const_value_type, size_type), "spiluk_symbolic: A size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename AEntriesType::non_const_value_type, ordinal_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename AEntriesType::non_const_value_type, ordinal_type), "spiluk_symbolic: A entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename LRowMapType::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LRowMapType::non_const_value_type, size_type), "spiluk_symbolic: L size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename LEntriesType::non_const_value_type, ordinal_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LEntriesType::non_const_value_type, ordinal_type), "spiluk_symbolic: L entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename URowMapType::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename URowMapType::non_const_value_type, size_type), "spiluk_symbolic: U size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename UEntriesType::non_const_value_type, ordinal_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UEntriesType::non_const_value_type, ordinal_type), "spiluk_symbolic: U entry type must match 
KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(Kokkos::is_view::value, - "spiluk_symbolic: A_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_symbolic: A_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_symbolic: L_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_symbolic: L_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_symbolic: U_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_symbolic: U_entries is not a Kokkos::View."); - - static_assert( - (int)LRowMapType::rank == (int)ARowMapType::rank, - "spiluk_symbolic: The ranks of L_rowmap and A_rowmap do not match."); - static_assert( - (int)LEntriesType::rank == (int)AEntriesType::rank, - "spiluk_symbolic: The ranks of L_entries and A_entries do not match."); - - static_assert( - (int)LRowMapType::rank == (int)URowMapType::rank, - "spiluk_symbolic: The ranks of L_rowmap and U_rowmap do not match."); - static_assert( - (int)LEntriesType::rank == (int)UEntriesType::rank, - "spiluk_symbolic: The ranks of L_entries and U_entries do not match."); - - static_assert( - LRowMapType::rank == 1, - "spiluk_symbolic: A_rowmap, L_rowmap and U_rowmap must all have rank 1."); + static_assert(Kokkos::is_view::value, "spiluk_symbolic: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_symbolic: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_symbolic: L_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_symbolic: L_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_symbolic: U_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_symbolic: U_entries is not a Kokkos::View."); + + static_assert((int)LRowMapType::rank == (int)ARowMapType::rank, + "spiluk_symbolic: The 
ranks of L_rowmap and A_rowmap do not match."); + static_assert((int)LEntriesType::rank == (int)AEntriesType::rank, + "spiluk_symbolic: The ranks of L_entries and A_entries do not match."); + + static_assert((int)LRowMapType::rank == (int)URowMapType::rank, + "spiluk_symbolic: The ranks of L_rowmap and U_rowmap do not match."); + static_assert((int)LEntriesType::rank == (int)UEntriesType::rank, + "spiluk_symbolic: The ranks of L_entries and U_entries do not match."); + + static_assert(LRowMapType::rank == 1, "spiluk_symbolic: A_rowmap, L_rowmap and U_rowmap must all have rank 1."); static_assert(LEntriesType::rank == 1, "spiluk_symbolic: A_entries, L_entries and U_entries must all " "have rank 1."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_symbolic: The output L_rowmap must be nonconst."); - static_assert( - std::is_same::value, - "spiluk_symbolic: The output L_entries must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, + "spiluk_symbolic: The output L_entries must be nonconst."); + static_assert(std::is_same::value, "spiluk_symbolic: The output U_rowmap must be nonconst."); - static_assert( - std::is_same::value, - "spiluk_symbolic: The output U_entries must be nonconst."); + static_assert(std::is_same::value, + "spiluk_symbolic: The output U_entries must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_symbolic: Views LRowMapType and ARowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_symbolic: Views LEntriesType and AEntriesType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_symbolic: Views LRowMapType and URowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_symbolic: Views LEntriesType and UEntriesType have " 
"different device_types."); - static_assert( - std::is_same< - typename LRowMapType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_symbolic: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LEntriesType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_symbolic: KernelHandle and Views have different execution " - "spaces."); - - static_assert( - std::is_same::value, - "spiluk_symbolic: rowmap and entries have different device types."); + static_assert(std::is_same::value, + "spiluk_symbolic: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same::value, + "spiluk_symbolic: KernelHandle and Views have different execution " + "spaces."); + + static_assert(std::is_same::value, + "spiluk_symbolic: rowmap and entries have different device types."); // Check validity of fill level if (fill_lev < 0) { std::ostringstream os; - os << "KokkosSparse::Experimental::spiluk_symbolic: fill_lev: " << fill_lev - << ". Valid value is >= 0."; + os << "KokkosSparse::Experimental::spiluk_symbolic: fill_lev: " << fill_lev << ". 
Valid value is >= 0."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -179,54 +139,45 @@ void spiluk_symbolic(KernelHandle* handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); - typedef Kokkos::View< - typename ARowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename ARowMapType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename ARowMapType::device_type, + Kokkos::MemoryTraits > ARowMap_Internal; - typedef Kokkos::View< - typename AEntriesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - AEntriesType>::array_layout, - typename AEntriesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename AEntriesType::device_type, + Kokkos::MemoryTraits > AEntries_Internal; - typedef Kokkos::View< - typename LRowMapType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LRowMapType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename LRowMapType::device_type, + Kokkos::MemoryTraits > LRowMap_Internal; - typedef Kokkos::View< - typename LEntriesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - LEntriesType>::array_layout, - typename LEntriesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename LEntriesType::device_type, + Kokkos::MemoryTraits > LEntries_Internal; - typedef Kokkos::View< - typename URowMapType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename 
URowMapType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename URowMapType::device_type, + Kokkos::MemoryTraits > URowMap_Internal; - typedef Kokkos::View< - typename UEntriesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - UEntriesType>::array_layout, - typename UEntriesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename UEntriesType::device_type, + Kokkos::MemoryTraits > UEntries_Internal; ARowMap_Internal A_rowmap_i = A_rowmap; @@ -236,192 +187,135 @@ void spiluk_symbolic(KernelHandle* handle, URowMap_Internal U_rowmap_i = U_rowmap; UEntries_Internal U_entries_i = U_entries; - KokkosSparse::Impl::SPILUK_SYMBOLIC< - const_handle_type, ARowMap_Internal, AEntries_Internal, LRowMap_Internal, - LEntries_Internal, URowMap_Internal, - UEntries_Internal>::spiluk_symbolic(&tmp_handle, fill_lev, A_rowmap_i, - A_entries_i, L_rowmap_i, L_entries_i, - U_rowmap_i, U_entries_i, nstreams); + KokkosSparse::Impl::SPILUK_SYMBOLIC::spiluk_symbolic(&tmp_handle, fill_lev, A_rowmap_i, + A_entries_i, L_rowmap_i, L_entries_i, + U_rowmap_i, U_entries_i, nstreams); } // spiluk_symbolic -template -void spiluk_numeric(KernelHandle* handle, - typename KernelHandle::const_nnz_lno_t fill_lev, - ARowMapType& A_rowmap, AEntriesType& A_entries, - AValuesType& A_values, LRowMapType& L_rowmap, - LEntriesType& L_entries, LValuesType& L_values, - URowMapType& U_rowmap, UEntriesType& U_entries, - UValuesType& U_values) { +template +void spiluk_numeric(KernelHandle* handle, typename KernelHandle::const_nnz_lno_t fill_lev, ARowMapType& A_rowmap, + AEntriesType& A_entries, AValuesType& A_values, LRowMapType& L_rowmap, LEntriesType& L_entries, + LValuesType& L_values, URowMapType& U_rowmap, UEntriesType& U_entries, UValuesType& U_values) { typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::nnz_lno_t ordinal_type; typedef typename KernelHandle::nnz_scalar_t 
scalar_type; - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename ARowMapType::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename ARowMapType::non_const_value_type, size_type), "spiluk_numeric: A size_type must match KernelHandle size_type " "(const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename AEntriesType::non_const_value_type, ordinal_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename AEntriesType::non_const_value_type, ordinal_type), "spiluk_numeric: A entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename AValuesType::value_type, - scalar_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename AValuesType::value_type, scalar_type), "spiluk_numeric: A scalar type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename LRowMapType::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LRowMapType::non_const_value_type, size_type), "spiluk_numeric: L size_type must match KernelHandle size_type " "(const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename LEntriesType::non_const_value_type, ordinal_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LEntriesType::non_const_value_type, ordinal_type), "spiluk_numeric: L entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LValuesType::value_type, - scalar_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LValuesType::value_type, scalar_type), "spiluk_numeric: L scalar type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename URowMapType::non_const_value_type, size_type), + 
static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename URowMapType::non_const_value_type, size_type), "spiluk_numeric: U size_type must match KernelHandle size_type " "(const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename UEntriesType::non_const_value_type, ordinal_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UEntriesType::non_const_value_type, ordinal_type), "spiluk_numeric: U entry type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UValuesType::value_type, - scalar_type), + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UValuesType::value_type, scalar_type), "spiluk_numeric: U scalar type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: A_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: A_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: A_values is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: L_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: L_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: L_values is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: U_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: U_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric: U_values is not a Kokkos::View."); - - static_assert( - (int)LRowMapType::rank == (int)ARowMapType::rank, - "spiluk_numeric: The ranks of L_rowmap and A_rowmap do not match."); - static_assert( - (int)LEntriesType::rank == (int)AEntriesType::rank, - "spiluk_numeric: The ranks of L_entries and A_entries do not match."); - static_assert( - 
(int)LValuesType::rank == (int)AValuesType::rank, - "spiluk_numeric: The ranks of L_values and A_values do not match."); - - static_assert( - (int)LRowMapType::rank == (int)URowMapType::rank, - "spiluk_numeric: The ranks of L_rowmap and U_rowmap do not match."); - static_assert( - (int)LEntriesType::rank == (int)UEntriesType::rank, - "spiluk_numeric: The ranks of L_entries and U_entries do not match."); - static_assert( - (int)LValuesType::rank == (int)UValuesType::rank, - "spiluk_numeric: The ranks of L_values and U_values do not match."); - - static_assert( - LRowMapType::rank == 1, - "spiluk_numeric: A_rowmap, L_rowmap and U_rowmap must all have rank 1."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: A_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: L_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: L_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: L_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: U_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: U_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric: U_values is not a Kokkos::View."); + + static_assert((int)LRowMapType::rank == (int)ARowMapType::rank, + "spiluk_numeric: The ranks of L_rowmap and A_rowmap do not match."); + static_assert((int)LEntriesType::rank == (int)AEntriesType::rank, + "spiluk_numeric: The ranks of L_entries and A_entries do not match."); + static_assert((int)LValuesType::rank == (int)AValuesType::rank, + "spiluk_numeric: The ranks of L_values and A_values do not match."); + + static_assert((int)LRowMapType::rank == (int)URowMapType::rank, + 
"spiluk_numeric: The ranks of L_rowmap and U_rowmap do not match."); + static_assert((int)LEntriesType::rank == (int)UEntriesType::rank, + "spiluk_numeric: The ranks of L_entries and U_entries do not match."); + static_assert((int)LValuesType::rank == (int)UValuesType::rank, + "spiluk_numeric: The ranks of L_values and U_values do not match."); + + static_assert(LRowMapType::rank == 1, "spiluk_numeric: A_rowmap, L_rowmap and U_rowmap must all have rank 1."); static_assert(LEntriesType::rank == 1, "spiluk_numeric: A_entries, L_entries and U_entries must all " "have rank 1."); - static_assert( - LValuesType::rank == 1, - "spiluk_numeric: A_values, L_values and U_values must all have rank 1."); - - static_assert( - std::is_same::value, - "spiluk_numeric: The output L_entries must be nonconst."); - static_assert(std::is_same::value, + static_assert(LValuesType::rank == 1, "spiluk_numeric: A_values, L_values and U_values must all have rank 1."); + + static_assert(std::is_same::value, + "spiluk_numeric: The output L_entries must be nonconst."); + static_assert(std::is_same::value, "spiluk_numeric: The output L_values must be nonconst."); - static_assert( - std::is_same::value, - "spiluk_numeric: The output U_entries must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, + "spiluk_numeric: The output U_entries must be nonconst."); + static_assert(std::is_same::value, "spiluk_numeric: The output U_values must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_numeric: Views LRowMapType and ARowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_numeric: Views LEntriesType and AEntriesType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_numeric: Views LValuesType and AValuesType have " "different device_types."); - static_assert(std::is_same::value, 
+ static_assert(std::is_same::value, "spiluk_numeric: Views LRowMapType and URowMapType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_numeric: Views LEntriesType and UEntriesType have " "different device_types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "spiluk_numeric: Views LValuesType and UValuesType have " "different device_types."); - static_assert( - std::is_same< - typename LRowMapType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LEntriesType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LValuesType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric: KernelHandle and Views have different execution " - "spaces."); - - static_assert( - std::is_same::value, - "spiluk_numeric: rowmap and entries have different device types."); - static_assert( - std::is_same::value, - "spiluk_numeric: rowmap and values have different device types."); + static_assert(std::is_same::value, + "spiluk_numeric: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same::value, + "spiluk_numeric: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same::value, + "spiluk_numeric: KernelHandle and Views have different execution " + "spaces."); + + static_assert(std::is_same::value, + "spiluk_numeric: rowmap and entries have different device types."); + static_assert(std::is_same::value, + "spiluk_numeric: rowmap and values have different device types."); // Check validity of fill level if 
(fill_lev < 0) { std::ostringstream os; - os << "KokkosSparse::Experimental::spiluk_numeric: fill_lev: " << fill_lev - << ". Valid value is >= 0."; + os << "KokkosSparse::Experimental::spiluk_numeric: fill_lev: " << fill_lev << ". Valid value is >= 0."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -441,75 +335,63 @@ void spiluk_numeric(KernelHandle* handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); - typedef Kokkos::View< - typename ARowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename ARowMapType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename ARowMapType::device_type, + Kokkos::MemoryTraits > ARowMap_Internal; - typedef Kokkos::View< - typename AEntriesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - AEntriesType>::array_layout, - typename AEntriesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename AEntriesType::device_type, + Kokkos::MemoryTraits > AEntries_Internal; - typedef Kokkos::View< - typename AValuesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename AValuesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename AValuesType::device_type, + Kokkos::MemoryTraits > AValues_Internal; - typedef Kokkos::View< - typename LRowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LRowMapType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename LRowMapType::device_type, + 
Kokkos::MemoryTraits > LRowMap_Internal; - typedef Kokkos::View< - typename LEntriesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - LEntriesType>::array_layout, - typename LEntriesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename LEntriesType::device_type, + Kokkos::MemoryTraits > LEntries_Internal; - typedef Kokkos::View< - typename LValuesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LValuesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename LValuesType::device_type, + Kokkos::MemoryTraits > LValues_Internal; - typedef Kokkos::View< - typename URowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename URowMapType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename URowMapType::device_type, + Kokkos::MemoryTraits > URowMap_Internal; - typedef Kokkos::View< - typename UEntriesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - UEntriesType>::array_layout, - typename UEntriesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename UEntriesType::device_type, + Kokkos::MemoryTraits > UEntries_Internal; - typedef Kokkos::View< - typename UValuesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename UValuesType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename UValuesType::device_type, + Kokkos::MemoryTraits > UValues_Internal; ARowMap_Internal A_rowmap_i = A_rowmap; @@ -522,150 +404,95 @@ void spiluk_numeric(KernelHandle* handle, UEntries_Internal U_entries_i = U_entries; UValues_Internal U_values_i = U_values; - KokkosSparse::Impl::SPILUK_NUMERIC< - typename AValuesType::execution_space, const_handle_type, - ARowMap_Internal, AEntries_Internal, AValues_Internal, 
LRowMap_Internal, - LEntries_Internal, LValues_Internal, URowMap_Internal, UEntries_Internal, - UValues_Internal>::spiluk_numeric(&tmp_handle, fill_lev, A_rowmap_i, - A_entries_i, A_values_i, L_rowmap_i, - L_entries_i, L_values_i, U_rowmap_i, - U_entries_i, U_values_i); + KokkosSparse::Impl::SPILUK_NUMERIC::spiluk_numeric(&tmp_handle, fill_lev, A_rowmap_i, A_entries_i, + A_values_i, L_rowmap_i, L_entries_i, L_values_i, + U_rowmap_i, U_entries_i, U_values_i); } // spiluk_numeric -template -void spiluk_numeric_streams(const std::vector& execspace_v, - const std::vector& handle_v, - typename KernelHandle::const_nnz_lno_t fill_lev, - const std::vector& A_rowmap_v, - const std::vector& A_entries_v, - const std::vector& A_values_v, - const std::vector& L_rowmap_v, - const std::vector& L_entries_v, - std::vector& L_values_v, - const std::vector& U_rowmap_v, - const std::vector& U_entries_v, - std::vector& U_values_v) { +void spiluk_numeric_streams(const std::vector& execspace_v, const std::vector& handle_v, + typename KernelHandle::const_nnz_lno_t fill_lev, const std::vector& A_rowmap_v, + const std::vector& A_entries_v, const std::vector& A_values_v, + const std::vector& L_rowmap_v, const std::vector& L_entries_v, + std::vector& L_values_v, const std::vector& U_rowmap_v, + const std::vector& U_entries_v, std::vector& U_values_v) { using size_type = typename KernelHandle::size_type; using ordinal_type = typename KernelHandle::nnz_lno_t; using scalar_type = typename KernelHandle::nnz_scalar_t; - static_assert(Kokkos::is_execution_space::value, - "ExecutionSpace is not valid"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename ARowMapType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "ARowMapType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename AEntriesType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "AEntriesType"); - 
static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename AValuesType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "AValuesType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename LRowMapType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "LRowMapType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename LEntriesType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "LEntriesType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename LValuesType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "LValuesType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename URowMapType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "URowMapType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename UEntriesType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "UEntriesType"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename UValuesType::memory_space>::accessible, - "spiluk_numeric_streams: ExecutionSpace cannot access data in " - "UValuesType"); - - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE(typename ARowMapType::non_const_value_type, - size_type), - "spiluk_numeric_streams: A size_type must match KernelHandle size_type " - "(const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename AEntriesType::non_const_value_type, ordinal_type), - "spiluk_numeric_streams: A entry type must match KernelHandle entry " - "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE(typename AValuesType::value_type, - scalar_type), - "spiluk_numeric_streams: A scalar type must match 
KernelHandle entry " - "type (aka nnz_lno_t, and const doesn't matter)"); - - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LRowMapType::non_const_value_type, - size_type), - "spiluk_numeric_streams: L size_type must match KernelHandle size_type " - "(const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename LEntriesType::non_const_value_type, ordinal_type), - "spiluk_numeric_streams: L entry type must match KernelHandle entry " - "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LValuesType::value_type, - scalar_type), - "spiluk_numeric_streams: L scalar type must match KernelHandle entry " - "type (aka nnz_lno_t, and const doesn't matter)"); - - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE(typename URowMapType::non_const_value_type, - size_type), - "spiluk_numeric_streams: U size_type must match KernelHandle size_type " - "(const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE( - typename UEntriesType::non_const_value_type, ordinal_type), - "spiluk_numeric_streams: U entry type must match KernelHandle entry " - "type (aka nnz_lno_t, and const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UValuesType::value_type, - scalar_type), - "spiluk_numeric_streams: U scalar type must match KernelHandle entry " - "type (aka nnz_lno_t, and const doesn't matter)"); - - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: A_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: A_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: A_values is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: L_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: L_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - 
"spiluk_numeric_streams: L_values is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: U_rowmap is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: U_entries is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "spiluk_numeric_streams: U_values is not a Kokkos::View."); + static_assert(Kokkos::is_execution_space::value, "ExecutionSpace is not valid"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "ARowMapType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "AEntriesType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "AValuesType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "LRowMapType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "LEntriesType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "LValuesType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "URowMapType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "UEntriesType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "spiluk_numeric_streams: ExecutionSpace cannot access data in " + "UValuesType"); + + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename ARowMapType::non_const_value_type, size_type), + "spiluk_numeric_streams: A size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename 
AEntriesType::non_const_value_type, ordinal_type), + "spiluk_numeric_streams: A entry type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename AValuesType::value_type, scalar_type), + "spiluk_numeric_streams: A scalar type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LRowMapType::non_const_value_type, size_type), + "spiluk_numeric_streams: L size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LEntriesType::non_const_value_type, ordinal_type), + "spiluk_numeric_streams: L entry type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename LValuesType::value_type, scalar_type), + "spiluk_numeric_streams: L scalar type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename URowMapType::non_const_value_type, size_type), + "spiluk_numeric_streams: U size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UEntriesType::non_const_value_type, ordinal_type), + "spiluk_numeric_streams: U entry type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPILUK_SAME_TYPE(typename UValuesType::value_type, scalar_type), + "spiluk_numeric_streams: U scalar type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: A_values 
is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: L_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: L_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: L_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: U_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: U_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "spiluk_numeric_streams: U_values is not a Kokkos::View."); static_assert((int)LRowMapType::rank == (int)ARowMapType::rank, "spiluk_numeric_streams: The ranks of L_rowmap and A_rowmap do " @@ -690,104 +517,70 @@ void spiluk_numeric_streams(const std::vector& execspace_v, static_assert(LRowMapType::rank == 1, "spiluk_numeric_streams: A_rowmap, L_rowmap and U_rowmap must " "all have rank 1."); - static_assert( - LEntriesType::rank == 1, - "spiluk_numeric_streams: A_entries, L_entries and U_entries must all " - "have rank 1."); + static_assert(LEntriesType::rank == 1, + "spiluk_numeric_streams: A_entries, L_entries and U_entries must all " + "have rank 1."); static_assert(LValuesType::rank == 1, "spiluk_numeric_streams: A_values, L_values and U_values must " "all have rank 1."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: The output L_entries must be nonconst."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: The output L_values must be nonconst."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: The output U_entries must be nonconst."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: The output U_values must be nonconst."); - - static_assert( - std::is_same::value, - "spiluk_numeric_streams: Views LRowMapType and ARowMapType have " - "different device_types."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: 
Views LEntriesType and AEntriesType have " - "different device_types."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: Views LValuesType and AValuesType have " - "different device_types."); - - static_assert( - std::is_same::value, - "spiluk_numeric_streams: Views LRowMapType and URowMapType have " - "different device_types."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: Views LEntriesType and UEntriesType have " - "different device_types."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: Views LValuesType and UValuesType have " - "different device_types."); - - static_assert( - std::is_same< - ExecutionSpace, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric_streams: KernelHandle's execution space is different " - "from " - "ExecutionSpace."); - - static_assert( - std::is_same< - typename LRowMapType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric_streams: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LEntriesType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric_streams: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same< - typename LValuesType::device_type::execution_space, - typename KernelHandle::SPILUKHandleType::execution_space>::value, - "spiluk_numeric_streams: KernelHandle and Views have different execution " - "spaces."); - - static_assert(std::is_same::value, + static_assert(std::is_same::value, + "spiluk_numeric_streams: The output L_entries must be nonconst."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: The output L_values must be nonconst."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: The output U_entries must be nonconst."); + static_assert(std::is_same::value, + 
"spiluk_numeric_streams: The output U_values must be nonconst."); + + static_assert(std::is_same::value, + "spiluk_numeric_streams: Views LRowMapType and ARowMapType have " + "different device_types."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: Views LEntriesType and AEntriesType have " + "different device_types."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: Views LValuesType and AValuesType have " + "different device_types."); + + static_assert(std::is_same::value, + "spiluk_numeric_streams: Views LRowMapType and URowMapType have " + "different device_types."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: Views LEntriesType and UEntriesType have " + "different device_types."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: Views LValuesType and UValuesType have " + "different device_types."); + + static_assert(std::is_same::value, + "spiluk_numeric_streams: KernelHandle's execution space is different " + "from " + "ExecutionSpace."); + + static_assert(std::is_same::value, + "spiluk_numeric_streams: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: KernelHandle and Views have different execution " + "spaces."); + + static_assert(std::is_same::value, "spiluk_numeric_streams: rowmap and entries have different " "device types."); - static_assert( - std::is_same::value, - "spiluk_numeric_streams: rowmap and values have different device types."); + static_assert(std::is_same::value, + "spiluk_numeric_streams: rowmap and values have different device types."); // Check validity of fill level if (fill_lev < 0) { std::ostringstream os; - os << "KokkosSparse::Experimental::spiluk_numeric_streams: fill_lev: " - << fill_lev << ". 
Valid value is >= 0."; + os << "KokkosSparse::Experimental::spiluk_numeric_streams: fill_lev: " << fill_lev << ". Valid value is >= 0."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -812,8 +605,7 @@ void spiluk_numeric_streams(const std::vector& execspace_v, std::ostringstream os; os << "KokkosSparse::Experimental::spiluk_numeric_streams: vector sizes " "must match -- execspace_v.size() " - << execspace_v.size() << " vs. A_entries_v.size() " - << A_entries_v.size(); + << execspace_v.size() << " vs. A_entries_v.size() " << A_entries_v.size(); KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -837,8 +629,7 @@ void spiluk_numeric_streams(const std::vector& execspace_v, std::ostringstream os; os << "KokkosSparse::Experimental::spiluk_numeric_streams: vector sizes " "must match -- execspace_v.size() " - << execspace_v.size() << " vs. L_entries_v.size() " - << L_entries_v.size(); + << execspace_v.size() << " vs. L_entries_v.size() " << L_entries_v.size(); KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -862,8 +653,7 @@ void spiluk_numeric_streams(const std::vector& execspace_v, std::ostringstream os; os << "KokkosSparse::Experimental::spiluk_numeric_streams: vector sizes " "must match -- execspace_v.size() " - << execspace_v.size() << " vs. U_entries_v.size() " - << U_entries_v.size(); + << execspace_v.size() << " vs. 
U_entries_v.size() " << U_entries_v.size(); KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -894,66 +684,53 @@ void spiluk_numeric_streams(const std::vector& execspace_v, using c_temp_t = typename KernelHandle::HandleTempMemorySpace; using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - using const_handle_type = - typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; - - using ARowMap_Internal = Kokkos::View< - typename ARowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename ARowMapType::device_type, - Kokkos::MemoryTraits >; - - using AEntries_Internal = Kokkos::View< - typename AEntriesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - AEntriesType>::array_layout, - typename AEntriesType::device_type, - Kokkos::MemoryTraits >; - - using AValues_Internal = Kokkos::View< - typename AValuesType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename AValuesType::device_type, - Kokkos::MemoryTraits >; - - using LRowMap_Internal = Kokkos::View< - typename LRowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LRowMapType::device_type, - Kokkos::MemoryTraits >; - - using LEntries_Internal = Kokkos::View< - typename LEntriesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - LEntriesType>::array_layout, - typename LEntriesType::device_type, - Kokkos::MemoryTraits >; - - using LValues_Internal = Kokkos::View< - typename LValuesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename LValuesType::device_type, - Kokkos::MemoryTraits >; - - using URowMap_Internal = Kokkos::View< - typename URowMapType::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename URowMapType::device_type, - 
Kokkos::MemoryTraits >; - - using UEntries_Internal = Kokkos::View< - typename UEntriesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout< - UEntriesType>::array_layout, - typename UEntriesType::device_type, - Kokkos::MemoryTraits >; - - using UValues_Internal = Kokkos::View< - typename UValuesType::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename UValuesType::device_type, - Kokkos::MemoryTraits >; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle; + + using ARowMap_Internal = + Kokkos::View::array_layout, + typename ARowMapType::device_type, Kokkos::MemoryTraits >; + + using AEntries_Internal = + Kokkos::View::array_layout, + typename AEntriesType::device_type, Kokkos::MemoryTraits >; + + using AValues_Internal = + Kokkos::View::array_layout, + typename AValuesType::device_type, Kokkos::MemoryTraits >; + + using LRowMap_Internal = + Kokkos::View::array_layout, + typename LRowMapType::device_type, Kokkos::MemoryTraits >; + + using LEntries_Internal = + Kokkos::View::array_layout, + typename LEntriesType::device_type, Kokkos::MemoryTraits >; + + using LValues_Internal = + Kokkos::View::array_layout, + typename LValuesType::device_type, Kokkos::MemoryTraits >; + + using URowMap_Internal = + Kokkos::View::array_layout, + typename URowMapType::device_type, Kokkos::MemoryTraits >; + + using UEntries_Internal = + Kokkos::View::array_layout, + typename UEntriesType::device_type, Kokkos::MemoryTraits >; + + using UValues_Internal = + Kokkos::View::array_layout, + typename UValuesType::device_type, Kokkos::MemoryTraits >; std::vector handle_i_v(execspace_v.size()); std::vector A_rowmap_i_v(execspace_v.size()); @@ -979,16 +756,14 @@ void spiluk_numeric_streams(const std::vector& execspace_v, U_values_i_v[i] = U_values_v[i]; } - KokkosSparse::Impl::SPILUK_NUMERIC< - ExecutionSpace, const_handle_type, ARowMap_Internal, AEntries_Internal, - AValues_Internal, 
LRowMap_Internal, LEntries_Internal, LValues_Internal, - URowMap_Internal, UEntries_Internal, - UValues_Internal>::spiluk_numeric_streams(execspace_v, handle_i_v, - A_rowmap_i_v, A_entries_i_v, - A_values_i_v, L_rowmap_i_v, - L_entries_i_v, L_values_i_v, - U_rowmap_i_v, U_entries_i_v, - U_values_i_v); + KokkosSparse::Impl::SPILUK_NUMERIC::spiluk_numeric_streams(execspace_v, handle_i_v, A_rowmap_i_v, + A_entries_i_v, A_values_i_v, + L_rowmap_i_v, L_entries_i_v, + L_values_i_v, U_rowmap_i_v, + U_entries_i_v, U_values_i_v); } // spiluk_numeric_streams diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk_handle.hpp index 952a14aa2de9..57fb22806ccf 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spiluk_handle.hpp @@ -22,7 +22,7 @@ #ifndef _SPILUKHANDLE_HPP #define _SPILUKHANDLE_HPP -//#define EXPAND_FACT 3 +// #define EXPAND_FACT 3 namespace KokkosSparse { namespace Experimental { @@ -32,8 +32,8 @@ enum class SPILUKAlgorithm { SEQLVLSCHD_TP1 /*, SEQLVLSCHED_TP2*/ }; -template +template class SPILUKHandle { public: using HandleExecSpace = ExecutionSpace; @@ -59,46 +59,37 @@ class SPILUKHandle { using nnz_lno_view_t = Kokkos::View; - using nnz_value_view_t = - typename Kokkos::View; + using nnz_value_view_t = typename Kokkos::View; - using nnz_row_view_host_t = - typename Kokkos::View; + using nnz_row_view_host_t = typename Kokkos::View; - using nnz_lno_view_host_t = - typename Kokkos::View; + using nnz_lno_view_host_t = typename Kokkos::View; - using signed_integral_t = typename std::make_signed< - typename nnz_row_view_t::non_const_value_type>::type; + using signed_integral_t = typename std::make_signed::type; using signed_nnz_lno_view_t = - Kokkos::View; - using work_view_t = Kokkos::View; + using work_view_t = Kokkos::View; private: nnz_row_view_t level_list; // level IDs which the rows belong to 
nnz_lno_view_t level_idx; // the list of rows in each level - nnz_lno_view_t - level_ptr; // the starting index (into the view level_idx) of each level + nnz_lno_view_t level_ptr; // the starting index (into the view level_idx) of each level // Make hlevel_ptr a separate allocation, since it will be accessed on host // between kernel launches. nnz_lno_view_host_t hlevel_ptr; - nnz_lno_view_host_t level_nchunks; // number of chunks of rows at each level - nnz_lno_view_host_t - level_nrowsperchunk; // maximum number of rows among chunks at each level - work_view_t iw; // working view for mapping dense indices to sparse indices + nnz_lno_view_host_t level_nchunks; // number of chunks of rows at each level + nnz_lno_view_host_t level_nrowsperchunk; // maximum number of rows among chunks at each level + work_view_t iw; // working view for mapping dense indices to sparse indices size_type nrows; size_type nlevels; size_type nnzL; size_type nnzU; size_type block_size; - size_type level_maxrows; // max. number of rows among levels - size_type - level_maxrowsperchunk; // max.number of rows among chunks among levels + size_type level_maxrows; // max. 
number of rows among levels + size_type level_maxrowsperchunk; // max.number of rows among chunks among levels bool symbolic_complete; @@ -108,8 +99,7 @@ class SPILUKHandle { int vector_size; public: - SPILUKHandle(SPILUKAlgorithm choice, const size_type nrows_, - const size_type nnzL_, const size_type nnzU_, + SPILUKHandle(SPILUKAlgorithm choice, const size_type nrows_, const size_type nnzL_, const size_type nnzU_, const size_type block_size_ = 0, bool symbolic_complete_ = false) : level_list(), level_idx(), @@ -130,9 +120,8 @@ class SPILUKHandle { team_size(-1), vector_size(-1) {} - void reset_handle( - const size_type nrows_, const size_type nnzL_, const size_type nnzU_, - const size_type block_size_ = Kokkos::ArithTraits::max()) { + void reset_handle(const size_type nrows_, const size_type nnzL_, const size_type nnzU_, + const size_type block_size_ = Kokkos::ArithTraits::max()) { set_nrows(nrows_); set_num_levels(0); set_nnzL(nnzL_); @@ -174,14 +163,10 @@ class SPILUKHandle { KOKKOS_INLINE_FUNCTION nnz_lno_view_host_t get_level_nchunks() const { return level_nchunks; } - void alloc_level_nchunks(const size_type nlevels_) { - level_nchunks = nnz_lno_view_host_t("level_nchunks", nlevels_); - } + void alloc_level_nchunks(const size_type nlevels_) { level_nchunks = nnz_lno_view_host_t("level_nchunks", nlevels_); } KOKKOS_INLINE_FUNCTION - nnz_lno_view_host_t get_level_nrowsperchunk() const { - return level_nrowsperchunk; - } + nnz_lno_view_host_t get_level_nrowsperchunk() const { return level_nrowsperchunk; } void alloc_level_nrowsperchunk(const size_type nlevels_) { level_nrowsperchunk = nnz_lno_view_host_t("level_nrowsperchunk", nlevels_); @@ -191,8 +176,7 @@ class SPILUKHandle { work_view_t get_iw() const { return iw; } void alloc_iw(const size_type nrows_, const size_type ncols_) { - iw = work_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "iw"), - nrows_, ncols_); + iw = work_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "iw"), nrows_, 
ncols_); Kokkos::deep_copy(iw, nnz_lno_t(-1)); } @@ -218,17 +202,13 @@ class SPILUKHandle { size_type get_block_size() const { return block_size; } KOKKOS_INLINE_FUNCTION - void set_block_size(const size_type block_size_) { - this->block_size = block_size_; - } + void set_block_size(const size_type block_size_) { this->block_size = block_size_; } KOKKOS_INLINE_FUNCTION size_type get_level_maxrows() const { return level_maxrows; } KOKKOS_INLINE_FUNCTION - void set_level_maxrows(const size_type level_maxrows_) { - this->level_maxrows = level_maxrows_; - } + void set_level_maxrows(const size_type level_maxrows_) { this->level_maxrows = level_maxrows_; } KOKKOS_INLINE_FUNCTION size_type get_level_maxrowsperchunk() const { return level_maxrowsperchunk; } @@ -255,8 +235,7 @@ class SPILUKHandle { int get_vector_size() const { return this->vector_size; } void print_algorithm() { - if (algm == SPILUKAlgorithm::SEQLVLSCHD_TP1) - std::cout << "SEQLVLSCHD_TP1" << std::endl; + if (algm == SPILUKAlgorithm::SEQLVLSCHD_TP1) std::cout << "SEQLVLSCHD_TP1" << std::endl; /*if ( algm == SPILUKAlgorithm::SEQLVLSCHED_TP2 ) { std::cout << "SEQLVLSCHED_TP2" << std::endl;; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp index 336bae4f1d30..e31ff2ef8d60 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp @@ -72,57 +72,42 @@ struct RANK_TWO {}; /// \param beta [in] Scalar multiplier for the vector y. /// \param y [in/out] Result vector. 
// clang-format on -template -void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { +template +void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y) { // Make sure A is a CrsMatrix or BsrMatrix. - static_assert( - is_crs_matrix_v || Experimental::is_bsr_matrix_v, - "KokkosSparse::spmv: AMatrix must be a CrsMatrix or BsrMatrix"); + static_assert(is_crs_matrix_v || Experimental::is_bsr_matrix_v, + "KokkosSparse::spmv: AMatrix must be a CrsMatrix or BsrMatrix"); // Make sure that x and y are Views. - static_assert(Kokkos::is_view::value, - "KokkosSparse::spmv: XVector must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosSparse::spmv: YVector must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosSparse::spmv: XVector must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosSparse::spmv: YVector must be a Kokkos::View."); // Make sure A, x, y are accessible to ExecutionSpace - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv: AMatrix must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv: XVector must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv: YVector must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv: AMatrix must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv: XVector must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv: YVector must be accessible from ExecutionSpace"); // Make sure that x and y 
have the same rank. - static_assert(XVector::rank() == YVector::rank(), - "KokkosSparse::spmv: Vector ranks do not match."); + static_assert(XVector::rank() == YVector::rank(), "KokkosSparse::spmv: Vector ranks do not match."); // Make sure that x (and therefore y) is rank 1 or 2. static_assert(XVector::rank() == size_t(1) || XVector::rank() == size_t(2), "KokkosSparse::spmv: Both Vector inputs must have rank 1 or 2"); // Make sure that y is non-const. - static_assert(!std::is_const_v, - "KokkosSparse::spmv: Output Vector must be non-const."); + static_assert(!std::is_const_v, "KokkosSparse::spmv: Output Vector must be non-const."); // Check that A, X, Y types match that of the Handle // But only check this if Handle is the user-facing type (SPMVHandle). // We may internally call spmv with SPMVHandleImpl, which does not include // the matrix and vector types. if constexpr (KokkosSparse::Impl::is_spmv_handle_v) { - static_assert( - std::is_same_v, - "KokkosSparse::spmv: AMatrix must be identical to Handle::AMatrixType"); - static_assert( - std::is_same_v, - "KokkosSparse::spmv: XVector must be identical to Handle::XVectorType"); - static_assert( - std::is_same_v, - "KokkosSparse::spmv: YVector must be identical to Handle::YVectorType"); + static_assert(std::is_same_v, + "KokkosSparse::spmv: AMatrix must be identical to Handle::AMatrixType"); + static_assert(std::is_same_v, + "KokkosSparse::spmv: XVector must be identical to Handle::XVectorType"); + static_assert(std::is_same_v, + "KokkosSparse::spmv: YVector must be identical to Handle::YVectorType"); } constexpr bool isBSR = Experimental::is_bsr_matrix_v; @@ -139,29 +124,25 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], } if ((mode[0] == NoTranspose[0]) || (mode[0] == Conjugate[0])) { - if ((x.extent(1) != y.extent(1)) || (n != x.extent(0)) || - (m != y.extent(0))) { + if ((x.extent(1) != y.extent(1)) || (n != x.extent(0)) || (m != y.extent(0))) { std::ostringstream os; os << 
"KokkosSparse::spmv: Dimensions do not match: " - << ", A: " << m << " x " << n << ", x: " << x.extent(0) << " x " - << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); + << ", A: " << m << " x " << n << ", x: " << x.extent(0) << " x " << x.extent(1) << ", y: " << y.extent(0) + << " x " << y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } else { - if ((x.extent(1) != y.extent(1)) || (m != x.extent(0)) || - (n != y.extent(0))) { + if ((x.extent(1) != y.extent(1)) || (m != x.extent(0)) || (n != y.extent(0))) { std::ostringstream os; os << "KokkosSparse::spmv: Dimensions do not match (transpose): " - << ", A: " << A.numRows() << " x " << A.numCols() - << ", x: " << x.extent(0) << " x " << x.extent(1) + << ", A: " << A.numRows() << " x " << A.numCols() << ", x: " << x.extent(0) << " x " << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } // Efficiently handle cases where alpha*Op(A) is equivalent to the zero matrix - if (alpha == Kokkos::ArithTraits::zero() || m == 0 || n == 0 || - A.nnz() == 0) { + if (alpha == Kokkos::ArithTraits::zero() || m == 0 || n == 0 || A.nnz() == 0) { // This is required to maintain semantics of KokkosKernels native SpMV: // if y contains NaN but beta = 0, the result y should be filled with 0. // For example, this is useful for passing in uninitialized y and beta=0. 
@@ -175,17 +156,15 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], // Get the "impl" parent class of Handle, if it's not already the impl using HandleImpl = typename Handle::ImplType; - using ACrs_Internal = CrsMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type>; - using ABsr_Internal = Experimental::BsrMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type>; + using ACrs_Internal = + CrsMatrix, typename AMatrix::const_size_type>; + using ABsr_Internal = + Experimental::BsrMatrix, + typename AMatrix::const_size_type>; - using AMatrix_Internal = - std::conditional_t; + using AMatrix_Internal = std::conditional_t; // Intercept special case: A is a BsrMatrix with blockDim() == 1 // This is exactly equivalent to CrsMatrix (more performant) @@ -200,8 +179,7 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], typename ACrs_Internal::row_map_type rowmap(A.graph.row_map); typename ACrs_Internal::index_type entries(A.graph.entries); typename ACrs_Internal::values_type values(A.values); - ACrs_Internal ACrs(std::string{}, A.numRows(), A.numCols(), A.nnz(), - values, rowmap, entries); + ACrs_Internal ACrs(std::string{}, A.numRows(), A.numCols(), A.nnz(), values, rowmap, entries); spmv(space, handle->get_impl(), mode, alpha, ACrs, x, beta, y); return; } @@ -210,32 +188,27 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], AMatrix_Internal A_i(A); // Note: data_type of a View includes both the scalar and rank - using XVector_Internal = Kokkos::View< - typename XVector::const_data_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits>; + using XVector_Internal = + 
Kokkos::View::array_layout, typename XVector::device_type, + Kokkos::MemoryTraits>; - using YVector_Internal = Kokkos::View< - typename YVector::non_const_data_type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits>; + using YVector_Internal = Kokkos::View::array_layout, + typename YVector::device_type, Kokkos::MemoryTraits>; // Special case: XVector/YVector are rank-2 but x,y both have one column and // are contiguous. In this case take rank-1 subviews of x,y and call the // rank-1 version. if constexpr (XVector::rank() == 2) { - using XVector_SubInternal = Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits>; - using YVector_SubInternal = Kokkos::View< - typename YVector::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits>; - if (x.extent(1) == size_t(1) && x.span_is_contiguous() && - y.span_is_contiguous()) { + using XVector_SubInternal = + Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits>; + using YVector_SubInternal = Kokkos::View::array_layout, + typename YVector::device_type, Kokkos::MemoryTraits>; + if (x.extent(1) == size_t(1) && x.span_is_contiguous() && y.span_is_contiguous()) { XVector_SubInternal xsub(x.data(), x.extent(0)); YVector_SubInternal ysub(y.data(), y.extent(0)); spmv(space, handle->get_impl(), mode, alpha, A, xsub, beta, ysub); @@ -247,13 +220,6 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], YVector_Internal y_i(y); bool useNative = is_spmv_algorithm_native(handle->get_algorithm()); - // Also use the native algorithm if SPMV_FAST_SETUP was selected and - // rocSPARSE is the possible TPL to use. Native is faster in this case. 
-#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE - if (handle->get_algorithm() == SPMV_FAST_SETUP && - std::is_same_v) - useNative = true; -#endif // Now call the proper implementation depending on isBSR and the rank of X/Y if constexpr (!isBSR) { @@ -263,10 +229,7 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], ///////////////// #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE // cuSPARSE does not support the conjugate mode (C) - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] == Conjugate[0]); } // cuSPARSE 12 requires that the output (y) vector is 16-byte aligned for @@ -278,74 +241,65 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE - if (std::is_same::value) { + if (std::is_same_v) { useNative = useNative || (mode[0] != NoTranspose[0]); } #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL - if (std::is_same_v) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] == Conjugate[0]); } #ifdef KOKKOS_ENABLE_SYCL - if (std::is_same_v) { - useNative = useNative || (mode[0] == Conjugate[0]); + if constexpr (std::is_same_v) { + useNative = useNative || (mode[0] != NoTranspose[0]); } #endif #endif if (useNative) { // Explicitly call the non-TPL SPMV implementation - std::string label = - "KokkosSparse::spmv[NATIVE," + - Kokkos::ArithTraits< - typename AMatrix_Internal::non_const_value_type>::name() + - "]"; + std::string label = "KokkosSparse::spmv[NATIVE," + + Kokkos::ArithTraits::name() + "]"; Kokkos::Profiling::pushRegion(label); - Impl::SPMV::spmv(space, - handle, - mode, alpha, - A_i, x_i, - beta, y_i); + Impl::SPMV::spmv( + space, handle, mode, alpha, A_i, x_i, beta, y_i); Kokkos::Profiling::popRegion(); } else { // note: the cuSPARSE spmv wrapper defines a profiling region, so one is // not needed here. 
- Impl::SPMV::spmv(space, handle, - mode, alpha, A_i, - x_i, beta, y_i); + Impl::SPMV::spmv( + space, handle, mode, alpha, A_i, x_i, beta, y_i); } } else { ///////////////// // CRS, rank 2 // ///////////////// #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - useNative = useNative || (Conjugate[0] == mode[0]); + if constexpr (std::is_same_v) { + useNative = useNative || (Conjugate[0] == mode[0]); + } +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE + if constexpr (std::is_same_v) { + useNative = useNative || (Conjugate[0] == mode[0]); + } #endif if (useNative) { - std::string label = - "KokkosSparse::spmv[NATIVE,MV," + - Kokkos::ArithTraits< - typename AMatrix_Internal::non_const_value_type>::name() + - "]"; + std::string label = "KokkosSparse::spmv[NATIVE,MV," + + Kokkos::ArithTraits::name() + "]"; Kokkos::Profiling::pushRegion(label); - return Impl::SPMV_MV< - ExecutionSpace, HandleImpl, AMatrix_Internal, XVector_Internal, - YVector_Internal, - std::is_integral::value, - false>::spmv_mv(space, handle, mode, alpha, A_i, x_i, beta, y_i); + return Impl::SPMV_MV::value, false>::spmv_mv(space, + handle, + mode, + alpha, A_i, + x_i, beta, + y_i); Kokkos::Profiling::popRegion(); } else { - return Impl::SPMV_MV::spmv_mv(space, handle, mode, - alpha, A_i, x_i, beta, - y_i); + return Impl::SPMV_MV::spmv_mv( + space, handle, mode, alpha, A_i, x_i, beta, y_i); } } } else { @@ -355,47 +309,34 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], ///////////////// #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE // cuSPARSE does not support the modes (C), (T), (H) - if (std::is_same::value || - std::is_same::value) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] != NoTranspose[0]); } #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL - if (std::is_same::value) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] == Conjugate[0]); } #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE // rocSparse does not support the modes (C), (T), (H) - 
if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] != NoTranspose[0]); } #endif if (useNative) { // Explicitly call the non-TPL SPMV_BSRMATRIX implementation - std::string label = - "KokkosSparse::spmv[NATIVE,BSRMATRIX," + - Kokkos::ArithTraits< - typename AMatrix_Internal::non_const_value_type>::name() + - "]"; + std::string label = "KokkosSparse::spmv[NATIVE,BSRMATRIX," + + Kokkos::ArithTraits::name() + "]"; Kokkos::Profiling::pushRegion(label); - Impl::SPMV_BSRMATRIX::spmv_bsrmatrix(space, handle, mode, alpha, - A_i, x_i, beta, y_i); + Impl::SPMV_BSRMATRIX::spmv_bsrmatrix(space, handle, mode, alpha, A_i, x_i, beta, y_i); Kokkos::Profiling::popRegion(); } else { - Impl::SPMV_BSRMATRIX::spmv_bsrmatrix(space, handle, - mode, alpha, A_i, - x_i, beta, y_i); + Impl::SPMV_BSRMATRIX::spmv_bsrmatrix(space, handle, mode, alpha, A_i, x_i, beta, y_i); } } else { ///////////////// @@ -403,43 +344,31 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], ///////////////// #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE // cuSPARSE does not support the modes (C), (T), (H) - if (std::is_same::value || - std::is_same::value) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] != NoTranspose[0]); } #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL - if (std::is_same::value) { + if constexpr (std::is_same_v) { useNative = useNative || (mode[0] == Conjugate[0]); } #endif if (useNative) { // Explicitly call the non-TPL SPMV_BSRMATRIX implementation - std::string label = - "KokkosSparse::spmv[NATIVE,MV,BSMATRIX," + - Kokkos::ArithTraits< - typename AMatrix_Internal::non_const_value_type>::name() + - "]"; + std::string label = "KokkosSparse::spmv[NATIVE,MV,BSMATRIX," + + Kokkos::ArithTraits::name() + "]"; Kokkos::Profiling::pushRegion(label); - Impl::SPMV_MV_BSRMATRIX< - ExecutionSpace, HandleImpl, AMatrix_Internal, XVector_Internal, - YVector_Internal, - std::is_integral< - typename 
AMatrix_Internal::const_value_type>::value, - false>::spmv_mv_bsrmatrix(space, handle, mode, alpha, A_i, x_i, - beta, y_i); + Impl::SPMV_MV_BSRMATRIX::value, + false>::spmv_mv_bsrmatrix(space, handle, mode, alpha, A_i, x_i, beta, y_i); Kokkos::Profiling::popRegion(); } else { Impl::SPMV_MV_BSRMATRIX< - ExecutionSpace, HandleImpl, AMatrix_Internal, XVector_Internal, - YVector_Internal, - std::is_integral:: - value>::spmv_mv_bsrmatrix(space, handle, mode, alpha, A_i, x_i, - beta, y_i); + ExecutionSpace, HandleImpl, AMatrix_Internal, XVector_Internal, YVector_Internal, + std::is_integral::value>::spmv_mv_bsrmatrix(space, handle, + mode, alpha, A_i, + x_i, beta, y_i); } } } @@ -468,21 +397,16 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], /// \param beta [in] Scalar multiplier for the vector y. /// \param y [in/out] Result vector. // clang-format on -template ::value>> -void spmv(const ExecutionSpace& space, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, +template ::value>> +void spmv(const ExecutionSpace& space, const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { SPMVAlgorithm algo = SPMV_FAST_SETUP; // Without handle reuse, native is overall faster than rocSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE - if constexpr (std::is_same_v) - algo = SPMV_NATIVE; + if constexpr (std::is_same_v) algo = SPMV_NATIVE; #endif - SPMVHandle - handle(algo); + SPMVHandle handle(algo); spmv(space, &handle, mode, alpha, A, x, beta, y); } @@ -511,15 +435,11 @@ void spmv(const ExecutionSpace& space, const char mode[], /// \param beta [in] Scalar multiplier for the vector y. /// \param y [in/out] Result vector. 
// clang-format on -template < - class Handle, class AlphaType, class AMatrix, class XVector, class BetaType, - class YVector, - typename = std::enable_if_t::value>> -void spmv(Handle* handle, const char mode[], const AlphaType& alpha, - const AMatrix& A, const XVector& x, const BetaType& beta, - const YVector& y) { - spmv(typename Handle::ExecutionSpaceType(), handle, mode, alpha, A, x, beta, - y); +template ::value>> +void spmv(Handle* handle, const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, + const BetaType& beta, const YVector& y) { + spmv(typename Handle::ExecutionSpaceType(), handle, mode, alpha, A, x, beta, y); } // clang-format off @@ -541,409 +461,316 @@ void spmv(Handle* handle, const char mode[], const AlphaType& alpha, /// \param beta [in] Scalar multiplier for the vector y. /// \param y [in/out] Result vector. // clang-format on -template -void spmv(const char mode[], const AlphaType& alpha, const AMatrix& A, - const XVector& x, const BetaType& beta, const YVector& y) { +template +void spmv(const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, + const YVector& y) { SPMVAlgorithm algo = SPMV_FAST_SETUP; // Without handle reuse, native is overall faster than rocSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE - if constexpr (std::is_same_v) - algo = SPMV_NATIVE; + if constexpr (std::is_same_v) algo = SPMV_NATIVE; #endif - SPMVHandle - handle(algo); - spmv(typename AMatrix::execution_space(), &handle, mode, alpha, A, x, beta, - y); + SPMVHandle handle(algo); + spmv(typename AMatrix::execution_space(), &handle, mode, alpha, A, x, beta, y); } namespace Experimental { -template -void spmv_struct(const ExecutionSpace& space, const char mode[], - const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, +template +void spmv_struct(const ExecutionSpace& space, const char mode[], const 
int stencil_type, + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y, [[maybe_unused]] const RANK_ONE& tag) { // Make sure that both x and y have the same rank. - static_assert((int)XVector::rank == (int)YVector::rank, - "KokkosSparse::spmv_struct: Vector ranks do not match."); + static_assert((int)XVector::rank == (int)YVector::rank, "KokkosSparse::spmv_struct: Vector ranks do not match."); // Make sure A, x, y are accessible to ExecutionSpace - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv_struct: AMatrix must be accessible from " - "ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv_struct: XVector must be accessible from " - "ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv_struct: YVector must be accessible from " - "ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv_struct: AMatrix must be accessible from " + "ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv_struct: XVector must be accessible from " + "ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv_struct: YVector must be accessible from " + "ExecutionSpace"); // Make sure that x (and therefore y) is rank 1. - static_assert( - (int)XVector::rank == 1, - "KokkosSparse::spmv_struct: Both Vector inputs must have rank 1 in " - "order to call this specialization of spmv."); + static_assert((int)XVector::rank == 1, + "KokkosSparse::spmv_struct: Both Vector inputs must have rank 1 in " + "order to call this specialization of spmv."); // Make sure that y is non-const. - static_assert(std::is_same::value, + static_assert(std::is_same_v, "KokkosSparse::spmv_struct: Output Vector must be non-const."); // Check compatibility of dimensions at run time. 
if ((mode[0] == NoTranspose[0]) || (mode[0] == Conjugate[0])) { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols()) > static_cast(x.extent(0))) || + if ((x.extent(1) != y.extent(1)) || (static_cast(A.numCols()) > static_cast(x.extent(0))) || (static_cast(A.numRows()) > static_cast(y.extent(0)))) { std::ostringstream os; os << "KokkosSparse::spmv_struct: Dimensions do not match: " - << ", A: " << A.numRows() << " x " << A.numCols() - << ", x: " << x.extent(0) << " x " << x.extent(1) + << ", A: " << A.numRows() << " x " << A.numCols() << ", x: " << x.extent(0) << " x " << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } else { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols()) > static_cast(y.extent(0))) || + if ((x.extent(1) != y.extent(1)) || (static_cast(A.numCols()) > static_cast(y.extent(0))) || (static_cast(A.numRows()) > static_cast(x.extent(0)))) { std::ostringstream os; os << "KokkosSparse::spmv_struct: Dimensions do not match (transpose): " - << ", A: " << A.numRows() << " x " << A.numCols() - << ", x: " << x.extent(0) << " x " << x.extent(1) + << ", A: " << A.numRows() << " x " << A.numCols() << ", x: " << x.extent(0) << " x " << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } - typedef KokkosSparse::CrsMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type> + typedef KokkosSparse::CrsMatrix, + typename AMatrix::const_size_type> AMatrix_Internal; - typedef Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits> + typedef Kokkos::View::array_layout, + typename XVector::device_type, Kokkos::MemoryTraits> XVector_Internal; - typedef Kokkos::View< - 
typename YVector::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits> + typedef Kokkos::View::array_layout, + typename YVector::device_type, Kokkos::MemoryTraits> YVector_Internal; AMatrix_Internal A_i = A; XVector_Internal x_i = x; YVector_Internal y_i = y; - return KokkosSparse::Impl::SPMV_STRUCT< - ExecutionSpace, AMatrix_Internal, XVector_Internal, - YVector_Internal>::spmv_struct(space, mode, stencil_type, structure, - alpha, A_i, x_i, beta, y_i); + return KokkosSparse::Impl::SPMV_STRUCT::spmv_struct(space, mode, stencil_type, structure, alpha, + A_i, x_i, beta, y_i); } -template +template void spmv_struct(const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_ONE& tag) { - spmv_struct(typename AMatrix::execution_space{}, mode, stencil_type, - structure, alpha, A, x, beta, y, tag); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y, + const RANK_ONE& tag) { + spmv_struct(typename AMatrix::execution_space{}, mode, stencil_type, structure, alpha, A, x, beta, y, tag); } namespace Impl { -template +template struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y); template static bool spmv2d1d_struct( const ExecutionSpace& space, const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y); + const Kokkos::View& structure, + const 
AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y); }; #if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || !defined(KOKKOSKERNELS_ETI_ONLY) -template -struct SPMV2D1D_STRUCT { +template +struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_ONE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, RANK_ONE()); return true; } template static bool spmv2d1d_struct( const ExecutionSpace& space, const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_ONE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, RANK_ONE()); return true; } }; #else -template -struct SPMV2D1D_STRUCT { +template +struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char /*mode*/[], const int /*stencil_type*/, - const Kokkos::View& /*structure*/, - const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, - const BetaType& /*beta*/, const YVector& /*y*/) { + const Kokkos::View& /*structure*/, + const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, + const YVector& /*y*/) { return false; } template static bool spmv2d1d_struct( - const ExecutionSpace& /* space*/, const char /*mode*/[], - const int /*stencil_type*/, - const 
Kokkos::View& /*structure*/, - const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, - const BetaType& /*beta*/, const YVector& /*y*/) { + const ExecutionSpace& /* space*/, const char /*mode*/[], const int /*stencil_type*/, + const Kokkos::View& /*structure*/, + const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, + const YVector& /*y*/) { return false; } }; #endif #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || !defined(KOKKOSKERNELS_ETI_ONLY) -template -struct SPMV2D1D_STRUCT { +template +struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_ONE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, RANK_ONE()); return true; } template static bool spmv2d1d_struct( const ExecutionSpace& space, const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_ONE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, RANK_ONE()); return true; } }; #else -template -struct SPMV2D1D_STRUCT { +template +struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char /*mode*/[], const int /*stencil_type*/, - const Kokkos::View& /*structure*/, - const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, - const BetaType& /*beta*/, const YVector& 
/*y*/) { + const Kokkos::View& /*structure*/, + const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, + const YVector& /*y*/) { return false; } template static bool spmv2d1d_struct( - const ExecutionSpace /*space*/, const char /*mode*/[], - const int /*stencil_type*/, - const Kokkos::View& /*structure*/, - const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, - const BetaType& /*beta*/, const YVector& /*y*/) { + const ExecutionSpace /*space*/, const char /*mode*/[], const int /*stencil_type*/, + const Kokkos::View& /*structure*/, + const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, + const YVector& /*y*/) { return false; } }; #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || !defined(KOKKOSKERNELS_ETI_ONLY) -template -struct SPMV2D1D_STRUCT { +template +struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_ONE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, RANK_ONE()); return true; } template static bool spmv2d1d_struct( const ExecutionSpace& space, const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_ONE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, RANK_ONE()); return 
true; } }; #else -template -struct SPMV2D1D_STRUCT { +template +struct SPMV2D1D_STRUCT { static bool spmv2d1d_struct( const char /*mode*/[], const int /*stencil_type*/, - const Kokkos::View& /*structure*/, - const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, - const BetaType& /*beta*/, const YVector& /*y*/) { + const Kokkos::View& /*structure*/, + const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, + const YVector& /*y*/) { return false; } template static bool spmv2d1d_struct( - const ExecutionSpace& /*space*/, const char /*mode*/[], - const int /*stencil_type*/, - const Kokkos::View& /*structure*/, - const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, - const BetaType& /*beta*/, const YVector& /*y*/) { + const ExecutionSpace& /*space*/, const char /*mode*/[], const int /*stencil_type*/, + const Kokkos::View& /*structure*/, + const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, + const YVector& /*y*/) { return false; } }; #endif } // namespace Impl -template +template using SPMV2D1D_STRUCT [[deprecated("KokkosSparse::SPMV2D1D_STRUCT is not part of the public " "interface - use KokkosSparse::spmv_struct instead")]] = Impl::SPMV2D1D_STRUCT; -template -void spmv_struct(const ExecutionSpace& space, const char mode[], - const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, +template +void spmv_struct(const ExecutionSpace& space, const char mode[], const int stencil_type, + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y, [[maybe_unused]] const RANK_TWO& tag) { // Make sure A, x, y are accessible to ExecutionSpace - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv_struct: AMatrix must be accessible from " - 
"ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv_struct: XVector must be accessible from " - "ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosSparse::spmv_struct: YVector must be accessible from " - "ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv_struct: AMatrix must be accessible from " + "ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv_struct: XVector must be accessible from " + "ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "KokkosSparse::spmv_struct: YVector must be accessible from " + "ExecutionSpace"); // Make sure that both x and y have the same rank. - static_assert(XVector::rank == YVector::rank, - "KokkosSparse::spmv: Vector ranks do not match."); + static_assert(XVector::rank == YVector::rank, "KokkosSparse::spmv: Vector ranks do not match."); // Make sure that y is non-const. - static_assert(std::is_same::value, + static_assert(std::is_same_v, "KokkosSparse::spmv: Output Vector must be non-const."); // Check compatibility of dimensions at run time. 
if ((mode[0] == NoTranspose[0]) || (mode[0] == Conjugate[0])) { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols()) > static_cast(x.extent(0))) || + if ((x.extent(1) != y.extent(1)) || (static_cast(A.numCols()) > static_cast(x.extent(0))) || (static_cast(A.numRows()) > static_cast(y.extent(0)))) { std::ostringstream os; os << "KokkosSparse::spmv: Dimensions do not match: " - << ", A: " << A.numRows() << " x " << A.numCols() - << ", x: " << x.extent(0) << " x " << x.extent(1) + << ", A: " << A.numRows() << " x " << A.numCols() << ", x: " << x.extent(0) << " x " << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } else { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols()) > static_cast(y.extent(0))) || + if ((x.extent(1) != y.extent(1)) || (static_cast(A.numCols()) > static_cast(y.extent(0))) || (static_cast(A.numRows()) > static_cast(x.extent(0)))) { std::ostringstream os; os << "KokkosSparse::spmv: Dimensions do not match (transpose): " - << ", A: " << A.numRows() << " x " << A.numCols() - << ", x: " << x.extent(0) << " x " << x.extent(1) + << ", A: " << A.numRows() << " x " << A.numCols() << ", x: " << x.extent(0) << " x " << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } } - typedef KokkosSparse::CrsMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type> + typedef KokkosSparse::CrsMatrix, + typename AMatrix::const_size_type> AMatrix_Internal; AMatrix_Internal A_i = A; // Call single-vector version if appropriate if (x.extent(1) == 1) { - typedef Kokkos::View< - typename XVector::const_value_type*, typename YVector::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits> + typedef Kokkos::View> XVector_SubInternal; - typedef Kokkos::View< - typename 
YVector::non_const_value_type*, typename YVector::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits> + typedef Kokkos::View> YVector_SubInternal; XVector_SubInternal x_i = Kokkos::subview(x, Kokkos::ALL(), 0); YVector_SubInternal y_i = Kokkos::subview(y, Kokkos::ALL(), 0); // spmv_struct (mode, alpha, A, x_i, beta, y_i); - if (Impl::SPMV2D1D_STRUCT:: - spmv2d1d_struct(space, mode, stencil_type, structure, alpha, A, x_i, - beta, y_i)) { + if (Impl::SPMV2D1D_STRUCT::spmv2d1d_struct(space, mode, stencil_type, + structure, alpha, A, x_i, + beta, y_i)) { return; } } @@ -952,15 +779,12 @@ void spmv_struct(const ExecutionSpace& space, const char mode[], spmv(space, mode, alpha, A, x, beta, y); } -template +template void spmv_struct(const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_TWO& tag) { - spmv_struct(typename AMatrix::execution_space{}, mode, stencil_type, - structure, alpha, A, x, beta, y, tag); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y, + const RANK_TWO& tag) { + spmv_struct(typename AMatrix::execution_space{}, mode, stencil_type, structure, alpha, A, x, beta, y, tag); } /// \brief Public interface to structured local sparse matrix-vector multiply. @@ -985,18 +809,12 @@ void spmv_struct(const char mode[], const int stencil_type, /// \param y [in/out] Either a single vector (rank-1 Kokkos::View) or /// multivector (rank-2 Kokkos::View). It must have the same number /// of columns as x. 
-template +template void spmv_struct(const char mode[], const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - typedef - typename std::conditional::type - RANK_SPECIALISE; - spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_SPECIALISE()); + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + typedef typename std::conditional::type RANK_SPECIALISE; + spmv_struct(mode, stencil_type, structure, alpha, A, x, beta, y, RANK_SPECIALISE()); } /// \brief Public interface to structured local sparse matrix-vector multiply. @@ -1023,19 +841,12 @@ void spmv_struct(const char mode[], const int stencil_type, /// \param y [in/out] Either a single vector (rank-1 Kokkos::View) or /// multivector (rank-2 Kokkos::View). It must have the same number /// of columns as x. -template -void spmv_struct(const ExecutionSpace& space, const char mode[], - const int stencil_type, - const Kokkos::View& structure, - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - typedef - typename std::conditional::type - RANK_SPECIALISE; - spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, - RANK_SPECIALISE()); +template +void spmv_struct(const ExecutionSpace& space, const char mode[], const int stencil_type, + const Kokkos::View& structure, + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { + typedef typename std::conditional::type RANK_SPECIALISE; + spmv_struct(space, mode, stencil_type, structure, alpha, A, x, beta, y, RANK_SPECIALISE()); } } // namespace Experimental diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_deprecated.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_deprecated.hpp index f29caaec0c73..52f8b4a19c5d 100644 --- 
a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_deprecated.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_deprecated.hpp @@ -25,37 +25,29 @@ namespace KokkosSparse { namespace Impl { -template +template struct SPMV2D1D { - static bool spmv2d1d(const char mode[], const AlphaType& alpha, - const AMatrix& A, const XVector& x, const BetaType& beta, - const YVector& y); + static bool spmv2d1d(const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, + const BetaType& beta, const YVector& y); template - static bool spmv2d1d(const ExecutionSpace& space, const char mode[], - const AlphaType& alpha, const AMatrix& A, - const XVector& x, const BetaType& beta, - const YVector& y); + static bool spmv2d1d(const ExecutionSpace& space, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y); }; #if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || !defined(KOKKOSKERNELS_ETI_ONLY) -template -struct SPMV2D1D { - static bool spmv2d1d(const char mode[], const AlphaType& alpha, - const AMatrix& A, const XVector& x, const BetaType& beta, - const YVector& y) { +template +struct SPMV2D1D { + static bool spmv2d1d(const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, + const BetaType& beta, const YVector& y) { spmv(typename AMatrix::execution_space{}, mode, alpha, A, x, beta, y); return true; } template - static bool spmv2d1d(const ExecutionSpace& space, const char mode[], - const AlphaType& alpha, const AMatrix& A, - const XVector& x, const BetaType& beta, - const YVector& y) { + static bool spmv2d1d(const ExecutionSpace& space, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y) { spmv(space, mode, alpha, A, x, beta, y); return true; } @@ -63,43 +55,33 @@ struct SPMV2D1D -struct SPMV2D1D { - static bool spmv2d1d(const char /*mode*/[], const AlphaType& /*alpha*/, - const AMatrix& /*A*/, const 
XVector& /*x*/, +template +struct SPMV2D1D { + static bool spmv2d1d(const char /*mode*/[], const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, const YVector& /*y*/) { return false; } template - static bool spmv2d1d(const ExecutionSpace& /* space */, const char /*mode*/[], - const AlphaType& /*alpha*/, const AMatrix& /*A*/, - const XVector& /*x*/, const BetaType& /*beta*/, - const YVector& /*y*/) { + static bool spmv2d1d(const ExecutionSpace& /* space */, const char /*mode*/[], const AlphaType& /*alpha*/, + const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, const YVector& /*y*/) { return false; } }; #endif #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || !defined(KOKKOSKERNELS_ETI_ONLY) -template -struct SPMV2D1D { - static bool spmv2d1d(const char mode[], const AlphaType& alpha, - const AMatrix& A, const XVector& x, const BetaType& beta, - const YVector& y) { +template +struct SPMV2D1D { + static bool spmv2d1d(const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, + const BetaType& beta, const YVector& y) { spmv(typename AMatrix::execution_space{}, mode, alpha, A, x, beta, y); return true; } template - static bool spmv2d1d(const ExecutionSpace& space, const char mode[], - const AlphaType& alpha, const AMatrix& A, - const XVector& x, const BetaType& beta, - const YVector& y) { + static bool spmv2d1d(const ExecutionSpace& space, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y) { spmv(space, mode, alpha, A, x, beta, y); return true; } @@ -107,43 +89,33 @@ struct SPMV2D1D -struct SPMV2D1D { - static bool spmv2d1d(const char /*mode*/[], const AlphaType& /*alpha*/, - const AMatrix& /*A*/, const XVector& /*x*/, +template +struct SPMV2D1D { + static bool spmv2d1d(const char /*mode*/[], const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, const YVector& /*y*/) { return 
false; } template - static bool spmv2d1d(const ExecutionSpace& /* space */, const char /*mode*/[], - const AlphaType& /*alpha*/, const AMatrix& /*A*/, - const XVector& /*x*/, const BetaType& /*beta*/, - const YVector& /*y*/) { + static bool spmv2d1d(const ExecutionSpace& /* space */, const char /*mode*/[], const AlphaType& /*alpha*/, + const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, const YVector& /*y*/) { return false; } }; #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || !defined(KOKKOSKERNELS_ETI_ONLY) -template -struct SPMV2D1D { - static bool spmv2d1d(const char mode[], const AlphaType& alpha, - const AMatrix& A, const XVector& x, const BetaType& beta, - const YVector& y) { +template +struct SPMV2D1D { + static bool spmv2d1d(const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, + const BetaType& beta, const YVector& y) { spmv(typename AMatrix::execution_space{}, mode, alpha, A, x, beta, y); return true; } template - static bool spmv2d1d(const ExecutionSpace& space, const char mode[], - const AlphaType& alpha, const AMatrix& A, - const XVector& x, const BetaType& beta, - const YVector& y) { + static bool spmv2d1d(const ExecutionSpace& space, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y) { spmv(space, mode, alpha, A, x, beta, y); return true; } @@ -151,60 +123,49 @@ struct SPMV2D1D -struct SPMV2D1D { - static bool spmv2d1d(const char /*mode*/[], const AlphaType& /*alpha*/, - const AMatrix& /*A*/, const XVector& /*x*/, +template +struct SPMV2D1D { + static bool spmv2d1d(const char /*mode*/[], const AlphaType& /*alpha*/, const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, const YVector& /*y*/) { return false; } template - static bool spmv2d1d(const ExecutionSpace& /* space */, const char /*mode*/[], - const AlphaType& /*alpha*/, const AMatrix& /*A*/, - const XVector& /*x*/, const BetaType& /*beta*/, - const YVector& 
/*y*/) { + static bool spmv2d1d(const ExecutionSpace& /* space */, const char /*mode*/[], const AlphaType& /*alpha*/, + const AMatrix& /*A*/, const XVector& /*x*/, const BetaType& /*beta*/, const YVector& /*y*/) { return false; } }; #endif } // namespace Impl -template +template using SPMV2D1D [[deprecated("KokkosSparse::SPMV2D1D is not part of the public interface - " - "use KokkosSparse::spmv instead")]] = - Impl::SPMV2D1D; - -template -[ - [deprecated("Use the version of spmv that takes a SPMVHandle instead of " - "Controls")]] void -spmv(const ExecutionSpace& space, - KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { + "use KokkosSparse::spmv instead")]] = Impl::SPMV2D1D; + +template +[[deprecated( + "Use the version of spmv that takes a SPMVHandle instead of " + "Controls")]] void +spmv(const ExecutionSpace& space, KokkosKernels::Experimental::Controls controls, const char mode[], + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { // Default to fast setup, since this handle can't be reused SPMVAlgorithm algo = SPMV_FAST_SETUP; // Translate the Controls algorithm selection to the SPMVHandle algorithm. - // This maintains the old behavior, where any manually set name that isn't - // "tpl" gives native. - // - // This also uses the behavior set by #2021: "merge" was a hint to use - // cuSPARSE merge path, but that path is gone so just use the normal TPL. - // "merge-path" means to use the KK merge-path implementation. // // And also support the 3 different BSR algorithms by their old names. 
if (controls.isParameter("algorithm")) { std::string algoName = controls.getParameter("algorithm"); - if (algoName == "merge" || algoName == "tpl") + if (algoName == "tpl") algo = SPMV_FAST_SETUP; - else if (algoName == "native-merge") + else if (algoName == "native") + algo = SPMV_NATIVE; + else if (algoName == "merge") algo = SPMV_MERGE_PATH; + else if (algoName == "native-merge") + algo = SPMV_NATIVE_MERGE_PATH; else if (algoName == "v4.1") algo = SPMV_BSR_V41; else if (algoName == "v4.2") @@ -212,12 +173,10 @@ spmv(const ExecutionSpace& space, else if (algoName == "experimental_bsr_tc" || algoName == "experimental_tc") algo = SPMV_BSR_TC; else - throw std::invalid_argument( - std::string("KokkosSparse::spmv: controls algorithm name '") + - algoName + "' is not supported.\n"); + throw std::invalid_argument(std::string("KokkosSparse::spmv: controls algorithm name '") + algoName + + "' is not supported.\n"); } - KokkosSparse::SPMVHandle handle( - algo); + KokkosSparse::SPMVHandle handle(algo); // Pull out any expert tuning parameters if (controls.isParameter("schedule")) { if (controls.getParameter("schedule") == "dynamic") { @@ -226,71 +185,57 @@ spmv(const ExecutionSpace& space, handle.force_static_schedule = true; } } - if (controls.isParameter("team size")) - handle.team_size = std::stoi(controls.getParameter("team size")); - if (controls.isParameter("vector length")) - handle.vector_length = std::stoi(controls.getParameter("vector length")); + if (controls.isParameter("team size")) handle.team_size = std::stoi(controls.getParameter("team size")); + if (controls.isParameter("vector length")) handle.vector_length = std::stoi(controls.getParameter("vector length")); if (controls.isParameter("rows per thread")) - handle.rows_per_thread = - std::stoll(controls.getParameter("rows per thread")); + handle.rows_per_thread = std::stoll(controls.getParameter("rows per thread")); spmv(space, &handle, mode, alpha, A, x, beta, y); } -template -[ - [deprecated("Use the 
version of spmv that takes a SPMVHandle instead of " - "Controls")]] void -spmv(KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y) { - spmv(typename AMatrix::execution_space{}, controls, mode, alpha, A, x, beta, - y); +template +[[deprecated( + "Use the version of spmv that takes a SPMVHandle instead of " + "Controls")]] void +spmv(KokkosKernels::Experimental::Controls controls, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y) { + spmv(typename AMatrix::execution_space{}, controls, mode, alpha, A, x, beta, y); } -template -[ - [deprecated("Use the version of spmv that takes a SPMVHandle instead of " - "Controls")]] void -spmv(const ExecutionSpace& space, - KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_ONE&) { +template +[[deprecated( + "Use the version of spmv that takes a SPMVHandle instead of " + "Controls")]] void +spmv(const ExecutionSpace& space, KokkosKernels::Experimental::Controls controls, const char mode[], + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y, + const RANK_ONE&) { spmv(space, controls, mode, alpha, A, x, beta, y); } -template -[ - [deprecated("Use the version of spmv that takes a SPMVHandle instead of " - "Controls")]] void -spmv(KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_ONE&) { +template +[[deprecated( + "Use the version of spmv that takes a SPMVHandle instead of " + "Controls")]] void +spmv(KokkosKernels::Experimental::Controls controls, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const 
BetaType& beta, const YVector& y, const RANK_ONE&) { spmv(controls, mode, alpha, A, x, beta, y); } -template -[ - [deprecated("Use the version of spmv that takes a SPMVHandle instead of " - "Controls")]] void -spmv(const ExecutionSpace& space, - KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_TWO&) { +template +[[deprecated( + "Use the version of spmv that takes a SPMVHandle instead of " + "Controls")]] void +spmv(const ExecutionSpace& space, KokkosKernels::Experimental::Controls controls, const char mode[], + const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y, + const RANK_TWO&) { spmv(space, controls, mode, alpha, A, x, beta, y); } -template -[ - [deprecated("Use the version of spmv that takes a SPMVHandle instead of " - "Controls")]] void -spmv(KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_TWO&) { +template +[[deprecated( + "Use the version of spmv that takes a SPMVHandle instead of " + "Controls")]] void +spmv(KokkosKernels::Experimental::Controls controls, const char mode[], const AlphaType& alpha, const AMatrix& A, + const XVector& x, const BetaType& beta, const YVector& y, const RANK_TWO&) { spmv(controls, mode, alpha, A, x, beta, y); } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp index b0759bb7a633..e91e53d68d14 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp @@ -30,20 +30,20 @@ namespace KokkosSparse { /// SPMVAlgorithm values can be used to select different algorithms/methods for /// performing SpMV computations. 
enum SPMVAlgorithm { - SPMV_DEFAULT, /// Default algorithm: best overall performance for repeated - /// applications of SpMV. - SPMV_FAST_SETUP, /// Best performance in the non-reuse case, where the handle - /// is only used once. - SPMV_NATIVE, /// Use the best KokkosKernels implementation, even if a TPL - /// implementation is available. - SPMV_MERGE_PATH, /// Use algorithm optimized for matrices with - /// imbalanced/irregular sparsity patterns (merge path or - /// similar). May call a TPL. For CrsMatrix only. + SPMV_DEFAULT, /// Default algorithm: best overall performance for repeated + /// applications of SpMV. + SPMV_FAST_SETUP, /// Best performance in the non-reuse case, where the handle + /// is only used once. + SPMV_NATIVE, /// Use the best KokkosKernels implementation, even if a TPL + /// implementation is available. + SPMV_MERGE_PATH, /// Use algorithm optimized for matrices with + /// imbalanced/irregular sparsity patterns (merge path or + /// similar). May call a TPL. For CrsMatrix only. SPMV_NATIVE_MERGE_PATH, /// Use the KokkosKernels implementation of merge /// path. For CrsMatrix only. 
- SPMV_BSR_V41, /// Use experimental version 4.1 algorithm (for BsrMatrix only) - SPMV_BSR_V42, /// Use experimental version 4.2 algorithm (for BsrMatrix only) - SPMV_BSR_TC /// Use experimental tensor core algorithm (for BsrMatrix only) + SPMV_BSR_V41, /// Use experimental version 4.1 algorithm (for BsrMatrix only) + SPMV_BSR_V42, /// Use experimental version 4.2 algorithm (for BsrMatrix only) + SPMV_BSR_TC /// Use experimental tensor core algorithm (for BsrMatrix only) }; namespace Experimental { @@ -67,8 +67,7 @@ inline const char* get_spmv_algorithm_name(SPMVAlgorithm a) { case SPMV_BSR_V42: return "SPMV_BSR_V42"; case SPMV_BSR_TC: return "SPMV_BSR_TC"; } - throw std::invalid_argument( - "SPMVHandle::get_algorithm_name: unknown algorithm"); + throw std::invalid_argument("SPMVHandle::get_algorithm_name: unknown algorithm"); return ""; } @@ -137,9 +136,7 @@ struct CuSparse10_SpMV_Data : public TPL_SpMV_Data { // Data used by cuSPARSE <10.3 for CRS, and >=9 for BSR struct CuSparse9_SpMV_Data : public TPL_SpMV_Data { CuSparse9_SpMV_Data(const Kokkos::Cuda& exec_) : TPL_SpMV_Data(exec_) {} - ~CuSparse9_SpMV_Data() { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(mat)); - } + ~CuSparse9_SpMV_Data() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(mat)); } cusparseMatDescr_t mat; }; @@ -182,8 +179,7 @@ struct RocSparse_BSR_SpMV_Data : public TPL_SpMV_Data { // Data for classic MKL (both CRS and BSR) template struct MKL_SpMV_Data : public TPL_SpMV_Data { - MKL_SpMV_Data(const ExecutionSpace& exec_) - : TPL_SpMV_Data(exec_) {} + MKL_SpMV_Data(const ExecutionSpace& exec_) : TPL_SpMV_Data(exec_) {} ~MKL_SpMV_Data() { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_destroy(mat)); // descr is just a plain-old-data struct, no cleanup to do @@ -194,11 +190,9 @@ struct MKL_SpMV_Data : public TPL_SpMV_Data { }; #endif -#if defined(KOKKOS_ENABLE_SYCL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) && 
!defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) struct OneMKL_SpMV_Data : public TPL_SpMV_Data { - OneMKL_SpMV_Data(const Kokkos::Experimental::SYCL& exec_) - : TPL_SpMV_Data(exec_) {} + OneMKL_SpMV_Data(const Kokkos::Experimental::SYCL& exec_) : TPL_SpMV_Data(exec_) {} ~OneMKL_SpMV_Data() { // Make sure no spmv is still running with this handle, if exec uses an // out-of-order queue (rare case) @@ -210,8 +204,7 @@ struct OneMKL_SpMV_Data : public TPL_SpMV_Data { #else // But in older versions, wait on ev_release before letting mat go out of // scope - auto ev_release = - oneapi::mkl::sparse::release_matrix_handle(exec.sycl_queue(), &mat); + auto ev_release = oneapi::mkl::sparse::release_matrix_handle(exec.sycl_queue(), &mat); ev_release.wait(); #endif } @@ -221,23 +214,18 @@ struct OneMKL_SpMV_Data : public TPL_SpMV_Data { #endif #endif -template +template struct SPMVHandleImpl { using ExecutionSpaceType = ExecutionSpace; // This is its own ImplType - using ImplType = - SPMVHandleImpl; + using ImplType = SPMVHandleImpl; // Do not allow const qualifier on Scalar, Ordinal, Offset (otherwise this // type won't match the ETI'd type). Users should not use SPMVHandleImpl // directly and SPMVHandle explicitly removes const, so this should never // happen in practice. 
- static_assert(!std::is_const_v, - "SPMVHandleImpl: Scalar must not be a const type"); - static_assert(!std::is_const_v, - "SPMVHandleImpl: Offset must not be a const type"); - static_assert(!std::is_const_v, - "SPMVHandleImpl: Ordinal must not be a const type"); + static_assert(!std::is_const_v, "SPMVHandleImpl: Scalar must not be a const type"); + static_assert(!std::is_const_v, "SPMVHandleImpl: Offset must not be a const type"); + static_assert(!std::is_const_v, "SPMVHandleImpl: Ordinal must not be a const type"); SPMVHandleImpl(SPMVAlgorithm algo_) : algo(algo_) {} ~SPMVHandleImpl() { if (tpl_rank1) delete tpl_rank1; @@ -291,17 +279,12 @@ struct SPMVHandleImpl { template struct SPMVHandle - : public Impl::SPMVHandleImpl { - using ImplType = - Impl::SPMVHandleImpl; + using ImplType = Impl::SPMVHandleImpl; // Note: these typedef names cannot shadow template parameters using AMatrixType = AMatrix; using XVectorType = XVector; @@ -311,34 +294,23 @@ struct SPMVHandle // NOTE: we do not require that ExecutionSpace matches // AMatrix::execution_space. For example, if the matrix's device is it is allowed to run spmv on Serial. 
- static_assert(is_crs_matrix_v || - Experimental::is_bsr_matrix_v, + static_assert(is_crs_matrix_v || Experimental::is_bsr_matrix_v, "SPMVHandle: AMatrix must be a specialization of CrsMatrix or " "BsrMatrix."); - static_assert(Kokkos::is_view::value, - "SPMVHandle: XVector must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "SPMVHandle: YVector must be a Kokkos::View."); - static_assert(XVector::rank() == YVector::rank(), - "SPMVHandle: ranks of XVector and YVector must match."); - static_assert( - XVector::rank() == size_t(1) || YVector::rank() == size_t(2), - "SPMVHandle: XVector and YVector must be both rank-1 or both rank-2."); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "SPMVHandle: AMatrix must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "SPMVHandle: XVector must be accessible from ExecutionSpace"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "SPMVHandle: YVector must be accessible from ExecutionSpace"); + static_assert(Kokkos::is_view::value, "SPMVHandle: XVector must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "SPMVHandle: YVector must be a Kokkos::View."); + static_assert(XVector::rank() == YVector::rank(), "SPMVHandle: ranks of XVector and YVector must match."); + static_assert(XVector::rank() == size_t(1) || YVector::rank() == size_t(2), + "SPMVHandle: XVector and YVector must be both rank-1 or both rank-2."); + static_assert(Kokkos::SpaceAccessibility::accessible, + "SPMVHandle: AMatrix must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "SPMVHandle: XVector must be accessible from ExecutionSpace"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "SPMVHandle: YVector must be accessible from ExecutionSpace"); // Prevent copying (this object does not support reference counting) - SPMVHandle(const SPMVHandle&) = delete; + SPMVHandle(const SPMVHandle&) = delete; SPMVHandle& 
operator=(const SPMVHandle&) = delete; /// \brief Create a new SPMVHandle using the given algorithm. @@ -349,8 +321,7 @@ struct SPMVHandle case SPMV_BSR_V41: case SPMV_BSR_V42: case SPMV_BSR_TC: - throw std::invalid_argument(std::string("SPMVHandle: algorithm ") + - get_spmv_algorithm_name(get_algorithm()) + + throw std::invalid_argument(std::string("SPMVHandle: algorithm ") + get_spmv_algorithm_name(get_algorithm()) + " cannot be used if A is a CrsMatrix"); default:; } @@ -358,8 +329,7 @@ struct SPMVHandle switch (get_algorithm()) { case SPMV_MERGE_PATH: case SPMV_NATIVE_MERGE_PATH: - throw std::invalid_argument(std::string("SPMVHandle: algorithm ") + - get_spmv_algorithm_name(get_algorithm()) + + throw std::invalid_argument(std::string("SPMVHandle: algorithm ") + get_spmv_algorithm_name(get_algorithm()) + " cannot be used if A is a BsrMatrix"); default:; } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_team.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_team.hpp index 5c9e8436696c..7656658b4c30 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_team.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_team.hpp @@ -29,138 +29,88 @@ namespace Experimental { /// \brief Sparse matrix-vector multiply: y = beta*y + alpha*A*x. 
/// -template -int KOKKOS_INLINE_FUNCTION team_spmv( - const TeamType &team, const ScalarType &alpha, const ValuesViewType &values, - const IntView &row_ptr, const IntView &colIndices, const xViewType &x, - const ScalarType &beta, const yViewType &y, const int dobeta) { +template +int KOKKOS_INLINE_FUNCTION team_spmv(const TeamType &team, const ScalarType &alpha, const ValuesViewType &values, + const IntView &row_ptr, const IntView &colIndices, const xViewType &x, + const ScalarType &beta, const yViewType &y, const int dobeta) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "ValuesViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "IntView must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "xViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "yViewType must be a Kokkos::View."); - static_assert(static_cast(ValuesViewType::rank) == 1, - "ValuesViewType must have rank 1."); - static_assert(static_cast(IntView::rank) == 1, - "IntView must have rank 1."); - static_assert(static_cast(xViewType::rank) == 1, - "xViewType must have rank 1."); - static_assert(static_cast(yViewType::rank) == 1, - "yViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "ValuesViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "IntView must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "xViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "yViewType must be a Kokkos::View."); + static_assert(static_cast(ValuesViewType::rank) == 1, "ValuesViewType must have rank 1."); + static_assert(static_cast(IntView::rank) == 1, "IntView must have rank 1."); + static_assert(static_cast(xViewType::rank) == 1, "xViewType must have rank 1."); + static_assert(static_cast(yViewType::rank) == 1, "yViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (values.extent(0) != colIndices.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosSparse::spmv: Dimensions of values and colIndices do not match: " - "values: %d, colIndices: %d", - (int)values.extent(0), (int)colIndices.extent(0)); -#else Kokkos::printf( "KokkosSparse::spmv: Dimensions of values and colIndices do not match: " "values: %d, colIndices: %d", (int)values.extent(0), (int)colIndices.extent(0)); -#endif return 1; } - if (x.extent(0) != y.extent(0) || (x.extent(0) + 1) != row_ptr.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosSparse::spmv: Dimensions of x, y, and row_ptr do not match: " - "x: %d, y: %d, row_ptr: %d", - (int)x.extent(0), (int)y.extent(0), (int)row_ptr.extent(0)); -#else + if ((x.extent(0) + 1) != row_ptr.extent(0)) { Kokkos::printf( "KokkosSparse::spmv: Dimensions of x, y, and row_ptr do not match: " "x: %d, y: %d, row_ptr: %d", (int)x.extent(0), (int)y.extent(0), (int)row_ptr.extent(0)); -#endif return 1; } #endif // KOKKOSKERNELS_DEBUG_LEVEL if (dobeta == 1) - return KokkosSparse::TeamSpmv::template invoke< - ScalarType, ValuesViewType, IntView, xViewType, yViewType, 1>( - team, alpha, values, row_ptr, colIndices, x, beta, y); + return KokkosSparse::TeamSpmv::template invoke(team, alpha, values, row_ptr, colIndices, x, beta, y); else - return KokkosSparse::TeamSpmv::template invoke< - ScalarType, ValuesViewType, IntView, xViewType, yViewType, 0>( - team, alpha, values, row_ptr, colIndices, x, beta, y); + return KokkosSparse::TeamSpmv::template invoke(team, alpha, values, row_ptr, colIndices, x, beta, y); } /// \brief Sparse matrix-vector multiply: y = beta*y + alpha*A*x. 
/// -template -int KOKKOS_INLINE_FUNCTION team_vector_spmv( - const TeamType &team, const ScalarType &alpha, const ValuesViewType &values, - const IntView &row_ptr, const IntView &colIndices, const xViewType &x, - const ScalarType &beta, const yViewType &y, const int dobeta) { +template +int KOKKOS_INLINE_FUNCTION team_vector_spmv(const TeamType &team, const ScalarType &alpha, const ValuesViewType &values, + const IntView &row_ptr, const IntView &colIndices, const xViewType &x, + const ScalarType &beta, const yViewType &y, const int dobeta) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::is_view::value, - "ValuesViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "IntView must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "xViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "yViewType must be a Kokkos::View."); - static_assert(static_cast(ValuesViewType::rank) == 1, - "ValuesViewType must have rank 1."); - static_assert(static_cast(IntView::rank) == 1, - "IntView must have rank 1."); - static_assert(static_cast(xViewType::rank) == 1, - "xViewType must have rank 1."); - static_assert(static_cast(yViewType::rank) == 1, - "yViewType must have rank 1."); + static_assert(Kokkos::is_view::value, "ValuesViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "IntView must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "xViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "yViewType must be a Kokkos::View."); + static_assert(static_cast(ValuesViewType::rank) == 1, "ValuesViewType must have rank 1."); + static_assert(static_cast(IntView::rank) == 1, "IntView must have rank 1."); + static_assert(static_cast(xViewType::rank) == 1, "xViewType must have rank 1."); + static_assert(static_cast(yViewType::rank) == 1, "yViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (values.extent(0) != colIndices.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosSparse::spmv: Dimensions of values and colIndices do not match: " - "values: %d, colIndices: %d", - (int)values.extent(0), (int)colIndices.extent(0)); -#else Kokkos::printf( "KokkosSparse::spmv: Dimensions of values and colIndices do not match: " "values: %d, colIndices: %d", (int)values.extent(0), (int)colIndices.extent(0)); -#endif return 1; } - if (x.extent(0) != y.extent(0) || (x.extent(0) + 1) != row_ptr.extent(0)) { -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "KokkosSparse::spmv: Dimensions of x, y, and row_ptr do not match: " - "x: %d, y: %d, row_ptr: %d", - (int)x.extent(0), (int)y.extent(0), (int)row_ptr.extent(0)); -#else + if ((x.extent(0) + 1) != row_ptr.extent(0)) { Kokkos::printf( "KokkosSparse::spmv: Dimensions of x, y, and row_ptr do not match: " "x: %d, y: %d, row_ptr: %d", (int)x.extent(0), (int)y.extent(0), (int)row_ptr.extent(0)); -#endif return 1; } #endif // KOKKOSKERNELS_DEBUG_LEVEL if (dobeta == 1) - return KokkosSparse::TeamVectorSpmv::template invoke< - ScalarType, ValuesViewType, IntView, xViewType, yViewType, 1>( - team, alpha, values, row_ptr, colIndices, x, beta, y); + return KokkosSparse::TeamVectorSpmv::template invoke(team, alpha, values, row_ptr, + colIndices, x, beta, y); else - return KokkosSparse::TeamVectorSpmv::template invoke< - ScalarType, ValuesViewType, IntView, xViewType, yViewType, 0>( - team, alpha, values, row_ptr, colIndices, x, beta, y); + return KokkosSparse::TeamVectorSpmv::template invoke(team, alpha, values, row_ptr, + colIndices, x, beta, y); } } // namespace Experimental diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv.hpp index 1fef3e9f1b25..f153e207780a 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv.hpp @@ -26,7 
+26,7 @@ #include -//#include "KokkosSparse_sptrsv_handle.hpp" +// #include "KokkosSparse_sptrsv_handle.hpp" #include "KokkosKernels_helpers.hpp" #include "KokkosSparse_sptrsv_symbolic_spec.hpp" #include "KokkosSparse_sptrsv_solve_spec.hpp" @@ -36,9 +36,8 @@ namespace KokkosSparse { namespace Experimental { -#define KOKKOSKERNELS_SPTRSV_SAME_TYPE(A, B) \ - std::is_same::type, \ - typename std::remove_const::type>::value +#define KOKKOSKERNELS_SPTRSV_SAME_TYPE(A, B) \ + std::is_same::type, typename std::remove_const::type>::value /** * @brief sptrsv symbolic phase for linear system Ax=b @@ -53,23 +52,19 @@ namespace Experimental { * @param rowmap The CRS matrix's (A) rowmap * @param entries The CRS matrix's (A) entries */ -template -void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, - lno_row_view_t_ rowmap, lno_nnz_view_t_ entries) { +template +void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, lno_row_view_t_ rowmap, + lno_nnz_view_t_ entries) { typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::nnz_lno_t ordinal_type; - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_row_view_t_::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_row_view_t_::non_const_value_type, size_type), "sptrsv_symbolic: A size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_nnz_view_t_::non_const_value_type, ordinal_type), - "sptrsv_symbolic: A entry type must match KernelHandle entry type (aka " - "nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_nnz_view_t_::non_const_value_type, ordinal_type), + "sptrsv_symbolic: A entry type must match KernelHandle entry type (aka " + "nnz_lno_t, and const doesn't matter)"); typedef typename KernelHandle::const_size_type c_size_t; typedef typename KernelHandle::const_nnz_lno_t c_lno_t; @@ 
-79,25 +74,21 @@ void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); - typedef Kokkos::View< - typename lno_row_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_row_view_t_>::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename lno_row_view_t_::device_type, + Kokkos::MemoryTraits > RowMap_Internal; - typedef Kokkos::View< - typename lno_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_nnz_view_t_>::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename lno_nnz_view_t_::device_type, + Kokkos::MemoryTraits > Entries_Internal; #ifdef KK_TRISOLVE_TIMERS @@ -106,14 +97,11 @@ void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, RowMap_Internal rowmap_i = rowmap; Entries_Internal entries_i = entries; - KokkosSparse::Impl::SPTRSV_SYMBOLIC< - ExecutionSpace, const_handle_type, RowMap_Internal, - Entries_Internal>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, - entries_i); + KokkosSparse::Impl::SPTRSV_SYMBOLIC::sptrsv_symbolic(space, &tmp_handle, rowmap_i, entries_i); #ifdef KK_TRISOLVE_TIMERS - std::cout << " > sptrsv_symbolic time = " << timer_sptrsv.seconds() - << std::endl; + std::cout << " > sptrsv_symbolic time = " << timer_sptrsv.seconds() << std::endl; #endif } // sptrsv_symbolic @@ -128,10 +116,8 @@ void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, * @param rowmap The CRS matrix's (A) rowmap * 
@param entries The CRS matrix's (A) entries */ -template -void sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, - lno_nnz_view_t_ entries) { +template +void sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries) { using ExecutionSpace = typename KernelHandle::HandleExecSpace; auto my_exec_space = ExecutionSpace(); sptrsv_symbolic(my_exec_space, handle, rowmap, entries); @@ -151,33 +137,26 @@ void sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, * @param entries The CRS matrix's (A) entries * @param values The CRS matrix's (A) values */ -template -void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, - lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, +void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values) { typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::nnz_lno_t ordinal_type; typedef typename KernelHandle::nnz_scalar_t scalar_type; - static_assert( - std::is_same_v, - "sptrsv_symbolic: ExecutionSpace and HandleExecSpace need to match!"); + static_assert(std::is_same_v, + "sptrsv_symbolic: ExecutionSpace and HandleExecSpace need to match!"); - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_row_view_t_::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_row_view_t_::non_const_value_type, size_type), "sptrsv_symbolic: A size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_nnz_view_t_::non_const_value_type, ordinal_type), - "sptrsv_symbolic: A entry type must match KernelHandle entry type (aka " - "nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_nnz_view_t_::non_const_value_type, ordinal_type), + "sptrsv_symbolic: A entry type must match KernelHandle entry type (aka " + "nnz_lno_t, and 
const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename scalar_nnz_view_t_::value_type, scalar_type), + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename scalar_nnz_view_t_::value_type, scalar_type), "sptrsv_symbolic: A scalar type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); @@ -189,8 +168,8 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); @@ -198,30 +177,26 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, Kokkos::Timer timer_sptrsv; #endif auto sptrsv_handle = handle->get_sptrsv_handle(); - if (sptrsv_handle->get_algorithm() == - KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { + if (sptrsv_handle->get_algorithm() == KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE if constexpr (std::is_same_v) { - using RowMap_Internal = Kokkos::View< - typename lno_row_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_row_view_t_>::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits >; - - using Entries_Internal = Kokkos::View< - typename lno_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_nnz_view_t_>::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits >; - - using Values_Internal = Kokkos::View< - typename scalar_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - scalar_nnz_view_t_>::array_layout, - typename scalar_nnz_view_t_::device_type, - 
Kokkos::MemoryTraits >; + using RowMap_Internal = + Kokkos::View::array_layout, + typename lno_row_view_t_::device_type, + Kokkos::MemoryTraits >; + + using Entries_Internal = + Kokkos::View::array_layout, + typename lno_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; + + using Values_Internal = + Kokkos::View::array_layout, + typename scalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; RowMap_Internal rowmap_i = rowmap; Entries_Internal entries_i = entries; @@ -231,14 +206,12 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, sptrsvHandleType *sh = handle->get_sptrsv_handle(); auto nrows = sh->get_nrows(); - KokkosSparse::Impl::sptrsvcuSPARSE_symbolic< - ExecutionSpace, sptrsvHandleType, RowMap_Internal, Entries_Internal, - Values_Internal>(space, sh, nrows, rowmap_i, entries_i, values_i, - false); + KokkosSparse::Impl::sptrsvcuSPARSE_symbolic(space, sh, nrows, rowmap_i, entries_i, values_i, + false); } else { (void)values; - KokkosSparse::Experimental::sptrsv_symbolic(space, handle, rowmap, - entries); + KokkosSparse::Experimental::sptrsv_symbolic(space, handle, rowmap, entries); } #else // We better go to the native implementation @@ -250,8 +223,7 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, KokkosSparse::Experimental::sptrsv_symbolic(space, handle, rowmap, entries); } #ifdef KK_TRISOLVE_TIMERS - std::cout << " + sptrsv_symbolic time = " << timer_sptrsv.seconds() - << std::endl; + std::cout << " + sptrsv_symbolic time = " << timer_sptrsv.seconds() << std::endl; #endif } // sptrsv_symbolic @@ -267,10 +239,8 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, * @param entries The CRS matrix's (A) entries * @param values The CRS matrix's (A) values */ -template -void sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, - lno_nnz_view_t_ entries, scalar_nnz_view_t_ values) { +template +void sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, 
scalar_nnz_view_t_ values) { using ExecutionSpace = typename KernelHandle::HandleExecSpace; auto my_exec_space = ExecutionSpace(); @@ -296,58 +266,41 @@ void sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, * @param b The b vector * @param x The x vector */ -template -void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, - lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, +void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, BType b, XType x) { typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::nnz_lno_t ordinal_type; typedef typename KernelHandle::nnz_scalar_t scalar_type; - static_assert( - std::is_same_v, - "sptrsv solve: ExecutionSpace and HandleExecSpace need to match"); + static_assert(std::is_same_v, + "sptrsv solve: ExecutionSpace and HandleExecSpace need to match"); - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_row_view_t_::non_const_value_type, size_type), + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_row_view_t_::non_const_value_type, size_type), "sptrsv_solve: A size_type must match KernelHandle size_type " "(const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_nnz_view_t_::non_const_value_type, ordinal_type), - "sptrsv_solve: A entry type must match KernelHandle entry type (aka " - "nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename scalar_nnz_view_t_::value_type, scalar_type), + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_nnz_view_t_::non_const_value_type, ordinal_type), + "sptrsv_solve: A entry type must match KernelHandle entry type (aka " + "nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename scalar_nnz_view_t_::value_type, scalar_type), "sptrsv_solve: A scalar type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't 
matter)"); - static_assert(Kokkos::is_view::value, - "sptrsv: b is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "sptrsv: x is not a Kokkos::View."); - static_assert((int)BType::rank == (int)XType::rank, - "sptrsv: The ranks of b and x do not match."); - static_assert(BType::rank == 1, - "sptrsv: b and x must both either have rank 1."); - static_assert(std::is_same::value, + static_assert(Kokkos::is_view::value, "sptrsv: b is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "sptrsv: x is not a Kokkos::View."); + static_assert((int)BType::rank == (int)XType::rank, "sptrsv: The ranks of b and x do not match."); + static_assert(BType::rank == 1, "sptrsv: b and x must both either have rank 1."); + static_assert(std::is_same::value, "sptrsv: The output x must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "sptrsv: Views BType and XType have different device_types."); - static_assert( - std::is_same< - typename BType::device_type::execution_space, - typename KernelHandle::SPTRSVHandleType::execution_space>::value, - "sptrsv: KernelHandle and Views have different execution spaces."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, + "sptrsv: KernelHandle and Views have different execution spaces."); + static_assert(std::is_same::value, "sptrsv: rowmap and entries have different device types."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "sptrsv: rowmap and values have different device types."); typedef typename KernelHandle::const_size_type c_size_t; @@ -358,46 +311,37 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> + typedef typename 
KokkosKernels::Experimental::KokkosKernelsHandle const_handle_type; const_handle_type tmp_handle(*handle); - typedef Kokkos::View< - typename lno_row_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_row_view_t_>::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename lno_row_view_t_::device_type, + Kokkos::MemoryTraits > RowMap_Internal; - typedef Kokkos::View< - typename lno_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_nnz_view_t_>::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename lno_nnz_view_t_::device_type, + Kokkos::MemoryTraits > Entries_Internal; - typedef Kokkos::View< - typename scalar_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - scalar_nnz_view_t_>::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, + typename scalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits > Values_Internal; - typedef Kokkos::View< - typename BType::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename BType::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, typename BType::device_type, + Kokkos::MemoryTraits > BType_Internal; - typedef Kokkos::View< - typename XType::non_const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XType::device_type, Kokkos::MemoryTraits > + typedef Kokkos::View::array_layout, typename XType::device_type, + Kokkos::MemoryTraits > XType_Internal; RowMap_Internal rowmap_i = rowmap; @@ -408,38 +352,36 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, XType_Internal x_i = x; auto sptrsv_handle = handle->get_sptrsv_handle(); - if (sptrsv_handle->get_algorithm() == - 
KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { + if (sptrsv_handle->get_algorithm() == KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE if constexpr (std::is_same_v) { typedef typename KernelHandle::SPTRSVHandleType sptrsvHandleType; sptrsvHandleType *sh = handle->get_sptrsv_handle(); auto nrows = sh->get_nrows(); - KokkosSparse::Impl::sptrsvcuSPARSE_solve< - ExecutionSpace, sptrsvHandleType, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, XType_Internal>( + KokkosSparse::Impl::sptrsvcuSPARSE_solve( space, sh, nrows, rowmap_i, entries_i, values_i, b_i, x_i, false); } else { - KokkosSparse::Impl::SPTRSV_SOLVE< - ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, - values_i, b_i, x_i); + KokkosSparse::Impl::SPTRSV_SOLVE::sptrsv_solve(space, + &tmp_handle, + rowmap_i, + entries_i, + values_i, b_i, + x_i); } #else - KokkosSparse::Impl::SPTRSV_SOLVE< - ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, - values_i, b_i, x_i); + KokkosSparse::Impl::SPTRSV_SOLVE::sptrsv_solve(space, &tmp_handle, + rowmap_i, entries_i, + values_i, b_i, x_i); #endif } else { - KokkosSparse::Impl::SPTRSV_SOLVE< - ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, - values_i, b_i, x_i); + KokkosSparse::Impl::SPTRSV_SOLVE::sptrsv_solve(space, &tmp_handle, + rowmap_i, entries_i, + values_i, b_i, x_i); } } // sptrsv_solve @@ -461,12 +403,10 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, * @param b The b vector * @param x The x vector */ -template -void sptrsv_solve(KernelHandle *handle, lno_row_view_t_ rowmap, 
- lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, BType b, - XType x) { +template +void sptrsv_solve(KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, + BType b, XType x) { using ExecutionSpace = typename KernelHandle::HandleExecSpace; auto my_exec_space = ExecutionSpace(); sptrsv_solve(my_exec_space, handle, rowmap, entries, values, b, x); @@ -486,8 +426,7 @@ void sptrsv_solve(KernelHandle *handle, lno_row_view_t_ rowmap, * @param b The b vector */ template -void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, XType x, - XType b) { +void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, XType x, XType b) { auto crsmat = handle->get_sptrsv_handle()->get_crsmat(); auto values = crsmat.values; auto graph = crsmat.graph; @@ -495,11 +434,9 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, XType x, auto entries = graph.entries; if (!(handle->get_sptrsv_handle()->is_numeric_complete())) { - std::cout - << std::endl - << " ** needs to call sptrsv_compute before calling sptrsv_solve **" - << std::endl - << std::endl; + std::cout << std::endl + << " ** needs to call sptrsv_compute before calling sptrsv_solve **" << std::endl + << std::endl; return; } @@ -549,8 +486,7 @@ void sptrsv_solve(KernelHandle *handle, XType x, XType b) { * @param b The b vector */ template -void sptrsv_solve(ExecutionSpace &space, KernelHandle *handleL, - KernelHandle *handleU, XType x, XType b) { +void sptrsv_solve(ExecutionSpace &space, KernelHandle *handleL, KernelHandle *handleU, XType x, XType b) { // Lower-triangular solve sptrsv_solve(space, handleL, x, b); @@ -573,103 +509,68 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handleL, * @param b The b vector */ template -void sptrsv_solve(KernelHandle *handleL, KernelHandle *handleU, XType x, - XType b) { +void sptrsv_solve(KernelHandle *handleL, KernelHandle *handleU, XType x, XType b) { using ExecutionSpace = typename KernelHandle::HandleExecSpace; 
auto my_exec_space = ExecutionSpace(); sptrsv_solve(my_exec_space, handleL, handleU, x, b); } #endif -template -void sptrsv_solve_streams(const std::vector &execspace_v, - const std::vector &handle_v, - const std::vector &rowmap_v, - const std::vector &entries_v, - const std::vector &values_v, - const std::vector &b_v, +template +void sptrsv_solve_streams(const std::vector &execspace_v, const std::vector &handle_v, + const std::vector &rowmap_v, const std::vector &entries_v, + const std::vector &values_v, const std::vector &b_v, std::vector &x_v) { using size_type = typename KernelHandle::size_type; using ordinal_type = typename KernelHandle::nnz_lno_t; using scalar_type = typename KernelHandle::nnz_scalar_t; - static_assert(Kokkos::is_execution_space::value, - "ExecutionSpace is not valid"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename lno_row_view_t_::memory_space>::accessible, - "sptrsv_solve_streams: ExecutionSpace cannot access data in " - "lno_row_view_t_"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename lno_nnz_view_t_::memory_space>::accessible, - "sptrsv_solve_streams: ExecutionSpace cannot access data in " - "lno_nnz_view_t_"); - static_assert(Kokkos::SpaceAccessibility< - ExecutionSpace, - typename scalar_nnz_view_t_::memory_space>::accessible, + static_assert(Kokkos::is_execution_space::value, "ExecutionSpace is not valid"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sptrsv_solve_streams: ExecutionSpace cannot access data in " + "lno_row_view_t_"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sptrsv_solve_streams: ExecutionSpace cannot access data in " + "lno_nnz_view_t_"); + static_assert(Kokkos::SpaceAccessibility::accessible, "sptrsv_solve_streams: ExecutionSpace cannot access data in " "scalar_nnz_view_t_"); - static_assert( - Kokkos::SpaceAccessibility::accessible, - "sptrsv_solve_streams: ExecutionSpace cannot access data in BType"); - static_assert( - 
Kokkos::SpaceAccessibility::accessible, - "sptrsv_solve_streams: ExecutionSpace cannot access data in XType"); - - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_row_view_t_::non_const_value_type, size_type), + static_assert(Kokkos::SpaceAccessibility::accessible, + "sptrsv_solve_streams: ExecutionSpace cannot access data in BType"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "sptrsv_solve_streams: ExecutionSpace cannot access data in XType"); + + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_row_view_t_::non_const_value_type, size_type), "sptrsv_solve_streams: A size_type must match KernelHandle " "size_type (const doesn't matter)"); - static_assert( - KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename lno_nnz_view_t_::non_const_value_type, ordinal_type), - "sptrsv_solve_streams: A entry type must match KernelHandle entry type " - "(aka nnz_lno_t, and const doesn't matter)"); - static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE( - typename scalar_nnz_view_t_::value_type, scalar_type), + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename lno_nnz_view_t_::non_const_value_type, ordinal_type), + "sptrsv_solve_streams: A entry type must match KernelHandle entry type " + "(aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_SPTRSV_SAME_TYPE(typename scalar_nnz_view_t_::value_type, scalar_type), "sptrsv_solve_streams: A scalar type must match KernelHandle " "entry type (aka nnz_lno_t, and const doesn't matter)"); - static_assert(Kokkos::is_view::value, - "sptrsv_solve_streams: b is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "sptrsv_solve_streams: x is not a Kokkos::View."); - static_assert((int)BType::rank == (int)XType::rank, - "sptrsv_solve_streams: The ranks of b and x do not match."); - static_assert(BType::rank == 1, - "sptrsv_solve_streams: b and x must both either have rank 1."); - static_assert(std::is_same::value, + static_assert(Kokkos::is_view::value, "sptrsv_solve_streams: b is not a 
Kokkos::View."); + static_assert(Kokkos::is_view::value, "sptrsv_solve_streams: x is not a Kokkos::View."); + static_assert((int)BType::rank == (int)XType::rank, "sptrsv_solve_streams: The ranks of b and x do not match."); + static_assert(BType::rank == 1, "sptrsv_solve_streams: b and x must both either have rank 1."); + static_assert(std::is_same::value, "sptrsv_solve_streams: The output x must be nonconst."); - static_assert(std::is_same::value, + static_assert(std::is_same::value, "sptrsv_solve_streams: Views BType and XType have different " "device_types."); - static_assert( - std::is_same< - ExecutionSpace, - typename KernelHandle::SPTRSVHandleType::execution_space>::value, - "sptrsv_solve_streams: KernelHandle's execution space is different from " - "ExecutionSpace."); - static_assert( - std::is_same< - typename BType::device_type::execution_space, - typename KernelHandle::SPTRSVHandleType::execution_space>::value, - "sptrsv_solve_streams: KernelHandle and Views have different execution " - "spaces."); - static_assert( - std::is_same::value, - "sptrsv_solve_streams: rowmap and entries have different device types."); - static_assert( - std::is_same::value, - "sptrsv_solve_streams: rowmap and values have different device types."); + static_assert(std::is_same::value, + "sptrsv_solve_streams: KernelHandle's execution space is different from " + "ExecutionSpace."); + static_assert(std::is_same::value, + "sptrsv_solve_streams: KernelHandle and Views have different execution " + "spaces."); + static_assert(std::is_same::value, + "sptrsv_solve_streams: rowmap and entries have different device types."); + static_assert(std::is_same::value, + "sptrsv_solve_streams: rowmap and values have different device types."); // Check sizes of vectors if (execspace_v.size() != handle_v.size()) { @@ -727,41 +628,32 @@ void sptrsv_solve_streams(const std::vector &execspace_v, using c_temp_t = typename KernelHandle::HandleTempMemorySpace; using c_persist_t = typename 
KernelHandle::HandlePersistentMemorySpace; - using const_handle_type = - typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; - - using RowMap_Internal = Kokkos::View< - typename lno_row_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_row_view_t_>::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits >; - - using Entries_Internal = Kokkos::View< - typename lno_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_nnz_view_t_>::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits >; - - using Values_Internal = Kokkos::View< - typename scalar_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - scalar_nnz_view_t_>::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits >; - - using BType_Internal = Kokkos::View< - typename BType::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename BType::device_type, - Kokkos::MemoryTraits >; - - using XType_Internal = Kokkos::View< - typename XType::non_const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XType::device_type, Kokkos::MemoryTraits >; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle; + + using RowMap_Internal = Kokkos::View::array_layout, + typename lno_row_view_t_::device_type, + Kokkos::MemoryTraits >; + + using Entries_Internal = Kokkos::View::array_layout, + typename lno_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; + + using Values_Internal = Kokkos::View::array_layout, + typename scalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; + + using BType_Internal = + Kokkos::View::array_layout, typename BType::device_type, + Kokkos::MemoryTraits >; + + using XType_Internal = Kokkos::View::array_layout, + typename XType::device_type, 
Kokkos::MemoryTraits >; std::vector handle_i_v(execspace_v.size()); std::vector rowmap_i_v(execspace_v.size()); @@ -783,26 +675,28 @@ void sptrsv_solve_streams(const std::vector &execspace_v, KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE // NOTE: assume all streams use the same SPTRSV_CUSPARSE algo. - KokkosSparse::Impl::sptrsvcuSPARSE_solve_streams< - ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, XType_Internal>( - execspace_v, handle_i_v, rowmap_i_v, entries_i_v, values_i_v, b_i_v, - x_i_v, false); + KokkosSparse::Impl::sptrsvcuSPARSE_solve_streams( + execspace_v, handle_i_v, rowmap_i_v, entries_i_v, values_i_v, b_i_v, x_i_v, false); #else - KokkosSparse::Impl::SPTRSV_SOLVE< - ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve_streams(execspace_v, handle_i_v, - rowmap_i_v, entries_i_v, - values_i_v, b_i_v, x_i_v); + KokkosSparse::Impl::SPTRSV_SOLVE::sptrsv_solve_streams(execspace_v, + handle_i_v, + rowmap_i_v, + entries_i_v, + values_i_v, + b_i_v, + x_i_v); #endif } else { - KokkosSparse::Impl::SPTRSV_SOLVE< - ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve_streams(execspace_v, handle_i_v, - rowmap_i_v, entries_i_v, - values_i_v, b_i_v, x_i_v); + KokkosSparse::Impl::SPTRSV_SOLVE::sptrsv_solve_streams(execspace_v, + handle_i_v, + rowmap_i_v, + entries_i_v, + values_i_v, + b_i_v, + x_i_v); } } // sptrsv_solve_streams diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_cholmod.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_cholmod.hpp index 3d11c858d0e3..450a2782bed9 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_cholmod.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_cholmod.hpp @@ -24,8 +24,7 @@ #ifndef 
KOKKOSSPARSE_SPTRSV_CHOLMOD_HPP_ #define KOKKOSSPARSE_SPTRSV_CHOLMOD_HPP_ -#if defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD) && \ - defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) +#if defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD) && defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) #include "cholmod.h" #include "KokkosSparse_Utils.hpp" @@ -41,17 +40,14 @@ namespace Experimental { /* ========================================================================================= */ template -graph_t read_cholmod_graphL(KernelHandle *kernelHandle, cholmod_factor *L, - cholmod_common *cm) { +graph_t read_cholmod_graphL(KernelHandle *kernelHandle, cholmod_factor *L, cholmod_common *cm) { /* ---------------------------------------------------------------------- */ /* get inputs */ /* ---------------------------------------------------------------------- */ - size_t n = L->n; - size_t nsuper = L->nsuper; // # of supernodal columns - cholmod_int_type *mb = - (cholmod_int_type - *)(L->pi); // mb[s+1] - mb[s] = total number of rows in the s-th - // supernodes (diagonal+off-diagonal) + size_t n = L->n; + size_t nsuper = L->nsuper; // # of supernodal columns + cholmod_int_type *mb = (cholmod_int_type *)(L->pi); // mb[s+1] - mb[s] = total number of rows in the s-th + // supernodes (diagonal+off-diagonal) cholmod_int_type *nb = (cholmod_int_type *)(L->super); cholmod_int_type *colptr = (cholmod_int_type *)(L->px); // colptr cholmod_int_type *rowind = (cholmod_int_type *)(L->s); // rowind @@ -59,11 +55,9 @@ graph_t read_cholmod_graphL(KernelHandle *kernelHandle, cholmod_factor *L, bool ptr_by_column = false; if (kernelHandle->is_sptrsv_column_major()) { int nnzA = colptr[nsuper] - colptr[0]; // overestimated if not block_diag - return read_supernodal_graphL(kernelHandle, n, nsuper, nnzA, - ptr_by_column, mb, nb, rowind); + return read_supernodal_graphL(kernelHandle, n, nsuper, nnzA, ptr_by_column, mb, nb, rowind); } else { - return read_supernodal_graphLt(kernelHandle, n, nsuper, - 
ptr_by_column, mb, nb, rowind); + return read_supernodal_graphLt(kernelHandle, n, nsuper, ptr_by_column, mb, nb, rowind); } } @@ -91,8 +85,7 @@ void compute_etree_cholmod(cholmod_sparse *A, cholmod_common *cm, int **etree) { */ /* For symbolic analysis */ template -void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, - cholmod_factor *L, cholmod_common *cm) { +void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, cholmod_factor *L, cholmod_common *cm) { // =================================================================== // load sptrsv-handles auto *handleL = kernelHandleL->get_sptrsv_handle(); @@ -103,10 +96,8 @@ void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, int nsuper = L->nsuper; cholmod_int_type *supercols = (cholmod_int_type *)(L->super); // convert supercols into internal-view type - using integer_view_host_t = - typename KernelHandle::SPTRSVHandleType::integer_view_host_t; - integer_view_host_t supercols_view = - integer_view_host_t("supercols", 1 + nsuper); + using integer_view_host_t = typename KernelHandle::SPTRSVHandleType::integer_view_host_t; + integer_view_host_t supercols_view = integer_view_host_t("supercols", 1 + nsuper); for (int i = 0; i <= nsuper; i++) { supercols_view(i) = supercols[i]; } @@ -118,26 +109,22 @@ void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, // ============================================== // extract CrsGraph for L from Cholmod using host_graph_t = typename KernelHandle::SPTRSVHandleType::host_graph_t; - auto graphL = - read_cholmod_graphL(kernelHandleL, L, cm); + auto graphL = read_cholmod_graphL(kernelHandleL, L, cm); if (handleU->is_column_major()) { // ============================================== // extract CrsGraph for U from Cholmod handleU->set_column_major(false); - auto graphU = read_cholmod_graphL( - kernelHandleU, L, cm); + auto graphU = read_cholmod_graphL(kernelHandleU, L, cm); 
handleU->set_column_major(true); // ============================================== // call supnodal symbolic - sptrsv_supernodal_symbolic(nsuper, supercols_view.data(), etree, graphL, - kernelHandleL, graphU, kernelHandleU); + sptrsv_supernodal_symbolic(nsuper, supercols_view.data(), etree, graphL, kernelHandleL, graphU, kernelHandleU); } else { // ============================================== // call supnodal symbolic - sptrsv_supernodal_symbolic(nsuper, supercols_view.data(), etree, graphL, - kernelHandleL, graphL, kernelHandleU); + sptrsv_supernodal_symbolic(nsuper, supercols_view.data(), etree, graphL, kernelHandleL, graphL, kernelHandleU); } } @@ -147,10 +134,8 @@ void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, /* ========================================================================================= */ -template -crsmat_t read_cholmod_factor(KernelHandle *kernelHandle, cholmod_factor *L, - cholmod_common *cm, graph_t &static_graph) { +template +crsmat_t read_cholmod_factor(KernelHandle *kernelHandle, cholmod_factor *L, cholmod_common *cm, graph_t &static_graph) { using values_view_t = typename crsmat_t::values_type::non_const_type; using scalar_t = typename values_view_t::value_type; @@ -169,13 +154,11 @@ crsmat_t read_cholmod_factor(KernelHandle *kernelHandle, cholmod_factor *L, bool ptr_by_column = false; if (kernelHandle->is_sptrsv_column_major()) { - return read_supernodal_values(kernelHandle, n, nsuper, - ptr_by_column, mb, nb, colptr, - rowind, Lx, static_graph); + return read_supernodal_values(kernelHandle, n, nsuper, ptr_by_column, mb, nb, colptr, rowind, Lx, + static_graph); } else { - return read_supernodal_valuesLt(kernelHandle, n, nsuper, - ptr_by_column, mb, nb, colptr, - rowind, Lx, static_graph); + return read_supernodal_valuesLt(kernelHandle, n, nsuper, ptr_by_column, mb, nb, colptr, rowind, Lx, + static_graph); } } @@ -183,28 +166,23 @@ crsmat_t read_cholmod_factor(KernelHandle *kernelHandle, cholmod_factor 
*L, */ /* For numeric computation */ template -void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, - cholmod_factor *L, cholmod_common *cm) { +void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, cholmod_factor *L, cholmod_common *cm) { // ============================================== // load sptrsv-handles auto *handleL = kernelHandleL->get_sptrsv_handle(); auto *handleU = kernelHandleU->get_sptrsv_handle(); - if (!(handleL->is_symbolic_complete()) || - !(handleU->is_symbolic_complete())) { - std::cout - << std::endl - << " ** needs to call sptrsv_symbolic before calling sptrsv_numeric **" - << std::endl - << std::endl; + if (!(handleL->is_symbolic_complete()) || !(handleU->is_symbolic_complete())) { + std::cout << std::endl + << " ** needs to call sptrsv_symbolic before calling sptrsv_numeric **" << std::endl + << std::endl; return; } // ============================================== // load options - bool useSpMV = - (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || - handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG); + bool useSpMV = (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || + handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG); // ============================================== // load crsGraph @@ -213,8 +191,7 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, // ============================================== // read numerical values of L from Cholmod using crsmat_t = typename KernelHandle::SPTRSVHandleType::crsmat_t; - auto cholmodL = read_cholmod_factor(kernelHandleL, - L, cm, graph); + auto cholmodL = read_cholmod_factor(kernelHandleL, L, cm, graph); // ============================================== // split the matrix into submatrices for spmv at each level @@ -230,8 +207,7 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, handleU->set_lower_tri(true); 
handleU->set_column_major(false); - auto cholmodU = read_cholmod_factor( - kernelHandleU, L, cm, graphU); + auto cholmodU = read_cholmod_factor(kernelHandleU, L, cm, graphU); handleU->set_lower_tri(false); handleU->set_column_major(true); diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_handle.hpp index cf23bfdc1f7b..b6ca3dacfd3f 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_handle.hpp @@ -25,8 +25,7 @@ #include "KokkosSparse_Utils_cusparse.hpp" #endif -#if defined(KOKKOS_ENABLE_CUDA) && 10000 < CUDA_VERSION && \ - defined(KOKKOSKERNELS_ENABLE_EXP_CUDAGRAPH) +#if defined(KOKKOS_ENABLE_CUDA) && 10000 < CUDA_VERSION && defined(KOKKOSKERNELS_ENABLE_EXP_CUDAGRAPH) #define KOKKOSKERNELS_SPTRSV_CUDAGRAPHSUPPORT #endif @@ -52,80 +51,68 @@ enum class SPTRSVAlgorithm { SUPERNODAL_SPMV_DAG }; -template +template class SPTRSVHandle { public: - typedef ExecutionSpace HandleExecSpace; - typedef TemporaryMemorySpace HandleTempMemorySpace; - typedef PersistentMemorySpace HandlePersistentMemorySpace; - - typedef ExecutionSpace execution_space; - typedef HandlePersistentMemorySpace memory_space; - - typedef typename std::remove_const::type size_type; - typedef const size_type const_size_type; - - typedef typename std::remove_const::type nnz_lno_t; - typedef const nnz_lno_t const_nnz_lno_t; - - typedef typename std::remove_const::type scalar_t; - typedef const scalar_t const_nnz_scalar_t; - - // row_map type (managed memory) - typedef typename Kokkos::View - nnz_row_view_temp_t; - typedef typename Kokkos::View - nnz_row_view_t; - typedef typename nnz_row_view_t::HostMirror host_nnz_row_view_t; - typedef typename Kokkos::View - int_row_view_t; - typedef typename Kokkos::View - int64_row_view_t; + using HandleExecSpace = ExecutionSpace; + using HandleTempMemorySpace = TemporaryMemorySpace; + using 
HandlePersistentMemorySpace = PersistentMemorySpace; + + using execution_space = ExecutionSpace; + using memory_space = HandlePersistentMemorySpace; + + using TeamPolicy = Kokkos::TeamPolicy; + using RangePolicy = Kokkos::RangePolicy; + + using size_type = typename std::remove_const::type; + using const_size_type = const size_type; + + using nnz_lno_t = typename std::remove_const::type; + using const_nnz_lno_t = const nnz_lno_t; + + using scalar_t = typename std::remove_const::type; + using const_nnz_scalar_t = const scalar_t; + + // Row_map type (managed memory) + using nnz_row_view_temp_t = typename Kokkos::View; + using nnz_row_view_t = typename Kokkos::View; + using host_nnz_row_view_t = typename nnz_row_view_t::HostMirror; + using int_row_view_t = typename Kokkos::View; + using int64_row_view_t = typename Kokkos::View; // typedef typename row_lno_persistent_work_view_t::HostMirror // row_lno_persistent_work_host_view_t; //Host view type - typedef typename Kokkos::View< - const size_type *, HandlePersistentMemorySpace, - Kokkos::MemoryTraits> - nnz_row_unmanaged_view_t; // for rank1 subviews + using nnz_row_unmanaged_view_t = + typename Kokkos::View>; // for rank1 subviews // values type (managed memory) - typedef typename Kokkos::View - nnz_scalar_view_temp_t; - typedef typename Kokkos::View - nnz_scalar_view_t; - typedef typename nnz_scalar_view_t::HostMirror host_nnz_scalar_view_t; - typedef typename Kokkos::View< - const scalar_t *, HandlePersistentMemorySpace, - Kokkos::MemoryTraits> - nnz_scalar_unmanaged_view_t; // for rank1 subviews + using nnz_scalar_view_temp_t = typename Kokkos::View; + using nnz_scalar_view_t = typename Kokkos::View; + using host_nnz_scalar_view_t = typename nnz_scalar_view_t::HostMirror; + using nnz_scalar_unmanaged_view_t = + typename Kokkos::View>; // for rank1 subviews // entries type (managed memory) - typedef typename Kokkos::View - nnz_lno_view_temp_t; - typedef typename Kokkos::View - nnz_lno_view_t; - typedef typename 
Kokkos::View - hostspace_nnz_lno_view_t; - typedef typename nnz_lno_view_t::HostMirror host_nnz_lno_view_t; - typedef typename Kokkos::View< - const nnz_lno_t *, HandlePersistentMemorySpace, - Kokkos::MemoryTraits> - nnz_lno_unmanaged_view_t; // for rank1 subviews + using nnz_lno_view_temp_t = typename Kokkos::View; + using nnz_lno_view_t = typename Kokkos::View; + using hostspace_nnz_lno_view_t = typename Kokkos::View; + using host_nnz_lno_view_t = typename nnz_lno_view_t::HostMirror; + using nnz_lno_unmanaged_view_t = + typename Kokkos::View>; // for rank1 subviews // typedef typename nnz_lno_persistent_work_view_t::HostMirror // nnz_lno_persistent_work_host_view_t; //Host view type - typedef typename std::make_signed< - typename nnz_row_view_t::non_const_value_type>::type signed_integral_t; - typedef Kokkos::View - signed_nnz_lno_view_t; - typedef typename signed_nnz_lno_view_t::HostMirror host_signed_nnz_lno_view_t; + using signed_integral_t = typename std::make_signed::type; + using signed_nnz_lno_view_t = + Kokkos::View; + + using host_signed_nnz_lno_view_t = typename signed_nnz_lno_view_t::HostMirror; - typedef typename Kokkos::View - mtx_scalar_view_t; + using mtx_scalar_view_t = typename Kokkos::View; #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #if (CUDA_VERSION >= 11030) @@ -141,8 +128,7 @@ class SPTRSVHandle { cuSparseHandleType(bool transpose_, bool /*is_lower*/) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&handle)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST)); if (transpose_) { transpose = CUSPARSE_OPERATION_TRANSPOSE; @@ -214,7 +200,7 @@ class SPTRSVHandle { }; #endif - typedef cuSparseHandleType SPTRSVcuSparseHandleType; + using SPTRSVcuSparseHandleType = cuSparseHandleType; #endif #ifdef KOKKOSKERNELS_SPTRSV_CUDAGRAPHSUPPORT @@ -228,7 +214,7 @@ class SPTRSVHandle { //~cudaGraphWrapperType() { } }; - typedef 
cudaGraphWrapperType SPTRSVcudaGraphWrapperType; + using SPTRSVcudaGraphWrapperType = cudaGraphWrapperType; void create_SPTRSVcudaGraphWrapperType() { destroy_SPTRSVcudaGraphWrapperType(); @@ -253,22 +239,17 @@ class SPTRSVHandle { using supercols_memory_space = TemporaryMemorySpace; using supercols_host_execution_space = Kokkos::DefaultHostExecutionSpace; - using supercols_host_memory_space = - typename supercols_host_execution_space::memory_space; + using supercols_host_memory_space = typename supercols_host_execution_space::memory_space; using integer_view_t = Kokkos::View; using integer_view_host_t = Kokkos::View; - using workspace_t = typename Kokkos::View< - scalar_t *, Kokkos::Device>; + using workspace_t = typename Kokkos::View>; // - using host_crsmat_t = - KokkosSparse::CrsMatrix; - using crsmat_t = KokkosSparse::CrsMatrix< - scalar_t, nnz_lno_t, - Kokkos::Device, void, size_type>; + using host_crsmat_t = KokkosSparse::CrsMatrix; + using crsmat_t = KokkosSparse::CrsMatrix, + void, size_type>; // using host_graph_t = typename host_crsmat_t::StaticCrsGraphType; @@ -296,6 +277,7 @@ class SPTRSVHandle { nnz_lno_view_t nodes_grouped_by_level; hostspace_nnz_lno_view_t hnodes_grouped_by_level; // NEW size_type nlevel; + size_type block_size; // block_size > 0 implies BSR int team_size; int vector_size; @@ -323,10 +305,8 @@ class SPTRSVHandle { /*|| algm == SPTRSVAlgorithm::SEQLVLSCHED_TP2*/ || algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN #ifdef KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV - || algm == SPTRSVAlgorithm::SUPERNODAL_NAIVE || - algm == SPTRSVAlgorithm::SUPERNODAL_ETREE || - algm == SPTRSVAlgorithm::SUPERNODAL_DAG || - algm == SPTRSVAlgorithm::SUPERNODAL_SPMV || + || algm == SPTRSVAlgorithm::SUPERNODAL_NAIVE || algm == SPTRSVAlgorithm::SUPERNODAL_ETREE || + algm == SPTRSVAlgorithm::SUPERNODAL_DAG || algm == SPTRSVAlgorithm::SUPERNODAL_SPMV || algm == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG #endif ) { @@ -337,8 +317,7 @@ class SPTRSVHandle { } void 
set_if_algm_require_symb_chain() { - if (algm == - KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) { + if (algm == KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) { require_symbolic_chain_phase = true; } else { require_symbolic_chain_phase = false; @@ -399,9 +378,8 @@ class SPTRSVHandle { integer_view_t perm; // graphs - host_graph_t - original_graph_host; // graph on host before merge (only if merged) - host_graph_t graph_host; // mirror of graph on host + host_graph_t original_graph_host; // graph on host before merge (only if merged) + host_graph_t graph_host; // mirror of graph on host graph_t graph; // crsmat @@ -422,7 +400,7 @@ class SPTRSVHandle { #endif public: - SPTRSVHandle(SPTRSVAlgorithm choice, const size_type nrows_, bool lower_tri_, + SPTRSVHandle(SPTRSVAlgorithm choice, const size_type nrows_, bool lower_tri_, const size_type block_size_ = 0, bool symbolic_complete_ = false, bool numeric_complete_ = false) : #ifdef KOKKOSKERNELS_SPTRSV_CUDAGRAPHSUPPORT @@ -438,6 +416,7 @@ class SPTRSVHandle { nodes_grouped_by_level(), hnodes_grouped_by_level(), nlevel(0), + block_size(block_size_), team_size(-1), vector_size(-1), stored_diagonal(false), @@ -507,13 +486,11 @@ class SPTRSVHandle { // set nsuper and supercols (# of supernodes, and map from supernode to column // id template - void set_supernodes(signed_integral_t nsuper_, input_int_type *supercols_, - int *etree_) { + void set_supernodes(signed_integral_t nsuper_, input_int_type *supercols_, int *etree_) { // set etree (just wrap etree in a view) this->etree_host = integer_view_host_t(etree_, nsuper_); // set supernodes (make a copy, from input_int_type to int) - integer_view_host_t supercols_view = - integer_view_host_t("supercols", 1 + nsuper_); + integer_view_host_t supercols_view = integer_view_host_t("supercols", 1 + nsuper_); for (signed_integral_t i = 0; i <= nsuper_; i++) { supercols_view(i) = supercols_[i]; } @@ -521,16 +498,14 @@ class SPTRSVHandle { 
set_supernodes(nsuper_, supercols_view, etree_); } - void set_supernodes(signed_integral_t nsuper_, integer_view_host_t supercols_, - int *etree_) { + void set_supernodes(signed_integral_t nsuper_, integer_view_host_t supercols_, int *etree_) { // set etree this->etree_host = integer_view_host_t(etree_, nsuper_); // set supernodes set_supernodes(nsuper_, supercols_); } - void set_supernodes(signed_integral_t nsuper_, - integer_view_host_t supercols_view) { + void set_supernodes(signed_integral_t nsuper_, integer_view_host_t supercols_view) { this->nsuper = nsuper_; // supercols @@ -546,11 +521,10 @@ class SPTRSVHandle { this->work_offset = integer_view_t("workoffset", nsuper_); // kernel type - this->diag_kernel_type_host = - integer_view_host_t("diag_kernel_type_host", nsuper_); - this->diag_kernel_type = integer_view_t("diag_kernel_type", nsuper_); - this->kernel_type_host = integer_view_host_t("kernel_type_host", nsuper_); - this->kernel_type = integer_view_t("kernel_type", nsuper_); + this->diag_kernel_type_host = integer_view_host_t("diag_kernel_type_host", nsuper_); + this->diag_kernel_type = integer_view_t("diag_kernel_type", nsuper_); + this->kernel_type_host = integer_view_host_t("kernel_type_host", nsuper_); + this->kernel_type = integer_view_t("kernel_type", nsuper_); // number of streams this->num_streams = 0; @@ -591,9 +565,7 @@ class SPTRSVHandle { KOKKOS_INLINE_FUNCTION integer_view_t get_work_offset() const { return this->work_offset; } - integer_view_host_t get_work_offset_host() const { - return this->work_offset_host; - } + integer_view_host_t get_work_offset_host() const { return this->work_offset_host; } // specify whether too run KokkosKernels::trmm on device or not void set_trmm_on_device(bool flag) { this->trmm_on_device = flag; } @@ -605,13 +577,9 @@ class SPTRSVHandle { int get_supernode_size_blocked() { return this->sup_size_blocked; } - void set_supernode_size_unblocked(int size_unblocked) { - this->sup_size_unblocked = size_unblocked; - 
} + void set_supernode_size_unblocked(int size_unblocked) { this->sup_size_unblocked = size_unblocked; } - void set_supernode_size_blocked(int size_blocked) { - this->sup_size_blocked = size_blocked; - } + void set_supernode_size_blocked(int size_blocked) { this->sup_size_blocked = size_blocked; } // specify to merge supernodes void set_merge_supernodes(bool flag) { this->merge_supernodes = flag; } @@ -639,9 +607,7 @@ class SPTRSVHandle { // kernel type integer_view_host_t get_kernel_type_host() { return this->kernel_type_host; } - integer_view_host_t get_diag_kernel_type_host() { - return this->diag_kernel_type_host; - } + integer_view_host_t get_diag_kernel_type_host() { return this->diag_kernel_type_host; } KOKKOS_INLINE_FUNCTION integer_view_t get_kernel_type() { return this->kernel_type; } @@ -665,16 +631,12 @@ class SPTRSVHandle { bool has_perm() { return this->perm_avail; } // graph on host (before merge) - void set_original_graph_host(host_graph_t graph_host_) { - this->original_graph_host = graph_host_; - } + void set_original_graph_host(host_graph_t graph_host_) { this->original_graph_host = graph_host_; } host_graph_t get_original_graph_host() { return this->original_graph_host; } // graph on host - void set_graph_host(host_graph_t graph_host_) { - this->graph_host = graph_host_; - } + void set_graph_host(host_graph_t graph_host_) { this->graph_host = graph_host_; } host_graph_t get_graph_host() { return this->graph_host; } @@ -698,16 +660,12 @@ class SPTRSVHandle { crsmat_t get_crsmat() { return this->crsmat; } // submatrices - void set_submatrices(crsmat_list_t subcrsmats) { - this->sub_crsmats = subcrsmats; - } + void set_submatrices(crsmat_list_t subcrsmats) { this->sub_crsmats = subcrsmats; } crsmat_t get_submatrix(int i) { return this->sub_crsmats[i]; } // diagonal subblocks - void set_diagblocks(crsmat_list_t subcrsmats) { - this->diag_blocks = subcrsmats; - } + void set_diagblocks(crsmat_list_t subcrsmats) { this->diag_blocks = subcrsmats; } 
crsmat_t get_diagblock(int i) { return this->diag_blocks[i]; } @@ -724,8 +682,7 @@ class SPTRSVHandle { void setNumStreams(int num_streams_) { this->num_streams = num_streams_; if (num_streams_ > 0) { - this->cuda_streams = - (cudaStream_t *)malloc(num_streams_ * sizeof(cudaStream_t)); + this->cuda_streams = (cudaStream_t *)malloc(num_streams_ * sizeof(cudaStream_t)); for (int i = 0; i < num_streams_; i++) { cudaStreamCreate(&(this->cuda_streams[i])); } @@ -747,24 +704,16 @@ class SPTRSVHandle { // TODO: Set sizes differently/smaller, resize during symbolic to save space if (this->require_symbolic_lvlsched_phase == true) { set_num_levels(0); - level_list = signed_nnz_lno_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "level_list"), - nrows_); + level_list = signed_nnz_lno_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "level_list"), nrows_); Kokkos::deep_copy(level_list, signed_integral_t(-1)); // The host side views need to be initialized, but the device-side views // don't. Symbolic computes on the host (and requires these are 0 // initialized), and then copies to device. 
- hnodes_per_level = - hostspace_nnz_lno_view_t("host nodes_per_level", nrows_); - hnodes_grouped_by_level = - hostspace_nnz_lno_view_t("host nodes_grouped_by_level", nrows_); - nodes_per_level = nnz_lno_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "nodes_per_level"), - nrows_); + hnodes_per_level = hostspace_nnz_lno_view_t("host nodes_per_level", nrows_); + hnodes_grouped_by_level = hostspace_nnz_lno_view_t("host nodes_grouped_by_level", nrows_); + nodes_per_level = nnz_lno_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "nodes_per_level"), nrows_); nodes_grouped_by_level = - nnz_lno_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "nodes_grouped_by_level"), - nrows_); + nnz_lno_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "nodes_grouped_by_level"), nrows_); #if 0 std::cout << " newinit_handle: level schedule allocs" << std::endl; @@ -777,12 +726,9 @@ class SPTRSVHandle { } if (stored_diagonal) { - diagonal_offsets = nnz_lno_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "diagonal_offsets"), - nrows_); - diagonal_values = nnz_scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "diagonal_values"), - nrows_); // inserted by rowid + diagonal_offsets = nnz_lno_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "diagonal_offsets"), nrows_); + diagonal_values = nnz_scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "diagonal_values"), + nrows_); // inserted by rowid hdiagonal_offsets = Kokkos::create_mirror_view(diagonal_offsets); hdiagonal_values = Kokkos::create_mirror_view(diagonal_values); } @@ -794,33 +740,28 @@ class SPTRSVHandle { // within a kernel if (team_size == -1) { this->chain_threshold = 0; - h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); + h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); } else { std::cout << " Warning: chain_threshold was not set - will default " "to team_size = " - << this->team_size - << " chain_threshold = " << 
this->chain_threshold - << std::endl; + << this->team_size << " chain_threshold = " << this->chain_threshold << std::endl; this->chain_threshold = this->team_size; - h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); + h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); } } else { if (this->team_size >= this->chain_threshold) { h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); } else if (this->team_size == -1 && chain_threshold > 0) { - std::cout << " Warning: team_size was not set; chain_threshold = " - << this->chain_threshold << std::endl; + std::cout << " Warning: team_size was not set; chain_threshold = " << this->chain_threshold << std::endl; std::cout << " Automatically setting team_size to chain_threshold - " "if this exceeds the hardware limitations relaunch with " "reduced chain_threshold or set a valid team_size" << std::endl; this->team_size = this->chain_threshold; - h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); + h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); } else { - std::cout - << " EXPERIMENTAL: team_size less than chain size. team_size = " - << this->team_size - << " chain_threshold = " << this->chain_threshold << std::endl; + std::cout << " EXPERIMENTAL: team_size less than chain size. 
team_size = " << this->team_size + << " chain_threshold = " << this->chain_threshold << std::endl; h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", this->nrows); } } @@ -859,13 +800,10 @@ class SPTRSVHandle { } } - SPTRSVcuSparseHandleType *get_cuSparseHandle() { - return this->cuSPARSEHandle; - } + SPTRSVcuSparseHandleType *get_cuSparseHandle() { return this->cuSPARSEHandle; } void allocate_tmp_int_rowmap(size_type N) { - tmp_int_rowmap = int_row_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_int_rowmap"), N); + tmp_int_rowmap = int_row_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_int_rowmap"), N); } template int_row_view_t get_int_rowmap_view_copy(const RowViewType &rowmap) { @@ -882,8 +820,7 @@ class SPTRSVHandle { int *get_int_rowmap_ptr() { return tmp_int_rowmap.data(); } void allocate_tmp_int64_rowmap(size_type N) { - tmp_int64_rowmap = int64_row_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_int64_rowmap"), N); + tmp_int64_rowmap = int64_row_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_int64_rowmap"), N); } template int64_t *get_int64_rowmap_ptr_copy(const RowViewType &rowmap) { @@ -894,9 +831,7 @@ class SPTRSVHandle { int64_t *get_int64_rowmap_ptr() { return tmp_int64_rowmap.data(); } #endif - bool algm_requires_symb_lvlsched() const { - return require_symbolic_lvlsched_phase; - } + bool algm_requires_symb_lvlsched() const { return require_symbolic_lvlsched_phase; } bool algm_requires_symb_chain() const { return require_symbolic_chain_phase; } @@ -920,9 +855,7 @@ class SPTRSVHandle { return hlevel_list; } - void set_stored_diagonal(const bool stored_diagonal_) { - stored_diagonal = stored_diagonal_; - } + void set_stored_diagonal(const bool stored_diagonal_) { stored_diagonal = stored_diagonal_; } KOKKOS_INLINE_FUNCTION nnz_lno_view_t get_diagonal_offsets() const { return diagonal_offsets; } @@ -931,34 +864,22 @@ class SPTRSVHandle { nnz_scalar_view_t get_diagonal_values() const { return 
diagonal_values; } KOKKOS_INLINE_FUNCTION - host_nnz_lno_view_t get_host_diagonal_offsets() const { - return hdiagonal_offsets; - } + host_nnz_lno_view_t get_host_diagonal_offsets() const { return hdiagonal_offsets; } KOKKOS_INLINE_FUNCTION - host_nnz_scalar_view_t get_host_diagonal_values() const { - return hdiagonal_values; - } + host_nnz_scalar_view_t get_host_diagonal_values() const { return hdiagonal_values; } - inline host_signed_nnz_lno_view_t get_host_chain_ptr() const { - return h_chain_ptr; - } + inline host_signed_nnz_lno_view_t get_host_chain_ptr() const { return h_chain_ptr; } KOKKOS_INLINE_FUNCTION nnz_lno_view_t get_nodes_per_level() const { return nodes_per_level; } - inline hostspace_nnz_lno_view_t get_host_nodes_per_level() const { - return hnodes_per_level; - } + inline hostspace_nnz_lno_view_t get_host_nodes_per_level() const { return hnodes_per_level; } KOKKOS_INLINE_FUNCTION - nnz_lno_view_t get_nodes_grouped_by_level() const { - return nodes_grouped_by_level; - } + nnz_lno_view_t get_nodes_grouped_by_level() const { return nodes_grouped_by_level; } - inline hostspace_nnz_lno_view_t get_host_nodes_grouped_by_level() const { - return hnodes_grouped_by_level; - } + inline hostspace_nnz_lno_view_t get_host_nodes_grouped_by_level() const { return hnodes_grouped_by_level; } KOKKOS_INLINE_FUNCTION size_type get_nrows() const { return nrows; } @@ -981,18 +902,14 @@ class SPTRSVHandle { // h_chain_ptr = host_signed_nnz_lno_view_t("h_chain_ptr", // this->nrows); } else { - std::cout << " EXPERIMENTAL: team_size < chain_size: team_size = " - << this->team_size - << " chain_threshold = " << this->chain_threshold - << std::endl; + std::cout << " EXPERIMENTAL: team_size < chain_size: team_size = " << this->team_size + << " chain_threshold = " << this->chain_threshold << std::endl; } } } KOKKOS_INLINE_FUNCTION - signed_integral_t get_chain_threshold() const { - return this->chain_threshold; - } + signed_integral_t get_chain_threshold() const { return 
this->chain_threshold; } bool is_lower_tri() const { return lower_tri; } bool is_upper_tri() const { return !lower_tri; } @@ -1007,6 +924,13 @@ class SPTRSVHandle { void set_num_levels(size_type nlevels_) { this->nlevel = nlevels_; } + KOKKOS_INLINE_FUNCTION + size_type get_block_size() const { return block_size; } + + KOKKOS_INLINE_FUNCTION + void set_block_size(const size_type block_size_) { this->block_size = block_size_; } + + bool is_block_enabled() const { return block_size > 0; } void set_symbolic_complete() { this->symbolic_complete = true; } void set_symbolic_incomplete() { this->symbolic_complete = false; } @@ -1028,12 +952,10 @@ class SPTRSVHandle { void set_num_chain_entries(const int nce) { this->num_chain_entries = nce; } void print_algorithm() { - if (algm == SPTRSVAlgorithm::SEQLVLSCHD_RP) - std::cout << "SEQLVLSCHD_RP" << std::endl; + if (algm == SPTRSVAlgorithm::SEQLVLSCHD_RP) std::cout << "SEQLVLSCHD_RP" << std::endl; ; - if (algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1) - std::cout << "SEQLVLSCHD_TP1" << std::endl; + if (algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1) std::cout << "SEQLVLSCHD_TP1" << std::endl; ; /* if ( algm == SPTRSVAlgorithm::SEQLVLSCHED_TP2 ) { @@ -1042,28 +964,21 @@ class SPTRSVHandle { int-int ordinal-offset pair" << std::endl; } */ - if (algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) - std::cout << "SEQLVLSCHD_TP1CHAIN" << std::endl; + if (algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) std::cout << "SEQLVLSCHD_TP1CHAIN" << std::endl; ; - if (algm == SPTRSVAlgorithm::SPTRSV_CUSPARSE) - std::cout << "SPTRSV_CUSPARSE" << std::endl; + if (algm == SPTRSVAlgorithm::SPTRSV_CUSPARSE) std::cout << "SPTRSV_CUSPARSE" << std::endl; ; - if (algm == SPTRSVAlgorithm::SUPERNODAL_NAIVE) - std::cout << "SUPERNODAL_NAIVE" << std::endl; + if (algm == SPTRSVAlgorithm::SUPERNODAL_NAIVE) std::cout << "SUPERNODAL_NAIVE" << std::endl; - if (algm == SPTRSVAlgorithm::SUPERNODAL_ETREE) - std::cout << "SUPERNODAL_ETREE" << std::endl; + if (algm == 
SPTRSVAlgorithm::SUPERNODAL_ETREE) std::cout << "SUPERNODAL_ETREE" << std::endl; - if (algm == SPTRSVAlgorithm::SUPERNODAL_DAG) - std::cout << "SUPERNODAL_DAG" << std::endl; + if (algm == SPTRSVAlgorithm::SUPERNODAL_DAG) std::cout << "SUPERNODAL_DAG" << std::endl; - if (algm == SPTRSVAlgorithm::SUPERNODAL_SPMV) - std::cout << "SUPERNODAL_SPMV" << std::endl; + if (algm == SPTRSVAlgorithm::SUPERNODAL_SPMV) std::cout << "SUPERNODAL_SPMV" << std::endl; - if (algm == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG) - std::cout << "SUPERNODAL_SPMV_DAG" << std::endl; + if (algm == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG) std::cout << "SUPERNODAL_SPMV_DAG" << std::endl; } std::string return_algorithm_string() { @@ -1076,11 +991,9 @@ class SPTRSVHandle { if ( algm == SPTRSVAlgorithm::SEQLVLSCHED_TP2 ) ret_string = "SEQLVLSCHED_TP2"; */ - if (algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) - ret_string = "SEQLVLSCHD_TP1CHAIN"; + if (algm == SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) ret_string = "SEQLVLSCHD_TP1CHAIN"; - if (algm == SPTRSVAlgorithm::SPTRSV_CUSPARSE) - ret_string = "SPTRSV_CUSPARSE"; + if (algm == SPTRSVAlgorithm::SPTRSV_CUSPARSE) ret_string = "SPTRSV_CUSPARSE"; return ret_string; } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_superlu.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_superlu.hpp index 1ebae78f77fc..fdba1415682a 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_superlu.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_superlu.hpp @@ -24,8 +24,7 @@ #ifndef KOKKOSSPARSE_SPTRSV_SUPERLU_HPP_ #define KOKKOSSPARSE_SPTRSV_SUPERLU_HPP_ -#if defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) && \ - defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) +#if defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) && defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) #include "slu_ddefs.h" @@ -58,17 +57,15 @@ graph_t read_superlu_graphL(KernelHandle *kernelHandle, SuperMatrix *L) { int *rowind = Lstore->rowind; bool ptr_by_column = true; - 
int nnzA = colptr[n] - colptr[0]; // overestimated if not block_diag - return read_supernodal_graphL(kernelHandle, n, nsuper, nnzA, - ptr_by_column, mb, nb, rowind); + int nnzA = colptr[n] - colptr[0]; // overestimated if not block_diag + return read_supernodal_graphL(kernelHandle, n, nsuper, nnzA, ptr_by_column, mb, nb, rowind); } /* ========================================================================================= */ // read SuperLU U factor into CSR template -graph_t read_superlu_graphU(KernelHandle *kernelHandle, SuperMatrix *L, - SuperMatrix *U) { +graph_t read_superlu_graphU(KernelHandle *kernelHandle, SuperMatrix *L, SuperMatrix *U) { using row_map_view_t = typename graph_t::row_map_type::non_const_type; using cols_view_t = typename graph_t::entries_type::non_const_type; using host_cols_view_t = typename cols_view_t::HostMirror; @@ -249,8 +246,7 @@ graph_t read_superlu_graphU(KernelHandle *kernelHandle, SuperMatrix *L, std::cout << " * Matrix size = " << n << std::endl; std::cout << " * Total nnz = " << hr(n) << std::endl; std::cout << " * nnz / n = " << hr(n) / n << std::endl; - std::cout << " * time = " << time_count << " + " << time1 << " + " - << time2 << " + " << time3 << std::endl; + std::cout << " * time = " << time_count << " + " << time1 << " + " << time2 << " + " << time3 << std::endl; #endif // deepcopy @@ -266,8 +262,7 @@ graph_t read_superlu_graphU(KernelHandle *kernelHandle, SuperMatrix *L, */ /* Symbolic analysis */ template -void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, - SuperMatrix &L, SuperMatrix &U) { +void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, SuperMatrix &L, SuperMatrix &U) { Kokkos::Timer timer; Kokkos::Timer tic; timer.reset(); @@ -301,9 +296,8 @@ void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE double time_read_U = tic.seconds(); int nrows(graphL_host.row_map.extent(0)); - std::cout << " 
Conversion Time (from SuperLU to CSR): " << time_read_L - << " + " << time_read_U << ", nnz = " << graphL_host.row_map(nrows) - << " + " << graphU_host.row_map(nrows) << std::endl; + std::cout << " Conversion Time (from SuperLU to CSR): " << time_read_L << " + " << time_read_U + << ", nnz = " << graphL_host.row_map(nrows) << " + " << graphU_host.row_map(nrows) << std::endl; tic.reset(); #endif @@ -319,12 +313,10 @@ void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, // =================================================================== // call supnodal symbolic - sptrsv_supernodal_symbolic(nsuper, supercols, etree, graphL_host, - kernelHandleL, graphU_host, kernelHandleU); + sptrsv_supernodal_symbolic(nsuper, supercols, etree, graphL_host, kernelHandleL, graphU_host, kernelHandleU); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE double time_seconds = tic.seconds(); - std::cout << " SpTRSV Supernodal Symbolic Time : " << time_seconds - << std::endl; + std::cout << " SpTRSV Supernodal Symbolic Time : " << time_seconds << std::endl; #endif } @@ -335,8 +327,7 @@ void sptrsv_symbolic(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, /* ========================================================================================= */ template -crsmat_t read_superlu_valuesL(KernelHandle *kernelHandle, SuperMatrix *L, - graph_t &static_graph) { +crsmat_t read_superlu_valuesL(KernelHandle *kernelHandle, SuperMatrix *L, graph_t &static_graph) { using values_view_t = typename crsmat_t::values_type::non_const_type; using scalar_t = typename values_view_t::value_type; @@ -354,17 +345,15 @@ crsmat_t read_superlu_valuesL(KernelHandle *kernelHandle, SuperMatrix *L, int *rowind = Lstore->rowind; bool ptr_by_column = true; - return read_supernodal_values(kernelHandle, n, nsuper, - ptr_by_column, mb, nb, colptr, rowind, - Lx, static_graph); + return read_supernodal_values(kernelHandle, n, nsuper, ptr_by_column, mb, nb, colptr, rowind, Lx, + static_graph); } /* 
========================================================================================= */ // store numerical values of SuperLU U-factor into CSR template -crsmat_t read_superlu_valuesU(KernelHandle *kernelHandle, SuperMatrix *L, - SuperMatrix *U, graph_t &static_graph) { +crsmat_t read_superlu_valuesU(KernelHandle *kernelHandle, SuperMatrix *L, SuperMatrix *U, graph_t &static_graph) { using values_view_t = typename crsmat_t::values_type::non_const_type; using scalar_t = typename values_view_t::value_type; using integer_view_host_t = Kokkos::View; @@ -525,8 +514,7 @@ crsmat_t read_superlu_valuesU(KernelHandle *kernelHandle, SuperMatrix *L, */ /* For numeric computation */ template -void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, - SuperMatrix &L, SuperMatrix &U) { +void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, SuperMatrix &L, SuperMatrix &U) { using crsmat_t = typename KernelHandle::SPTRSVHandleType::crsmat_t; using host_crsmat_t = typename KernelHandle::SPTRSVHandleType::host_crsmat_t; @@ -537,22 +525,18 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, auto *handleL = kernelHandleL->get_sptrsv_handle(); auto *handleU = kernelHandleU->get_sptrsv_handle(); - if (!(handleL->is_symbolic_complete()) || - !(handleU->is_symbolic_complete())) { - std::cout - << std::endl - << " ** needs to call sptrsv_symbolic before calling sptrsv_numeric **" - << std::endl - << std::endl; + if (!(handleL->is_symbolic_complete()) || !(handleU->is_symbolic_complete())) { + std::cout << std::endl + << " ** needs to call sptrsv_symbolic before calling sptrsv_numeric **" << std::endl + << std::endl; return; } // =================================================================== // load options - bool merge = handleL->get_merge_supernodes(); - bool useSpMV = - (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || - handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG); + 
bool merge = handleL->get_merge_supernodes(); + bool useSpMV = (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || + handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE double time_seconds = 0.0; bool invert_offdiag = handleL->get_invert_offdiagonal(); @@ -581,19 +565,16 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, bool invert_diag = handleL->get_invert_diagonal(); kernelHandleL->set_sptrsv_invert_diagonal(false); // invert after merge auto original_graphL_host = handleL->get_original_graph_host(); - superluL_host = read_superlu_valuesL(kernelHandleL, &L, - original_graphL_host); + superluL_host = read_superlu_valuesL(kernelHandleL, &L, original_graphL_host); // 2) re-load L into merged crs bool unit_diag = true; // reset invert option kernelHandleL->set_sptrsv_invert_diagonal(invert_diag); if (useSpMV) { - superluL_host = read_merged_supernodes( - kernelHandleL, nsuper, supercols, unit_diag, superluL_host, - graphL_host); + superluL_host = read_merged_supernodes(kernelHandleL, nsuper, supercols, unit_diag, superluL_host, + graphL_host); } else { - superluL = read_merged_supernodes( - kernelHandleL, nsuper, supercols, unit_diag, superluL_host, graphL); + superluL = read_merged_supernodes(kernelHandleL, nsuper, supercols, unit_diag, superluL_host, graphL); } // ======================================================== @@ -602,24 +583,20 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, invert_diag = handleU->get_invert_diagonal(); kernelHandleU->set_sptrsv_invert_diagonal(false); // invert after merge auto original_graphU_host = handleU->get_original_graph_host(); - superluU_host = read_superlu_valuesU(kernelHandleU, &L, &U, - original_graphU_host); + superluU_host = read_superlu_valuesU(kernelHandleU, &L, &U, original_graphU_host); // 2) re-load U into merged crs unit_diag = false; // reset invert option 
kernelHandleU->set_sptrsv_invert_diagonal(invert_diag); if (useSpMV) { - superluU_host = read_merged_supernodes( - kernelHandleU, nsuper, supercols, unit_diag, superluU_host, - graphU_host); + superluU_host = read_merged_supernodes(kernelHandleU, nsuper, supercols, unit_diag, superluU_host, + graphU_host); } else { - superluU = read_merged_supernodes( - kernelHandleU, nsuper, supercols, unit_diag, superluU_host, graphU); + superluU = read_merged_supernodes(kernelHandleU, nsuper, supercols, unit_diag, superluU_host, graphU); } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = tic.seconds(); - std::cout << " Time to Merge and Copy to device: " << time_seconds - << std::endl; + std::cout << " Time to Merge and Copy to device: " << time_seconds << std::endl; #endif } else { // ======================================================== @@ -628,8 +605,7 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, // supported for now tic.reset(); if (useSpMV) { - superluL_host = - read_superlu_valuesL(kernelHandleL, &L, graphL_host); + superluL_host = read_superlu_valuesL(kernelHandleL, &L, graphL_host); } else { superluL = read_superlu_valuesL(kernelHandleL, &L, graphL); } @@ -643,8 +619,7 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, // kernelHandleU->set_sptrsv_invert_diagonal (true); // only, invert diag is // supported for now if (useSpMV) { - superluU_host = read_superlu_valuesU(kernelHandleU, &L, &U, - graphU_host); + superluU_host = read_superlu_valuesU(kernelHandleU, &L, &U, graphU_host); } else { superluU = read_superlu_valuesU(kernelHandleU, &L, &U, graphU); } @@ -679,8 +654,7 @@ void sptrsv_compute(KernelHandle *kernelHandleL, KernelHandle *kernelHandleU, handleU->set_numeric_complete(); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); - std::cout << " Total Compute Time: " << time_seconds << std::endl - << std::endl; + std::cout << " Total Compute Time: " << time_seconds << 
std::endl << std::endl; #endif } // sptrsv_compute diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_supernode.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_supernode.hpp index c6e5d406a73b..586e8f3a6449 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_supernode.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_sptrsv_supernode.hpp @@ -76,11 +76,9 @@ graph_t deep_copy_graph(host_graph_t &host_graph) { /* ========================================================================================= */ -template -graph_t read_supernodal_graphL(KernelHandle *kernelHandle, int n, int nsuper, - int nnzA, bool ptr_by_column, ptr_type *mb, - size_type *nb, ordinal_type *rowind) { +template +graph_t read_supernodal_graphL(KernelHandle *kernelHandle, int n, int nsuper, int nnzA, bool ptr_by_column, + ptr_type *mb, size_type *nb, ordinal_type *rowind) { using row_map_view_t = typename graph_t::row_map_type::non_const_type; using cols_view_t = typename graph_t::entries_type::non_const_type; using integer_view_host_t = Kokkos::View; @@ -140,12 +138,10 @@ graph_t read_supernodal_graphL(KernelHandle *kernelHandle, int n, int nsuper, i1 = mb[s]; i2 = mb[s + 1]; } - int nsrow = i2 - i1; // "total" number of rows in all the supernodes - // (diagonal+off-diagonal) - int nsrow2 = - nsrow - - nscol; // "total" number of rows in all the off-diagonal supernodes - int ps2 = i1 + nscol; // offset into rowind + int nsrow = i2 - i1; // "total" number of rows in all the supernodes + // (diagonal+off-diagonal) + int nsrow2 = nsrow - nscol; // "total" number of rows in all the off-diagonal supernodes + int ps2 = i1 + nscol; // offset into rowind /* diagonal block */ for (int ii = 0; ii < nscol; ii++) { @@ -170,8 +166,7 @@ graph_t read_supernodal_graphL(KernelHandle *kernelHandle, int n, int nsuper, for (int ii = 0; ii < nsrow2; ii++) { sorted_rowind[ii] = ii; } - std::sort(&(sorted_rowind[0]), &(sorted_rowind[nsrow2]), - 
sort_indices(&rowind[ps2])); + std::sort(&(sorted_rowind[0]), &(sorted_rowind[nsrow2]), sort_indices(&rowind[ps2])); } for (int kk = 0; kk < nsrow2; kk++) { int ii = (merge ? sorted_rowind[kk] : kk); // sorted rowind @@ -206,11 +201,9 @@ graph_t read_supernodal_graphL(KernelHandle *kernelHandle, int n, int nsuper, /* ========================================================================================= */ -template -graph_t read_supernodal_graphLt(KernelHandle *kernelHandle, int n, int nsuper, - bool ptr_by_column, ptr_type *mb, size_type *nb, - ordinal_type *rowind) { +template +graph_t read_supernodal_graphLt(KernelHandle *kernelHandle, int n, int nsuper, bool ptr_by_column, ptr_type *mb, + size_type *nb, ordinal_type *rowind) { using row_map_view_t = typename graph_t::row_map_type::non_const_type; using cols_view_t = typename graph_t::entries_type::non_const_type; using integer_view_host_t = Kokkos::View; @@ -317,12 +310,10 @@ graph_t read_supernodal_graphLt(KernelHandle *kernelHandle, int n, int nsuper, i1 = mb[s]; i2 = mb[s + 1]; } - int nsrow = i2 - i1; // "total" number of rows in all the supernodes - // (diagonal+off-diagonal) - int nsrow2 = - nsrow - - nscol; // "total" number of rows in all the off-diagonal supernodes - int ps2 = i1 + nscol; // offset into rowind + int nsrow = i2 - i1; // "total" number of rows in all the supernodes + // (diagonal+off-diagonal) + int nsrow2 = nsrow - nscol; // "total" number of rows in all the off-diagonal supernodes + int ps2 = i1 + nscol; // offset into rowind /* diagonal block */ for (int ii = 0; ii < nscol; ii++) { @@ -377,8 +368,7 @@ graph_t read_supernodal_graphLt(KernelHandle *kernelHandle, int n, int nsuper, /* ========================================================================================= */ template -void check_supernode_sizes(const char *title, int n, int nsuper, - input_size_type *nb, input_graph_t &graph) { +void check_supernode_sizes(const char *title, int n, int nsuper, input_size_type *nb, 
input_graph_t &graph) { auto rowmap_view = graph.row_map; auto hr = Kokkos::create_mirror_view(rowmap_view); Kokkos::deep_copy(hr, rowmap_view); @@ -413,15 +403,13 @@ void check_supernode_sizes(const char *title, int n, int nsuper, tot_nscol += nscol; } } - std::cout << std::endl - << " ------------------------------------- " << std::endl - << std::endl; + std::cout << std::endl << " ------------------------------------- " << std::endl << std::endl; std::cout << " " << title << std::endl; std::cout << " + nsuper = " << nsuper << std::endl; - std::cout << " > nsrow: min = " << min_nsrow << ", max = " << max_nsrow - << ", avg = " << tot_nsrow / nsuper << std::endl; - std::cout << " > nscol: min = " << min_nscol << ", max = " << max_nscol - << ", avg = " << tot_nscol / nsuper << std::endl; + std::cout << " > nsrow: min = " << min_nsrow << ", max = " << max_nsrow << ", avg = " << tot_nsrow / nsuper + << std::endl; + std::cout << " > nscol: min = " << min_nscol << ", max = " << max_nscol << ", avg = " << tot_nscol / nsuper + << std::endl; std::cout << " + Matrix size = " << n << std::endl; std::cout << " + Total nnz = " << hr(n) << std::endl; std::cout << " + nnz / n = " << hr(n) / n << std::endl; @@ -430,17 +418,15 @@ void check_supernode_sizes(const char *title, int n, int nsuper, /* ========================================================================================= */ template -host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, - int nsuper, const input_size_type *nb) { +host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, int nsuper, const input_size_type *nb) { #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE double time_seconds = 0.0; Kokkos::Timer timer; #endif - using size_type = typename graph_t::size_type; - using cols_view_host_t = typename host_graph_t::entries_type::non_const_type; - using row_map_view_host_t = - typename host_graph_t::row_map_type::non_const_type; + using size_type = typename graph_t::size_type; + using 
cols_view_host_t = typename host_graph_t::entries_type::non_const_type; + using row_map_view_host_t = typename host_graph_t::row_map_type::non_const_type; using integer_view_host_t = Kokkos::View; int n = graph.numRows(); @@ -507,8 +493,7 @@ host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); - std::cout << " > Generate Supernodal Graph: count blocks : " - << time_seconds << std::endl; + std::cout << " > Generate Supernodal Graph: count blocks : " << time_seconds << std::endl; timer.reset(); #endif @@ -562,20 +547,16 @@ host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); - std::cout << " > Generate Supernodal Graph: compress graph : " - << time_seconds << " (col_major = " << col_major << ")" - << std::endl; + std::cout << " > Generate Supernodal Graph: compress graph : " << time_seconds << " (col_major = " << col_major + << ")" << std::endl; timer.reset(); #endif // sort column ids per row - KokkosSparse::sort_crs_graph(hr, hc); + KokkosSparse::sort_crs_graph(hr, hc); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); - std::cout << " > Generate Supernodal Graph: sort graph : " - << time_seconds << std::endl - << std::endl; + std::cout << " > Generate Supernodal Graph: sort graph : " << time_seconds << std::endl << std::endl; #endif host_graph_t static_graph(hc, hr); @@ -656,18 +637,15 @@ graph_t generate_supernodal_dag(int nsuper, graph_t &supL, graph_t &supU) { /* ========================================================================================= */ template -void merge_supernodal_graph(int *p_nsuper, input_size_type *nb, bool col_majorL, - input_graph_t &graphL, bool col_majorU, +void merge_supernodal_graph(int *p_nsuper, input_size_type *nb, bool col_majorL, input_graph_t &graphL, bool col_majorU, input_graph_t &graphU, int *etree) { int nsuper = 
*p_nsuper; // --------------------------------------------------------------- // looking for supernodes to merge (i.e., dense diagonal blocks) int nsuper2 = 0; - auto supL = generate_supernodal_graph( - !col_majorL, graphL, nsuper, nb); - auto supU = generate_supernodal_graph( - col_majorU, graphU, nsuper, nb); + auto supL = generate_supernodal_graph(!col_majorL, graphL, nsuper, nb); + auto supU = generate_supernodal_graph(col_majorU, graphU, nsuper, nb); auto row_mapL = supL.row_map; auto entriesL = supL.entries; @@ -690,8 +668,7 @@ void merge_supernodal_graph(int *p_nsuper, input_size_type *nb, bool col_majorL, if (k1 == k2 + 1) { mergedL = true; for (int k = 0; k < k2 && mergedL; k++) { - if (entriesL[row_mapL[s2] + k + 1] != - entriesL[row_mapL[s2 + 1] + k]) { + if (entriesL[row_mapL[s2] + k + 1] != entriesL[row_mapL[s2 + 1] + k]) { mergedL = false; } } @@ -703,8 +680,7 @@ void merge_supernodal_graph(int *p_nsuper, input_size_type *nb, bool col_majorL, if (k1 == k2 + 1) { mergedU = true; for (int k = 0; k < k2 && mergedU; k++) { - if (entriesU[row_mapU[s2] + k + 1] != - entriesU[row_mapU[s2 + 1] + k]) { + if (entriesU[row_mapU[s2] + k + 1] != entriesU[row_mapU[s2 + 1] + k]) { mergedU = false; } } @@ -819,11 +795,9 @@ void merge_supernodal_graph(int *p_nsuper, input_size_type *nb, bool col_majorL, /* ========================================================================================= */ -template -output_graph_t generate_merged_supernodal_graph( - bool lower, int nsuper, const input_size_type *nb, int nsuper2, - input_size_type *nb2, input_graph_t &graph, int *nnz) { +template +output_graph_t generate_merged_supernodal_graph(bool lower, int nsuper, const input_size_type *nb, int nsuper2, + input_size_type *nb2, input_graph_t &graph, int *nnz) { using cols_view_t = typename output_graph_t::entries_type::non_const_type; using row_map_view_t = typename output_graph_t::row_map_type::non_const_type; using size_type = typename input_graph_t::size_type; @@ -926,11 
+900,8 @@ output_graph_t generate_merged_supernodal_graph( */ /* For symbolic analysis */ template -void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, - host_graph_t graphL_host, - KernelHandle *kernelHandleL, - host_graph_t graphU_host, - KernelHandle *kernelHandleU) { +void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, host_graph_t graphL_host, + KernelHandle *kernelHandleL, host_graph_t graphU_host, KernelHandle *kernelHandleU) { #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE int nrows = graphL_host.numRows(); double time_seconds = 0.0; @@ -956,9 +927,8 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, bool col_majorU = handleU->is_column_major(); bool merge = handleL->get_merge_supernodes(); bool UinCSC = handleU->is_column_major(); - bool needEtree = - (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || - handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_ETREE); + bool needEtree = (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || + handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_ETREE); if (needEtree && etree == nullptr) { std::cout << std::endl << " ** etree needs to be set before calling sptrsv_symbolic " @@ -970,8 +940,7 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, // =================================================================== // > make a copy of supercols (merge needs both original and merged supercols) - using integer_view_host_t = - typename KernelHandle::SPTRSVHandleType::integer_view_host_t; + using integer_view_host_t = typename KernelHandle::SPTRSVHandleType::integer_view_host_t; integer_view_host_t supercols_view("supercols view", 1 + nsuper); int *supercols_merged = supercols_view.data(); for (int i = 0; i <= nsuper; i++) { @@ -983,14 +952,11 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, int nsuper_merged = nsuper; #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE tic.reset(); - 
check_supernode_sizes("Original L-structure", nrows, nsuper, - supercols_merged, graphL_host); - check_supernode_sizes("Original U-structure", nrows, nsuper, - supercols_merged, graphU_host); + check_supernode_sizes("Original L-structure", nrows, nsuper, supercols_merged, graphL_host); + check_supernode_sizes("Original U-structure", nrows, nsuper, supercols_merged, graphU_host); #endif // etree will be updated - merge_supernodal_graph(&nsuper_merged, supercols_merged, col_majorL, - graphL_host, col_majorU, graphU_host, etree); + merge_supernodal_graph(&nsuper_merged, supercols_merged, col_majorL, graphL_host, col_majorU, graphU_host, etree); // ================================================================= // generate merged graph for L-solve @@ -1000,18 +966,15 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, int nnzL_merged; bool lower = true; handleL->set_original_graph_host(graphL_host); // save graph before merge - graphL_host = generate_merged_supernodal_graph( - lower, nsuper, supercols, nsuper_merged, supercols_merged, graphL_host, - &nnzL_merged); + graphL_host = generate_merged_supernodal_graph(lower, nsuper, supercols, nsuper_merged, + supercols_merged, graphL_host, &nnzL_merged); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = tic.seconds(); - check_supernode_sizes("After Merge", nrows, nsuper_merged, supercols_merged, - graphL_host); + check_supernode_sizes("After Merge", nrows, nsuper_merged, supercols_merged, graphL_host); std::cout << " for L factor:" << std::endl; std::cout << " Merge Supernodes Time: " << time_seconds << std::endl; - std::cout << " Number of nonzeros : " << nnzL << " -> " << nnzL_merged - << " : " << double(nnzL_merged) / double(nnzL) << "x" - << std::endl; + std::cout << " Number of nonzeros : " << nnzL << " -> " << nnzL_merged << " : " + << double(nnzL_merged) / double(nnzL) << "x" << std::endl; #endif // ================================================================= @@ -1023,18 +986,15 @@ 
void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, int nnzU_merged; lower = (UinCSC ? false : true); handleU->set_original_graph_host(graphU_host); // save graph before merge - graphU_host = generate_merged_supernodal_graph( - lower, nsuper, supercols, nsuper_merged, supercols_merged, graphU_host, - &nnzU_merged); + graphU_host = generate_merged_supernodal_graph(lower, nsuper, supercols, nsuper_merged, + supercols_merged, graphU_host, &nnzU_merged); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = tic.seconds(); - check_supernode_sizes("After Merge", nrows, nsuper_merged, supercols_merged, - graphU_host); + check_supernode_sizes("After Merge", nrows, nsuper_merged, supercols_merged, graphU_host); std::cout << " for U factor:" << std::endl; std::cout << " Merge Supernodes Time: " << time_seconds << std::endl; - std::cout << " Number of nonzeros : " << nnzU << " -> " << nnzU_merged - << " : " << double(nnzU_merged) / double(nnzU) << "x" - << std::endl; + std::cout << " Number of nonzeros : " << nnzU << " -> " << nnzU_merged << " : " + << double(nnzU_merged) / double(nnzU) << "x" << std::endl; #endif // update the number of supernodes @@ -1062,14 +1022,11 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, if (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_DAG || handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG) { // generate supernodal graphs for DAG scheduling - auto supL = generate_supernodal_graph( - !col_majorL, graphL_host, nsuper, supercols); - auto supU = generate_supernodal_graph(col_majorU, graphU_host, - nsuper, supercols); + auto supL = generate_supernodal_graph(!col_majorL, graphL_host, nsuper, supercols); + auto supU = generate_supernodal_graph(col_majorU, graphU_host, nsuper, supercols); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = tic.seconds(); - std::cout << " Compute Supernodal Graph Time: " << time_seconds - << std::endl; + std::cout << " Compute Supernodal Graph 
Time: " << time_seconds << std::endl; tic.reset(); #endif @@ -1131,10 +1088,8 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, handleU->set_etree(etree); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); - std::cout << " Total Symbolic Time: " << time_seconds << std::endl - << std::endl; - std::cout << " Total nnz: " << graphL_host.row_map(nrows) << " + " - << graphU_host.row_map(nrows) << std::endl; + std::cout << " Total Symbolic Time: " << time_seconds << std::endl << std::endl; + std::cout << " Total nnz: " << graphL_host.row_map(nrows) << " + " << graphU_host.row_map(nrows) << std::endl; #endif } @@ -1147,9 +1102,8 @@ void sptrsv_supernodal_symbolic(int nsuper, int *supercols, int *etree, struct Tag_SupTrtriFunctor {}; struct Tag_SupTrtriTrmmFunctor {}; -template +template struct TriSupernodalTrtriFunctor { integer_view_host_t supernode_ids; const input_size_type *nb; @@ -1158,8 +1112,7 @@ struct TriSupernodalTrtriFunctor { values_type hv; KOKKOS_INLINE_FUNCTION - TriSupernodalTrtriFunctor(integer_view_host_t supernode_ids_, - const input_size_type *nb_, row_map_type &hr_, + TriSupernodalTrtriFunctor(integer_view_host_t supernode_ids_, const input_size_type *nb_, row_map_type &hr_, index_type &hc_, values_type &hv_) : supernode_ids(supernode_ids_), nb(nb_), hr(hr_), hc(hc_), hv(hv_) {} @@ -1181,9 +1134,7 @@ struct TriSupernodalTrtriFunctor { // invert diagonal auto nnzD = hr(j1); - Kokkos::View - viewL(&hv(nnzD), nsrow, nscol); + Kokkos::View viewL(&hv(nnzD), nsrow, nscol); auto Ljj = Kokkos::subview(viewL, range_type(0, nscol), Kokkos::ALL()); KokkosBatched::SerialTrtri::invoke(Ljj); } @@ -1208,9 +1159,7 @@ struct TriSupernodalTrtriFunctor { // invert diagonal auto nnzD = hr(j1); - Kokkos::View - viewL(&hv(nnzD), nsrow, nscol); + Kokkos::View viewL(&hv(nnzD), nsrow, nscol); auto Ljj = Kokkos::subview(viewL, range_type(0, nscol), Kokkos::ALL()); KokkosBatched::SerialTrtri::invoke(Ljj); @@ -1218,24 +1167,19 @@ 
struct TriSupernodalTrtriFunctor { // if (nsrow > nscol && invert_offdiag) { const scalar_t one(1.0); - auto Lij = - Kokkos::subview(viewL, range_type(nscol, nsrow), Kokkos::ALL()); - KokkosBatched::SerialTrmm::invoke(one, Ljj, Lij); + auto Lij = Kokkos::subview(viewL, range_type(nscol, nsrow), Kokkos::ALL()); + KokkosBatched::SerialTrmm::invoke(one, Ljj, + Lij); } } }; /* ========================================================================================= */ -template -void invert_supernodal_columns_batched(KernelHandle *kernelHandle, - bool unit_diag, - const input_size_type *nb, - row_map_type &hr, index_type &hc, - values_type &hv, int num_batches, +template +void invert_supernodal_columns_batched(KernelHandle *kernelHandle, bool unit_diag, const input_size_type *nb, + row_map_type &hr, index_type &hc, values_type &hv, int num_batches, integer_view_host_t supernode_ids) { using execution_space = typename values_type::execution_space; @@ -1253,99 +1197,66 @@ void invert_supernodal_columns_batched(KernelHandle *kernelHandle, if (num_batches > 0) { // lower is always in CSC, if UinCSC, then lower=false, else lower=true bool lower_tri = kernelHandle->is_sptrsv_lower_tri(); - bool lower = ((lower_tri && handle->is_column_major()) || - (!lower_tri && !handle->is_column_major())); + bool lower = ((lower_tri && handle->is_column_major()) || (!lower_tri && !handle->is_column_major())); if (lower) { if (unit_diag) { if (invert_offdiag) { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } else { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = 
Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } } else { if (invert_offdiag) { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } else { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } } } else { if (unit_diag) { if (invert_offdiag) { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } else { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + 
Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } } else { if (invert_offdiag) { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } else { - using range_policy = - Kokkos::RangePolicy; - TriSupernodalTrtriFunctor + using range_policy = Kokkos::RangePolicy; + TriSupernodalTrtriFunctor sptrsv_tritri_functor(supernode_ids, nb, hr, hc, hv); - Kokkos::parallel_for("TriSupernodalTrtriFunctor", - range_policy(0, num_batches), - sptrsv_tritri_functor); + Kokkos::parallel_for("TriSupernodalTrtriFunctor", range_policy(0, num_batches), sptrsv_tritri_functor); } } } @@ -1354,12 +1265,10 @@ void invert_supernodal_columns_batched(KernelHandle *kernelHandle, /* ========================================================================================= */ -template -void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, - int nsuper, const input_size_type *nb, - row_map_type &hr, index_type &hc, - values_type &hv) { +template +void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, int nsuper, const input_size_type *nb, + row_map_type &hr, index_type &hc, values_type &hv) { using execution_space = typename values_type::execution_space; using memory_space = typename execution_space::memory_space; using values_view_t = typename values_type::non_const_type; @@ -1376,8 +1285,7 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, // lower is always in CSC, if UinCSC, then lower=false, else lower=true bool lower_tri = kernelHandle->is_sptrsv_lower_tri(); - bool lower = ((lower_tri && handle->is_column_major()) 
|| - (!lower_tri && !handle->is_column_major())); + bool lower = ((lower_tri && handle->is_column_major()) || (!lower_tri && !handle->is_column_major())); // quick return if (!invert_diag) return; @@ -1402,16 +1310,15 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, // If we are running KokkosKernels::trmm on device, // then we need to allocate a workspace on device using trmm_execution_space = typename KernelHandle::HandleExecSpace; - using trmm_memory_space = typename KernelHandle::HandlePersistentMemorySpace; - using trmm_view_t = Kokkos::View; + using trmm_memory_space = typename KernelHandle::HandlePersistentMemorySpace; + using trmm_view_t = Kokkos::View; #if !defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) // use KokkosBlas::trmm only with CUBLAS (since deep-copy to host throws an // error) bool run_trmm_on_device = false; #else bool run_trmm_on_device = - (handle->get_trmm_on_device() && - !std::is_same::value); + (handle->get_trmm_on_device() && !std::is_same::value); #endif // figure out largest supernode @@ -1452,9 +1359,7 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, char diag_char = (unit_diag ? 
'U' : 'N'); // NOTE: we currently supports only default_layout = LayoutLeft - Kokkos::View - viewL(&hv(nnzD), nsrow, nscol); + Kokkos::View viewL(&hv(nnzD), nsrow, nscol); auto Ljj = Kokkos::subview(viewL, range_type(0, nscol), Kokkos::ALL()); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE @@ -1462,16 +1367,14 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, #endif #if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) if (run_trmm_on_device) { - Kokkos::View - dViewL(trmm_dwork.data(), nsrow, nscol); + Kokkos::View dViewL( + trmm_dwork.data(), nsrow, nscol); // deep-copy the whole supernode column to device Kokkos::deep_copy(dViewL, viewL); // call trtri on device - auto dViewLjj = - Kokkos::subview(dViewL, range_type(0, nscol), Kokkos::ALL()); + auto dViewLjj = Kokkos::subview(dViewL, range_type(0, nscol), Kokkos::ALL()); KokkosLapack::trtri(&uplo_char, &diag_char, dViewLjj); } else #endif @@ -1486,16 +1389,14 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, if (nsrow > nscol && invert_offdiag) { char side_char = 'R'; char tran_char = 'N'; - auto Lij = - Kokkos::subview(viewL, range_type(nscol, nsrow), Kokkos::ALL()); + auto Lij = Kokkos::subview(viewL, range_type(nscol, nsrow), Kokkos::ALL()); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE timer.reset(); #endif if (run_trmm_on_device) { - Kokkos::View - dViewL(trmm_dwork.data(), nsrow, nscol); + Kokkos::View dViewL( + trmm_dwork.data(), nsrow, nscol); #if !defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) // deep-copy the whole supernode column to device @@ -1503,13 +1404,10 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, #endif // NOTE: we currently supports only default_layout = LayoutLeft - auto dViewLjj = - Kokkos::subview(dViewL, range_type(0, nscol), Kokkos::ALL()); - auto dViewLij = - Kokkos::subview(dViewL, range_type(nscol, nsrow), Kokkos::ALL()); + auto dViewLjj = Kokkos::subview(dViewL, range_type(0, nscol), Kokkos::ALL()); + auto dViewLij = 
Kokkos::subview(dViewL, range_type(nscol, nsrow), Kokkos::ALL()); - KokkosBlas::trmm(&side_char, &uplo_char, &tran_char, &diag_char, one, - dViewLjj, dViewLij); + KokkosBlas::trmm(&side_char, &uplo_char, &tran_char, &diag_char, one, dViewLjj, dViewLij); #if !defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) // deep-copy the whole panel back to host (since I cannot just @@ -1517,8 +1415,7 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, Kokkos::deep_copy(viewL, dViewL); #endif } else { - KokkosBlas::trmm(&side_char, &uplo_char, &tran_char, &diag_char, one, - Ljj, Lij); + KokkosBlas::trmm(&side_char, &uplo_char, &tran_char, &diag_char, one, Ljj, Lij); } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time2 += timer.seconds(); @@ -1528,9 +1425,8 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, #if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) if (run_trmm_on_device) { // deep-copy the whole supernode column back to host - Kokkos::View - dViewL(trmm_dwork.data(), nsrow, nscol); + Kokkos::View dViewL( + trmm_dwork.data(), nsrow, nscol); Kokkos::deep_copy(viewL, dViewL); } #endif @@ -1545,8 +1441,7 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE timer.reset(); #endif - invert_supernodal_columns_batched(kernelHandle, unit_diag, nb, hr, hc, hv, - num_batches, supernode_ids); + invert_supernodal_columns_batched(kernelHandle, unit_diag, nb, hr, hc, hv, num_batches, supernode_ids); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time3 = timer.seconds(); #endif @@ -1557,8 +1452,7 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE std::cout << " invert_supernodes" << std::endl; - std::cout << " + num supernodes = " << nsuper - << " num batchs = " << num_batches << std::endl; + std::cout << " + num supernodes = " << nsuper << " num batchs = " << num_batches << std::endl; std::cout << " > Time for inversion::trtri : " << 
time1 << std::endl; std::cout << " > Time for inversion::trmm : " << time2 << std::endl; std::cout << " > Time for batchs : " << time3 << std::endl; @@ -1567,10 +1461,8 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, /* ========================================================================================= */ -template -crsmat_t read_merged_supernodes(KernelHandle *kernelHandle, int nsuper, - const input_ptr_type *mb, bool unit_diag, +template +crsmat_t read_merged_supernodes(KernelHandle *kernelHandle, int nsuper, const input_ptr_type *mb, bool unit_diag, input_crsmat_t &L, graph_t &static_graph) { using values_view_t = typename crsmat_t::values_type::non_const_type; using scalar_t = typename values_view_t::value_type; @@ -1665,14 +1557,11 @@ crsmat_t read_merged_supernodes(KernelHandle *kernelHandle, int nsuper, /* ========================================================================================= */ -template -crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, - bool ptr_by_column, const input_size_type *mb, - const input_ptr_type *nb, - const size_type *colptr, ordinal_type *rowind, - scalar_t *Lx, graph_t &static_graph) { +template +crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, bool ptr_by_column, + const input_size_type *mb, const input_ptr_type *nb, const size_type *colptr, + ordinal_type *rowind, scalar_t *Lx, graph_t &static_graph) { using values_view_t = typename crsmat_t::values_type::non_const_type; using integer_view_host_t = Kokkos::View; @@ -1689,8 +1578,7 @@ crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, // lower is always in CSC, if UinCSC, then lower=false, else lower=true bool lower_tri = kernelHandle->is_sptrsv_lower_tri(); - bool lower = ((lower_tri && handle->is_column_major()) || - (!lower_tri && !handle->is_column_major())); + bool lower = ((lower_tri && handle->is_column_major()) || (!lower_tri && 
!handle->is_column_major())); // load graph auto rowmap_view = static_graph.row_map; @@ -1742,12 +1630,10 @@ crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, i1 = mb[s]; i2 = mb[s + 1]; } - int nsrow = i2 - i1; // "total" number of rows in all the supernodes - // (diagonal+off-diagonal) - int nsrow2 = - nsrow - - nscol; // "total" number of rows in all the off-diagonal supernodes - int ps2 = i1 + nscol; // offset into rowind + int nsrow = i2 - i1; // "total" number of rows in all the supernodes + // (diagonal+off-diagonal) + int nsrow2 = nsrow - nscol; // "total" number of rows in all the off-diagonal supernodes + int ps2 = i1 + nscol; // offset into rowind int psx; // offset into data, Lx[s][s] if (ptr_by_column) { @@ -1799,8 +1685,7 @@ crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, for (int ii = 0; ii < nsrow2; ii++) { sorted_rowind(ii) = ii; } - std::sort(sorted_rowind.data(), sorted_rowind.data() + nsrow2, - sort_indices(&rowind[ps2])); + std::sort(sorted_rowind.data(), sorted_rowind.data() + nsrow2, sort_indices(&rowind[ps2])); } for (int jj = 0; jj < nscol; jj++) { for (int kk = 0; kk < nsrow2; kk++) { @@ -1818,8 +1703,7 @@ crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, hr(0) = 0; #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE - std::cout << " read_supernodal_values(" << (lower ? "lower)" : "upper)") - << std::endl; + std::cout << " read_supernodal_values(" << (lower ? 
"lower)" : "upper)") << std::endl; std::cout << " * Matrix size = " << n << std::endl; std::cout << " * Total nnz = " << hr(n) << std::endl; std::cout << " * nnz / n = " << hr(n) / n << std::endl; @@ -1841,14 +1725,11 @@ crsmat_t read_supernodal_values(KernelHandle *kernelHandle, int n, int nsuper, /* ========================================================================================= */ -template -crsmat_t read_supernodal_valuesLt(KernelHandle *kernelHandle, int n, int nsuper, - bool ptr_by_column, const input_size_type *mb, - const input_ptr_type *nb, - const size_type *colptr, ordinal_type *rowind, - scalar_t *Lx, graph_t &static_graph) { +template +crsmat_t read_supernodal_valuesLt(KernelHandle *kernelHandle, int n, int nsuper, bool ptr_by_column, + const input_size_type *mb, const input_ptr_type *nb, const size_type *colptr, + ordinal_type *rowind, scalar_t *Lx, graph_t &static_graph) { using values_view_t = typename crsmat_t::values_type::non_const_type; using integer_view_host_t = Kokkos::View; @@ -1918,12 +1799,10 @@ crsmat_t read_supernodal_valuesLt(KernelHandle *kernelHandle, int n, int nsuper, i1 = mb[s]; i2 = mb[s + 1]; } - int nsrow = i2 - i1; // "total" number of rows in all the supernodes - // (diagonal+off-diagonal) - int nsrow2 = - nsrow - - nscol; // "total" number of rows in all the off-diagonal supernodes - int ps2 = i1 + nscol; // offset into rowind + int nsrow = i2 - i1; // "total" number of rows in all the supernodes + // (diagonal+off-diagonal) + int nsrow2 = nsrow - nscol; // "total" number of rows in all the off-diagonal supernodes + int ps2 = i1 + nscol; // offset into rowind int psx; // offset into data, Lx[s][s] if (ptr_by_column) { @@ -2031,9 +1910,8 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { Kokkos::deep_copy(hnodes_per_level, nodes_per_level); // id of supernodes at each level - auto nodes_grouped_by_level = handleL->get_nodes_grouped_by_level(); - auto nodes_grouped_by_level_host = - 
Kokkos::create_mirror_view(nodes_grouped_by_level); + auto nodes_grouped_by_level = handleL->get_nodes_grouped_by_level(); + auto nodes_grouped_by_level_host = Kokkos::create_mirror_view(nodes_grouped_by_level); Kokkos::deep_copy(nodes_grouped_by_level_host, nodes_grouped_by_level); // load graphs @@ -2067,13 +1945,10 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { } } // allocate for all the subgraphs - row_map_view_t total_rowmap_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap_view"), - 2 * nlevels * (nrows + 1)); - cols_view_t total_column_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "colmap_view"), newNnz); - values_view_t total_values_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_view"), newNnz); + row_map_view_t total_rowmap_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap_view"), + 2 * nlevels * (nrows + 1)); + cols_view_t total_column_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "colmap_view"), newNnz); + values_view_t total_values_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_view"), newNnz); // create host-mirrors row_map_view_host_t total_hr = Kokkos::create_mirror_view(total_rowmap_view); cols_view_host_t total_hc = Kokkos::create_mirror_view(total_column_view); @@ -2095,10 +1970,9 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE timer.reset(); #endif - int nnzL = 0; - int nnzD = 0; - int lvl_nodes = - hnodes_per_level(lvl); // number of supernodes at this level + int nnzL = 0; + int nnzD = 0; + int lvl_nodes = hnodes_per_level(lvl); // number of supernodes at this level for (int league_rank = 0; league_rank < lvl_nodes; league_rank++) { auto s = nodes_grouped_by_level_host(node_count + league_rank); @@ -2126,37 +2000,25 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { using range_type = Kokkos::pair; int offset_rowmap = lvl * 2 * (nrows + 1); 
row_map_view_t rowmap_view = - Kokkos::subview(total_rowmap_view, - range_type(offset_rowmap, offset_rowmap + (nrows + 1))); - cols_view_t column_view = Kokkos::subview( - total_column_view, range_type(offset_view, offset_view + nnzL)); - values_view_t values_view = Kokkos::subview( - total_values_view, range_type(offset_view, offset_view + nnzL)); - - row_map_view_host_t hr = Kokkos::subview( - total_hr, range_type(offset_rowmap, offset_rowmap + (nrows + 1))); - cols_view_host_t hc = - Kokkos::subview(total_hc, range_type(offset_view, offset_view + nnzL)); - values_view_host_t hv = - Kokkos::subview(total_hv, range_type(offset_view, offset_view + nnzL)); + Kokkos::subview(total_rowmap_view, range_type(offset_rowmap, offset_rowmap + (nrows + 1))); + cols_view_t column_view = Kokkos::subview(total_column_view, range_type(offset_view, offset_view + nnzL)); + values_view_t values_view = Kokkos::subview(total_values_view, range_type(offset_view, offset_view + nnzL)); + + row_map_view_host_t hr = Kokkos::subview(total_hr, range_type(offset_rowmap, offset_rowmap + (nrows + 1))); + cols_view_host_t hc = Kokkos::subview(total_hc, range_type(offset_view, offset_view + nnzL)); + values_view_host_t hv = Kokkos::subview(total_hv, range_type(offset_view, offset_view + nnzL)); offset_view += nnzL; // create subviews for the subgraph, just for diagonal blocks offset_rowmap += nrows + 1; row_map_view_t rowmapD_view = - Kokkos::subview(total_rowmap_view, - range_type(offset_rowmap, offset_rowmap + (nrows + 1))); - cols_view_t columnD_view = Kokkos::subview( - total_column_view, range_type(offset_view, offset_view + nnzD)); - values_view_t valuesD_view = Kokkos::subview( - total_values_view, range_type(offset_view, offset_view + nnzD)); - - row_map_view_host_t hrD = Kokkos::subview( - total_hr, range_type(offset_rowmap, offset_rowmap + (nrows + 1))); - cols_view_host_t hcD = - Kokkos::subview(total_hc, range_type(offset_view, offset_view + nnzD)); - values_view_host_t hvD = - 
Kokkos::subview(total_hv, range_type(offset_view, offset_view + nnzD)); + Kokkos::subview(total_rowmap_view, range_type(offset_rowmap, offset_rowmap + (nrows + 1))); + cols_view_t columnD_view = Kokkos::subview(total_column_view, range_type(offset_view, offset_view + nnzD)); + values_view_t valuesD_view = Kokkos::subview(total_values_view, range_type(offset_view, offset_view + nnzD)); + + row_map_view_host_t hrD = Kokkos::subview(total_hr, range_type(offset_rowmap, offset_rowmap + (nrows + 1))); + cols_view_host_t hcD = Kokkos::subview(total_hc, range_type(offset_view, offset_view + nnzD)); + values_view_host_t hvD = Kokkos::subview(total_hv, range_type(offset_view, offset_view + nnzD)); offset_view += nnzD; #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time3 += timer2.seconds(); @@ -2178,7 +2040,7 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { auto s = nodes_grouped_by_level_host(node_count + league_rank); int j1 = supercols_host[s]; // start of this supernode int j2 = supercols_host[s + 1]; // start of next supernode - int nscol = j2 - j1; // number of columns in the s-th supernode column + int nscol = j2 - j1; // number of columns in the s-th supernode column for (int j = j1; j < j2; j++) { for (size_type k = row_mapL(j); k < row_mapL(j + 1); k++) { if (valuesL(k) != zero) { @@ -2205,7 +2067,7 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { // (these column ids are sorted in ascending order at each level) int j1 = supercols_host[s]; // start of this supernode int j2 = supercols_host[s + 1]; // start of next supernode - int nscol = j2 - j1; // number of columns in the s-th supernode column + int nscol = j2 - j1; // number of columns in the s-th supernode column for (int j = j1; j < j2; j++) { // diagonals for (size_type k = row_mapL(j); k < row_mapL(j) + nscol; k++) { @@ -2255,7 +2117,7 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { // (these column ids are sorted in ascending order at 
each level) int j1 = supercols_host[s]; // start of this supernode int j2 = supercols_host[s + 1]; // start of next supernode - int nscol = j2 - j1; // number of columns in the s-th supernode column + int nscol = j2 - j1; // number of columns in the s-th supernode column // insert empty columns for the columns skipped (between the previous // and this supernodes) @@ -2316,8 +2178,7 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { sub_crsmats[lvl] = crsmat_t("CrsMatrix", nrows, values_view, sub_graph); if (!invert_offdiag) { graph_t diag_graph(columnD_view, rowmapD_view); - diag_blocks[lvl] = - crsmat_t("DiagMatrix", nrows, valuesD_view, diag_graph); + diag_blocks[lvl] = crsmat_t("DiagMatrix", nrows, valuesD_view, diag_graph); } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE // std::cout << " > split nnz(" << lvl << ") = " << nnzL+nnzD << @@ -2344,17 +2205,11 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) { } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE std::cout << " split_crsmat" << std::endl; - std::cout << " > Time to split to submatrices : " << time1 - << std::endl; - std::cout << " + allocate submatrices : " << time4 - << std::endl; - std::cout << " + create subviews : " << time3 - << std::endl; - std::cout << " > Time to copy submatrices to device : " << time2 - << std::endl; - std::cout << " > Total NNZ : " << oldNnz << " -> " - << newNnz << std::endl - << std::endl; + std::cout << " > Time to split to submatrices : " << time1 << std::endl; + std::cout << " + allocate submatrices : " << time4 << std::endl; + std::cout << " + create subviews : " << time3 << std::endl; + std::cout << " > Time to copy submatrices to device : " << time2 << std::endl; + std::cout << " > Total NNZ : " << oldNnz << " -> " << newNnz << std::endl << std::endl; #endif } @@ -2367,20 +2222,15 @@ void sptrsv_compute(KernelHandle *kernelHandleL, crsmat_input_t L) { // load sptrsv-handles auto *handleL = kernelHandleL->get_sptrsv_handle(); if 
(!(handleL->is_symbolic_complete())) { - std::cout - << std::endl - << " ** needs to call sptrsv_symbolic before calling sptrsv_numeric **" - << std::endl - << std::endl; + std::cout << std::endl + << " ** needs to call sptrsv_symbolic before calling sptrsv_numeric **" << std::endl + << std::endl; return; } bool merged = handleL->get_merge_supernodes(); if (merged) { // TODO: follow what's done in sptrsv_compute in superlu - std::cout << std::endl - << " ** merge is not supported through this interface, yet **" - << std::endl - << std::endl; + std::cout << std::endl << " ** merge is not supported through this interface, yet **" << std::endl << std::endl; return; } @@ -2393,12 +2243,11 @@ void sptrsv_compute(KernelHandle *kernelHandleL, crsmat_input_t L) { // load crsGraph // auto graph = handleL->get_original_graph_host (); // graph stored in handle // (before merge) - auto graph = handleL->get_graph(); // graph stored in handle (before merge) - auto graph_host = - handleL->get_graph_host(); // graph stored in handle (before merge) - auto row_map = graph_host.row_map; - auto entries = graph_host.entries; - auto nrows = graph_host.numRows(); + auto graph = handleL->get_graph(); // graph stored in handle (before merge) + auto graph_host = handleL->get_graph_host(); // graph stored in handle (before merge) + auto row_map = graph_host.row_map; + auto entries = graph_host.entries; + auto nrows = graph_host.numRows(); // from input CrsMatrix auto values = L.values; // numerical values from input (host), output will be @@ -2408,14 +2257,12 @@ void sptrsv_compute(KernelHandle *kernelHandleL, crsmat_input_t L) { // read numerical values of L from Cholmod using crsmat_t = typename KernelHandle::SPTRSVHandleType::crsmat_t; bool ptr_by_column = true; - auto crsmatL = read_supernodal_values( - kernelHandleL, nrows, nsuper, ptr_by_column, row_map.data(), supercols, - row_map.data(), entries.data(), values.data(), graph); + auto crsmatL = read_supernodal_values(kernelHandleL, 
nrows, nsuper, ptr_by_column, row_map.data(), + supercols, row_map.data(), entries.data(), values.data(), graph); // =================================================================== - bool useSpMV = - (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || - handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG); + bool useSpMV = (handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV || + handleL->get_algorithm() == SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG); if (useSpMV) { // ---------------------------------------------------- // split the matrix into submatrices for spmv at each level diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_trsv.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_trsv.hpp index 9b25811d1019..248d0259da11 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_trsv.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_trsv.hpp @@ -48,29 +48,22 @@ namespace KokkosSparse { /// \param b [in] The input (right-hand side) (multi)vector. /// \param x [in] The output (left-hand side) (multi)vector. template -void trsv(const char uplo[], const char trans[], const char diag[], - const AMatrix& A, const BMV& b, const XMV& x) { +void trsv(const char uplo[], const char trans[], const char diag[], const AMatrix& A, const BMV& b, const XMV& x) { // FIXME (mfh 23 Apr 2015) Need to implement rank-1 version of this function. 
static_assert(BMV::rank == 2, "KokkosBlas::trsv: Rank-1 version of this " "function has not yet been implemented."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::trsv: b is not a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosBlas::trsv: x is not a Kokkos::View."); - static_assert((int)BMV::rank == (int)XMV::rank, - "KokkosBlas::trsv: The ranks of b and x do not match."); - static_assert( - BMV::rank == 1 || BMV::rank == 2, - "KokkosBlas::trsv: b and x must both either have rank 1, or rank 2."); - static_assert(std::is_same::value, + static_assert(Kokkos::is_view::value, "KokkosBlas::trsv: b is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosBlas::trsv: x is not a Kokkos::View."); + static_assert((int)BMV::rank == (int)XMV::rank, "KokkosBlas::trsv: The ranks of b and x do not match."); + static_assert(BMV::rank == 1 || BMV::rank == 2, "KokkosBlas::trsv: b and x must both either have rank 1, or rank 2."); + static_assert(std::is_same::value, "KokkosBlas::trsv: The output x must be nonconst."); - static_assert(KokkosSparse::is_crs_matrix::value || - KokkosSparse::Experimental::is_bsr_matrix::value, - "KokkosBlas::trsv: A is not a CRS or BSR matrix."); + static_assert( + KokkosSparse::is_crs_matrix::value || KokkosSparse::Experimental::is_bsr_matrix::value, + "KokkosBlas::trsv: A is not a CRS or BSR matrix."); // The following three code lines have been moved up by Massimiliano Lupo // Pasini @@ -79,21 +72,18 @@ void trsv(const char uplo[], const char trans[], const char diag[], const size_type numCols = static_cast(A.numPointCols()); const size_type zero = static_cast(0); - if (zero != numRows && uplo[0] != 'U' && uplo[0] != 'u' && uplo[0] != 'L' && - uplo[0] != 'l') { + if (zero != numRows && uplo[0] != 'U' && uplo[0] != 'u' && uplo[0] != 'L' && uplo[0] != 'l') { std::ostringstream os; os << "Invalid uplo[0] = \'" << uplo << "\'"; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if (zero != numRows && 
trans[0] != 'C' && trans[0] != 'c' && - trans[0] != 'T' && trans[0] != 't' && trans[0] != 'N' && + if (zero != numRows && trans[0] != 'C' && trans[0] != 'c' && trans[0] != 'T' && trans[0] != 't' && trans[0] != 'N' && trans[0] != 'n') { std::ostringstream os; os << "Invalid trans[0] = \'" << trans << "\'"; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if (zero != numRows && diag[0] != 'U' && diag[0] != 'u' && diag[0] != 'N' && - diag[0] != 'n') { + if (zero != numRows && diag[0] != 'U' && diag[0] != 'u' && diag[0] != 'N' && diag[0] != 'n') { std::ostringstream os; os << "Invalid diag[0] = \'" << diag << "\'"; KokkosKernels::Impl::throw_runtime_exception(os.str()); @@ -107,52 +97,44 @@ void trsv(const char uplo[], const char trans[], const char diag[], if (!transpose && (numCols != x.extent(0) || numRows != b.extent(0))) { std::ostringstream os; os << "Dimensions do not match (non-transpose case). " - << "A is " << numRows << " x " << numCols << ", x is " << x.extent(0) - << " x " << x.extent(1) << ", and b is " << b.extent(0) << " x " - << b.extent(1); + << "A is " << numRows << " x " << numCols << ", x is " << x.extent(0) << " x " << x.extent(1) << ", and b is " + << b.extent(0) << " x " << b.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } if (transpose && (numRows != x.extent(0) || numCols != b.extent(0))) { std::ostringstream os; os << "Dimensions do not match (transpose or conjugate transpose case). 
" - << "A is " << numRows << " x " << numCols << ", x is " << x.extent(0) - << " x " << x.extent(1) << ", and b is " << b.extent(0) << " x " - << b.extent(1); + << "A is " << numRows << " x " << numCols << ", x is " << x.extent(0) << " x " << x.extent(1) << ", and b is " + << b.extent(0) << " x " << b.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } - using AMatrix_Bsr_Internal = KokkosSparse::Experimental::BsrMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type>; + using AMatrix_Bsr_Internal = + KokkosSparse::Experimental::BsrMatrix, + typename AMatrix::const_size_type>; using AMatrix_Internal = std::conditional_t< KokkosSparse::is_crs_matrix::value, - KokkosSparse::CrsMatrix, + KokkosSparse::CrsMatrix, typename AMatrix::const_size_type>, AMatrix_Bsr_Internal>; AMatrix_Internal A_i(A); - typedef Kokkos::View< - typename BMV::const_value_type**, typename BMV::array_layout, - typename BMV::device_type, - Kokkos::MemoryTraits > + typedef Kokkos::View > BMV_Internal; - typedef Kokkos::View > XMV_Internal; BMV_Internal b_i = b; XMV_Internal x_i = x; - KokkosSparse::Impl::TRSV::trsv( - uplo, trans, diag, A_i, b_i, x_i); + KokkosSparse::Impl::TRSV::trsv(uplo, trans, diag, A_i, b_i, x_i); } } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosKernels_tpl_handles_def.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosKernels_tpl_handles_def.hpp index 6c60483cae3b..a88ad12130c6 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosKernels_tpl_handles_def.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosKernels_tpl_handles_def.hpp @@ -49,9 +49,7 @@ namespace Impl { RocsparseSingleton::RocsparseSingleton() { KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_handle(&rocsparseHandle)); - Kokkos::push_finalize_hook([&]() { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_handle(rocsparseHandle)); - }); + 
Kokkos::push_finalize_hook([&]() { KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_handle(rocsparseHandle)); }); } RocsparseSingleton& RocsparseSingleton::singleton() { diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gauss_seidel_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gauss_seidel_tpl_spec_avail.hpp index cad991639bc1..fc9252daaaa4 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gauss_seidel_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gauss_seidel_tpl_spec_avail.hpp @@ -26,15 +26,14 @@ struct gauss_seidel_symbolic_tpl_spec_avail { }; // Specialization struct which defines whether a specialization exists -template +template struct gauss_seidel_numeric_tpl_spec_avail { enum : bool { value = false }; }; // Specialization struct which defines whether a specialization exists -template +template struct gauss_seidel_apply_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gmres_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gmres_tpl_spec_avail.hpp index 6b8748e487a4..31becbbd0bb6 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gmres_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_gmres_tpl_spec_avail.hpp @@ -22,8 +22,7 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct gmres_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp index bdcc36c04091..e312c1b4084b 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp @@ -20,10 +20,8 @@ namespace KokkosSparse { 
namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct par_ilut_numeric_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp index 08eb851ef38d..36fba2dface9 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct par_ilut_symbolic_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp index 0952654bdf04..14eac2aee1cf 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp @@ -22,127 +22,99 @@ namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE -#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE( \ - TOKEN, KOKKOS_SCALAR_TYPE, TPL_SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, \ - LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, ETI_SPEC_AVAIL) \ - template <> \ - struct SPADD_NUMERIC< \ - EXEC_SPACE_TYPE, \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - 
Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>; \ - using rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using non_const_rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using colidx_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using non_const_colidx_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using non_const_scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spadd_numeric( \ - const EXEC_SPACE_TYPE &exec, kernelhandle_t *handle, ORDINAL_TYPE m, \ - ORDINAL_TYPE n, const KOKKOS_SCALAR_TYPE alpha, rowmap_view_t rowmapA, \ - colidx_view_t colidxA, scalar_view_t valuesA, \ - const KOKKOS_SCALAR_TYPE beta, rowmap_view_t rowmapB, \ - colidx_view_t colidxB, scalar_view_t valuesB, rowmap_view_t rowmapC, \ - non_const_colidx_view_t colidxC, non_const_scalar_view_t valuesC) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosSparse::spadd_numeric[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"); \ - \ - auto addHandle = handle->get_spadd_handle(); \ - auto &cuspData = addHandle->cusparseData; \ - auto &cuspHandle = \ - KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; \ - cusparsePointerMode_t oldPtrMode; \ - \ - KOKKOS_CUSPARSE_SAFE_CALL( \ - cusparseSetStream(cuspHandle, exec.cuda_stream())); \ - KOKKOS_CUSPARSE_SAFE_CALL( \ - cusparseGetPointerMode(cuspHandle, &oldPtrMode)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode( \ - cuspHandle, CUSPARSE_POINTER_MODE_HOST)); /* alpha, beta on host*/ \ - OFFSET_TYPE nnzA = colidxA.extent(0); \ - OFFSET_TYPE nnzB = 
colidxB.extent(0); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2( \ - cuspHandle, m, n, reinterpret_cast(&alpha), \ - cuspData.descrA, nnzA, \ - reinterpret_cast(valuesA.data()), \ - rowmapA.data(), colidxA.data(), \ - reinterpret_cast(&beta), cuspData.descrB, \ - nnzB, reinterpret_cast(valuesB.data()), \ - rowmapB.data(), colidxB.data(), cuspData.descrC, \ - reinterpret_cast(valuesC.data()), \ - const_cast(rowmapC.data()), colidxC.data(), \ - cuspData.workspace)); \ - KOKKOS_CUSPARSE_SAFE_CALL( \ - cusparseSetPointerMode(cuspHandle, oldPtrMode)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ - \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE(TOKEN, KOKKOS_SCALAR_TYPE, TPL_SCALAR_TYPE, ORDINAL_TYPE, \ + OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct SPADD_NUMERIC< \ + EXEC_SPACE_TYPE, \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using non_const_rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using colidx_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using non_const_colidx_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using non_const_scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spadd_numeric(const 
EXEC_SPACE_TYPE &exec, kernelhandle_t *handle, ORDINAL_TYPE m, ORDINAL_TYPE n, \ + const KOKKOS_SCALAR_TYPE alpha, rowmap_view_t rowmapA, colidx_view_t colidxA, \ + scalar_view_t valuesA, const KOKKOS_SCALAR_TYPE beta, rowmap_view_t rowmapB, \ + colidx_view_t colidxB, scalar_view_t valuesB, rowmap_view_t rowmapC, \ + non_const_colidx_view_t colidxC, non_const_scalar_view_t valuesC) { \ + Kokkos::Profiling::pushRegion("KokkosSparse::spadd_numeric[TPL_CUSPARSE," + \ + Kokkos::ArithTraits::name() + "]"); \ + \ + auto addHandle = handle->get_spadd_handle(); \ + auto &cuspData = addHandle->cusparseData; \ + auto &cuspHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; \ + cusparsePointerMode_t oldPtrMode; \ + \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream())); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(cuspHandle, &oldPtrMode)); \ + KOKKOS_CUSPARSE_SAFE_CALL( \ + cusparseSetPointerMode(cuspHandle, CUSPARSE_POINTER_MODE_HOST)); /* alpha, beta on host*/ \ + OFFSET_TYPE nnzA = colidxA.extent(0); \ + OFFSET_TYPE nnzB = colidxB.extent(0); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2( \ + cuspHandle, m, n, reinterpret_cast(&alpha), cuspData.descrA, nnzA, \ + reinterpret_cast(valuesA.data()), rowmapA.data(), colidxA.data(), \ + reinterpret_cast(&beta), cuspData.descrB, nnzB, \ + reinterpret_cast(valuesB.data()), rowmapB.data(), colidxB.data(), cuspData.descrC, \ + reinterpret_cast(valuesC.data()), const_cast(rowmapC.data()), \ + colidxC.data(), cuspData.workspace)); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cuspHandle, oldPtrMode)); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ + \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE_EXT(ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE( \ - S, float, float, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ - Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - 
KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE( \ - D, double, double, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ - Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE( \ - C, Kokkos::complex, cuComplex, int, int, Kokkos::LayoutLeft, \ - Kokkos::Cuda, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE( \ - Z, Kokkos::complex, cuDoubleComplex, int, int, \ - Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, ETI_SPEC_AVAIL) +#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE_EXT(ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE(S, float, float, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ + Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE(D, double, double, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ + Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE(C, Kokkos::complex, cuComplex, int, int, \ + Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE(Z, Kokkos::complex, cuDoubleComplex, int, int, \ + Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, \ + ETI_SPEC_AVAIL) KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE_EXT(true) KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE_EXT(false) @@ -150,127 +122,99 @@ KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_CUSPARSE_EXT(false) #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE( \ - TOKEN, KOKKOS_SCALAR_TYPE, TPL_SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, \ - LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, ETI_SPEC_AVAIL) \ - template <> \ - struct SPADD_NUMERIC< \ - EXEC_SPACE_TYPE, \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, 
\ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>; \ - using rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using non_const_rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using colidx_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using non_const_colidx_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using non_const_scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spadd_numeric( \ - const EXEC_SPACE_TYPE &exec, kernelhandle_t *handle, ORDINAL_TYPE m, \ - ORDINAL_TYPE n, const KOKKOS_SCALAR_TYPE alpha, rowmap_view_t rowmapA, \ - colidx_view_t colidxA, scalar_view_t valuesA, \ - const KOKKOS_SCALAR_TYPE beta, rowmap_view_t rowmapB, \ - colidx_view_t colidxB, scalar_view_t valuesB, rowmap_view_t rowmapC, \ - non_const_colidx_view_t colidxC, non_const_scalar_view_t valuesC) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosSparse::spadd_numeric[TPL_ROCSPARSE," + \ - Kokkos::ArithTraits::name() + "]"); \ - \ - auto addHandle = handle->get_spadd_handle(); \ - auto &rocData = addHandle->rocsparseData; \ - auto &rocspHandle = KokkosKernels::Impl::RocsparseSingleton::singleton() \ - .rocsparseHandle; \ - rocsparse_pointer_mode oldPtrMode; \ - \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_set_stream(rocspHandle, exec.hip_stream())); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - 
rocsparse_get_pointer_mode(rocspHandle, &oldPtrMode)); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode( \ - rocspHandle, rocsparse_pointer_mode_host)); /* alpha, beta on host*/ \ - OFFSET_TYPE nnzA = colidxA.extent(0); \ - OFFSET_TYPE nnzB = colidxB.extent(0); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_##TOKEN##csrgeam( \ - rocspHandle, m, n, \ - reinterpret_cast(&alpha), rocData.descrA, \ - nnzA, reinterpret_cast(valuesA.data()), \ - rowmapA.data(), colidxA.data(), \ - reinterpret_cast(&beta), rocData.descrB, \ - nnzB, reinterpret_cast(valuesB.data()), \ - rowmapB.data(), colidxB.data(), rocData.descrC, \ - reinterpret_cast(valuesC.data()), \ - const_cast(rowmapC.data()), colidxC.data())); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_set_pointer_mode(rocspHandle, oldPtrMode)); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_set_stream(rocspHandle, NULL)); \ - \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE(TOKEN, KOKKOS_SCALAR_TYPE, TPL_SCALAR_TYPE, ORDINAL_TYPE, \ + OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct SPADD_NUMERIC< \ + EXEC_SPACE_TYPE, \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using non_const_rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using colidx_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + 
using non_const_colidx_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using non_const_scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spadd_numeric(const EXEC_SPACE_TYPE &exec, kernelhandle_t *handle, ORDINAL_TYPE m, ORDINAL_TYPE n, \ + const KOKKOS_SCALAR_TYPE alpha, rowmap_view_t rowmapA, colidx_view_t colidxA, \ + scalar_view_t valuesA, const KOKKOS_SCALAR_TYPE beta, rowmap_view_t rowmapB, \ + colidx_view_t colidxB, scalar_view_t valuesB, rowmap_view_t rowmapC, \ + non_const_colidx_view_t colidxC, non_const_scalar_view_t valuesC) { \ + Kokkos::Profiling::pushRegion("KokkosSparse::spadd_numeric[TPL_ROCSPARSE," + \ + Kokkos::ArithTraits::name() + "]"); \ + \ + auto addHandle = handle->get_spadd_handle(); \ + auto &rocData = addHandle->rocsparseData; \ + auto &rocspHandle = KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; \ + rocsparse_pointer_mode oldPtrMode; \ + \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(rocspHandle, exec.hip_stream())); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_get_pointer_mode(rocspHandle, &oldPtrMode)); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ + rocsparse_set_pointer_mode(rocspHandle, rocsparse_pointer_mode_host)); /* alpha, beta on host*/ \ + OFFSET_TYPE nnzA = colidxA.extent(0); \ + OFFSET_TYPE nnzB = colidxB.extent(0); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_##TOKEN##csrgeam( \ + rocspHandle, m, n, reinterpret_cast(&alpha), rocData.descrA, nnzA, \ + reinterpret_cast(valuesA.data()), rowmapA.data(), colidxA.data(), \ + reinterpret_cast(&beta), rocData.descrB, nnzB, \ + reinterpret_cast(valuesB.data()), rowmapB.data(), colidxB.data(), rocData.descrC, \ + reinterpret_cast(valuesC.data()), const_cast(rowmapC.data()), \ + colidxC.data())); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode(rocspHandle, oldPtrMode)); \ + 
KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(rocspHandle, NULL)); \ + \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE_EXT(ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE( \ - s, float, float, int, int, Kokkos::LayoutLeft, Kokkos::HIP, \ - Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE( \ - d, double, double, int, int, Kokkos::LayoutLeft, Kokkos::HIP, \ - Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE( \ - c, Kokkos::complex, rocsparse_float_complex, int, int, \ - Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE( \ - z, Kokkos::complex, rocsparse_double_complex, int, int, \ - Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, ETI_SPEC_AVAIL) +#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE_EXT(ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE(s, float, float, int, int, Kokkos::LayoutLeft, Kokkos::HIP, \ + Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE(d, double, double, int, int, Kokkos::LayoutLeft, Kokkos::HIP, \ + Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE(c, Kokkos::complex, rocsparse_float_complex, int, int, \ + Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE(z, Kokkos::complex, rocsparse_double_complex, int, int, \ + Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, \ + ETI_SPEC_AVAIL) KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE_EXT(true) KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_DECL_ROCSPARSE_EXT(false) diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp index fe6b51207f7a..514b019f1b32 100644 
--- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp @@ -22,113 +22,93 @@ namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE -#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE( \ - TOKEN, KOKKOS_SCALAR_TYPE, TPL_SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, \ - LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, ETI_SPEC_AVAIL) \ - template <> \ - struct SPADD_SYMBOLIC< \ - EXEC_SPACE_TYPE, \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>; \ - using rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using non_const_rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using colidx_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits >; \ - static void spadd_symbolic(const EXEC_SPACE_TYPE& exec, \ - kernelhandle_t* handle, const ORDINAL_TYPE m, \ - const ORDINAL_TYPE n, rowmap_view_t rowmapA, \ - colidx_view_t colidxA, rowmap_view_t rowmapB, \ - colidx_view_t colidxB, \ - non_const_rowmap_view_t rowmapC) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosSparse::spadd_symbolic[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"); \ - \ - auto addHandle = handle->get_spadd_handle(); \ - auto& cuspData = addHandle->cusparseData; \ - auto& cuspHandle = \ - KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; \ - \ - /* Not easy to 
init 'one' for cuda complex, so we don't init it. Anyway, \ - * the uninit'ed var won't affect C's pattern. \ - */ \ - TPL_SCALAR_TYPE one; \ - size_t nbytes; \ - OFFSET_TYPE nnzA = colidxA.extent(0); \ - OFFSET_TYPE nnzB = colidxB.extent(0); \ - OFFSET_TYPE nnzC = 0; \ - \ - KOKKOS_CUSPARSE_SAFE_CALL( \ - cusparseSetStream(cuspHandle, exec.cuda_stream())); \ - \ - /* https://docs.nvidia.com/cuda/cusparse/index.html#cusparsecreatematdescr \ - It sets the fields MatrixType and IndexBase to the default values \ - CUSPARSE_MATRIX_TYPE_GENERAL and CUSPARSE_INDEX_BASE_ZERO, \ - respectively, while leaving other fields uninitialized. */ \ - \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrA)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrB)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrC)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2_bufferSizeExt( \ - cuspHandle, m, n, &one, cuspData.descrA, nnzA, NULL, rowmapA.data(), \ - colidxA.data(), &one, cuspData.descrB, nnzB, NULL, rowmapB.data(), \ - colidxB.data(), cuspData.descrC, NULL, rowmapC.data(), NULL, \ - &nbytes)); \ - cuspData.nbytes = nbytes; \ - cuspData.workspace = Kokkos::kokkos_malloc(nbytes); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz( \ - cuspHandle, m, n, cuspData.descrA, nnzA, rowmapA.data(), \ - colidxA.data(), cuspData.descrB, nnzB, rowmapB.data(), \ - colidxB.data(), cuspData.descrC, rowmapC.data(), &nnzC, \ - cuspData.workspace)); \ - addHandle->set_c_nnz(nnzC); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ - \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE(TOKEN, KOKKOS_SCALAR_TYPE, TPL_SCALAR_TYPE, ORDINAL_TYPE, \ + OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct SPADD_SYMBOLIC< \ + EXEC_SPACE_TYPE, \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + 
Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using non_const_rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using colidx_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits >; \ + static void spadd_symbolic(const EXEC_SPACE_TYPE& exec, kernelhandle_t* handle, const ORDINAL_TYPE m, \ + const ORDINAL_TYPE n, rowmap_view_t rowmapA, colidx_view_t colidxA, \ + rowmap_view_t rowmapB, colidx_view_t colidxB, non_const_rowmap_view_t rowmapC) { \ + Kokkos::Profiling::pushRegion("KokkosSparse::spadd_symbolic[TPL_CUSPARSE," + \ + Kokkos::ArithTraits::name() + "]"); \ + \ + auto addHandle = handle->get_spadd_handle(); \ + auto& cuspData = addHandle->cusparseData; \ + auto& cuspHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; \ + \ + /* Not easy to init 'one' for cuda complex, so we don't init it. Anyway, \ + * the uninit'ed var won't affect C's pattern. \ + */ \ + TPL_SCALAR_TYPE one; \ + size_t nbytes; \ + OFFSET_TYPE nnzA = colidxA.extent(0); \ + OFFSET_TYPE nnzB = colidxB.extent(0); \ + OFFSET_TYPE nnzC = 0; \ + \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream())); \ + \ + /* https://docs.nvidia.com/cuda/cusparse/index.html#cusparsecreatematdescr \ + It sets the fields MatrixType and IndexBase to the default values \ + CUSPARSE_MATRIX_TYPE_GENERAL and CUSPARSE_INDEX_BASE_ZERO, \ + respectively, while leaving other fields uninitialized. 
*/ \ + \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrA)); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrB)); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrC)); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2_bufferSizeExt( \ + cuspHandle, m, n, &one, cuspData.descrA, nnzA, NULL, rowmapA.data(), colidxA.data(), &one, cuspData.descrB, \ + nnzB, NULL, rowmapB.data(), colidxB.data(), cuspData.descrC, NULL, rowmapC.data(), NULL, &nbytes)); \ + cuspData.nbytes = nbytes; \ + cuspData.workspace = Kokkos::kokkos_malloc(nbytes); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz( \ + cuspHandle, m, n, cuspData.descrA, nnzA, rowmapA.data(), colidxA.data(), cuspData.descrB, nnzB, \ + rowmapB.data(), colidxB.data(), cuspData.descrC, rowmapC.data(), &nnzC, cuspData.workspace)); \ + addHandle->set_c_nnz(nnzC); \ + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ + \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE_EXT(ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE( \ - S, float, float, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ - Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE( \ - D, double, double, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ - Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE( \ - C, Kokkos::complex, cuComplex, int, int, Kokkos::LayoutLeft, \ - Kokkos::Cuda, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE( \ - Z, Kokkos::complex, cuDoubleComplex, int, int, \ - Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, ETI_SPEC_AVAIL) +#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE_EXT(ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE(S, float, float, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ + Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + 
KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE(D, double, double, int, int, Kokkos::LayoutLeft, Kokkos::Cuda, \ + Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE(C, Kokkos::complex, cuComplex, int, int, \ + Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE(Z, Kokkos::complex, cuDoubleComplex, int, int, \ + Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, \ + ETI_SPEC_AVAIL) KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE_EXT(true) KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE_EXT(false) @@ -136,97 +116,74 @@ KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_CUSPARSE_EXT(false) #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE( \ - KOKKOS_SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, ETI_SPEC_AVAIL) \ - template <> \ - struct SPADD_SYMBOLIC< \ - EXEC_SPACE_TYPE, \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const KOKKOS_SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>; \ - using rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using non_const_rowmap_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits >; \ - using colidx_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits >; \ - static void spadd_symbolic(const EXEC_SPACE_TYPE& exec, \ - kernelhandle_t* handle, const ORDINAL_TYPE m, \ - const ORDINAL_TYPE n, 
rowmap_view_t rowmapA, \ - colidx_view_t colidxA, rowmap_view_t rowmapB, \ - colidx_view_t colidxB, \ - non_const_rowmap_view_t rowmapC) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosSparse::spadd_symbolic[TPL_ROCSPARSE," + \ - Kokkos::ArithTraits::name() + "]"); \ - \ - auto addHandle = handle->get_spadd_handle(); \ - auto& rocData = addHandle->rocsparseData; \ - auto& rocspHandle = KokkosKernels::Impl::RocsparseSingleton::singleton() \ - .rocsparseHandle; \ - OFFSET_TYPE nnzA = colidxA.extent(0); \ - OFFSET_TYPE nnzB = colidxB.extent(0); \ - OFFSET_TYPE nnzC = 0; \ - \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_set_stream(rocspHandle, exec.hip_stream())); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_create_mat_descr(&rocData.descrA)); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_create_mat_descr(&rocData.descrB)); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_create_mat_descr(&rocData.descrC)); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_csrgeam_nnz( \ - rocspHandle, m, n, rocData.descrA, nnzA, rowmapA.data(), \ - colidxA.data(), rocData.descrB, nnzB, rowmapB.data(), \ - colidxB.data(), rocData.descrC, rowmapC.data(), &nnzC)); \ - addHandle->set_c_nnz(nnzC); \ - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( \ - rocsparse_set_stream(rocspHandle, NULL)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE( \ + KOKKOS_SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE, ETI_SPEC_AVAIL) \ + template <> \ + struct SPADD_SYMBOLIC< \ + EXEC_SPACE_TYPE, \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + using kernelhandle_t = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using rowmap_view_t = \ + 
Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using non_const_rowmap_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits >; \ + using colidx_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits >; \ + static void spadd_symbolic(const EXEC_SPACE_TYPE& exec, kernelhandle_t* handle, const ORDINAL_TYPE m, \ + const ORDINAL_TYPE n, rowmap_view_t rowmapA, colidx_view_t colidxA, \ + rowmap_view_t rowmapB, colidx_view_t colidxB, non_const_rowmap_view_t rowmapC) { \ + Kokkos::Profiling::pushRegion("KokkosSparse::spadd_symbolic[TPL_ROCSPARSE," + \ + Kokkos::ArithTraits::name() + "]"); \ + \ + auto addHandle = handle->get_spadd_handle(); \ + auto& rocData = addHandle->rocsparseData; \ + auto& rocspHandle = KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; \ + OFFSET_TYPE nnzA = colidxA.extent(0); \ + OFFSET_TYPE nnzB = colidxB.extent(0); \ + OFFSET_TYPE nnzC = 0; \ + \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(rocspHandle, exec.hip_stream())); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_descr(&rocData.descrA)); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_descr(&rocData.descrB)); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_descr(&rocData.descrC)); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_csrgeam_nnz(rocspHandle, m, n, rocData.descrA, nnzA, rowmapA.data(), \ + colidxA.data(), rocData.descrB, nnzB, rowmapB.data(), \ + colidxB.data(), rocData.descrC, rowmapC.data(), &nnzC)); \ + addHandle->set_c_nnz(nnzC); \ + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_stream(rocspHandle, NULL)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE_EXT( \ - ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE( \ - float, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIP, \ - Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE( \ - double, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, 
Kokkos::HIP, \ - Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE( \ - Kokkos::complex, rocsparse_int, rocsparse_int, \ - Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE( \ - Kokkos::complex, rocsparse_int, rocsparse_int, \ - Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, ETI_SPEC_AVAIL) +#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE_EXT(ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, \ + Kokkos::HIP, Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, \ + Kokkos::HIP, Kokkos::HIPSpace, ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, \ + Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, \ + ETI_SPEC_AVAIL) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, \ + Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace, \ + ETI_SPEC_AVAIL) KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE_EXT(true) KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_DECL_ROCSPARSE_EXT(false) diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_tpl_spec_avail.hpp index 6d4db8731fcd..144dc65aeb29 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spadd_tpl_spec_avail.hpp @@ -21,123 +21,91 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists // -template +template struct spadd_symbolic_tpl_spec_avail { enum : bool { value = false }; }; -template +template struct spadd_numeric_tpl_spec_avail { enum : bool { value = false }; }; -#define 
KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - template <> \ - struct spadd_symbolic_tpl_spec_avail< \ - EXEC_SPACE_TYPE, \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct spadd_symbolic_tpl_spec_avail< \ + EXEC_SPACE_TYPE, \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - template <> \ - struct spadd_numeric_tpl_spec_avail< \ - EXEC_SPACE_TYPE, \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - 
Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct spadd_numeric_tpl_spec_avail< \ + EXEC_SPACE_TYPE, \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; -#define KOKKOSSPARSE_SPADD_TPL_SPEC_AVAIL( \ - ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(float, ORDINAL_TYPE, OFFSET_TYPE, \ - LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(double, ORDINAL_TYPE, \ - OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL( \ - Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL( \ - Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(float, ORDINAL_TYPE, OFFSET_TYPE, \ - LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(double, ORDINAL_TYPE, OFFSET_TYPE, \ - LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL( \ - Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL( \ - Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ 
- EXEC_SPACE_TYPE, MEM_SPACE_TYPE) +#define KOKKOSSPARSE_SPADD_TPL_SPEC_AVAIL(ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(float, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(double, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_SYMBOLIC_TPL_SPEC_AVAIL(Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(float, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(double, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + KOKKOSSPARSE_SPADD_NUMERIC_TPL_SPEC_AVAIL(Kokkos::complex, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE -KOKKOSSPARSE_SPADD_TPL_SPEC_AVAIL(int, int, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) +KOKKOSSPARSE_SPADD_TPL_SPEC_AVAIL(int, int, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -KOKKOSSPARSE_SPADD_TPL_SPEC_AVAIL(rocsparse_int, rocsparse_int, - Kokkos::LayoutLeft, Kokkos::HIP, - Kokkos::HIPSpace) +KOKKOSSPARSE_SPADD_TPL_SPEC_AVAIL(rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) #endif } // namespace Impl diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_jacobi_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_jacobi_tpl_spec_avail.hpp index c045f21318b8..384dafc98832 
100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_jacobi_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_jacobi_tpl_spec_avail.hpp @@ -21,10 +21,9 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct spgemm_jacobi_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_avail.hpp index ea3edb518f83..133e4d051669 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_avail.hpp @@ -37,18 +37,15 @@ struct spgemm_noreuse_tpl_spec_avail { // But for cuSparse 10, there is only one interface // so just let KokkosSparse::spgemm call the symbolic and numeric wrappers. -#define SPGEMM_NOREUSE_AVAIL_CUSPARSE(SCALAR, MEMSPACE) \ - template <> \ - struct spgemm_noreuse_tpl_spec_avail< \ - KokkosSparse::CrsMatrix< \ - SCALAR, int, Kokkos::Device, void, int>, \ - KokkosSparse::CrsMatrix< \ - const SCALAR, const int, Kokkos::Device, \ - Kokkos::MemoryTraits, const int>, \ - KokkosSparse::CrsMatrix< \ - const SCALAR, const int, Kokkos::Device, \ - Kokkos::MemoryTraits, const int>> { \ - enum : bool { value = true }; \ +#define SPGEMM_NOREUSE_AVAIL_CUSPARSE(SCALAR, MEMSPACE) \ + template <> \ + struct spgemm_noreuse_tpl_spec_avail< \ + KokkosSparse::CrsMatrix, void, int>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const int>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const int>> { \ + enum : bool { value = true }; \ }; #define SPGEMM_NOREUSE_AVAIL_CUSPARSE_S(SCALAR) \ @@ -63,21 +60,15 @@ SPGEMM_NOREUSE_AVAIL_CUSPARSE_S(Kokkos::complex) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#define SPGEMM_NOREUSE_AVAIL_MKL(SCALAR, EXEC) \ - template 
<> \ - struct spgemm_noreuse_tpl_spec_avail< \ - KokkosSparse::CrsMatrix, void, \ - MKL_INT>, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - const MKL_INT>, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - const MKL_INT>> { \ - enum : bool { value = true }; \ +#define SPGEMM_NOREUSE_AVAIL_MKL(SCALAR, EXEC) \ + template <> \ + struct spgemm_noreuse_tpl_spec_avail< \ + KokkosSparse::CrsMatrix, void, MKL_INT>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>> { \ + enum : bool { value = true }; \ }; #define SPGEMM_NOREUSE_AVAIL_MKL_E(EXEC) \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp index 1067f3924f2d..6da49b683b91 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp @@ -52,65 +52,52 @@ Matrix spgemm_noreuse_cusparse(const MatrixConst &A, const MatrixConst &B) { int k = B.numCols(); const auto alpha = Kokkos::ArithTraits::one(); const auto beta = Kokkos::ArithTraits::zero(); - typename Matrix::row_map_type::non_const_type row_mapC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C rowmap"), m + 1); + typename Matrix::row_map_type::non_const_type row_mapC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C rowmap"), + m + 1); KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &descr_A, m, n, A.graph.entries.extent(0), (void *)A.graph.row_map.data(), - (void *)A.graph.entries.data(), (void *)A.values.data(), - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - cudaScalarType)); + &descr_A, m, n, A.graph.entries.extent(0), (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(), + (void *)A.values.data(), 
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cudaScalarType)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &descr_B, n, k, B.graph.entries.extent(0), (void *)B.graph.row_map.data(), - (void *)B.graph.entries.data(), (void *)B.values.data(), - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - cudaScalarType)); + &descr_B, n, k, B.graph.entries.extent(0), (void *)B.graph.row_map.data(), (void *)B.graph.entries.data(), + (void *)B.values.data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cudaScalarType)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCreateCsr(&descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, - nullptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, cudaScalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + cudaScalarType)); //---------------------------------------------------------------------- // query workEstimation buffer size, allocate, then call again with buffer. 
- KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation( - cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize1, nullptr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, + descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize1, + nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer1, bufferSize1)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation( - cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize1, buffer1)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, + descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize1, + buffer1)); //---------------------------------------------------------------------- // query compute buffer size, allocate, then call again with buffer. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute( - cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize2, nullptr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, + cudaScalarType, alg, spgemmDescr, &bufferSize2, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer2, bufferSize2)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute( - cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, + cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2)); int64_t unused1, unused2, c_nnz; - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSpMatGetSize(descr_C, &unused1, &unused2, &c_nnz)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(descr_C, &unused1, &unused2, &c_nnz)); - typename Matrix::index_type 
entriesC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C entries"), c_nnz); - typename Matrix::values_type valuesC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz); + typename Matrix::index_type entriesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C entries"), c_nnz); + typename Matrix::values_type valuesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(descr_C, (void *)row_mapC.data(), - (void *)entriesC.data(), (void *)valuesC.data())); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute( - cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSpGEMM_copy(cusparseHandle, op, op, &alpha, descr_A, descr_B, - &beta, descr_C, cudaScalarType, alg, spgemmDescr)); + cusparseCsrSetPointers(descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data())); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, + cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, + cudaScalarType, alg, spgemmDescr)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C)); @@ -120,33 +107,25 @@ Matrix spgemm_noreuse_cusparse(const MatrixConst &A, const MatrixConst &B) { return Matrix("C", m, k, c_nnz, valuesC, row_mapC, entriesC); } -#define SPGEMM_NOREUSE_DECL_CUSPARSE(SCALAR, MEMSPACE, TPL_AVAIL) \ - template <> \ - struct SPGEMM_NOREUSE< \ - KokkosSparse::CrsMatrix< \ - SCALAR, int, Kokkos::Device, void, int>, \ - KokkosSparse::CrsMatrix< \ - const SCALAR, const int, Kokkos::Device, \ - Kokkos::MemoryTraits, const int>, \ - KokkosSparse::CrsMatrix< \ - const 
SCALAR, const int, Kokkos::Device, \ - Kokkos::MemoryTraits, const int>, \ - true, TPL_AVAIL> { \ - using Matrix = KokkosSparse::CrsMatrix< \ - SCALAR, int, Kokkos::Device, void, int>; \ - using ConstMatrix = KokkosSparse::CrsMatrix< \ - const SCALAR, const int, Kokkos::Device, \ - Kokkos::MemoryTraits, const int>; \ - static KokkosSparse::CrsMatrix< \ - SCALAR, int, Kokkos::Device, void, int> \ - spgemm_noreuse(const ConstMatrix &A, bool, const ConstMatrix &B, bool) { \ - std::string label = "KokkosSparse::spgemm_noreuse[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - Matrix C = spgemm_noreuse_cusparse(A, B); \ - Kokkos::Profiling::popRegion(); \ - return C; \ - } \ +#define SPGEMM_NOREUSE_DECL_CUSPARSE(SCALAR, MEMSPACE, TPL_AVAIL) \ + template <> \ + struct SPGEMM_NOREUSE, void, int>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const int>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const int>, \ + true, TPL_AVAIL> { \ + using Matrix = KokkosSparse::CrsMatrix, void, int>; \ + using ConstMatrix = KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const int>; \ + static KokkosSparse::CrsMatrix, void, int> spgemm_noreuse( \ + const ConstMatrix &A, bool, const ConstMatrix &B, bool) { \ + std::string label = "KokkosSparse::spgemm_noreuse[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + Matrix C = spgemm_noreuse_cusparse(A, B); \ + Kokkos::Profiling::popRegion(); \ + return C; \ + } \ }; #define SPGEMM_NOREUSE_DECL_CUSPARSE_S(SCALAR, TPL_AVAIL) \ @@ -177,18 +156,15 @@ Matrix spgemm_noreuse_mkl(const MatrixConst &A, const MatrixConst &B) { auto n = A.numCols(); auto k = B.numCols(); MKLMatrix Amkl(m, n, const_cast(A.graph.row_map.data()), - const_cast(A.graph.entries.data()), - const_cast(A.values.data())); + const_cast(A.graph.entries.data()), const_cast(A.values.data())); MKLMatrix Bmkl(n, k, const_cast(B.graph.row_map.data()), - 
const_cast(B.graph.entries.data()), - const_cast(B.values.data())); + const_cast(B.graph.entries.data()), const_cast(B.values.data())); sparse_matrix_t C; matrix_descr generalDescr; generalDescr.type = SPARSE_MATRIX_TYPE_GENERAL; generalDescr.mode = SPARSE_FILL_MODE_FULL; generalDescr.diag = SPARSE_DIAG_NON_UNIT; - KOKKOSKERNELS_MKL_SAFE_CALL( - mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE, Amkl, Bmkl, &C)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE, Amkl, Bmkl, &C)); KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_order(C)); MKLMatrix wrappedC(C); MKL_INT nrows = 0, ncols = 0; @@ -201,22 +177,16 @@ Matrix spgemm_noreuse_mkl(const MatrixConst &A, const MatrixConst &B) { "KokkosSparse::spgemm: matrix returned by MKL has incorrect " "dimensions"); MKL_INT c_nnz = rowmapRaw[m]; - Kokkos::View> - rowmapRawView(rowmapRaw, m + 1); - Kokkos::View> - entriesRawView(entriesRaw, c_nnz); - Kokkos::View> - valuesRawView(valuesRaw, c_nnz); + Kokkos::View> rowmapRawView(rowmapRaw, m + 1); + Kokkos::View> entriesRawView(entriesRaw, + c_nnz); + Kokkos::View> valuesRawView(valuesRaw, + c_nnz); - typename Matrix::row_map_type::non_const_type row_mapC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C rowmap"), m + 1); - typename Matrix::index_type entriesC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C entries"), c_nnz); - typename Matrix::values_type valuesC( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz); + typename Matrix::row_map_type::non_const_type row_mapC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C rowmap"), + m + 1); + typename Matrix::index_type entriesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C entries"), c_nnz); + typename Matrix::values_type valuesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz); Kokkos::deep_copy(ExecSpace(), row_mapC, rowmapRawView); Kokkos::deep_copy(ExecSpace(), entriesC, entriesRawView); @@ -226,39 +196,26 @@ Matrix spgemm_noreuse_mkl(const MatrixConst 
&A, const MatrixConst &B) { return Matrix("C", m, k, c_nnz, valuesC, row_mapC, entriesC); } -#define SPGEMM_NOREUSE_DECL_MKL(SCALAR, EXEC, TPL_AVAIL) \ - template <> \ - struct SPGEMM_NOREUSE< \ - KokkosSparse::CrsMatrix, void, \ - MKL_INT>, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - const MKL_INT>, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - const MKL_INT>, \ - true, TPL_AVAIL> { \ - using Matrix = \ - KokkosSparse::CrsMatrix, void, \ - MKL_INT>; \ - using ConstMatrix = KokkosSparse::CrsMatrix< \ - const SCALAR, const MKL_INT, Kokkos::Device, \ - Kokkos::MemoryTraits, const MKL_INT>; \ - static KokkosSparse::CrsMatrix, \ - void, MKL_INT> \ - spgemm_noreuse(const ConstMatrix &A, bool, const ConstMatrix &B, bool) { \ - std::string label = "KokkosSparse::spgemm_noreuse[TPL_MKL," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - Matrix C = spgemm_noreuse_mkl(A, B); \ - Kokkos::Profiling::popRegion(); \ - return C; \ - } \ +#define SPGEMM_NOREUSE_DECL_MKL(SCALAR, EXEC, TPL_AVAIL) \ + template <> \ + struct SPGEMM_NOREUSE< \ + KokkosSparse::CrsMatrix, void, MKL_INT>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>, \ + true, TPL_AVAIL> { \ + using Matrix = KokkosSparse::CrsMatrix, void, MKL_INT>; \ + using ConstMatrix = KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>; \ + static KokkosSparse::CrsMatrix, void, MKL_INT> \ + spgemm_noreuse(const ConstMatrix &A, bool, const ConstMatrix &B, bool) { \ + std::string label = "KokkosSparse::spgemm_noreuse[TPL_MKL," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + Matrix C = spgemm_noreuse_mkl(A, B); \ + Kokkos::Profiling::popRegion(); \ + return C; \ + } \ }; #define SPGEMM_NOREUSE_DECL_MKL_SE(SCALAR, EXEC) \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp 
b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp index 517e10498879..6609d77a81da 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp @@ -27,10 +27,8 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct spgemm_numeric_tpl_spec_avail { enum : bool { value = false }; }; @@ -42,40 +40,30 @@ struct spgemm_numeric_tpl_spec_avail { // offsets and ordinals independently as either 16, 32 or 64-bit, SpGEMM will // just fail at runtime if you don't use 32 for both. -#define SPGEMM_NUMERIC_AVAIL_CUSPARSE(SCALAR, MEMSPACE) \ - template <> \ - struct spgemm_numeric_tpl_spec_avail< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::Cuda, MEMSPACE, \ - MEMSPACE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define SPGEMM_NUMERIC_AVAIL_CUSPARSE(SCALAR, MEMSPACE) \ + template <> \ + struct spgemm_numeric_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits 
> > { \ + enum : bool { value = true }; \ }; #define SPGEMM_NUMERIC_AVAIL_CUSPARSE_S(SCALAR) \ @@ -92,40 +80,30 @@ SPGEMM_NUMERIC_AVAIL_CUSPARSE_S(Kokkos::complex) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -#define SPGEMM_NUMERIC_AVAIL_ROCSPARSE(SCALAR) \ - template <> \ - struct spgemm_numeric_tpl_spec_avail< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::HIP, Kokkos::HIPSpace, \ - Kokkos::HIPSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define SPGEMM_NUMERIC_AVAIL_ROCSPARSE(SCALAR) \ + template <> \ + struct spgemm_numeric_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; SPGEMM_NUMERIC_AVAIL_ROCSPARSE(float) @@ -135,40 +113,30 @@ SPGEMM_NUMERIC_AVAIL_ROCSPARSE(Kokkos::complex) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#define SPGEMM_NUMERIC_AVAIL_MKL(SCALAR, EXEC) \ - template <> \ - struct spgemm_numeric_tpl_spec_avail< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const MKL_INT, const MKL_INT, const SCALAR, EXEC, Kokkos::HostSpace, \ - Kokkos::HostSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 
Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define SPGEMM_NUMERIC_AVAIL_MKL(SCALAR, EXEC) \ + template <> \ + struct spgemm_numeric_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #define SPGEMM_NUMERIC_AVAIL_MKL_E(EXEC) \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp index 6c87c60caf6b..5e636eea0e07 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp @@ -41,16 +41,13 @@ namespace Impl { #if (CUDA_VERSION >= 11040) // 11.4+ supports generic API with reuse (full symbolic/numeric separation) -template -void spgemm_numeric_cusparse( - KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno_t /*k*/, - const ConstRowMapType &row_mapA, const ConstEntriesType &entriesA, - const ConstValuesType &valuesA, const ConstRowMapType &row_mapB, - const ConstEntriesType &entriesB, const ConstValuesType &valuesB, - const ConstRowMapType &row_mapC, const EntriesType &entriesC, - const ValuesType &valuesC) 
{ +template +void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno_t /*k*/, + const ConstRowMapType &row_mapA, const ConstEntriesType &entriesA, + const ConstValuesType &valuesA, const ConstRowMapType &row_mapB, + const ConstEntriesType &entriesB, const ConstValuesType &valuesB, + const ConstRowMapType &row_mapC, const EntriesType &entriesC, const ValuesType &valuesC) { using scalar_type = typename KernelHandle::nnz_scalar_t; using size_type = typename KernelHandle::size_type; auto h = handle->get_cusparse_spgemm_handle(); @@ -60,12 +57,9 @@ void spgemm_numeric_cusparse( // to be done for them, but we must populate row_mapC to zeros if not // already done. if (!handle->are_rowptrs_computed()) { - Kokkos::View> - row_mapC_nonconst(const_cast(row_mapC.data()), - row_mapC.extent(0)); - Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), - row_mapC_nonconst, size_type(0)); + Kokkos::View> + row_mapC_nonconst(const_cast(row_mapC.data()), row_mapC.extent(0)); + Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC_nonconst, size_type(0)); handle->set_computed_rowptrs(); } handle->set_computed_entries(); @@ -74,31 +68,26 @@ void spgemm_numeric_cusparse( } KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), - (void *)entriesA.data(), (void *)valuesA.data())); + cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), (void *)valuesA.data())); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), - (void *)entriesB.data(), (void *)valuesB.data())); + cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), (void *)valuesB.data())); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), - (void *)entriesC.data(), (void *)valuesC.data())); + cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data())); if 
(!handle->are_entries_computed()) { if (!h->buffer5) { // If symbolic was previously called with computeRowptrs=true, then // buffer5 will have already been allocated to the correct size. Otherwise // size and allocate it here. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy( - h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &h->bufferSize5, nullptr)); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&h->buffer5, h->bufferSize5)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, + h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, nullptr)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer5, h->bufferSize5)); } - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy( - h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &h->bufferSize5, h->buffer5)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, + h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, + h->buffer5)); handle->set_computed_rowptrs(); handle->set_computed_entries(); } @@ -111,50 +100,40 @@ void spgemm_numeric_cusparse( // handle, we save/restore the pointer mode to not interference with // others' use cusparsePointerMode_t oldPtrMode; - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseGetPointerMode(h->cusparseHandle, &oldPtrMode)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetPointerMode(h->cusparseHandle, CUSPARSE_POINTER_MODE_HOST)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_compute( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, - h->descr_C, h->scalarType, h->alg, h->spgemmDescr)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetPointerMode(h->cusparseHandle, oldPtrMode)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(h->cusparseHandle, &oldPtrMode)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, 
CUSPARSE_POINTER_MODE_HOST)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, + h->descr_B, &beta, h->descr_C, h->scalarType, h->alg, + h->spgemmDescr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, oldPtrMode)); handle->set_call_numeric(); } #elif (CUDA_VERSION >= 11000) // 11.0-11.3 supports only the generic API, but not reuse. -template -void spgemm_numeric_cusparse( - KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno_t /*k*/, - const ConstRowMapType &row_mapA, const ConstEntriesType &entriesA, - const ConstValuesType &valuesA, const ConstRowMapType &row_mapB, - const ConstEntriesType &entriesB, const ConstValuesType &valuesB, - const ConstRowMapType &row_mapC, const EntriesType &entriesC, - const ValuesType &valuesC) { +template +void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno_t /*k*/, + const ConstRowMapType &row_mapA, const ConstEntriesType &entriesA, + const ConstValuesType &valuesA, const ConstRowMapType &row_mapB, + const ConstEntriesType &entriesB, const ConstValuesType &valuesB, + const ConstRowMapType &row_mapC, const EntriesType &entriesC, const ValuesType &valuesC) { using scalar_type = typename KernelHandle::nnz_scalar_t; auto h = handle->get_cusparse_spgemm_handle(); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), - (void *)entriesA.data(), (void *)valuesA.data())); + cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), (void *)valuesA.data())); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), - (void *)entriesB.data(), (void *)valuesB.data())); + cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), (void *)valuesB.data())); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), - (void *)entriesC.data(), (void *)valuesC.data())); + 
cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data())); const auto alpha = Kokkos::ArithTraits::one(); const auto beta = Kokkos::ArithTraits::zero(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, - h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, - &h->bufferSize4, h->buffer4)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, - h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, + &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, + h->spgemmDescr, &h->bufferSize4, h->buffer4)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, + &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, + h->spgemmDescr)); handle->set_computed_entries(); handle->set_call_numeric(); } @@ -165,25 +144,18 @@ void spgemm_numeric_cusparse( // or Z). Accepts Kokkos types (e.g. Kokkos::complex) for Scalar and // handles casting to cuSparse types internally. 
-#define CUSPARSE_XCSRGEMM_SPEC(KokkosType, CusparseType, Abbreviation) \ - inline cusparseStatus_t cusparseXcsrgemm( \ - cusparseHandle_t handle, cusparseOperation_t transA, \ - cusparseOperation_t transB, int m, int n, int k, \ - const cusparseMatDescr_t descrA, const int nnzA, \ - const KokkosType *csrSortedValA, const int *csrSortedRowPtrA, \ - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, \ - const int nnzB, const KokkosType *csrSortedValB, \ - const int *csrSortedRowPtrB, const int *csrSortedColIndB, \ - const cusparseMatDescr_t descrC, KokkosType *csrSortedValC, \ - const int *csrSortedRowPtrC, int *csrSortedColIndC) { \ - return cusparse##Abbreviation##csrgemm( \ - handle, transA, transB, m, n, k, descrA, nnzA, \ - reinterpret_cast(csrSortedValA), \ - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, \ - reinterpret_cast(csrSortedValB), \ - csrSortedRowPtrB, csrSortedColIndB, descrC, \ - reinterpret_cast(csrSortedValC), csrSortedRowPtrC, \ - csrSortedColIndC); \ +#define CUSPARSE_XCSRGEMM_SPEC(KokkosType, CusparseType, Abbreviation) \ + inline cusparseStatus_t cusparseXcsrgemm( \ + cusparseHandle_t handle, cusparseOperation_t transA, cusparseOperation_t transB, int m, int n, int k, \ + const cusparseMatDescr_t descrA, const int nnzA, const KokkosType *csrSortedValA, const int *csrSortedRowPtrA, \ + const int *csrSortedColIndA, const cusparseMatDescr_t descrB, const int nnzB, const KokkosType *csrSortedValB, \ + const int *csrSortedRowPtrB, const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + KokkosType *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC) { \ + return cusparse##Abbreviation##csrgemm( \ + handle, transA, transB, m, n, k, descrA, nnzA, reinterpret_cast(csrSortedValA), \ + csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, reinterpret_cast(csrSortedValB), \ + csrSortedRowPtrB, csrSortedColIndB, descrC, reinterpret_cast(csrSortedValC), csrSortedRowPtrC, \ + csrSortedColIndC); \ } 
CUSPARSE_XCSRGEMM_SPEC(float, float, S) @@ -194,16 +166,13 @@ CUSPARSE_XCSRGEMM_SPEC(Kokkos::complex, cuDoubleComplex, Z) #undef CUSPARSE_XCSRGEMM_SPEC // 10.x supports the pre-generic interface. -template -void spgemm_numeric_cusparse( - KernelHandle *handle, lno_t m, lno_t n, lno_t k, - const ConstRowMapType &row_mapA, const ConstEntriesType &entriesA, - const ConstValuesType &valuesA, const ConstRowMapType &row_mapB, - const ConstEntriesType &entriesB, const ConstValuesType &valuesB, - const ConstRowMapType &row_mapC, const EntriesType &entriesC, - const ValuesType &valuesC) { +template +void spgemm_numeric_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, const ConstRowMapType &row_mapA, + const ConstEntriesType &entriesA, const ConstValuesType &valuesA, + const ConstRowMapType &row_mapB, const ConstEntriesType &entriesB, + const ConstValuesType &valuesB, const ConstRowMapType &row_mapC, + const EntriesType &entriesC, const ValuesType &valuesC) { auto h = handle->get_cusparse_spgemm_handle(); int nnzA = entriesA.extent(0); @@ -212,11 +181,9 @@ void spgemm_numeric_cusparse( // Only call numeric if C actually has entries if (handle->get_c_nnz()) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgemm( - h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, - CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, n, h->generalDescr, nnzA, - valuesA.data(), row_mapA.data(), entriesA.data(), h->generalDescr, nnzB, - valuesB.data(), row_mapB.data(), entriesB.data(), h->generalDescr, - valuesC.data(), row_mapC.data(), entriesC.data())); + h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, n, h->generalDescr, + nnzA, valuesA.data(), row_mapA.data(), entriesA.data(), h->generalDescr, nnzB, valuesB.data(), row_mapB.data(), + entriesB.data(), h->generalDescr, valuesC.data(), row_mapC.data(), entriesC.data())); } handle->set_computed_entries(); handle->set_call_numeric(); @@ -224,74 +191,50 @@ void spgemm_numeric_cusparse( #endif -#define 
SPGEMM_NUMERIC_DECL_CUSPARSE(SCALAR, MEMSPACE, TPL_AVAIL) \ - template <> \ - struct SPGEMM_NUMERIC, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, TPL_AVAIL> { \ - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::Cuda, MEMSPACE, MEMSPACE>; \ - using c_int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using int_view_t = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using c_scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spgemm_numeric(KernelHandle *handle, \ - typename KernelHandle::nnz_lno_t m, \ - typename KernelHandle::nnz_lno_t n, \ - typename KernelHandle::nnz_lno_t k, \ - c_int_view_t row_mapA, c_int_view_t entriesA, \ - c_scalar_view_t valuesA, bool, \ - c_int_view_t row_mapB, c_int_view_t entriesB, \ - c_scalar_view_t valuesB, bool, \ - c_int_view_t row_mapC, int_view_t entriesC, \ - scalar_view_t valuesC) { \ - std::string label = "KokkosSparse::spgemm_numeric[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spgemm_numeric_cusparse(handle->get_spgemm_handle(), m, n, k, row_mapA, \ - entriesA, valuesA, row_mapB, entriesB, valuesB, \ - row_mapC, entriesC, valuesC); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define SPGEMM_NUMERIC_DECL_CUSPARSE(SCALAR, MEMSPACE, TPL_AVAIL) \ + template <> \ + struct SPGEMM_NUMERIC, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ 
+ Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using c_int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using c_scalar_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using scalar_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_numeric(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, c_scalar_view_t valuesA, bool, \ + c_int_view_t row_mapB, c_int_view_t entriesB, c_scalar_view_t valuesB, bool, \ + c_int_view_t row_mapC, int_view_t entriesC, scalar_view_t valuesC) { \ + std::string label = "KokkosSparse::spgemm_numeric[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_numeric_cusparse(handle->get_spgemm_handle(), m, n, k, row_mapA, entriesA, valuesA, row_mapB, entriesB, \ + valuesB, row_mapC, entriesC, valuesC); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define SPGEMM_NUMERIC_DECL_CUSPARSE_S(SCALAR, TPL_AVAIL) \ @@ -312,29 +255,21 @@ SPGEMM_NUMERIC_DECL_CUSPARSE_S(Kokkos::complex, false) #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE //============================================================================= // Overload rocsparse_Xcsrgemm_numeric() over scalar types -#define ROCSPARSE_XCSRGEMM_NUMERIC_SPEC(scalar_type, TOKEN) \ - inline rocsparse_status rocsparse_Xcsrgemm_numeric( \ - rocsparse_handle handle, rocsparse_operation trans_A, \ - rocsparse_operation trans_B, rocsparse_int m, rocsparse_int n, \ - rocsparse_int k, const scalar_type *alpha, \ - const 
rocsparse_mat_descr descr_A, rocsparse_int nnz_A, \ - const scalar_type *csr_val_A, const rocsparse_int *csr_row_ptr_A, \ - const rocsparse_int *csr_col_ind_A, const rocsparse_mat_descr descr_B, \ - rocsparse_int nnz_B, const scalar_type *csr_val_B, \ - const rocsparse_int *csr_row_ptr_B, const rocsparse_int *csr_col_ind_B, \ - const scalar_type *beta, const rocsparse_mat_descr descr_D, \ - rocsparse_int nnz_D, const scalar_type *csr_val_D, \ - const rocsparse_int *csr_row_ptr_D, const rocsparse_int *csr_col_ind_D, \ - const rocsparse_mat_descr descr_C, rocsparse_int nnz_C, \ - scalar_type *csr_val_C, const rocsparse_int *csr_row_ptr_C, \ - const rocsparse_int *csr_col_ind_C, const rocsparse_mat_info info_C, \ - void *buffer) { \ - return rocsparse_##TOKEN##csrgemm_numeric( \ - handle, trans_A, trans_B, m, n, k, alpha, descr_A, nnz_A, csr_val_A, \ - csr_row_ptr_A, csr_col_ind_A, descr_B, nnz_B, csr_val_B, \ - csr_row_ptr_B, csr_col_ind_B, beta, descr_D, nnz_D, csr_val_D, \ - csr_row_ptr_D, csr_col_ind_D, descr_C, nnz_C, csr_val_C, \ - csr_row_ptr_C, csr_col_ind_C, info_C, buffer); \ +#define ROCSPARSE_XCSRGEMM_NUMERIC_SPEC(scalar_type, TOKEN) \ + inline rocsparse_status rocsparse_Xcsrgemm_numeric( \ + rocsparse_handle handle, rocsparse_operation trans_A, rocsparse_operation trans_B, rocsparse_int m, \ + rocsparse_int n, rocsparse_int k, const scalar_type *alpha, const rocsparse_mat_descr descr_A, \ + rocsparse_int nnz_A, const scalar_type *csr_val_A, const rocsparse_int *csr_row_ptr_A, \ + const rocsparse_int *csr_col_ind_A, const rocsparse_mat_descr descr_B, rocsparse_int nnz_B, \ + const scalar_type *csr_val_B, const rocsparse_int *csr_row_ptr_B, const rocsparse_int *csr_col_ind_B, \ + const scalar_type *beta, const rocsparse_mat_descr descr_D, rocsparse_int nnz_D, const scalar_type *csr_val_D, \ + const rocsparse_int *csr_row_ptr_D, const rocsparse_int *csr_col_ind_D, const rocsparse_mat_descr descr_C, \ + rocsparse_int nnz_C, scalar_type *csr_val_C, const 
rocsparse_int *csr_row_ptr_C, \ + const rocsparse_int *csr_col_ind_C, const rocsparse_mat_info info_C, void *buffer) { \ + return rocsparse_##TOKEN##csrgemm_numeric( \ + handle, trans_A, trans_B, m, n, k, alpha, descr_A, nnz_A, csr_val_A, csr_row_ptr_A, csr_col_ind_A, descr_B, \ + nnz_B, csr_val_B, csr_row_ptr_B, csr_col_ind_B, beta, descr_D, nnz_D, csr_val_D, csr_row_ptr_D, csr_col_ind_D, \ + descr_C, nnz_C, csr_val_C, csr_row_ptr_C, csr_col_ind_C, info_C, buffer); \ } ROCSPARSE_XCSRGEMM_NUMERIC_SPEC(float, s) @@ -342,26 +277,21 @@ ROCSPARSE_XCSRGEMM_NUMERIC_SPEC(double, d) ROCSPARSE_XCSRGEMM_NUMERIC_SPEC(rocsparse_float_complex, c) ROCSPARSE_XCSRGEMM_NUMERIC_SPEC(rocsparse_double_complex, z) -template < - typename KernelHandle, typename ain_row_index_view_type, - typename ain_nonzero_index_view_type, typename ain_nonzero_value_view_type, - typename bin_row_index_view_type, typename bin_nonzero_index_view_type, - typename bin_nonzero_value_view_type, typename cin_row_index_view_type, - typename cin_nonzero_index_view_type, typename cin_nonzero_value_view_type> -void spgemm_numeric_rocsparse( - KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, - ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, - ain_nonzero_value_view_type valuesA, bin_row_index_view_type rowptrB, - bin_nonzero_index_view_type colidxB, bin_nonzero_value_view_type valuesB, - cin_row_index_view_type rowptrC, cin_nonzero_index_view_type colidxC, - cin_nonzero_value_view_type valuesC) { - using scalar_type = typename KernelHandle::nnz_scalar_t; - using rocsparse_scalar_type = - typename kokkos_to_rocsparse_type::type; - - typename KernelHandle::rocSparseSpgemmHandleType *h = - handle->get_rocsparse_spgemm_handle(); +template +void spgemm_numeric_rocsparse(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, + 
ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, + ain_nonzero_value_view_type valuesA, bin_row_index_view_type rowptrB, + bin_nonzero_index_view_type colidxB, bin_nonzero_value_view_type valuesB, + cin_row_index_view_type rowptrC, cin_nonzero_index_view_type colidxC, + cin_nonzero_value_view_type valuesC) { + using scalar_type = typename KernelHandle::nnz_scalar_t; + using rocsparse_scalar_type = typename kokkos_to_rocsparse_type::type; + + typename KernelHandle::rocSparseSpgemmHandleType *h = handle->get_rocsparse_spgemm_handle(); const auto alpha = Kokkos::ArithTraits::one(); const auto beta = Kokkos::ArithTraits::zero(); @@ -371,108 +301,75 @@ void spgemm_numeric_rocsparse( auto nnz_B = colidxB.extent(0); auto nnz_C = colidxC.extent(0); - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_get_pointer_mode(h->rocsparseHandle, &oldPtrMode)); - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode( - h->rocsparseHandle, rocsparse_pointer_mode_host)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_get_pointer_mode(h->rocsparseHandle, &oldPtrMode)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode(h->rocsparseHandle, rocsparse_pointer_mode_host)); if (!handle->are_entries_computed()) { KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_csrgemm_symbolic( - h->rocsparseHandle, h->opA, h->opB, m, k, n, h->descr_A, nnz_A, - rowptrA.data(), colidxA.data(), h->descr_B, nnz_B, rowptrB.data(), - colidxB.data(), h->descr_D, 0, nullptr, nullptr, h->descr_C, nnz_C, - rowptrC.data(), colidxC.data(), h->info_C, h->buffer)); + h->rocsparseHandle, h->opA, h->opB, m, k, n, h->descr_A, nnz_A, rowptrA.data(), colidxA.data(), h->descr_B, + nnz_B, rowptrB.data(), colidxB.data(), h->descr_D, 0, nullptr, nullptr, h->descr_C, nnz_C, rowptrC.data(), + colidxC.data(), h->info_C, h->buffer)); handle->set_computed_entries(); } KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_Xcsrgemm_numeric( - h->rocsparseHandle, h->opA, h->opB, m, k, n, - reinterpret_cast(&alpha), h->descr_A, - 
nnz_A, reinterpret_cast(valuesA.data()), - rowptrA.data(), colidxA.data(), h->descr_B, nnz_B, - reinterpret_cast(valuesB.data()), - rowptrB.data(), colidxB.data(), - reinterpret_cast(&beta), h->descr_D, 0, - nullptr, nullptr, nullptr, h->descr_C, nnz_C, - reinterpret_cast(valuesC.data()), rowptrC.data(), - colidxC.data(), h->info_C, h->buffer)); + h->rocsparseHandle, h->opA, h->opB, m, k, n, reinterpret_cast(&alpha), h->descr_A, + nnz_A, reinterpret_cast(valuesA.data()), rowptrA.data(), colidxA.data(), + h->descr_B, nnz_B, reinterpret_cast(valuesB.data()), rowptrB.data(), + colidxB.data(), reinterpret_cast(&beta), h->descr_D, 0, nullptr, nullptr, nullptr, + h->descr_C, nnz_C, reinterpret_cast(valuesC.data()), rowptrC.data(), colidxC.data(), + h->info_C, h->buffer)); // Restore old pointer mode - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_set_pointer_mode(h->rocsparseHandle, oldPtrMode)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode(h->rocsparseHandle, oldPtrMode)); handle->set_call_numeric(); } -#define SPGEMM_NUMERIC_DECL_ROCSPARSE(SCALAR, TPL_AVAIL) \ - template <> \ - struct SPGEMM_NUMERIC< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::HIP, Kokkos::HIPSpace, \ - Kokkos::HIPSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, TPL_AVAIL> { \ - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::HIP, Kokkos::HIPSpace, \ - Kokkos::HIPSpace>; \ - using c_int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using int_view_t = \ - Kokkos::View, \ - 
Kokkos::MemoryTraits>; \ - using c_scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spgemm_numeric(KernelHandle *handle, \ - typename KernelHandle::nnz_lno_t m, \ - typename KernelHandle::nnz_lno_t n, \ - typename KernelHandle::nnz_lno_t k, \ - c_int_view_t row_mapA, c_int_view_t entriesA, \ - c_scalar_view_t valuesA, bool, \ - c_int_view_t row_mapB, c_int_view_t entriesB, \ - c_scalar_view_t valuesB, bool, \ - c_int_view_t row_mapC, int_view_t entriesC, \ - scalar_view_t valuesC) { \ - std::string label = "KokkosSparse::spgemm_numeric[TPL_ROCSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spgemm_numeric_rocsparse(handle->get_spgemm_handle(), m, n, k, row_mapA, \ - entriesA, valuesA, row_mapB, entriesB, valuesB, \ - row_mapC, entriesC, valuesC); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define SPGEMM_NUMERIC_DECL_ROCSPARSE(SCALAR, TPL_AVAIL) \ + template <> \ + struct SPGEMM_NUMERIC, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + using KernelHandle = \ + KokkosKernels::Experimental::KokkosKernelsHandle; \ + using c_int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using c_scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using scalar_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_numeric(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, \ + 
c_int_view_t row_mapA, c_int_view_t entriesA, c_scalar_view_t valuesA, bool, \ + c_int_view_t row_mapB, c_int_view_t entriesB, c_scalar_view_t valuesB, bool, \ + c_int_view_t row_mapC, int_view_t entriesC, scalar_view_t valuesC) { \ + std::string label = "KokkosSparse::spgemm_numeric[TPL_ROCSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_numeric_rocsparse(handle->get_spgemm_handle(), m, n, k, row_mapA, entriesA, valuesA, row_mapB, entriesB, \ + valuesB, row_mapC, entriesC, valuesC); \ + Kokkos::Profiling::popRegion(); \ + } \ }; SPGEMM_NUMERIC_DECL_ROCSPARSE(float, true) @@ -487,20 +384,16 @@ SPGEMM_NUMERIC_DECL_ROCSPARSE(Kokkos::complex, false) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -template < - typename KernelHandle, typename ain_row_index_view_type, - typename ain_nonzero_index_view_type, typename ain_nonzero_value_view_type, - typename bin_row_index_view_type, typename bin_nonzero_index_view_type, - typename bin_nonzero_value_view_type, typename cin_row_index_view_type, - typename cin_nonzero_index_view_type, typename cin_nonzero_value_view_type> -void spgemm_numeric_mkl( - KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, - ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, - ain_nonzero_value_view_type valuesA, bin_row_index_view_type rowptrB, - bin_nonzero_index_view_type colidxB, bin_nonzero_value_view_type valuesB, - cin_row_index_view_type rowptrC, cin_nonzero_index_view_type colidxC, - cin_nonzero_value_view_type valuesC) { +template +void spgemm_numeric_mkl(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, typename KernelHandle::nnz_lno_t n, + typename KernelHandle::nnz_lno_t k, ain_row_index_view_type rowptrA, + ain_nonzero_index_view_type colidxA, ain_nonzero_value_view_type valuesA, + bin_row_index_view_type rowptrB, bin_nonzero_index_view_type colidxB, + bin_nonzero_value_view_type 
valuesB, cin_row_index_view_type rowptrC, + cin_nonzero_index_view_type colidxC, cin_nonzero_value_view_type valuesC) { using ExecSpace = typename KernelHandle::HandleExecSpace; using index_type = typename KernelHandle::nnz_lno_t; using size_type = typename KernelHandle::size_type; @@ -512,25 +405,21 @@ void spgemm_numeric_mkl( handle->set_call_numeric(); return; } - MKLMatrix A(m, n, const_cast(rowptrA.data()), - const_cast(colidxA.data()), + MKLMatrix A(m, n, const_cast(rowptrA.data()), const_cast(colidxA.data()), const_cast(valuesA.data())); - MKLMatrix B(n, k, const_cast(rowptrB.data()), - const_cast(colidxB.data()), + MKLMatrix B(n, k, const_cast(rowptrB.data()), const_cast(colidxB.data()), const_cast(valuesB.data())); auto mklSpgemmHandle = handle->get_mkl_spgemm_handle(); matrix_descr generalDescr; generalDescr.type = SPARSE_MATRIX_TYPE_GENERAL; generalDescr.mode = SPARSE_FILL_MODE_FULL; generalDescr.diag = SPARSE_DIAG_NON_UNIT; - KOKKOSKERNELS_MKL_SAFE_CALL( - mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, A, - SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, B, - SPARSE_STAGE_FINALIZE_MULT_NO_VAL, &mklSpgemmHandle->C)); - KOKKOSKERNELS_MKL_SAFE_CALL( - mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, A, - SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, B, - SPARSE_STAGE_FINALIZE_MULT, &mklSpgemmHandle->C)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, A, + SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, B, + SPARSE_STAGE_FINALIZE_MULT_NO_VAL, &mklSpgemmHandle->C)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, A, + SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, B, + SPARSE_STAGE_FINALIZE_MULT, &mklSpgemmHandle->C)); KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_order(mklSpgemmHandle->C)); MKLMatrix wrappedC(mklSpgemmHandle->C); MKL_INT nrows = 0, ncols = 0; @@ -538,87 +427,60 @@ void spgemm_numeric_mkl( MKL_INT *colidxRaw = nullptr; scalar_type 
*valuesRaw = nullptr; wrappedC.export_data(nrows, ncols, rowptrRaw, colidxRaw, valuesRaw); - Kokkos::View> - colidxRawView(colidxRaw, c_nnz); - Kokkos::View> - valuesRawView(valuesRaw, c_nnz); + Kokkos::View> colidxRawView(colidxRaw, + c_nnz); + Kokkos::View> valuesRawView(valuesRaw, + c_nnz); Kokkos::deep_copy(ExecSpace(), colidxC, colidxRawView); Kokkos::deep_copy(ExecSpace(), valuesC, valuesRawView); handle->set_call_numeric(); handle->set_computed_entries(); } -#define SPGEMM_NUMERIC_DECL_MKL(SCALAR, EXEC, TPL_AVAIL) \ - template <> \ - struct SPGEMM_NUMERIC, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, TPL_AVAIL> { \ - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const MKL_INT, const MKL_INT, const SCALAR, EXEC, Kokkos::HostSpace, \ - Kokkos::HostSpace>; \ - using c_int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using int_view_t = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using c_scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using scalar_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spgemm_numeric(KernelHandle *handle, \ - typename KernelHandle::nnz_lno_t m, \ - typename KernelHandle::nnz_lno_t n, \ - typename KernelHandle::nnz_lno_t k, \ - c_int_view_t row_mapA, c_int_view_t entriesA, \ - c_scalar_view_t valuesA, bool, \ - c_int_view_t row_mapB, c_int_view_t entriesB, \ - c_scalar_view_t valuesB, bool, \ - c_int_view_t row_mapC, int_view_t entriesC, \ - scalar_view_t valuesC) { \ - std::string label = "KokkosSparse::spgemm_numeric[TPL_MKL," + \ - Kokkos::ArithTraits::name() + "]"; \ - 
Kokkos::Profiling::pushRegion(label); \ - spgemm_numeric_mkl(handle->get_spgemm_handle(), m, n, k, row_mapA, \ - entriesA, valuesA, row_mapB, entriesB, valuesB, \ - row_mapC, entriesC, valuesC); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define SPGEMM_NUMERIC_DECL_MKL(SCALAR, EXEC, TPL_AVAIL) \ + template <> \ + struct SPGEMM_NUMERIC, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using c_int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using c_scalar_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using scalar_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_numeric(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, c_scalar_view_t valuesA, bool, \ + c_int_view_t row_mapB, c_int_view_t entriesB, c_scalar_view_t valuesB, bool, \ + c_int_view_t row_mapC, int_view_t entriesC, scalar_view_t valuesC) { \ + std::string label = "KokkosSparse::spgemm_numeric[TPL_MKL," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_numeric_mkl(handle->get_spgemm_handle(), m, n, k, row_mapA, entriesA, valuesA, row_mapB, entriesB, \ + valuesB, row_mapC, entriesC, valuesC); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define SPGEMM_NUMERIC_DECL_MKL_SE(SCALAR, EXEC) \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp 
b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp index 41e8802214c6..75615623a525 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp @@ -26,8 +26,8 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct spgemm_symbolic_tpl_spec_avail { enum : bool { value = false }; }; @@ -39,28 +39,22 @@ struct spgemm_symbolic_tpl_spec_avail { // offsets and ordinals independently as either 16, 32 or 64-bit, SpGEMM will // just fail at runtime if you don't use 32 for both. -#define SPGEMM_SYMBOLIC_AVAIL_CUSPARSE(SCALAR, MEMSPACE) \ - template <> \ - struct spgemm_symbolic_tpl_spec_avail< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::Cuda, MEMSPACE, \ - MEMSPACE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define SPGEMM_SYMBOLIC_AVAIL_CUSPARSE(SCALAR, MEMSPACE) \ + template <> \ + struct spgemm_symbolic_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #define SPGEMM_SYMBOLIC_AVAIL_CUSPARSE_S(SCALAR) \ @@ -76,28 +70,22 @@ SPGEMM_SYMBOLIC_AVAIL_CUSPARSE_S(Kokkos::complex) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -#define SPGEMM_SYMBOLIC_AVAIL_ROCSPARSE(SCALAR) \ - template <> \ - struct spgemm_symbolic_tpl_spec_avail< \ - 
KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::HIP, Kokkos::HIPSpace, \ - Kokkos::HIPSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define SPGEMM_SYMBOLIC_AVAIL_ROCSPARSE(SCALAR) \ + template <> \ + struct spgemm_symbolic_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; SPGEMM_SYMBOLIC_AVAIL_ROCSPARSE(float) @@ -107,28 +95,22 @@ SPGEMM_SYMBOLIC_AVAIL_ROCSPARSE(Kokkos::complex) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#define SPGEMM_SYMBOLIC_AVAIL_MKL(SCALAR, EXEC) \ - template <> \ - struct spgemm_symbolic_tpl_spec_avail< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const MKL_INT, const MKL_INT, const SCALAR, EXEC, Kokkos::HostSpace, \ - Kokkos::HostSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define SPGEMM_SYMBOLIC_AVAIL_MKL(SCALAR, EXEC) \ + template <> \ + struct spgemm_symbolic_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; #define 
SPGEMM_SYMBOLIC_AVAIL_MKL_E(EXEC) \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp index 13896faa6a76..4ac41ca80dde 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp @@ -48,14 +48,11 @@ namespace Impl { // 11.4+ supports generic API with reuse (full symbolic/numeric separation) // However, its "symbolic" (cusparseSpGEMMreuse_nnz) does not populate C's // rowptrs. -template -void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, - const ConstRowMapType &row_mapA, - const ConstEntriesType &entriesA, - const ConstRowMapType &row_mapB, - const ConstEntriesType &entriesB, - const RowMapType &row_mapC, bool computeRowptrs) { +template +void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, const ConstRowMapType &row_mapA, + const ConstEntriesType &entriesA, const ConstRowMapType &row_mapB, + const ConstEntriesType &entriesB, const RowMapType &row_mapC, bool computeRowptrs) { // Split symbolic into two sub-phases: handle/buffer setup and nnz(C), and // then rowptrs (if requested). That way, calling symbolic once with // computeRowptrs=false, and then again with computeRowptrs=true will not @@ -75,70 +72,61 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, // which however is not available in this function. So we fake it with the // entries instead. Fortunately, it seems cupsarse does not access that in // the symbolic phase. 
- KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), - (void *)entriesA.data(), (void *)entriesA.data() /*fake*/, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); - - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), - (void *)entriesB.data(), (void *)entriesB.data() /*fake*/, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), + (void *)entriesA.data(), (void *)entriesA.data() /*fake*/, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + h->scalarType)); + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), + (void *)entriesB.data(), (void *)entriesB.data() /*fake*/, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + h->scalarType)); #if CUDA_VERSION >= 12020 // at some point cusparseCreateCsr started to need a non-null row-pointer // array, even if the operation that consumed the handle doesn't need to // read it. This was observed on a system with CUDA 12.2, but it may have // started earlier. 
- KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &h->descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + h->scalarType)); #else - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &h->descr_C, m, k, 0, nullptr, nullptr, nullptr, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, nullptr, nullptr, nullptr, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); #endif //---------------------------------------------------------------------- // ask bufferSize1 bytes for external memory - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation( - h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &bufferSize1, nullptr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, h->descr_A, + h->descr_B, h->descr_C, h->alg, h->spgemmDescr, + &bufferSize1, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer1, bufferSize1)); // inspect matrices A and B to understand the memory requirement for the // next step - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation( - h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &bufferSize1, buffer1)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, h->descr_A, + h->descr_B, h->descr_C, h->alg, h->spgemmDescr, + &bufferSize1, buffer1)); //---------------------------------------------------------------------- // Compute nnz of C - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz( - h->cusparseHandle, h->opA, h->opB, h->descr_A, 
h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &bufferSize2, nullptr, &h->bufferSize3, nullptr, - &h->bufferSize4, nullptr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, + h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, nullptr, + &h->bufferSize3, nullptr, &h->bufferSize4, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer2, bufferSize2)); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&h->buffer3, h->bufferSize3)); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&h->buffer4, h->bufferSize4)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer3, h->bufferSize3)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer4, h->bufferSize4)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz( - h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &bufferSize2, buffer2, &h->bufferSize3, - h->buffer3, &h->bufferSize4, h->buffer4)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, + h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, buffer2, + &h->bufferSize3, h->buffer3, &h->bufferSize4, h->buffer4)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer2)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer1)); int64_t C_nrow, C_ncol, C_nnz; - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); if (C_nnz > std::numeric_limits::max()) { throw std::runtime_error("nnz of C overflowed over 32-bit int\n"); } @@ -155,26 +143,20 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, auto h = handle->get_cusparse_spgemm_handle(); // We just want rowptrs, but since C's entries/values are not yet allocated, // we must use dummy versions and then discard them. 
- KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&dummyEntries, C_nnz * sizeof(Ordinal))); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&dummyValues, C_nnz * sizeof(Scalar))); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCsrSetPointers( - h->descr_C, row_mapC.data(), dummyEntries, dummyValues)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&dummyEntries, C_nnz * sizeof(Ordinal))); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&dummyValues, C_nnz * sizeof(Scalar))); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCsrSetPointers(h->descr_C, row_mapC.data(), dummyEntries, dummyValues)); //-------------------------------------------------------------------------- - cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, - h->descr_B, h->descr_C, h->alg, h->spgemmDescr, - &h->bufferSize5, nullptr); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&h->buffer5, h->bufferSize5)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy( - h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, - h->alg, h->spgemmDescr, &h->bufferSize5, h->buffer5)); + cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg, + h->spgemmDescr, &h->bufferSize5, nullptr); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer5, h->bufferSize5)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, + h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, + h->buffer5)); if (!handle->get_c_nnz()) { // cuSPARSE does not populate C rowptrs if C has no entries - Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, - Offset(0)); + Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, Offset(0)); } KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyValues)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyEntries)); @@ -184,14 +166,11 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, #elif (CUDA_VERSION >= 11000) // 
11.0-11.3 supports only the generic API, but not reuse. -template -void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, - const ConstRowMapType &row_mapA, - const ConstEntriesType &entriesA, - const ConstRowMapType &row_mapB, - const ConstEntriesType &entriesB, - const RowMapType &row_mapC, bool computeRowptrs) { +template +void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, const ConstRowMapType &row_mapA, + const ConstEntriesType &entriesA, const ConstRowMapType &row_mapB, + const ConstEntriesType &entriesB, const RowMapType &row_mapC, bool computeRowptrs) { using scalar_type = typename KernelHandle::nnz_scalar_t; using ordinal_type = typename KernelHandle::nnz_lno_t; const auto alpha = Kokkos::ArithTraits::one(); @@ -212,54 +191,43 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, // // The dummy values can be uninitialized. cusparseSpGEMM_compute does // not remove numerical zeros from the sparsity pattern. 
- KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc( - &dummyValues_AB, sizeof(scalar_type) * - std::max(entriesA.extent(0), entriesB.extent(0)))); + KOKKOS_IMPL_CUDA_SAFE_CALL( + cudaMalloc(&dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), entriesB.extent(0)))); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), - (void *)entriesA.data(), dummyValues_AB, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), + (void *)entriesA.data(), dummyValues_AB, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), - (void *)entriesB.data(), dummyValues_AB, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), + (void *)entriesB.data(), dummyValues_AB, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCreateCsr(&h->descr_C, m, k, 0, row_mapC.data(), nullptr, - nullptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, row_mapC.data(), nullptr, nullptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + h->scalarType)); //---------------------------------------------------------------------- // query workEstimation buffer size, allocate, then call again with buffer. 
- KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, h->alg, h->spgemmDescr, - &h->bufferSize3, nullptr)); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&h->buffer3, h->bufferSize3)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, h->alg, h->spgemmDescr, - &h->bufferSize3, h->buffer3)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, + h->descr_B, &beta, h->descr_C, h->scalarType, h->alg, + h->spgemmDescr, &h->bufferSize3, nullptr)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer3, h->bufferSize3)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, + h->descr_B, &beta, h->descr_C, h->scalarType, h->alg, + h->spgemmDescr, &h->bufferSize3, h->buffer3)); //---------------------------------------------------------------------- // query compute buffer size, allocate, then call again with buffer. 
- KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr, &h->bufferSize4, nullptr)); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc((void **)&h->buffer4, h->bufferSize4)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr, &h->bufferSize4, h->buffer4)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, + &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, + h->spgemmDescr, &h->bufferSize4, nullptr)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer4, h->bufferSize4)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, + &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, + h->spgemmDescr, &h->bufferSize4, h->buffer4)); int64_t C_nrow, C_ncol, C_nnz; - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); if (C_nnz > std::numeric_limits::max()) { throw std::runtime_error("nnz of C overflowed over 32-bit int\n"); } @@ -275,28 +243,22 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, // This is not the first call to symbolic, so dummyValues_AB was not // allocated above. But, descr_A and descr_B will have been saved in the // handle, so we can reuse those. 
- KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc( - &dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), - entriesB.extent(0)))); + KOKKOS_IMPL_CUDA_SAFE_CALL( + cudaMalloc(&dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), entriesB.extent(0)))); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), - (void *)entriesA.data(), dummyValues_AB)); + cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), dummyValues_AB)); KOKKOS_CUSPARSE_SAFE_CALL( - cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), - (void *)entriesB.data(), dummyValues_AB)); + cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), dummyValues_AB)); } void *dummyEntries_C, *dummyValues_C; - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc(&dummyEntries_C, sizeof(ordinal_type) * C_nnz)); - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc(&dummyValues_C, sizeof(scalar_type) * C_nnz)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCsrSetPointers( - h->descr_C, (void *)row_mapC.data(), dummyEntries_C, dummyValues_C)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dummyEntries_C, sizeof(ordinal_type) * C_nnz)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dummyValues_C, sizeof(scalar_type) * C_nnz)); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), dummyEntries_C, dummyValues_C)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy( - h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, + &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, + h->spgemmDescr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyValues_C)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyEntries_C)); @@ -308,15 +270,11 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, 
#else // 10.x supports the pre-generic interface (cusparseXcsrgemmNnz). It always // populates C rowptrs. -template -void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, - const ConstRowMapType &row_mapA, - const ConstEntriesType &entriesA, - const ConstRowMapType &row_mapB, - const ConstEntriesType &entriesB, - const RowMapType &row_mapC, - bool /* computeRowptrs */) { +template +void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, const ConstRowMapType &row_mapA, + const ConstEntriesType &entriesA, const ConstRowMapType &row_mapB, + const ConstEntriesType &entriesB, const RowMapType &row_mapC, bool /* computeRowptrs */) { // using scalar_type = typename KernelHandle::nnz_scalar_t; using size_type = typename KernelHandle::size_type; if (handle->are_rowptrs_computed()) return; @@ -330,25 +288,19 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, // In empty (zero entries) matrix case, cusparse does not populate rowptrs to // zeros - if (m == 0 || n == 0 || k == 0 || entriesA.extent(0) == size_type(0) || - entriesB.extent(0) == size_type(0)) { - Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, - size_type(0)); + if (m == 0 || n == 0 || k == 0 || entriesA.extent(0) == size_type(0) || entriesB.extent(0) == size_type(0)) { + Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, size_type(0)); nnzC = 0; } else { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgemmNnz( - h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, - CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, n, h->generalDescr, nnzA, - row_mapA.data(), entriesA.data(), h->generalDescr, nnzB, - row_mapB.data(), entriesB.data(), h->generalDescr, row_mapC.data(), - nnzTotalDevHostPtr)); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseXcsrgemmNnz(h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, + n, h->generalDescr, nnzA, row_mapA.data(), entriesA.data(), h->generalDescr, 
nnzB, + row_mapB.data(), entriesB.data(), h->generalDescr, row_mapC.data(), nnzTotalDevHostPtr)); if (nullptr != nnzTotalDevHostPtr) { nnzC = *nnzTotalDevHostPtr; } else { - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpy( - &nnzC, row_mapC.data() + m, sizeof(int), cudaMemcpyDeviceToHost)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpy( - &baseC, row_mapC.data(), sizeof(int), cudaMemcpyDeviceToHost)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpy(&nnzC, row_mapC.data() + m, sizeof(int), cudaMemcpyDeviceToHost)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpy(&baseC, row_mapC.data(), sizeof(int), cudaMemcpyDeviceToHost)); nnzC -= baseC; } } @@ -359,53 +311,37 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, #endif -#define SPGEMM_SYMBOLIC_DECL_CUSPARSE(SCALAR, MEMSPACE, TPL_AVAIL) \ - template <> \ - struct SPGEMM_SYMBOLIC< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::Cuda, MEMSPACE, \ - MEMSPACE>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, TPL_AVAIL> { \ - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::Cuda, MEMSPACE, MEMSPACE>; \ - using c_int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using int_view_t = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spgemm_symbolic(KernelHandle *handle, \ - typename KernelHandle::nnz_lno_t m, \ - typename KernelHandle::nnz_lno_t n, \ - typename KernelHandle::nnz_lno_t k, \ - c_int_view_t row_mapA, c_int_view_t entriesA, \ - bool, c_int_view_t row_mapB, \ - c_int_view_t entriesB, bool, \ - int_view_t row_mapC, bool computeRowptrs) { \ - std::string label = "KokkosSparse::spgemm[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - 
spgemm_symbolic_cusparse(handle->get_spgemm_handle(), m, n, k, row_mapA, \ - entriesA, row_mapB, entriesB, row_mapC, \ - computeRowptrs); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define SPGEMM_SYMBOLIC_DECL_CUSPARSE(SCALAR, MEMSPACE, TPL_AVAIL) \ + template <> \ + struct SPGEMM_SYMBOLIC, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using c_int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_symbolic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, bool, c_int_view_t row_mapB, \ + c_int_view_t entriesB, bool, int_view_t row_mapC, bool computeRowptrs) { \ + std::string label = "KokkosSparse::spgemm_symbolic[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_symbolic_cusparse(handle->get_spgemm_handle(), m, n, k, row_mapA, entriesA, row_mapB, entriesB, row_mapC, \ + computeRowptrs); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define SPGEMM_SYMBOLIC_DECL_CUSPARSE_S(SCALAR, TPL_AVAIL) \ @@ -427,24 +363,18 @@ SPGEMM_SYMBOLIC_DECL_CUSPARSE_S(Kokkos::complex, false) #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE //============================================================================= // Overload rocsparse_Xcsrgemm_buffer_size() over scalar types -#define ROCSPARSE_XCSRGEMM_BUFFER_SIZE_SPEC(scalar_type, TOKEN) \ - inline rocsparse_status rocsparse_Xcsrgemm_buffer_size( \ - rocsparse_handle handle, rocsparse_operation trans_A, \ - rocsparse_operation trans_B, rocsparse_int m, rocsparse_int n, \ - rocsparse_int k, 
const scalar_type *alpha, \ - const rocsparse_mat_descr descr_A, rocsparse_int nnz_A, \ - const rocsparse_int *csr_row_ptr_A, const rocsparse_int *csr_col_ind_A, \ - const rocsparse_mat_descr descr_B, rocsparse_int nnz_B, \ - const rocsparse_int *csr_row_ptr_B, const rocsparse_int *csr_col_ind_B, \ - const scalar_type *beta, const rocsparse_mat_descr descr_D, \ - rocsparse_int nnz_D, const rocsparse_int *csr_row_ptr_D, \ - const rocsparse_int *csr_col_ind_D, rocsparse_mat_info info_C, \ - size_t *buffer_size) { \ - return rocsparse_##TOKEN##csrgemm_buffer_size( \ - handle, trans_A, trans_B, m, n, k, alpha, descr_A, nnz_A, \ - csr_row_ptr_A, csr_col_ind_A, descr_B, nnz_B, csr_row_ptr_B, \ - csr_col_ind_B, beta, descr_D, nnz_D, csr_row_ptr_D, csr_col_ind_D, \ - info_C, buffer_size); \ +#define ROCSPARSE_XCSRGEMM_BUFFER_SIZE_SPEC(scalar_type, TOKEN) \ + inline rocsparse_status rocsparse_Xcsrgemm_buffer_size( \ + rocsparse_handle handle, rocsparse_operation trans_A, rocsparse_operation trans_B, rocsparse_int m, \ + rocsparse_int n, rocsparse_int k, const scalar_type *alpha, const rocsparse_mat_descr descr_A, \ + rocsparse_int nnz_A, const rocsparse_int *csr_row_ptr_A, const rocsparse_int *csr_col_ind_A, \ + const rocsparse_mat_descr descr_B, rocsparse_int nnz_B, const rocsparse_int *csr_row_ptr_B, \ + const rocsparse_int *csr_col_ind_B, const scalar_type *beta, const rocsparse_mat_descr descr_D, \ + rocsparse_int nnz_D, const rocsparse_int *csr_row_ptr_D, const rocsparse_int *csr_col_ind_D, \ + rocsparse_mat_info info_C, size_t *buffer_size) { \ + return rocsparse_##TOKEN##csrgemm_buffer_size( \ + handle, trans_A, trans_B, m, n, k, alpha, descr_A, nnz_A, csr_row_ptr_A, csr_col_ind_A, descr_B, nnz_B, \ + csr_row_ptr_B, csr_col_ind_B, beta, descr_D, nnz_D, csr_row_ptr_D, csr_col_ind_D, info_C, buffer_size); \ } ROCSPARSE_XCSRGEMM_BUFFER_SIZE_SPEC(float, s) @@ -452,20 +382,16 @@ ROCSPARSE_XCSRGEMM_BUFFER_SIZE_SPEC(double, d) 
ROCSPARSE_XCSRGEMM_BUFFER_SIZE_SPEC(rocsparse_float_complex, c) ROCSPARSE_XCSRGEMM_BUFFER_SIZE_SPEC(rocsparse_double_complex, z) -template < - typename KernelHandle, typename ain_row_index_view_type, - typename ain_nonzero_index_view_type, typename bin_row_index_view_type, - typename bin_nonzero_index_view_type, typename cin_row_index_view_type> -void spgemm_symbolic_rocsparse( - KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, - ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, - bin_row_index_view_type rowptrB, bin_nonzero_index_view_type colidxB, - cin_row_index_view_type rowptrC) { - using index_type = typename KernelHandle::nnz_lno_t; - using scalar_type = typename KernelHandle::nnz_scalar_t; - using rocsparse_scalar_type = - typename kokkos_to_rocsparse_type::type; +template +void spgemm_symbolic_rocsparse(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, + ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, + bin_row_index_view_type rowptrB, bin_nonzero_index_view_type colidxB, + cin_row_index_view_type rowptrC) { + using index_type = typename KernelHandle::nnz_lno_t; + using scalar_type = typename KernelHandle::nnz_scalar_t; + using rocsparse_scalar_type = typename kokkos_to_rocsparse_type::type; auto nnz_A = colidxA.extent(0); auto nnz_B = colidxB.extent(0); @@ -474,8 +400,7 @@ void spgemm_symbolic_rocsparse( return; } handle->create_rocsparse_spgemm_handle(false, false); - typename KernelHandle::rocSparseSpgemmHandleType *h = - handle->get_rocsparse_spgemm_handle(); + typename KernelHandle::rocSparseSpgemmHandleType *h = handle->get_rocsparse_spgemm_handle(); // alpha, beta are on host, but since we use singleton on the rocsparse // handle, we save/restore the pointer mode to not interference with @@ -484,90 +409,67 @@ void spgemm_symbolic_rocsparse( const 
auto beta = Kokkos::ArithTraits::zero(); rocsparse_pointer_mode oldPtrMode; - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_get_pointer_mode(h->rocsparseHandle, &oldPtrMode)); - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode( - h->rocsparseHandle, rocsparse_pointer_mode_host)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_get_pointer_mode(h->rocsparseHandle, &oldPtrMode)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode(h->rocsparseHandle, rocsparse_pointer_mode_host)); // C = alpha * OpA(A) * OpB(B) + beta * D KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_Xcsrgemm_buffer_size( - h->rocsparseHandle, h->opA, h->opB, m, k, n, - reinterpret_cast(&alpha), h->descr_A, - nnz_A, rowptrA.data(), colidxA.data(), h->descr_B, nnz_B, rowptrB.data(), - colidxB.data(), reinterpret_cast(&beta), - h->descr_D, 0, nullptr, nullptr, h->info_C, &h->bufferSize)); + h->rocsparseHandle, h->opA, h->opB, m, k, n, reinterpret_cast(&alpha), h->descr_A, + nnz_A, rowptrA.data(), colidxA.data(), h->descr_B, nnz_B, rowptrB.data(), colidxB.data(), + reinterpret_cast(&beta), h->descr_D, 0, nullptr, nullptr, h->info_C, + &h->bufferSize)); KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&h->buffer, h->bufferSize)); rocsparse_int nnz_C = 0; - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_csrgemm_nnz( - h->rocsparseHandle, h->opA, h->opB, m, k, n, h->descr_A, nnz_A, - rowptrA.data(), colidxA.data(), h->descr_B, nnz_B, rowptrB.data(), - colidxB.data(), h->descr_D, 0, nullptr, nullptr, h->descr_C, - rowptrC.data(), &nnz_C, h->info_C, h->buffer)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_csrgemm_nnz(h->rocsparseHandle, h->opA, h->opB, m, k, n, h->descr_A, nnz_A, + rowptrA.data(), colidxA.data(), h->descr_B, nnz_B, + rowptrB.data(), colidxB.data(), h->descr_D, 0, nullptr, nullptr, + h->descr_C, rowptrC.data(), &nnz_C, h->info_C, h->buffer)); // If C has zero rows, its rowptrs are not populated if (m == 0) { - KOKKOS_IMPL_HIP_SAFE_CALL( - hipMemset(rowptrC.data(), 0, rowptrC.extent(0) * 
sizeof(index_type))); + KOKKOS_IMPL_HIP_SAFE_CALL(hipMemset(rowptrC.data(), 0, rowptrC.extent(0) * sizeof(index_type))); } // Restore previous pointer mode - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_set_pointer_mode(h->rocsparseHandle, oldPtrMode)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_set_pointer_mode(h->rocsparseHandle, oldPtrMode)); handle->set_c_nnz(nnz_C); handle->set_call_symbolic(); handle->set_computed_rowptrs(); } -#define SPGEMM_SYMBOLIC_DECL_ROCSPARSE(SCALAR, TPL_AVAIL) \ - template <> \ - struct SPGEMM_SYMBOLIC< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::HIP, Kokkos::HIPSpace, \ - Kokkos::HIPSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, TPL_AVAIL> { \ - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const int, const int, const SCALAR, Kokkos::HIP, Kokkos::HIPSpace, \ - Kokkos::HIPSpace>; \ - using c_int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spgemm_symbolic(KernelHandle *handle, \ - typename KernelHandle::nnz_lno_t m, \ - typename KernelHandle::nnz_lno_t n, \ - typename KernelHandle::nnz_lno_t k, \ - c_int_view_t row_mapA, c_int_view_t entriesA, \ - bool, c_int_view_t row_mapB, \ - c_int_view_t entriesB, bool, \ - int_view_t row_mapC, bool) { \ - std::string label = "KokkosSparse::spgemm[TPL_ROCSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spgemm_symbolic_rocsparse(handle->get_spgemm_handle(), m, n, k, \ - row_mapA, entriesA, row_mapB, entriesB, \ - row_mapC); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define SPGEMM_SYMBOLIC_DECL_ROCSPARSE(SCALAR, TPL_AVAIL) \ + template <> \ + struct SPGEMM_SYMBOLIC, \ + 
Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + using KernelHandle = \ + KokkosKernels::Experimental::KokkosKernelsHandle; \ + using c_int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_symbolic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, bool, c_int_view_t row_mapB, \ + c_int_view_t entriesB, bool, int_view_t row_mapC, bool) { \ + std::string label = "KokkosSparse::spgemm_symbolic[TPL_ROCSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_symbolic_rocsparse(handle->get_spgemm_handle(), m, n, k, row_mapA, entriesA, row_mapB, entriesB, \ + row_mapC); \ + Kokkos::Profiling::popRegion(); \ + } \ }; SPGEMM_SYMBOLIC_DECL_ROCSPARSE(float, false) @@ -582,51 +484,42 @@ SPGEMM_SYMBOLIC_DECL_ROCSPARSE(Kokkos::complex, true) #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -template < - typename KernelHandle, typename ain_row_index_view_type, - typename ain_nonzero_index_view_type, typename bin_row_index_view_type, - typename bin_nonzero_index_view_type, typename cin_row_index_view_type> -void spgemm_symbolic_mkl( - KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, - ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, - bin_row_index_view_type rowptrB, bin_nonzero_index_view_type colidxB, - cin_row_index_view_type rowptrC) { +template +void spgemm_symbolic_mkl(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, typename KernelHandle::nnz_lno_t n, + typename KernelHandle::nnz_lno_t k, ain_row_index_view_type 
rowptrA, + ain_nonzero_index_view_type colidxA, bin_row_index_view_type rowptrB, + bin_nonzero_index_view_type colidxB, cin_row_index_view_type rowptrC) { using ExecSpace = typename KernelHandle::HandleExecSpace; using index_type = typename KernelHandle::nnz_lno_t; using size_type = typename KernelHandle::size_type; using scalar_type = typename KernelHandle::nnz_scalar_t; using MKLMatrix = MKLSparseMatrix; - if (m == 0 || n == 0 || k == 0 || colidxA.extent(0) == size_type(0) || - colidxB.extent(0) == size_type(0)) { + if (m == 0 || n == 0 || k == 0 || colidxA.extent(0) == size_type(0) || colidxB.extent(0) == size_type(0)) { Kokkos::deep_copy(ExecSpace(), rowptrC, size_type(0)); handle->set_call_symbolic(); handle->set_computed_rowptrs(); handle->set_c_nnz(0); return; } - MKLMatrix A(m, n, (MKL_INT *)rowptrA.data(), (MKL_INT *)colidxA.data(), - nullptr); - MKLMatrix B(n, k, (MKL_INT *)rowptrB.data(), (MKL_INT *)colidxB.data(), - nullptr); + MKLMatrix A(m, n, (MKL_INT *)rowptrA.data(), (MKL_INT *)colidxA.data(), nullptr); + MKLMatrix B(n, k, (MKL_INT *)rowptrB.data(), (MKL_INT *)colidxB.data(), nullptr); sparse_matrix_t C; matrix_descr generalDescr; generalDescr.type = SPARSE_MATRIX_TYPE_GENERAL; generalDescr.mode = SPARSE_FILL_MODE_FULL; generalDescr.diag = SPARSE_DIAG_NON_UNIT; - KOKKOSKERNELS_MKL_SAFE_CALL( - mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, A, - SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, B, - SPARSE_STAGE_NNZ_COUNT, &C)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, A, + SPARSE_OPERATION_NON_TRANSPOSE, generalDescr, B, SPARSE_STAGE_NNZ_COUNT, + &C)); MKLMatrix wrappedC(C); MKL_INT nrows = 0, ncols = 0; MKL_INT *rowptrRaw = nullptr; MKL_INT *colidxRaw = nullptr; scalar_type *valuesRaw = nullptr; wrappedC.export_data(nrows, ncols, rowptrRaw, colidxRaw, valuesRaw); - Kokkos::View> - rowptrRawView(rowptrRaw, nrows + 1); + Kokkos::View> rowptrRawView(rowptrRaw, + nrows + 1); 
Kokkos::deep_copy(ExecSpace(), rowptrC, rowptrRawView); handle->create_mkl_spgemm_handle(C); handle->set_call_symbolic(); @@ -634,53 +527,36 @@ void spgemm_symbolic_mkl( handle->set_c_nnz(rowptrC(m)); } -#define SPGEMM_SYMBOLIC_DECL_MKL(SCALAR, EXEC, TPL_AVAIL) \ - template <> \ - struct SPGEMM_SYMBOLIC< \ - KokkosKernels::Experimental::KokkosKernelsHandle< \ - const MKL_INT, const MKL_INT, const SCALAR, EXEC, Kokkos::HostSpace, \ - Kokkos::HostSpace>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, TPL_AVAIL> { \ - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ - const MKL_INT, const MKL_INT, const SCALAR, EXEC, Kokkos::HostSpace, \ - Kokkos::HostSpace>; \ - using c_int_view_t = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using int_view_t = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void spgemm_symbolic(KernelHandle *handle, \ - typename KernelHandle::nnz_lno_t m, \ - typename KernelHandle::nnz_lno_t n, \ - typename KernelHandle::nnz_lno_t k, \ - c_int_view_t row_mapA, c_int_view_t entriesA, \ - bool, c_int_view_t row_mapB, \ - c_int_view_t entriesB, bool, \ - int_view_t row_mapC, bool) { \ - std::string label = "KokkosSparse::spgemm_symbolic[TPL_MKL," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spgemm_symbolic_mkl(handle->get_spgemm_handle(), m, n, k, row_mapA, \ - entriesA, row_mapB, entriesB, row_mapC); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define SPGEMM_SYMBOLIC_DECL_MKL(SCALAR, EXEC, TPL_AVAIL) \ + template <> \ + struct SPGEMM_SYMBOLIC, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + 
using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using c_int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_symbolic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, bool, c_int_view_t row_mapB, \ + c_int_view_t entriesB, bool, int_view_t row_mapC, bool) { \ + std::string label = "KokkosSparse::spgemm_symbolic[TPL_MKL," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_symbolic_mkl(handle->get_spgemm_handle(), m, n, k, row_mapA, entriesA, row_mapB, entriesB, row_mapC); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define SPGEMM_SYMBOLIC_DECL_MKL_SE(SCALAR, EXEC) \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_numeric_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_numeric_tpl_spec_avail.hpp index 87a4b9f001c3..8327f333ab81 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_numeric_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_numeric_tpl_spec_avail.hpp @@ -20,10 +20,9 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct spiluk_numeric_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_symbolic_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_symbolic_tpl_spec_avail.hpp index 4730ec2ffd0d..76bf7df6903f 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_symbolic_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spiluk_symbolic_tpl_spec_avail.hpp @@ -20,9 +20,8 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a 
specialization exists -template +template struct spiluk_symbolic_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp index 16bf1abecfb8..c6e0e6ff21d7 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp @@ -24,8 +24,7 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct spmv_bsrmatrix_tpl_spec_avail { enum : bool { value = false }; }; @@ -36,144 +35,90 @@ struct spmv_bsrmatrix_tpl_spec_avail { // These versions of cuSPARSE require the ordinal and offset types to be the // same. For KokkosKernels, this means int/int only. -#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE( \ - SCALAR, ORDINAL, OFFSET, XL, YL, MEMSPACE) \ - template <> \ - struct spmv_bsrmatrix_tpl_spec_avail< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - const SCALAR, const ORDINAL, Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET>, \ - Kokkos::View< \ - const SCALAR*, XL, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, MEMSPACE) \ + template <> \ + struct spmv_bsrmatrix_tpl_spec_avail< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, const OFFSET>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #if (9000 <= CUDA_VERSION) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - 
Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, - int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) 
-KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, - int, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, - int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, - int, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, 
Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif // CUDA/CUSPARSE >= 9.0? #endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(SCALAR, EXECSPACE) \ - template <> \ - struct spmv_bsrmatrix_tpl_spec_avail< \ - EXECSPACE, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - const SCALAR, const MKL_INT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const MKL_INT>, \ - Kokkos::View< \ - const SCALAR*, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(SCALAR, EXECSPACE) \ + template <> \ + struct spmv_bsrmatrix_tpl_spec_avail< \ + EXECSPACE, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(float, Kokkos::Serial) KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(double, Kokkos::Serial) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::Serial) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::Serial) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::Serial) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(float, Kokkos::OpenMP) KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(double, 
Kokkos::OpenMP) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::OpenMP) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) #endif #endif // Specialization struct which defines whether a specialization exists -template > +template > struct spmv_mv_bsrmatrix_tpl_spec_avail { enum : bool { value = false }; }; @@ -184,97 +129,64 @@ struct spmv_mv_bsrmatrix_tpl_spec_avail { // These versions of cuSPARSE require the ordinal and offset types to be the // same. For KokkosKernels, this means int/int only. // cuSparse level 3 does not currently support LayoutRight -#define KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE( \ - SCALAR, ORDINAL, OFFSET, LAYOUT, MEMSPACE) \ - template <> \ - struct spmv_mv_bsrmatrix_tpl_spec_avail< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - const SCALAR, const ORDINAL, Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET>, \ - Kokkos::View< \ - const SCALAR**, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - false> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, MEMSPACE) \ + template <> \ + struct spmv_mv_bsrmatrix_tpl_spec_avail< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, const OFFSET>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + false> { \ + enum : bool { value = true }; \ }; #if (9000 <= CUDA_VERSION) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, 
Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, - int, int, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif // CUDA/CUSPARSE >= 9.0? 
#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#define KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(SCALAR, EXECSPACE) \ - template <> \ - struct spmv_mv_bsrmatrix_tpl_spec_avail< \ - EXECSPACE, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - const SCALAR, const int, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const int>, \ - Kokkos::View< \ - const SCALAR*, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(SCALAR, EXECSPACE) \ + template <> \ + struct spmv_mv_bsrmatrix_tpl_spec_avail< \ + EXECSPACE, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, const int>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(float, Kokkos::Serial) KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(double, Kokkos::Serial) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::Serial) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::Serial) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(float, Kokkos::OpenMP) KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(double, Kokkos::OpenMP) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::OpenMP) -KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, - Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) 
+KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) #endif #endif @@ -283,62 +195,36 @@ KOKKOSSPARSE_SPMV_MV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, #include "KokkosSparse_Utils_rocsparse.hpp" -#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE( \ - SCALAR, ORDINAL, OFFSET, LAYOUT, MEMSPACE) \ - template <> \ - struct spmv_bsrmatrix_tpl_spec_avail< \ - Kokkos::HIP, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - const SCALAR, const ORDINAL, Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET>, \ - Kokkos::View< \ - const SCALAR*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, MEMSPACE) \ + template <> \ + struct spmv_bsrmatrix_tpl_spec_avail< \ + Kokkos::HIP, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, const OFFSET>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #if KOKKOSSPARSE_IMPL_ROCM_VERSION >= 50200 -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(float, rocsparse_int, - rocsparse_int, - Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(double, rocsparse_int, - rocsparse_int, - Kokkos::LayoutLeft, - Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(float, rocsparse_int, - rocsparse_int, - Kokkos::LayoutRight, - Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(double, rocsparse_int, - rocsparse_int, - Kokkos::LayoutRight, - Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - rocsparse_int, - rocsparse_int, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(float, rocsparse_int, rocsparse_int, 
Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - rocsparse_int, - rocsparse_int, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - rocsparse_int, - rocsparse_int, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace) -KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - rocsparse_int, - rocsparse_int, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, + Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, + Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, + Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, + Kokkos::LayoutRight, Kokkos::HIPSpace) #endif // KOKKOSSPARSE_IMPL_ROCM_VERSION >= 50200 diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index 5e6fb1fa09b8..11bf82f7b46f 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -32,12 +32,10 @@ namespace Impl { // Note: Scalar here is the Kokkos type, not the MKL type template -inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar 
alpha, - Scalar beta, MKL_INT m, MKL_INT n, MKL_INT b, - const MKL_INT* Arowptrs, const MKL_INT* Aentries, - const Scalar* Avalues, const Scalar* x, Scalar* y) { - using MKLScalar = - typename KokkosSparse::Impl::KokkosToMKLScalar::type; +inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Scalar beta, MKL_INT m, MKL_INT n, + MKL_INT b, const MKL_INT* Arowptrs, const MKL_INT* Aentries, const Scalar* Avalues, + const Scalar* x, Scalar* y) { + using MKLScalar = typename KokkosSparse::Impl::KokkosToMKLScalar::type; using ExecSpace = typename Handle::ExecutionSpaceType; using Subhandle = KokkosSparse::Impl::MKL_SpMV_Data; Subhandle* subhandle; @@ -45,9 +43,7 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, MKLScalar* y_mkl = reinterpret_cast(y); if (handle->tpl_rank1) { subhandle = dynamic_cast(handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for MKL BSR"); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for MKL BSR"); // note: classic mkl only runs on synchronous host exec spaces, so no need // to call set_exec_space on the subhandle here } else { @@ -60,64 +56,48 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; // Note: the create_csr routine requires non-const values even though // they're not actually modified - MKLScalar* Avalues_mkl = - reinterpret_cast(const_cast(Avalues)); + MKLScalar* Avalues_mkl = reinterpret_cast(const_cast(Avalues)); if constexpr (std::is_same_v) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), 
const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v>) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v>) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } } MKLScalar alpha_mkl = KokkosSparse::Impl::KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosSparse::Impl::KokkosToMKLScalar(beta); if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_s_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } else if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_d_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, 
beta_mkl, y_mkl)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_c_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_z_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } } // Note: Scalar here is the Kokkos type, not the MKL type template -inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, - Scalar beta, MKL_INT m, MKL_INT n, MKL_INT b, - const MKL_INT* Arowptrs, const MKL_INT* Aentries, - const Scalar* Avalues, const Scalar* x, - MKL_INT colx, MKL_INT ldx, Scalar* y, MKL_INT ldy) { - using MKLScalar = - typename KokkosSparse::Impl::KokkosToMKLScalar::type; +inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Scalar beta, MKL_INT m, MKL_INT n, + MKL_INT b, const MKL_INT* Arowptrs, const MKL_INT* Aentries, const Scalar* Avalues, + const Scalar* x, MKL_INT colx, MKL_INT ldx, Scalar* y, MKL_INT ldy) { + using MKLScalar = typename KokkosSparse::Impl::KokkosToMKLScalar::type; using ExecSpace = typename Handle::ExecutionSpaceType; using Subhandle = KokkosSparse::Impl::MKL_SpMV_Data; Subhandle* subhandle; @@ -125,9 +105,7 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, MKLScalar* y_mkl = reinterpret_cast(y); if (handle->tpl_rank2) { subhandle = dynamic_cast(handle->tpl_rank2); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for MKL BSR"); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for MKL BSR"); // note: classic mkl only runs on synchronous host exec spaces, so no need 
// to call set_exec_space on the subhandle here } else { @@ -140,198 +118,137 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; // Note: the create_csr routine requires non-const values even though // they're not actually modified - MKLScalar* Avalues_mkl = - reinterpret_cast(const_cast(Avalues)); + MKLScalar* Avalues_mkl = reinterpret_cast(const_cast(Avalues)); if constexpr (std::is_same_v) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v>) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v>) { KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_create_bsr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, - n, b, const_cast(Arowptrs), - const_cast(Arowptrs + 1), const_cast(Aentries), - Avalues_mkl)); + &subhandle->mat, 
SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } } MKLScalar alpha_mkl = KokkosSparse::Impl::KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosSparse::Impl::KokkosToMKLScalar(beta); if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_mm( - op, alpha_mkl, subhandle->mat, subhandle->descr, - SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_mm(op, alpha_mkl, subhandle->mat, subhandle->descr, + SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); } else if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_mm( - op, alpha_mkl, subhandle->mat, subhandle->descr, - SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_mm(op, alpha_mkl, subhandle->mat, subhandle->descr, + SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_mm( - op, alpha_mkl, subhandle->mat, subhandle->descr, - SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_mm(op, alpha_mkl, subhandle->mat, subhandle->descr, + SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_mm( - op, alpha_mkl, subhandle->mat, subhandle->descr, - SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); + KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_mm(op, alpha_mkl, subhandle->mat, subhandle->descr, + SPARSE_LAYOUT_ROW_MAJOR, x_mkl, colx, ldx, beta_mkl, y_mkl, ldy)); } } -#define KOKKOSSPARSE_SPMV_MKL(SCALAR, EXECSPACE, COMPILE_LIBRARY) \ - template <> \ - struct SPMV_BSRMATRIX< \ - EXECSPACE, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, MKL_INT 
const, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, MKL_INT const>, \ - Kokkos::View< \ - SCALAR const*, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, MKL_INT const, device_type, \ - Kokkos::MemoryTraits, MKL_INT const>; \ - using XVector = Kokkos::View< \ - SCALAR const*, Kokkos::LayoutLeft, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View>; \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv_bsrmatrix(const EXECSPACE&, Handle* handle, \ - const char mode[], \ - const coefficient_type& alpha, \ - const AMatrix& A, const XVector& X, \ - const coefficient_type& beta, \ - const YVector& Y) { \ - std::string label = "KokkosSparse::spmv[TPL_MKL,BSRMATRIX" + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_bsr_mkl(handle, mode_kk_to_mkl(mode[0]), alpha, beta, A.numRows(), \ - A.numCols(), A.blockDim(), A.graph.row_map.data(), \ - A.graph.entries.data(), A.values.data(), X.data(), \ - Y.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_MKL(SCALAR, EXECSPACE) \ + template <> \ + struct SPMV_BSRMATRIX, \ + ::KokkosSparse::Experimental::BsrMatrix< \ + SCALAR const, MKL_INT const, Kokkos::Device, \ + Kokkos::MemoryTraits, MKL_INT const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true> { \ + using device_type = Kokkos::Device; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = ::KokkosSparse::Experimental::BsrMatrix, MKL_INT const>; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View>; \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void 
spmv_bsrmatrix(const EXECSPACE&, Handle* handle, const char mode[], const coefficient_type& alpha, \ + const AMatrix& A, const XVector& X, const coefficient_type& beta, const YVector& Y) { \ + std::string label = "KokkosSparse::spmv[TPL_MKL,BSRMATRIX," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_bsr_mkl(handle, mode_kk_to_mkl(mode[0]), alpha, beta, A.numRows(), A.numCols(), A.blockDim(), \ + A.graph.row_map.data(), A.graph.entries.data(), A.values.data(), X.data(), Y.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSSPARSE_SPMV_MKL(float, Kokkos::Serial, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(double, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MKL(float, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MKL(double, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSSPARSE_SPMV_MKL(float, Kokkos::OpenMP, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(double, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MKL(float, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MKL(double, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP) #endif #undef KOKKOSSPARSE_SPMV_MKL -#define KOKKOSSPARSE_SPMV_MV_MKL(SCALAR, EXECSPACE, COMPILE_LIBRARY) \ - template <> \ - struct SPMV_MV_BSRMATRIX< \ - EXECSPACE, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - 
::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, MKL_INT const, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, MKL_INT const>, \ - Kokkos::View< \ - SCALAR const**, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, MKL_INT const, device_type, \ - Kokkos::MemoryTraits, MKL_INT const>; \ - using XVector = Kokkos::View< \ - SCALAR const**, Kokkos::LayoutLeft, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View>; \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv_mv_bsrmatrix(const EXECSPACE&, Handle* handle, \ - const char mode[], \ - const coefficient_type& alpha, \ - const AMatrix& A, const XVector& X, \ - const coefficient_type& beta, \ - const YVector& Y) { \ - std::string label = "KokkosSparse::spmv[TPL_MKL,BSRMATRIX" + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - MKL_INT colx = static_cast(X.extent(1)); \ - MKL_INT ldx = static_cast(X.stride_1()); \ - MKL_INT ldy = static_cast(Y.stride_1()); \ - spmv_mv_bsr_mkl(handle, mode_kk_to_mkl(mode[0]), alpha, beta, \ - A.numRows(), A.numCols(), A.blockDim(), \ - A.graph.row_map.data(), A.graph.entries.data(), \ - A.values.data(), X.data(), colx, ldx, Y.data(), ldy); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_MV_MKL(SCALAR, EXECSPACE) \ + template <> \ + struct SPMV_MV_BSRMATRIX< \ + EXECSPACE, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, MKL_INT const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, true> { \ + using device_type = Kokkos::Device; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; 
\ + using AMatrix = ::KokkosSparse::Experimental::BsrMatrix, MKL_INT const>; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View>; \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_mv_bsrmatrix(const EXECSPACE&, Handle* handle, const char mode[], const coefficient_type& alpha, \ + const AMatrix& A, const XVector& X, const coefficient_type& beta, \ + const YVector& Y) { \ + std::string label = "KokkosSparse::spmv_mv[TPL_MKL,BSRMATRIX," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + MKL_INT colx = static_cast(X.extent(1)); \ + MKL_INT ldx = static_cast(X.stride_1()); \ + MKL_INT ldy = static_cast(Y.stride_1()); \ + spmv_mv_bsr_mkl(handle, mode_kk_to_mkl(mode[0]), alpha, beta, A.numRows(), A.numCols(), A.blockDim(), \ + A.graph.row_map.data(), A.graph.entries.data(), A.values.data(), X.data(), colx, ldx, Y.data(), \ + ldy); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSSPARSE_SPMV_MV_MKL(float, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_MKL(double, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MV_MKL(float, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MV_MKL(double, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSSPARSE_SPMV_MV_MKL(float, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_MKL(double, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) 
+KOKKOSSPARSE_SPMV_MV_MKL(float, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MV_MKL(double, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP) #endif #undef KOKKOSSPARSE_SPMV_MV_MKL @@ -360,19 +277,15 @@ namespace KokkosSparse { namespace Impl { template -void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, - const char mode[], - typename YVector::non_const_value_type const& alpha, - const AMatrix& A, const XVector& x, - typename YVector::non_const_value_type const& beta, - const YVector& y) { +void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], + typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, + typename YVector::const_value_type& beta, const YVector& y) { using offset_type = typename AMatrix::non_const_size_type; using entry_type = typename AMatrix::non_const_ordinal_type; using value_type = typename AMatrix::non_const_value_type; /* initialize cusparse library */ - cusparseHandle_t cusparseHandle = - KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; + cusparseHandle_t cusparseHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; /* Set cuSPARSE to use the given stream until this function exits */ KokkosSparse::Impl::TemporarySetCusparseStream tscs(cusparseHandle, exec); @@ -389,67 +302,49 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; if (handle->tpl_rank1) { - subhandle = dynamic_cast( - handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for cusparse"); + subhandle = dynamic_cast(handle->tpl_rank1); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ subhandle = new 
KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); handle->tpl_rank1 = subhandle; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; /* perform the actual SpMV operation */ - static_assert( - std::is_same_v && std::is_same_v, - "With cuSPARSE non-generic API, offset and entry types must both be int. " - "Something wrong with TPL avail logic."); + static_assert(std::is_same_v && std::is_same_v, + "With cuSPARSE non-generic API, offset and entry types must both be int. " + "Something wrong with TPL avail logic."); if constexpr (std::is_same_v) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseSbsrmv( - cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), - A.nnz(), reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), A.graph.row_map.data(), - A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), - reinterpret_cast(&beta), - reinterpret_cast(y.data()))); + cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), + reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), + A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), + reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDbsrmv( - cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), - A.nnz(), reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), - 
reinterpret_cast(x.data()), - reinterpret_cast(&beta), - reinterpret_cast(y.data()))); + cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), + reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), + A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), + reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCbsrmv( - cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), - A.nnz(), reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), - reinterpret_cast(&beta), - reinterpret_cast(y.data()))); + cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), + reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), + A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), + reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseZbsrmv( - cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), - A.nnz(), reinterpret_cast(&alpha), - subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), - reinterpret_cast(&beta), - reinterpret_cast(y.data()))); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseZbsrmv(cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), + reinterpret_cast(&alpha), subhandle->mat, + reinterpret_cast(A.values.data()), A.graph.row_map.data(), + A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), + reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else { static_assert(KokkosKernels::Impl::always_false_v, "Trying to call cusparse[*]bsrmv with a scalar type not " @@ -475,19 +370,15 @@ void 
spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, // so we just do not support LayoutRight in cuSparse TPL now (this is // statically asserted here) template -void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, - const char mode[], - typename YVector::non_const_value_type const& alpha, - const AMatrix& A, const XVector& x, - typename YVector::non_const_value_type const& beta, - const YVector& y) { +void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], + typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, + typename YVector::const_value_type& beta, const YVector& y) { using offset_type = typename AMatrix::non_const_size_type; using entry_type = typename AMatrix::non_const_ordinal_type; using value_type = typename AMatrix::non_const_value_type; /* initialize cusparse library */ - cusparseHandle_t cusparseHandle = - KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; + cusparseHandle_t cusparseHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; /* Set cuSPARSE to use the given stream until this function exits */ KokkosSparse::Impl::TemporarySetCusparseStream tscs(cusparseHandle, exec); @@ -508,78 +399,58 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const int ldx = static_cast(x.stride(1)); const int ldy = static_cast(y.stride(1)); - static_assert( - std::is_same_v && - std::is_same_v, - "cuSPARSE requires both X and Y to be LayoutLeft."); + static_assert(std::is_same_v && + std::is_same_v, + "cuSPARSE requires both X and Y to be LayoutLeft."); KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; if (handle->tpl_rank2) { - subhandle = dynamic_cast( - handle->tpl_rank2); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for cusparse"); + subhandle = dynamic_cast(handle->tpl_rank2); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for 
cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); handle->tpl_rank2 = subhandle; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; /* perform the actual SpMV operation */ - static_assert( - std::is_same_v && std::is_same_v, - "With cuSPARSE non-generic API, offset and entry types must both be int. " - "Something wrong with TPL avail logic."); + static_assert(std::is_same_v && std::is_same_v, + "With cuSPARSE non-generic API, offset and entry types must both be int. 
" + "Something wrong with TPL avail logic."); if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSbsrmm( - cusparseHandle, dirA, myCusparseOperation, - CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), - A.nnz(), reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), A.graph.row_map.data(), - A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), ldx, - reinterpret_cast(&beta), - reinterpret_cast(y.data()), ldy)); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseSbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, + A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, + reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), + A.blockDim(), reinterpret_cast(x.data()), ldx, + reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDbsrmm( - cusparseHandle, dirA, myCusparseOperation, - CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), - A.nnz(), reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), ldx, - reinterpret_cast(&beta), - reinterpret_cast(y.data()), ldy)); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseDbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, + A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, + reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), + A.blockDim(), reinterpret_cast(x.data()), ldx, + reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCbsrmm( - cusparseHandle, dirA, myCusparseOperation, - CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), - A.nnz(), reinterpret_cast(&alpha), subhandle->mat, - 
reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), ldx, - reinterpret_cast(&beta), - reinterpret_cast(y.data()), ldy)); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseCbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, + A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, + reinterpret_cast(A.values.data()), A.graph.row_map.data(), + A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), ldx, + reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else if constexpr (std::is_same_v>) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseZbsrmm( - cusparseHandle, dirA, myCusparseOperation, - CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), - A.nnz(), reinterpret_cast(&alpha), - subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), - reinterpret_cast(x.data()), ldx, - reinterpret_cast(&beta), - reinterpret_cast(y.data()), ldy)); + cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), + A.nnz(), reinterpret_cast(&alpha), subhandle->mat, + reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), + A.blockDim(), reinterpret_cast(x.data()), ldx, + reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else { static_assert(KokkosKernels::Impl::always_false_v, "Trying to call cusparse[*]bsrmm with a scalar type not " @@ -587,177 +458,106 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, } } -#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE, \ - COMPILE_LIBRARY) \ - template <> \ - struct SPMV_BSRMATRIX< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, ORDINAL const, Kokkos::Device, \ - Kokkos::MemoryTraits, OFFSET const>, \ - Kokkos::View< \ - SCALAR const*, 
LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using memory_trait_type = Kokkos::MemoryTraits; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, ORDINAL const, device_type, memory_trait_type, \ - OFFSET const>; \ - using XVector = Kokkos::View< \ - SCALAR const*, LAYOUT, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View; \ - \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv_bsrmatrix(const Kokkos::Cuda& exec, Handle* handle, \ - const char mode[], \ - const coefficient_type& alpha, \ - const AMatrix& A, const XVector& x, \ - const coefficient_type& beta, \ - const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_CUSPARSE,BSRMATRIX" + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_bsr_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE) \ + template <> \ + struct SPMV_BSRMATRIX< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, OFFSET const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = ::KokkosSparse::Experimental::BsrMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_bsrmatrix(const Kokkos::Cuda& exec, Handle* handle, const char mode[], \ + const coefficient_type& alpha, const AMatrix& A, const XVector& x, 
\ + const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_CUSPARSE,BSRMATRIX," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_bsr_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - 
KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #undef KOKKOSSPARSE_SPMV_CUSPARSE // cuSparse TPL does not support LayoutRight for this 
operation // only specialize for LayoutLeft -#define KOKKOSSPARSE_SPMV_MV_CUSPARSE(SCALAR, ORDINAL, OFFSET, SPACE, \ - ETI_AVAIL) \ - template <> \ - struct SPMV_MV_BSRMATRIX< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, ORDINAL const, Kokkos::Device, \ - Kokkos::MemoryTraits, OFFSET const>, \ - Kokkos::View< \ - SCALAR const**, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - false, true, ETI_AVAIL> { \ - using device_type = Kokkos::Device; \ - using memory_trait_type = Kokkos::MemoryTraits; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, ORDINAL const, device_type, memory_trait_type, \ - OFFSET const>; \ - using XVector = Kokkos::View< \ - SCALAR const**, Kokkos::LayoutLeft, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View; \ - \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv_mv_bsrmatrix(const Kokkos::Cuda& exec, Handle* handle, \ - const char mode[], \ - const coefficient_type& alpha, \ - const AMatrix& A, const XVector& x, \ - const coefficient_type& beta, \ - const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_CUSPARSE,BSRMATRIX" + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_mv_bsr_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_MV_CUSPARSE(SCALAR, ORDINAL, OFFSET, SPACE, ETI_AVAIL) \ + template <> \ + struct SPMV_MV_BSRMATRIX< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, OFFSET const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true, ETI_AVAIL> { \ + using device_type = Kokkos::Device; \ + 
using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = ::KokkosSparse::Experimental::BsrMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_mv_bsrmatrix(const Kokkos::Cuda& exec, Handle* handle, const char mode[], \ + const coefficient_type& alpha, const AMatrix& A, const XVector& x, \ + const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv_mv[TPL_CUSPARSE,BSRMATRIX," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_mv_bsr_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::CudaSpace, true) KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::CudaSpace, false) KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::CudaSpace, true) KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::CudaSpace, false) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaSpace, true) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaSpace, false) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaSpace, true) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaSpace, false) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaSpace, true) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaSpace, false) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaSpace, true) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaSpace, false) KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::CudaUVMSpace, true) KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::CudaUVMSpace, false) KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::CudaUVMSpace, true) 
KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::CudaUVMSpace, false) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaUVMSpace, true) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaUVMSpace, false) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaUVMSpace, true) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::CudaUVMSpace, false) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaUVMSpace, true) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaUVMSpace, false) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaUVMSpace, true) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaUVMSpace, false) #undef KOKKOSSPARSE_SPMV_MV_CUSPARSE @@ -780,12 +580,9 @@ namespace KokkosSparse { namespace Impl { template -void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, - const char mode[], - typename YVector::non_const_value_type const& alpha, - const AMatrix& A, const XVector& x, - typename YVector::non_const_value_type const& beta, - const YVector& y) { +void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], + typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, + typename YVector::const_value_type& beta, const YVector& y) { /* rocm 5.4.0 rocsparse_*bsrmv reference: https://rocsparse.readthedocs.io/en/rocm-5.4.0/usermanual.html#rocsparse-bsrmv-ex @@ -795,47 +592,34 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, */ - using offset_type = typename AMatrix::non_const_size_type; - using ordinal_type = typename AMatrix::non_const_ordinal_type; - using value_type = typename AMatrix::non_const_value_type; - using rocsparse_value_type = - typename KokkosSparse::Impl::kokkos_to_rocsparse_type::type; + using offset_type = typename AMatrix::non_const_size_type; + using ordinal_type = typename AMatrix::non_const_ordinal_type; + using 
value_type = typename AMatrix::non_const_value_type; + using rocsparse_value_type = typename KokkosSparse::Impl::kokkos_to_rocsparse_type::type; // assert ordinals and offsets are the expected types - static_assert(std::is_same_v, - "A offset_type must be rocsparse_int"); - static_assert(std::is_same_v, - "A ordinal_type must be rocsparse_int"); + static_assert(std::is_same_v, "A offset_type must be rocsparse_int"); + static_assert(std::is_same_v, "A ordinal_type must be rocsparse_int"); // assert all operands are the same type using x_value_type = typename XVector::non_const_value_type; using y_value_type = typename YVector::non_const_value_type; - static_assert(std::is_same_v, - "A and x must have same value type"); - static_assert(std::is_same_v, - "A and y must have same value type"); + static_assert(std::is_same_v, "A and x must have same value type"); + static_assert(std::is_same_v, "A and y must have same value type"); // assert X and Y are non-stride (pass raw pointers to TPL) - static_assert( - !std::is_same_v, - "x must be contiguous"); - static_assert( - !std::is_same_v, - "y must be contiguous"); + static_assert(!std::is_same_v, "x must be contiguous"); + static_assert(!std::is_same_v, "y must be contiguous"); // assert BSR data is non-stride (pass raw pointers to TPL) - static_assert(!std::is_same_v, + static_assert(!std::is_same_v, "A values must be contiguous"); - static_assert(!std::is_same_v, + static_assert(!std::is_same_v, "A row_map must be contiguous"); - static_assert(!std::is_same_v, + static_assert(!std::is_same_v, "A entries must be contiguous"); - rocsparse_handle rocsparseHandle = - KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; + rocsparse_handle rocsparseHandle = KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; // resets handle stream to NULL when out of scope KokkosSparse::Impl::TemporarySetRocsparseStream tsrs(rocsparseHandle, exec); @@ -858,91 +642,77 @@ void spmv_bsr_rocsparse(const 
Kokkos::HIP& exec, Handle* handle, rocsparse_direction_column Parse the matrix by columns. */ // KokkosSparse Bsr matrix blocks are layoutright (row-major) - static_assert( - std::is_same_v, - "A blocks must be stored layout-right"); + static_assert(std::is_same_v, + "A blocks must be stored layout-right"); rocsparse_direction dir = rocsparse_direction_row; - const rocsparse_int mb = rocsparse_int(A.numRows()); // number of block rows - const rocsparse_int nb = rocsparse_int(A.numCols()); // number of block cols - const rocsparse_int nnzb = - rocsparse_int(A.nnz()); // number of non-zero blocks - const rocsparse_value_type* alpha_ = - reinterpret_cast(&alpha); - - const rocsparse_value_type* bsr_val = - reinterpret_cast(A.values.data()); - const rocsparse_int* bsr_row_ptr = A.graph.row_map.data(); - const rocsparse_int* bsr_col_ind = A.graph.entries.data(); - const rocsparse_int block_dim = rocsparse_int(A.blockDim()); - const rocsparse_value_type* x_ = - reinterpret_cast(x.data()); - const rocsparse_value_type* beta_ = - reinterpret_cast(&beta); - rocsparse_value_type* y_ = reinterpret_cast(y.data()); + const rocsparse_int mb = rocsparse_int(A.numRows()); // number of block rows + const rocsparse_int nb = rocsparse_int(A.numCols()); // number of block cols + const rocsparse_int nnzb = rocsparse_int(A.nnz()); // number of non-zero blocks + const rocsparse_value_type* alpha_ = reinterpret_cast(&alpha); + + const rocsparse_value_type* bsr_val = reinterpret_cast(A.values.data()); + const rocsparse_int* bsr_row_ptr = A.graph.row_map.data(); + const rocsparse_int* bsr_col_ind = A.graph.entries.data(); + const rocsparse_int block_dim = rocsparse_int(A.blockDim()); + const rocsparse_value_type* x_ = reinterpret_cast(x.data()); + const rocsparse_value_type* beta_ = reinterpret_cast(&beta); + rocsparse_value_type* y_ = reinterpret_cast(y.data()); KokkosSparse::Impl::RocSparse_BSR_SpMV_Data* subhandle; if (handle->tpl_rank1) { - subhandle = dynamic_cast( - 
handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for rocsparse BSR"); + subhandle = dynamic_cast(handle->tpl_rank1); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for rocsparse BSR"); subhandle->set_exec_space(exec); } else { subhandle = new KokkosSparse::Impl::RocSparse_BSR_SpMV_Data(exec); handle->tpl_rank1 = subhandle; - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_create_mat_descr(&subhandle->mat)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_descr(&subhandle->mat)); // *_ex* functions deprecated in introduced in 6+ #if KOKKOSSPARSE_IMPL_ROCM_VERSION >= 60000 - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_create_mat_info(&subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_info(&subhandle->info)); if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_analysis(rocsparseHandle, dir, trans, mb, 
nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else { - static_assert(KokkosKernels::Impl::always_false_v, - "unsupported value type for rocsparse_*bsrmv"); + static_assert(KokkosKernels::Impl::always_false_v, "unsupported value type for rocsparse_*bsrmv"); } // *_ex* functions introduced in 5.4.0 #elif KOKKOSSPARSE_IMPL_ROCM_VERSION < 50400 // No analysis step in the older versions #else - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_create_mat_info(&subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_info(&subhandle->info)); if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_ex_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_ex_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_ex_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_ex_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_ex_analysis( - rocsparseHandle, dir, trans, mb, 
nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_ex_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_ex_analysis( - rocsparseHandle, dir, trans, mb, nb, nnzb, subhandle->mat, bsr_val, - bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_ex_analysis(rocsparseHandle, dir, trans, mb, nb, nnzb, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, + block_dim, subhandle->info)); } else { - static_assert(KokkosKernels::Impl::always_false_v, - "unsupported value type for rocsparse_*bsrmv"); + static_assert(KokkosKernels::Impl::always_false_v, "unsupported value type for rocsparse_*bsrmv"); } #endif } @@ -950,151 +720,106 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, // *_ex* functions deprecated in introduced in 6+ #if KOKKOSSPARSE_IMPL_ROCM_VERSION >= 60000 if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_sbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info, x_, + beta_, y_)); } else if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_dbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info, x_, + beta_, y_)); 
} else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_cbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info, x_, + beta_, y_)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_zbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, subhandle->info, x_, + beta_, y_)); } else { - static_assert(KokkosKernels::Impl::always_false_v, - "unsupported value type for rocsparse_*bsrmv"); + static_assert(KokkosKernels::Impl::always_false_v, "unsupported value type for rocsparse_*bsrmv"); } // *_ex* functions introduced in 5.4.0 #elif KOKKOSSPARSE_IMPL_ROCM_VERSION < 50400 if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv( - rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, - bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); } else if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv( - rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, - bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, 
y_)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv( - rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, - bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv( - rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, - bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, subhandle->mat, + bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, x_, beta_, y_)); } else { - static_assert(KokkosKernels::Impl::always_false_v, - "unsupported value type for rocsparse_*bsrmv"); + static_assert(KokkosKernels::Impl::always_false_v, "unsupported value type for rocsparse_*bsrmv"); } #else if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_sbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, + subhandle->info, x_, beta_, y_)); } else if constexpr (std::is_same_v) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_dbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, + subhandle->info, x_, beta_, y_)); } else if constexpr (std::is_same_v>) { - 
KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_cbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, + subhandle->info, x_, beta_, y_)); } else if constexpr (std::is_same_v>) { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_zbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, - subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, - block_dim, subhandle->info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_ex(rocsparseHandle, dir, trans, mb, nb, nnzb, alpha_, + subhandle->mat, bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, + subhandle->info, x_, beta_, y_)); } else { - static_assert(KokkosKernels::Impl::always_false_v, - "unsupported value type for rocsparse_*bsrmv"); + static_assert(KokkosKernels::Impl::always_false_v, "unsupported value type for rocsparse_*bsrmv"); } #endif } // spmv_bsr_rocsparse -#define KOKKOSSPARSE_SPMV_ROCSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE, \ - COMPILE_LIBRARY) \ - template <> \ - struct SPMV_BSRMATRIX< \ - Kokkos::HIP, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, ORDINAL const, Kokkos::Device, \ - Kokkos::MemoryTraits, OFFSET const>, \ - Kokkos::View< \ - SCALAR const*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using memory_trait_type = Kokkos::MemoryTraits; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = ::KokkosSparse::Experimental::BsrMatrix< \ - SCALAR const, ORDINAL const, device_type, memory_trait_type, \ - OFFSET const>; \ - using XVector = Kokkos::View< \ - SCALAR const*, LAYOUT, device_type, \ - 
Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View; \ - \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv_bsrmatrix(const Kokkos::HIP& exec, Handle* handle, \ - const char mode[], \ - const coefficient_type& alpha, \ - const AMatrix& A, const XVector& x, \ - const coefficient_type& beta, \ - const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_ROCSPARSE,BSRMATRIX" + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_bsr_rocsparse(exec, handle, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_ROCSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE) \ + template <> \ + struct SPMV_BSRMATRIX< \ + Kokkos::HIP, KokkosSparse::Impl::SPMVHandleImpl, \ + ::KokkosSparse::Experimental::BsrMatrix, \ + Kokkos::MemoryTraits, OFFSET const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = ::KokkosSparse::Experimental::BsrMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_bsrmatrix(const Kokkos::HIP& exec, Handle* handle, const char mode[], \ + const coefficient_type& alpha, const AMatrix& A, const XVector& x, \ + const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_ROCSPARSE,BSRMATRIX," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_bsr_rocsparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, - Kokkos::LayoutLeft, Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(float, 
rocsparse_int, rocsparse_int, - Kokkos::LayoutRight, Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, - Kokkos::LayoutLeft, Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, - Kokkos::LayoutRight, Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, - rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, - rocsparse_int, Kokkos::LayoutRight, - Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, - rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, - rocsparse_int, Kokkos::LayoutRight, - Kokkos::HIPSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY); +KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, + Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, + Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, + Kokkos::HIPSpace); #undef KOKKOSSPARSE_SPMV_ROCSPARSE diff --git 
a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_avail.hpp index 44a8098ca3ab..aa13fdb2f7d1 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_avail.hpp @@ -21,31 +21,23 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template > +template > struct spmv_mv_tpl_spec_avail { enum : bool { value = false }; }; #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE -#define KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, ORDINAL, OFFSET, \ - XL, YL, MEMSPACE) \ - template <> \ - struct spmv_mv_tpl_spec_avail< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix< \ - const SCALAR, const ORDINAL, Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET>, \ - Kokkos::View< \ - const SCALAR**, XL, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, MEMSPACE) \ + template <> \ + struct spmv_mv_tpl_spec_avail< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const OFFSET>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; /* CUSPARSE_VERSION 10300 and lower seem to have a bug in cusparseSpMM @@ -58,103 +50,95 @@ CUDA 10.2.89) */ * ALG1 and ALG3 produce completely incorrect results for one set of inputs. * ALG2 works for that case, but has low numerical accuracy in another case. 
*/ -#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) && \ - (CUSPARSE_VERSION != 11702) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) && (CUSPARSE_VERSION != 11702) +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - 
Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutLeft, 
+KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, - int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, - int, Kokkos::LayoutRight, - Kokkos::LayoutLeft, - Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutRight, + Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, - int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, - int, Kokkos::LayoutRight, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutRight, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif #endif // defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) #endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE +#define KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, XL, YL, MEMSPACE) \ + template <> \ + struct spmv_mv_tpl_spec_avail< \ + Kokkos::HIP, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const rocsparse_int>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +#define AVAIL_ROCSPARSE_SCALAR_MEMSPACE(SCALAR, MEMSPACE) \ + 
KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutRight, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutLeft, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutRight, MEMSPACE) + +#define AVAIL_ROCSPARSE_SCALAR(SCALAR) \ + AVAIL_ROCSPARSE_SCALAR_MEMSPACE(SCALAR, Kokkos::HIPSpace) \ + AVAIL_ROCSPARSE_SCALAR_MEMSPACE(SCALAR, Kokkos::HIPManagedSpace) + +AVAIL_ROCSPARSE_SCALAR(float) +AVAIL_ROCSPARSE_SCALAR(double) +AVAIL_ROCSPARSE_SCALAR(Kokkos::complex) +AVAIL_ROCSPARSE_SCALAR(Kokkos::complex) + +#undef AVAIL_ROCSPARSE_SCALAR_MEMSPACE +#undef AVAIL_ROCSPARSE_SCALAR +#undef KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_ROCSPARSE + +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE + } // namespace Impl } // namespace KokkosSparse diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp index 853b93f47e7c..c4cc3fbc88bd 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp @@ -28,8 +28,7 @@ Version 11702 corresponds to CUDA 11.6.1, which also produces incorrect results. 11701 (CUDA 11.6.0) is OK. 
*/ -#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) && \ - (CUSPARSE_VERSION != 11702) +#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) && (CUSPARSE_VERSION != 11702) #include "cusparse.h" #include "KokkosSparse_Utils_cusparse.hpp" @@ -45,8 +44,7 @@ namespace Impl { cuSparse 11.5.1+ does not support uniform precision for FP16 Otherwise, uniform precision is supported */ -template +template cudaDataType compute_type() { return cuda_data_type_from(); } @@ -70,8 +68,7 @@ cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) { // If the view is LayoutRight, we still need to create descr as column-major // but it should be an implicit transpose, meaning dimensions and strides are // swapped - bool transpose = - std::is_same_v; + bool transpose = std::is_same_v; const size_t rows = transpose ? view.extent(1) : view.extent(0); const size_t cols = transpose ? view.extent(0) : view.extent(1); const size_t ld = transpose ? view.stride(0) : view.stride(1); @@ -79,11 +76,9 @@ cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) { // cusparseCreateCsr notes it is safe to const_cast this away for input // pointers to a descriptor as long as that descriptor is not an output // parameter - void *values = - const_cast(view.data()); + void *values = const_cast(view.data()); - cudaDataType valueType = - cuda_data_type_from(); + cudaDataType valueType = cuda_data_type_from(); // col-major is the only supported order in 10301 // ignore the layout of the provided view, and expect the caller to @@ -92,24 +87,18 @@ cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) { const cusparseOrder_t order = CUSPARSE_ORDER_COL; cusparseDnMatDescr_t descr; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnMat( - &descr, static_cast(rows), static_cast(cols), - static_cast(ld), values, valueType, order)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnMat(&descr, static_cast(rows), static_cast(cols), + static_cast(ld), values, valueType, order)); 
return descr; } template -void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, - const char mode[], - typename YVector::non_const_value_type const &alpha, - const AMatrix &A, const XVector &x, - typename YVector::non_const_value_type const &beta, - const YVector &y) { - static_assert(XVector::rank == 2, - "should only be instantiated for multivector"); - static_assert(YVector::rank == 2, - "should only be instantiated for multivector"); +void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, const char mode[], + typename YVector::const_value_type &alpha, const AMatrix &A, const XVector &x, + typename YVector::const_value_type &beta, const YVector &y) { + static_assert(XVector::rank == 2, "should only be instantiated for multivector"); + static_assert(YVector::rank == 2, "should only be instantiated for multivector"); using offset_type = typename AMatrix::non_const_size_type; using entry_type = typename AMatrix::non_const_ordinal_type; @@ -118,17 +107,14 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, using y_value_type = typename YVector::non_const_value_type; /* initialize cusparse library */ - cusparseHandle_t cusparseHandle = - KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; + cusparseHandle_t cusparseHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; /* Set cuSPARSE to use the given stream until this function exits */ TemporarySetCusparseStream tscs(cusparseHandle, exec); /* Check that cusparse can handle the types of the input Kokkos::CrsMatrix */ - const cusparseIndexType_t myCusparseOffsetType = - cusparse_index_type_t_from(); - const cusparseIndexType_t myCusparseEntryType = - cusparse_index_type_t_from(); - const cudaDataType aCusparseType = cuda_data_type_from(); + const cusparseIndexType_t myCusparseOffsetType = cusparse_index_type_t_from(); + const cusparseIndexType_t myCusparseEntryType = cusparse_index_type_t_from(); + const cudaDataType aCusparseType = 
cuda_data_type_from(); /* Set the operation mode */ cusparseOperation_t opA; @@ -148,18 +134,14 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, order. For CUSPARSE_VERSION 10301 this is the only supported ordering. if X is not LayoutLeft, we can fix with a transpose. If cusparseSpMM ever supports row-major dense matrices, this logic will have to be reworked */ - constexpr bool xIsLL = - std::is_same::value; - constexpr bool xIsLR = - std::is_same::value; + constexpr bool xIsLL = std::is_same::value; + constexpr bool xIsLR = std::is_same::value; static_assert(xIsLL || xIsLR, "X multivector was not LL or LR (TPL error)"); - static_assert( - std::is_same_v, - "Y multivector was not LL (TPL error)"); + static_assert(std::is_same_v, + "Y multivector was not LL (TPL error)"); cusparseDnMatDescr_t vecX = make_cusparse_dn_mat_descr_t(x); cusparseDnMatDescr_t vecY = make_cusparse_dn_mat_descr_t(y); - cusparseOperation_t opB = - xIsLL ? CUSPARSE_OPERATION_NON_TRANSPOSE : CUSPARSE_OPERATION_TRANSPOSE; + cusparseOperation_t opB = xIsLL ? 
CUSPARSE_OPERATION_NON_TRANSPOSE : CUSPARSE_OPERATION_TRANSPOSE; // CUSPARSE_MM_ALG_DEFAULT was deprecated in CUDA 11.0.1 / cuSPARSE 11.0.0 and // removed in CUDA 12.0.0 / cuSPARSE 12.0.0 @@ -170,8 +152,7 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, #endif // the precision of the SpMV - const cudaDataType computeType = - compute_type(); + const cudaDataType computeType = compute_type(); // cuSPARSE fails when conjugate_transpose is requested on R types // to avoid this problem we switch to transpose since the two are @@ -187,167 +168,106 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, KokkosSparse::Impl::CuSparse10_SpMV_Data *subhandle; if (handle->tpl_rank2) { - subhandle = dynamic_cast( - handle->tpl_rank2); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for cusparse"); + subhandle = dynamic_cast(handle->tpl_rank2); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); handle->tpl_rank2 = subhandle; /* create matrix */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), - (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(), - (void *)A.values.data(), myCusparseOffsetType, myCusparseEntryType, - CUSPARSE_INDEX_BASE_ZERO, aCusparseType)); - - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM_bufferSize( - cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, vecY, - computeType, algo, &subhandle->bufferSize)); - - KOKKOS_IMPL_CUDA_SAFE_CALL( - cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), + (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(), + (void *)A.values.data(), myCusparseOffsetType, myCusparseEntryType, + CUSPARSE_INDEX_BASE_ZERO, 
aCusparseType)); + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM_bufferSize(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, + vecY, computeType, algo, &subhandle->bufferSize)); + + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); } - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, - subhandle->mat, vecX, &beta, vecY, + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, vecY, computeType, algo, subhandle->buffer)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecX)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecY)); } -#define KOKKOSSPARSE_SPMV_MV_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, SPACE, \ - COMPILE_LIBRARY) \ - template <> \ - struct SPMV_MV< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix< \ - SCALAR const, ORDINAL const, Kokkos::Device, \ - Kokkos::MemoryTraits, OFFSET const>, \ - Kokkos::View< \ - SCALAR const **, XL, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - false, true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using memory_trait_type = Kokkos::MemoryTraits; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = CrsMatrix; \ - using XVector = Kokkos::View< \ - SCALAR const **, XL, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View; \ - \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv_mv(const Kokkos::Cuda &exec, Handle *handle, \ - const char mode[], const coefficient_type &alpha, \ - const AMatrix &A, const XVector &x, \ - const coefficient_type &beta, const YVector &y) { \ - std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_mv_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define 
KOKKOSSPARSE_SPMV_MV_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, SPACE) \ + template <> \ + struct SPMV_MV< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, OFFSET const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + false, true> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = CrsMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_mv(const Kokkos::Cuda &exec, Handle *handle, const char mode[], const coefficient_type &alpha, \ + const AMatrix &A, const XVector &x, const coefficient_type &beta, const YVector &y) { \ + std::string label = "KokkosSparse::spmv_mv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_mv_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; /* cusparseSpMM with following restrictions column-major ordering for Y col-major or row-major for X (see note below) 32-bit indices for matrix A */ -KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::LayoutLeft, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::LayoutLeft, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - 
KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaSpace) + 
+KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, - Kokkos::LayoutRight, Kokkos::LayoutLeft, - 
Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) - -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, - Kokkos::LayoutLeft, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, - Kokkos::LayoutRight, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, Kokkos::LayoutRight, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) #endif @@ -358,4 +278,163 @@ KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::Experimental::half_t, int, int, #endif // defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) #endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +// rocSPARSE +#if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) +#include "KokkosSparse_Utils_rocsparse.hpp" + +namespace KokkosSparse { +namespace Impl { + +template +void spmv_mv_rocsparse(const Kokkos::HIP &exec, Handle *handle, const char mode[], + typename YVector::const_value_type &alpha, const AMatrix &A, const XVector &x, + typename YVector::const_value_type &beta, const YVector &y) { + using offset_type = typename AMatrix::non_const_size_type; + using entry_type = typename AMatrix::non_const_ordinal_type; + using value_type = typename AMatrix::non_const_value_type; + + // initialize rocsparse library + rocsparse_handle rocsparseHandle = KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; + // Set rocsparse to use the given stream until this function exits + TemporarySetRocsparseStream 
tsrs(rocsparseHandle, exec); + + rocsparse_operation rocsparseOperation = mode_kk_to_rocsparse(mode); + rocsparse_indextype offset_index_type = rocsparse_index_type(); + rocsparse_indextype entry_index_type = rocsparse_index_type(); + rocsparse_datatype compute_type = rocsparse_compute_type(); + + // Create rocsparse dense multivectors for X and Y + void *x_data = static_cast(const_cast(x.data())); + void *y_data = static_cast(const_cast(y.data())); + + size_t x_ld, y_ld; + rocsparse_order x_order, y_order; + if constexpr (std::is_same_v) { + x_ld = x.stride(1); + x_order = rocsparse_order_column; + } else { + static_assert(std::is_same_v, + "rocsparse_spmm internal logic error: x is neither LayoutLeft nor " + "LayoutRight"); + x_ld = x.stride(0); + x_order = rocsparse_order_row; + } + if constexpr (std::is_same_v) { + y_ld = y.stride(1); + y_order = rocsparse_order_column; + } else { + static_assert(std::is_same_v, + "rocsparse_spmm internal logic error: y is neither LayoutLeft nor " + "LayoutRight"); + y_ld = y.stride(0); + y_order = rocsparse_order_row; + } + + rocsparse_dnmat_descr vecX, vecY; + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( + rocsparse_create_dnmat_descr(&vecX, x.extent(0), x.extent(1), x_ld, x_data, + rocsparse_compute_type(), x_order)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( + rocsparse_create_dnmat_descr(&vecY, y.extent(0), y.extent(1), y_ld, y_data, + rocsparse_compute_type(), y_order)); + + rocsparse_spmm_alg alg = rocsparse_spmm_alg_default; + + KokkosSparse::Impl::RocSparse_CRS_SpMV_Data *subhandle; + if (handle->tpl_rank2) { + subhandle = dynamic_cast(handle->tpl_rank2); + if (!subhandle) + throw std::runtime_error( + "KokkosSparse::spmv: rank-2 subhandle is not set up for rocsparse " + "CRS"); + subhandle->set_exec_space(exec); + } else { + subhandle = new KokkosSparse::Impl::RocSparse_CRS_SpMV_Data(exec); + handle->tpl_rank2 = subhandle; + // Create the rocsparse csr descr + // We need to do some casting to void* + // Note that row_map is always a 
const view so const_cast is necessary, + // however entries and values may not be const so we need to check first. + void *csr_row_ptr = static_cast(const_cast(A.graph.row_map.data())); + void *csr_col_ind = static_cast(const_cast(A.graph.entries.data())); + void *csr_val = static_cast(const_cast(A.values.data())); + + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_csr_descr( + &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), csr_row_ptr, csr_col_ind, csr_val, offset_index_type, + entry_index_type, rocsparse_index_base_zero, compute_type)); + + // Size and allocate buffer, and analyze the matrix + + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmm(rocsparseHandle, rocsparseOperation, rocsparse_operation_none, + &alpha, subhandle->mat, vecX, &beta, vecY, compute_type, alg, + rocsparse_spmm_stage_buffer_size, &subhandle->bufferSize, nullptr)); + + KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&subhandle->buffer, subhandle->bufferSize)); + + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmm( + rocsparseHandle, rocsparseOperation, rocsparse_operation_none, &alpha, subhandle->mat, vecX, &beta, vecY, + compute_type, alg, rocsparse_spmm_stage_preprocess, &subhandle->bufferSize, subhandle->buffer)); + } + + // Perform the actual computation + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( + rocsparse_spmm(rocsparseHandle, rocsparseOperation, rocsparse_operation_none, &alpha, subhandle->mat, vecX, &beta, + vecY, compute_type, alg, rocsparse_spmm_stage_compute, &subhandle->bufferSize, subhandle->buffer)); + + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_dnmat_descr(vecY)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_dnmat_descr(vecX)); +} + +#define KOKKOSSPARSE_SPMV_MV_ROCSPARSE(SCALAR, XL, YL, MEMSPACE) \ + template <> \ + struct SPMV_MV< \ + Kokkos::HIP, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, rocsparse_int const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + false, true> { \ + using 
device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = CrsMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_mv(const Kokkos::HIP &exec, Handle *handle, const char mode[], const coefficient_type &alpha, \ + const AMatrix &A, const XVector &x, const coefficient_type &beta, const YVector &y) { \ + std::string label = "KokkosSparse::spmv_mv[TPL_ROCSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_mv_rocsparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define INST_ROCSPARSE_SCALAR_MEMSPACE(SCALAR, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_ROCSPARSE(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_ROCSPARSE(SCALAR, Kokkos::LayoutLeft, Kokkos::LayoutRight, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_ROCSPARSE(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutLeft, MEMSPACE) \ + KOKKOSSPARSE_SPMV_MV_ROCSPARSE(SCALAR, Kokkos::LayoutRight, Kokkos::LayoutRight, MEMSPACE) + +#define INST_ROCSPARSE_SCALAR(SCALAR) \ + INST_ROCSPARSE_SCALAR_MEMSPACE(SCALAR, Kokkos::HIPSpace) \ + INST_ROCSPARSE_SCALAR_MEMSPACE(SCALAR, Kokkos::HIPManagedSpace) + +INST_ROCSPARSE_SCALAR(float) +INST_ROCSPARSE_SCALAR(double) +INST_ROCSPARSE_SCALAR(Kokkos::complex) +INST_ROCSPARSE_SCALAR(Kokkos::complex) + +#undef INST_ROCSPARSE_SCALAR_MEMSPACE +#undef INST_ROCSPARSE_SCALAR +#undef KOKKOSSPARSE_SPMV_MV_ROCSPARSE + +} // namespace Impl +} // namespace KokkosSparse +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE + #endif // KOKKOSPARSE_SPMV_MV_TPL_SPEC_DECL_HPP_ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp index 854c2f2b263c..2f5ceca09ee1 100644 --- 
a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp @@ -24,8 +24,7 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct spmv_tpl_spec_avail { enum : bool { value = false }; }; @@ -36,77 +35,46 @@ struct spmv_tpl_spec_avail { // These versions of cuSPARSE require the ordinal and offset types to be the // same. For KokkosKernels, this means int/int only. -#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, \ - YL, MEMSPACE) \ - template <> \ - struct spmv_tpl_spec_avail< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix< \ - const SCALAR, const ORDINAL, Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET>, \ - Kokkos::View< \ - const SCALAR*, XL, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, MEMSPACE) \ + template <> \ + struct spmv_tpl_spec_avail< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const OFFSET>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #if (9000 <= CUDA_VERSION) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaSpace) 
-KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, 
Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) // CUDA_VERSION by itself cannot determine whether the generic cuSPARSE API is @@ -117,66 +85,38 @@ KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int, int, // Can enable int64/size_t. // TODO: if Nvidia ever supports int/size_t, add that too. 
-KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, - Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, - Kokkos::LayoutLeft, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, - Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, - Kokkos::LayoutRight, - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, 
int64_t, size_t, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutLeft, - Kokkos::LayoutLeft, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, - size_t, Kokkos::LayoutRight, - Kokkos::LayoutRight, +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, 
size_t, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif // CUSPARSE >= 10.3 (nested, implies >= 9.0) #endif // CUDA/CUSPARSE >= 9.0? @@ -186,61 +126,43 @@ KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, int64_t, #if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) -#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, LAYOUT) \ - template <> \ - struct spmv_tpl_spec_avail< \ - Kokkos::HIP, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - const rocsparse_int>, \ - Kokkos::View< \ - const SCALAR*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(SCALAR, LAYOUT) \ + template <> \ + struct spmv_tpl_spec_avail< \ + Kokkos::HIP, \ + KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const rocsparse_int>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(double, Kokkos::LayoutLeft) KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(float, Kokkos::LayoutLeft) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - Kokkos::LayoutLeft) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - Kokkos::LayoutLeft) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft) KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(double, Kokkos::LayoutRight) KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(float, Kokkos::LayoutRight) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - Kokkos::LayoutRight) 
-KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, - Kokkos::LayoutRight) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight) #endif // KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(SCALAR, EXECSPACE) \ - template <> \ - struct spmv_tpl_spec_avail< \ - EXECSPACE, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - const MKL_INT>, \ - Kokkos::View< \ - const SCALAR*, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(SCALAR, EXECSPACE) \ + template <> \ + struct spmv_tpl_spec_avail< \ + EXECSPACE, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const MKL_INT>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -257,26 +179,19 @@ KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) #endif -#if defined(KOKKOS_ENABLE_SYCL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) -#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, ORDINAL, MEMSPACE) \ - template <> \ - struct spmv_tpl_spec_avail< \ - Kokkos::Experimental::SYCL, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix< \ - const SCALAR, const ORDINAL, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const ORDINAL>, \ - Kokkos::View< \ - const SCALAR*, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#if defined(KOKKOS_ENABLE_SYCL) && 
!defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, ORDINAL, MEMSPACE) \ + template <> \ + struct spmv_tpl_spec_avail< \ + Kokkos::Experimental::SYCL, \ + KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, const ORDINAL>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // intel-oneapi-mkl/2023.2.0: spmv with complex data types produce: @@ -285,10 +200,8 @@ KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) // TODO: Revisit with later versions and selectively enable this if it's // working. -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( - float, std::int32_t, Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( - double, std::int32_t, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(float, std::int32_t, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(double, std::int32_t, Kokkos::Experimental::SYCLDeviceUSMSpace) /* KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( Kokkos::complex, std::int32_t, @@ -298,10 +211,8 @@ KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( Kokkos::Experimental::SYCLDeviceUSMSpace) */ -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( - float, std::int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( - double, std::int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(float, std::int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(double, std::int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace) /* KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL( Kokkos::complex, std::int64_t, diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index bdde1d831492..30e790a3ab40 100644 
--- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -30,16 +30,13 @@ namespace Impl { template void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], - typename YVector::non_const_value_type const& alpha, - const AMatrix& A, const XVector& x, - typename YVector::non_const_value_type const& beta, - const YVector& y) { + typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, + typename YVector::const_value_type& beta, const YVector& y) { using offset_type = typename AMatrix::non_const_size_type; using value_type = typename AMatrix::non_const_value_type; /* initialize cusparse library */ - cusparseHandle_t cusparseHandle = - KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; + cusparseHandle_t cusparseHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; /* Set cuSPARSE to use the given stream until this function exits */ TemporarySetCusparseStream tscs(cusparseHandle, exec); @@ -48,9 +45,7 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], switch (toupper(mode[0])) { case 'N': myCusparseOperation = CUSPARSE_OPERATION_NON_TRANSPOSE; break; case 'T': myCusparseOperation = CUSPARSE_OPERATION_TRANSPOSE; break; - case 'H': - myCusparseOperation = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE; - break; + case 'H': myCusparseOperation = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE; break; default: { std::ostringstream out; out << "Mode " << mode << " invalid for cuSPARSE SpMV.\n"; @@ -59,8 +54,7 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], } // cuSPARSE doesn't directly support mode H with real values, but this is // equivalent to mode T - if (myCusparseOperation == CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE && - !Kokkos::ArithTraits::isComplex) + if (myCusparseOperation == CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE && 
!Kokkos::ArithTraits::isComplex) myCusparseOperation = CUSPARSE_OPERATION_TRANSPOSE; // Hopefully this corresponds to CUDA reelase 10.1, which is the first to @@ -84,17 +78,13 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], "layer says it is"); /* Check that cusparse can handle the types of the input Kokkos::CrsMatrix */ - const cusparseIndexType_t myCusparseOffsetType = - cusparse_index_type_t_from(); - const cusparseIndexType_t myCusparseEntryType = - cusparse_index_type_t_from(); + const cusparseIndexType_t myCusparseOffsetType = cusparse_index_type_t_from(); + const cusparseIndexType_t myCusparseEntryType = cusparse_index_type_t_from(); /* create lhs and rhs */ cusparseDnVecDescr_t vecX, vecY; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec( - &vecX, x.extent_int(0), (void*)x.data(), myCudaDataType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec( - &vecY, y.extent_int(0), (void*)y.data(), myCudaDataType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x.extent_int(0), (void*)x.data(), myCudaDataType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y.extent_int(0), (void*)y.data(), myCudaDataType)); // Prior to CUDA 11.2.1, ALG2 was more performant than default for imbalanced // matrices. After 11.2.1, the default is performant for imbalanced matrices, @@ -103,57 +93,47 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], #if CUSPARSE_VERSION >= 11402 const bool useAlg2 = false; #else - const bool useAlg2 = handle->get_algorithm() == SPMV_MERGE_PATH; + const bool useAlg2 = handle->get_algorithm() == SPMV_MERGE_PATH; #endif // In CUDA 11.2.0, the algorithm enums were renamed. // This corresponds to CUSPARSE_VERSION >= 11400. #if CUSPARSE_VERSION >= 11400 - cusparseSpMVAlg_t algo = - useAlg2 ? CUSPARSE_SPMV_CSR_ALG2 : CUSPARSE_SPMV_ALG_DEFAULT; + cusparseSpMVAlg_t algo = useAlg2 ? CUSPARSE_SPMV_CSR_ALG2 : CUSPARSE_SPMV_ALG_DEFAULT; #else - cusparseSpMVAlg_t algo = - useAlg2 ? 
CUSPARSE_CSRMV_ALG2 : CUSPARSE_MV_ALG_DEFAULT; + cusparseSpMVAlg_t algo = useAlg2 ? CUSPARSE_CSRMV_ALG2 : CUSPARSE_MV_ALG_DEFAULT; #endif KokkosSparse::Impl::CuSparse10_SpMV_Data* subhandle; if (handle->tpl_rank1) { - subhandle = dynamic_cast( - handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for cusparse"); + subhandle = dynamic_cast(handle->tpl_rank1); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); handle->tpl_rank1 = subhandle; /* create matrix */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( - &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), - (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(), - (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType, - CUSPARSE_INDEX_BASE_ZERO, myCudaDataType)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), + (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(), + (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType, + CUSPARSE_INDEX_BASE_ZERO, myCudaDataType)); /* size and allocate buffer */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize( - cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, myCudaDataType, algo, &subhandle->bufferSize)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, myCudaDataType, algo, &subhandle->bufferSize)); // Async memory management introduced in CUDA 11.2 #if (CUDA_VERSION >= 11020) - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMallocAsync( - &subhandle->buffer, subhandle->bufferSize, exec.cuda_stream())); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMallocAsync(&subhandle->buffer, subhandle->bufferSize, exec.cuda_stream())); #else - KOKKOS_IMPL_CUDA_SAFE_CALL( - 
cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); #endif } /* perform SpMV */ - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSpMV(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, - vecX, &beta, vecY, myCudaDataType, algo, subhandle->buffer)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, &beta, vecY, + myCudaDataType, algo, subhandle->buffer)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY)); @@ -163,65 +143,49 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; if (handle->tpl_rank1) { - subhandle = dynamic_cast( - handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for cusparse"); + subhandle = dynamic_cast(handle->tpl_rank1); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl_rank1 = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL( - cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); } /* perform the actual SpMV operation */ - static_assert( - std::is_same_v, - "With cuSPARSE pre-10.0, offset 
type must be int. Something wrong with " - "TPL avail logic."); + static_assert(std::is_same_v, + "With cuSPARSE pre-10.0, offset type must be int. Something wrong with " + "TPL avail logic."); if constexpr (std::is_same_v) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrmv( - cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), - reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), A.graph.row_map.data(), - A.graph.entries.data(), reinterpret_cast(x.data()), - reinterpret_cast(&beta), + cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), + subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), + reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrmv( - cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), - reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), - reinterpret_cast(x.data()), - reinterpret_cast(&beta), + cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), + subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), + A.graph.entries.data(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrmv( cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), - reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), - reinterpret_cast(x.data()), - reinterpret_cast(&beta), - reinterpret_cast(y.data()))); + reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), + A.graph.row_map.data(), A.graph.entries.data(), reinterpret_cast(x.data()), + 
reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrmv( - cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), - reinterpret_cast(&alpha), subhandle->mat, - reinterpret_cast(A.values.data()), - A.graph.row_map.data(), A.graph.entries.data(), - reinterpret_cast(x.data()), - reinterpret_cast(&beta), - reinterpret_cast(y.data()))); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseZcsrmv(cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), + reinterpret_cast(&alpha), subhandle->mat, + reinterpret_cast(A.values.data()), A.graph.row_map.data(), + A.graph.entries.data(), reinterpret_cast(x.data()), + reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else { static_assert( static_assert(KokkosKernels::Impl::always_false_v, @@ -231,147 +195,69 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], #endif // CUDA_VERSION } -#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE, \ - COMPILE_LIBRARY) \ - template <> \ - struct SPMV< \ - Kokkos::Cuda, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix< \ - SCALAR const, ORDINAL const, Kokkos::Device, \ - Kokkos::MemoryTraits, OFFSET const>, \ - Kokkos::View< \ - SCALAR const*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using memory_trait_type = Kokkos::MemoryTraits; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = CrsMatrix; \ - using XVector = Kokkos::View< \ - SCALAR const*, LAYOUT, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View; \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv(const Kokkos::Cuda& exec, Handle* handle, \ - const char mode[], const coefficient_type& alpha, \ - const AMatrix& A, const XVector& x, \ - const 
coefficient_type& beta, const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE) \ + template <> \ + struct SPMV< \ + Kokkos::Cuda, KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, OFFSET const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = CrsMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv(const Kokkos::Cuda& exec, Handle* handle, const char mode[], const coefficient_type& alpha, \ + const AMatrix& A, const XVector& x, const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_cusparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #if (9000 <= CUDA_VERSION) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, 
Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) 
+KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutRight, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutLeft, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutRight, - Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutLeft, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutRight, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutLeft, 
Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutRight, Kokkos::CudaSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaSpace) 
+KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) #endif // defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) #endif // 9000 <= CUDA_VERSION @@ -390,17 +276,14 @@ namespace Impl { template void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], - typename YVector::non_const_value_type const& alpha, - const AMatrix& A, const XVector& x, - typename YVector::non_const_value_type const& beta, - const YVector& y) { + typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, + typename YVector::const_value_type& beta, const YVector& y) { using offset_type = typename AMatrix::non_const_size_type; using entry_type = typename AMatrix::non_const_ordinal_type; using value_type = typename AMatrix::non_const_value_type; /* initialize rocsparse library */ - rocsparse_handle rocsparseHandle = - KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; + rocsparse_handle rocsparseHandle = KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; /* Set rocsparse to use the given stream until this function exits */ TemporarySetRocsparseStream tsrs(rocsparseHandle, exec); @@ 
-416,26 +299,27 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], /* Create rocsparse dense vectors for X and Y */ rocsparse_dnvec_descr vecX, vecY; - void* x_data = static_cast( - const_cast(x.data())); - void* y_data = static_cast( - const_cast(y.data())); + void* x_data = static_cast(const_cast(x.data())); + void* y_data = static_cast(const_cast(y.data())); KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_dnvec_descr( - &vecX, x.extent_int(0), x_data, - rocsparse_compute_type())); + &vecX, x.extent_int(0), x_data, rocsparse_compute_type())); KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_dnvec_descr( - &vecY, y.extent_int(0), y_data, - rocsparse_compute_type())); - - rocsparse_spmv_alg alg = rocsparse_spmv_alg_default; + &vecY, y.extent_int(0), y_data, rocsparse_compute_type())); + + // Default to using the "stream" algorithm which has almost no setup cost, + // and performs well for reasonably balanced matrices + rocsparse_spmv_alg alg = rocsparse_spmv_alg_csr_stream; + if (handle->get_algorithm() == SPMV_MERGE_PATH) { + // Only use the "adaptive" algorithm if the user has indicated that the + // matrix is very imbalanced, by asking for merge path. 
This algorithm + // has fairly expensive setup + alg = rocsparse_spmv_alg_csr_adaptive; + } KokkosSparse::Impl::RocSparse_CRS_SpMV_Data* subhandle; if (handle->tpl_rank1) { - subhandle = dynamic_cast( - handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for rocsparse CRS"); + subhandle = dynamic_cast(handle->tpl_rank1); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for rocsparse CRS"); subhandle->set_exec_space(exec); } else { subhandle = new KokkosSparse::Impl::RocSparse_CRS_SpMV_Data(exec); @@ -444,135 +328,99 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], // We need to do some casting to void* // Note that row_map is always a const view so const_cast is necessary, // however entries and values may not be const so we need to check first. - void* csr_row_ptr = - static_cast(const_cast(A.graph.row_map.data())); - void* csr_col_ind = - static_cast(const_cast(A.graph.entries.data())); - void* csr_val = - static_cast(const_cast(A.values.data())); + void* csr_row_ptr = static_cast(const_cast(A.graph.row_map.data())); + void* csr_col_ind = static_cast(const_cast(A.graph.entries.data())); + void* csr_val = static_cast(const_cast(A.values.data())); KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_csr_descr( - &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), csr_row_ptr, - csr_col_ind, csr_val, offset_index_type, entry_index_type, - rocsparse_index_base_zero, compute_type)); + &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), csr_row_ptr, csr_col_ind, csr_val, offset_index_type, + entry_index_type, rocsparse_index_base_zero, compute_type)); /* Size and allocate buffer, and analyze the matrix */ #if KOKKOSSPARSE_IMPL_ROCM_VERSION >= 60000 - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, rocsparse_spmv_stage_buffer_size, - 
&subhandle->bufferSize, nullptr)); - KOKKOS_IMPL_HIP_SAFE_CALL( - hipMalloc(&subhandle->buffer, subhandle->bufferSize)); - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, rocsparse_spmv_stage_preprocess, - &subhandle->bufferSize, subhandle->buffer)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, compute_type, alg, rocsparse_spmv_stage_buffer_size, + &subhandle->bufferSize, nullptr)); + KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&subhandle->buffer, subhandle->bufferSize)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, compute_type, alg, rocsparse_spmv_stage_preprocess, + &subhandle->bufferSize, subhandle->buffer)); #elif KOKKOSSPARSE_IMPL_ROCM_VERSION >= 50400 + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv_ex(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, + vecX, &beta, vecY, compute_type, alg, rocsparse_spmv_stage_auto, + &subhandle->bufferSize, nullptr)); + KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&subhandle->buffer, subhandle->bufferSize)); KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv_ex( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, rocsparse_spmv_stage_auto, - &subhandle->bufferSize, nullptr)); - KOKKOS_IMPL_HIP_SAFE_CALL( - hipMalloc(&subhandle->buffer, subhandle->bufferSize)); - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv_ex( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, rocsparse_spmv_stage_preprocess, - &subhandle->bufferSize, subhandle->buffer)); + rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, &beta, vecY, compute_type, alg, + rocsparse_spmv_stage_preprocess, &subhandle->bufferSize, subhandle->buffer)); #else - 
KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, &subhandle->bufferSize, nullptr)); - KOKKOS_IMPL_HIP_SAFE_CALL( - hipMalloc(&subhandle->buffer, subhandle->bufferSize)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, compute_type, alg, &subhandle->bufferSize, nullptr)); + KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&subhandle->buffer, subhandle->bufferSize)); #endif } /* Perform the actual computation */ #if KOKKOSSPARSE_IMPL_ROCM_VERSION >= 60000 - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, rocsparse_spmv_stage_compute, - &subhandle->bufferSize, subhandle->buffer)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, compute_type, alg, rocsparse_spmv_stage_compute, + &subhandle->bufferSize, subhandle->buffer)); #elif KOKKOSSPARSE_IMPL_ROCM_VERSION >= 50400 - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv_ex( - rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, compute_type, alg, rocsparse_spmv_stage_compute, - &subhandle->bufferSize, subhandle->buffer)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv_ex(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, compute_type, alg, rocsparse_spmv_stage_compute, + &subhandle->bufferSize, subhandle->buffer)); #else - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( - rocsparse_spmv(rocsparseHandle, myRocsparseOperation, &alpha, - subhandle->mat, vecX, &beta, vecY, compute_type, alg, - &subhandle->bufferSize, subhandle->buffer)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_spmv(rocsparseHandle, myRocsparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, compute_type, alg, &subhandle->bufferSize, + 
subhandle->buffer)); #endif KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_dnvec_descr(vecY)); KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_dnvec_descr(vecX)); } -#define KOKKOSSPARSE_SPMV_ROCSPARSE(SCALAR, LAYOUT, COMPILE_LIBRARY) \ - template <> \ - struct SPMV< \ - Kokkos::HIP, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix, \ - Kokkos::MemoryTraits, \ - rocsparse_int const>, \ - Kokkos::View< \ - SCALAR const*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using memory_trait_type = Kokkos::MemoryTraits; \ - using Handle = KokkosSparse::Impl::SPMVHandleImpl< \ - Kokkos::HIP, Kokkos::HIPSpace, SCALAR, rocsparse_int, rocsparse_int>; \ - using AMatrix = CrsMatrix; \ - using XVector = Kokkos::View< \ - SCALAR const*, LAYOUT, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = \ - Kokkos::View; \ - \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv(const Kokkos::HIP& exec, Handle* handle, \ - const char mode[], const coefficient_type& alpha, \ - const AMatrix& A, const XVector& x, \ - const coefficient_type& beta, const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_ROCSPARSE," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_rocsparse(exec, handle, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_ROCSPARSE(SCALAR, LAYOUT) \ + template <> \ + struct SPMV< \ + Kokkos::HIP, \ + KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, rocsparse_int const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using Handle = \ + KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = 
CrsMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv(const Kokkos::HIP& exec, Handle* handle, const char mode[], const coefficient_type& alpha, \ + const AMatrix& A, const XVector& x, const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_ROCSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_rocsparse(exec, handle, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSSPARSE_SPMV_ROCSPARSE(double, Kokkos::LayoutLeft, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(double, Kokkos::LayoutRight, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(float, Kokkos::LayoutLeft, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(float, Kokkos::LayoutRight, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(double, Kokkos::LayoutLeft) +KOKKOSSPARSE_SPMV_ROCSPARSE(double, Kokkos::LayoutRight) +KOKKOSSPARSE_SPMV_ROCSPARSE(float, Kokkos::LayoutLeft) +KOKKOSSPARSE_SPMV_ROCSPARSE(float, Kokkos::LayoutRight) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight) #undef KOKKOSSPARSE_SPMV_ROCSPARSE @@ -592,10 +440,9 @@ namespace Impl { // Note: Scalar here is the 
Kokkos type, not the MKL type template -inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, - Scalar beta, MKL_INT m, MKL_INT n, const MKL_INT* Arowptrs, - const MKL_INT* Aentries, const Scalar* Avalues, - const Scalar* x, Scalar* y) { +inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Scalar beta, MKL_INT m, MKL_INT n, + const MKL_INT* Arowptrs, const MKL_INT* Aentries, const Scalar* Avalues, const Scalar* x, + Scalar* y) { using MKLScalar = typename KokkosToMKLScalar::type; using ExecSpace = typename Handle::ExecutionSpaceType; using Subhandle = MKL_SpMV_Data; @@ -604,9 +451,7 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, MKLScalar* y_mkl = reinterpret_cast(y); if (handle->tpl_rank1) { subhandle = dynamic_cast(handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for MKL CRS"); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for MKL CRS"); // note: classic mkl only runs on synchronous host exec spaces, so no need // to call set_exec_space on the subhandle here } else { @@ -619,123 +464,91 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; // Note: the create_csr routine requires non-const values even though // they're not actually modified - MKLScalar* Avalues_mkl = - reinterpret_cast(const_cast(Avalues)); + MKLScalar* Avalues_mkl = reinterpret_cast(const_cast(Avalues)); if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_create_csr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, - const_cast(Arowptrs), const_cast(Arowptrs + 1), - const_cast(Aentries), Avalues_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_s_create_csr(&subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v) { - 
KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_create_csr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, - const_cast(Arowptrs), const_cast(Arowptrs + 1), - const_cast(Aentries), Avalues_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_d_create_csr(&subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_create_csr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, - const_cast(Arowptrs), const_cast(Arowptrs + 1), - const_cast(Aentries), Avalues_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_c_create_csr(&subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_create_csr( - &subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, - const_cast(Arowptrs), const_cast(Arowptrs + 1), - const_cast(Aentries), Avalues_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_z_create_csr(&subhandle->mat, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), + const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } } MKLScalar alpha_mkl = KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosToMKLScalar(beta); if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_s_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_s_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } else if constexpr (std::is_same_v) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_d_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_d_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_c_mv(op, alpha_mkl, 
subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_c_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } else if constexpr (std::is_same_v>) { - KOKKOSKERNELS_MKL_SAFE_CALL(mkl_sparse_z_mv(op, alpha_mkl, subhandle->mat, - subhandle->descr, x_mkl, - beta_mkl, y_mkl)); + KOKKOSKERNELS_MKL_SAFE_CALL( + mkl_sparse_z_mv(op, alpha_mkl, subhandle->mat, subhandle->descr, x_mkl, beta_mkl, y_mkl)); } } // Note: classic MKL runs on Serial/OpenMP but can't use our execution space // instances -#define KOKKOSSPARSE_SPMV_MKL(SCALAR, EXECSPACE, COMPILE_LIBRARY) \ - template <> \ - struct SPMV, \ - KokkosSparse::CrsMatrix< \ - SCALAR const, MKL_INT const, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, MKL_INT const>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using device_type = Kokkos::Device; \ - using Handle = \ - KokkosSparse::Impl::SPMVHandleImpl; \ - using AMatrix = \ - CrsMatrix, MKL_INT const>; \ - using XVector = Kokkos::View< \ - SCALAR const*, Kokkos::LayoutLeft, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View>; \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv(const EXECSPACE&, Handle* handle, const char mode[], \ - const coefficient_type& alpha, const AMatrix& A, \ - const XVector& x, const coefficient_type& beta, \ - const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_MKL," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_mkl(handle, mode_kk_to_mkl(mode[0]), alpha, beta, A.numRows(), \ - A.numCols(), A.graph.row_map.data(), A.graph.entries.data(), \ - A.values.data(), x.data(), y.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_MKL(SCALAR, EXECSPACE) \ + template <> \ + struct SPMV, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, MKL_INT 
const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true> { \ + using device_type = Kokkos::Device; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = \ + CrsMatrix, MKL_INT const>; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View>; \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv(const EXECSPACE&, Handle* handle, const char mode[], const coefficient_type& alpha, \ + const AMatrix& A, const XVector& x, const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_MKL," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_mkl(handle, mode_kk_to_mkl(mode[0]), alpha, beta, A.numRows(), A.numCols(), A.graph.row_map.data(), \ + A.graph.entries.data(), A.values.data(), x.data(), y.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL -KOKKOSSPARSE_SPMV_MKL(float, Kokkos::Serial, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(double, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MKL(float, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MKL(double, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::Serial) #endif #ifdef KOKKOS_ENABLE_OPENMP -KOKKOSSPARSE_SPMV_MKL(float, Kokkos::OpenMP, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(double, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_MKL(float, Kokkos::OpenMP) 
+KOKKOSSPARSE_SPMV_MKL(double, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP) +KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP) #endif #undef KOKKOSSPARSE_SPMV_MKL #endif -#if defined(KOKKOS_ENABLE_SYCL) && \ - !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) inline oneapi::mkl::transpose mode_kk_to_onemkl(char mode_kk) { switch (toupper(mode_kk)) { case 'N': return oneapi::mkl::transpose::nontrans; @@ -743,34 +556,26 @@ inline oneapi::mkl::transpose mode_kk_to_onemkl(char mode_kk) { case 'H': return oneapi::mkl::transpose::conjtrans; default:; } - throw std::invalid_argument( - "Invalid mode for oneMKL (should be one of N, T, H)"); + throw std::invalid_argument("Invalid mode for oneMKL (should be one of N, T, H)"); } -template -inline void spmv_onemkl(const execution_space& exec, Handle* handle, - oneapi::mkl::transpose mkl_mode, - typename matrix_type::non_const_value_type const alpha, - const matrix_type& A, const xview_type& x, - typename matrix_type::non_const_value_type const beta, - const yview_type& y) { +template +inline void spmv_onemkl(const execution_space& exec, Handle* handle, oneapi::mkl::transpose mkl_mode, + typename yview_type::const_value_type& alpha, const matrix_type& A, const xview_type& x, + typename yview_type::const_value_type& beta, const yview_type& y) { using scalar_type = typename matrix_type::non_const_value_type; using onemkl_scalar_type = typename KokkosToOneMKLScalar::type; using ordinal_type = typename matrix_type::non_const_ordinal_type; // oneAPI doesn't directly support mode H with real values, but this is // equivalent to mode T - if (mkl_mode == oneapi::mkl::transpose::conjtrans && - !Kokkos::ArithTraits::isComplex) + if (mkl_mode == oneapi::mkl::transpose::conjtrans && !Kokkos::ArithTraits::isComplex) mkl_mode = oneapi::mkl::transpose::trans; OneMKL_SpMV_Data* subhandle; if (handle->tpl_rank1) { 
subhandle = dynamic_cast(handle->tpl_rank1); - if (!subhandle) - throw std::runtime_error( - "KokkosSparse::spmv: subhandle is not set up for OneMKL CRS"); + if (!subhandle) throw std::runtime_error("KokkosSparse::spmv: subhandle is not set up for OneMKL CRS"); subhandle->set_exec_space(exec); } else { subhandle = new OneMKL_SpMV_Data(exec); @@ -779,101 +584,73 @@ inline void spmv_onemkl(const execution_space& exec, Handle* handle, // Even for out-of-order SYCL queue, the inputs here do not depend on // kernels being sequenced auto ev = oneapi::mkl::sparse::set_csr_data( - exec.sycl_queue(), subhandle->mat, A.numRows(), A.numCols(), - oneapi::mkl::index_base::zero, - const_cast(A.graph.row_map.data()), - const_cast(A.graph.entries.data()), - reinterpret_cast( - const_cast(A.values.data()))); + exec.sycl_queue(), subhandle->mat, A.numRows(), A.numCols(), oneapi::mkl::index_base::zero, + const_cast(A.graph.row_map.data()), const_cast(A.graph.entries.data()), + reinterpret_cast(const_cast(A.values.data()))); // for out-of-order queue: the fence before gemv below will make sure // optimize_gemv has finished - oneapi::mkl::sparse::optimize_gemv(exec.sycl_queue(), mkl_mode, - subhandle->mat, {ev}); + oneapi::mkl::sparse::optimize_gemv(exec.sycl_queue(), mkl_mode, subhandle->mat, {ev}); } // Uncommon case: an out-of-order SYCL queue does not promise that previously // enqueued kernels finish before starting this one. So fence exec to get the // expected semantics. 
if (!exec.sycl_queue().is_in_order()) exec.fence(); - oneapi::mkl::sparse::gemv( - exec.sycl_queue(), mkl_mode, alpha, subhandle->mat, - reinterpret_cast(x.data()), beta, - reinterpret_cast(y.data())); + oneapi::mkl::sparse::gemv(exec.sycl_queue(), mkl_mode, alpha, subhandle->mat, + reinterpret_cast(x.data()), beta, + reinterpret_cast(y.data())); } -#define KOKKOSSPARSE_SPMV_ONEMKL(SCALAR, ORDINAL, MEMSPACE, COMPILE_LIBRARY) \ - template <> \ - struct SPMV< \ - Kokkos::Experimental::SYCL, \ - KokkosSparse::Impl::SPMVHandleImpl, \ - KokkosSparse::CrsMatrix< \ - SCALAR const, ORDINAL const, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, ORDINAL const>, \ - Kokkos::View< \ - SCALAR const*, Kokkos::LayoutLeft, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, COMPILE_LIBRARY> { \ - using execution_space = Kokkos::Experimental::SYCL; \ - using device_type = Kokkos::Device; \ - using Handle = KokkosSparse::Impl::SPMVHandleImpl< \ - Kokkos::Experimental::SYCL, MEMSPACE, SCALAR, ORDINAL, ORDINAL>; \ - using AMatrix = \ - CrsMatrix, ORDINAL const>; \ - using XVector = Kokkos::View< \ - SCALAR const*, Kokkos::LayoutLeft, device_type, \ - Kokkos::MemoryTraits>; \ - using YVector = Kokkos::View>; \ - using coefficient_type = typename YVector::non_const_value_type; \ - \ - static void spmv(const execution_space& exec, Handle* handle, \ - const char mode[], const coefficient_type& alpha, \ - const AMatrix& A, const XVector& x, \ - const coefficient_type& beta, const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_ONEMKL," + \ - Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - oneapi::mkl::transpose mkl_mode = mode_kk_to_onemkl(mode[0]); \ - spmv_onemkl(exec, handle, mkl_mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSSPARSE_SPMV_ONEMKL(SCALAR, ORDINAL, MEMSPACE) \ + template <> \ + struct SPMV< \ + Kokkos::Experimental::SYCL, \ + 
KokkosSparse::Impl::SPMVHandleImpl, \ + KokkosSparse::CrsMatrix, \ + Kokkos::MemoryTraits, ORDINAL const>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true> { \ + using execution_space = Kokkos::Experimental::SYCL; \ + using device_type = Kokkos::Device; \ + using Handle = KokkosSparse::Impl::SPMVHandleImpl; \ + using AMatrix = \ + CrsMatrix, ORDINAL const>; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View>; \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv(const execution_space& exec, Handle* handle, const char mode[], const coefficient_type& alpha, \ + const AMatrix& A, const XVector& x, const coefficient_type& beta, const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_ONEMKL," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + oneapi::mkl::transpose mkl_mode = mode_kk_to_onemkl(mode[0]); \ + spmv_onemkl(exec, handle, mkl_mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSSPARSE_SPMV_ONEMKL(float, std::int32_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ONEMKL(double, std::int32_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ONEMKL(float, std::int32_t, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSSPARSE_SPMV_ONEMKL(double, std::int32_t, Kokkos::Experimental::SYCLDeviceUSMSpace) /* KOKKOSSPARSE_SPMV_ONEMKL(Kokkos::complex, std::int32_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSSPARSE_SPMV_ONEMKL(Kokkos::complex, std::int32_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + Kokkos::Experimental::SYCLDeviceUSMSpace) */ -KOKKOSSPARSE_SPMV_ONEMKL(float, std::int64_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - 
KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -KOKKOSSPARSE_SPMV_ONEMKL(double, std::int64_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ONEMKL(float, std::int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSSPARSE_SPMV_ONEMKL(double, std::int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace) /* KOKKOSSPARSE_SPMV_ONEMKL(Kokkos::complex, std::int64_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + Kokkos::Experimental::SYCLDeviceUSMSpace + ) KOKKOSSPARSE_SPMV_ONEMKL(Kokkos::complex, std::int64_t, - Kokkos::Experimental::SYCLDeviceUSMSpace, - KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + Kokkos::Experimental::SYCLDeviceUSMSpace + ) */ #endif } // namespace Impl diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp index 1a22146d01af..0ff6b5610a93 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp @@ -20,8 +20,8 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct sptrsv_solve_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_trsv_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_trsv_tpl_spec_avail.hpp index a4d0416b0c27..7d7cea4a1af7 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_trsv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_trsv_tpl_spec_avail.hpp @@ -20,8 +20,7 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct trsv_tpl_spec_avail { enum : bool { value = false }; }; diff --git 
a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp index 624cd86ff5fe..3663122e9215 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp @@ -38,6 +38,7 @@ #include "Test_Sparse_gmres.hpp" #include "Test_Sparse_Transpose.hpp" #include "Test_Sparse_TestUtils_RandCsMat.hpp" +#include "Test_Sparse_IOUtils.hpp" #include "Test_Sparse_ccs2crs.hpp" #include "Test_Sparse_crs2ccs.hpp" #include "Test_Sparse_removeCrsMatrixZeros.hpp" diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_BsrMatrix.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_BsrMatrix.hpp index 9ec660a64b0d..6d1c5f9715c3 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_BsrMatrix.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_BsrMatrix.hpp @@ -14,7 +14,7 @@ // //@HEADER -//#include "KokkosKernels_ETIHelperMacros.h" +// #include "KokkosKernels_ETIHelperMacros.h" #include #include #include @@ -51,18 +51,14 @@ using std::endl; // \param nnz [out] The number of stored entries in the matrix. // \param whichMatrix [in] The index of the matrix to create. 
template -void makeSparseMatrix( - typename sparseMat_t::StaticCrsGraphType::row_map_type::non_const_type &ptr, - typename sparseMat_t::StaticCrsGraphType::entries_type::non_const_type &ind, - typename sparseMat_t::values_type::non_const_type &val, - typename sparseMat_t::ordinal_type &numRows, - typename sparseMat_t::ordinal_type &numCols, - typename sparseMat_t::size_type &nnz, const int whichMatrix, - typename sparseMat_t::ordinal_type &blockDim) { - typedef typename sparseMat_t::StaticCrsGraphType::row_map_type::non_const_type - ptr_type; - typedef typename sparseMat_t::StaticCrsGraphType::entries_type::non_const_type - ind_type; +void makeSparseMatrix(typename sparseMat_t::StaticCrsGraphType::row_map_type::non_const_type &ptr, + typename sparseMat_t::StaticCrsGraphType::entries_type::non_const_type &ind, + typename sparseMat_t::values_type::non_const_type &val, + typename sparseMat_t::ordinal_type &numRows, typename sparseMat_t::ordinal_type &numCols, + typename sparseMat_t::size_type &nnz, const int whichMatrix, + typename sparseMat_t::ordinal_type &blockDim) { + typedef typename sparseMat_t::StaticCrsGraphType::row_map_type::non_const_type ptr_type; + typedef typename sparseMat_t::StaticCrsGraphType::entries_type::non_const_type ind_type; typedef typename sparseMat_t::values_type::non_const_type val_type; typedef typename sparseMat_t::ordinal_type lno_t; typedef typename sparseMat_t::size_type size_type; @@ -79,10 +75,8 @@ void makeSparseMatrix( blockDim = 1; const size_type ptrRaw[] = {0, 4, 8, 10, 12, 14, 16, 20, 24}; - const lno_t indRaw[] = {0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 2, 3, - 4, 5, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7}; - const scalar_t valRaw[] = {.1, 1, 4, 5, -.1, -1, -4, -5, 2, 3, -2, -3, - 4, 5, -4, -5, 2, 3, 6, 7, -2, -3, -6, -7}; + const lno_t indRaw[] = {0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 2, 3, 4, 5, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7}; + const scalar_t valRaw[] = {.1, 1, 4, 5, -.1, -1, -4, -5, 2, 3, -2, -3, 4, 5, -4, -5, 2, 3, 6, 7, -2, -3, -6, -7}; // Create 
the output Views. ptr = ptr_type("ptr", numRows + 1); @@ -108,8 +102,7 @@ void makeSparseMatrix( const size_type ptrRaw[] = {0, 2, 3, 4, 6}; const lno_t indRaw[] = {0, 2, 1, 2, 1, 3}; // Numerical values stored in BSR format - const scalar_t valRaw[] = {.1, 1, -.1, -1, 4, 5, -4, -5, 2, 3, -2, -3, - 4, 5, -4, -5, 2, 3, -2, -3, 6, 7, -6, -7}; + const scalar_t valRaw[] = {.1, 1, -.1, -1, 4, 5, -4, -5, 2, 3, -2, -3, 4, 5, -4, -5, 2, 3, -2, -3, 6, 7, -6, -7}; // Create the output Views. ptr = ptr_type("ptr", numRows + 1); @@ -128,8 +121,7 @@ void makeSparseMatrix( else { // whichMatrix != 0 std::ostringstream os; - os << "Invalid whichMatrix value " << whichMatrix - << ". Valid value(s) include " << 0 << "."; + os << "Invalid whichMatrix value " << whichMatrix << ". Valid value(s) include " << 0 << "."; throw std::invalid_argument(os.str()); } } @@ -153,8 +145,7 @@ crsMat_t makeCrsMatrix_BlockStructure() { lno_t blockDim; const int whichMatrix = 0; - makeSparseMatrix(ptr, ind, val, numRows, numCols, nnz, whichMatrix, - blockDim); + makeSparseMatrix(ptr, ind, val, numRows, numCols, nnz, whichMatrix, blockDim); return crsMat_t("A", numRows, numCols, nnz, val, ptr, ind); } @@ -176,8 +167,7 @@ blkcrsMat_t makeBsrMatrix() { lno_t blockDim; const int whichMatrix = 1; - makeSparseMatrix(ptr, ind, val, numRows, numCols, nnz, - whichMatrix, blockDim); + makeSparseMatrix(ptr, ind, val, numRows, numCols, nnz, whichMatrix, blockDim); blkcrsMat_t resMat("blk", numRows, numCols, nnz, val, ptr, ind, blockDim); return resMat; } @@ -192,8 +182,7 @@ struct TestFunctor { ResultsType d_results; // Constructor - TestFunctor(MatrixType &A_, ResultsType &d_results_) - : A(A_), d_results(d_results_) {} + TestFunctor(MatrixType &A_, ResultsType &d_results_) : A(A_), d_results(d_results_) {} KOKKOS_INLINE_FUNCTION void operator()(const int /*rid*/) const { @@ -248,14 +237,13 @@ struct TestFunctor { // Test sumIntoValues { - check0 = true; - check1 = true; - check2 = true; - const lno_t ncols = 
1; - const lno_t cols[] = {3}; - const lno_t browi = 3; - const scalar_t vals[] = { - 10, 11, 20, 22}; // represents a single block: [10 11; 20 22] + check0 = true; + check1 = true; + check2 = true; + const lno_t ncols = 1; + const lno_t cols[] = {3}; + const lno_t browi = 3; + const scalar_t vals[] = {10, 11, 20, 22}; // represents a single block: [10 11; 20 22] const scalar_t result[] = {16, 18, 14, 15}; // This block will be summed into the existing block [6 7; -6 -7] @@ -286,8 +274,7 @@ struct TestFunctor { const lno_t ncols = 1; const lno_t cols[] = {3}; const lno_t browi = 3; - const scalar_t valsreplace[] = { - -10, -11, -20, -22}; // represents a single block: [-10 -11; -20 -22] + const scalar_t valsreplace[] = {-10, -11, -20, -22}; // represents a single block: [-10 -11; -20 -22] // The existing block to be replaced was: [6 7; -6 -7] A.replaceValues(browi, cols, ncols, valsreplace); @@ -301,7 +288,7 @@ struct TestFunctor { auto entry = iblockrow.local_block_value(relBlk, lrow, lcol); check0 = check0 && (entry == row_ptr[lcol]); check1 = check1 && (entry == view_blk(lrow, lcol)); - check2 = check2 && (entry == valsreplace[lrow * A.blockDim() + lcol]); + check2 = check2 && (entry == valsreplace[lrow * A.blockDim() + lcol]); } // end local col in row } // end local row in blk d_results(7) = check0; @@ -315,16 +302,12 @@ struct TestFunctor { } // namespace Test_Bsr // Create a CrsMatrix and BsrMatrix and test member functions. 
-template +template void testBsrMatrix() { using namespace Test_Bsr; - typedef KokkosSparse::CrsMatrix - crs_matrix_type; - typedef KokkosSparse::Experimental::BsrMatrix - bsr_matrix_type; + typedef KokkosSparse::CrsMatrix crs_matrix_type; + typedef KokkosSparse::Experimental::BsrMatrix bsr_matrix_type; crs_matrix_type crsA = makeCrsMatrix_BlockStructure(); bsr_matrix_type A = makeBsrMatrix(); @@ -334,10 +317,8 @@ void testBsrMatrix() { result_view_type d_results("d_results"); auto h_results = Kokkos::create_mirror_view(d_results); - Kokkos::parallel_for( - "KokkosSparse::Test_Bsr::BsrMatrix", - Kokkos::RangePolicy(0, 1), - Test_Bsr::TestFunctor(A, d_results)); + Kokkos::parallel_for("KokkosSparse::Test_Bsr::BsrMatrix", Kokkos::RangePolicy(0, 1), + Test_Bsr::TestFunctor(A, d_results)); Kokkos::deep_copy(h_results, d_results); @@ -346,10 +327,9 @@ void testBsrMatrix() { } } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##bsrmatrix##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - testBsrMatrix(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##bsrmatrix##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + testBsrMatrix(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Controls.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Controls.hpp index 79679f817347..591a778ded3f 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Controls.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Controls.hpp @@ -46,8 +46,7 @@ void test_controls_il() { EXPECT_EQ(c.getParameter("key1"), "val1"); } { - KokkosKernels::Experimental::Controls c( - {{"key1", "val1"}, {"key2", "val2"}}); + KokkosKernels::Experimental::Controls c({{"key1", "val1"}, {"key2", "val2"}}); EXPECT_EQ(c.isParameter("blah"), false); EXPECT_EQ(c.getParameter("blah"), ""); EXPECT_EQ(c.getParameter("key1"), "val1"); diff --git 
a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_CrsMatrix.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_CrsMatrix.hpp index e792a99500e1..1dc3bb3c9150 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_CrsMatrix.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_CrsMatrix.hpp @@ -14,7 +14,7 @@ // //@HEADER -//#include "KokkosKernels_ETIHelperMacros.h" +// #include "KokkosKernels_ETIHelperMacros.h" #include #include #include @@ -49,17 +49,13 @@ using std::endl; // \param nnz [out] The number of stored entries in the matrix. // \param whichMatrix [in] The index of the matrix to create. template -void makeSparseMatrix( - typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type &ptr, - typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type &ind, - typename crsMat_t::values_type::non_const_type &val, - typename crsMat_t::ordinal_type &numRows, - typename crsMat_t::ordinal_type &numCols, typename crsMat_t::size_type &nnz, - const int whichMatrix) { - typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type - ptr_type; - typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type - ind_type; +void makeSparseMatrix(typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type &ptr, + typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type &ind, + typename crsMat_t::values_type::non_const_type &val, typename crsMat_t::ordinal_type &numRows, + typename crsMat_t::ordinal_type &numCols, typename crsMat_t::size_type &nnz, + const int whichMatrix) { + typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type ptr_type; + typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type ind_type; typedef typename crsMat_t::values_type::non_const_type val_type; typedef typename crsMat_t::ordinal_type lno_t; typedef typename crsMat_t::size_type size_type; @@ -74,12 +70,10 @@ void makeSparseMatrix( numCols = 10; nnz = 21; const 
size_type ptrRaw[] = {0, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21}; - const lno_t indRaw[] = {0, 1, 9, 1, 2, 2, 3, 3, 4, 4, 5, - 5, 6, 6, 7, 7, 8, 8, 9, 1, 9}; + const lno_t indRaw[] = {0, 1, 9, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 1, 9}; - const scalar_t valRaw[] = {1.0, 4.0, 0.5, 0.5, 5.0, 1.0, 6.0, - 1.5, 7.0, 2.0, 8.0, 2.5, 9.0, 3.0, - 10.0, 3.5, 11.0, 4.0, 12.0, 4.5, 13.0}; + const scalar_t valRaw[] = {1.0, 4.0, 0.5, 0.5, 5.0, 1.0, 6.0, 1.5, 7.0, 2.0, 8.0, + 2.5, 9.0, 3.0, 10.0, 3.5, 11.0, 4.0, 12.0, 4.5, 13.0}; // Create the output Views. ptr = ptr_type("ptr", numRows + 1); @@ -96,8 +90,7 @@ void makeSparseMatrix( Kokkos::deep_copy(val, valIn); } else { // whichMatrix != 0 std::ostringstream os; - os << "Invalid whichMatrix value " << whichMatrix - << ". Valid value(s) include " << 0 << "."; + os << "Invalid whichMatrix value " << whichMatrix << ". Valid value(s) include " << 0 << "."; throw std::invalid_argument(os.str()); } } @@ -129,13 +122,11 @@ crsMat_t makeCrsMatrix() { // compiles. However, it does need to initialize the MemorySpace's // default execution space, because it allocates Views and calls // deep_copy a few times. -template +template void testCrsMatrix() { using namespace Test; - typedef KokkosSparse::CrsMatrix - crs_matrix_type; + typedef KokkosSparse::CrsMatrix crs_matrix_type; crs_matrix_type A = makeCrsMatrix(); // mfh 28 Sep 2013: Use A in some way, so the compiler can't // optimize it away completely. This forces the compiler to @@ -143,8 +134,7 @@ void testCrsMatrix() { // printf ("A is %d by %d\n", A.numRows (), A.numCols ()); } -template +template void testCrsMatrixRawConstructor() { int nrows = 5; // note: last 2 columns will be empty. 
@@ -156,46 +146,35 @@ void testCrsMatrixRawConstructor() { std::vector rowmap = {0, 0, 2, 5, 6, 9}; std::vector entries = {3, 4, 0, 1, 2, 2, 0, 3, 4}; std::vector values; - for (int i = 0; i < nnz; i++) - values.push_back(Kokkos::ArithTraits::one() * - (1.0 * rand() / RAND_MAX)); - KokkosSparse::CrsMatrix A( - "A", nrows, ncols, nnz, values.data(), rowmap.data(), entries.data()); + for (int i = 0; i < nnz; i++) values.push_back(Kokkos::ArithTraits::one() * (1.0 * rand() / RAND_MAX)); + KokkosSparse::CrsMatrix A("A", nrows, ncols, nnz, values.data(), + rowmap.data(), entries.data()); EXPECT_EQ(A.numRows(), nrows); EXPECT_EQ(A.numCols(), ncols); EXPECT_EQ(A.nnz(), nnz); // verify rowmap, entries, values: should all be identical to original raw // arrays (except the rowmap elements are now size_type) - auto checkRowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto checkEntries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); - auto checkValues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); - for (int i = 0; i < nrows + 1; i++) - EXPECT_EQ(checkRowmap(i), (size_type)rowmap[i]); + auto checkRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto checkEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto checkValues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + for (int i = 0; i < nrows + 1; i++) EXPECT_EQ(checkRowmap(i), (size_type)rowmap[i]); for (int i = 0; i < nnz; i++) { EXPECT_EQ(checkEntries(i), entries[i]); EXPECT_EQ(checkValues(i), values[i]); } } -template +template void testCrsMatrixHostMirror() { using namespace Test; - using crs_matrix = - KokkosSparse::CrsMatrix; + using crs_matrix = KokkosSparse::CrsMatrix; using crs_matrix_host = typename crs_matrix::HostMirror; using crs_graph = typename crs_matrix::StaticCrsGraphType; using crs_graph_host = typename 
crs_graph::HostMirror; crs_matrix A = makeCrsMatrix(); - typename crs_matrix::values_type::HostMirror valuesHost("values host", - A.nnz()); - typename crs_matrix::row_map_type::HostMirror rowmapHost("rowmap host", - A.numRows() + 1); - typename crs_matrix::index_type::HostMirror entriesHost("entries host", - A.nnz()); + typename crs_matrix::values_type::HostMirror valuesHost("values host", A.nnz()); + typename crs_matrix::row_map_type::HostMirror rowmapHost("rowmap host", A.numRows() + 1); + typename crs_matrix::index_type::HostMirror entriesHost("entries host", A.nnz()); crs_graph_host graphHost(entriesHost, rowmapHost); // Test the two CrsMatrix constructors that take the StaticCrsGraph crs_matrix_host Ahost1("Ahost1", graphHost, A.numCols()); @@ -217,16 +196,13 @@ void testCrsMatrixHostMirror() { EXPECT_EQ(zeroHost.graph.row_map.extent(0), 0); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##crsmatrix##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - testCrsMatrix(); \ - testCrsMatrixRawConstructor(); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##crsmatrix_host_mirror##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - testCrsMatrixHostMirror(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##crsmatrix##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + testCrsMatrix(); \ + testCrsMatrixRawConstructor(); \ + } \ + TEST_F(TestCategory, sparse##_##crsmatrix_host_mirror##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + testCrsMatrixHostMirror(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_IOUtils.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_IOUtils.hpp new file mode 100644 index 000000000000..08c4a5f10c26 --- /dev/null +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_IOUtils.hpp @@ -0,0 +1,179 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" +#include "Test_vector_fixtures.hpp" + +#include + +namespace Test { + +struct TestIOUtils { + using size_type = size_t; + using lno_t = int; + using scalar_t = double; + + using exe_space = Kokkos::DefaultHostExecutionSpace; + using mem_space = typename exe_space::memory_space; + using host_device = Kokkos::Device; + + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + + using sp_matrix_type = KokkosSparse::CrsMatrix; + + static std::vector> get_sym_fixture() { + std::vector> A = { + {11.00, 12.00, 13.00, 14.00, 15.00, 16.00}, {12.00, 2.00, 0.00, 0.00, 0.00, 0.00}, + {13.00, 0.00, 0.00, 0.00, 0.00, 0.00}, {14.00, 0.00, 0.00, 4.00, 0.00, 0.00}, + {15.00, 0.00, 0.00, 0.00, 5.00, 0.00}, {16.00, 0.00, 0.00, 0.00, 0.00, 6.00}}; + return A; + } + + static std::vector> get_asym_fixture() { + std::vector> A = {{1.00, 0.00, 0.00, 9.00, 0.00, 0.00}, {0.00, 2.00, 0.00, 0.00, 0.00, 0.00}, + {0.00, 0.00, 0.00, 0.00, 0.00, 8.00}, {0.00, 0.00, 0.00, 4.00, 0.00, 0.00}, + {0.00, 7.00, 0.00, 0.00, 5.00, 0.00}, {0.00, 0.00, 0.00, 0.00, 0.00, 6.00}}; + return A; + } + + static void compare_matrices(const sp_matrix_type& A1, const sp_matrix_type& A2) { + // Compare matrices + auto row_map1 = A1.graph.row_map; + auto entries1 = A1.graph.entries; + auto values1 = A1.values; + auto row_map2 = A2.graph.row_map; + auto entries2 = A2.graph.entries; + 
auto values2 = A2.values; + ASSERT_EQ(row_map1.size(), row_map2.size()); + ASSERT_EQ(entries1.size(), entries2.size()); + ASSERT_EQ(values1.size(), values2.size()); + ASSERT_EQ(values1.size(), entries1.size()); + for (size_type i = 0; i < row_map1.size(); ++i) { + EXPECT_EQ(row_map1(i), row_map2(i)); + } + for (size_type i = 0; i < entries1.size(); ++i) { + EXPECT_EQ(entries1(i), entries2(i)); + EXPECT_EQ(values1(i), values2(i)); + } + } + + template + static void write_as_hb(const RowMapView& row_map, const EntriesView& entries, const ValuesView& values, + const std::string& filename, const char mtx_type) { + std::ofstream out(filename); + size_type nrows = row_map.size() - 1; + size_type nnz = entries.size(); + + out << "1SYMMETRIC MATRIX, FE APPROXIMATION TO BIHARMONIC OPERATOR ON " + "BEAM. NOS1 \n"; // Title is inaccurate, but doesn't matter + out << " 3 1 1 1 " + " 0 \n"; + out << "R" << mtx_type << "A " << nrows << " " << nrows << " " << nnz + << " 0 \n"; + out << "(16I5) (16I5) (5E16.8) " + " \n"; + for (size_type row_idx = 0; row_idx < nrows + 1; ++row_idx) { + out << row_map(row_idx) + 1 << " "; + } + out << "\n"; + for (size_type n = 0; n < nnz; ++n) { + out << entries[n] + 1 << " "; + } + out << "\n"; + for (size_type n = 0; n < nnz; ++n) { + out << values[n] << " "; + } + out << "\n"; + + out.close(); + } + + template + static void write_as_mtx(const RowMapView& row_map, const EntriesView& entries, const ValuesView& values, + const std::string& filename, const char mtx_type) { + std::ofstream out(filename); + size_type nrows = row_map.size() - 1; + + std::map type_name_map = { + {'U', "general"}, {'S', "symmetric"}, {'H', "hermitian"}, {'Z', "skew-symmetric"}}; + std::string type_name = type_name_map[mtx_type]; + + out << "%%MatrixMarket matrix coordinate real " << type_name << "\n"; + out << nrows << " " << nrows << " " << entries.size() << "\n"; + for (size_type row_idx = 0; row_idx < nrows; ++row_idx) { + const size_type row_nnz_begin = 
row_map(row_idx); + const size_type row_nnz_end = row_map(row_idx + 1); + for (size_type row_nnz = row_nnz_begin; row_nnz < row_nnz_end; ++row_nnz) { + const auto col_idx = entries(row_nnz); + const scalar_t value = values(row_nnz); + out << row_idx + 1 << " " << col_idx + 1 << " " << value << "\n"; + } + } + + out.close(); + } + + static void full_test(const std::vector>& fixture, const std::string& filename_root, + const char mtx_type) { + RowMapType row_map; + EntriesType entries; + ValuesType values; + compress_matrix(row_map, entries, values, fixture); + sp_matrix_type A("A", row_map.size() - 1, row_map.size() - 1, values.extent(0), values, row_map, entries); + const bool is_symmetric = mtx_type != 'U'; + std::string hb_file = filename_root + ".hb"; + std::string mtx_file = filename_root + ".mtx"; + + if (is_symmetric) { + sp_matrix_type L = KokkosSparse::Impl::kk_get_lower_triangle(A, NULL, false, 4, true, true); + auto lrow_map = L.graph.row_map; + auto lentries = L.graph.entries; + auto lvalues = L.values; + + write_as_hb(lrow_map, lentries, lvalues, hb_file, mtx_type); + write_as_mtx(lrow_map, lentries, lvalues, mtx_file, mtx_type); + } else { + write_as_hb(row_map, entries, values, hb_file, mtx_type); + write_as_mtx(row_map, entries, values, mtx_file, mtx_type); + } + + auto Ahb = KokkosSparse::Impl::read_kokkos_crst_matrix(hb_file.c_str()); + auto Amtx = KokkosSparse::Impl::read_kokkos_crst_matrix(mtx_file.c_str()); + if (mtx_type == 'Z') { + compare_matrices(Ahb, Amtx); + } else { + compare_matrices(Ahb, A); + compare_matrices(Amtx, A); + } + } + + static void test() { + const std::string filename_root = "test_sparse_ioutils"; + auto sym_fix = get_sym_fixture(); + auto asym_fix = get_asym_fixture(); + full_test(asym_fix, filename_root + "_asym", 'U'); + full_test(sym_fix, filename_root + "_sym", 'S'); + full_test(sym_fix, filename_root + "_herm", 'H'); + full_test(sym_fix, filename_root + "_skew", 'Z'); + } +}; + +// Test randomly generated Cs matrices 
+TEST_F(TestCategory, sparse_ioutils) { TestIOUtils::test(); } + +} // namespace Test diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_MergeMatrix.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_MergeMatrix.hpp index 85c35c004414..6a4f5b4e5d05 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_MergeMatrix.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_MergeMatrix.hpp @@ -31,11 +31,9 @@ namespace Test_Sparse_MergeMatrix { template -View from_std_vec(const std::string &label, - const std::vector &vec) { - Kokkos::View - uvec(vec.data(), vec.size()); +View from_std_vec(const std::string &label, const std::vector &vec) { + Kokkos::View uvec(vec.data(), + vec.size()); View result(label, uvec.size()); Kokkos::deep_copy(result, uvec); return result; @@ -54,13 +52,10 @@ struct CopyMmdToView { }; template -void expect_mmd_entries( - const MMD &mmd, - const std::vector &expected) { +void expect_mmd_entries(const MMD &mmd, const std::vector &expected) { using execution_space = typename MMD::execution_space; using Policy = Kokkos::RangePolicy; - using View = - Kokkos::View; + using View = Kokkos::View; // size is as expected EXPECT_EQ(mmd.size(), expected.size()); @@ -216,8 +211,7 @@ std::tuple view_view_case_3() { // -1 | 0 0 0 // 9 | 1 1 1 // 9 | 1 1 1 - AView a = from_std_vec("view-view-case-3-a", - {AEntry(-1), AEntry(9), AEntry(9)}); + AView a = from_std_vec("view-view-case-3-a", {AEntry(-1), AEntry(9), AEntry(9)}); BView b = from_std_vec("view-view-case-3-b", {0, 2, 7}); // 0: {} // 1: {0} @@ -239,8 +233,7 @@ std::tuple view_view_case_4() { // 6 | 1 1 0 // 6 | 1 1 0 AView a = from_std_vec("view-view-case-4-a", {1, 6, 6}); - BView b = - from_std_vec("view-view-case-4-b", {BEntry(-3), BEntry(-1), 7}); + BView b = from_std_vec("view-view-case-4-b", {BEntry(-3), BEntry(-1), 7}); // 0: {} // 1: {1} // 2: {1,1} @@ -261,10 +254,8 @@ std::tuple view_view_case_5() { // -3 | 0 0 0 // -2 | 0 0 0 // 2 | 1 1 1 - AView a = 
from_std_vec("view-view-case-5-a", - {AEntry{-3}, AEntry{-2}, AEntry{2}}); - BView b = from_std_vec("view-view-case-5-b", - {BEntry{-2}, BEntry{0}, BEntry{1}}); + AView a = from_std_vec("view-view-case-5-a", {AEntry{-3}, AEntry{-2}, AEntry{2}}); + BView b = from_std_vec("view-view-case-5-b", {BEntry{-2}, BEntry{0}, BEntry{1}}); // 0: {} // 1: {0} // 2: {0,0} @@ -291,8 +282,7 @@ void view_view_full_full() { for (size_t diagonal = 0; diagonal < a.size() + b.size() - 1; ++diagonal) { MMD mmd(a, b, diagonal); // every matrix entry on this diagonal is 0 - expect_mmd_entries( - mmd, std::vector(mmd.size(), mmd_value_type(0))); + expect_mmd_entries(mmd, std::vector(mmd.size(), mmd_value_type(0))); } } { @@ -300,8 +290,7 @@ void view_view_full_full() { for (size_t diagonal = 0; diagonal < a.size() + b.size() - 1; ++diagonal) { MMD mmd(a, b, diagonal); // every matrix entry on this diagonal is 0 - expect_mmd_entries( - mmd, std::vector(mmd.size(), mmd_value_type(1))); + expect_mmd_entries(mmd, std::vector(mmd.size(), mmd_value_type(1))); } } { @@ -490,8 +479,7 @@ void view_iota_full_full() { for (size_t diagonal = 0; diagonal < a.size() + b.size() - 1; ++diagonal) { MMD mmd(a, b, diagonal); // every matrix entry on this diagonal is 0 - expect_mmd_entries( - mmd, std::vector(mmd.size(), mmd_value_type(0))); + expect_mmd_entries(mmd, std::vector(mmd.size(), mmd_value_type(0))); } } { @@ -499,8 +487,7 @@ void view_iota_full_full() { for (size_t diagonal = 0; diagonal < a.size() + b.size() - 1; ++diagonal) { MMD mmd(a, b, diagonal); // every matrix entry on this diagonal is 1 - expect_mmd_entries( - mmd, std::vector(mmd.size(), mmd_value_type(1))); + expect_mmd_entries(mmd, std::vector(mmd.size(), mmd_value_type(1))); } } { @@ -530,16 +517,14 @@ void test_rank() { using AView = Kokkos::View; using BView = Kokkos::View; using MMD = KokkosSparse::Impl::MergeMatrixDiagonal; - static_assert(MMD::rank == 1, - "MergeMatrixDiagonal should look like a rank-1 view"); + 
static_assert(MMD::rank == 1, "MergeMatrixDiagonal should look like a rank-1 view"); } { using AView = Kokkos::View; using BView = KokkosKernels::Impl::Iota; using MMD = KokkosSparse::Impl::MergeMatrixDiagonal; - static_assert(MMD::rank == 1, - "MergeMatrixDiagonal should look like a rank-1 view"); + static_assert(MMD::rank == 1, "MergeMatrixDiagonal should look like a rank-1 view"); } } diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_SortCrs.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_SortCrs.hpp index c06509b3ecf1..6898b8aa9d9d 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_SortCrs.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_SortCrs.hpp @@ -42,29 +42,23 @@ enum : int { } template -void testSortCRS(default_lno_t numRows, default_lno_t numCols, - default_size_type nnz, bool doValues, bool doStructInterface, - int howExecSpecified) { +void testSortCRS(default_lno_t numRows, default_lno_t numCols, default_size_type nnz, bool doValues, + bool doStructInterface, int howExecSpecified) { using scalar_t = default_scalar; using lno_t = default_lno_t; using size_type = default_size_type; using exec_space = typename device_t::execution_space; - using crsMat_t = - KokkosSparse::CrsMatrix; + using crsMat_t = KokkosSparse::CrsMatrix; // Create a random matrix on device // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this // wouldn't test anything - crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 2, numCols / 2); + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, 2, numCols / 2); auto rowmap = A.graph.row_map; auto entries = A.graph.entries; auto values = A.values; - Kokkos::View rowmapHost("rowmap host", - numRows + 1); - Kokkos::View entriesHost("sorted entries host", - nnz); - Kokkos::View valuesHost("sorted values host", - nnz); + Kokkos::View rowmapHost("rowmap host", numRows + 1); + Kokkos::View 
entriesHost("sorted entries host", nnz); + Kokkos::View valuesHost("sorted values host", nnz); Kokkos::deep_copy(rowmapHost, rowmap); Kokkos::deep_copy(entriesHost, entries); Kokkos::deep_copy(valuesHost, values); @@ -72,9 +66,7 @@ void testSortCRS(default_lno_t numRows, default_lno_t numCols, ColValue() {} ColValue(lno_t c, scalar_t v) : col(c), val(v) {} bool operator<(const ColValue& rhs) const { return col < rhs.col; } - bool operator==(const ColValue& rhs) const { - return col == rhs.col && val == rhs.val; - } + bool operator==(const ColValue& rhs) const { return col == rhs.col && val == rhs.val; } lno_t col; scalar_t val; }; @@ -82,8 +74,7 @@ void testSortCRS(default_lno_t numRows, default_lno_t numCols, { for (lno_t i = 0; i < numRows; i++) { std::vector rowCopy; - for (size_type j = rowmapHost(i); j < rowmapHost(i + 1); j++) - rowCopy.emplace_back(entriesHost(j), valuesHost(j)); + for (size_type j = rowmapHost(i); j < rowmapHost(i + 1); j++) rowCopy.emplace_back(entriesHost(j), valuesHost(j)); std::sort(rowCopy.begin(), rowCopy.end()); // write sorted row back for (size_t j = 0; j < rowCopy.size(); j++) { @@ -96,63 +87,45 @@ void testSortCRS(default_lno_t numRows, default_lno_t numCols, if (doValues) { if (doStructInterface) { switch (howExecSpecified) { - case SortCrsTest::Instance: - KokkosSparse::sort_crs_matrix(exec_space(), A); - break; - case SortCrsTest::ExplicitType: - throw std::logic_error("Should not get here"); + case SortCrsTest::Instance: KokkosSparse::sort_crs_matrix(exec_space(), A); break; + case SortCrsTest::ExplicitType: throw std::logic_error("Should not get here"); case SortCrsTest::ImplicitType: KokkosSparse::sort_crs_matrix(A); } } else { switch (howExecSpecified) { case SortCrsTest::Instance: - KokkosSparse::sort_crs_matrix(exec_space(), A.graph.row_map, - A.graph.entries, A.values); + KokkosSparse::sort_crs_matrix(exec_space(), A.graph.row_map, A.graph.entries, A.values); break; case SortCrsTest::ExplicitType: - 
KokkosSparse::sort_crs_matrix(A.graph.row_map, - A.graph.entries, A.values); + KokkosSparse::sort_crs_matrix(A.graph.row_map, A.graph.entries, A.values); break; - case SortCrsTest::ImplicitType: - KokkosSparse::sort_crs_matrix(A.graph.row_map, A.graph.entries, - A.values); + case SortCrsTest::ImplicitType: KokkosSparse::sort_crs_matrix(A.graph.row_map, A.graph.entries, A.values); } } } else { if (doStructInterface) { switch (howExecSpecified) { - case SortCrsTest::Instance: - KokkosSparse::sort_crs_graph(exec_space(), A.graph); - break; - case SortCrsTest::ExplicitType: - throw std::logic_error("Should not get here"); + case SortCrsTest::Instance: KokkosSparse::sort_crs_graph(exec_space(), A.graph); break; + case SortCrsTest::ExplicitType: throw std::logic_error("Should not get here"); case SortCrsTest::ImplicitType: KokkosSparse::sort_crs_graph(A.graph); } } else { switch (howExecSpecified) { - case SortCrsTest::Instance: - KokkosSparse::sort_crs_graph(exec_space(), A.graph.row_map, - A.graph.entries); - break; + case SortCrsTest::Instance: KokkosSparse::sort_crs_graph(exec_space(), A.graph.row_map, A.graph.entries); break; case SortCrsTest::ExplicitType: - KokkosSparse::sort_crs_graph(A.graph.row_map, - A.graph.entries); + KokkosSparse::sort_crs_graph(A.graph.row_map, A.graph.entries); break; - case SortCrsTest::ImplicitType: - KokkosSparse::sort_crs_graph(A.graph.row_map, A.graph.entries); + case SortCrsTest::ImplicitType: KokkosSparse::sort_crs_graph(A.graph.row_map, A.graph.entries); } } } // Copy to host and compare - Kokkos::View entriesOut("sorted entries host", - nnz); - Kokkos::View valuesOut("sorted values host", - nnz); + Kokkos::View entriesOut("sorted entries host", nnz); + Kokkos::View valuesOut("sorted values host", nnz); Kokkos::deep_copy(entriesOut, entries); Kokkos::deep_copy(valuesOut, values); for (size_type i = 0; i < nnz; i++) { - EXPECT_EQ(entriesHost(i), entriesOut(i)) - << "Sorted column indices are wrong!"; + EXPECT_EQ(entriesHost(i), 
entriesOut(i)) << "Sorted column indices are wrong!"; if (doValues) { EXPECT_EQ(valuesHost(i), valuesOut(i)) << "Sorted values are wrong!"; } @@ -167,23 +140,19 @@ void testSortCRSUnmanaged(bool doValues, bool doStructInterface) { using size_type = default_size_type; using exec_space = typename device_t::execution_space; using crsMat_t = - KokkosSparse::CrsMatrix, - size_type>; - using crsMat_Managed_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - const lno_t numRows = 50; - const lno_t numCols = numRows; - size_type nnz = numRows * 5; + KokkosSparse::CrsMatrix, size_type>; + using crsMat_Managed_t = KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + const lno_t numRows = 50; + const lno_t numCols = numRows; + size_type nnz = numRows * 5; // Create a random matrix on device // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this // wouldn't test anything crsMat_Managed_t A_managed = - KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 2, numCols / 2); + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, 2, numCols / 2); crsMat_t A(A_managed); auto rowmap = A.graph.row_map; auto entries = A.graph.entries; @@ -192,32 +161,29 @@ void testSortCRSUnmanaged(bool doValues, bool doStructInterface) { if (doStructInterface) { KokkosSparse::sort_crs_matrix(A); } else { - KokkosSparse::sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); + KokkosSparse::sort_crs_matrix(A.graph.row_map, A.graph.entries, + A.values); } } else { if (doStructInterface) { KokkosSparse::sort_crs_graph(A.graph); } else { - KokkosSparse::sort_crs_graph( - A.graph.row_map, A.graph.entries); + KokkosSparse::sort_crs_graph(A.graph.row_map, A.graph.entries); } } } 
template -void testSortAndMerge(bool justGraph, int howExecSpecified, - bool doStructInterface, bool inPlace, int testCase) { +void testSortAndMerge(bool justGraph, int howExecSpecified, bool doStructInterface, bool inPlace, int testCase) { using size_type = default_size_type; using lno_t = default_lno_t; using scalar_t = default_scalar; using exec_space = typename device_t::execution_space; - using crsMat_t = - KokkosSparse::CrsMatrix; - using graph_t = typename crsMat_t::staticcrsgraph_type; - using rowmap_t = typename crsMat_t::row_map_type::non_const_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; + using crsMat_t = KokkosSparse::CrsMatrix; + using graph_t = typename crsMat_t::staticcrsgraph_type; + using rowmap_t = typename crsMat_t::row_map_type::non_const_type; + using entries_t = typename crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; using Kokkos::HostSpace; using Kokkos::MemoryTraits; using Kokkos::Unmanaged; @@ -326,34 +292,25 @@ void testSortAndMerge(bool justGraph, int howExecSpecified, } } size_type nnz = inEntries.size(); - Kokkos::View> hostInRowmap( - inRowmap.data(), inRowmap.size()); - Kokkos::View> hostInEntries( - inEntries.data(), nnz); - Kokkos::View> hostInValues( - inValues.data(), nnz); + Kokkos::View> hostInRowmap(inRowmap.data(), inRowmap.size()); + Kokkos::View> hostInEntries(inEntries.data(), nnz); + Kokkos::View> hostInValues(inValues.data(), nnz); rowmap_t devInRowmap("in rowmap", inRowmap.size()); entries_t devInEntries("in entries", nnz); values_t devInValues("in values", nnz); Kokkos::deep_copy(devInRowmap, hostInRowmap); Kokkos::deep_copy(devInEntries, hostInEntries); Kokkos::deep_copy(devInValues, hostInValues); - crsMat_t input("Input", nrows, ncols, nnz, devInValues, devInRowmap, - devInEntries); + crsMat_t input("Input", nrows, ncols, nnz, devInValues, devInRowmap, devInEntries); crsMat_t output; if (justGraph) { graph_t outputGraph; // 
Testing sort_and_merge_graph if (doStructInterface) { switch (howExecSpecified) { - case SortCrsTest::Instance: - outputGraph = - KokkosSparse::sort_and_merge_graph(exec_space(), input.graph); - break; - case SortCrsTest::ExplicitType: - throw std::logic_error("Should not get here"); - case SortCrsTest::ImplicitType: - outputGraph = KokkosSparse::sort_and_merge_graph(input.graph); + case SortCrsTest::Instance: outputGraph = KokkosSparse::sort_and_merge_graph(exec_space(), input.graph); break; + case SortCrsTest::ExplicitType: throw std::logic_error("Should not get here"); + case SortCrsTest::ImplicitType: outputGraph = KokkosSparse::sort_and_merge_graph(input.graph); } } else { rowmap_t devOutRowmap; @@ -361,44 +318,35 @@ void testSortAndMerge(bool justGraph, int howExecSpecified, if (inPlace) { // Start out with the output views containing the input, so that // sort/merge is done in-place - devOutRowmap = rowmap_t("devOutRowmap", input.graph.row_map.extent(0)); - devOutEntries = - entries_t("devOutEntries", input.graph.entries.extent(0)); + devOutRowmap = rowmap_t("devOutRowmap", input.graph.row_map.extent(0)); + devOutEntries = entries_t("devOutEntries", input.graph.entries.extent(0)); Kokkos::deep_copy(devOutRowmap, input.graph.row_map); Kokkos::deep_copy(devOutEntries, input.graph.entries); } switch (howExecSpecified) { case SortCrsTest::Instance: { if (inPlace) { - KokkosSparse::sort_and_merge_graph(exec_space(), devOutRowmap, - devOutEntries, devOutRowmap, - devOutEntries); + KokkosSparse::sort_and_merge_graph(exec_space(), devOutRowmap, devOutEntries, devOutRowmap, devOutEntries); } else { - KokkosSparse::sort_and_merge_graph( - exec_space(), input.graph.row_map, input.graph.entries, - devOutRowmap, devOutEntries); + KokkosSparse::sort_and_merge_graph(exec_space(), input.graph.row_map, input.graph.entries, devOutRowmap, + devOutEntries); } break; } case SortCrsTest::ExplicitType: { if (inPlace) { - KokkosSparse::sort_and_merge_graph( - devOutRowmap, 
devOutEntries, devOutRowmap, devOutEntries); + KokkosSparse::sort_and_merge_graph(devOutRowmap, devOutEntries, devOutRowmap, devOutEntries); } else { - KokkosSparse::sort_and_merge_graph( - input.graph.row_map, input.graph.entries, devOutRowmap, - devOutEntries); + KokkosSparse::sort_and_merge_graph(input.graph.row_map, input.graph.entries, devOutRowmap, + devOutEntries); } break; } case SortCrsTest::ImplicitType: { if (inPlace) { - KokkosSparse::sort_and_merge_graph(devOutRowmap, devOutEntries, - devOutRowmap, devOutEntries); + KokkosSparse::sort_and_merge_graph(devOutRowmap, devOutEntries, devOutRowmap, devOutEntries); } else { - KokkosSparse::sort_and_merge_graph(input.graph.row_map, - input.graph.entries, - devOutRowmap, devOutEntries); + KokkosSparse::sort_and_merge_graph(input.graph.row_map, input.graph.entries, devOutRowmap, devOutEntries); } } } @@ -410,13 +358,9 @@ void testSortAndMerge(bool justGraph, int howExecSpecified, // Testing sort_and_merge_matrix if (doStructInterface) { switch (howExecSpecified) { - case SortCrsTest::Instance: - output = KokkosSparse::sort_and_merge_matrix(exec_space(), input); - break; - case SortCrsTest::ExplicitType: - throw std::logic_error("Should not get here"); - case SortCrsTest::ImplicitType: - output = KokkosSparse::sort_and_merge_matrix(input); + case SortCrsTest::Instance: output = KokkosSparse::sort_and_merge_matrix(exec_space(), input); break; + case SortCrsTest::ExplicitType: throw std::logic_error("Should not get here"); + case SortCrsTest::ImplicitType: output = KokkosSparse::sort_and_merge_matrix(input); } } else { rowmap_t devOutRowmap; @@ -425,10 +369,9 @@ void testSortAndMerge(bool justGraph, int howExecSpecified, if (inPlace) { // Start out with the output views containing the input, so that // sort/merge is done in-place - devOutRowmap = rowmap_t("devOutRowmap", input.graph.row_map.extent(0)); - devOutEntries = - entries_t("devOutEntries", input.graph.entries.extent(0)); - devOutValues = 
values_t("devOutValues", input.values.extent(0)); + devOutRowmap = rowmap_t("devOutRowmap", input.graph.row_map.extent(0)); + devOutEntries = entries_t("devOutEntries", input.graph.entries.extent(0)); + devOutValues = values_t("devOutValues", input.values.extent(0)); Kokkos::deep_copy(devOutRowmap, input.graph.row_map); Kokkos::deep_copy(devOutEntries, input.graph.entries); Kokkos::deep_copy(devOutValues, input.values); @@ -436,61 +379,50 @@ void testSortAndMerge(bool justGraph, int howExecSpecified, switch (howExecSpecified) { case SortCrsTest::Instance: { if (inPlace) { - KokkosSparse::sort_and_merge_matrix( - exec_space(), devOutRowmap, devOutEntries, devOutValues, - devOutRowmap, devOutEntries, devOutValues); + KokkosSparse::sort_and_merge_matrix(exec_space(), devOutRowmap, devOutEntries, devOutValues, devOutRowmap, + devOutEntries, devOutValues); } else { - KokkosSparse::sort_and_merge_matrix( - exec_space(), input.graph.row_map, input.graph.entries, - input.values, devOutRowmap, devOutEntries, devOutValues); + KokkosSparse::sort_and_merge_matrix(exec_space(), input.graph.row_map, input.graph.entries, input.values, + devOutRowmap, devOutEntries, devOutValues); } break; } case SortCrsTest::ExplicitType: { if (inPlace) { - KokkosSparse::sort_and_merge_matrix( - devOutRowmap, devOutEntries, devOutValues, devOutRowmap, - devOutEntries, devOutValues); + KokkosSparse::sort_and_merge_matrix(devOutRowmap, devOutEntries, devOutValues, devOutRowmap, + devOutEntries, devOutValues); } else { - KokkosSparse::sort_and_merge_matrix( - input.graph.row_map, input.graph.entries, input.values, - devOutRowmap, devOutEntries, devOutValues); + KokkosSparse::sort_and_merge_matrix(input.graph.row_map, input.graph.entries, input.values, + devOutRowmap, devOutEntries, devOutValues); } break; } case SortCrsTest::ImplicitType: { if (inPlace) { - KokkosSparse::sort_and_merge_matrix(devOutRowmap, devOutEntries, - devOutValues, devOutRowmap, - devOutEntries, devOutValues); + 
KokkosSparse::sort_and_merge_matrix(devOutRowmap, devOutEntries, devOutValues, devOutRowmap, devOutEntries, + devOutValues); } else { - KokkosSparse::sort_and_merge_matrix( - input.graph.row_map, input.graph.entries, input.values, - devOutRowmap, devOutEntries, devOutValues); + KokkosSparse::sort_and_merge_matrix(input.graph.row_map, input.graph.entries, input.values, devOutRowmap, + devOutEntries, devOutValues); } } } // and then construct output from views - output = crsMat_t("Output", nrows, ncols, devOutValues.extent(0), - devOutValues, devOutRowmap, devOutEntries); + output = crsMat_t("Output", nrows, ncols, devOutValues.extent(0), devOutValues, devOutRowmap, devOutEntries); } EXPECT_EQ(output.numRows(), nrows); EXPECT_EQ(output.numCols(), ncols); } - auto outRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - output.graph.row_map); - auto outEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - output.graph.entries); - auto outValues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.values); + auto outRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.graph.row_map); + auto outEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.graph.entries); + auto outValues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.values); EXPECT_EQ(goldRowmap.size(), outRowmap.extent(0)); EXPECT_EQ(goldEntries.size(), outEntries.extent(0)); if (!justGraph) { EXPECT_EQ(goldValues.size(), outValues.extent(0)); EXPECT_EQ(goldValues.size(), output.nnz()); } - for (size_t i = 0; i < goldRowmap.size(); i++) - EXPECT_EQ(goldRowmap[i], outRowmap(i)); + for (size_t i = 0; i < goldRowmap.size(); i++) EXPECT_EQ(goldRowmap[i], outRowmap(i)); for (size_t i = 0; i < goldEntries.size(); i++) { EXPECT_EQ(goldEntries[i], outEntries(i)); if (!justGraph) { @@ -504,14 +436,10 @@ TEST_F(TestCategory, common_sort_crsgraph) { for (int howExecSpecified = 0; howExecSpecified < 3; 
howExecSpecified++) { // If using the struct interface (StaticCrsGraph), cannot use ExplicitType // because the exec space type is determined from the graph. - if (doStructInterface && howExecSpecified == SortCrsTest::ExplicitType) - continue; - testSortCRS(10, 10, 20, false, doStructInterface, - howExecSpecified); - testSortCRS(100, 100, 2000, false, doStructInterface, - howExecSpecified); - testSortCRS(1000, 1000, 30000, false, doStructInterface, - howExecSpecified); + if (doStructInterface && howExecSpecified == SortCrsTest::ExplicitType) continue; + testSortCRS(10, 10, 20, false, doStructInterface, howExecSpecified); + testSortCRS(100, 100, 2000, false, doStructInterface, howExecSpecified); + testSortCRS(1000, 1000, 30000, false, doStructInterface, howExecSpecified); } testSortCRSUnmanaged(false, doStructInterface); } @@ -523,14 +451,10 @@ TEST_F(TestCategory, common_sort_crsmatrix) { for (int howExecSpecified = 0; howExecSpecified < 3; howExecSpecified++) { // If using the struct interface (CrsMatrix), cannot use ExplicitType // because the exec space type is determined from the matrix. 
- if (doStructInterface && howExecSpecified == SortCrsTest::ExplicitType) - continue; - testSortCRS(10, 10, 20, true, doStructInterface, - howExecSpecified); - testSortCRS(100, 100, 2000, true, doStructInterface, - howExecSpecified); - testSortCRS(1000, 1000, 30000, true, doStructInterface, - howExecSpecified); + if (doStructInterface && howExecSpecified == SortCrsTest::ExplicitType) continue; + testSortCRS(10, 10, 20, true, doStructInterface, howExecSpecified); + testSortCRS(100, 100, 2000, true, doStructInterface, howExecSpecified); + testSortCRS(1000, 1000, 30000, true, doStructInterface, howExecSpecified); } testSortCRSUnmanaged(true, doStructInterface); } @@ -539,24 +463,18 @@ TEST_F(TestCategory, common_sort_crsmatrix) { TEST_F(TestCategory, common_sort_crs_longrows) { // Matrix/graph with one very long row // Just test this once with graph, and once with matrix - testSortCRS(1, 50000, 10000, false, false, - SortCrsTest::ImplicitType); - testSortCRS(1, 50000, 10000, true, false, - SortCrsTest::ImplicitType); + testSortCRS(1, 50000, 10000, false, false, SortCrsTest::ImplicitType); + testSortCRS(1, 50000, 10000, true, false, SortCrsTest::ImplicitType); } TEST_F(TestCategory, common_sort_merge_crsmatrix) { for (int testCase = 0; testCase < 5; testCase++) { - for (int doStructInterface = 0; doStructInterface < 2; - doStructInterface++) { + for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { for (int howExecSpecified = 0; howExecSpecified < 3; howExecSpecified++) { for (int inPlace = 0; inPlace < 2; inPlace++) { - if (doStructInterface && - howExecSpecified == SortCrsTest::ExplicitType) - continue; + if (doStructInterface && howExecSpecified == SortCrsTest::ExplicitType) continue; if (doStructInterface && inPlace) continue; - testSortAndMerge(false, howExecSpecified, - doStructInterface, inPlace, testCase); + testSortAndMerge(false, howExecSpecified, doStructInterface, inPlace, testCase); } } } @@ -565,16 +483,12 @@ TEST_F(TestCategory, 
common_sort_merge_crsmatrix) { TEST_F(TestCategory, common_sort_merge_crsgraph) { for (int testCase = 0; testCase < 5; testCase++) { - for (int doStructInterface = 0; doStructInterface < 2; - doStructInterface++) { + for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { for (int howExecSpecified = 0; howExecSpecified < 3; howExecSpecified++) { for (int inPlace = 0; inPlace < 2; inPlace++) { - if (doStructInterface && - howExecSpecified == SortCrsTest::ExplicitType) - continue; + if (doStructInterface && howExecSpecified == SortCrsTest::ExplicitType) continue; if (doStructInterface && inPlace) continue; - testSortAndMerge(true, howExecSpecified, - doStructInterface, inPlace, testCase); + testSortAndMerge(true, howExecSpecified, doStructInterface, inPlace, testCase); } } } diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_TestUtils_RandCsMat.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_TestUtils_RandCsMat.hpp index 029ddd14b09b..f958e2423f4f 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_TestUtils_RandCsMat.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_TestUtils_RandCsMat.hpp @@ -19,12 +19,13 @@ namespace Test { template void doCsMat(size_t m, size_t n, ScalarType min_val, ScalarType max_val) { + using RandCs = RandCsMatrix; + using size_type = typename RandCs::size_type; auto expected_min = ScalarType(1.0); size_t expected_nnz = 0; - RandCsMatrix cm(m, n, min_val, max_val); + RandCs cm(m, n, min_val, max_val); - for (size_t i = 0; i < cm.get_nnz(); ++i) - ASSERT_GE(cm(i), expected_min) << cm.info; + for (size_type i = 0; i < cm.get_nnz(); ++i) ASSERT_GE(cm(i), expected_min) << cm.info; auto map_d = cm.get_map(); auto map = Kokkos::create_mirror_view(map_d); @@ -66,10 +67,8 @@ void doAllCsMat(size_t m, size_t n) { doCsMat(m, n, min, max); // Verify that CsMat can be instantiated with complex types. 
- RandCsMatrix, Kokkos::LayoutLeft, ExeSpaceType> cmcf( - m, n, min, max); - RandCsMatrix, Kokkos::LayoutRight, ExeSpaceType> cmcd( - m, n, min, max); + RandCsMatrix, Kokkos::LayoutLeft, ExeSpaceType> cmcf(m, n, min, max); + RandCsMatrix, Kokkos::LayoutRight, ExeSpaceType> cmcd(m, n, min, max); } // Test randomly generated Cs matrices @@ -83,4 +82,4 @@ TEST_F(TestCategory, sparse_randcsmat) { doAllCsMat(dim, dim * 3); } } -} // namespace Test \ No newline at end of file +} // namespace Test diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Transpose.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Transpose.hpp index 05773b6b752a..da430c6ca4a0 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Transpose.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Transpose.hpp @@ -42,83 +42,62 @@ struct ExactCompare { template void testTranspose(int numRows, int numCols, bool doValues) { - using exec_space = typename device_t::execution_space; - using range_pol = Kokkos::RangePolicy; - using scalar_t = default_scalar; - using lno_t = default_lno_t; - using size_type = default_size_type; - using crsMat_t = typename KokkosSparse::CrsMatrix; - using c_rowmap_t = typename crsMat_t::row_map_type; + using exec_space = typename device_t::execution_space; + using range_pol = Kokkos::RangePolicy; + using scalar_t = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + using crsMat_t = typename KokkosSparse::CrsMatrix; + using c_rowmap_t = typename crsMat_t::row_map_type; using c_entries_t = typename crsMat_t::index_type; using c_values_t = typename crsMat_t::values_type; using rowmap_t = typename crsMat_t::row_map_type::non_const_type; using entries_t = typename crsMat_t::index_type::non_const_type; using values_t = typename crsMat_t::values_type::non_const_type; - size_type nnz = 10 * numRows; + size_type nnz = (numRows * numCols > 0) ? 
10 * numRows : 0; // Generate a matrix that has 0 entries in some rows - crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 3 * 10, numRows / 2); + crsMat_t input_mat = + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, 3 * 10, numRows / 2); // compute the transpose while unsorted, then transpose again rowmap_t t_rowmap("Rowmap^T", numCols + 1); // this view is initialized to 0 - entries_t t_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T"), - input_mat.graph.entries.extent(0)); - values_t t_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), - input_mat.values.extent(0)); + entries_t t_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T"), input_mat.graph.entries.extent(0)); + values_t t_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), input_mat.values.extent(0)); rowmap_t tt_rowmap("Rowmap^T^T", numRows + 1); // this view is initialized to 0 - entries_t tt_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T^T"), - input_mat.graph.entries.extent(0)); - values_t tt_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), - input_mat.values.extent(0)); + entries_t tt_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T^T"), + input_mat.graph.entries.extent(0)); + values_t tt_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), input_mat.values.extent(0)); if (doValues) { - KokkosSparse::Impl::transpose_matrix( - numRows, numCols, input_mat.graph.row_map, input_mat.graph.entries, - input_mat.values, t_rowmap, t_entries, t_values); - KokkosSparse::Impl::transpose_matrix( - numCols, numRows, t_rowmap, t_entries, t_values, tt_rowmap, tt_entries, - tt_values); + KokkosSparse::Impl::transpose_matrix(numRows, numCols, input_mat.graph.row_map, input_mat.graph.entries, + input_mat.values, t_rowmap, t_entries, t_values); + KokkosSparse::Impl::transpose_matrix(numCols, numRows, 
t_rowmap, t_entries, t_values, tt_rowmap, + tt_entries, tt_values); } else { - KokkosSparse::Impl::transpose_graph( - numRows, numCols, input_mat.graph.row_map, input_mat.graph.entries, - t_rowmap, t_entries); - KokkosSparse::Impl::transpose_graph( + KokkosSparse::Impl::transpose_graph( + numRows, numCols, input_mat.graph.row_map, input_mat.graph.entries, t_rowmap, t_entries); + KokkosSparse::Impl::transpose_graph( numCols, numRows, t_rowmap, t_entries, tt_rowmap, tt_entries); } // Sort both the transpose-transpose, and the original matrix (to compare // directly) KokkosSparse::sort_crs_matrix(input_mat); - KokkosSparse::sort_crs_matrix( - tt_rowmap, tt_entries, tt_values); + KokkosSparse::sort_crs_matrix(tt_rowmap, tt_entries, tt_values); // The views should now be exactly identical, since they represent the same // matrix and are sorted size_type rowmapDiffs; - Kokkos::parallel_reduce( - range_pol(0, numRows + 1), - ExactCompare(input_mat.graph.row_map, tt_rowmap), - rowmapDiffs); + Kokkos::parallel_reduce(range_pol(0, numRows + 1), + ExactCompare(input_mat.graph.row_map, tt_rowmap), rowmapDiffs); size_type entriesDiffs; - Kokkos::parallel_reduce( - range_pol(0, input_mat.nnz()), - ExactCompare(input_mat.graph.entries, tt_entries), - entriesDiffs); + Kokkos::parallel_reduce(range_pol(0, input_mat.nnz()), + ExactCompare(input_mat.graph.entries, tt_entries), entriesDiffs); EXPECT_EQ(size_type(0), rowmapDiffs); EXPECT_EQ(size_type(0), entriesDiffs); if (doValues) { size_type valuesDiffs; - Kokkos::parallel_reduce( - range_pol(0, input_mat.nnz()), - ExactCompare(input_mat.values, tt_values), - valuesDiffs); + Kokkos::parallel_reduce(range_pol(0, input_mat.nnz()), + ExactCompare(input_mat.values, tt_values), valuesDiffs); EXPECT_EQ(size_type(0), valuesDiffs); } } @@ -136,24 +115,19 @@ void CompareBsrMatrices(bsrMat_t& A, bsrMat_t& B) { // matrix and are sorted size_type rowmapDiffs; - Kokkos::parallel_reduce( - range_pol(0, A.numRows() + 1), - 
ExactCompare(A.graph.row_map, B.graph.row_map), - rowmapDiffs); + Kokkos::parallel_reduce(range_pol(0, A.numRows() + 1), + ExactCompare(A.graph.row_map, B.graph.row_map), rowmapDiffs); size_type entriesDiffs; - Kokkos::parallel_reduce( - range_pol(0, A.nnz()), - ExactCompare(A.graph.entries, B.graph.entries), - entriesDiffs); + Kokkos::parallel_reduce(range_pol(0, A.nnz()), ExactCompare(A.graph.entries, B.graph.entries), + entriesDiffs); EXPECT_EQ(size_type(0), rowmapDiffs); EXPECT_EQ(size_type(0), entriesDiffs); size_type valuesDiffs; Kokkos::parallel_reduce(range_pol(0, A.nnz() * A.blockDim() * A.blockDim()), - ExactCompare(A.values, B.values), - valuesDiffs); + ExactCompare(A.values, B.values), valuesDiffs); EXPECT_EQ(size_type(0), valuesDiffs); } @@ -162,9 +136,7 @@ void testTransposeBsrRef() { using scalar_t = default_scalar; using lno_t = default_lno_t; using size_type = default_size_type; - using bsrMat_t = - typename KokkosSparse::Experimental::BsrMatrix; + using bsrMat_t = typename KokkosSparse::Experimental::BsrMatrix; using rowmap_t = typename bsrMat_t::row_map_type::non_const_type; using entries_t = typename bsrMat_t::index_type::non_const_type; using values_t = typename bsrMat_t::values_type::non_const_type; @@ -182,22 +154,18 @@ void testTransposeBsrRef() { const size_type row_mapPtr[] = {0, 2, 3, 5, 7}; const lno_t entriesPtr[] = {2, 3, 1, 0, 1, 1, 3}; - const scalar_t valuesPtr[] = { - 0.0, 0.1, 0.2, 0.3, 1.0, 1.1, 1.2, 1.3, 2.0, 2.1, 2.2, 2.3, 3.0, 3.1, - 3.2, 3.3, 4.0, 4.1, 4.2, 4.3, 5.0, 5.1, 5.2, 5.3, 6.0, 6.1, 6.2, 6.3}; + const scalar_t valuesPtr[] = {0.0, 0.1, 0.2, 0.3, 1.0, 1.1, 1.2, 1.3, 2.0, 2.1, 2.2, 2.3, 3.0, 3.1, + 3.2, 3.3, 4.0, 4.1, 4.2, 4.3, 5.0, 5.1, 5.2, 5.3, 6.0, 6.1, 6.2, 6.3}; - typename rowmap_t::HostMirror::const_type row_map_h(row_mapPtr, - numRows + 1); + typename rowmap_t::HostMirror::const_type row_map_h(row_mapPtr, numRows + 1); typename entries_t::HostMirror::const_type entries_h(entriesPtr, nnz); - typename 
values_t::HostMirror::const_type values_h( - valuesPtr, nnz * block_size * block_size); + typename values_t::HostMirror::const_type values_h(valuesPtr, nnz * block_size * block_size); Kokkos::deep_copy(row_map, row_map_h); Kokkos::deep_copy(entries, entries_h); Kokkos::deep_copy(values, values_h); - A = bsrMat_t("A", numRows, numRows, nnz, values, row_map, entries, - block_size); + A = bsrMat_t("A", numRows, numRows, nnz, values, row_map, entries, block_size); } // Constructing the transpose of A manually @@ -209,22 +177,18 @@ void testTransposeBsrRef() { const size_type row_mapPtr[] = {0, 1, 4, 5, 7}; const lno_t entriesPtr[] = {2, 1, 2, 3, 0, 0, 3}; - const scalar_t valuesPtr[] = { - 3.0, 3.2, 3.1, 3.3, 2.0, 2.2, 2.1, 2.3, 4.0, 4.2, 4.1, 4.3, 5.0, 5.2, - 5.1, 5.3, 0.0, 0.2, 0.1, 0.3, 1.0, 1.2, 1.1, 1.3, 6.0, 6.2, 6.1, 6.3}; + const scalar_t valuesPtr[] = {3.0, 3.2, 3.1, 3.3, 2.0, 2.2, 2.1, 2.3, 4.0, 4.2, 4.1, 4.3, 5.0, 5.2, + 5.1, 5.3, 0.0, 0.2, 0.1, 0.3, 1.0, 1.2, 1.1, 1.3, 6.0, 6.2, 6.1, 6.3}; - typename rowmap_t::HostMirror::const_type row_map_h(row_mapPtr, - numRows + 1); + typename rowmap_t::HostMirror::const_type row_map_h(row_mapPtr, numRows + 1); typename entries_t::HostMirror::const_type entries_h(entriesPtr, nnz); - typename values_t::HostMirror::const_type values_h( - valuesPtr, nnz * block_size * block_size); + typename values_t::HostMirror::const_type values_h(valuesPtr, nnz * block_size * block_size); Kokkos::deep_copy(row_map, row_map_h); Kokkos::deep_copy(entries, entries_h); Kokkos::deep_copy(values, values_h); - At_ref = bsrMat_t("A", numRows, numRows, nnz, values, row_map, entries, - block_size); + At_ref = bsrMat_t("A", numRows, numRows, nnz, values, row_map, entries, block_size); } bsrMat_t At = KokkosSparse::Impl::transpose_bsr_matrix(A); @@ -235,13 +199,11 @@ void testTransposeBsrRef() { template void testTransposeBsr(int numRows, int numCols, int blockSize) { - using scalar_t = default_scalar; - using lno_t = default_lno_t; - using 
size_type = default_size_type; - using exec_space = typename device_t::execution_space; - using bsrMat_t = - typename KokkosSparse::Experimental::BsrMatrix; + using scalar_t = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + using exec_space = typename device_t::execution_space; + using bsrMat_t = typename KokkosSparse::Experimental::BsrMatrix; using c_rowmap_t = typename bsrMat_t::row_map_type; using c_entries_t = typename bsrMat_t::index_type; using c_values_t = typename bsrMat_t::values_type; @@ -250,38 +212,26 @@ void testTransposeBsr(int numRows, int numCols, int blockSize) { using values_t = typename bsrMat_t::values_type::non_const_type; // Generate a matrix that has 0 entries in some rows - size_type nnz = 10 * numRows; - bsrMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( - blockSize, numRows, numCols, nnz, 3, numRows / 4); + size_type nnz = (numRows * numCols > 0) ? 10 * numRows : 0; + bsrMat_t A = + KokkosSparse::Impl::kk_generate_sparse_matrix(blockSize, numRows, numCols, nnz, 3, numRows / 4); // compute the transpose while unsorted, then transpose again rowmap_t t_rowmap("Rowmap^T", numCols + 1); // this view is initialized to 0 - entries_t t_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T"), - A.graph.entries.extent(0)); - values_t t_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), - A.values.extent(0)); + entries_t t_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T"), A.graph.entries.extent(0)); + values_t t_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), A.values.extent(0)); rowmap_t tt_rowmap("Rowmap^T^T", numRows + 1); // this view is initialized to 0 - entries_t tt_entries( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T^T"), - A.graph.entries.extent(0)); - values_t tt_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), - A.values.extent(0)); - - KokkosSparse::Impl::transpose_bsr_matrix( - 
numRows, numCols, blockSize, A.graph.row_map, A.graph.entries, A.values, - t_rowmap, t_entries, t_values); - - KokkosSparse::Impl::transpose_bsr_matrix< - rowmap_t, entries_t, values_t, rowmap_t, entries_t, values_t, exec_space>( - numCols, numRows, blockSize, t_rowmap, t_entries, t_values, tt_rowmap, - tt_entries, tt_values); - bsrMat_t Att("Att", numRows, numCols, nnz, tt_values, tt_rowmap, tt_entries, - blockSize); + entries_t tt_entries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries^T^T"), A.graph.entries.extent(0)); + values_t tt_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values^T"), A.values.extent(0)); + + KokkosSparse::Impl::transpose_bsr_matrix(numRows, numCols, blockSize, A.graph.row_map, A.graph.entries, + A.values, t_rowmap, t_entries, t_values); + + KokkosSparse::Impl::transpose_bsr_matrix( + numCols, numRows, blockSize, t_rowmap, t_entries, t_values, tt_rowmap, tt_entries, tt_values); + bsrMat_t Att("Att", numRows, numCols, nnz, tt_values, tt_rowmap, tt_entries, blockSize); // Sort both the transpose-transpose, and the original matrix (to compare // directly) @@ -294,6 +244,9 @@ void testTransposeBsr(int numRows, int numCols, int blockSize) { TEST_F(TestCategory, sparse_transpose_matrix) { // Test both matrix and graph transpose with various sizes + testTranspose(0, 0, true); + testTranspose(100, 0, true); + testTranspose(0, 100, true); testTranspose(100, 100, true); testTranspose(500, 50, true); testTranspose(50, 500, true); @@ -303,6 +256,9 @@ TEST_F(TestCategory, sparse_transpose_matrix) { } TEST_F(TestCategory, sparse_transpose_graph) { + testTranspose(0, 0, false); + testTranspose(100, 0, false); + testTranspose(0, 100, false); testTranspose(100, 100, false); testTranspose(500, 50, false); testTranspose(50, 500, false); @@ -314,6 +270,9 @@ TEST_F(TestCategory, sparse_transpose_graph) { TEST_F(TestCategory, sparse_transpose_bsr_matrix) { testTransposeBsrRef(); // Test bsrMatrix transpose with various sizes + 
testTransposeBsr(0, 0, 5); + testTransposeBsr(100, 0, 5); + testTransposeBsr(0, 100, 5); testTransposeBsr(100, 100, 3); testTransposeBsr(500, 50, 5); testTransposeBsr(50, 500, 16); diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Utils.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Utils.hpp index cbd81e9b08f9..870aedae7c88 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Utils.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_Utils.hpp @@ -24,16 +24,14 @@ namespace Test { template vector_t create_random_y_vector(crsMat_t crsMat, vector_t x_vector) { - vector_t y_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), - crsMat.numRows()); + vector_t y_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), crsMat.numRows()); KokkosSparse::spmv("N", 1, crsMat, x_vector, 0, y_vector); return y_vector; } template vector_t create_random_y_vector_mv(crsMat_t crsMat, vector_t x_vector) { - vector_t y_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), - crsMat.numRows(), x_vector.extent(1)); + vector_t y_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), crsMat.numRows(), x_vector.extent(1)); KokkosSparse::spmv("N", 1, crsMat, x_vector, 0, y_vector); return y_vector; } @@ -57,18 +55,15 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { if (nrows_actual != nrows_reference || ncols_actual != ncols_reference) { std::cout << "dimensions (actual):" << nrows_actual << 'x' << ncols_actual - << ", dimensions (reference): " << nrows_reference << 'x' - << ncols_reference << '\n'; + << ", dimensions (reference): " << nrows_reference << 'x' << ncols_reference << '\n'; return false; } if (nentries_actual != nentries_reference) { - std::cout << "nentries_actual:" << nentries_actual - << " nentries_reference:" << nentries_reference << std::endl; + std::cout << "nentries_actual:" << nentries_actual << " nentries_reference:" << 
nentries_reference << std::endl; return false; } if (nvals_actual != nvals_reference) { - std::cout << "nvals_actual:" << nvals_actual - << " nvals_reference:" << nvals_reference << std::endl; + std::cout << "nvals_actual:" << nvals_actual << " nvals_reference:" << nvals_reference << std::endl; return false; } @@ -77,10 +72,8 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { // Treat these as equivalent. bool zero_row_equivalent = false; if (nrows_reference == 0) { - auto rm1 = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), output_mat_actual.graph.row_map); - auto rm2 = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), output_mat_reference.graph.row_map); + auto rm1 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_mat_actual.graph.row_map); + auto rm2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_mat_reference.graph.row_map); if (rm1.extent_int(0) == 0 && rm2.extent_int(0) == 1) { // Make sure the one element of rm2 is 0 zero_row_equivalent = !rm2(0); @@ -90,10 +83,10 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { } } if (!zero_row_equivalent) { - is_identical = KokkosKernels::Impl::kk_is_identical_view< - typename graph_t::row_map_type, typename graph_t::row_map_type, - typename lno_view_t::value_type, typename device::execution_space>( - output_mat_actual.graph.row_map, output_mat_reference.graph.row_map, 0); + is_identical = + KokkosKernels::Impl::kk_is_identical_view( + output_mat_actual.graph.row_map, output_mat_reference.graph.row_map, 0); } if (!is_identical) { @@ -101,15 +94,14 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { std::cout << "Actual rowmap:\n"; KokkosKernels::Impl::kk_print_1Dview(output_mat_actual.graph.row_map, true); std::cout << "Correct rowmap:\n"; - KokkosKernels::Impl::kk_print_1Dview(output_mat_reference.graph.row_map, - true); + 
KokkosKernels::Impl::kk_print_1Dview(output_mat_reference.graph.row_map, true); return false; } - is_identical = KokkosKernels::Impl::kk_is_identical_view< - lno_nnz_view_t, lno_nnz_view_t, typename lno_nnz_view_t::value_type, - typename device::execution_space>(output_mat_actual.graph.entries, - output_mat_reference.graph.entries, 0); + is_identical = + KokkosKernels::Impl::kk_is_identical_view( + output_mat_actual.graph.entries, output_mat_reference.graph.entries, 0); if (!is_identical) { std::cout << "entries are different." << std::endl; @@ -118,12 +110,11 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { return false; } - typedef typename Kokkos::ArithTraits< - typename scalar_view_t::non_const_value_type>::mag_type eps_type; + typedef typename Kokkos::ArithTraits::mag_type eps_type; eps_type eps = std::is_same::value ? 3.7e-3 : 1e-7; - is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view< - scalar_view_t, scalar_view_t, eps_type, typename device::execution_space>( + is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view( output_mat_actual.values, output_mat_reference.values, eps); if (!is_identical) { diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_block_gauss_seidel.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_block_gauss_seidel.hpp index 11830e02244f..80c23356ce4e 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_block_gauss_seidel.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_block_gauss_seidel.hpp @@ -55,27 +55,23 @@ struct GSTestParams { // Note: GS_DEFAULT is same as GS_TEAM and - for blocks - as GS_PERMUTED // Note: GS_TWOSTAGE and GS_CLUSTER are not supported for blocks - std::vector gs_algorithms = { - KokkosSparse::GS_DEFAULT}; - std::vector shmem_sizes = { + std::vector gs_algorithms = {KokkosSparse::GS_DEFAULT}; + std::vector shmem_sizes = { 32128, 2008 // make the shmem small on gpus so that it will test 2 level // 
algorithm. }; - std::vector apply_types = {symmetric, forward_sweep, - backward_sweep}; + std::vector apply_types = {symmetric, forward_sweep, backward_sweep}; GSTestParams() = default; }; template int run_block_gauss_seidel_1( - mtx_t input_mat, int block_size, KokkosSparse::GSAlgorithm gs_algorithm, - vector_t x_vector, const_vector_t y_vector, bool is_symmetric_graph, - GSApplyType apply_type = Test::symmetric, bool skip_symbolic = false, + mtx_t input_mat, int block_size, KokkosSparse::GSAlgorithm gs_algorithm, vector_t x_vector, const_vector_t y_vector, + bool is_symmetric_graph, GSApplyType apply_type = Test::symmetric, bool skip_symbolic = false, bool skip_numeric = false, size_t shmem_size = 32128, - typename mtx_t::value_type omega = - Kokkos::ArithTraits::one()) { + typename mtx_t::value_type omega = Kokkos::ArithTraits::one()) { typedef typename mtx_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type lno_view_t; typedef typename graph_t::entries_type lno_nnz_view_t; @@ -87,9 +83,9 @@ int run_block_gauss_seidel_1( constexpr auto format = KokkosSparse::Impl::MatrixTraits::format; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename mtx_t::execution_space, - typename mtx_t::memory_space, typename mtx_t::memory_space>; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; KernelHandle kh; kh.set_team_work_size(16); kh.set_shmem_size(shmem_size); @@ -101,36 +97,31 @@ int run_block_gauss_seidel_1( const int apply_count = 100; if (!skip_symbolic) { - KSExp::block_gauss_seidel_symbolic( - &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, - input_mat.graph.entries, is_symmetric_graph); + KSExp::block_gauss_seidel_symbolic(&kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, + input_mat.graph.entries, is_symmetric_graph); } if (!skip_numeric) { - KSExp::block_gauss_seidel_numeric( - &kh, num_rows_1, num_cols_1, block_size, 
input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, is_symmetric_graph); + KSExp::block_gauss_seidel_numeric(&kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, + input_mat.graph.entries, input_mat.values, is_symmetric_graph); } switch (apply_type) { case Test::forward_sweep: KSExp::forward_sweep_block_gauss_seidel_apply( - &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, input_mat.graph.entries, input_mat.values, + x_vector, y_vector, false, true, omega, apply_count); break; case Test::backward_sweep: KSExp::backward_sweep_block_gauss_seidel_apply( - &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, input_mat.graph.entries, input_mat.values, + x_vector, y_vector, false, true, omega, apply_count); break; case Test::symmetric: default: KSExp::symmetric_block_gauss_seidel_apply( - &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + &kh, num_rows_1, num_cols_1, block_size, input_mat.graph.row_map, input_mat.graph.entries, input_mat.values, + x_vector, y_vector, false, true, omega, apply_count); break; } @@ -140,22 +131,18 @@ int run_block_gauss_seidel_1( } // namespace Test -template -void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, - lno_t bandwidth, lno_t row_size_variance) { +template +void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using namespace Test; srand(245); - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using 
MatrixConverter = KokkosSparse::Impl::MatrixConverter; typedef typename device::execution_space exec_space; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type - lno_view_t; - typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type - lno_nnz_view_t; + typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type lno_view_t; + typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type lno_nnz_view_t; typedef typename Kokkos::ArithTraits::mag_type mag_t; lno_t numCols = numRows; @@ -163,9 +150,8 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, const GSTestParams params; lno_t block_size = params.block_size; - crsMat_t crsmat = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); + crsMat_t crsmat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); lno_view_t pf_rm; lno_nnz_view_t pf_e; @@ -175,20 +161,18 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, // this makes consecutive 5 rows to have same columns. // it will add scalar 0's for those entries that does not exists. // the result is still a point crs matrix. - KokkosSparse::Impl::kk_create_bsr_formated_point_crsmatrix( - block_size, crsmat.numRows(), crsmat.numCols(), crsmat.graph.row_map, - crsmat.graph.entries, crsmat.values, out_r, out_c, pf_rm, pf_e, pf_v); + KokkosSparse::Impl::kk_create_bsr_formated_point_crsmatrix(block_size, crsmat.numRows(), crsmat.numCols(), + crsmat.graph.row_map, crsmat.graph.entries, crsmat.values, + out_r, out_c, pf_rm, pf_e, pf_v); graph_t static_graph2(pf_e, pf_rm); crsMat_t crsmat2("CrsMatrix2", out_c, pf_v, static_graph2); // this converts the previous generated matrix to block matrix. 
- auto input_mat = - MatrixConverter::from_bsr_formated_point_crsmatrix(crsmat2, block_size); + auto input_mat = MatrixConverter::from_bsr_formated_point_crsmatrix(crsmat2, block_size); lno_t nv = ((crsmat2.numRows() + block_size - 1) / block_size) * block_size; - const scalar_view_t solution_x( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), nv); + const scalar_view_t solution_x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), nv); // create_random_x_vector operates on host mirror, then copies to device. But // create_y does everything on device. create_random_x_vector(solution_x); @@ -208,10 +192,8 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, for (const auto skip_numeric : {false, true}) { Kokkos::Timer timer1; // int res = - run_block_gauss_seidel_1(input_mat, block_size, gs_algorithm, - x_vector, y_vector, is_symmetric_graph, - apply_type, skip_symbolic, skip_numeric, - shmem_size, params.omega); + run_block_gauss_seidel_1(input_mat, block_size, gs_algorithm, x_vector, y_vector, is_symmetric_graph, + apply_type, skip_symbolic, skip_numeric, shmem_size, params.omega); // double gs = timer1.seconds(); // KokkosKernels::Impl::print_1Dview(x_vector); KokkosBlas::axpby(alpha, solution_x, -alpha, x_vector); @@ -225,23 +207,19 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, // device::execution_space::finalize(); } -template -void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, - lno_t bandwidth, lno_t row_size_variance) { +template +void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { using namespace Test; srand(245); - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using MatrixConverter = KokkosSparse::Impl::MatrixConverter; typedef typename device::execution_space exec_space; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename crsMat_t::values_type::non_const_type 
scalar_view_t; - typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type - lno_view_t; - typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type - lno_nnz_view_t; + typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type lno_view_t; + typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type lno_nnz_view_t; typedef Kokkos::View scalar_view2d_t; typedef typename Kokkos::ArithTraits::mag_type mag_t; @@ -250,9 +228,8 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, const GSTestParams params; lno_t block_size = params.block_size; - crsMat_t crsmat = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); + crsMat_t crsmat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); lno_view_t pf_rm; lno_nnz_view_t pf_e; @@ -262,26 +239,23 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, // this makes consecutive 5 rows to have same columns. // it will add scalar 0's for those entries that does not exists. // the result is still a point crs matrix. 
- KokkosSparse::Impl::kk_create_bsr_formated_point_crsmatrix( - block_size, crsmat.numRows(), crsmat.numCols(), crsmat.graph.row_map, - crsmat.graph.entries, crsmat.values, out_r, out_c, pf_rm, pf_e, pf_v); + KokkosSparse::Impl::kk_create_bsr_formated_point_crsmatrix(block_size, crsmat.numRows(), crsmat.numCols(), + crsmat.graph.row_map, crsmat.graph.entries, crsmat.values, + out_r, out_c, pf_rm, pf_e, pf_v); graph_t static_graph2(pf_e, pf_rm); crsMat_t crsmat2("CrsMatrix2", out_c, pf_v, static_graph2); - auto input_mat = - MatrixConverter::from_bsr_formated_point_crsmatrix(crsmat2, block_size); + auto input_mat = MatrixConverter::from_bsr_formated_point_crsmatrix(crsmat2, block_size); lno_t nv = ((crsmat2.numRows() + block_size - 1) / block_size) * block_size; const lno_t numVecs = params.numVecs; - scalar_view2d_t solution_x( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), nv, params.numVecs); + scalar_view2d_t solution_x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), nv, params.numVecs); create_random_x_vector(solution_x); scalar_view2d_t y_vector = create_random_y_vector_mv(crsmat2, solution_x); exec_space().fence(); - auto solution_host = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), solution_x); + auto solution_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), solution_x); // Need to fence before reading from solution_host std::vector initial_norms(numVecs); for (lno_t i = 0; i < numVecs; i++) { @@ -289,8 +263,7 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, for (lno_t j = 0; j < nv; j++) { sum += solution_host(j, i) * solution_host(j, i); } - initial_norms[i] = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(sum)); + initial_norms[i] = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(sum)); } for (const auto gs_algorithm : params.gs_algorithms) { @@ -308,10 +281,8 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, for (const auto skip_numeric : {false, true}) { 
Kokkos::Timer timer1; // int res = - run_block_gauss_seidel_1(input_mat, block_size, gs_algorithm, - x_vector, y_vector, is_symmetric_graph, - apply_type, skip_symbolic, skip_numeric, - shmem_size, params.omega); + run_block_gauss_seidel_1(input_mat, block_size, gs_algorithm, x_vector, y_vector, is_symmetric_graph, + apply_type, skip_symbolic, skip_numeric, shmem_size, params.omega); // double gs = timer1.seconds(); // KokkosKernels::Impl::print_1Dview(x_vector); Kokkos::deep_copy(x_host, x_vector); @@ -322,8 +293,7 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, scalar_t diff = x_host(r, c) - solution_host(r, c); sum += diff * diff; } - mag_t result_res = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(sum)); + mag_t result_res = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(sum)); EXPECT_LT(result_res, params.tolerance * initial_norms[c]); } } @@ -334,20 +304,18 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, // device::execution_space::finalize(); } -template +template void test_block_gauss_seidel_empty() { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_type; typedef typename graph_t::entries_type::non_const_type entries_type; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space>; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; // The rowmap of a zero-row matrix can be length 0 or 1, so Gauss-Seidel // should work with both (the setup and apply are essentially no-ops but they // shouldn't crash or throw exceptions) For this test, create size-0 and @@ -363,40 +331,28 @@ void 
test_block_gauss_seidel_empty() { entries_type entries("Entries", 0); scalar_view_t values("Values", 0); // also, make sure graph symmetrization doesn't crash on zero rows - KSExp::block_gauss_seidel_symbolic(&kh, num_rows, num_rows, block_size, - rowmap, entries, false); - KSExp::block_gauss_seidel_numeric( - &kh, num_rows, num_rows, block_size, rowmap, entries, values, false); + KSExp::block_gauss_seidel_symbolic(&kh, num_rows, num_rows, block_size, rowmap, entries, false); + KSExp::block_gauss_seidel_numeric(&kh, num_rows, num_rows, block_size, rowmap, entries, values, false); scalar_view_t x("X", num_rows); scalar_view_t y("Y", num_rows); scalar_t omega(0.9); - KSExp::symmetric_block_gauss_seidel_apply( - &kh, num_rows, num_rows, block_size, rowmap, entries, values, x, y, - false, true, omega, 3); + KSExp::symmetric_block_gauss_seidel_apply(&kh, num_rows, num_rows, block_size, rowmap, entries, values, + x, y, false, true, omega, 3); kh.destroy_gs_handle(); } } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse_bsr_gauss_seidel_rank1_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_block_gauss_seidel_rank1( \ - 500, 500 * 10, 70, 3); \ - } \ - TEST_F( \ - TestCategory, \ - sparse_bsr_gauss_seidel_rank2_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_block_gauss_seidel_rank2( \ - 500, 500 * 10, 70, 3); \ - } \ - TEST_F( \ - TestCategory, \ - sparse_bsr_gauss_seidel_empty_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_block_gauss_seidel_empty(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse_bsr_gauss_seidel_rank1_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_block_gauss_seidel_rank1( \ + 500, 500 * 10, 70, 3); \ + } \ + TEST_F(TestCategory, sparse_bsr_gauss_seidel_rank2_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_block_gauss_seidel_rank2( \ + 500, 500 * 10, 70, 3); \ + } \ + TEST_F(TestCategory, 
sparse_bsr_gauss_seidel_empty_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_block_gauss_seidel_empty(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_bspgemm.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_bspgemm.hpp index 58a2a18b8a30..32168f1686b4 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_bspgemm.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_bspgemm.hpp @@ -30,18 +30,16 @@ namespace Test { template int run_block_spgemm(const bsrMat_t A, const bsrMat_t B, bsrMat_t &C, // parameters - KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, - bool use_dynamic_scheduling = true, - size_t shmem_size = 0) { + KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, bool use_dynamic_scheduling = true, + size_t shmem_size = 0) { typedef typename bsrMat_t::size_type size_type; typedef typename bsrMat_t::ordinal_type lno_t; typedef typename bsrMat_t::value_type scalar_t; typedef typename bsrMat_t::device_type device; typedef typename bsrMat_t::memory_space memory_space; - typedef KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - memory_space, memory_space> + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -61,8 +59,7 @@ int run_block_spgemm(const bsrMat_t A, const bsrMat_t B, bsrMat_t &C, } template -bool is_same_block_matrix(bsrMat_t output_mat_actual, - bsrMat_t output_mat_reference) { +bool is_same_block_matrix(bsrMat_t output_mat_actual, bsrMat_t output_mat_reference) { using device = typename bsrMat_t::device_type; using graph_t = typename bsrMat_t::StaticCrsGraphType; using lno_view_t = typename graph_t::row_map_type::non_const_type; @@ -78,18 +75,15 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, size_t nvals_reference = output_mat_reference.values.extent(0); if (nrows_actual != nrows_reference) { - std::cout << "nrows_actual:" << nrows_actual - << " nrows_reference:" << 
nrows_reference << std::endl; + std::cout << "nrows_actual:" << nrows_actual << " nrows_reference:" << nrows_reference << std::endl; return false; } if (nentries_actual != nentries_reference) { - std::cout << "nentries_actual:" << nentries_actual - << " nentries_reference:" << nentries_reference << std::endl; + std::cout << "nentries_actual:" << nentries_actual << " nentries_reference:" << nentries_reference << std::endl; return false; } if (nvals_actual != nvals_reference) { - std::cout << "nvals_actual:" << nvals_actual - << " nvals_reference:" << nvals_reference << std::endl; + std::cout << "nvals_actual:" << nvals_actual << " nvals_reference:" << nvals_reference << std::endl; return false; } @@ -97,10 +91,10 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, KokkosSparse::sort_bsr_matrix(output_mat_reference); bool is_identical = true; - is_identical = KokkosKernels::Impl::kk_is_identical_view< - typename graph_t::row_map_type, typename graph_t::row_map_type, - typename lno_view_t::value_type, typename device::execution_space>( - output_mat_actual.graph.row_map, output_mat_reference.graph.row_map, 0); + is_identical = + KokkosKernels::Impl::kk_is_identical_view( + output_mat_actual.graph.row_map, output_mat_reference.graph.row_map, 0); if (!is_identical) { std::cout << "rowmaps are different." << std::endl; @@ -111,10 +105,10 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, return false; } - is_identical = KokkosKernels::Impl::kk_is_identical_view< - lno_nnz_view_t, lno_nnz_view_t, typename lno_nnz_view_t::value_type, - typename device::execution_space>(output_mat_actual.graph.entries, - output_mat_reference.graph.entries, 0); + is_identical = + KokkosKernels::Impl::kk_is_identical_view( + output_mat_actual.graph.entries, output_mat_reference.graph.entries, 0); if (!is_identical) { std::cout << "entries are different." 
<< std::endl; @@ -123,12 +117,11 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, return false; } - typedef typename Kokkos::ArithTraits< - typename scalar_view_t::non_const_value_type>::mag_type eps_type; + typedef typename Kokkos::ArithTraits::mag_type eps_type; eps_type eps = std::is_same::value ? 3e-2 : 5e-7; - is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view< - scalar_view_t, scalar_view_t, eps_type, typename device::execution_space>( + is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view( output_mat_actual.values, output_mat_reference.values, eps); if (!is_identical) { @@ -144,18 +137,14 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, // Generate matrices and test all supported spgemm algorithms. // C := AB, where A is m*k, B is k*n, and C is m*n. -template -void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, - lno_t bandwidth, lno_t row_size_variance, - const bool use_dynamic_scheduling = true, - const size_t shared_memory_size = 0) { +template +void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, lno_t row_size_variance, + const bool use_dynamic_scheduling = true, const size_t shared_memory_size = 0) { #if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) { - std::cerr - << "TEST SKIPPED: See " - "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." - << std::endl; + std::cerr << "TEST SKIPPED: See " + "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." + << std::endl; return; } #endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL @@ -163,31 +152,24 @@ void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, // device::execution_space::initialize(); // device::execution_space::print_configuration(std::cout); - using bsrMat_t = - KokkosSparse::Experimental::BsrMatrix; + using bsrMat_t = KokkosSparse::Experimental::BsrMatrix; // Generate random compressed sparse row matrix. 
Randomly generated (non-zero) // values are stored in a 1-D (1 rank) array. - bsrMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( - blkDim, m, k, nnz, row_size_variance, bandwidth); - bsrMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix( - blkDim, k, n, nnz, row_size_variance, bandwidth); + bsrMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix(blkDim, m, k, nnz, row_size_variance, bandwidth); + bsrMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix(blkDim, k, n, nnz, row_size_variance, bandwidth); KokkosSparse::sort_bsr_matrix(A); KokkosSparse::sort_bsr_matrix(B); bsrMat_t output_mat2; - run_block_spgemm(A, B, output_mat2, SPGEMM_DEBUG, use_dynamic_scheduling, - shared_memory_size); + run_block_spgemm(A, B, output_mat2, SPGEMM_DEBUG, use_dynamic_scheduling, shared_memory_size); std::vector algorithms = { - SPGEMM_KK, SPGEMM_KK_MEMORY /* alias SPGEMM_KK_MEMSPEED */, - SPGEMM_KK_SPEED /* alias SPGEMM_KK_DENSE */ + SPGEMM_KK, SPGEMM_KK_MEMORY /* alias SPGEMM_KK_MEMSPEED */, SPGEMM_KK_SPEED /* alias SPGEMM_KK_DENSE */ }; - if (!KokkosKernels::Impl::kk_is_gpu_exec_space< - typename device::execution_space>()) { + if (!KokkosKernels::Impl::kk_is_gpu_exec_space()) { // SPGEMM_KK_LP is useful on CPU to cover MultiCoreTag4 functor // (otherwise skipped) but on GPU it's same as SPGEMM_KK, so we can skip it. 
algorithms.push_back(SPGEMM_KK_LP); @@ -212,8 +194,7 @@ void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, bool failed = false; int res = 0; try { - res = run_block_spgemm(A, B, output_mat, spgemm_algorithm, - use_dynamic_scheduling, shared_memory_size); + res = run_block_spgemm(A, B, output_mat, spgemm_algorithm, use_dynamic_scheduling, shared_memory_size); } catch (const char *message) { EXPECT_TRUE(is_expected_to_fail) << algo << ": " << message; failed = true; @@ -243,23 +224,22 @@ void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, // Note: Tests with shared memory specified aim to trigger specific GPU functors // dispatched by matrix size and the available shared memory. -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse_block_spgemm_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - auto const SHMEM_AUTO = 0; \ - auto test_case = test_bspgemm; \ - /* Trigger SPGEMM_KK_MEMORY_SPREADTEAM on GPU */ \ - test_case(2, 50, 50, 50, 2000, 50, 5, true, 16 * 1024); \ - /* Trigger SPGEMM_KK -> SPGEMM_KK_MEMORY on GPU */ \ - test_case(2, 50, 50, 50, 1000, 50, 5, false, 16 * 1024); \ - /* Trigger SPGEMM_KK_MEMORY_BIGSPREADTEAM on GPU */ \ - test_case(2, 500, 500, 500, 32000, 500, 500, true, 16 * 1024); \ - /* trigger dense dispatch in hash method */ \ - test_case(2, 2, 3, 4, 2, 2, 0, true, 16 * 1024); \ - /* zero-size handling */ \ - test_case(2, 0, 0, 0, 0, 10, 10, true, SHMEM_AUTO); \ - test_case(2, 0, 12, 5, 0, 10, 0, true, SHMEM_AUTO); \ - test_case(2, 10, 10, 0, 0, 10, 10, true, SHMEM_AUTO); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse_block_spgemm_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + auto const SHMEM_AUTO = 0; \ + auto test_case = test_bspgemm; \ + /* Trigger SPGEMM_KK_MEMORY_SPREADTEAM on GPU */ \ + test_case(2, 50, 50, 50, 2000, 50, 5, true, 16 * 1024); \ + /* Trigger SPGEMM_KK -> SPGEMM_KK_MEMORY 
on GPU */ \ + test_case(2, 50, 50, 50, 1000, 50, 5, false, 16 * 1024); \ + /* Trigger SPGEMM_KK_MEMORY_BIGSPREADTEAM on GPU */ \ + test_case(2, 500, 500, 500, 32000, 500, 500, true, 16 * 1024); \ + /* trigger dense dispatch in hash method */ \ + test_case(2, 2, 3, 4, 2, 2, 0, true, 16 * 1024); \ + /* zero-size handling */ \ + test_case(2, 0, 0, 0, 0, 10, 10, true, SHMEM_AUTO); \ + test_case(2, 0, 12, 5, 0, 10, 0, true, SHMEM_AUTO); \ + test_case(2, 10, 10, 0, 0, 10, 10, true, SHMEM_AUTO); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_ccs2crs.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_ccs2crs.hpp index f7e279775994..9973ab32b409 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_ccs2crs.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_ccs2crs.hpp @@ -19,11 +19,8 @@ #include "KokkosKernels_TestUtils.hpp" namespace Test { -template -void check_crs_matrix(CrsType crsMat, IdType ccs_row_ids_d, - MapType ccs_col_map_d, ValsType ccs_vals_d, - ColsType cols) { +template +void check_crs_matrix(CrsType crsMat, IdType ccs_row_ids_d, MapType ccs_col_map_d, ValsType ccs_vals_d, ColsType cols) { using ordinal_type = typename CrsType::ordinal_type; using size_type = typename CrsType::size_type; @@ -32,14 +29,11 @@ void check_crs_matrix(CrsType crsMat, IdType ccs_row_ids_d, using ViewTypeVals = decltype(ccs_vals_d); // Copy to host - typename ViewTypeRowIds::HostMirror ccs_row_ids = - Kokkos::create_mirror_view(ccs_row_ids_d); + typename ViewTypeRowIds::HostMirror ccs_row_ids = Kokkos::create_mirror_view(ccs_row_ids_d); Kokkos::deep_copy(ccs_row_ids, ccs_row_ids_d); - typename ViewTypeColMap::HostMirror ccs_col_map = - Kokkos::create_mirror_view(ccs_col_map_d); + typename ViewTypeColMap::HostMirror ccs_col_map = Kokkos::create_mirror_view(ccs_col_map_d); Kokkos::deep_copy(ccs_col_map, ccs_col_map_d); - typename ViewTypeVals::HostMirror ccs_vals = - Kokkos::create_mirror_view(ccs_vals_d); + typename 
ViewTypeVals::HostMirror ccs_vals = Kokkos::create_mirror_view(ccs_vals_d); Kokkos::deep_copy(ccs_vals, ccs_vals_d); auto crs_col_ids_d = crsMat.graph.entries; @@ -51,14 +45,11 @@ void check_crs_matrix(CrsType crsMat, IdType ccs_row_ids_d, using ViewTypeCrsVals = decltype(crs_vals_d); // Copy to host - typename ViewTypeCrsColIds::HostMirror crs_col_ids = - Kokkos::create_mirror_view(crs_col_ids_d); + typename ViewTypeCrsColIds::HostMirror crs_col_ids = Kokkos::create_mirror_view(crs_col_ids_d); Kokkos::deep_copy(crs_col_ids, crs_col_ids_d); - typename ViewTypeCrsRowMap::HostMirror crs_row_map = - Kokkos::create_mirror_view(crs_row_map_d); + typename ViewTypeCrsRowMap::HostMirror crs_row_map = Kokkos::create_mirror_view(crs_row_map_d); Kokkos::deep_copy(crs_row_map, crs_row_map_d); - typename ViewTypeCrsVals::HostMirror crs_vals = - Kokkos::create_mirror_view(crs_vals_d); + typename ViewTypeCrsVals::HostMirror crs_vals = Kokkos::create_mirror_view(crs_vals_d); Kokkos::deep_copy(crs_vals, crs_vals_d); Kokkos::fence(); @@ -83,22 +74,17 @@ void check_crs_matrix(CrsType crsMat, IdType ccs_row_ids_d, } if (l == row_end) - FAIL() << "crs element at (i: " << ccs_row_ids(i) << ", j: " << j - << ") not found!" << std::endl; + FAIL() << "crs element at (i: " << ccs_row_ids(i) << ", j: " << j << ") not found!" 
<< std::endl; - ASSERT_EQ(ccs_vals(i), crs_vals(l)) - << "(i: " << ccs_row_ids(i) << ", j: " << j << ")" << std::endl; + ASSERT_EQ(ccs_vals(i), crs_vals(l)) << "(i: " << ccs_row_ids(i) << ", j: " << j << ")" << std::endl; } } } template -void doCcs2Crs(size_t m, size_t n, ScalarType min_val, ScalarType max_val, - bool fully_sparse = false) { - RandCsMatrix ccsMat( - n, m, min_val, max_val, fully_sparse); +void doCcs2Crs(size_t m, size_t n, ScalarType min_val, ScalarType max_val, bool fully_sparse = false) { + RandCsMatrix ccsMat(n, m, min_val, max_val, fully_sparse); - auto crsMat = KokkosSparse::ccs2crs(ccsMat.get_dim2(), ccsMat.get_dim1(), - ccsMat.get_nnz(), ccsMat.get_vals(), + auto crsMat = KokkosSparse::ccs2crs(ccsMat.get_dim2(), ccsMat.get_dim1(), ccsMat.get_nnz(), ccsMat.get_vals(), ccsMat.get_map(), ccsMat.get_ids()); auto ccs_row_ids_d = ccsMat.get_ids(); @@ -130,9 +116,7 @@ void doAllCcs2crs(size_t m, size_t n) { } TEST_F(TestCategory, sparse_ccs2crs) { - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; std::srand(ticks); // Empty cases @@ -164,10 +148,9 @@ TEST_F(TestCategory, sparse_ccs2crs) { doCcs2Crs(50, 10, 10, 100, true); // Test the convenience wrapper that accepts a ccs matrix - RandCsMatrix csMat(2, 2, 10, 10, - false); - auto ccsMatrix = crs2ccs(csMat.get_dim1(), csMat.get_dim2(), csMat.get_nnz(), - csMat.get_vals(), csMat.get_map(), csMat.get_ids()); + RandCsMatrix csMat(2, 2, 10, 10, false); + auto ccsMatrix = + crs2ccs(csMat.get_dim1(), csMat.get_dim2(), csMat.get_nnz(), csMat.get_vals(), csMat.get_map(), csMat.get_ids()); auto crsMatrix = ccs2crs(ccsMatrix); auto ccs_row_ids_d = ccsMatrix.graph.entries; diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_coo2crs.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_coo2crs.hpp index 3427ec44cd28..d8dc44a4be11 100644 
--- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_coo2crs.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_coo2crs.hpp @@ -20,14 +20,11 @@ namespace Test { template -CrsType vanilla_coo2crs(size_t m, size_t n, RowType row, ColType col, - DataType data) { +CrsType vanilla_coo2crs(size_t m, size_t n, RowType row, ColType col, DataType data) { using RowIndexType = typename RowType::value_type; using ColIndexType = typename ColType::value_type; using ValueType = typename DataType::value_type; - std::unordered_map *> - umap; + std::unordered_map *> umap; int nnz = 0; for (uint64_t i = 0; i < data.extent(0); i++) { @@ -53,18 +50,13 @@ CrsType vanilla_coo2crs(size_t m, size_t n, RowType row, ColType col, } } - typename CrsType::row_map_type::non_const_type row_map("vanilla_row_map", - m + 1); + typename CrsType::row_map_type::non_const_type row_map("vanilla_row_map", m + 1); typename CrsType::values_type values("vanilla_values", nnz); - typename CrsType::staticcrsgraph_type::entries_type col_ids("vanilla_col_ids", - nnz); + typename CrsType::staticcrsgraph_type::entries_type col_ids("vanilla_col_ids", nnz); - typename CrsType::row_map_type::non_const_type::HostMirror row_map_h = - Kokkos::create_mirror_view(row_map); - typename CrsType::values_type::HostMirror values_h = - Kokkos::create_mirror_view(values); - typename CrsType::staticcrsgraph_type::entries_type::HostMirror col_ids_h = - Kokkos::create_mirror_view(col_ids); + typename CrsType::row_map_type::non_const_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map); + typename CrsType::values_type::HostMirror values_h = Kokkos::create_mirror_view(values); + typename CrsType::staticcrsgraph_type::entries_type::HostMirror col_ids_h = Kokkos::create_mirror_view(col_ids); int row_len = 0; for (uint64_t i = 0; i < m; i++) { @@ -108,10 +100,9 @@ void check_crs_matrix(CrsType crsMat, RowType row, ColType col, DataType data, typename DataType::HostMirror data_h = 
Kokkos::create_mirror_view(data); Kokkos::deep_copy(data_h, data); - auto crsMatRef = vanilla_coo2crs( - crsMat.numRows(), crsMat.numCols(), row_h, col_h, data_h); + auto crsMatRef = + vanilla_coo2crs(crsMat.numRows(), crsMat.numCols(), row_h, col_h, data_h); auto crs_col_ids_ref_d = crsMatRef.graph.entries; auto crs_row_map_ref_d = crsMatRef.graph.row_map; @@ -122,14 +113,11 @@ void check_crs_matrix(CrsType crsMat, RowType row, ColType col, DataType data, using ViewTypeCrsValsRef = decltype(crs_vals_ref_d); // Copy crs to host - typename ViewTypeCrsColIdsRef::HostMirror crs_col_ids_ref = - Kokkos::create_mirror_view(crs_col_ids_ref_d); + typename ViewTypeCrsColIdsRef::HostMirror crs_col_ids_ref = Kokkos::create_mirror_view(crs_col_ids_ref_d); Kokkos::deep_copy(crs_col_ids_ref, crs_col_ids_ref_d); - typename ViewTypeCrsRowMapRef::HostMirror crs_row_map_ref = - Kokkos::create_mirror_view(crs_row_map_ref_d); + typename ViewTypeCrsRowMapRef::HostMirror crs_row_map_ref = Kokkos::create_mirror_view(crs_row_map_ref_d); Kokkos::deep_copy(crs_row_map_ref, crs_row_map_ref_d); - typename ViewTypeCrsValsRef::HostMirror crs_vals_ref = - Kokkos::create_mirror_view(crs_vals_ref_d); + typename ViewTypeCrsValsRef::HostMirror crs_vals_ref = Kokkos::create_mirror_view(crs_vals_ref_d); Kokkos::deep_copy(crs_vals_ref, crs_vals_ref_d); auto crs_col_ids_d = crsMat.graph.entries; @@ -141,14 +129,11 @@ void check_crs_matrix(CrsType crsMat, RowType row, ColType col, DataType data, using ViewTypeCrsVals = decltype(crs_vals_d); // Copy crs to host - typename ViewTypeCrsColIds::HostMirror crs_col_ids = - Kokkos::create_mirror_view(crs_col_ids_d); + typename ViewTypeCrsColIds::HostMirror crs_col_ids = Kokkos::create_mirror_view(crs_col_ids_d); Kokkos::deep_copy(crs_col_ids, crs_col_ids_d); - typename ViewTypeCrsRowMap::HostMirror crs_row_map = - Kokkos::create_mirror_view(crs_row_map_d); + typename ViewTypeCrsRowMap::HostMirror crs_row_map = Kokkos::create_mirror_view(crs_row_map_d); 
Kokkos::deep_copy(crs_row_map, crs_row_map_d); - typename ViewTypeCrsVals::HostMirror crs_vals = - Kokkos::create_mirror_view(crs_vals_d); + typename ViewTypeCrsVals::HostMirror crs_vals = Kokkos::create_mirror_view(crs_vals_d); Kokkos::deep_copy(crs_vals, crs_vals_d); Kokkos::fence(); @@ -158,8 +143,7 @@ void check_crs_matrix(CrsType crsMat, RowType row, ColType col, DataType data, for (int i = 0; i < crsMatRef.numRows(); i++) { ASSERT_EQ(crs_row_map_ref(i), crs_row_map(i)) << "crs_row_map_ref(" << i << " = " << crs_row_map_ref(i) << " != " - << "crs_row_map(" << i << " = " << crs_row_map(i) << " -- " - << failure_info; + << "crs_row_map(" << i << " = " << crs_row_map(i) << " -- " << failure_info; } for (int i = 0; i < crsMatRef.numRows(); ++i) { @@ -175,42 +159,35 @@ void check_crs_matrix(CrsType crsMat, RowType row, ColType col, DataType data, for (auto j = row_start_ref; j < row_stop_ref; ++j) { // Look for the corresponding col_id - auto col_id_ref = crs_col_ids_ref(j); - std::string fail_msg = "row: " + std::to_string(i) + - ", crs_col_ids_ref(" + std::to_string(j) + - ") = " + std::to_string(col_id_ref); + auto col_id_ref = crs_col_ids_ref(j); + std::string fail_msg = + "row: " + std::to_string(i) + ", crs_col_ids_ref(" + std::to_string(j) + ") = " + std::to_string(col_id_ref); auto k = row_start_ref; for (; k < row_stop_ref; ++k) { if (crs_col_ids(k) == col_id_ref) break; } - if (k == row_stop_ref) - FAIL() << fail_msg << " not found in crs_col_ids!" << failure_info; + if (k == row_stop_ref) FAIL() << fail_msg << " not found in crs_col_ids!" << failure_info; // NOTE: ASSERT_EQ doesn't work -- values may be summed in different // orders We sum at most m x n values. - auto eps = - crsMatRef.numCols() * crsMatRef.numRows() * 10e1 * ats::epsilon(); - EXPECT_NEAR_KK(crs_vals_ref(j), crs_vals(k), eps, - fail_msg + " mismatched values!" 
+ failure_info); + auto eps = crsMatRef.numCols() * crsMatRef.numRows() * 10e1 * ats::epsilon(); + EXPECT_NEAR_KK(crs_vals_ref(j), crs_vals(k), eps, fail_msg + " mismatched values!" + failure_info); } } } template void doCoo2Crs(size_t m, size_t n, ScalarType min_val, ScalarType max_val) { - RandCooMat cooMat(m, n, m * n, min_val, - max_val); + RandCooMat cooMat(m, n, m * n, min_val, max_val); auto randRow = cooMat.get_row(); auto randCol = cooMat.get_col(); auto randData = cooMat.get_data(); - std::string failure_info = - "\nBegin arguments for above failure...\n" + cooMat.info + - "scalar: " + std::string(typeid(ScalarType).name()) + "\n" + - "layout: " + std::string(typeid(LayoutType).name()) + "\n" + - "m: " + std::to_string(m) + ", n: " + std::to_string(n) + - "\n...end arguments for above failure.\n"; + std::string failure_info = "\nBegin arguments for above failure...\n" + cooMat.info + + "scalar: " + std::string(typeid(ScalarType).name()) + "\n" + + "layout: " + std::string(typeid(LayoutType).name()) + "\n" + "m: " + std::to_string(m) + + ", n: " + std::to_string(n) + "\n...end arguments for above failure.\n"; auto crsMat = KokkosSparse::coo2crs(m, n, randRow, randCol, randData); check_crs_matrix(crsMat, randRow, randCol, randData, failure_info); @@ -237,9 +214,14 @@ void doAllCoo2Crs(size_t m, size_t n) { } TEST_F(TestCategory, sparse_coo2crs) { - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; +#if defined(KOKKOS_ENABLE_SYCL) + if constexpr (std::is_same_v) { + std::cout << "Not running coo2csr on SYCL execution space" << std::endl; + return; + } +#endif + + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; std::srand(ticks); doAllCoo2Crs(0, 0); @@ -258,34 +240,34 @@ TEST_F(TestCategory, sparse_coo2crs) { doAllCoo2Crs(m, n); } - RandCooMat cooMat(2, 2, 2 * 2, 10, - 10); - auto crsMatrix = KokkosSparse::coo2crs(2, 2, cooMat.get_row(), - 
cooMat.get_col(), cooMat.get_data()); + RandCooMat cooMat(2, 2, 2 * 2, 10, 10); + auto crsMatrix = KokkosSparse::coo2crs(2, 2, cooMat.get_row(), cooMat.get_col(), cooMat.get_data()); auto cooMatrix = KokkosSparse::crs2coo(crsMatrix); - check_crs_matrix(crsMatrix, cooMatrix.row(), cooMatrix.col(), - cooMatrix.data()); + check_crs_matrix(crsMatrix, cooMatrix.row(), cooMatrix.col(), cooMatrix.data()); } TEST_F(TestCategory, sparse_coo2crs_staticMatrix_edgeCases) { +#if defined(KOKKOS_ENABLE_SYCL) + if constexpr (std::is_same_v) { + std::cout << "Not running coo2csr on SYCL execution space" << std::endl; + return; + } +#endif + int m = 4; int n = 4; long long staticRow[16]{0, 1, 3, 2, 3, 2, 2, 2, 0, 0, 0, 1, 2, 0, 3, 0}; long long staticCol[16]{1, 1, 2, 3, 3, 2, 3, 2, 0, 0, 1, 3, 1, 2, 0, 0}; - float staticData[16]{7.28411, 8.17991, 8.84304, 5.01788, 9.85646, 5.79404, - 8.42014, 1.90238, 8.24195, 4.39955, 3.2637, 5.4546, - 6.51895, 8.09302, 9.36294, 3.44206}; + float staticData[16]{7.28411, 8.17991, 8.84304, 5.01788, 9.85646, 5.79404, 8.42014, 1.90238, + 8.24195, 4.39955, 3.2637, 5.4546, 6.51895, 8.09302, 9.36294, 3.44206}; Kokkos::View row("coo row", 16); Kokkos::View col("coo col", 16); Kokkos::View data("coo data", 16); - typename Kokkos::View::HostMirror row_h = - Kokkos::create_mirror_view(row); - typename Kokkos::View::HostMirror col_h = - Kokkos::create_mirror_view(col); - typename Kokkos::View::HostMirror data_h = - Kokkos::create_mirror_view(data); + typename Kokkos::View::HostMirror row_h = Kokkos::create_mirror_view(row); + typename Kokkos::View::HostMirror col_h = Kokkos::create_mirror_view(col); + typename Kokkos::View::HostMirror data_h = Kokkos::create_mirror_view(data); for (int i = 0; i < 16; i++) { row_h(i) = staticRow[i]; col_h(i) = staticCol[i]; @@ -303,9 +285,8 @@ TEST_F(TestCategory, sparse_coo2crs_staticMatrix_edgeCases) { // Even partitions, single thread, fully sparse row long long staticRowTs1[16]{0, 3, 0, 2, 2, 3, 0, 3, 2, 0, 0, 0, 0, 3, 3, 
0}; long long staticColTs1[16]{3, 1, 3, 1, 2, 2, 1, 1, 2, 3, 3, 1, 1, 0, 0, 0}; - float staticDataTs1[16]{6.1355, 6.53989, 8.58559, 6.37476, 4.18964, 2.41146, - 1.82177, 1.4249, 1.52659, 5.50521, 8.0484, 3.98874, - 6.74709, 3.35072, 7.81944, 5.83494}; + float staticDataTs1[16]{6.1355, 6.53989, 8.58559, 6.37476, 4.18964, 2.41146, 1.82177, 1.4249, + 1.52659, 5.50521, 8.0484, 3.98874, 6.74709, 3.35072, 7.81944, 5.83494}; for (int i = 0; i < 16; i++) { row_h(i) = staticRowTs1[i]; col_h(i) = staticColTs1[i]; diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2ccs.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2ccs.hpp index 46cc2fb361f3..b23bfef0ea66 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2ccs.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2ccs.hpp @@ -19,11 +19,8 @@ #include "KokkosKernels_TestUtils.hpp" namespace Test { -template -void check_ccs_matrix(CcsType ccsMat, IdType crs_col_ids_d, - MapType crs_row_map_d, ValsType crs_vals_d, - ColsType cols) { +template +void check_ccs_matrix(CcsType ccsMat, IdType crs_col_ids_d, MapType crs_row_map_d, ValsType crs_vals_d, ColsType cols) { using ordinal_type = typename CcsType::ordinal_type; using size_type = typename CcsType::size_type; @@ -32,14 +29,11 @@ void check_ccs_matrix(CcsType ccsMat, IdType crs_col_ids_d, using ViewTypeVals = decltype(crs_vals_d); // Copy to host - typename ViewTypeRowIds::HostMirror crs_col_ids = - Kokkos::create_mirror_view(crs_col_ids_d); + typename ViewTypeRowIds::HostMirror crs_col_ids = Kokkos::create_mirror_view(crs_col_ids_d); Kokkos::deep_copy(crs_col_ids, crs_col_ids_d); - typename ViewTypeColMap::HostMirror crs_row_map = - Kokkos::create_mirror_view(crs_row_map_d); + typename ViewTypeColMap::HostMirror crs_row_map = Kokkos::create_mirror_view(crs_row_map_d); Kokkos::deep_copy(crs_row_map, crs_row_map_d); - typename ViewTypeVals::HostMirror crs_vals = - Kokkos::create_mirror_view(crs_vals_d); + typename 
ViewTypeVals::HostMirror crs_vals = Kokkos::create_mirror_view(crs_vals_d); Kokkos::deep_copy(crs_vals, crs_vals_d); auto ccs_row_ids_d = ccsMat.graph.entries; @@ -51,14 +45,11 @@ void check_ccs_matrix(CcsType ccsMat, IdType crs_col_ids_d, using ViewTypeCrsVals = decltype(ccs_vals_d); // Copy to host - typename ViewTypeCrsColIds::HostMirror ccs_row_ids = - Kokkos::create_mirror_view(ccs_row_ids_d); + typename ViewTypeCrsColIds::HostMirror ccs_row_ids = Kokkos::create_mirror_view(ccs_row_ids_d); Kokkos::deep_copy(ccs_row_ids, ccs_row_ids_d); - typename ViewTypeCrsRowMap::HostMirror ccs_col_map = - Kokkos::create_mirror_view(ccs_col_map_d); + typename ViewTypeCrsRowMap::HostMirror ccs_col_map = Kokkos::create_mirror_view(ccs_col_map_d); Kokkos::deep_copy(ccs_col_map, ccs_col_map_d); - typename ViewTypeCrsVals::HostMirror ccs_vals = - Kokkos::create_mirror_view(ccs_vals_d); + typename ViewTypeCrsVals::HostMirror ccs_vals = Kokkos::create_mirror_view(ccs_vals_d); Kokkos::deep_copy(ccs_vals, ccs_vals_d); for (ordinal_type j = 0; j < cols; ++j) { @@ -81,23 +72,18 @@ void check_ccs_matrix(CcsType ccsMat, IdType crs_col_ids_d, } if (l == row_end) - FAIL() << "ccs element at (i: " << ccs_row_ids(i) << ", j: " << j - << ") not found!" << std::endl; + FAIL() << "ccs element at (i: " << ccs_row_ids(i) << ", j: " << j << ") not found!" 
<< std::endl; - ASSERT_EQ(ccs_vals(i), crs_vals(l)) - << "(i: " << ccs_row_ids(i) << ", j: " << j << ")" << std::endl; + ASSERT_EQ(ccs_vals(i), crs_vals(l)) << "(i: " << ccs_row_ids(i) << ", j: " << j << ")" << std::endl; } } } template -void doCrs2Ccs(size_t m, size_t n, ScalarType min_val, ScalarType max_val, - bool fully_sparse = false) { - RandCsMatrix crsMat( - m, n, min_val, max_val, fully_sparse); +void doCrs2Ccs(size_t m, size_t n, ScalarType min_val, ScalarType max_val, bool fully_sparse = false) { + RandCsMatrix crsMat(m, n, min_val, max_val, fully_sparse); - auto ccsMat = KokkosSparse::crs2ccs(crsMat.get_dim1(), crsMat.get_dim2(), - crsMat.get_nnz(), crsMat.get_vals(), + auto ccsMat = KokkosSparse::crs2ccs(crsMat.get_dim1(), crsMat.get_dim2(), crsMat.get_nnz(), crsMat.get_vals(), crsMat.get_map(), crsMat.get_ids()); auto crs_col_ids_d = crsMat.get_ids(); @@ -128,9 +114,7 @@ void doAllCrs2Ccs(size_t m, size_t n) { } TEST_F(TestCategory, sparse_crs2ccs) { - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; std::srand(ticks); // Empty cases @@ -162,10 +146,9 @@ TEST_F(TestCategory, sparse_crs2ccs) { doCrs2Ccs(50, 10, 10, 100, true); // Test the convenience wrapper that accepts a crs matrix - RandCsMatrix csMat(2, 2, 10, 10, - false); - auto crsMatrix = ccs2crs(csMat.get_dim2(), csMat.get_dim1(), csMat.get_nnz(), - csMat.get_vals(), csMat.get_map(), csMat.get_ids()); + RandCsMatrix csMat(2, 2, 10, 10, false); + auto crsMatrix = + ccs2crs(csMat.get_dim2(), csMat.get_dim1(), csMat.get_nnz(), csMat.get_vals(), csMat.get_map(), csMat.get_ids()); auto ccsMatrix = crs2ccs(crsMatrix); auto crs_col_ids_d = crsMatrix.graph.entries; diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2coo.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2coo.hpp index 9f81e20f908d..8e490679441a 100644 
--- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2coo.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_crs2coo.hpp @@ -20,8 +20,7 @@ namespace Test { template -void check_coo_matrix(CrsType crsMatRef, RowType row, ColType col, - DataType data) { +void check_coo_matrix(CrsType crsMatRef, RowType row, ColType col, DataType data) { // Copy coo to host typename RowType::HostMirror row_h = Kokkos::create_mirror_view(row); Kokkos::deep_copy(row_h, row); @@ -43,14 +42,11 @@ void check_coo_matrix(CrsType crsMatRef, RowType row, ColType col, using ViewTypeCrsValsRef = decltype(crs_vals_ref_d); // Copy crs to host - typename ViewTypeCrsColIdsRef::HostMirror crs_col_ids_ref = - Kokkos::create_mirror_view(crs_col_ids_ref_d); + typename ViewTypeCrsColIdsRef::HostMirror crs_col_ids_ref = Kokkos::create_mirror_view(crs_col_ids_ref_d); Kokkos::deep_copy(crs_col_ids_ref, crs_col_ids_ref_d); - typename ViewTypeCrsRowMapRef::HostMirror crs_row_map_ref = - Kokkos::create_mirror_view(crs_row_map_ref_d); + typename ViewTypeCrsRowMapRef::HostMirror crs_row_map_ref = Kokkos::create_mirror_view(crs_row_map_ref_d); Kokkos::deep_copy(crs_row_map_ref, crs_row_map_ref_d); - typename ViewTypeCrsValsRef::HostMirror crs_vals_ref = - Kokkos::create_mirror_view(crs_vals_ref_d); + typename ViewTypeCrsValsRef::HostMirror crs_vals_ref = Kokkos::create_mirror_view(crs_vals_ref_d); Kokkos::deep_copy(crs_vals_ref, crs_vals_ref_d); Kokkos::fence(); @@ -60,12 +56,11 @@ void check_coo_matrix(CrsType crsMatRef, RowType row, ColType col, ASSERT_EQ(crsMatRef.nnz(), data.extent(0)); for (decltype(row.extent(0)) idx = 0; idx < row.extent(0); ++idx) { - auto row_id = row_h(idx); - auto col_id = col_h(idx); - auto val = data_h(idx); - std::string fail_msg = "idx - " + std::to_string(idx) + - " row: " + std::to_string(row_id) + - ", col: " + std::to_string(col_id); + auto row_id = row_h(idx); + auto col_id = col_h(idx); + auto val = data_h(idx); + std::string fail_msg = + "idx - " + 
std::to_string(idx) + " row: " + std::to_string(row_id) + ", col: " + std::to_string(col_id); auto row_start_ref = crs_row_map_ref(row_id); auto row_stop_ref = crs_row_map_ref(row_id + 1); @@ -77,8 +72,7 @@ void check_coo_matrix(CrsType crsMatRef, RowType row, ColType col, if (crs_vals_ref(crs_idx) == val) break; } } - if (crs_idx == row_stop_ref) - FAIL() << fail_msg << " not found in crsMatRef!"; + if (crs_idx == row_stop_ref) FAIL() << fail_msg << " not found in crsMatRef!"; } } @@ -87,13 +81,11 @@ void doCrs2Coo(size_t m, size_t n, ScalarType min_val, ScalarType max_val) { using RandCrsMatType = RandCsMatrix; RandCrsMatType crsMat(m, n, min_val, max_val, m == 0 || n == 0); - using CrsOT = typename RandCrsMatType::IdViewTypeD::value_type; - using CrsType = - typename KokkosSparse::CrsMatrix; - auto map = crsMat.get_map(); - auto ids = crsMat.get_ids(); - CrsType crsMatrix("doCrs2Coo", crsMat.get_dim1(), crsMat.get_dim2(), - crsMat.get_nnz(), crsMat.get_vals(), map, ids); + using CrsOT = typename RandCrsMatType::IdViewTypeD::value_type; + using CrsType = typename KokkosSparse::CrsMatrix; + auto map = crsMat.get_map(); + auto ids = crsMat.get_ids(); + CrsType crsMatrix("doCrs2Coo", crsMat.get_dim1(), crsMat.get_dim2(), crsMat.get_nnz(), crsMat.get_vals(), map, ids); auto cooMat = KokkosSparse::crs2coo(crsMatrix); check_coo_matrix(crsMatrix, cooMat.row(), cooMat.col(), cooMat.data()); @@ -120,9 +112,7 @@ void doAllCrs2Coo(size_t m, size_t n) { } TEST_F(TestCategory, sparse_crs2coo) { - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; std::srand(ticks); // Square cases diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_csc2csr.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_csc2csr.hpp index aa838a44281b..3c9a8af3a8dd 100644 --- 
a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_csc2csr.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_csc2csr.hpp @@ -19,13 +19,10 @@ namespace Test { template -void doCsc2Csr(size_t m, size_t n, ScalarType min_val, ScalarType max_val, - bool fully_sparse = false) { - RandCsMatrix cscMat( - n, m, min_val, max_val, fully_sparse); +void doCsc2Csr(size_t m, size_t n, ScalarType min_val, ScalarType max_val, bool fully_sparse = false) { + RandCsMatrix cscMat(n, m, min_val, max_val, fully_sparse); - auto csrMat = KokkosSparse::csc2csr(cscMat.get_dim2(), cscMat.get_dim1(), - cscMat.get_nnz(), cscMat.get_vals(), + auto csrMat = KokkosSparse::csc2csr(cscMat.get_dim2(), cscMat.get_dim1(), cscMat.get_nnz(), cscMat.get_vals(), cscMat.get_map(), cscMat.get_ids()); auto csc_row_ids_d = cscMat.get_ids(); @@ -37,14 +34,11 @@ void doCsc2Csr(size_t m, size_t n, ScalarType min_val, ScalarType max_val, using ViewTypeVals = decltype(csc_vals_d); // Copy to host - typename ViewTypeRowIds::HostMirror csc_row_ids = - Kokkos::create_mirror_view(csc_row_ids_d); + typename ViewTypeRowIds::HostMirror csc_row_ids = Kokkos::create_mirror_view(csc_row_ids_d); Kokkos::deep_copy(csc_row_ids, csc_row_ids_d); - typename ViewTypeColMap::HostMirror csc_col_map = - Kokkos::create_mirror_view(csc_col_map_d); + typename ViewTypeColMap::HostMirror csc_col_map = Kokkos::create_mirror_view(csc_col_map_d); Kokkos::deep_copy(csc_col_map, csc_col_map_d); - typename ViewTypeVals::HostMirror csc_vals = - Kokkos::create_mirror_view(csc_vals_d); + typename ViewTypeVals::HostMirror csc_vals = Kokkos::create_mirror_view(csc_vals_d); Kokkos::deep_copy(csc_vals, csc_vals_d); auto csr_col_ids_d = csrMat.graph.entries; @@ -56,14 +50,11 @@ void doCsc2Csr(size_t m, size_t n, ScalarType min_val, ScalarType max_val, using ViewTypeCsrVals = decltype(csr_vals_d); // Copy to host - typename ViewTypeCsrColIds::HostMirror csr_col_ids = - Kokkos::create_mirror_view(csr_col_ids_d); + typename 
ViewTypeCsrColIds::HostMirror csr_col_ids = Kokkos::create_mirror_view(csr_col_ids_d); Kokkos::deep_copy(csr_col_ids, csr_col_ids_d); - typename ViewTypeCsrRowMap::HostMirror csr_row_map = - Kokkos::create_mirror_view(csr_row_map_d); + typename ViewTypeCsrRowMap::HostMirror csr_row_map = Kokkos::create_mirror_view(csr_row_map_d); Kokkos::deep_copy(csr_row_map, csr_row_map_d); - typename ViewTypeCsrVals::HostMirror csr_vals = - Kokkos::create_mirror_view(csr_vals_d); + typename ViewTypeCsrVals::HostMirror csr_vals = Kokkos::create_mirror_view(csr_vals_d); Kokkos::deep_copy(csr_vals, csr_vals_d); Kokkos::fence(); @@ -88,11 +79,9 @@ void doCsc2Csr(size_t m, size_t n, ScalarType min_val, ScalarType max_val, } if (l == row_end) - FAIL() << "csr element at (i: " << csc_row_ids(i) << ", j: " << j - << ") not found!" << std::endl; + FAIL() << "csr element at (i: " << csc_row_ids(i) << ", j: " << j << ") not found!" << std::endl; - ASSERT_EQ(csc_vals(i), csr_vals(l)) - << "(i: " << csc_row_ids(i) << ", j: " << j << ")" << std::endl; + ASSERT_EQ(csc_vals(i), csr_vals(l)) << "(i: " << csc_row_ids(i) << ", j: " << j << ")" << std::endl; } } } @@ -118,9 +107,7 @@ void doAllCsc2csr(size_t m, size_t n) { } TEST_F(TestCategory, sparse_csc2csr) { - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; std::srand(ticks); // Empty cases diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp index 28674ad353fe..29a9c5e401c6 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -20,8 +20,7 @@ #include "KokkosKernels_TestUtils.hpp" namespace Test { -template +template void 
run_test_extract_diagonal_blocks(int nrows, int nblocks) { using RowMapType = Kokkos::View; using EntriesType = Kokkos::View; @@ -84,12 +83,9 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { } // Extract - KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, - DiagBlks); + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, DiagBlks); - auto perm = - KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential( - A, DiagBlks_rcm, true); + auto perm = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, DiagBlks_rcm, true); // Checking lno_t numRows = 0; @@ -106,8 +102,7 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { bool flag = true; lno_t col_start = 0; for (int i = 0; i < nblocks; i++) { - RowMapType_hm hrow_map_diagblk("hrow_map_diagblk", - DiagBlks[i].numRows() + 1); + RowMapType_hm hrow_map_diagblk("hrow_map_diagblk", DiagBlks[i].numRows() + 1); EntriesType_hm hentries_diagblk("hentries_diagblk", DiagBlks[i].nnz()); ValuesType_hm hvalues_diagblk("hvalues_diagblk", DiagBlks[i].nnz()); @@ -147,14 +142,12 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { Kokkos::deep_copy(In, one); - auto h_perm = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), perm[i]); + auto h_perm = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), perm[i]); KokkosSparse::spmv("N", one, DiagBlks_rcm[i], In, zero, Out); Kokkos::deep_copy(h_Out_tmp, Out); - for (lno_t ii = 0; ii < static_cast(DiagBlks[i].numRows()); - ii++) { + for (lno_t ii = 0; ii < static_cast(DiagBlks[i].numRows()); ii++) { lno_t rcm_ii = h_perm(ii); h_Out(ii) = h_Out_tmp(rcm_ii); } @@ -170,24 +163,18 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { } } // namespace Test -template +template void test_extract_diagonal_blocks() { for (int s = 1; s <= 8; s++) { - Test::run_test_extract_diagonal_blocks( - 0, s); - Test::run_test_extract_diagonal_blocks( - 153, s); - 
Test::run_test_extract_diagonal_blocks( - 1553, s); + Test::run_test_extract_diagonal_blocks(0, s); + Test::run_test_extract_diagonal_blocks(153, s); + Test::run_test_extract_diagonal_blocks(1553, s); } } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_extract_diagonal_blocks(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_extract_diagonal_blocks(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_findRelOffset.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_findRelOffset.hpp index 642f1666e79c..6969571c9326 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_findRelOffset.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_findRelOffset.hpp @@ -20,10 +20,8 @@ // by all backends so the following guard // ensure that the test is not inclueded // on these backends. -#if !defined(TEST_HIP_SPARSE_CPP) && !defined(TEST_SYCL_SPARSE_CPP) && \ - !defined(TEST_OPENMPTARGET_SPARSE_CPP) && \ - (!defined(TEST_CUDA_SPARSE_CPP) || \ - (defined(TEST_CUDA_SPARSE_CPP) && defined(KOKKOS_ENABLE_CUDA_UVM))) +#if !defined(TEST_HIP_SPARSE_CPP) && !defined(TEST_SYCL_SPARSE_CPP) && !defined(TEST_OPENMPTARGET_SPARSE_CPP) && \ + (!defined(TEST_CUDA_SPARSE_CPP) || (defined(TEST_CUDA_SPARSE_CPP) && defined(KOKKOS_ENABLE_CUDA_UVM))) #include "Kokkos_Core.hpp" #include @@ -67,13 +65,11 @@ void generalTest(bool& /*success*/, std::ostream& out) { for (lno_t hint = 0; hint < 3; ++hint) { // Length-zero array is trivially sorted, but try the unsorted // case just to make sure that branch of the code is right. 
- lno_t offset = findRelOffset(indsToSearch, numEnt, - indToFind, hint, true); + lno_t offset = findRelOffset(indsToSearch, numEnt, indToFind, hint, true); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array - offset = findRelOffset(indsToSearch, numEnt, - indToFind, hint, false); + offset = findRelOffset(indsToSearch, numEnt, indToFind, hint, false); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array } @@ -86,8 +82,7 @@ void generalTest(bool& /*success*/, std::ostream& out) { lno_t numEnt = 7; const lno_t indsToSearch[7] = {1, 1, 2, 3, 5, 8, 13}; nIVT indsToSearch_view("indsToSearch", numEnt); - typename nIVT::HostMirror h_indsToSearch_view = - Kokkos::create_mirror_view(indsToSearch_view); + typename nIVT::HostMirror h_indsToSearch_view = Kokkos::create_mirror_view(indsToSearch_view); for (int i = 0; i < numEnt; ++i) { // std::cout << "indsToSearch[i]:" << indsToSearch[i] << std::endl; h_indsToSearch_view(i) = indsToSearch[i]; @@ -102,8 +97,7 @@ void generalTest(bool& /*success*/, std::ostream& out) { // This one is in [min, max]. lno_t indNotThere = 4; - lno_t offset = findRelOffset( - indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); + lno_t offset = findRelOffset(indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array @@ -111,23 +105,20 @@ void generalTest(bool& /*success*/, std::ostream& out) { // Test another index that is not in the array. // This one is _not_ in [min, max]. indNotThere = 42; - offset = findRelOffset( - indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); + offset = findRelOffset(indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test all indices that are in the array. 
for (lno_t k = 0; k < numEnt; ++k) { const lno_t indToFind = indsToSearch[k]; // in the array - offset = findRelOffset( - indsToSearch_view.data(), numEnt, indToFind, hint, isSorted); + offset = findRelOffset(indsToSearch_view.data(), numEnt, indToFind, hint, isSorted); if (indToFind == static_cast(1)) { // 1 is a duplicate in this example. Treat it as a special // case. We don't specify which instance of duplicates the // function must return, so either one is fine. - ASSERT_TRUE((offset == static_cast(0) || - offset == static_cast(1))); + ASSERT_TRUE((offset == static_cast(0) || offset == static_cast(1))); /* TEST_ASSERT( offset == static_cast (0) || offset == static_cast (1) ); @@ -146,8 +137,7 @@ void generalTest(bool& /*success*/, std::ostream& out) { const lno_t indsToSearch[7] = {1, 1, 2, 3, 5, 8, 13}; nIVT indsToSearch_view("indsToSearch", numEnt); - typename nIVT::HostMirror h_indsToSearch_view = - Kokkos::create_mirror_view(indsToSearch_view); + typename nIVT::HostMirror h_indsToSearch_view = Kokkos::create_mirror_view(indsToSearch_view); for (int i = 0; i < numEnt; ++i) h_indsToSearch_view(i) = indsToSearch[i]; Kokkos::deep_copy(indsToSearch_view, h_indsToSearch_view); Kokkos::fence(); @@ -158,30 +148,26 @@ void generalTest(bool& /*success*/, std::ostream& out) { // Test an index that is not in the array. // This one is in [min, max]. lno_t indNotThere = 4; - lno_t offset = findRelOffset(indsToSearch_view, numEnt, - indNotThere, hint, isSorted); + lno_t offset = findRelOffset(indsToSearch_view, numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test another index that is not in the array. // This one is _not_ in [min, max]. 
indNotThere = 42; - offset = findRelOffset(indsToSearch_view, numEnt, indNotThere, - hint, isSorted); + offset = findRelOffset(indsToSearch_view, numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test all indices that are in the array. for (lno_t k = 0; k < numEnt; ++k) { const lno_t indToFind = indsToSearch[k]; // in the array - offset = findRelOffset(indsToSearch_view, numEnt, indToFind, - hint, isSorted); + offset = findRelOffset(indsToSearch_view, numEnt, indToFind, hint, isSorted); if (indToFind == static_cast(1)) { // 1 is a duplicate in this example. Treat it as a special // case. We don't specify which instance of duplicates the // function must return, so either one is fine. - ASSERT_TRUE((offset == static_cast(0) || - offset == static_cast(1))); + ASSERT_TRUE((offset == static_cast(0) || offset == static_cast(1))); // TEST_ASSERT( offset == static_cast (0) || // offset == static_cast (1) ); @@ -203,8 +189,7 @@ void generalTest(bool& /*success*/, std::ostream& out) { nIVT indsToSearch_view("indsToSearch", numEnt); - typename nIVT::HostMirror h_indsToSearch_view = - Kokkos::create_mirror_view(indsToSearch_view); + typename nIVT::HostMirror h_indsToSearch_view = Kokkos::create_mirror_view(indsToSearch_view); for (int i = 0; i < numEnt; ++i) h_indsToSearch_view(i) = indsToSearch[i]; Kokkos::deep_copy(indsToSearch_view, h_indsToSearch_view); Kokkos::fence(); @@ -213,30 +198,26 @@ void generalTest(bool& /*success*/, std::ostream& out) { // Test an index that is not in the array. // This one is in [min, max]. lno_t indNotThere = 4; - lno_t offset = findRelOffset( - indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); + lno_t offset = findRelOffset(indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test another index that is not in the array. 
// This one is _not_ in [min, max]. indNotThere = 42; - offset = findRelOffset( - indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); + offset = findRelOffset(indsToSearch_view.data(), numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test all indices that are in the array. for (lno_t k = 0; k < numEnt; ++k) { const lno_t indToFind = indsToSearch[k]; // in the array - offset = findRelOffset( - indsToSearch_view.data(), numEnt, indToFind, hint, isSorted); + offset = findRelOffset(indsToSearch_view.data(), numEnt, indToFind, hint, isSorted); if (indToFind == static_cast(1)) { // 1 is a duplicate in this example. Treat it as a special // case. We don't specify which instance of duplicates the // function must return, so either one is fine. - ASSERT_TRUE((offset == static_cast(1) || - offset == static_cast(3))); + ASSERT_TRUE((offset == static_cast(1) || offset == static_cast(3))); // TEST_ASSERT( offset == static_cast (1) || // offset == static_cast (3) ); } else { @@ -262,8 +243,7 @@ void generalTest(bool& /*success*/, std::ostream& out) { nIVT indsToSearch_view("indsToSearch", numEnt); - typename nIVT::HostMirror h_indsToSearch_view = - Kokkos::create_mirror_view(indsToSearch_view); + typename nIVT::HostMirror h_indsToSearch_view = Kokkos::create_mirror_view(indsToSearch_view); for (int i = 0; i < numEnt; ++i) h_indsToSearch_view(i) = indsToSearch[i]; Kokkos::deep_copy(indsToSearch_view, h_indsToSearch_view); Kokkos::fence(); @@ -274,30 +254,26 @@ void generalTest(bool& /*success*/, std::ostream& out) { // Test an index that is not in the array. // This one is in [min, max]. 
lno_t indNotThere = 4; - lno_t offset = findRelOffset(indsToSearch_view, numEnt, - indNotThere, hint, isSorted); + lno_t offset = findRelOffset(indsToSearch_view, numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test another index that is not in the array. // This one is _not_ in [min, max]. indNotThere = 42; - offset = findRelOffset(indsToSearch_view, numEnt, indNotThere, - hint, isSorted); + offset = findRelOffset(indsToSearch_view, numEnt, indNotThere, hint, isSorted); EXPECT_TRUE((offset == numEnt)); // TEST_EQUALITY( offset, numEnt ); // not in the array // Test all indices that are in the array. for (lno_t k = 0; k < numEnt; ++k) { const lno_t indToFind = indsToSearch[k]; // in the array - offset = findRelOffset(indsToSearch_view, numEnt, indToFind, - hint, isSorted); + offset = findRelOffset(indsToSearch_view, numEnt, indToFind, hint, isSorted); if (indToFind == static_cast(1)) { // 1 is a duplicate in this example. Treat it as a special // case. We don't specify which instance of duplicates the // function must return, so either one is fine. 
- ASSERT_TRUE((offset == static_cast(1) || - offset == static_cast(3))); + ASSERT_TRUE((offset == static_cast(1) || offset == static_cast(3))); /* TEST_ASSERT( offset == static_cast (1) || offset == static_cast (3) ); @@ -348,8 +324,7 @@ void testLongArray(bool& /*success*/, std::ostream& out) { typedef Kokkos::View lno_view_t; lno_view_t indsToSearch("indsToSearch", N); - typename lno_view_t::HostMirror h_indsToSearch = - Kokkos::create_mirror_view(indsToSearch); + typename lno_view_t::HostMirror h_indsToSearch = Kokkos::create_mirror_view(indsToSearch); for (lno_t k = 0; k < n; ++k) { h_indsToSearch[2 * k] = 2 * (n - k); @@ -372,14 +347,12 @@ void testLongArray(bool& /*success*/, std::ostream& out) { const lno_t wrongHint = expectedOffset + 7; const lno_t offset0 = - findRelOffset*/ lno_view_t>( - indsToSearch, N, indToFind, correctHint, false); + findRelOffset*/ lno_view_t>(indsToSearch, N, indToFind, correctHint, false); EXPECT_TRUE((offset0 == expectedOffset)); // TEST_EQUALITY( offset0, expectedOffset ); const lno_t offset1 = - findRelOffset*/ lno_view_t>( - indsToSearch, N, indToFind, wrongHint, false); + findRelOffset*/ lno_view_t>(indsToSearch, N, indToFind, wrongHint, false); EXPECT_TRUE((offset1 == expectedOffset)); // TEST_EQUALITY( offset1, expectedOffset ); } @@ -389,8 +362,7 @@ void testLongArray(bool& /*success*/, std::ostream& out) { const lno_t indToFind = N + 1; // not in the array const lno_t hint = 0; const lno_t offset0 = - findRelOffset*/ lno_view_t>( - indsToSearch, N, indToFind, hint, false); + findRelOffset*/ lno_view_t>(indsToSearch, N, indToFind, hint, false); EXPECT_TRUE((offset0 == N)); // TEST_EQUALITY( offset0, N ); } @@ -420,22 +392,18 @@ void test_findRelOffset() { EXPECT_TRUE(success); } -#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##findRelOffset##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_findRelOffset(); \ +#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ 
+ TEST_F(TestCategory, sparse##_##findRelOffset##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_findRelOffset(); \ } #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, int, TestDevice) #endif #if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T)) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int64_t, int, TestDevice) #endif diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gauss_seidel.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gauss_seidel.hpp index 48c7d41a9197..92109f02dd67 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gauss_seidel.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gauss_seidel.hpp @@ -20,7 +20,7 @@ #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" #include "KokkosSparse_IOUtils.hpp" -//#include +// #include #include #include #include @@ -55,68 +55,53 @@ namespace Test { // Run GS on the given vectors, where the handle is already set up. template -void run_gauss_seidel( - Handle &kh, crsMat_t input_mat, vec_t x_vector, vec_t y_vector, - bool is_symmetric_graph, typename crsMat_t::value_type omega, - int apply_type = 0 // 0 for symmetric, 1 for forward, 2 for backward. +void run_gauss_seidel(Handle &kh, crsMat_t input_mat, vec_t x_vector, vec_t y_vector, bool is_symmetric_graph, + typename crsMat_t::value_type omega, + int apply_type = 0 // 0 for symmetric, 1 for forward, 2 for backward. 
) { const size_t num_rows = input_mat.numRows(); const size_t num_cols = input_mat.numCols(); const int apply_count = 2; - gauss_seidel_symbolic(&kh, num_rows, num_cols, input_mat.graph.row_map, - input_mat.graph.entries, is_symmetric_graph); - gauss_seidel_numeric(&kh, num_rows, num_cols, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, + gauss_seidel_symbolic(&kh, num_rows, num_cols, input_mat.graph.row_map, input_mat.graph.entries, is_symmetric_graph); + gauss_seidel_numeric(&kh, num_rows, num_cols, input_mat.graph.row_map, input_mat.graph.entries, input_mat.values, is_symmetric_graph); switch (apply_type) { case 0: - symmetric_gauss_seidel_apply( - &kh, num_rows, num_cols, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + symmetric_gauss_seidel_apply(&kh, num_rows, num_cols, input_mat.graph.row_map, input_mat.graph.entries, + input_mat.values, x_vector, y_vector, false, true, omega, apply_count); break; case 1: - forward_sweep_gauss_seidel_apply( - &kh, num_rows, num_cols, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + forward_sweep_gauss_seidel_apply(&kh, num_rows, num_cols, input_mat.graph.row_map, input_mat.graph.entries, + input_mat.values, x_vector, y_vector, false, true, omega, apply_count); break; case 2: - backward_sweep_gauss_seidel_apply( - &kh, num_rows, num_cols, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + backward_sweep_gauss_seidel_apply(&kh, num_rows, num_cols, input_mat.graph.row_map, input_mat.graph.entries, + input_mat.values, x_vector, y_vector, false, true, omega, apply_count); break; default: - symmetric_gauss_seidel_apply( - &kh, num_rows, num_cols, input_mat.graph.row_map, - input_mat.graph.entries, input_mat.values, x_vector, y_vector, false, - true, omega, apply_count); + 
symmetric_gauss_seidel_apply(&kh, num_rows, num_cols, input_mat.graph.row_map, input_mat.graph.entries, + input_mat.values, x_vector, y_vector, false, true, omega, apply_count); break; } } template -void run_gauss_seidel( - crsMat_t input_mat, GSAlgorithm gs_algorithm, vec_t x_vector, - vec_t y_vector, bool is_symmetric_graph, - int apply_type = 0, // 0 for symmetric, 1 for forward, 2 for backward. - int cluster_size = 1, - bool classic = - false, // only with two-stage, true for sptrsv instead of richardson - ClusteringAlgorithm clusterAlgo = CLUSTER_DEFAULT, - KokkosGraph::ColoringAlgorithm coloringAlgo = - KokkosGraph::COLORING_DEFAULT) { +void run_gauss_seidel(crsMat_t input_mat, GSAlgorithm gs_algorithm, vec_t x_vector, vec_t y_vector, + bool is_symmetric_graph, + int apply_type = 0, // 0 for symmetric, 1 for forward, 2 for backward. + int cluster_size = 1, + bool classic = false, // only with two-stage, true for sptrsv instead of richardson + ClusteringAlgorithm clusterAlgo = CLUSTER_DEFAULT, + KokkosGraph::ColoringAlgorithm coloringAlgo = KokkosGraph::COLORING_DEFAULT) { using size_type = typename crsMat_t::size_type; using lno_t = typename crsMat_t::ordinal_type; using scalar_t = typename crsMat_t::value_type; using device = typename crsMat_t::device_type; - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernelsHandle KernelHandle; scalar_t omega(0.9); @@ -136,28 +121,23 @@ void run_gauss_seidel( kh.create_gs_handle(GS_DEFAULT, coloringAlgo); } - run_gauss_seidel(kh, input_mat, x_vector, y_vector, is_symmetric_graph, omega, - apply_type); + run_gauss_seidel(kh, input_mat, x_vector, y_vector, is_symmetric_graph, omega, apply_type); kh.destroy_gs_handle(); } -template -void run_gauss_seidel_streams( - std::vector &instances, std::vector &kh, - std::vector &input_mat, std::vector &x_vector, - std::vector &y_vector, bool 
is_symmetric_graph, - typename crsMat_t::value_type omega, - int apply_type, // 0 for symmetric, 1 for forward, 2 for backward. - int nstreams = 1) { +template +void run_gauss_seidel_streams(std::vector &instances, std::vector &kh, + std::vector &input_mat, std::vector &x_vector, + std::vector &y_vector, bool is_symmetric_graph, + typename crsMat_t::value_type omega, + int apply_type, // 0 for symmetric, 1 for forward, 2 for backward. + int nstreams = 1) { for (int i = 0; i < nstreams; i++) { - gauss_seidel_symbolic(instances[i], &kh[i], input_mat[i].numRows(), - input_mat[i].numCols(), input_mat[i].graph.row_map, - input_mat[i].graph.entries, is_symmetric_graph); - gauss_seidel_numeric(instances[i], &kh[i], input_mat[i].numRows(), - input_mat[i].numCols(), input_mat[i].graph.row_map, - input_mat[i].graph.entries, input_mat[i].values, + gauss_seidel_symbolic(instances[i], &kh[i], input_mat[i].numRows(), input_mat[i].numCols(), + input_mat[i].graph.row_map, input_mat[i].graph.entries, is_symmetric_graph); + gauss_seidel_numeric(instances[i], &kh[i], input_mat[i].numRows(), input_mat[i].numCols(), + input_mat[i].graph.row_map, input_mat[i].graph.entries, input_mat[i].values, is_symmetric_graph); } @@ -165,62 +145,47 @@ void run_gauss_seidel_streams( for (int i = 0; i < nstreams; i++) { switch (apply_type) { case 0: - symmetric_gauss_seidel_apply( - instances[i], &kh[i], input_mat[i].numRows(), - input_mat[i].numCols(), input_mat[i].graph.row_map, - input_mat[i].graph.entries, input_mat[i].values, x_vector[i], - y_vector[i], false, true, omega, apply_count); + symmetric_gauss_seidel_apply(instances[i], &kh[i], input_mat[i].numRows(), input_mat[i].numCols(), + input_mat[i].graph.row_map, input_mat[i].graph.entries, input_mat[i].values, + x_vector[i], y_vector[i], false, true, omega, apply_count); break; case 1: - forward_sweep_gauss_seidel_apply( - instances[i], &kh[i], input_mat[i].numRows(), - input_mat[i].numCols(), input_mat[i].graph.row_map, - 
input_mat[i].graph.entries, input_mat[i].values, x_vector[i], - y_vector[i], false, true, omega, apply_count); + forward_sweep_gauss_seidel_apply(instances[i], &kh[i], input_mat[i].numRows(), input_mat[i].numCols(), + input_mat[i].graph.row_map, input_mat[i].graph.entries, input_mat[i].values, + x_vector[i], y_vector[i], false, true, omega, apply_count); break; case 2: - backward_sweep_gauss_seidel_apply( - instances[i], &kh[i], input_mat[i].numRows(), - input_mat[i].numCols(), input_mat[i].graph.row_map, - input_mat[i].graph.entries, input_mat[i].values, x_vector[i], - y_vector[i], false, true, omega, apply_count); + backward_sweep_gauss_seidel_apply(instances[i], &kh[i], input_mat[i].numRows(), input_mat[i].numCols(), + input_mat[i].graph.row_map, input_mat[i].graph.entries, input_mat[i].values, + x_vector[i], y_vector[i], false, true, omega, apply_count); break; default: - symmetric_gauss_seidel_apply( - instances[i], &kh[i], input_mat[i].numRows(), - input_mat[i].numCols(), input_mat[i].graph.row_map, - input_mat[i].graph.entries, input_mat[i].values, x_vector[i], - y_vector[i], false, true, omega, apply_count); + symmetric_gauss_seidel_apply(instances[i], &kh[i], input_mat[i].numRows(), input_mat[i].numCols(), + input_mat[i].graph.row_map, input_mat[i].graph.entries, input_mat[i].values, + x_vector[i], y_vector[i], false, true, omega, apply_count); break; } } } } // namespace Test -template -void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance, bool symmetric) { +template +void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, bool symmetric) { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; typedef typename Kokkos::ArithTraits::mag_type mag_t; srand(245); - lno_t numCols = numRows; - crsMat_t input_mat = - 
KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); + lno_t numCols = numRows; + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) - input_mat = Test::symmetrize( - input_mat); + input_mat = Test::symmetrize(input_mat); } lno_t nv = input_mat.numRows(); - scalar_view_t solution_x( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv); + scalar_view_t solution_x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv); create_random_x_vector(solution_x); mag_t initial_norm_res = KokkosBlas::nrm2(solution_x); scalar_view_t y_vector = create_random_y_vector(input_mat, solution_x); @@ -228,16 +193,14 @@ void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, // behavior of each algorithm _should be_ the same on every execution space, // which is why we just test GS_DEFAULT. 
int apply_count = 3; // test symmetric, forward, backward - scalar_view_t x_vector( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), nv); + scalar_view_t x_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), nv); const scalar_t one = Kokkos::ArithTraits::one(); const scalar_t zero = Kokkos::ArithTraits::zero(); //*** Point-coloring version **** for (int apply_type = 0; apply_type < apply_count; ++apply_type) { Kokkos::Timer timer1; Kokkos::deep_copy(x_vector, zero); - run_gauss_seidel(input_mat, GS_DEFAULT, x_vector, y_vector, symmetric, - apply_type); + run_gauss_seidel(input_mat, GS_DEFAULT, x_vector, y_vector, symmetric, apply_type); // double gs = timer1.seconds(); // KokkosKernels::Impl::print_1Dview(x_vector); KokkosBlas::axpby(one, solution_x, -one, x_vector); @@ -246,16 +209,15 @@ void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, } //*** Cluster-coloring version **** int clusterSizes[3] = {2, 5, 34}; - std::vector clusteringAlgos = {CLUSTER_MIS2, - CLUSTER_BALLOON}; + std::vector clusteringAlgos = {CLUSTER_MIS2, CLUSTER_BALLOON}; for (int csize = 0; csize < 3; csize++) { for (auto clusterAlgo : clusteringAlgos) { for (int apply_type = 0; apply_type < apply_count; ++apply_type) { Kokkos::Timer timer1; // Zero out X before solving Kokkos::deep_copy(x_vector, zero); - run_gauss_seidel(input_mat, GS_CLUSTER, x_vector, y_vector, symmetric, - apply_type, clusterSizes[csize], false, clusterAlgo); + run_gauss_seidel(input_mat, GS_CLUSTER, x_vector, y_vector, symmetric, apply_type, clusterSizes[csize], false, + clusterAlgo); KokkosBlas::axpby(one, solution_x, -one, x_vector); mag_t result_norm_res = KokkosBlas::nrm2(x_vector); EXPECT_LT(result_norm_res, initial_norm_res); @@ -265,8 +227,7 @@ void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, //*** Two-stage version **** for (int apply_type = 0; apply_type < apply_count; ++apply_type) { Kokkos::deep_copy(x_vector, zero); - 
run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, - apply_type); + run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, apply_type); KokkosBlas::axpby(one, solution_x, -one, x_vector); mag_t result_norm_res = KokkosBlas::nrm2(x_vector); EXPECT_LT(result_norm_res, initial_norm_res); @@ -274,46 +235,35 @@ void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, //*** Two-stage version (classic) **** for (int apply_type = 0; apply_type < apply_count; ++apply_type) { Kokkos::deep_copy(x_vector, zero); - run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, - apply_type, 0, true); + run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, apply_type, 0, true); KokkosBlas::axpby(one, solution_x, -one, x_vector); mag_t result_norm_res = KokkosBlas::nrm2(x_vector); EXPECT_LT(result_norm_res, initial_norm_res); } } -template -void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance, lno_t numVecs, +template +void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, lno_t numVecs, bool symmetric) { using namespace Test; srand(245); - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef Kokkos::View scalar_view2d_t; - typedef Kokkos::View - host_scalar_view2d_t; + typedef Kokkos::View host_scalar_view2d_t; typedef typename Kokkos::ArithTraits::mag_type mag_t; - lno_t numCols = numRows; - crsMat_t input_mat = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); + lno_t numCols = numRows; + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for 
symmetric=false) - input_mat = Test::symmetrize( - input_mat); + input_mat = Test::symmetrize(input_mat); } lno_t nv = input_mat.numRows(); - host_scalar_view2d_t solution_x( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv, - numVecs); + host_scalar_view2d_t solution_x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv, numVecs); create_random_x_vector(solution_x); - scalar_view2d_t x_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), - nv, numVecs); + scalar_view2d_t x_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), nv, numVecs); Kokkos::deep_copy(x_vector, solution_x); scalar_view2d_t y_vector = create_random_y_vector_mv(input_mat, x_vector); auto x_host = Kokkos::create_mirror_view(x_vector); @@ -323,8 +273,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, for (lno_t j = 0; j < nv; j++) { sum += solution_x(j, i) * solution_x(j, i); } - initial_norms[i] = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(sum)); + initial_norms[i] = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(sum)); } int apply_count = 3; // test symmetric, forward, backward const scalar_t zero = Kokkos::ArithTraits::zero(); @@ -333,8 +282,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, Kokkos::Timer timer1; // Zero out X before solving Kokkos::deep_copy(x_vector, zero); - run_gauss_seidel(input_mat, GS_DEFAULT, x_vector, y_vector, symmetric, - apply_type); + run_gauss_seidel(input_mat, GS_DEFAULT, x_vector, y_vector, symmetric, apply_type); Kokkos::deep_copy(x_host, x_vector); for (lno_t i = 0; i < numVecs; i++) { scalar_t diffDot = 0; @@ -342,8 +290,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, scalar_t diff = x_host(j, i) - solution_x(j, i); diffDot += diff * diff; } - mag_t res = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(diffDot)); + mag_t res = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(diffDot)); 
EXPECT_LT(res, initial_norms[i]); } } @@ -355,8 +302,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, Kokkos::Timer timer1; // Zero out X before solving Kokkos::deep_copy(x_vector, zero); - run_gauss_seidel(input_mat, GS_CLUSTER, x_vector, y_vector, symmetric, - apply_type, clusterSizes[csize], false, + run_gauss_seidel(input_mat, GS_CLUSTER, x_vector, y_vector, symmetric, apply_type, clusterSizes[csize], false, (ClusteringAlgorithm)algo); Kokkos::deep_copy(x_host, x_vector); for (lno_t i = 0; i < numVecs; i++) { @@ -365,8 +311,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, scalar_t diff = x_host(j, i) - solution_x(j, i); diffDot += diff * diff; } - mag_t res = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(diffDot)); + mag_t res = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(diffDot)); EXPECT_LT(res, initial_norms[i]); } } @@ -376,8 +321,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, for (int apply_type = 0; apply_type < apply_count; ++apply_type) { // Zero out X before solving Kokkos::deep_copy(x_vector, zero); - run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, - apply_type); + run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, apply_type); Kokkos::deep_copy(x_host, x_vector); for (lno_t i = 0; i < numVecs; i++) { scalar_t diffDot = 0; @@ -385,8 +329,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, scalar_t diff = x_host(j, i) - solution_x(j, i); diffDot += diff * diff; } - mag_t res = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(diffDot)); + mag_t res = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(diffDot)); EXPECT_LT(res, initial_norms[i]); } } @@ -394,8 +337,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, for (int apply_type = 0; apply_type < apply_count; ++apply_type) { // Zero out X before solving Kokkos::deep_copy(x_vector, 
zero); - run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, - apply_type, 0, true); + run_gauss_seidel(input_mat, GS_TWOSTAGE, x_vector, y_vector, symmetric, apply_type, 0, true); Kokkos::deep_copy(x_host, x_vector); for (lno_t i = 0; i < numVecs; i++) { scalar_t diffDot = 0; @@ -403,34 +345,25 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, scalar_t diff = x_host(j, i) - solution_x(j, i); diffDot += diff * diff; } - mag_t res = Kokkos::ArithTraits::sqrt( - Kokkos::ArithTraits::abs(diffDot)); + mag_t res = Kokkos::ArithTraits::sqrt(Kokkos::ArithTraits::abs(diffDot)); EXPECT_LT(res, initial_norms[i]); } } } -template -void test_sequential_sor(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_sequential_sor(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { const scalar_t zero = Kokkos::ArithTraits::zero(); const scalar_t one = Kokkos::ArithTraits::one(); srand(245); typedef typename device::execution_space exec_space; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; - lno_t numCols = numRows; - crsMat_t input_mat = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); - auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - input_mat.graph.row_map); - auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - input_mat.graph.entries); - auto values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - input_mat.values); + typedef typename KokkosSparse::CrsMatrix crsMat_t; + lno_t numCols = numRows; + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); + auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), input_mat.graph.row_map); + auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), 
input_mat.graph.entries); + auto values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), input_mat.values); // create raw x (unkown), y (rhs) vectors using vector_t = typename crsMat_t::values_type::non_const_type; // Create random x @@ -456,16 +389,14 @@ void test_sequential_sor(lno_t numRows, size_type nnz, lno_t bandwidth, } } for (int i = 0; i < 1; i++) { - KokkosSparse::Impl::Sequential::gaussSeidel( - numRows, 1, rowmap.data(), entries.data(), values.data(), y_host.data(), - numRows, x_host.data(), numRows, invDiag.data(), + KokkosSparse::Impl::Sequential::gaussSeidel( + numRows, 1, rowmap.data(), entries.data(), values.data(), y_host.data(), numRows, x_host.data(), numRows, + invDiag.data(), one, // omega "F"); - KokkosSparse::Impl::Sequential::gaussSeidel( - numRows, 1, rowmap.data(), entries.data(), values.data(), y_host.data(), - numRows, x_host.data(), numRows, invDiag.data(), + KokkosSparse::Impl::Sequential::gaussSeidel( + numRows, 1, rowmap.data(), entries.data(), values.data(), y_host.data(), numRows, x_host.data(), numRows, + invDiag.data(), one, // omega "B"); } @@ -478,39 +409,31 @@ void test_sequential_sor(lno_t numRows, size_type nnz, lno_t bandwidth, EXPECT_TRUE(0.99 < scaledSolutionDot); } -template -void test_balloon_clustering(lno_t numRows, size_type nnzPerRow, - lno_t bandwidth) { +template +void test_balloon_clustering(lno_t numRows, size_type nnzPerRow, lno_t bandwidth) { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type const_lno_row_view_t; typedef typename graph_t::entries_type const_lno_nnz_view_t; typedef typename graph_t::row_map_type::non_const_type lno_row_view_t; typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename 
device::memory_space, typename device::memory_space> + typedef KokkosKernelsHandle KernelHandle; srand(245); size_type nnzTotal = nnzPerRow * numRows; lno_t nnzVariance = nnzPerRow / 4; - crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numRows, nnzTotal, nnzVariance, bandwidth); + crsMat_t A = + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numRows, nnzTotal, nnzVariance, bandwidth); lno_row_view_t symRowmap; lno_nnz_view_t symEntries; - KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - const_lno_row_view_t, const_lno_nnz_view_t, lno_row_view_t, - lno_nnz_view_t, typename device::execution_space>( + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap( numRows, A.graph.row_map, A.graph.entries, symRowmap, symEntries); - KokkosSparse::Impl::BalloonClustering - balloon(numRows, symRowmap, symEntries); - for (int clusterSize = 1; clusterSize <= numRows / 16; - clusterSize = std::ceil(clusterSize * 1.3)) { + KokkosSparse::Impl::BalloonClustering balloon(numRows, symRowmap, + symEntries); + for (int clusterSize = 1; clusterSize <= numRows / 16; clusterSize = std::ceil(clusterSize * 1.3)) { auto vertClusters = balloon.run(clusterSize); // validate results: make sure cluster labels are in bounds, and that the // number of clusters is correct @@ -528,20 +451,16 @@ void test_balloon_clustering(lno_t numRows, size_type nnzPerRow, } } -template +template void test_gauss_seidel_empty() { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_type; typedef typename graph_t::entries_type::non_const_type entries_type; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + 
typedef KokkosKernelsHandle KernelHandle; // The rowmap of a zero-row matrix can be length 0 or 1, so Gauss-Seidel // should work with both (the setup and apply are essentially no-ops but they @@ -567,21 +486,16 @@ void test_gauss_seidel_empty() { scalar_view_t x("X", nRows); scalar_view_t y("Y", nRows); scalar_t omega(0.9); - symmetric_gauss_seidel_apply(&kh, nRows, nRows, rowmap, entries, values, - x, y, false, true, omega, 3); + symmetric_gauss_seidel_apply(&kh, nRows, nRows, rowmap, entries, values, x, y, false, true, omega, 3); kh.destroy_gs_handle(); } } } -template -void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, - lno_t nnzPerShortRow, bool symmetric) { +template +void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, lno_t nnzPerShortRow, bool symmetric) { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; typedef typename crsMat_t::index_type::non_const_type entries_view_t; typedef typename crsMat_t::row_map_type::non_const_type rowmap_view_t; @@ -598,8 +512,7 @@ void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, else rowLengths.push_back(nnzPerShortRow); } - std::shuffle(rowLengths.begin(), rowLengths.end(), - std::mt19937(std::random_device()())); + std::shuffle(rowLengths.begin(), rowLengths.end(), std::mt19937(std::random_device()())); size_type totalEntries = 0; int randSteps = 1000000; scalar_t offDiagBase; @@ -614,37 +527,27 @@ void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, values.push_back(2.5 * one); } else { entries.push_back(rand() % numRows); - values.push_back((-0.3 + (0.6 * (rand() % randSteps) / randSteps)) * - offDiagBase); + values.push_back((-0.3 + (0.6 * (rand() % randSteps) / randSteps)) * offDiagBase); } } totalEntries += rowLengths[i]; rowmap.push_back(totalEntries); } - scalar_view_t valuesView( - 
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values"), totalEntries); - entries_view_t entriesView( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries"), totalEntries); - rowmap_view_t rowmapView( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Rowmap"), numRows + 1); - Kokkos::deep_copy(valuesView, Kokkos::View( - values.data(), totalEntries)); - Kokkos::deep_copy(entriesView, Kokkos::View( - entries.data(), totalEntries)); - Kokkos::deep_copy(rowmapView, Kokkos::View( - rowmap.data(), numRows + 1)); - crsMat_t input_mat("A", numRows, numRows, totalEntries, valuesView, - rowmapView, entriesView); + scalar_view_t valuesView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values"), totalEntries); + entries_view_t entriesView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries"), totalEntries); + rowmap_view_t rowmapView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Rowmap"), numRows + 1); + Kokkos::deep_copy(valuesView, Kokkos::View(values.data(), totalEntries)); + Kokkos::deep_copy(entriesView, Kokkos::View(entries.data(), totalEntries)); + Kokkos::deep_copy(rowmapView, Kokkos::View(rowmap.data(), numRows + 1)); + crsMat_t input_mat("A", numRows, numRows, totalEntries, valuesView, rowmapView, entriesView); input_mat = KokkosSparse::sort_and_merge_matrix(input_mat); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) - input_mat = Test::symmetrize( - input_mat); + input_mat = Test::symmetrize(input_mat); } lno_t nv = input_mat.numRows(); - scalar_view_t solution_x( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv); + scalar_view_t solution_x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv); create_random_x_vector(solution_x); mag_t initial_norm_res = KokkosBlas::nrm2(solution_x); scalar_view_t y_vector = create_random_y_vector(input_mat, solution_x); @@ -652,12 +555,10 @@ void test_gauss_seidel_long_rows(lno_t numRows, lno_t 
numLongRows, // behavior of each algorithm _should be_ the same on every execution space, // which is why we just test GS_DEFAULT. int apply_count = 1; // test symmetric, forward, backward - scalar_view_t x_vector( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), nv); + scalar_view_t x_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), nv); for (int apply_type = 0; apply_type < apply_count; ++apply_type) { - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -666,47 +567,37 @@ void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, gsHandle->set_long_row_threshold(3 * nnzPerShortRow); // Reset x vector to 0 Kokkos::deep_copy(x_vector, scalar_t()); - run_gauss_seidel(kh, input_mat, x_vector, y_vector, symmetric, 0.9, - apply_type); + run_gauss_seidel(kh, input_mat, x_vector, y_vector, symmetric, 0.9, apply_type); KokkosBlas::axpby(one, solution_x, -one, x_vector); mag_t result_norm_res = KokkosBlas::nrm2(x_vector); EXPECT_LT(result_norm_res, 0.25 * initial_norm_res); } } -template +template void test_gauss_seidel_custom_coloring(lno_t numRows, lno_t nnzPerRow) { using namespace Test; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; typedef typename Kokkos::ArithTraits::mag_type mag_t; const scalar_t one = Kokkos::ArithTraits::one(); size_type nnz = nnzPerRow * numRows; - crsMat_t input_mat = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numRows, nnz, 0, numRows / 10, 2.0 * one); - input_mat = - Test::symmetrize(input_mat); + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numRows, nnz, 0, numRows / 10, 2.0 * one); + input_mat = 
Test::symmetrize(input_mat); input_mat = KokkosSparse::sort_and_merge_matrix(input_mat); - scalar_view_t solution_x( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), numRows); + scalar_view_t solution_x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), numRows); create_random_x_vector(solution_x); mag_t initial_norm_res = KokkosBlas::nrm2(solution_x); scalar_view_t y_vector = create_random_y_vector(input_mat, solution_x); - scalar_view_t x_vector( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), numRows); - typedef KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + scalar_view_t x_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), numRows); + typedef KokkosKernelsHandle KernelHandle; KernelHandle kh; kh.create_gs_handle(GS_DEFAULT, KokkosGraph::COLORING_VBBIT); - EXPECT_EQ(kh.get_point_gs_handle()->get_coloring_algorithm(), - KokkosGraph::COLORING_VBBIT); + EXPECT_EQ(kh.get_point_gs_handle()->get_coloring_algorithm(), KokkosGraph::COLORING_VBBIT); // Reset x vector to 0 Kokkos::deep_copy(x_vector, scalar_t()); run_gauss_seidel(kh, input_mat, x_vector, y_vector, true, 0.9, 0); @@ -715,16 +606,13 @@ void test_gauss_seidel_custom_coloring(lno_t numRows, lno_t nnzPerRow) { EXPECT_LT(result_norm_res, 0.25 * initial_norm_res); } -template -void test_gauss_seidel_streams_rank1( - lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, - bool symmetric, double omega, - KokkosGraph::ColoringAlgorithm coloringAlgo = KokkosGraph::COLORING_DEFAULT, - int nstreams = 1) { +template +void test_gauss_seidel_streams_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, + bool symmetric, double omega, + KokkosGraph::ColoringAlgorithm coloringAlgo = KokkosGraph::COLORING_DEFAULT, + int nstreams = 1) { using namespace Test; - using crsMat_t = typename KokkosSparse::CrsMatrix; + using 
crsMat_t = typename KokkosSparse::CrsMatrix; using scalar_view_t = typename crsMat_t::values_type::non_const_type; using mag_t = typename Kokkos::ArithTraits::mag_type; using execution_space = typename device::execution_space; @@ -732,10 +620,8 @@ void test_gauss_seidel_streams_rank1( using const_size_type = const size_type; using const_lno_t = const lno_t; using const_scalar_t = const scalar_t; - using KernelHandle = - KokkosKernelsHandle; + using KernelHandle = KokkosKernelsHandle; srand(245); lno_t numCols = numRows; typename crsMat_t::value_type m_omega = omega; @@ -752,8 +638,7 @@ void test_gauss_seidel_streams_rank1( } #endif // KOKKOS_ENABLE_OPENMP - auto instances = Kokkos::Experimental::partition_space( - execution_space(), std::vector(nstreams, 1)); + auto instances = Kokkos::Experimental::partition_space(execution_space(), std::vector(nstreams, 1)); std::vector kh_v(nstreams); std::vector input_mat_v(nstreams); @@ -766,30 +651,25 @@ void test_gauss_seidel_streams_rank1( const scalar_t zero = Kokkos::ArithTraits::zero(); for (int i = 0; i < nstreams; i++) { - input_mat_v[i] = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); + input_mat_v[i] = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) - input_mat_v[i] = - Test::symmetrize( - input_mat_v[i]); + input_mat_v[i] = Test::symmetrize(input_mat_v[i]); } lno_t nv = input_mat_v[i].numRows(); - scalar_view_t solution_x_tmp( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv); + scalar_view_t solution_x_tmp(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), nv); solution_x_v[i] = solution_x_tmp; create_random_x_vector(solution_x_v[i]); initial_norm_res_v[i] = KokkosBlas::nrm2(solution_x_v[i]); - 
y_vector_v[i] = create_random_y_vector(input_mat_v[i], solution_x_v[i]); + y_vector_v[i] = create_random_y_vector(input_mat_v[i], solution_x_v[i]); // GS_DEFAULT is GS_TEAM on CUDA and GS_PERMUTED on other spaces, and the // behavior of each algorithm _should be_ the same on every execution space, // which is why we just test GS_DEFAULT. - scalar_view_t x_vector_tmp( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), nv); + scalar_view_t x_vector_tmp(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x vector"), nv); x_vector_v[i] = x_vector_tmp; kh_v[i] = KernelHandle(); // Initialize KokkosKernelsHandle defaults. @@ -799,108 +679,68 @@ void test_gauss_seidel_streams_rank1( int apply_count = 3; // test symmetric, forward, backward //*** Point-coloring version **** for (int apply_type = 0; apply_type < apply_count; ++apply_type) { - for (int i = 0; i < nstreams; i++) - Kokkos::deep_copy(instances[i], x_vector_v[i], zero); + for (int i = 0; i < nstreams; i++) Kokkos::deep_copy(instances[i], x_vector_v[i], zero); - run_gauss_seidel_streams(instances, kh_v, input_mat_v, x_vector_v, - y_vector_v, symmetric, m_omega, apply_type, + run_gauss_seidel_streams(instances, kh_v, input_mat_v, x_vector_v, y_vector_v, symmetric, m_omega, apply_type, nstreams); for (int i = 0; i < nstreams; i++) { - KokkosBlas::axpby(instances[i], one, solution_x_v[i], -one, - x_vector_v[i]); + KokkosBlas::axpby(instances[i], one, solution_x_v[i], -one, x_vector_v[i]); mag_t result_norm_res = KokkosBlas::nrm2(instances[i], x_vector_v[i]); - EXPECT_LT(result_norm_res, initial_norm_res_v[i]) - << "on stream_idx: " << i; + EXPECT_LT(result_norm_res, initial_norm_res_v[i]) << "on stream_idx: " << i; } } for (int i = 0; i < nstreams; i++) kh_v[i].destroy_gs_handle(); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_asymmetric_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_rank1(2000, 
2000 * 20, \ - 200, 10, false); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_asymmetric_streams_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, false, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 1); \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, false, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 2); \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, false, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 3); \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, false, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 4); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_asymmetric_rank2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_rank2( \ - 2000, 2000 * 20, 200, 10, 3, false); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_symmetric_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_rank1(2000, 2000 * 20, \ - 200, 10, true); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_symmetric_streams_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, true, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 1); \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, true, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 2); \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, true, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 3); \ - test_gauss_seidel_streams_rank1( \ - 2000, 2000 * 20, 200, 10, true, 0.9, KokkosGraph::COLORING_DEFAULT, \ - 4); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_symmetric_rank2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_rank2( \ - 2000, 2000 * 20, 200, 10, 3, true); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_empty##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_empty(); \ - } \ - 
TEST_F( \ - TestCategory, \ - sparse##_##balloon_clustering##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_balloon_clustering(5000, 100, 2000); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##sequential_sor##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_sequential_sor(1000, 1000 * 15, 50, \ - 10); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_long_rows##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_long_rows(500, 10, 20, \ - true); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##gauss_seidel_custom_coloring##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gauss_seidel_custom_coloring(500, \ - 10); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##gauss_seidel_asymmetric_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_rank1(2000, 2000 * 20, 200, 10, false); \ + } \ + TEST_F(TestCategory, \ + sparse##_##gauss_seidel_asymmetric_streams_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, false, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 1); \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, false, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 2); \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, false, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 3); \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, false, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 4); \ + } \ + TEST_F(TestCategory, sparse##_##gauss_seidel_asymmetric_rank2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_rank2(2000, 2000 * 20, 200, 10, 3, false); \ + } \ + TEST_F(TestCategory, sparse##_##gauss_seidel_symmetric_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_rank1(2000, 2000 * 20, 200, 10, true); \ + } \ + TEST_F(TestCategory, 
sparse##_##gauss_seidel_symmetric_streams_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, true, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 1); \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, true, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 2); \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, true, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 3); \ + test_gauss_seidel_streams_rank1(2000, 2000 * 20, 200, 10, true, 0.9, \ + KokkosGraph::COLORING_DEFAULT, 4); \ + } \ + TEST_F(TestCategory, sparse##_##gauss_seidel_symmetric_rank2##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_rank2(2000, 2000 * 20, 200, 10, 3, true); \ + } \ + TEST_F(TestCategory, sparse##_##gauss_seidel_empty##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_empty(); \ + } \ + TEST_F(TestCategory, sparse##_##balloon_clustering##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_balloon_clustering(5000, 100, 2000); \ + } \ + TEST_F(TestCategory, sparse##_##sequential_sor##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_sequential_sor(1000, 1000 * 15, 50, 10); \ + } \ + TEST_F(TestCategory, sparse##_##gauss_seidel_long_rows##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_long_rows(500, 10, 20, true); \ + } \ + TEST_F(TestCategory, sparse##_##gauss_seidel_custom_coloring##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gauss_seidel_custom_coloring(500, 10); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gmres.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gmres.hpp index ee78d277297b..7b55fe41ce5e 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gmres.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_gmres.hpp @@ -48,27 +48,23 @@ struct TolMeta { static constexpr float value = 1e-5; // Lower tolerance for floats }; -template ::value>::type* = nullptr> +template 
::value>::type* = nullptr> AType get_A(int n, int diagDominance, int) { using lno_t = typename Crs::ordinal_type; typename Crs::non_const_size_type nnz = 10 * n; - auto A = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( - n, n, nnz, 0, lno_t(0.01 * n), diagDominance); + auto A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix(n, n, nnz, 0, lno_t(0.01 * n), + diagDominance); KokkosSparse::sort_crs_matrix(A); return A; } -template ::value>::type* = nullptr> +template ::value>::type* = nullptr> AType get_A(int n, int diagDominance, int block_size) { using lno_t = typename Crs::ordinal_type; typename Crs::non_const_size_type nnz = 10 * n; - auto A_unblocked = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( - n, n, nnz, 0, lno_t(0.01 * n), diagDominance); + auto A_unblocked = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + n, n, nnz, 0, lno_t(0.01 * n), diagDominance); KokkosSparse::sort_crs_matrix(A_unblocked); // Convert to BSR @@ -77,8 +73,7 @@ AType get_A(int n, int diagDominance, int block_size) { return A; } -template +template struct GmresTest { using RowMapType = Kokkos::View; using EntriesType = Kokkos::View; @@ -90,8 +85,8 @@ struct GmresTest { using Crs = CrsMatrix; using Bsr = BsrMatrix; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, exe_space, mem_space, mem_space>; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; using float_t = typename Kokkos::ArithTraits::mag_type; template @@ -109,16 +104,14 @@ struct GmresTest { auto A = get_A(n, diagDominance, block_size); if (verbose) { - std::cout << "Running GMRES test with block_size=" << block_size - << std::endl; + std::cout << "Running GMRES test with block_size=" << block_size << std::endl; } // Make kernel handles KernelHandle kh; kh.create_gmres_handle(m, tol); - auto gmres_handle = kh.get_gmres_handle(); - using GMRESHandle = - typename 
std::remove_reference::type; + auto gmres_handle = kh.get_gmres_handle(); + using GMRESHandle = typename std::remove_reference::type; using ViewVectorType = typename GMRESHandle::nnz_value_view_t; // Set initial vectors: @@ -199,18 +192,16 @@ struct GmresTest { } // namespace Test -template +template void test_gmres() { using TestStruct = Test::GmresTest; TestStruct::template run_test_gmres(); TestStruct::template run_test_gmres(); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##gmres##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_gmres(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##gmres##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gmres(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_mdf.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_mdf.hpp index 4b5b65aeb37d..6585445e0dda 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_mdf.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_mdf.hpp @@ -21,11 +21,9 @@ namespace Test { -template +template void run_test_mdf() { - using crs_matrix_type = KokkosSparse::CrsMatrix; + using crs_matrix_type = KokkosSparse::CrsMatrix; using crs_graph_type = typename crs_matrix_type::StaticCrsGraphType; using row_map_type = typename crs_graph_type::row_map_type::non_const_type; using col_ind_type = typename crs_graph_type::entries_type::non_const_type; @@ -42,33 +40,26 @@ void run_test_mdf() { values_type values("values", numNonZeros); { // create matrix - const size_type row_mapRaw[] = {0, 3, 7, 11, 14, 18, 23, 28, 32, - 36, 41, 46, 50, 53, 57, 61, 64}; - const ordinal_type col_indRaw[] = { - 0, 1, 4, 0, 1, 2, 5, 1, 2, 3, 6, 2, 3, 7, 0, 4, - 5, 8, 1, 4, 5, 6, 9, 2, 5, 6, 7, 10, 3, 6, 7, 11, - 4, 8, 9, 12, 5, 8, 9, 10, 13, 6, 9, 10, 11, 14, 7, 10, - 11, 15, 8, 12, 13, 9, 12, 13, 14, 10, 13, 14, 15, 11, 14, 15}; - const 
value_type values_Raw[] = { - 4, -1, -1, -1, 4, -1, -1, -1, 4, -1, -1, -1, 4, -1, -1, 4, - -1, -1, -1, -1, 4, -1, -1, -1, -1, 4, -1, -1, -1, -1, 4, -1, - -1, 4, -1, -1, -1, -1, 4, -1, -1, -1, -1, 4, -1, -1, -1, -1, - 4, -1, -1, 4, -1, -1, -1, 4, -1, -1, -1, 4, -1, -1, -1, 4}; - - typename row_map_type::HostMirror::const_type row_map_host(row_mapRaw, - numRows + 1); - typename col_ind_type::HostMirror::const_type col_ind_host(col_indRaw, - numNonZeros); - typename values_type::HostMirror::const_type values_host(values_Raw, - numNonZeros); + const size_type row_mapRaw[] = {0, 3, 7, 11, 14, 18, 23, 28, 32, 36, 41, 46, 50, 53, 57, 61, 64}; + const ordinal_type col_indRaw[] = {0, 1, 4, 0, 1, 2, 5, 1, 2, 3, 6, 2, 3, 7, 0, 4, + 5, 8, 1, 4, 5, 6, 9, 2, 5, 6, 7, 10, 3, 6, 7, 11, + 4, 8, 9, 12, 5, 8, 9, 10, 13, 6, 9, 10, 11, 14, 7, 10, + 11, 15, 8, 12, 13, 9, 12, 13, 14, 10, 13, 14, 15, 11, 14, 15}; + const value_type values_Raw[] = {4, -1, -1, -1, 4, -1, -1, -1, 4, -1, -1, -1, 4, -1, -1, 4, + -1, -1, -1, -1, 4, -1, -1, -1, -1, 4, -1, -1, -1, -1, 4, -1, + -1, 4, -1, -1, -1, -1, 4, -1, -1, -1, -1, 4, -1, -1, -1, -1, + 4, -1, -1, 4, -1, -1, -1, 4, -1, -1, -1, 4, -1, -1, -1, 4}; + + typename row_map_type::HostMirror::const_type row_map_host(row_mapRaw, numRows + 1); + typename col_ind_type::HostMirror::const_type col_ind_host(col_indRaw, numNonZeros); + typename values_type::HostMirror::const_type values_host(values_Raw, numNonZeros); Kokkos::deep_copy(row_map, row_map_host); Kokkos::deep_copy(col_ind, col_ind_host); Kokkos::deep_copy(values, values_host); } - crs_matrix_type A = crs_matrix_type("A", numRows, numCols, numNonZeros, - values, row_map, col_ind); + crs_matrix_type A = crs_matrix_type("A", numRows, numCols, numNonZeros, values, row_map, col_ind); KokkosSparse::Experimental::MDF_handle handle(A); handle.set_verbosity(0); @@ -77,12 +68,10 @@ void run_test_mdf() { col_ind_type permutation = handle.get_permutation(); - bool success = true; - typename 
col_ind_type::HostMirror permutation_h = - Kokkos::create_mirror(permutation); + bool success = true; + typename col_ind_type::HostMirror permutation_h = Kokkos::create_mirror(permutation); Kokkos::deep_copy(permutation_h, permutation); - const ordinal_type permutation_ref[] = {0, 3, 12, 15, 1, 2, 4, 8, - 7, 11, 13, 14, 5, 6, 9, 10}; + const ordinal_type permutation_ref[] = {0, 3, 12, 15, 1, 2, 4, 8, 7, 11, 13, 14, 5, 6, 9, 10}; printf("MDF ordering: { "); for (ordinal_type idx = 0; idx < A.numRows(); ++idx) { printf("%d ", static_cast(permutation_h(idx))); @@ -91,8 +80,7 @@ void run_test_mdf() { } } printf("}\n"); - EXPECT_TRUE(success) - << "The permutation computed is different from the reference solution!"; + EXPECT_TRUE(success) << "The permutation computed is different from the reference solution!"; // Check the factors L and U handle.sort_factors(); @@ -110,33 +98,25 @@ void run_test_mdf() { auto values_U = Kokkos::create_mirror(U.values); Kokkos::deep_copy(values_U, U.values); - const size_type row_map_U_ref[17] = {0, 3, 6, 9, 12, 15, 17, 20, 22, - 25, 27, 30, 32, 35, 37, 39, 40}; - const ordinal_type entries_U_ref[40] = { - 0, 4, 6, 1, 5, 8, 2, 7, 10, 3, 9, 11, 4, 5, - 12, 5, 13, 6, 7, 12, 7, 14, 8, 9, 13, 9, 15, 10, - 11, 14, 11, 15, 12, 13, 14, 13, 15, 14, 15, 15}; - - const scalar_type val0 = static_cast(15. 
/ 4.); - const scalar_type val1 = static_cast(val0 - 1 / val0); - const scalar_type val2 = static_cast(4 - 2 / val0); - const scalar_type val3 = - static_cast(4 - 1 / val0 - 1 / val1 - 1 / val2); - const scalar_type val4 = static_cast(4 - 2 / val1 - 2 / val3); - const scalar_type values_U_ref[40] = { - 4, -1, -1, 4, -1, -1, 4, -1, -1, 4, -1, -1, val0, -1, -1, - val1, -1, val0, -1, -1, val1, -1, val0, -1, -1, val1, -1, val0, -1, -1, - val1, -1, val2, -1, -1, val3, -1, val3, -1, val4}; + const size_type row_map_U_ref[17] = {0, 3, 6, 9, 12, 15, 17, 20, 22, 25, 27, 30, 32, 35, 37, 39, 40}; + const ordinal_type entries_U_ref[40] = {0, 4, 6, 1, 5, 8, 2, 7, 10, 3, 9, 11, 4, 5, 12, 5, 13, 6, 7, 12, + 7, 14, 8, 9, 13, 9, 15, 10, 11, 14, 11, 15, 12, 13, 14, 13, 15, 14, 15, 15}; + + const scalar_type val0 = static_cast(15. / 4.); + const scalar_type val1 = static_cast(val0 - 1 / val0); + const scalar_type val2 = static_cast(4 - 2 / val0); + const scalar_type val3 = static_cast(4 - 1 / val0 - 1 / val1 - 1 / val2); + const scalar_type val4 = static_cast(4 - 2 / val1 - 2 / val3); + const scalar_type values_U_ref[40] = {4, -1, -1, 4, -1, -1, 4, -1, -1, 4, -1, -1, val0, -1, + -1, val1, -1, val0, -1, -1, val1, -1, val0, -1, -1, val1, -1, val0, + -1, -1, val1, -1, val2, -1, -1, val3, -1, val3, -1, val4}; for (int idx = 0; idx < 17; ++idx) { - EXPECT_TRUE(row_map_U_ref[idx] == row_map_U(idx)) - << "rowmap_U(" << idx << ") is wrong!"; + EXPECT_TRUE(row_map_U_ref[idx] == row_map_U(idx)) << "rowmap_U(" << idx << ") is wrong!"; } for (int idx = 0; idx < 40; ++idx) { - EXPECT_TRUE(entries_U_ref[idx] == entries_U(idx)) - << "entries_U(" << idx << ") is wrong!"; - EXPECT_NEAR_KK(values_U_ref[idx], values_U(idx), - 10 * Kokkos::ArithTraits::eps(), + EXPECT_TRUE(entries_U_ref[idx] == entries_U(idx)) << "entries_U(" << idx << ") is wrong!"; + EXPECT_NEAR_KK(values_U_ref[idx], values_U(idx), 10 * Kokkos::ArithTraits::eps(), "An entry in U.values is wrong!"); } @@ -147,32 +127,23 @@ void 
run_test_mdf() { auto values_L = Kokkos::create_mirror(L.values); Kokkos::deep_copy(values_L, L.values); - const size_type row_map_L_ref[17] = {0, 1, 2, 3, 4, 6, 9, 11, 14, - 16, 19, 21, 24, 27, 31, 35, 40}; - const ordinal_type entries_L_ref[40] = { - 0, 1, 2, 3, 0, 4, 1, 4, 5, 0, 6, 2, 6, 7, - 1, 8, 3, 8, 9, 2, 10, 3, 10, 11, 4, 6, 12, 5, - 8, 12, 13, 7, 10, 12, 14, 9, 11, 13, 14, 15}; - const scalar_type values_L_ref[40] = { - 1, 1, 1, 1, -1 / four, 1, - -1 / four, -1 / val0, 1, -1 / four, 1, -1 / four, - -1 / val0, 1, -1 / four, 1, -1 / four, -1 / val0, - 1, -1 / four, 1, -1 / four, -1 / val0, 1, - -1 / val0, -1 / val0, 1, -1 / val1, -1 / val0, -1 / val2, - 1, -1 / val1, -1 / val0, -1 / val2, 1, -1 / val1, - -1 / val1, -1 / val3, -1 / val3, 1}; + const size_type row_map_L_ref[17] = {0, 1, 2, 3, 4, 6, 9, 11, 14, 16, 19, 21, 24, 27, 31, 35, 40}; + const ordinal_type entries_L_ref[40] = {0, 1, 2, 3, 0, 4, 1, 4, 5, 0, 6, 2, 6, 7, 1, 8, 3, 8, 9, 2, + 10, 3, 10, 11, 4, 6, 12, 5, 8, 12, 13, 7, 10, 12, 14, 9, 11, 13, 14, 15}; + const scalar_type values_L_ref[40] = { + 1, 1, 1, 1, -1 / four, 1, -1 / four, -1 / val0, 1, -1 / four, + 1, -1 / four, -1 / val0, 1, -1 / four, 1, -1 / four, -1 / val0, 1, -1 / four, + 1, -1 / four, -1 / val0, 1, -1 / val0, -1 / val0, 1, -1 / val1, -1 / val0, -1 / val2, + 1, -1 / val1, -1 / val0, -1 / val2, 1, -1 / val1, -1 / val1, -1 / val3, -1 / val3, 1}; for (int idx = 0; idx < 17; ++idx) { - EXPECT_TRUE(row_map_L_ref[idx] == row_map_L(idx)) - << "rowmap_L(" << idx << ")=" << row_map_L(idx) << " is wrong!"; + EXPECT_TRUE(row_map_L_ref[idx] == row_map_L(idx)) << "rowmap_L(" << idx << ")=" << row_map_L(idx) << " is wrong!"; } for (int idx = 0; idx < 40; ++idx) { EXPECT_TRUE(entries_L_ref[idx] == entries_L(idx)) - << "entries_L(" << idx << ")=" << entries_L(idx) - << " is wrong, entries_L_ref[" << idx << "]=" << entries_L_ref[idx] - << "!"; - EXPECT_NEAR_KK(values_L_ref[idx], values_L(idx), - 10 * Kokkos::ArithTraits::eps(), + << "entries_L(" << 
idx << ")=" << entries_L(idx) << " is wrong, entries_L_ref[" << idx + << "]=" << entries_L_ref[idx] << "!"; + EXPECT_NEAR_KK(values_L_ref[idx], values_L(idx), 10 * Kokkos::ArithTraits::eps(), "An entry in L.values is wrong!"); } } @@ -180,16 +151,14 @@ void run_test_mdf() { } // namespace Test -template +template void test_mdf() { Test::run_test_mdf(); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##mdf##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_mdf(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##mdf##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_mdf(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_par_ilut.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_par_ilut.hpp index cda09d0639e3..7bb5b54864fd 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_par_ilut.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_par_ilut.hpp @@ -54,21 +54,18 @@ struct TolMeta { } // namespace ParIlut -template +template void run_test_par_ilut() { - using RowMapType = Kokkos::View; - using EntriesType = Kokkos::View; - using ValuesType = Kokkos::View; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space>; + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; // Simple test fixture A - std::vector> A = {{1., 6., 4., 7.}, - {2., -5., 0., 8.}, - {0.5, -3., 6., 0.}, - {0.2, -0.5, -9., 0.}}; + std::vector> A = { + {1., 6., 4., 7.}, {2., -5., 0., 8.}, {0.5, -3., 6., 0.}, {0.2, -0.5, -9., 0.}}; // Allocate device CRS views for A RowMapType row_map("row_map", 0); @@ -105,8 +102,7 @@ void run_test_par_ilut() { 
EntriesType U_entries("U_entries", nnzU); ValuesType U_values("U_values", nnzU); - par_ilut_numeric(&kh, row_map, entries, values, L_row_map, L_entries, - L_values, U_row_map, U_entries, U_values); + par_ilut_numeric(&kh, row_map, entries, values, L_row_map, L_entries, L_values, U_row_map, U_entries, U_values); // Use this to check LU // std::vector > expected_LU = { @@ -162,13 +158,9 @@ void run_test_par_ilut() { // Use these fixtures to test full numeric std::vector> expected_L_candidates = { - {1., 0., 0., 0.}, - {2., 1., 0., 0.}, - {0.50, 0.35, 1., 0.}, - {0., 0., -1.32, 1.}}; + {1., 0., 0., 0.}, {2., 1., 0., 0.}, {0.50, 0.35, 1., 0.}, {0., 0., -1.32, 1.}}; - check_matrix("L numeric", L_row_map, L_entries, L_values, - expected_L_candidates); + check_matrix("L numeric", L_row_map, L_entries, L_values, expected_L_candidates); std::vector> expected_U_candidates = { {1., 6., 4., 7.}, @@ -177,26 +169,23 @@ void run_test_par_ilut() { {0., 0., 0., 0.} // [3] = 0 for full alg, -2.62 for post-threshold only }; - check_matrix("U numeric", U_row_map, U_entries, U_values, - expected_U_candidates); + check_matrix("U numeric", U_row_map, U_entries, U_values, expected_U_candidates); kh.destroy_par_ilut_handle(); } -template +template void run_test_par_ilut_precond() { // Test using par_ilut as a preconditioner // Does (LU)^inv Ax = (LU)^inv b converge faster than solving Ax=b? 
- using exe_space = typename device::execution_space; - using mem_space = typename device::memory_space; - using RowMapType = Kokkos::View; - using EntriesType = Kokkos::View; - using ValuesType = Kokkos::View; - using sp_matrix_type = - KokkosSparse::CrsMatrix; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, exe_space, mem_space, mem_space>; + using exe_space = typename device::execution_space; + using mem_space = typename device::memory_space; + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using sp_matrix_type = KokkosSparse::CrsMatrix; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; using float_t = typename Kokkos::ArithTraits::mag_type; // Create a diagonally dominant sparse matrix to test: @@ -211,9 +200,8 @@ void run_test_par_ilut_precond() { constexpr bool verbose = false; size_type nnz = 10 * numRows; - auto A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - sp_matrix_type>(numRows, numCols, nnz, 0, lno_t(0.01 * numRows), - diagDominance); + auto A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, 0, lno_t(0.01 * numRows), diagDominance); KokkosSparse::sort_crs_matrix(A); @@ -222,8 +210,7 @@ void run_test_par_ilut_precond() { kh.create_gmres_handle(m, tol); auto gmres_handle = kh.get_gmres_handle(); gmres_handle->set_verbose(verbose); - using GMRESHandle = - typename std::remove_reference::type; + using GMRESHandle = typename std::remove_reference::type; using ViewVectorType = typename GMRESHandle::nnz_value_view_t; kh.create_par_ilut_handle(); @@ -250,14 +237,11 @@ void run_test_par_ilut_precond() { EntriesType U_entries("U_entries", nnzU); ValuesType U_values("U_values", nnzU); - par_ilut_numeric(&kh, row_map, entries, values, L_row_map, L_entries, - L_values, U_row_map, U_entries, U_values); + par_ilut_numeric(&kh, row_map, entries, values, 
L_row_map, L_entries, L_values, U_row_map, U_entries, U_values); // Create CRSs - sp_matrix_type L("L", numRows, numCols, L_values.extent(0), L_values, - L_row_map, L_entries), - U("U", numRows, numCols, U_values.extent(0), U_values, U_row_map, - U_entries); + sp_matrix_type L("L", numRows, numCols, L_values.extent(0), L_values, L_row_map, L_entries), + U("U", numRows, numCols, U_values.extent(0), U_values, U_row_map, U_entries); // Set initial vectors: ViewVectorType X("X", n); // Solution and initial guess @@ -293,8 +277,7 @@ void run_test_par_ilut_precond() { gmres_handle->set_verbose(verbose); // Make precond - KokkosSparse::Experimental::LUPrec myPrec(L, - U); + KokkosSparse::Experimental::LUPrec myPrec(L, U); // reset X for next gmres call Kokkos::deep_copy(X, 0.0); @@ -316,15 +299,14 @@ void run_test_par_ilut_precond() { } } -template +template void run_test_par_ilut_zerorow_A() { - using RowMapType = Kokkos::View; - using EntriesType = Kokkos::View; - using ValuesType = Kokkos::View; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space>; + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; const size_type nrows = 0; @@ -363,8 +345,7 @@ void run_test_par_ilut_zerorow_A() { EntriesType U_entries("U_entries", nnzU); ValuesType U_values("U_values", nnzU); - par_ilut_numeric(&kh, row_map, entries, values, L_row_map, L_entries, - L_values, U_row_map, U_entries, U_values); + par_ilut_numeric(&kh, row_map, entries, values, L_row_map, L_entries, L_values, U_row_map, U_entries, U_values); const auto itrs = par_ilut_handle->get_num_iters(); const auto end_rel_res = par_ilut_handle->get_end_rel_res(); @@ -377,38 +358,30 @@ void run_test_par_ilut_zerorow_A() { } // namespace Test -template +template 
void test_par_ilut() { Test::run_test_par_ilut(); } -template +template void test_par_ilut_precond() { Test::run_test_par_ilut_precond(); } -template +template void test_par_ilut_zerorow_A() { Test::run_test_par_ilut_zerorow_A(); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##par_ilut##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_par_ilut(); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##par_ilut_zerorow_A##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_par_ilut_zerorow_A(); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##par_ilut_precond##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_par_ilut_precond(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##par_ilut##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_par_ilut(); \ + } \ + TEST_F(TestCategory, sparse##_##par_ilut_zerorow_A##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_par_ilut_zerorow_A(); \ + } \ + TEST_F(TestCategory, sparse##_##par_ilut_precond##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_par_ilut_precond(); \ } #define NO_TEST_COMPLEX diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_removeCrsMatrixZeros.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_removeCrsMatrixZeros.hpp index 52a9a1874b70..8df02a1d4d84 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_removeCrsMatrixZeros.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_removeCrsMatrixZeros.hpp @@ -31,16 +31,13 @@ namespace TestRemoveCrsMatrixZeros { // Simple, sequential implementation of zero-removal to compare against template Matrix removeMatrixZerosReference(const Matrix& A) { - using Offset = typename Matrix::non_const_size_type; - using Ordinal = typename Matrix::ordinal_type; - using Scalar = typename Matrix::value_type; - using KAT = Kokkos::ArithTraits; - auto rowmapHost = - 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); - auto valuesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + using Offset = typename Matrix::non_const_size_type; + using Ordinal = typename Matrix::ordinal_type; + using Scalar = typename Matrix::value_type; + using KAT = Kokkos::ArithTraits; + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto valuesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); // First, create the filtered rowmap (the CrsMatrix constructor taking host // pointers does expect rowmap to be in Ordinal) Ordinal filteredNNZ = 0; @@ -63,16 +60,13 @@ Matrix removeMatrixZerosReference(const Matrix& A) { } } // Copy all the views back to device and construct matrix - return Matrix("A filtered", A.numRows(), A.numCols(), filteredNNZ, - filteredValues.data(), filteredRowmap.data(), + return Matrix("A filtered", A.numRows(), A.numCols(), filteredNNZ, filteredValues.data(), filteredRowmap.data(), filteredEntries.data()); } template -Matrix loadMatrixFromVectors(int numRows, int numCols, - const std::vector& rowmapRawInt, - const std::vector& entriesRawInt, - const std::vector& valuesRawDouble) { +Matrix loadMatrixFromVectors(int numRows, int numCols, const std::vector& rowmapRawInt, + const std::vector& entriesRawInt, const std::vector& valuesRawDouble) { using Offset = typename Matrix::non_const_size_type; using Ordinal = typename Matrix::ordinal_type; using Scalar = typename Matrix::value_type; @@ -85,8 +79,7 @@ Matrix loadMatrixFromVectors(int numRows, int numCols, for (auto val : entriesRawInt) entriesRaw.push_back(val); for (auto val : valuesRawDouble) valuesRaw.push_back(Scalar(val)); Offset nnz = rowmapRaw.size() ? 
rowmapRaw[numRows] : 0; - return Matrix("A", numRows, numCols, nnz, valuesRaw.data(), rowmapRaw.data(), - entriesRaw.data()); + return Matrix("A", numRows, numCols, nnz, valuesRaw.data(), rowmapRaw.data(), entriesRaw.data()); } template @@ -99,22 +92,20 @@ void getTestInput(int test, Matrix& A, Matrix& Afiltered_ref) { std::vector rowmap = {0, 0, 0, 0, 0}; std::vector entries; std::vector values; - A = loadMatrixFromVectors(4, 4, rowmap, entries, values); - Afiltered_ref = - loadMatrixFromVectors(4, 4, rowmap, entries, values); + A = loadMatrixFromVectors(4, 4, rowmap, entries, values); + Afiltered_ref = loadMatrixFromVectors(4, 4, rowmap, entries, values); break; } case 1: { // Some empty rows, and some zero values - std::vector rowmap = {0, 0, 3, 3, 5}; - std::vector entries = {0, 1, 3, 1, 2}; - std::vector values = {1, 3, 0, 0, 2}; - A = loadMatrixFromVectors(4, 4, rowmap, entries, values); + std::vector rowmap = {0, 0, 3, 3, 5}; + std::vector entries = {0, 1, 3, 1, 2}; + std::vector values = {1, 3, 0, 0, 2}; + A = loadMatrixFromVectors(4, 4, rowmap, entries, values); std::vector rowmapFilt = {0, 0, 2, 2, 3}; std::vector entriesFilt = {0, 1, 2}; std::vector valuesFilt = {1, 3, 2}; - Afiltered_ref = loadMatrixFromVectors(4, 4, rowmapFilt, - entriesFilt, valuesFilt); + Afiltered_ref = loadMatrixFromVectors(4, 4, rowmapFilt, entriesFilt, valuesFilt); break; } case 2: { @@ -131,67 +122,62 @@ void getTestInput(int test, Matrix& A, Matrix& Afiltered_ref) { std::vector rowmap = {0}; std::vector entries; std::vector values; - A = loadMatrixFromVectors(0, 0, rowmap, entries, values); + A = loadMatrixFromVectors(0, 0, rowmap, entries, values); Afiltered_ref = A; break; } case 4: { // A row of all zeros that will be filtered - std::vector rowmap = {0, 3, 6}; - std::vector entries = {0, 1, 2, 3, 4, 5}; - std::vector values = {0, 0, 0, 1, 1, 1}; - A = loadMatrixFromVectors(2, 6, rowmap, entries, values); + std::vector rowmap = {0, 3, 6}; + std::vector entries = {0, 1, 2, 3, 
4, 5}; + std::vector values = {0, 0, 0, 1, 1, 1}; + A = loadMatrixFromVectors(2, 6, rowmap, entries, values); std::vector rowmapFilt = {0, 0, 3}; std::vector entriesFilt = {3, 4, 5}; std::vector valuesFilt = {1, 1, 1}; - Afiltered_ref = loadMatrixFromVectors(2, 6, rowmapFilt, - entriesFilt, valuesFilt); + Afiltered_ref = loadMatrixFromVectors(2, 6, rowmapFilt, entriesFilt, valuesFilt); break; } case 5: { // One zero in each row that will be filtered - std::vector rowmap = {0, 2, 4, 7}; - std::vector entries = {0, 1, 1, 2, 0, 1, 2}; - std::vector values = {0, 1, 1, 0, 0, 3, -3}; - A = loadMatrixFromVectors(3, 3, rowmap, entries, values); + std::vector rowmap = {0, 2, 4, 7}; + std::vector entries = {0, 1, 1, 2, 0, 1, 2}; + std::vector values = {0, 1, 1, 0, 0, 3, -3}; + A = loadMatrixFromVectors(3, 3, rowmap, entries, values); std::vector rowmapFilt = {0, 1, 2, 4}; std::vector entriesFilt = {1, 1, 1, 2}; std::vector valuesFilt = {1, 1, 3, -3}; - Afiltered_ref = loadMatrixFromVectors(3, 3, rowmapFilt, - entriesFilt, valuesFilt); + Afiltered_ref = loadMatrixFromVectors(3, 3, rowmapFilt, entriesFilt, valuesFilt); break; } case 6: { // First and last rows empty - std::vector rowmap = {0, 0, 2, 2}; - std::vector entries = {0, 1}; - std::vector values = {0, 3.14}; - A = loadMatrixFromVectors(3, 2, rowmap, entries, values); + std::vector rowmap = {0, 0, 2, 2}; + std::vector entries = {0, 1}; + std::vector values = {0, 3.14}; + A = loadMatrixFromVectors(3, 2, rowmap, entries, values); std::vector rowmapFilt = {0, 0, 1, 1}; std::vector entriesFilt = {1}; std::vector valuesFilt = {3.14}; - Afiltered_ref = loadMatrixFromVectors(3, 2, rowmapFilt, - entriesFilt, valuesFilt); + Afiltered_ref = loadMatrixFromVectors(3, 2, rowmapFilt, entriesFilt, valuesFilt); break; } case 7: { // First and last rows nonempty, but will be empty after filtering - std::vector rowmap = {0, 2, 4, 6}; - std::vector entries = {0, 1, 1, 2, 0, 3}; - std::vector values = {0, 0, 1, -1, 0, 0}; - A = 
loadMatrixFromVectors(3, 4, rowmap, entries, values); + std::vector rowmap = {0, 2, 4, 6}; + std::vector entries = {0, 1, 1, 2, 0, 3}; + std::vector values = {0, 0, 1, -1, 0, 0}; + A = loadMatrixFromVectors(3, 4, rowmap, entries, values); std::vector rowmapFilt = {0, 0, 2, 2}; std::vector entriesFilt = {1, 2}; std::vector valuesFilt = {1, -1}; - Afiltered_ref = loadMatrixFromVectors(3, 4, rowmapFilt, - entriesFilt, valuesFilt); + Afiltered_ref = loadMatrixFromVectors(3, 4, rowmapFilt, entriesFilt, valuesFilt); break; } case 8: { // Large, random matrix with 30% of values converted to zero - Offset nnz = 40 * 10000; - A = KokkosSparse::Impl::kk_generate_sparse_matrix(10000, 10000, - nnz, 10, 5000); + Offset nnz = 40 * 10000; + A = KokkosSparse::Impl::kk_generate_sparse_matrix(10000, 10000, nnz, 10, 5000); auto valuesHost = Kokkos::create_mirror_view(A.values); Kokkos::deep_copy(valuesHost, A.values); for (Offset i = 0; i < A.nnz(); i++) { @@ -204,9 +190,8 @@ void getTestInput(int test, Matrix& A, Matrix& Afiltered_ref) { } case 9: { // Large, sparser random matrix with 99% of values converted to zero - Offset nnz = 10 * 40000; - A = KokkosSparse::Impl::kk_generate_sparse_matrix(40000, 40000, - nnz, 10, 10000); + Offset nnz = 10 * 40000; + A = KokkosSparse::Impl::kk_generate_sparse_matrix(40000, 40000, nnz, 10, 10000); auto valuesHost = Kokkos::create_mirror_view(A.values); Kokkos::deep_copy(valuesHost, A.values); for (Offset i = 0; i < A.nnz(); i++) { @@ -222,12 +207,9 @@ void getTestInput(int test, Matrix& A, Matrix& Afiltered_ref) { // If we have a hardcoded reference, check that the reference impl is correct // on this case if (haveHardcodedReference) { - Matrix Afiltered_refimpl = removeMatrixZerosReference(A); - bool referenceImplMatchesHardcoded = - Test::is_same_matrix(Afiltered_ref, - Afiltered_refimpl); - ASSERT_TRUE(referenceImplMatchesHardcoded) - << "Test case " << test << ": reference impl gave wrong answer!"; + Matrix Afiltered_refimpl = 
removeMatrixZerosReference(A); + bool referenceImplMatchesHardcoded = Test::is_same_matrix(Afiltered_ref, Afiltered_refimpl); + ASSERT_TRUE(referenceImplMatchesHardcoded) << "Test case " << test << ": reference impl gave wrong answer!"; } } @@ -235,21 +217,16 @@ void getTestInput(int test, Matrix& A, Matrix& Afiltered_ref) { void testRemoveCrsMatrixZeros(int testCase) { using namespace TestRemoveCrsMatrixZeros; - using Matrix = KokkosSparse::CrsMatrix; + using Matrix = KokkosSparse::CrsMatrix; Matrix A, Afiltered_ref; getTestInput(testCase, A, Afiltered_ref); Matrix Afiltered_actual = KokkosSparse::removeCrsMatrixZeros(A); - bool matches = - Test::is_same_matrix(Afiltered_actual, Afiltered_ref); - EXPECT_TRUE(matches) - << "Test case " << testCase - << ": matrix with zeros filtered out does not match reference."; + bool matches = Test::is_same_matrix(Afiltered_actual, Afiltered_ref); + EXPECT_TRUE(matches) << "Test case " << testCase << ": matrix with zeros filtered out does not match reference."; } TEST_F(TestCategory, sparse_remove_crs_zeros) { - for (int testCase = 0; testCase < 10; testCase++) - testRemoveCrsMatrixZeros(testCase); + for (int testCase = 0; testCase < 10; testCase++) testRemoveCrsMatrixZeros(testCase); } #endif // KOKKOSSPARSE_REMOVECRSZEROS_HPP diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumInto.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumInto.hpp index f8427dc9252c..05b887dc9f4c 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumInto.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumInto.hpp @@ -14,7 +14,7 @@ // //@HEADER -//#include "Teuchos_UnitTestHarness.hpp" +// #include "Teuchos_UnitTestHarness.hpp" #include "Kokkos_ArithTraits.hpp" #include #include "KokkosSparse_CrsMatrix.hpp" @@ -40,8 +40,7 @@ class ModifyEvenNumberedRows { typedef typename CrsMatrixType::ordinal_type ordinal_type; typedef typename CrsMatrixType::value_type value_type; 
- ModifyEvenNumberedRows(const CrsMatrixType& A, const bool replace, - const bool sorted, const bool /*atomic*/) + ModifyEvenNumberedRows(const CrsMatrixType& A, const bool replace, const bool sorted, const bool /*atomic*/) : A_(A), replace_(replace), sorted_(sorted) {} KOKKOS_FUNCTION void operator()(const ordinal_type& lclRow) const { @@ -76,23 +75,16 @@ namespace { // (anonymous) using std::endl; template -void modifyEvenNumberedRows(const CrsMatrixType& A, const bool replace, - const bool sorted, const bool atomic) { +void modifyEvenNumberedRows(const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic) { typedef typename CrsMatrixType::device_type::execution_space execution_space; - typedef Kokkos::RangePolicy - policy_type; - - ::Test::ModifyEvenNumberedRows functor(A, replace, sorted, - atomic); - Kokkos::parallel_for("KokkosSparse::Test::ReplaceSumInto", - policy_type(0, A.numRows()), functor); + typedef Kokkos::RangePolicy policy_type; + + ::Test::ModifyEvenNumberedRows functor(A, replace, sorted, atomic); + Kokkos::parallel_for("KokkosSparse::Test::ReplaceSumInto", policy_type(0, A.numRows()), functor); } template -bool checkWhetherEvenNumberedRowsWereModified(const CrsMatrixType& A, - const bool replace, - const bool /* sorted */, +bool checkWhetherEvenNumberedRowsWereModified(const CrsMatrixType& A, const bool replace, const bool /* sorted */, const bool /* atomic */) { typedef typename CrsMatrixType::value_type SC; typedef typename CrsMatrixType::ordinal_type LO; @@ -101,9 +93,8 @@ bool checkWhetherEvenNumberedRowsWereModified(const CrsMatrixType& A, const SC TWO = ONE + ONE; const SC THREE = ONE + ONE + ONE; - typename CrsMatrixType::values_type val = A.values; - typename CrsMatrixType::values_type::HostMirror val_h = - Kokkos::create_mirror_view(val); + typename CrsMatrixType::values_type val = A.values; + typename CrsMatrixType::values_type::HostMirror val_h = Kokkos::create_mirror_view(val); Kokkos::deep_copy(val_h, val); 
Kokkos::fence(); const LO numRows = A.numRows(); @@ -133,19 +124,16 @@ bool checkWhetherEvenNumberedRowsWereModified(const CrsMatrixType& A, template void testOneCase(bool& /*success*/, // Teuchos::FancyOStream& out, - std::ostream& out, const CrsMatrixType& A, const bool replace, - const bool sorted, const bool atomic) { + std::ostream& out, const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic) { using Kokkos::ArithTraits; typedef typename CrsMatrixType::value_type value_type; // Teuchos::OSTab tab0 (out); - out << "replace: " << (replace ? "true" : "false") - << ", sorted: " << (sorted ? "true" : "false") + out << "replace: " << (replace ? "true" : "false") << ", sorted: " << (sorted ? "true" : "false") << ", atomic: " << (atomic ? "true" : "false") << endl; modifyEvenNumberedRows(A, replace, sorted, atomic); - const bool lclSuccess = - checkWhetherEvenNumberedRowsWereModified(A, replace, sorted, atomic); + const bool lclSuccess = checkWhetherEvenNumberedRowsWereModified(A, replace, sorted, atomic); EXPECT_TRUE(lclSuccess); // this modifies 'success' and prints to 'out' // Restore original values. Kokkos::deep_copy(A.values, ArithTraits::one()); @@ -155,13 +143,11 @@ void testOneCase(bool& /*success*/, // // This takes the same arguments as if it were declared via the // TEUCHOS_UNIT_TEST macro. 
-template +template void generalTest(bool& success, std::ostream& out) // Teuchos::FancyOStream& out) { - typedef KokkosSparse::CrsMatrix - matrix_type; + typedef KokkosSparse::CrsMatrix matrix_type; // Teuchos::OSTab tab0 (out); out << "Test KokkosSparse::CrsMatrix::{replace,sumInto}Values*" << endl; @@ -173,9 +159,8 @@ void generalTest(bool& success, std::ostream& out) typename matrix_type::size_type numEnt = 0; // to be updated below typename matrix_type::row_map_type::non_const_type ptr("ptr", numRows + 1); { - typename matrix_type::row_map_type::HostMirror ptr_h = - Kokkos::create_mirror_view(ptr); - ptr_h[0] = 0; + typename matrix_type::row_map_type::HostMirror ptr_h = Kokkos::create_mirror_view(ptr); + ptr_h[0] = 0; for (lno_t lclRow = 0; lclRow < numRows; ++lclRow) { ptr_h[lclRow + 1] = ptr_h[lclRow] + 1; // 1 entry in each row } @@ -185,8 +170,7 @@ void generalTest(bool& success, std::ostream& out) typename matrix_type::index_type::non_const_type ind("ind", numEnt); { - typename matrix_type::index_type::HostMirror ind_h = - Kokkos::create_mirror_view(ind); + typename matrix_type::index_type::HostMirror ind_h = Kokkos::create_mirror_view(ind); for (lno_t lclRow = 0; lclRow < numRows; ++lclRow) { ind_h[lclRow] = lclRow; // diagonal matrix } @@ -195,8 +179,7 @@ void generalTest(bool& success, std::ostream& out) typename matrix_type::values_type val("val", numEnt); { - typename matrix_type::values_type::HostMirror val_h = - Kokkos::create_mirror_view(val); + typename matrix_type::values_type::HostMirror val_h = Kokkos::create_mirror_view(val); for (lno_t lclRow = 0; lclRow < numRows; ++lclRow) { val_h[lclRow] = 1.0; // diagonal matrix } @@ -220,8 +203,7 @@ void generalTest(bool& success, std::ostream& out) } // namespace -template +template void test_replaceSumInto() { using std::endl; class NullBuffer : public std::streambuf { @@ -238,11 +220,9 @@ void test_replaceSumInto() { EXPECT_TRUE(success); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, 
DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##replaceSumInto##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_replaceSumInto(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##replaceSumInto##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_replaceSumInto(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumIntoLonger.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumIntoLonger.hpp index 224b72e2b7da..48776ba496cf 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumIntoLonger.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_replaceSumIntoLonger.hpp @@ -14,7 +14,7 @@ // //@HEADER -//#include "Teuchos_UnitTestHarness.hpp" +// #include "Teuchos_UnitTestHarness.hpp" #include "Kokkos_ArithTraits.hpp" #include @@ -41,20 +41,17 @@ class ModifyEntries { // The type of the reduction result. typedef ordinal_type value_type; - ModifyEntries(const CrsMatrixType& A, const bool replace, const bool sorted, - const bool /*atomic*/) + ModifyEntries(const CrsMatrixType& A, const bool replace, const bool sorted, const bool /*atomic*/) : A_(A), replace_(replace), sorted_(sorted) {} - KOKKOS_FUNCTION void operator()(const ordinal_type& lclRow, - ordinal_type& numModified) const { + KOKKOS_FUNCTION void operator()(const ordinal_type& lclRow, ordinal_type& numModified) const { typedef Kokkos::ArithTraits KAT; typedef typename KAT::mag_type mag_type; const scalar_type ONE = KAT::one(); - const ordinal_type ncol = - A_.numCols() < static_cast(numEntToModify) - ? A_.numCols() - : static_cast(numEntToModify); + const ordinal_type ncol = A_.numCols() < static_cast(numEntToModify) + ? A_.numCols() + : static_cast(numEntToModify); ordinal_type cols[numEntToModify]; scalar_type vals[numEntToModify]; @@ -71,8 +68,7 @@ class ModifyEntries { // the input (not the matrix's row) is not sorted. 
for (ordinal_type k = 0; k < ncol; ++k) { - const ordinal_type colToChange = - A_.numCols() - k - static_cast(1); + const ordinal_type colToChange = A_.numCols() - k - static_cast(1); // Cast integers to mag_type first, since direct cast from // (e.g.,) int to Kokkos::complex (or std::complex) doesn't // work. @@ -95,11 +91,9 @@ class ModifyEntries { ordinal_type lclNumModified = 0; if (replace_) { - lclNumModified = - A_.replaceValues(lclRow, cols, ncol, vals, sorted_, atomic_); + lclNumModified = A_.replaceValues(lclRow, cols, ncol, vals, sorted_, atomic_); } else { // sumInto - lclNumModified = - A_.sumIntoValues(lclRow, cols, ncol, vals, sorted_, atomic_); + lclNumModified = A_.sumIntoValues(lclRow, cols, ncol, vals, sorted_, atomic_); } numModified += lclNumModified; } @@ -118,8 +112,7 @@ using std::endl; template void modifyEntries(bool& success, std::ostream& outRef, // Teuchos::FancyOStream& outRef, // see notes - const CrsMatrixType& A, const bool replace, - const bool sorted, const bool atomic, + const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic, const bool debug = false) { // using Teuchos::RCP; typedef typename CrsMatrixType::device_type::execution_space execution_space; @@ -144,13 +137,11 @@ void modifyEntries(bool& success, std::ostream& outRef, functor_type functor(A, replace, sorted, atomic); ordinal_type numModified = 0; policy_type range(0, A.numRows()); - Kokkos::parallel_reduce("KokkosSparse::Test::ModifyEntries", range, functor, - numModified); + Kokkos::parallel_reduce("KokkosSparse::Test::ModifyEntries", range, functor, numModified); - const ordinal_type numEntShouldModify = - static_cast(numEntToModify) <= A.numCols() - ? static_cast(numEntToModify) - : A.numCols(); + const ordinal_type numEntShouldModify = static_cast(numEntToModify) <= A.numCols() + ? 
static_cast(numEntToModify) + : A.numCols(); // TEST_EQUALITY( numModified, numEntShouldModify ); EXPECT_TRUE((numModified == numEntShouldModify)); @@ -163,11 +154,10 @@ void modifyEntries(bool& success, std::ostream& outRef, } template -void checkWhetherEntriesWereModified( - bool& success, std::ostream& outRef, - // Teuchos::FancyOStream& outRef, // see notes - const CrsMatrixType& A, const bool /*replace*/, const bool /* sorted */, - const bool /* atomic */, const bool debug = false) { +void checkWhetherEntriesWereModified(bool& success, std::ostream& outRef, + // Teuchos::FancyOStream& outRef, // see notes + const CrsMatrixType& A, const bool /*replace*/, const bool /* sorted */, + const bool /* atomic */, const bool debug = false) { // using Teuchos::RCP; typedef typename CrsMatrixType::value_type value_type; typedef typename CrsMatrixType::ordinal_type ordinal_type; @@ -190,9 +180,7 @@ void checkWhetherEntriesWereModified( std::ostream& out = *outPtr; const value_type ONE = KAT::one(); const ordinal_type ncol = - A.numCols() < static_cast(numEntToModify) - ? A.numCols() - : static_cast(numEntToModify); + A.numCols() < static_cast(numEntToModify) ? A.numCols() : static_cast(numEntToModify); // modifyEntries changes entries with column indices N-1, N-2, // ..., max(N - numEntToModify, 0), where N = A.numCols(). Make // sure that the "lower bound" works for signed or unsigned @@ -201,8 +189,8 @@ void checkWhetherEntriesWereModified( // Teuchos::OSTab tab0 (out); out << "check: " - << "{numCols: " << A.numCols() << ", numEntToModify: " << numEntToModify - << ", ncol: " << ncol << ", lowerBound: " << lowerBound << "}" << endl; + << "{numCols: " << A.numCols() << ", numEntToModify: " << numEntToModify << ", ncol: " << ncol + << ", lowerBound: " << lowerBound << "}" << endl; // Teuchos::OSTab tab1 (out); auto val_h = Kokkos::create_mirror_view(A.values); @@ -227,12 +215,10 @@ void checkWhetherEntriesWereModified( // Kokkos::complex (or std::complex) doesn't work. 
if (ind_h(k) < lowerBound) { // entry should not have been modified - out << "ind_h(" << k << ") = " << ind_h(k) - << "; entry should not have been modified" << endl; + out << "ind_h(" << k << ") = " << ind_h(k) << "; entry should not have been modified" << endl; expectedVal = curVal; } else { - out << "ind_h(" << k << ") = " << ind_h(k) - << "; entry should have been modified" << endl; + out << "ind_h(" << k << ") = " << ind_h(k) << "; entry should have been modified" << endl; // The expected result for modified entries will always be // -curVal, whether we're doing replace or sumInto. This lets // us make sure that we changed the right value. @@ -244,8 +230,8 @@ void checkWhetherEntriesWereModified( } else { success = false; out << "ERROR: k: " << k << ", ind_h(k): " << ind_h(k) << ", " - << "val_h(" << k << ") = " << val_h(k) << " != " << expectedVal - << " (lowerBound = " << lowerBound << ")" << endl; + << "val_h(" << k << ") = " << val_h(k) << " != " << expectedVal << " (lowerBound = " << lowerBound << ")" + << endl; } } @@ -260,8 +246,7 @@ void checkWhetherEntriesWereModified( template void testOneCaseImpl(bool& /*success*/, std::ostream& out, // Teuchos::FancyOStream& out, - const CrsMatrixType& A, const bool replace, - const bool sorted, const bool atomic, + const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic, const bool debug = false) { typedef typename CrsMatrixType::value_type scalar_type; typedef typename CrsMatrixType::ordinal_type ordinal_type; @@ -270,12 +255,11 @@ void testOneCaseImpl(bool& /*success*/, std::ostream& out, // Teuchos::OSTab tab0 (out); out << "numEntriesToModify: " << numEntriesToModify << endl; bool lclSuccess = true; - modifyEntries( - lclSuccess, out, A, replace, sorted, atomic, debug); + modifyEntries(lclSuccess, out, A, replace, sorted, atomic, debug); // If modifyEntries didn't work, no need to test further. 
if (lclSuccess) { - checkWhetherEntriesWereModified( - lclSuccess, out, A, replace, sorted, atomic, debug); + checkWhetherEntriesWereModified(lclSuccess, out, A, replace, sorted, atomic, + debug); EXPECT_TRUE(lclSuccess); // this modifies 'success' and prints to 'out' } @@ -294,17 +278,14 @@ template struct TestOneCase { static void test(bool& success, std::ostream& out, // Teuchos::FancyOStream& out, - const CrsMatrixType& A, const bool replace, - const bool sorted, const bool atomic, + const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic, const bool debug = false) { - testOneCaseImpl(success, out, A, replace, - sorted, atomic, debug); + testOneCaseImpl(success, out, A, replace, sorted, atomic, debug); if (!success) { return; // Don't bother continuing } // Yay template recursion! - TestOneCase::test( - success, out, A, replace, sorted, atomic, debug); + TestOneCase::test(success, out, A, replace, sorted, atomic, debug); } }; @@ -313,12 +294,10 @@ template struct TestOneCase { static void test(bool& success, std::ostream& out, // Teuchos::FancyOStream& out, - const CrsMatrixType& A, const bool replace, - const bool sorted, const bool atomic, + const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic, const bool debug = false) { constexpr int numEntriesToModify = 1; - testOneCaseImpl(success, out, A, replace, - sorted, atomic, debug); + testOneCaseImpl(success, out, A, replace, sorted, atomic, debug); // This is the base case, so don't recurse on numEntriesToModify. 
} }; @@ -330,26 +309,23 @@ template struct TestOneCase { static void test(bool& /* success */, std::ostream& /*out*/, // Teuchos::FancyOStream& /* out */, - const CrsMatrixType& /* A */, const bool /* replace */, - const bool /* sorted */, const bool /* atomic */, - const bool /* debug */) {} + const CrsMatrixType& /* A */, const bool /* replace */, const bool /* sorted */, + const bool /* atomic */, const bool /* debug */) {} }; template void testOneCase(bool& success, std::ostream& out, // Teuchos::FancyOStream& out, - const CrsMatrixType& A, const bool replace, const bool sorted, - const bool atomic, const bool debug = false) { + const CrsMatrixType& A, const bool replace, const bool sorted, const bool atomic, + const bool debug = false) { // Teuchos::OSTab tab0 (out); - out << "replace: " << (replace ? "true" : "false") - << ", sorted: " << (sorted ? "true" : "false") + out << "replace: " << (replace ? "true" : "false") << ", sorted: " << (sorted ? "true" : "false") << ", atomic: " << (atomic ? "true" : "false") << endl; // Teuchos::OSTab tab1 (out); constexpr int maxNumEntriesToModify = 128; // Invoke template recursion. 
- TestOneCase::test( - success, out, A, replace, sorted, atomic, debug); + TestOneCase::test(success, out, A, replace, sorted, atomic, debug); } template @@ -357,8 +333,7 @@ void testOneSize(bool& success, std::ostream& out, // Teuchos::FancyOStream& out, const CrsMatrixType& A, const bool debug = false) { // Teuchos::OSTab tab0 (out); - out << "testOneSize: {numRows: " << A.numRows() - << ", numCols: " << A.numCols() << "}" << endl; + out << "testOneSize: {numRows: " << A.numRows() << ", numCols: " << A.numCols() << "}" << endl; for (int replaceInt = 0; replaceInt < 2; ++replaceInt) { const bool replace = replaceInt != 0; @@ -382,8 +357,7 @@ void testOneSize(bool& success, template void testAllSizes(bool& success, std::ostream& out, // Teuchos::FancyOStream& out, - const typename CrsMatrixType::size_type maxNumEnt, - const bool debug = false) { + const typename CrsMatrixType::size_type maxNumEnt, const bool debug = false) { typedef CrsMatrixType matrix_type; typedef typename matrix_type::value_type value_type; typedef typename matrix_type::ordinal_type ordinal_type; @@ -408,13 +382,11 @@ void testAllSizes(bool& success, typename matrix_type::values_type::non_const_type val_whole("val", maxNumEnt); auto val_whole_h = Kokkos::create_mirror_view(val_whole); - for (size_type numEnt = 1; numEnt <= maxNumEnt; - numEnt *= static_cast(2)) { + for (size_type numEnt = 1; numEnt <= maxNumEnt; numEnt *= static_cast(2)) { const ordinal_type numCols = numEnt; - out << "Test " << numRows << " x " << numCols << " matrix with " << numEnt - << " entr" << (numEnt != static_cast(1) ? "ies" : "y") - << endl; + out << "Test " << numRows << " x " << numCols << " matrix with " << numEnt << " entr" + << (numEnt != static_cast(1) ? "ies" : "y") << endl; ptr_h[0] = 0; ptr_h[1] = numEnt; @@ -445,13 +417,11 @@ void testAllSizes(bool& success, // The first two arguments let us call Teuchos unit test macros // inside. Those macros expect 'success' and 'out' to have exactly // those names. 
-template +template void generalTest(bool& success, std::ostream& out, // Teuchos::FancyOStream& out, const bool debug = false) { - typedef KokkosSparse::CrsMatrix - matrix_type; + typedef KokkosSparse::CrsMatrix matrix_type; // Teuchos::OSTab tab0 (out); out << "Test KokkosSparse::CrsMatrix::{replace,sumInto}Values*" << endl; @@ -463,8 +433,7 @@ void generalTest(bool& success, } // namespace -template +template void test_replaceSumIntoLonger() { using std::endl; class NullBuffer : public std::streambuf { @@ -481,11 +450,9 @@ void test_replaceSumIntoLonger() { EXPECT_TRUE(success); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##replaceSumIntoLonger##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_replaceSumIntoLonger(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##replaceSumIntoLonger##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_replaceSumIntoLonger(); \ } // FIXME SYCL: test hangs or gives "CL error -46 invalid kernel name" diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_rocsparse.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_rocsparse.hpp index 804c777daa9f..379b422d7fe6 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_rocsparse.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_rocsparse.hpp @@ -35,8 +35,8 @@ void test_rocsparse_version() { rocsparse_get_version(handle, &ver); rocsparse_get_git_rev(handle, rev); - std::cout << "rocSPARSE version: " << ver / 100000 << "." << ver / 100 % 1000 - << "." << ver % 100 << "-" << rev << std::endl; + std::cout << "rocSPARSE version: " << ver / 100000 << "." << ver / 100 % 1000 << "." 
<< ver % 100 << "-" << rev + << std::endl; rocsparse_destroy_handle(handle); } @@ -64,8 +64,7 @@ void test_rocsparse_safe_call() { // fails it throws an error with the // KOKKOS_ROCBLAS_SAFE_CALL_IMPL macro void test_rocsparse_singleton() { - KokkosKernels::Impl::RocsparseSingleton& s = - KokkosKernels::Impl::RocsparseSingleton::singleton(); + KokkosKernels::Impl::RocsparseSingleton& s = KokkosKernels::Impl::RocsparseSingleton::singleton(); (void)s; } diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spadd.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spadd.hpp index 3156801dbd20..91b217d5bb6f 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spadd.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spadd.hpp @@ -38,25 +38,21 @@ typedef Kokkos::complex kokkos_complex_float; // in a row are unique. // sortRows: whether to sort columns in a row template -crsMat_t randomMatrix(ordinal_type nrows, ordinal_type ncols, - ordinal_type minNNZ, ordinal_type maxNNZ, bool sortRows) { +crsMat_t randomMatrix(ordinal_type nrows, ordinal_type ncols, ordinal_type minNNZ, ordinal_type maxNNZ, bool sortRows) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type size_type_view_t; typedef typename graph_t::entries_type::non_const_type lno_view_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef - typename size_type_view_t::non_const_value_type size_type; // rowptr type - typedef typename lno_view_t::non_const_value_type lno_t; // colind type + typedef typename size_type_view_t::non_const_value_type size_type; // rowptr type + typedef typename lno_view_t::non_const_value_type lno_t; // colind type typedef typename scalar_view_t::non_const_value_type scalar_t; typedef Kokkos::ArithTraits KAT; - static_assert(std::is_same::value, - "ordinal_type should be same as lno_t from crsMat_t"); + static_assert(std::is_same::value, "ordinal_type should be 
same as lno_t from crsMat_t"); // first, populate rowmap size_type_view_t rowmap("rowmap", nrows + 1); - typename size_type_view_t::HostMirror h_rowmap = - Kokkos::create_mirror_view(rowmap); - size_type nnz = 0; - size_type maxRowEntries = 0; + typename size_type_view_t::HostMirror h_rowmap = Kokkos::create_mirror_view(rowmap); + size_type nnz = 0; + size_type maxRowEntries = 0; for (lno_t i = 0; i < nrows; i++) { size_type rowEntries = rand() % (maxNNZ - minNNZ + 1) + minNNZ; h_rowmap(i) = nnz; @@ -68,17 +64,14 @@ crsMat_t randomMatrix(ordinal_type nrows, ordinal_type ncols, // allocate values and entries scalar_view_t values("values", nnz); // populate values - typename scalar_view_t::HostMirror h_values = - Kokkos::create_mirror_view(values); + typename scalar_view_t::HostMirror h_values = Kokkos::create_mirror_view(values); for (size_type i = 0; i < nnz; i++) { - h_values(i) = KAT::one() * (((typename KAT::mag_type)rand()) / - static_cast(RAND_MAX)); + h_values(i) = KAT::one() * (((typename KAT::mag_type)rand()) / static_cast(RAND_MAX)); } Kokkos::deep_copy(values, h_values); // populate entries (make sure no entry is repeated within a row) lno_view_t entries("entries", nnz); - typename lno_view_t::HostMirror h_entries = - Kokkos::create_mirror_view(entries); + typename lno_view_t::HostMirror h_entries = Kokkos::create_mirror_view(entries); std::vector indices(std::max((size_type)ncols, maxRowEntries)); auto re = std::mt19937(rand()); for (lno_t i = 0; i < nrows; i++) { @@ -100,11 +93,8 @@ crsMat_t randomMatrix(ordinal_type nrows, ordinal_type ncols, } template -void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, - size_type maxNNZ, bool sortRows) { - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; +void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, size_type maxNNZ, bool sortRows) { + typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef Kokkos::ArithTraits KAT; typedef typename KAT::mag_type magnitude_t; @@ -113,8 +103,8 
@@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, typedef typename crsMat_t::values_type::non_const_type values_type; typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename Device::execution_space, - typename Device::memory_space, typename Device::memory_space> + size_type, lno_t, scalar_t, typename Device::execution_space, typename Device::memory_space, + typename Device::memory_space> KernelHandle; // Make the test deterministic on a given machine+compiler @@ -124,56 +114,40 @@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, // If maxNNZ <= numCols, the generated A, B have unique column indices in each // row handle.create_spadd_handle(sortRows, static_cast(maxNNZ) <= numCols); - crsMat_t A = - randomMatrix(numRows, numCols, minNNZ, maxNNZ, sortRows); - crsMat_t B = - randomMatrix(numRows, numCols, minNNZ, maxNNZ, sortRows); - row_map_type c_row_map( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C row map"), - numRows + 1); + crsMat_t A = randomMatrix(numRows, numCols, minNNZ, maxNNZ, sortRows); + crsMat_t B = randomMatrix(numRows, numCols, minNNZ, maxNNZ, sortRows); + row_map_type c_row_map(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C row map"), numRows + 1); // Make sure that nothing relies on any specific entry of c_row_map being zero // initialized Kokkos::deep_copy(c_row_map, (size_type)5); auto addHandle = handle.get_spadd_handle(); typename Device::execution_space exec{}; - KokkosSparse::Experimental::spadd_symbolic( - exec, &handle, numRows, numCols, A.graph.row_map, A.graph.entries, - B.graph.row_map, B.graph.entries, c_row_map); + KokkosSparse::Experimental::spadd_symbolic(exec, &handle, numRows, numCols, A.graph.row_map, A.graph.entries, + B.graph.row_map, B.graph.entries, c_row_map); size_type c_nnz = addHandle->get_c_nnz(); // Fill values, entries with incorrect incorret - values_type c_values( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), 
c_nnz); + values_type c_values(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz); Kokkos::deep_copy(c_values, ((typename KAT::mag_type)5) * KAT::one()); entries_type c_entries("C entries", c_nnz); Kokkos::deep_copy(c_entries, (lno_t)5); - KokkosSparse::Experimental::spadd_numeric( - exec, &handle, numRows, numCols, A.graph.row_map, A.graph.entries, - A.values, KAT::one(), B.graph.row_map, B.graph.entries, B.values, - KAT::one(), c_row_map, c_entries, c_values); + KokkosSparse::Experimental::spadd_numeric(exec, &handle, numRows, numCols, A.graph.row_map, A.graph.entries, A.values, + KAT::one(), B.graph.row_map, B.graph.entries, B.values, KAT::one(), + c_row_map, c_entries, c_values); // done with handle // create C using CRS arrays crsMat_t C("C", numRows, numCols, c_nnz, c_values, c_row_map, c_entries); handle.destroy_spadd_handle(); - auto Avalues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); - auto Arowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto Aentries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); - auto Bvalues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B.values); - auto Browmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B.graph.row_map); - auto Bentries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B.graph.entries); - auto Cvalues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C.values); - auto Crowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C.graph.row_map); - auto Centries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C.graph.entries); - auto zero = KAT::zero(); - auto eps = KAT::epsilon(); + auto Avalues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + auto Arowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto Aentries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), 
A.graph.entries); + auto Bvalues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B.values); + auto Browmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B.graph.row_map); + auto Bentries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B.graph.entries); + auto Cvalues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C.values); + auto Crowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C.graph.row_map); + auto Centries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C.graph.entries); + auto zero = KAT::zero(); + auto eps = KAT::epsilon(); // check that C is correct and sorted, row-by-row for (lno_t row = 0; row < numRows; row++) { std::vector correct(numCols, zero); @@ -192,12 +166,10 @@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, } // make sure C has the right number of entries auto actualNZ = Crowmap(row + 1) - Crowmap(row); - ASSERT_EQ(actualNZ, nz) << "A+B row " << row << " has " << actualNZ - << " entries but should have " << nz; + ASSERT_EQ(actualNZ, nz) << "A+B row " << row << " has " << actualNZ << " entries but should have " << nz; // make sure C's indices are sorted and unique for (size_type i = Crowmap(row) + 1; i < Crowmap(row + 1); i++) { - ASSERT_LT(Centries(i - 1), Centries(i)) - << "C row " << row << " is not sorted"; + ASSERT_LT(Centries(i - 1), Centries(i)) << "C row " << row << " is not sorted"; } // make sure C's indices are exactly the same as "nonzeros" for (size_type i = Crowmap(row); i < Crowmap(row + 1); i++) { @@ -208,12 +180,9 @@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, scalar_t Cval = Cvalues(i); lno_t Ccol = Centries(i); // Check that result is correct to 1 ULP - magnitude_t maxError = (correct[Ccol] == KAT::zero()) - ? KAT::abs(eps) - : KAT::abs(correct[Ccol] * eps); + magnitude_t maxError = (correct[Ccol] == KAT::zero()) ? 
KAT::abs(eps) : KAT::abs(correct[Ccol] * eps); ASSERT_LE(KAT::abs(correct[Ccol] - Cval), maxError) - << "A+B row " << row << ", column " << Ccol << " has value " << Cval - << " but should be " << correct[Ccol]; + << "A+B row " << row << ", column " << Ccol << " has value " << Cval << " but should be " << correct[Ccol]; } } } @@ -222,16 +191,14 @@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, // when there are empty rows/cols template void test_spadd_known_columns() { - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using row_map_type = typename crsMat_t::row_map_type::non_const_type; using entries_type = typename crsMat_t::index_type::non_const_type; using values_type = typename crsMat_t::values_type::non_const_type; using KAT = Kokkos::ArithTraits; - using KernelHandle = - typename KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename Device::execution_space, - typename Device::memory_space, typename Device::memory_space>; + using KernelHandle = typename KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, typename Device::execution_space, typename Device::memory_space, + typename Device::memory_space>; // Create A and B as 4x4 identity matrix, at the top-left of a 6x7 matrix of // zeros int nrows = 6; @@ -264,23 +231,19 @@ void test_spadd_known_columns() { ASSERT_EQ(A.nnz(), C.nnz()); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##spadd_sorted_input##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spadd(10, 10, 0, 0, true); \ - test_spadd(10, 10, 0, 2, true); \ - test_spadd(100, 100, 50, 100, true); \ - test_spadd(50, 50, 75, 100, true); \ - test_spadd_known_columns(); \ - } \ - TEST_F( \ - TestCategory, \ - sparse##_##spadd_unsorted_input##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spadd(10, 10, 0, 0, false); \ - test_spadd(10, 10, 0, 2, 
false); \ - test_spadd(100, 100, 50, 100, false); \ - test_spadd(50, 50, 75, 100, false); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spadd_sorted_input##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spadd(10, 10, 0, 0, true); \ + test_spadd(10, 10, 0, 2, true); \ + test_spadd(100, 100, 50, 100, true); \ + test_spadd(50, 50, 75, 100, true); \ + test_spadd_known_columns(); \ + } \ + TEST_F(TestCategory, sparse##_##spadd_unsorted_input##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spadd(10, 10, 0, 0, false); \ + test_spadd(10, 10, 0, 2, false); \ + test_spadd(100, 100, 50, 100, false); \ + test_spadd(50, 50, 75, 100, false); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm.hpp index bd1e68c37001..c61639e70f42 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm.hpp @@ -55,11 +55,7 @@ namespace Test { // - symbolic/numeric with Views // - symbolic/numeric with CrsMatrices // - non-reuse with CrsMatrices -enum spgemm_call_mode { - spgemm_reuse_view, - spgemm_reuse_matrix, - spgemm_noreuse -}; +enum spgemm_call_mode { spgemm_reuse_view, spgemm_reuse_matrix, spgemm_noreuse }; // Randomize matrix values again from the same uniform distribution as // kk_generate_sparse_matrix uses. @@ -69,7 +65,10 @@ void randomize_matrix_values(const Values &v) { ScalarType randStart, randEnd; KokkosKernels::Impl::getRandomBounds(50.0, randStart, randEnd); Kokkos::Random_XorShift64_Pool pool(13718); - Kokkos::fill_random(v, pool, randStart, randEnd); + // Instead of sampling from [-50, 50] or [-50-50i, 50+50i], + // sample from [1, 50] or [1+i, 50+50i]. That way relative + // error between values can't become large if values happen to sum close to 0. 
+ Kokkos::fill_random(v, pool, randEnd / 50.0, randEnd); } template @@ -78,17 +77,14 @@ void run_spgemm_noreuse(crsMat_t A, crsMat_t B, crsMat_t &C) { } template -int run_spgemm(crsMat_t &A, crsMat_t &B, - KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, crsMat_t &C, - bool testReuse) { +int run_spgemm(crsMat_t &A, crsMat_t &B, KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, crsMat_t &C, bool testReuse) { typedef typename crsMat_t::size_type size_type; typedef typename crsMat_t::ordinal_type lno_t; typedef typename crsMat_t::value_type scalar_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -116,12 +112,8 @@ int run_spgemm(crsMat_t &A, crsMat_t &B, if (testReuse) { // Give A and B completely new random values (changing both the pointer // and contents), and re-run just numeric. 
- A.values = scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "new A values"), - A.nnz()); - B.values = scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "new B values"), - B.nnz()); + A.values = scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "new A values"), A.nnz()); + B.values = scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "new B values"), B.nnz()); randomize_matrix_values(A.values); randomize_matrix_values(B.values); KokkosSparse::spgemm_numeric(kh, A, false, B, false, C); @@ -135,9 +127,8 @@ int run_spgemm(crsMat_t &A, crsMat_t &B, } template -int run_spgemm_old_interface(crsMat_t &A, crsMat_t &B, - KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, - crsMat_t &result, bool testReuse) { +int run_spgemm_old_interface(crsMat_t &A, crsMat_t &B, KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, crsMat_t &result, + bool testReuse) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type lno_view_t; typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; @@ -147,9 +138,8 @@ int run_spgemm_old_interface(crsMat_t &A, crsMat_t &B, typedef typename lno_nnz_view_t::value_type lno_t; typedef typename scalar_view_t::value_type scalar_t; - typedef KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -174,23 +164,18 @@ int run_spgemm_old_interface(crsMat_t &A, crsMat_t &B, EXPECT_FALSE(sh->are_rowptrs_computed()); EXPECT_FALSE(sh->are_entries_computed()); - KokkosSparse::Experimental::spgemm_symbolic( - &kh, num_rows_A, num_rows_B, num_cols_B, A.graph.row_map, - A.graph.entries, false, B.graph.row_map, B.graph.entries, false, - row_mapC); + KokkosSparse::Experimental::spgemm_symbolic(&kh, num_rows_A, num_rows_B, num_cols_B, 
A.graph.row_map, + A.graph.entries, false, B.graph.row_map, B.graph.entries, false, + row_mapC); EXPECT_TRUE(sh->is_symbolic_called()); size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); - entriesC = lno_nnz_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), - c_nnz_size); - valuesC = scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); - KokkosSparse::Experimental::spgemm_numeric( - &kh, num_rows_A, num_rows_B, num_cols_B, A.graph.row_map, - A.graph.entries, A.values, false, B.graph.row_map, B.graph.entries, - B.values, false, row_mapC, entriesC, valuesC); + entriesC = lno_nnz_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), c_nnz_size); + valuesC = scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); + KokkosSparse::Experimental::spgemm_numeric(&kh, num_rows_A, num_rows_B, num_cols_B, A.graph.row_map, + A.graph.entries, A.values, false, B.graph.row_map, B.graph.entries, + B.values, false, row_mapC, entriesC, valuesC); EXPECT_TRUE(sh->are_entries_computed()); EXPECT_TRUE(sh->is_numeric_called()); @@ -198,18 +183,13 @@ int run_spgemm_old_interface(crsMat_t &A, crsMat_t &B, if (testReuse) { // Give A and B completely new random values (changing both the pointer // and contents), and re-run just numeric. 
- A.values = scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "new A values"), - A.nnz()); - B.values = scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "new B values"), - B.nnz()); + A.values = scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "new A values"), A.nnz()); + B.values = scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "new B values"), B.nnz()); randomize_matrix_values(A.values); randomize_matrix_values(B.values); - KokkosSparse::Experimental::spgemm_numeric( - &kh, num_rows_A, num_rows_B, num_cols_B, A.graph.row_map, - A.graph.entries, A.values, false, B.graph.row_map, B.graph.entries, - B.values, false, row_mapC, entriesC, valuesC); + KokkosSparse::Experimental::spgemm_numeric(&kh, num_rows_A, num_rows_B, num_cols_B, A.graph.row_map, + A.graph.entries, A.values, false, B.graph.row_map, B.graph.entries, + B.values, false, row_mapC, entriesC, valuesC); EXPECT_TRUE(sh->are_entries_computed()); EXPECT_TRUE(sh->is_numeric_called()); } @@ -224,17 +204,14 @@ int run_spgemm_old_interface(crsMat_t &A, crsMat_t &B, // Generate matrices and test all supported spgemm algorithms. // C := AB, where A is m*k, B is k*n, and C is m*n. -template -void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, - lno_t row_size_variance, Test::spgemm_call_mode callMode, - bool testReuse = false) { +template +void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, lno_t row_size_variance, + Test::spgemm_call_mode callMode, bool testReuse = false) { #if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) { - std::cerr - << "TEST SKIPPED: See " - "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." - << std::endl; + std::cerr << "TEST SKIPPED: See " + "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." 
+ << std::endl; return; } #endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL @@ -250,10 +227,10 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, // Generate random compressed sparse row matrix. Randomly generated (non-zero) // values are stored in a 1-D (1 rank) array. - crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( - m, k, nnz, row_size_variance, bandwidth); - crsMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix( - k, n, nnz, row_size_variance, bandwidth); + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix(m, k, nnz, row_size_variance, bandwidth); + crsMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix(k, n, nnz, row_size_variance, bandwidth); + randomize_matrix_values(A.values); + randomize_matrix_values(B.values); KokkosSparse::sort_crs_matrix(A); KokkosSparse::sort_crs_matrix(B); @@ -271,8 +248,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, algorithms = {SPGEMM_KK}; } else { algorithms = { - SPGEMM_KK, SPGEMM_KK_LP, - SPGEMM_KK_MEMORY /* alias SPGEMM_KK_MEMSPEED */, + SPGEMM_KK, SPGEMM_KK_LP, SPGEMM_KK_MEMORY /* alias SPGEMM_KK_MEMSPEED */, SPGEMM_KK_SPEED /* alias SPGEMM_KK_DENSE */ }; } @@ -298,12 +274,10 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, try { switch (callMode) { case spgemm_reuse_view: - res = run_spgemm_old_interface( - A, B, spgemm_algorithm, output_mat, testReuse); + res = run_spgemm_old_interface(A, B, spgemm_algorithm, output_mat, testReuse); break; case spgemm_reuse_matrix: - res = run_spgemm(A, B, spgemm_algorithm, output_mat, - testReuse); + res = run_spgemm(A, B, spgemm_algorithm, output_mat, testReuse); break; case spgemm_noreuse: run_spgemm_noreuse(A, B, output_mat); break; } @@ -330,8 +304,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, timer1.reset(); if (!is_expected_to_fail) { EXPECT_TRUE((res == 0)) << algo; - bool is_identical = - is_same_matrix(output_mat, output_mat2); + 
bool is_identical = is_same_matrix(output_mat, output_mat2); EXPECT_TRUE(is_identical) << algo; // EXPECT_TRUE( equal) << algo; } @@ -341,8 +314,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, // device::execution_space::finalize(); } -template +template void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { using crsMat_t = CrsMatrix; using graph_t = typename crsMat_t::StaticCrsGraphType; @@ -350,9 +322,9 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { using entries_t = typename graph_t::entries_type; using rowmap_t = typename graph_t::row_map_type::non_const_type; using const_rowmap_t = typename graph_t::row_map_type; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space>; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; // A is m*n, B is n*k, C is m*k int m = 100; int n = 300; @@ -370,11 +342,9 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { B = crsMat_t("B", n, k, 0, emptyValues, B_rowmap, emptyEntries); } else { size_type nnz = 1000; - A = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnz, 10, - 50); - nnz = 1000; - B = KokkosSparse::Impl::kk_generate_sparse_matrix(n, k, nnz, 10, - 50); + A = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnz, 10, 50); + nnz = 1000; + B = KokkosSparse::Impl::kk_generate_sparse_matrix(n, k, nnz, 10, 50); KokkosSparse::sort_crs_matrix(A); KokkosSparse::sort_crs_matrix(B); } @@ -383,37 +353,31 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { Test::run_spgemm(A, B, SPGEMM_DEBUG, C_reference, false); // Now call just symbolic, and specifically request that rowptrs be populated // Make sure this never depends on C_rowmap being initialized - rowmap_t C_rowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmapC"), - m + 1); + rowmap_t 
C_rowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmapC"), m + 1); Kokkos::deep_copy(C_rowmap, size_type(123)); KernelHandle kh; kh.create_spgemm_handle(); if (callSymbolicFirst) { - KokkosSparse::Experimental::spgemm_symbolic( - &kh, m, n, k, A.graph.row_map, A.graph.entries, false, B.graph.row_map, - B.graph.entries, false, C_rowmap); + KokkosSparse::Experimental::spgemm_symbolic(&kh, m, n, k, A.graph.row_map, A.graph.entries, false, B.graph.row_map, + B.graph.entries, false, C_rowmap); } - KokkosSparse::Experimental::spgemm_symbolic( - &kh, m, n, k, A.graph.row_map, A.graph.entries, false, B.graph.row_map, - B.graph.entries, false, C_rowmap, true); + KokkosSparse::Experimental::spgemm_symbolic(&kh, m, n, k, A.graph.row_map, A.graph.entries, false, B.graph.row_map, + B.graph.entries, false, C_rowmap, true); kh.destroy_spgemm_handle(); - bool isCorrect = KokkosKernels::Impl::kk_is_identical_view< - const_rowmap_t, const_rowmap_t, size_type, - typename device::execution_space>(C_rowmap, C_reference.graph.row_map, 0); - EXPECT_TRUE(isCorrect) - << " spgemm_symbolic produced incorrect rowptrs - callSymbolicFirst = " - << callSymbolicFirst << ", empty A/B = " << testEmpty; + bool isCorrect = KokkosKernels::Impl::kk_is_identical_view( + C_rowmap, C_reference.graph.row_map, 0); + EXPECT_TRUE(isCorrect) << " spgemm_symbolic produced incorrect rowptrs - callSymbolicFirst = " << callSymbolicFirst + << ", empty A/B = " << testEmpty; } -template +template void test_issue402() { #if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) { - std::cerr - << "TEST SKIPPED: See " - "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." - << std::endl; + std::cerr << "TEST SKIPPED: See " + "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." 
+ << std::endl; return; } #endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL @@ -436,8 +400,7 @@ void test_issue402() { auto rowmapHost = Kokkos::create_mirror_view(Arowmap); auto entriesHost = Kokkos::create_mirror_view(Aentries); auto valuesHost = Kokkos::create_mirror_view(Avalues); - for (lno_t i = 0; i < numRows + 1; i++) - rowmapHost(i) = MatrixIssue402::rowmap[i]; + for (lno_t i = 0; i < numRows + 1; i++) rowmapHost(i) = MatrixIssue402::rowmap[i]; for (size_type i = 0; i < nnz; i++) { entriesHost(i) = MatrixIssue402::entries[i]; valuesHost(i) = MatrixIssue402::values[i]; @@ -451,9 +414,8 @@ void test_issue402() { lno_view_t Browmap("B = A^T rowmap", numRows + 1); lno_nnz_view_t Bentries("B = A^T entries", nnz); scalar_view_t Bvalues("B = A^T values", nnz); - KokkosSparse::Impl::transpose_matrix< - lno_view_t, lno_nnz_view_t, scalar_view_t, lno_view_t, lno_nnz_view_t, - scalar_view_t, lno_view_t, typename device::execution_space>( + KokkosSparse::Impl::transpose_matrix( numRows, numRows, Arowmap, Aentries, Avalues, Browmap, Bentries, Bvalues); crsMat_t B("B=A^T", numRows, numRows, nnz, Bvalues, Browmap, Bentries); KokkosSparse::sort_crs_matrix(A); @@ -476,15 +438,12 @@ void test_issue402() { errMsg = e.what(); success = false; } - EXPECT_TRUE(success) << "SpGEMM still has issue 402 bug! Error message:\n" - << errMsg << '\n'; + EXPECT_TRUE(success) << "SpGEMM still has issue 402 bug! Error message:\n" << errMsg << '\n'; bool correctResult = is_same_matrix(C, Cgold); - EXPECT_TRUE(correctResult) - << "SpGEMM still has issue 402 bug; C=AA' is incorrect!\n"; + EXPECT_TRUE(correctResult) << "SpGEMM still has issue 402 bug; C=AA' is incorrect!\n"; } -template +template void test_issue1738() { // Make sure that std::invalid_argument is thrown if you: // - call numeric where an input matrix's entries have changed. @@ -492,10 +451,10 @@ void test_issue1738() { // matrices // This check is only enabled in debug builds. 
#ifndef NDEBUG - using crsMat_t = CrsMatrix; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space>; + using crsMat_t = CrsMatrix; + using KernelHandle = + KokkosKernels::Experimental::KokkosKernelsHandle; crsMat_t A1 = KokkosSparse::Impl::kk_generate_diag_matrix(100); crsMat_t B1 = KokkosSparse::Impl::kk_generate_diag_matrix(100); crsMat_t A2 = KokkosSparse::Impl::kk_generate_diag_matrix(50); @@ -507,8 +466,7 @@ void test_issue1738() { KokkosSparse::spgemm_symbolic(kh, A1, false, B1, false, C1); KokkosSparse::spgemm_numeric(kh, A1, false, B1, false, C1); crsMat_t C2; - EXPECT_THROW(KokkosSparse::spgemm_symbolic(kh, A2, false, B2, false, C2), - std::invalid_argument); + EXPECT_THROW(KokkosSparse::spgemm_symbolic(kh, A2, false, B2, false, C2), std::invalid_argument); } { KernelHandle kh; @@ -519,58 +477,39 @@ void test_issue1738() { // row is 0. Change it to a 1 and make sure spgemm_numeric notices that it // changed. 
Kokkos::deep_copy(Kokkos::subview(A1.graph.entries, 0), 1); - EXPECT_THROW(KokkosSparse::spgemm_numeric(kh, A1, false, B1, false, C1), - std::invalid_argument); + EXPECT_THROW(KokkosSparse::spgemm_numeric(kh, A1, false, B1, false, C1), std::invalid_argument); } #endif } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spgemm( \ - 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_matrix); \ - test_spgemm( \ - 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_view); \ - test_spgemm( \ - 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_matrix, \ - true); \ - test_spgemm( \ - 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_view, true); \ - test_spgemm(0, 0, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(0, 0, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_view); \ - test_spgemm(0, 12, 5, 0, 10, 0, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(0, 12, 5, 0, 10, 0, \ - ::Test::spgemm_reuse_view); \ - test_spgemm(10, 10, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(10, 10, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_view); \ - test_spgemm(10, 10, 10, 0, 0, 0, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(10, 10, 10, 0, 0, 0, \ - ::Test::spgemm_reuse_view); \ - test_spgemm( \ - 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_noreuse); \ - test_spgemm( \ - 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_noreuse); \ - test_spgemm(0, 0, 0, 0, 10, 10, \ - ::Test::spgemm_noreuse); \ - test_spgemm(0, 12, 5, 0, 10, 0, \ - ::Test::spgemm_noreuse); \ - test_spgemm(10, 10, 0, 0, 10, 10, \ - ::Test::spgemm_noreuse); \ - test_spgemm(10, 10, 10, 0, 0, 0, \ - ::Test::spgemm_noreuse); \ - test_spgemm_symbolic(true, true); \ - test_spgemm_symbolic(false, true); \ - test_spgemm_symbolic(true, false); \ - test_spgemm_symbolic(false, false); \ - test_issue402(); \ - test_issue1738(); \ 
+#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spgemm(10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_matrix); \ + test_spgemm(10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_view); \ + test_spgemm(1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_matrix, \ + true); \ + test_spgemm(1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_view, \ + true); \ + test_spgemm(0, 0, 0, 0, 10, 10, ::Test::spgemm_reuse_matrix); \ + test_spgemm(0, 0, 0, 0, 10, 10, ::Test::spgemm_reuse_view); \ + test_spgemm(0, 12, 5, 0, 10, 0, ::Test::spgemm_reuse_matrix); \ + test_spgemm(0, 12, 5, 0, 10, 0, ::Test::spgemm_reuse_view); \ + test_spgemm(10, 10, 0, 0, 10, 10, ::Test::spgemm_reuse_matrix); \ + test_spgemm(10, 10, 0, 0, 10, 10, ::Test::spgemm_reuse_view); \ + test_spgemm(10, 10, 10, 0, 0, 0, ::Test::spgemm_reuse_matrix); \ + test_spgemm(10, 10, 10, 0, 0, 0, ::Test::spgemm_reuse_view); \ + test_spgemm(10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_noreuse); \ + test_spgemm(1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_noreuse); \ + test_spgemm(0, 0, 0, 0, 10, 10, ::Test::spgemm_noreuse); \ + test_spgemm(0, 12, 5, 0, 10, 0, ::Test::spgemm_noreuse); \ + test_spgemm(10, 10, 0, 0, 10, 10, ::Test::spgemm_noreuse); \ + test_spgemm(10, 10, 10, 0, 0, 0, ::Test::spgemm_noreuse); \ + test_spgemm_symbolic(true, true); \ + test_spgemm_symbolic(false, true); \ + test_spgemm_symbolic(true, false); \ + test_spgemm_symbolic(false, false); \ + test_issue402(); \ + test_issue1738(); \ } // test_spgemm(50000, 50000 * 30, 100, 10); diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm_jacobi.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm_jacobi.hpp index 25a5d155a7ea..42054634a962 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm_jacobi.hpp +++ 
b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spgemm_jacobi.hpp @@ -46,12 +46,9 @@ typedef Kokkos::complex kokkos_complex_float; namespace Test { -template -int run_spgemm_jacobi(crsMat_t input_mat, crsMat_t input_mat2, - scalar_type omega, dinv_view_t dinv, - KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, - crsMat_t &result) { +template +int run_spgemm_jacobi(crsMat_t input_mat, crsMat_t input_mat2, scalar_type omega, dinv_view_t dinv, + KokkosSparse::SPGEMMAlgorithm spgemm_algorithm, crsMat_t &result) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type lno_view_t; typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; @@ -61,9 +58,8 @@ int run_spgemm_jacobi(crsMat_t input_mat, crsMat_t input_mat2, typedef typename lno_nnz_view_t::value_type lno_t; typedef typename scalar_view_t::value_type scalar_t; - typedef KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; KernelHandle kh; @@ -84,24 +80,17 @@ int run_spgemm_jacobi(crsMat_t input_mat, crsMat_t input_mat2, lno_nnz_view_t entriesC; scalar_view_t valuesC; - spgemm_symbolic(&kh, num_rows_1, num_rows_2, num_cols_2, - input_mat.graph.row_map, input_mat.graph.entries, false, - input_mat2.graph.row_map, input_mat2.graph.entries, false, - row_mapC); + spgemm_symbolic(&kh, num_rows_1, num_rows_2, num_cols_2, input_mat.graph.row_map, input_mat.graph.entries, false, + input_mat2.graph.row_map, input_mat2.graph.entries, false, row_mapC); size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); if (c_nnz_size) { - entriesC = lno_nnz_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), - c_nnz_size); - valuesC = scalar_view_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); + entriesC = 
lno_nnz_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), c_nnz_size); + valuesC = scalar_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); } - spgemm_jacobi(&kh, num_rows_1, num_rows_2, num_cols_2, - input_mat.graph.row_map, input_mat.graph.entries, - input_mat.values, false, input_mat2.graph.row_map, - input_mat2.graph.entries, input_mat2.values, false, row_mapC, - entriesC, valuesC, omega, dinv); + spgemm_jacobi(&kh, num_rows_1, num_rows_2, num_cols_2, input_mat.graph.row_map, input_mat.graph.entries, + input_mat.values, false, input_mat2.graph.row_map, input_mat2.graph.entries, input_mat2.values, false, + row_mapC, entriesC, valuesC, omega, dinv); graph_t static_graph(entriesC, row_mapC); crsMat_t crsmat("CrsMatrix", num_cols_2, valuesC, static_graph); @@ -131,8 +120,7 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } if (nentries1 != nentries2) { - std::cout << "nentries1:" << nentries1 << " nentries2:" << nentries2 - << std::endl; + std::cout << "nentries1:" << nentries1 << " nentries2:" << nentries2 << std::endl; return false; } if (nvals1 != nvals2) { @@ -141,10 +129,10 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { } bool is_identical = true; - is_identical = KokkosKernels::Impl::kk_is_identical_view< - typename graph_t::row_map_type, typename graph_t::row_map_type, - typename lno_view_t::value_type, typename device::execution_space>( - output_mat1.graph.row_map, output_mat2.graph.row_map, 0); + is_identical = + KokkosKernels::Impl::kk_is_identical_view( + output_mat1.graph.row_map, output_mat2.graph.row_map, 0); if (!is_identical) { std::cout << "rowmaps are different." 
<< std::endl; @@ -153,10 +141,10 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } - is_identical = KokkosKernels::Impl::kk_is_identical_view< - lno_nnz_view_t, lno_nnz_view_t, typename lno_nnz_view_t::value_type, - typename device::execution_space>(output_mat1.graph.entries, - output_mat2.graph.entries, 0); + is_identical = + KokkosKernels::Impl::kk_is_identical_view(output_mat1.graph.entries, + output_mat2.graph.entries, 0); if (!is_identical) { std::cout << "entries are different." << std::endl; @@ -165,12 +153,11 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } - typedef typename Kokkos::ArithTraits< - typename scalar_view_t::non_const_value_type>::mag_type eps_type; + typedef typename Kokkos::ArithTraits::mag_type eps_type; eps_type eps = std::is_same::value ? 2 * 1e-3 : 1e-7; - is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view< - scalar_view_t, scalar_view_t, eps_type, typename device::execution_space>( + is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view( output_mat1.values, output_mat2.values, eps); if (!is_identical) { @@ -184,26 +171,22 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { } } // namespace Test -template -void test_spgemm_jacobi(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance) { +template +void test_spgemm_jacobi(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance) { #if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) { - std::cerr - << "TEST SKIPPED: See " - "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." - << std::endl; + std::cerr << "TEST SKIPPED: See " + "https://github.com/kokkos/kokkos-kernels/issues/1542 for details." 
+ << std::endl; return; } #endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL using namespace Test; typedef CrsMatrix crsMat_t; - lno_t numCols = numRows; - crsMat_t input_mat = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< - crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); + lno_t numCols = numRows; + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + numRows, numCols, nnz, row_size_variance, bandwidth); KokkosSparse::sort_crs_matrix(input_mat); @@ -214,25 +197,21 @@ void test_spgemm_jacobi(lno_t numRows, size_type nnz, lno_t bandwidth, typedef typename device::execution_space c_exec_t; typedef typename device::memory_space c_temp_t; typedef typename Kokkos::Device UniformDevice_t; - typedef typename Kokkos::View::array_layout, - UniformDevice_t> - view_t; + typedef + typename Kokkos::View::array_layout, + UniformDevice_t> + view_t; view_t dinv("Dinv", numRows, 1); Kokkos::deep_copy(dinv, 2.0); - run_spgemm_jacobi( - input_mat, input_mat, omega, dinv, SPGEMM_SERIAL, output_mat2); + run_spgemm_jacobi(input_mat, input_mat, omega, dinv, SPGEMM_SERIAL, output_mat2); - SPGEMMAlgorithm spgemm_algorithm = - SPGEMM_KK_MEMORY; // should we test other SpGEMM algorithms as well? + SPGEMMAlgorithm spgemm_algorithm = SPGEMM_KK_MEMORY; // should we test other SpGEMM algorithms as well? crsMat_t output_mat; - run_spgemm_jacobi(input_mat, input_mat, omega, dinv, - spgemm_algorithm, output_mat); + run_spgemm_jacobi(input_mat, input_mat, omega, dinv, spgemm_algorithm, output_mat); // Sort the reference output_mat2, but not output_mat. It should already be // soted. 
KokkosSparse::sort_crs_matrix(output_mat2); @@ -240,12 +219,9 @@ void test_spgemm_jacobi(lno_t numRows, size_type nnz, lno_t bandwidth, EXPECT_TRUE(is_identical); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##spgemm_jacobi##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spgemm_jacobi(1000, 1000 * 10, 50, \ - 10); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spgemm_jacobi##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spgemm_jacobi(1000, 1000 * 10, 50, 10); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spiluk.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spiluk.hpp index 9eaf087c9bfe..1fde1ac5ed1a 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spiluk.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spiluk.hpp @@ -57,39 +57,29 @@ namespace Test { #ifdef TEST_SPILUK_TINY_TEST template std::vector> get_fixture() { - std::vector> A = {{10.00, 1.00, 0.00, 0.00}, - {0.00, 11.00, 0.00, 0.00}, - {0.00, 2.00, 12.00, 0.00}, - {5.00, 0.00, 3.00, 13.00}}; + std::vector> A = { + {10.00, 1.00, 0.00, 0.00}, {0.00, 11.00, 0.00, 0.00}, {0.00, 2.00, 12.00, 0.00}, {5.00, 0.00, 3.00, 13.00}}; return A; } #else template std::vector> get_fixture() { std::vector> A = { - {10.00, 0.00, 0.30, 0.00, 0.00, 0.60, 0.00, 0.00, 0.00}, - {0.00, 11.00, 0.00, 0.00, 0.00, 0.00, 0.70, 0.00, 0.00}, - {0.00, 0.00, 12.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00}, - {5.00, 0.00, 0.00, 13.00, 1.00, 0.00, 0.00, 0.00, 0.00}, - {4.00, 0.00, 0.00, 0.00, 14.00, 0.00, 0.00, 0.00, 0.00}, - {0.00, 3.00, 0.00, 0.00, 0.00, 15.00, 0.00, 0.00, 0.00}, - {0.00, 0.00, 7.00, 0.00, 0.00, 0.00, 16.00, 0.00, 0.00}, - {0.00, 0.00, 0.00, 6.00, 5.00, 0.00, 0.00, 17.00, 0.00}, + {10.00, 0.00, 0.30, 0.00, 0.00, 0.60, 0.00, 0.00, 0.00}, {0.00, 11.00, 0.00, 0.00, 0.00, 0.00, 0.70, 0.00, 0.00}, + {0.00, 0.00, 
12.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00}, {5.00, 0.00, 0.00, 13.00, 1.00, 0.00, 0.00, 0.00, 0.00}, + {4.00, 0.00, 0.00, 0.00, 14.00, 0.00, 0.00, 0.00, 0.00}, {0.00, 3.00, 0.00, 0.00, 0.00, 15.00, 0.00, 0.00, 0.00}, + {0.00, 0.00, 7.00, 0.00, 0.00, 0.00, 16.00, 0.00, 0.00}, {0.00, 0.00, 0.00, 6.00, 5.00, 0.00, 0.00, 17.00, 0.00}, {0.00, 0.00, 0.00, 2.00, 2.50, 0.00, 0.00, 0.00, 18.00}}; return A; } #endif -template < - typename MatrixType, typename CRS, - typename std::enable_if::value>::type* = nullptr> +template ::value>::type* = nullptr> MatrixType get_A(CRS A_unblocked, const size_t) { return A_unblocked; } -template < - typename MatrixType, typename CRS, - typename std::enable_if::value>::type* = nullptr> +template ::value>::type* = nullptr> MatrixType get_A(CRS A_unblocked, const size_t block_size) { // Convert to BSR MatrixType A(A_unblocked, block_size); @@ -97,32 +87,23 @@ MatrixType get_A(CRS A_unblocked, const size_t block_size) { return A; } -template < - typename MatrixType, typename RowMapType, typename EntriesType, - typename ValuesType, - typename std::enable_if::value>::type* = nullptr> -MatrixType make_matrix(const char* name, const RowMapType& row_map, - const EntriesType& entries, const ValuesType& values, - const size_t) { +template ::value>::type* = nullptr> +MatrixType make_matrix(const char* name, const RowMapType& row_map, const EntriesType& entries, + const ValuesType& values, const size_t) { const auto nrows = row_map.extent(0) - 1; - return MatrixType(name, nrows, nrows, values.extent(0), values, row_map, - entries); + return MatrixType(name, nrows, nrows, values.extent(0), values, row_map, entries); } -template < - typename MatrixType, typename RowMapType, typename EntriesType, - typename ValuesType, - typename std::enable_if::value>::type* = nullptr> -MatrixType make_matrix(const char* name, const RowMapType& row_map, - const EntriesType& entries, const ValuesType& values, - const size_t block_size) { +template ::value>::type* = 
nullptr> +MatrixType make_matrix(const char* name, const RowMapType& row_map, const EntriesType& entries, + const ValuesType& values, const size_t block_size) { const auto nrows = row_map.extent(0) - 1; - return MatrixType(name, nrows, nrows, values.extent(0), values, row_map, - entries, block_size); + return MatrixType(name, nrows, nrows, values.extent(0), values, row_map, entries, block_size); } -template +template struct SpilukTest { using RowMapType = Kokkos::View; using EntriesType = Kokkos::View; @@ -137,19 +118,17 @@ struct SpilukTest { using memory_space = typename device::memory_space; using range_policy = Kokkos::RangePolicy; - static constexpr double EPS = - std::is_same::value ? 1e-7 : 1e-4; + static constexpr double EPS = std::is_same::value ? 1e-7 : 1e-4; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, execution_space, memory_space, memory_space>; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; using Crs = CrsMatrix; using Bsr = BsrMatrix; template - static typename AT::mag_type check_result_impl( - const AType& A, const LType& L, const UType& U, const size_type nrows, - const size_type block_size = 1) { + static typename AT::mag_type check_result_impl(const AType& A, const LType& L, const UType& U, const size_type nrows, + const size_type block_size = 1) { const scalar_t ZERO = scalar_t(0); const scalar_t ONE = scalar_t(1); const scalar_t MONE = scalar_t(-1); @@ -175,8 +154,7 @@ struct SpilukTest { return diff_nrm / bb_nrm; } - static bool is_triangular(const RowMapType& drow_map, - const EntriesType& dentries, bool check_lower) { + static bool is_triangular(const RowMapType& drow_map, const EntriesType& dentries, bool check_lower) { const size_type nrows = drow_map.extent(0) - 1; auto row_map = Kokkos::create_mirror_view(drow_map); @@ -200,27 +178,19 @@ struct SpilukTest { } template - static void check_result(const RowMapType& row_map, - const EntriesType& entries, 
const ValuesType& values, - const RowMapType& L_row_map, - const EntriesType& L_entries, - const ValuesType& L_values, - const RowMapType& U_row_map, - const EntriesType& U_entries, - const ValuesType& U_values, const lno_t fill_lev, - const size_type block_size = 1) { + static void check_result(const RowMapType& row_map, const EntriesType& entries, const ValuesType& values, + const RowMapType& L_row_map, const EntriesType& L_entries, const ValuesType& L_values, + const RowMapType& U_row_map, const EntriesType& U_entries, const ValuesType& U_values, + const lno_t fill_lev, const size_type block_size = 1) { using sp_matrix_type = std::conditional_t; KK_REQUIRE(UseBlocks || (block_size == 1)); // Checking const auto nrows = row_map.extent(0) - 1; - auto A = make_matrix("A_Mtx", row_map, entries, values, - block_size); - auto L = make_matrix("L_Mtx", L_row_map, L_entries, - L_values, block_size); - auto U = make_matrix("U_Mtx", U_row_map, U_entries, - U_values, block_size); + auto A = make_matrix("A_Mtx", row_map, entries, values, block_size); + auto L = make_matrix("L_Mtx", L_row_map, L_entries, L_values, block_size); + auto U = make_matrix("U_Mtx", U_row_map, U_entries, U_values, block_size); EXPECT_TRUE(is_triangular(L_row_map, L_entries, true)); EXPECT_TRUE(is_triangular(U_row_map, U_entries, false)); @@ -237,11 +207,9 @@ struct SpilukTest { } if (TEST_SPILUK_VERBOSE_LEVEL > 1) { std::cout << "L result" << std::endl; - print_matrix( - decompress_matrix(L_row_map, L_entries, L_values, block_size)); + print_matrix(decompress_matrix(L_row_map, L_entries, L_values, block_size)); std::cout << "U result" << std::endl; - print_matrix( - decompress_matrix(U_row_map, U_entries, U_values, block_size)); + print_matrix(decompress_matrix(U_row_map, U_entries, U_values, block_size)); } if (fill_lev > 1) { @@ -250,18 +218,14 @@ struct SpilukTest { } template - static std::tuple - run_and_check_spiluk(KernelHandle& kh, const RowMapType& row_map, - const EntriesType& entries, 
const ValuesType& values, - SPILUKAlgorithm alg, const lno_t fill_lev, - const size_type block_size = 1) { + static std::tuple run_and_check_spiluk( + KernelHandle& kh, const RowMapType& row_map, const EntriesType& entries, const ValuesType& values, + SPILUKAlgorithm alg, const lno_t fill_lev, const size_type block_size = 1) { KK_REQUIRE(UseBlocks || (block_size == 1)); const size_type block_items = block_size * block_size; const size_type nrows = row_map.extent(0) - 1; - kh.create_spiluk_handle(alg, nrows, 40 * nrows, 40 * nrows, - !UseBlocks ? 0 : block_size); + kh.create_spiluk_handle(alg, nrows, 40 * nrows, 40 * nrows, !UseBlocks ? 0 : block_size); auto spiluk_handle = kh.get_spiluk_handle(); if (TeamSize != -1) { @@ -274,8 +238,7 @@ struct SpilukTest { RowMapType U_row_map("U_row_map", nrows + 1); EntriesType U_entries("U_entries", spiluk_handle->get_nnzU()); - spiluk_symbolic(&kh, fill_lev, row_map, entries, L_row_map, L_entries, - U_row_map, U_entries); + spiluk_symbolic(&kh, fill_lev, row_map, entries, L_row_map, L_entries, U_row_map, U_entries); Kokkos::fence(); @@ -284,24 +247,21 @@ struct SpilukTest { ValuesType L_values("L_values", spiluk_handle->get_nnzL() * block_items); ValuesType U_values("U_values", spiluk_handle->get_nnzU() * block_items); - spiluk_numeric(&kh, fill_lev, row_map, entries, values, L_row_map, - L_entries, L_values, U_row_map, U_entries, U_values); + spiluk_numeric(&kh, fill_lev, row_map, entries, values, L_row_map, L_entries, L_values, U_row_map, U_entries, + U_values); Kokkos::fence(); - check_result(row_map, entries, values, L_row_map, L_entries, - L_values, U_row_map, U_entries, U_values, fill_lev, - block_size); + check_result(row_map, entries, values, L_row_map, L_entries, L_values, U_row_map, U_entries, U_values, + fill_lev, block_size); kh.destroy_spiluk_handle(); #ifdef TEST_SPILUK_FULL_CHECKS // If block_size is 1, results should exactly match unblocked results if (block_size == 1 && UseBlocks) { - const auto [L_row_map_u, 
L_entries_u, L_values_u, U_row_map_u, - U_entries_u, U_values_u] = - run_and_check_spiluk(kh, row_map, entries, values, - alg, fill_lev); + const auto [L_row_map_u, L_entries_u, L_values_u, U_row_map_u, U_entries_u, U_values_u] = + run_and_check_spiluk(kh, row_map, entries, values, alg, fill_lev); EXPECT_NEAR_KK_1DVIEW(L_row_map, L_row_map_u, EPS); EXPECT_NEAR_KK_1DVIEW(L_entries, L_entries_u, EPS); @@ -313,10 +273,8 @@ struct SpilukTest { // Check that team size = 1 produces same result if (TeamSize != 1) { - const auto [L_row_map_ts1, L_entries_ts1, L_values_ts1, U_row_map_ts1, - U_entries_ts1, U_values_ts1] = - run_and_check_spiluk(kh, row_map, entries, values, alg, - fill_lev, block_size); + const auto [L_row_map_ts1, L_entries_ts1, L_values_ts1, U_row_map_ts1, U_entries_ts1, U_values_ts1] = + run_and_check_spiluk(kh, row_map, entries, values, alg, fill_lev, block_size); EXPECT_NEAR_KK_1DVIEW(L_row_map, L_row_map_ts1, EPS); EXPECT_NEAR_KK_1DVIEW(L_entries, L_entries_ts1, EPS); @@ -327,8 +285,7 @@ struct SpilukTest { } #endif - return std::make_tuple(L_row_map, L_entries, L_values, U_row_map, U_entries, - U_values); + return std::make_tuple(L_row_map, L_entries, L_values, U_row_map, U_entries, U_values); } static void run_test_spiluk() { @@ -349,8 +306,7 @@ struct SpilukTest { KernelHandle kh; - run_and_check_spiluk(kh, row_map, entries, values, - SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev); + run_and_check_spiluk(kh, row_map, entries, values, SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev); } static void run_test_spiluk_blocks() { @@ -375,8 +331,7 @@ struct SpilukTest { KernelHandle kh; - Crs crs("crs for block spiluk test", nrows, nrows, nnz, values, row_map, - entries); + Crs crs("crs for block spiluk test", nrows, nrows, nnz, values, row_map, entries); std::vector block_sizes = {1, block_size}; @@ -391,8 +346,7 @@ struct SpilukTest { Kokkos::deep_copy(bentries, bsr.graph.entries); Kokkos::deep_copy(bvalues, bsr.values); - run_and_check_spiluk(kh, brow_map, 
bentries, bvalues, - SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev, + run_and_check_spiluk(kh, brow_map, bentries, bvalues, SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev, block_size_itr); } } @@ -403,9 +357,8 @@ struct SpilukTest { constexpr auto diagDominance = 2; size_type nnz = 10 * nrows; - auto A = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( - nrows, nrows, nnz, 0, lno_t(0.01 * nrows), diagDominance); + auto A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix(nrows, nrows, nnz, 0, + lno_t(0.01 * nrows), diagDominance); KokkosSparse::sort_crs_matrix(A); @@ -420,8 +373,7 @@ struct SpilukTest { for (lno_t fill_lev = 0; fill_lev < 4; ++fill_lev) { KernelHandle kh; - run_and_check_spiluk(kh, row_map, entries, values, - SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev); + run_and_check_spiluk(kh, row_map, entries, values, SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev); } } @@ -437,9 +389,8 @@ struct SpilukTest { // const size_type block_size = 10; size_type nnz = 10 * nrows; - auto A = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( - nrows, nrows, nnz, 0, lno_t(0.01 * nrows), diagDominance); + auto A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix(nrows, nrows, nnz, 0, + lno_t(0.01 * nrows), diagDominance); KokkosSparse::sort_crs_matrix(A); @@ -460,8 +411,7 @@ struct SpilukTest { for (lno_t fill_lev = 0; fill_lev < 4; ++fill_lev) { KernelHandle kh; - run_and_check_spiluk(kh, brow_map, bentries, bvalues, - SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev, + run_and_check_spiluk(kh, brow_map, bentries, bvalues, SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev, block_size); } } @@ -476,16 +426,14 @@ struct SpilukTest { int exec_concurrency = execution_space().concurrency(); if (exec_concurrency < nstreams) { run_streams_test = false; - std::cout << " Skip stream test: concurrency = " << exec_concurrency - << std::endl; + std::cout << " Skip stream test: concurrency = " << exec_concurrency << std::endl; } } #endif if 
(!run_streams_test) return; std::vector weights(nstreams, 1); - std::vector instances = - Kokkos::Experimental::partition_space(execution_space(), weights); + std::vector instances = Kokkos::Experimental::partition_space(execution_space(), weights); std::vector kh_v(nstreams); std::vector kh_ptr_v(nstreams); @@ -545,9 +493,8 @@ struct SpilukTest { U_entries_v[i] = EntriesType("U_entries", spiluk_handle->get_nnzU()); // Symbolic phase - spiluk_symbolic(kh_ptr_v[i], fill_lev, A_row_map_v[i], A_entries_v[i], - L_row_map_v[i], L_entries_v[i], U_row_map_v[i], - U_entries_v[i], nstreams); + spiluk_symbolic(kh_ptr_v[i], fill_lev, A_row_map_v[i], A_entries_v[i], L_row_map_v[i], L_entries_v[i], + U_row_map_v[i], U_entries_v[i], nstreams); Kokkos::fence(); @@ -558,25 +505,21 @@ struct SpilukTest { } // Done handle creation and spiluk_symbolic on all streams // Numeric phase - spiluk_numeric_streams(instances, kh_ptr_v, fill_lev, A_row_map_v, - A_entries_v, A_values_v, L_row_map_v, L_entries_v, - L_values_v, U_row_map_v, U_entries_v, U_values_v); + spiluk_numeric_streams(instances, kh_ptr_v, fill_lev, A_row_map_v, A_entries_v, A_values_v, L_row_map_v, + L_entries_v, L_values_v, U_row_map_v, U_entries_v, U_values_v); for (int i = 0; i < nstreams; i++) instances[i].fence(); // Checking for (int i = 0; i < nstreams; i++) { - check_result(A_row_map_v[i], A_entries_v[i], A_values_v[i], - L_row_map_v[i], L_entries_v[i], L_values_v[i], - U_row_map_v[i], U_entries_v[i], U_values_v[i], - fill_lev); + check_result(A_row_map_v[i], A_entries_v[i], A_values_v[i], L_row_map_v[i], L_entries_v[i], L_values_v[i], + U_row_map_v[i], U_entries_v[i], U_values_v[i], fill_lev); kh_v[i].destroy_spiluk_handle(); } } - static void run_test_spiluk_streams_blocks(SPILUKAlgorithm test_algo, - int nstreams) { + static void run_test_spiluk_streams_blocks(SPILUKAlgorithm test_algo, int nstreams) { // Workaround for OpenMP: skip tests if concurrency < nstreams because of // not enough resource to partition 
bool run_streams_test = true; @@ -585,16 +528,14 @@ struct SpilukTest { int exec_concurrency = execution_space().concurrency(); if (exec_concurrency < nstreams) { run_streams_test = false; - std::cout << " Skip stream test: concurrency = " << exec_concurrency - << std::endl; + std::cout << " Skip stream test: concurrency = " << exec_concurrency << std::endl; } } #endif if (!run_streams_test) return; std::vector weights(nstreams, 1); - std::vector instances = - Kokkos::Experimental::partition_space(execution_space(), weights); + std::vector instances = Kokkos::Experimental::partition_space(execution_space(), weights); std::vector kh_v(nstreams); std::vector kh_ptr_v(nstreams); @@ -622,8 +563,7 @@ struct SpilukTest { ASSERT_EQ(nrows % block_size, 0); // Convert to BSR - Crs crs("crs for block spiluk test", nrows, nrows, values.extent(0), values, - row_map, entries); + Crs crs("crs for block spiluk test", nrows, nrows, values.extent(0), values, row_map, entries); Bsr bsr(crs, block_size); // Pull out views from BSR @@ -660,8 +600,7 @@ struct SpilukTest { // Create handle kh_v[i] = KernelHandle(); - kh_v[i].create_spiluk_handle(test_algo, bnrows, 4 * bnrows, 4 * bnrows, - block_size); + kh_v[i].create_spiluk_handle(test_algo, bnrows, 4 * bnrows, 4 * bnrows, block_size); kh_ptr_v[i] = &kh_v[i]; auto spiluk_handle = kh_v[i].get_spiluk_handle(); @@ -673,33 +612,27 @@ struct SpilukTest { U_entries_v[i] = EntriesType("U_entries", spiluk_handle->get_nnzU()); // Symbolic phase - spiluk_symbolic(kh_ptr_v[i], fill_lev, A_row_map_v[i], A_entries_v[i], - L_row_map_v[i], L_entries_v[i], U_row_map_v[i], - U_entries_v[i], nstreams); + spiluk_symbolic(kh_ptr_v[i], fill_lev, A_row_map_v[i], A_entries_v[i], L_row_map_v[i], L_entries_v[i], + U_row_map_v[i], U_entries_v[i], nstreams); Kokkos::fence(); Kokkos::resize(L_entries_v[i], spiluk_handle->get_nnzL()); Kokkos::resize(U_entries_v[i], spiluk_handle->get_nnzU()); - L_values_v[i] = - ValuesType("L_values", spiluk_handle->get_nnzL() * 
block_items); - U_values_v[i] = - ValuesType("U_values", spiluk_handle->get_nnzU() * block_items); + L_values_v[i] = ValuesType("L_values", spiluk_handle->get_nnzL() * block_items); + U_values_v[i] = ValuesType("U_values", spiluk_handle->get_nnzU() * block_items); } // Done handle creation and spiluk_symbolic on all streams // Numeric phase - spiluk_numeric_streams(instances, kh_ptr_v, fill_lev, A_row_map_v, - A_entries_v, A_values_v, L_row_map_v, L_entries_v, - L_values_v, U_row_map_v, U_entries_v, U_values_v); + spiluk_numeric_streams(instances, kh_ptr_v, fill_lev, A_row_map_v, A_entries_v, A_values_v, L_row_map_v, + L_entries_v, L_values_v, U_row_map_v, U_entries_v, U_values_v); for (int i = 0; i < nstreams; i++) instances[i].fence(); // Checking for (int i = 0; i < nstreams; i++) { - check_result(A_row_map_v[i], A_entries_v[i], A_values_v[i], - L_row_map_v[i], L_entries_v[i], L_values_v[i], - U_row_map_v[i], U_entries_v[i], U_values_v[i], - fill_lev, block_size); + check_result(A_row_map_v[i], A_entries_v[i], A_values_v[i], L_row_map_v[i], L_entries_v[i], L_values_v[i], + U_row_map_v[i], U_entries_v[i], U_values_v[i], fill_lev, block_size); kh_v[i].destroy_spiluk_handle(); } @@ -721,9 +654,7 @@ struct SpilukTest { if (UseBlocks) { // Skip test if not on host. 
block trsv only works on host - static constexpr bool is_host = - std::is_same::value; + static constexpr bool is_host = std::is_same::value; if (!is_host) { return; } @@ -733,17 +664,15 @@ struct SpilukTest { EntriesType bentries; ValuesType bvalues; - size_type nnz = 10 * nrows; - auto A_unblocked = - KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( - nrows, nrows, nnz, 0, lno_t(0.01 * nrows), diagDominance); + size_type nnz = 10 * nrows; + auto A_unblocked = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix( + nrows, nrows, nnz, 0, lno_t(0.01 * nrows), diagDominance); KokkosSparse::sort_crs_matrix(A_unblocked); std::vector block_sizes_blocked = {1, 2, 4, 10}; std::vector block_sizes_unblocked = {1}; - std::vector block_sizes = - UseBlocks ? block_sizes_blocked : block_sizes_unblocked; + std::vector block_sizes = UseBlocks ? block_sizes_blocked : block_sizes_unblocked; for (auto block_size : block_sizes) { // Convert to BSR if block enabled @@ -762,21 +691,15 @@ struct SpilukTest { kh.create_gmres_handle(m, tol); auto gmres_handle = kh.get_gmres_handle(); gmres_handle->set_verbose(verbose); - using GMRESHandle = - typename std::remove_reference::type; + using GMRESHandle = typename std::remove_reference::type; for (lno_t fill_lev = 0; fill_lev < 4; ++fill_lev) { - const auto [L_row_map, L_entries, L_values, U_row_map, U_entries, - U_values] = - run_and_check_spiluk(kh, brow_map, bentries, bvalues, - SPILUKAlgorithm::SEQLVLSCHD_TP1, - fill_lev, block_size); + const auto [L_row_map, L_entries, L_values, U_row_map, U_entries, U_values] = run_and_check_spiluk( + kh, brow_map, bentries, bvalues, SPILUKAlgorithm::SEQLVLSCHD_TP1, fill_lev, block_size); // Create L, U - auto L = make_matrix("L_Mtx", L_row_map, L_entries, - L_values, block_size); - auto U = make_matrix("U_Mtx", U_row_map, U_entries, - U_values, block_size); + auto L = make_matrix("L_Mtx", L_row_map, L_entries, L_values, block_size); + auto U = make_matrix("U_Mtx", 
U_row_map, U_entries, U_values, block_size); // Set initial vectors: ValuesType X("X", nrows); // Solution and initial guess @@ -806,8 +729,7 @@ struct SpilukTest { EXPECT_EQ(conv_flag, GMRESHandle::Flag::Conv); if (TEST_SPILUK_VERBOSE_LEVEL > 0) { - std::cout << "Without LUPrec, with block_size=" << block_size - << ", converged in " << num_iters_plain + std::cout << "Without LUPrec, with block_size=" << block_size << ", converged in " << num_iters_plain << " steps with endres=" << endRes << std::endl; } } @@ -818,8 +740,7 @@ struct SpilukTest { gmres_handle->set_verbose(verbose); // Make precond. - KokkosSparse::Experimental::LUPrec - myPrec(L, U); + KokkosSparse::Experimental::LUPrec myPrec(L, U, UseBlocks ? block_size : 0); // reset X for next gmres call Kokkos::deep_copy(X, 0.0); @@ -840,10 +761,8 @@ struct SpilukTest { EXPECT_LT(num_iters_precond, num_iters_plain); if (TEST_SPILUK_VERBOSE_LEVEL > 0) { - std::cout << "With LUPrec, with block_size=" << block_size - << ", and fill_level=" << fill_lev << ", converged in " - << num_iters_precond << " steps with endres=" << endRes - << std::endl; + std::cout << "With LUPrec, with block_size=" << block_size << ", and fill_level=" << fill_lev + << ", converged in " << num_iters_precond << " steps with endres=" << endRes << std::endl; } } } @@ -853,8 +772,7 @@ struct SpilukTest { } // namespace Test -template +template void test_spiluk() { using TestStruct = Test::SpilukTest; TestStruct::run_test_spiluk(); @@ -865,8 +783,7 @@ void test_spiluk() { TestStruct::template run_test_spiluk_precond(); } -template +template void test_spiluk_streams() { using TestStruct = Test::SpilukTest; @@ -875,21 +792,16 @@ void test_spiluk_streams() { TestStruct::run_test_spiluk_streams(SPILUKAlgorithm::SEQLVLSCHD_TP1, 3); TestStruct::run_test_spiluk_streams(SPILUKAlgorithm::SEQLVLSCHD_TP1, 4); - TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, - 1); - 
TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, - 2); - TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, - 3); - TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, - 4); + TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, 1); + TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, 2); + TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, 3); + TestStruct::run_test_spiluk_streams_blocks(SPILUKAlgorithm::SEQLVLSCHD_TP1, 4); } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##spiluk##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spiluk(); \ - test_spiluk_streams(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spiluk##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spiluk(); \ + test_spiluk_streams(); \ } #define NO_TEST_COMPLEX diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp index 88927202dad1..43a9bd11e986 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "KokkosKernels_default_types.hpp" @@ -66,37 +67,27 @@ namespace Test { template struct fSPMV { using value_type = int; - using AT = Kokkos::ArithTraits; - using ATM = Kokkos::ArithTraits; - using mag_type = typename AT::mag_type; + using AT = Kokkos::ArithTraits; + using ATM = Kokkos::ArithTraits; + using mag_type = typename AT::mag_type; VectorType0 expected_y; VectorType1 y; mag_type eps; mag_type max_val; - fSPMV(const VectorType0 &_ex_y, const VectorType1 &_y, const mag_type _eps, - const mag_type _max_val = ATM::one()) - : expected_y(_ex_y), - y(_y), - eps(AT::abs(_eps)), - 
max_val(AT::abs(_max_val)) {} + fSPMV(const VectorType0 &_ex_y, const VectorType1 &_y, const mag_type _eps, const mag_type _max_val = ATM::one()) + : expected_y(_ex_y), y(_y), eps(AT::abs(_eps)), max_val(AT::abs(_max_val)) {} KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &err) const { const mag_type error = AT::abs(expected_y(i) - y(i)); - if (error > eps * max_val) { + // only one is NaN or error is too large + if ((Kokkos::isnan(AT::abs(expected_y(i))) ^ Kokkos::isnan(AT::abs(y(i)))) || (error > eps * max_val)) { err++; -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "expected_y(%d)=%f, y(%d)=%f err=%e, max_error=%e\n", i, - AT::abs(expected_y(i)), i, AT::abs(y(i)), error, eps * max_val); -#else - Kokkos::printf("expected_y(%d)=%f, y(%d)=%f err=%e, max_error=%e\n", i, - AT::abs(expected_y(i)), i, AT::abs(y(i)), error, - eps * max_val); -#endif + Kokkos::printf("expected_y(%d)=%f, y(%d)=%f err=%e, max_error=%e\n", i, AT::abs(expected_y(i)), i, AT::abs(y(i)), + error, eps * max_val); } } @@ -106,16 +97,8 @@ struct fSPMV { if (error > eps * max_val) { err++; -#if KOKKOS_VERSION < 40199 - KOKKOS_IMPL_DO_NOT_USE_PRINTF( - "expected_y(%d,%d)=%f, y(%d,%d)=%f err=%e, max_error=%e\n", i, j, - AT::abs(expected_y(i, j)), i, j, AT::abs(y(i, j)), error, - eps * max_val); -#else - Kokkos::printf("expected_y(%d,%d)=%f, y(%d,%d)=%f err=%e, max_error=%e\n", - i, j, AT::abs(expected_y(i, j)), i, j, AT::abs(y(i, j)), - error, eps * max_val); -#endif + Kokkos::printf("expected_y(%d,%d)=%f, y(%d,%d)=%f err=%e, max_error=%e\n", i, j, AT::abs(expected_y(i, j)), i, j, + AT::abs(y(i, j)), error, eps * max_val); } } }; @@ -123,28 +106,25 @@ struct fSPMV { template void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y, typename y_vector_type::non_const_value_type alpha, - typename y_vector_type::non_const_value_type beta, - const std::string &mode = "N") { + typename y_vector_type::non_const_value_type beta, const std::string &mode = "N") 
{ using graph_t = typename crsMat_t::StaticCrsGraphType; using size_type_view_t = typename graph_t::row_map_type; using lno_view_t = typename graph_t::entries_type; using scalar_view_t = typename crsMat_t::values_type::non_const_type; + using y_scalar_t = typename y_vector_type::non_const_value_type; using size_type = typename size_type_view_t::non_const_value_type; using lno_t = typename lno_view_t::non_const_value_type; using scalar_t = typename scalar_view_t::non_const_value_type; using KAT = Kokkos::ArithTraits; - typename scalar_view_t::HostMirror h_values = - Kokkos::create_mirror_view(input_mat.values); + typename scalar_view_t::HostMirror h_values = Kokkos::create_mirror_view(input_mat.values); Kokkos::deep_copy(h_values, input_mat.values); - typename lno_view_t::HostMirror h_entries = - Kokkos::create_mirror_view(input_mat.graph.entries); + typename lno_view_t::HostMirror h_entries = Kokkos::create_mirror_view(input_mat.graph.entries); Kokkos::deep_copy(h_entries, input_mat.graph.entries); - typename size_type_view_t::HostMirror h_rowmap = - Kokkos::create_mirror_view(input_mat.graph.row_map); + typename size_type_view_t::HostMirror h_rowmap = Kokkos::create_mirror_view(input_mat.graph.row_map); Kokkos::deep_copy(h_rowmap, input_mat.graph.row_map); Kokkos::fence(); @@ -158,7 +138,13 @@ void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y, lno_t nr = input_mat.numRows(); // first, scale y by beta - for (size_t i = 0; i < h_y.extent(0); i++) h_y(i) *= beta; + for (size_t i = 0; i < h_y.extent(0); i++) { + if (beta == y_scalar_t(0)) { + h_y(i) = y_scalar_t(0); + } else { + h_y(i) *= beta; + } + } // then go through the matrix and accumulate the matrix-vector product for (lno_t row = 0; row < nr; ++row) { @@ -179,14 +165,11 @@ void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y, Kokkos::fence(); } -template -void check_spmv( - handle_t *handle, crsMat_t input_mat, x_vector_type x, y_vector_type y, - typename 
y_vector_type::non_const_value_type alpha, - typename y_vector_type::non_const_value_type beta, const std::string &mode, - typename Kokkos::ArithTraits::mag_type - max_val) { +template +void check_spmv(handle_t *handle, crsMat_t input_mat, x_vector_type x, y_vector_type y, + typename y_vector_type::non_const_value_type alpha, typename y_vector_type::non_const_value_type beta, + const std::string &mode, + typename Kokkos::ArithTraits::mag_type max_val) { EXPECT_TRUE(mode.size() == 1); using ExecSpace = typename crsMat_t::execution_space; @@ -195,49 +178,40 @@ void check_spmv( using y_value_trait = Kokkos::ArithTraits; using y_value_mag_type = typename y_value_trait::mag_type; - const y_value_mag_type eps = - 10 * Kokkos::ArithTraits::eps(); - bool transposed = (mode == "T") || (mode == "H"); - y_vector_type expected_y( - "expected", transposed ? input_mat.numCols() : input_mat.numRows()); + const y_value_mag_type eps = 10 * Kokkos::ArithTraits::eps(); + + y_vector_type actual_y("actual_y", y.extent(0)); + y_vector_type expected_y("expected_y", y.extent(0)); Kokkos::deep_copy(expected_y, y); + Kokkos::deep_copy(actual_y, y); Kokkos::fence(); sequential_spmv(input_mat, x, expected_y, alpha, beta, mode); bool threw = false; std::string msg; try { - KokkosSparse::spmv(handle, mode.data(), alpha, input_mat, x, beta, y); + KokkosSparse::spmv(handle, mode.data(), alpha, input_mat, x, beta, actual_y); Kokkos::fence(); } catch (std::exception &e) { threw = true; msg = e.what(); } - ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 1D, mode " << mode - << ": threw exception:\n" - << msg << '\n'; + ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 1D, mode " << mode << ": threw exception:\n" << msg << '\n'; + int num_errors = 0; - Kokkos::parallel_reduce( - "KokkosSparse::Test::spmv", my_exec_space(0, y.extent(0)), - fSPMV(expected_y, y, eps, max_val), - num_errors); + Kokkos::parallel_reduce("KokkosSparse::Test::spmv", my_exec_space(0, actual_y.extent(0)), + fSPMV(expected_y, 
actual_y, eps, max_val), num_errors); if (num_errors > 0) - printf("KokkosSparse::Test::spmv: %i errors of %i with params: %lf %lf\n", - num_errors, y.extent_int(0), y_value_trait::abs(alpha), - y_value_trait::abs(beta)); + printf("KokkosSparse::Test::spmv: %i errors of %i with params: %lf %lf\n", num_errors, y.extent_int(0), + y_value_trait::abs(alpha), y_value_trait::abs(beta)); EXPECT_TRUE(num_errors == 0); } -template -void check_spmv_mv( - Handle *handle, crsMat_t input_mat, x_vector_type x, y_vector_type y, - y_vector_type expected_y, - typename y_vector_type::non_const_value_type alpha, - typename y_vector_type::non_const_value_type beta, int numMV, - const std::string &mode, - typename Kokkos::ArithTraits::mag_type - max_val) { +template +void check_spmv_mv(Handle *handle, crsMat_t input_mat, x_vector_type x, y_vector_type y, y_vector_type expected_y, + typename y_vector_type::non_const_value_type alpha, + typename y_vector_type::non_const_value_type beta, int numMV, const std::string &mode, + typename Kokkos::ArithTraits::mag_type max_val) { EXPECT_TRUE(mode.size() == 1); using ExecSpace = typename crsMat_t::execution_space; @@ -249,8 +223,7 @@ void check_spmv_mv( // y is the quantity being tested here, // so let us use y_value_type to determine // the appropriate tolerance precision. 
- const y_value_mag_type eps = - 10 * Kokkos::ArithTraits::eps(); + const y_value_mag_type eps = 10 * Kokkos::ArithTraits::eps(); Kokkos::deep_copy(expected_y, y); @@ -265,9 +238,7 @@ void check_spmv_mv( threw = true; msg = e.what(); } - ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 2D, mode " << mode - << ": threw exception:\n" - << msg << '\n'; + ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 2D, mode " << mode << ": threw exception:\n" << msg << '\n'; for (int i = 0; i < numMV; ++i) { auto x_i = Kokkos::subview(x, Kokkos::ALL(), i); @@ -279,30 +250,21 @@ void check_spmv_mv( auto y_spmv = Kokkos::subview(y, Kokkos::ALL(), i); int num_errors = 0; - Kokkos::parallel_reduce( - "KokkosSparse::Test::spmv_mv", my_exec_space(0, y_i.extent(0)), - fSPMV(y_i, y_spmv, eps, max_val), - num_errors); + Kokkos::parallel_reduce("KokkosSparse::Test::spmv_mv", my_exec_space(0, y_i.extent(0)), + fSPMV(y_i, y_spmv, eps, max_val), num_errors); if (num_errors > 0) - std::cout << "KokkosSparse::Test::spmv_mv: " << num_errors - << " errors of " << y_i.extent_int(0) << " for mv " << i - << " (alpha=" << alpha << ", beta=" << beta - << ", mode = " << mode << ")\n"; + std::cout << "KokkosSparse::Test::spmv_mv: " << num_errors << " errors of " << y_i.extent_int(0) << " for mv " + << i << " (alpha=" << alpha << ", beta=" << beta << ", mode = " << mode << ")\n"; EXPECT_TRUE(num_errors == 0); } } template -void check_spmv_struct( - const crsMat_t input_mat, const int stencil_type, - const Kokkos::View - structure, - x_vector_type x, y_vector_type y, - typename y_vector_type::non_const_value_type alpha, - typename y_vector_type::non_const_value_type beta, - typename Kokkos::ArithTraits::mag_type - max_val) { +void check_spmv_struct(const crsMat_t input_mat, const int stencil_type, + const Kokkos::View structure, + x_vector_type x, y_vector_type y, typename y_vector_type::non_const_value_type alpha, + typename y_vector_type::non_const_value_type beta, + typename 
Kokkos::ArithTraits::mag_type max_val) { using ExecSpace = typename crsMat_t::execution_space; using my_exec_space = Kokkos::RangePolicy; using y_value_type = typename y_vector_type::non_const_value_type; @@ -319,35 +281,27 @@ void check_spmv_struct( Kokkos::fence(); sequential_spmv(input_mat, x, expected_y, alpha, beta); - KokkosSparse::Experimental::spmv_struct("N", stencil_type, structure, alpha, - input_mat, x, beta, y); + KokkosSparse::Experimental::spmv_struct("N", stencil_type, structure, alpha, input_mat, x, beta, y); int num_errors = 0; - Kokkos::parallel_reduce( - "KokkosKernels::UnitTests::spmv_struct", my_exec_space(0, y.extent(0)), - fSPMV(expected_y, y, eps, max_val), - num_errors); + Kokkos::parallel_reduce("KokkosKernels::UnitTests::spmv_struct", my_exec_space(0, y.extent(0)), + fSPMV(expected_y, y, eps, max_val), num_errors); if (num_errors > 0) { printf( "KokkosKernels::UnitTests::spmv_struct: %i errors of %i with params: " "%d %lf %lf\n", - num_errors, y.extent_int(0), stencil_type, y_value_trait::abs(alpha), - y_value_trait::abs(beta)); + num_errors, y.extent_int(0), stencil_type, y_value_trait::abs(alpha), y_value_trait::abs(beta)); } EXPECT_TRUE(num_errors == 0); } // check_spmv_struct template -void check_spmv_mv_struct( - const crsMat_t input_mat, const int stencil_type, - const Kokkos::View - structure, - x_vector_type x, y_vector_type y, y_vector_type expected_y, - typename y_vector_type::non_const_value_type alpha, - typename y_vector_type::non_const_value_type beta, int numMV, - typename Kokkos::ArithTraits::mag_type - max_val) { +void check_spmv_mv_struct(const crsMat_t input_mat, const int stencil_type, + const Kokkos::View structure, + x_vector_type x, y_vector_type y, y_vector_type expected_y, + typename y_vector_type::non_const_value_type alpha, + typename y_vector_type::non_const_value_type beta, int numMV, + typename Kokkos::ArithTraits::mag_type max_val) { using ExecSpace = typename crsMat_t::execution_space; using my_exec_space 
= Kokkos::RangePolicy; using y_value_type = typename y_vector_type::non_const_value_type; @@ -361,8 +315,7 @@ void check_spmv_mv_struct( Kokkos::deep_copy(expected_y, y); Kokkos::fence(); - KokkosSparse::Experimental::spmv_struct("N", stencil_type, structure, alpha, - input_mat, x, beta, y); + KokkosSparse::Experimental::spmv_struct("N", stencil_type, structure, alpha, input_mat, x, beta, y); for (int vectorIdx = 0; vectorIdx < numMV; ++vectorIdx) { auto x_i = Kokkos::subview(x, Kokkos::ALL(), vectorIdx); @@ -373,17 +326,13 @@ void check_spmv_mv_struct( auto y_spmv = Kokkos::subview(y, Kokkos::ALL(), vectorIdx); int num_errors = 0; - Kokkos::parallel_reduce( - "KokkosKernels::UnitTests::spmv_mv_struct", - my_exec_space(0, y.extent(0)), - fSPMV(y_i, y_spmv, eps, max_val), - num_errors); + Kokkos::parallel_reduce("KokkosKernels::UnitTests::spmv_mv_struct", my_exec_space(0, y.extent(0)), + fSPMV(y_i, y_spmv, eps, max_val), num_errors); if (num_errors > 0) printf( "KokkosKernels::UnitTests::spmv_mv_struct: %i errors of %i with " "params: %d %lf %lf, in vector %i\n", - num_errors, y.extent_int(0), stencil_type, y_value_trait::abs(alpha), - y_value_trait::abs(beta), vectorIdx); + num_errors, y.extent_int(0), stencil_type, y_value_trait::abs(alpha), y_value_trait::abs(beta), vectorIdx); EXPECT_TRUE(num_errors == 0); } } // check_spmv_mv_struct @@ -405,18 +354,16 @@ Kokkos::complex randomUpperBound>(int mag) { return Kokkos::complex(mag, mag); } -template -void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, - lno_t bandwidth, lno_t row_size_variance, bool heavy) { - using crsMat_t = typename KokkosSparse::CrsMatrix; +template +void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, + bool heavy) { + using crsMat_t = typename KokkosSparse::CrsMatrix; using scalar_view_t = typename crsMat_t::values_type::non_const_type; using x_vector_type = scalar_view_t; using y_vector_type = 
scalar_view_t; using mag_t = typename Kokkos::ArithTraits::mag_type; - using handle_t = - KokkosSparse::SPMVHandle; + using handle_t = KokkosSparse::SPMVHandle; + using y_policy = Kokkos::RangePolicy; constexpr mag_t max_x = static_cast(1); constexpr mag_t max_y = static_cast(1); @@ -424,32 +371,46 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, lno_t numCols = numRows; - crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, row_size_variance, bandwidth); + crsMat_t input_mat = + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, row_size_variance, bandwidth); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); - const lno_t max_nnz_per_row = - numRows ? (nnz / numRows + row_size_variance) : 0; + const lno_t max_nnz_per_row = numRows ? (nnz / numRows + row_size_variance) : 0; + // Create vectors with and without nans x_vector_type input_x("x", nc); - y_vector_type output_y("y", nr); x_vector_type input_xt("x", nr); - y_vector_type output_yt("y", nc); + y_vector_type input_y("y", nr), input_y_nans("y_nans", nr); + y_vector_type input_yt("y", nc), input_yt_nans("y_nans", nc); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(input_x, rand_pool, randomUpperBound(max_x)); - Kokkos::fill_random(output_y, rand_pool, randomUpperBound(max_y)); + Kokkos::fill_random(input_y, rand_pool, randomUpperBound(max_y)); Kokkos::fill_random(input_xt, rand_pool, randomUpperBound(max_x)); - Kokkos::fill_random(output_yt, rand_pool, randomUpperBound(max_y)); + Kokkos::fill_random(input_yt, rand_pool, randomUpperBound(max_y)); + + // sprinkle in some nans + Kokkos::deep_copy(input_y_nans, input_y); + Kokkos::deep_copy(input_yt_nans, input_yt); + Kokkos::parallel_for( + y_policy(0, input_y_nans.extent(0)), KOKKOS_LAMBDA(const size_t i) { + if (0 == (i % 19)) { + input_y_nans(i) = KokkosKernels::Impl::quiet_NaN(); 
+ } + }); + Kokkos::parallel_for( + y_policy(0, input_yt_nans.extent(0)), KOKKOS_LAMBDA(const size_t i) { + if (0 == (i % 23)) { + input_yt_nans(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); // We also need to bound the values // in the matrix to bound the cancellations // coming from arithmetic operations. - Kokkos::fill_random(input_mat.values, rand_pool, - randomUpperBound(max_val)); + Kokkos::fill_random(input_mat.values, rand_pool, randomUpperBound(max_val)); std::vector nonTransModes = {"N"}; std::vector transModes = {"T"}; @@ -468,10 +429,11 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, for (auto mode : nonTransModes) { for (double alpha : testAlphaBeta) { for (double beta : testAlphaBeta) { - mag_t max_error = - beta * max_y + alpha * max_nnz_per_row * max_val * max_x; - Test::check_spmv(&handle, input_mat, input_x, output_y, alpha, beta, - mode, max_error); + mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; + Test::check_spmv(&handle, input_mat, input_x, input_y, alpha, beta, mode, max_error); + if (0 == beta) { + Test::check_spmv(&handle, input_mat, input_x, input_y_nans, alpha, beta, mode, max_error); + } } } } @@ -479,34 +441,29 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, for (double alpha : testAlphaBeta) { for (double beta : testAlphaBeta) { // hoping the transpose won't have a long column... 
- mag_t max_error = - beta * max_y + alpha * max_nnz_per_row * max_val * max_x; - Test::check_spmv(&handle, input_mat, input_xt, output_yt, alpha, beta, - mode, max_error); + mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; + Test::check_spmv(&handle, input_mat, input_xt, input_yt, alpha, beta, mode, max_error); + if (0 == beta) { + Test::check_spmv(&handle, input_mat, input_x, input_yt_nans, alpha, beta, mode, max_error); + } } } } } -template -void test_spmv_algorithms(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance, bool heavy) { +template +void test_spmv_algorithms(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, bool heavy) { using namespace KokkosSparse; // Here, SPMV_MERGE_PATH will test a TPL's algorithm for imbalanced matrices // if available (like cuSPARSE ALG2). SPMV_NATIVE_MERGE_PATH will always call // the KokkosKernels implmentation of merge path. - for (SPMVAlgorithm algo : - {SPMV_DEFAULT, SPMV_NATIVE, SPMV_MERGE_PATH, SPMV_NATIVE_MERGE_PATH}) { - test_spmv(algo, numRows, nnz, bandwidth, - row_size_variance, heavy); + for (SPMVAlgorithm algo : {SPMV_DEFAULT, SPMV_NATIVE, SPMV_MERGE_PATH, SPMV_NATIVE_MERGE_PATH}) { + test_spmv(algo, numRows, nnz, bandwidth, row_size_variance, heavy); } } -template -void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance, bool heavy, int numMV) { +template +void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, bool heavy, int numMV) { using mag_t = typename Kokkos::ArithTraits::mag_type; constexpr mag_t max_x = static_cast(1); @@ -515,12 +472,10 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using ViewTypeX = Kokkos::View; using ViewTypeY = Kokkos::View; - using handle_t = - KokkosSparse::SPMVHandle; + using handle_t = 
KokkosSparse::SPMVHandle; ViewTypeX b_x("A", numCols, numMV); ViewTypeY b_y("B", numRows, numMV); @@ -530,24 +485,21 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, ViewTypeY b_yt("B", numCols, numMV); ViewTypeY b_yt_copy("B", numCols, numMV); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(b_x, rand_pool, randomUpperBound(max_x)); Kokkos::fill_random(b_y, rand_pool, randomUpperBound(max_y)); Kokkos::fill_random(b_xt, rand_pool, randomUpperBound(max_x)); Kokkos::fill_random(b_yt, rand_pool, randomUpperBound(max_y)); - crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, row_size_variance, bandwidth); + crsMat_t input_mat = + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, row_size_variance, bandwidth); - const lno_t max_nnz_per_row = - numRows ? (nnz / numRows + row_size_variance) : 0; + const lno_t max_nnz_per_row = numRows ? (nnz / numRows + row_size_variance) : 0; // We also need to bound the values // in the matrix to bound the cancellations // coming from arithmetic operations. 
- Kokkos::fill_random(input_mat.values, rand_pool, - randomUpperBound(max_val)); + Kokkos::fill_random(input_mat.values, rand_pool, randomUpperBound(max_val)); Kokkos::deep_copy(b_y_copy, b_y); Kokkos::deep_copy(b_yt_copy, b_yt); @@ -565,10 +517,8 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, for (auto mode : nonTransModes) { for (double alpha : testAlphaBeta) { for (double beta : testAlphaBeta) { - mag_t max_error = - beta * max_y + alpha * max_nnz_per_row * max_val * max_x; - Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, alpha, beta, - numMV, mode, max_error); + mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; + Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, alpha, beta, numMV, mode, max_error); } } } @@ -576,47 +526,39 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, for (double alpha : testAlphaBeta) { for (double beta : testAlphaBeta) { // hoping the transpose won't have a long column... - mag_t max_error = - beta * max_y + alpha * max_nnz_per_row * max_val * max_x; - Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, alpha, - beta, numMV, mode, max_error); + mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; + Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, alpha, beta, numMV, mode, max_error); } } } } -template -void test_spmv_mv_heavy(lno_t numRows, lno_t numCols, size_type nnz, - lno_t bandwidth, lno_t row_size_variance, int numMV) { +template +void test_spmv_mv_heavy(lno_t numRows, lno_t numCols, size_type nnz, lno_t bandwidth, lno_t row_size_variance, + int numMV) { #if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) || defined(KOKKOS_ARCH_A64FX) if (std::is_same>::value) { - std::cerr - << "TEST SKIPPED: See " - "https://github.com/kokkos/kokkos-kernels/issues/1331 for details." - << std::endl; + std::cerr << "TEST SKIPPED: See " + "https://github.com/kokkos/kokkos-kernels/issues/1331 for details." 
+ << std::endl; return; } #endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL || KOKKOS_ARCH_A64FX - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using ViewTypeX = Kokkos::View; using ViewTypeY = Kokkos::View; using mag_t = typename Kokkos::ArithTraits::mag_type; - using handle_t = - KokkosSparse::SPMVHandle; + using handle_t = KokkosSparse::SPMVHandle; constexpr mag_t max_x = static_cast(10); constexpr mag_t max_y = static_cast(10); constexpr mag_t max_val = static_cast(10); - crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, row_size_variance, bandwidth); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + crsMat_t input_mat = + KokkosSparse::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, row_size_variance, bandwidth); + Kokkos::Random_XorShift64_Pool rand_pool(13718); - const lno_t max_nnz_per_row = - numRows ? (nnz / numRows + row_size_variance) : 0; + const lno_t max_nnz_per_row = numRows ? 
(nnz / numRows + row_size_variance) : 0; for (int nv = 1; nv <= numMV; nv++) { ViewTypeX b_x("A", numCols, nv); @@ -638,30 +580,24 @@ void test_spmv_mv_heavy(lno_t numRows, lno_t numCols, size_type nnz, handle_t handle; - Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 1.0, 0.0, nv, - "N", max_nnz_per_row * max_val * max_x); - Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 0.0, 1.0, nv, - "N", max_y); - Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 1.0, 1.0, nv, - "N", max_y + max_nnz_per_row * max_val * max_x); - Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, 1.0, 0.0, nv, - "T", max_nnz_per_row * max_val * max_x); - Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, 0.0, 1.0, nv, - "T", max_y); + Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 1.0, 0.0, nv, "N", max_nnz_per_row * max_val * max_x); + Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 0.0, 1.0, nv, "N", max_y); + Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 1.0, 1.0, nv, "N", + max_y + max_nnz_per_row * max_val * max_x); + Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, 1.0, 0.0, nv, "T", + max_nnz_per_row * max_val * max_x); + Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, 0.0, 1.0, nv, "T", max_y); // Testing all modes together, since matrix is square std::vector modes = {"N", "C", "T", "H"}; std::vector testAlphaBeta = {0.0, 1.0, -1.0, 2.5}; for (auto mode : modes) { for (double alpha : testAlphaBeta) { for (double beta : testAlphaBeta) { - mag_t max_error = - beta * max_y + alpha * max_nnz_per_row * max_val * max_x; + mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; if (*mode == 'N' || *mode == 'C') { - Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, alpha, - beta, nv, mode, max_error); + Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, alpha, beta, nv, mode, max_error); } else { - Test::check_spmv_mv(&handle, 
input_mat, b_xt, b_yt, b_yt_copy, - alpha, beta, nv, mode, max_error); + Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, alpha, beta, nv, mode, max_error); } } } @@ -671,8 +607,7 @@ void test_spmv_mv_heavy(lno_t numRows, lno_t numCols, size_type nnz, template void test_spmv_struct_1D(lno_t nx, lno_t leftBC, lno_t rightBC) { - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using scalar_view_t = typename crsMat_t::values_type::non_const_type; using x_vector_type = scalar_view_t; using y_vector_type = scalar_view_t; @@ -684,8 +619,7 @@ void test_spmv_struct_1D(lno_t nx, lno_t leftBC, lno_t rightBC) { Kokkos::View structure("Spmv Structure", 1); structure(0) = nx; - Kokkos::View mat_structure("Matrix Structure", - 1); + Kokkos::View mat_structure("Matrix Structure", 1); mat_structure(0, 0) = nx; if (leftBC == 1) { mat_structure(0, 1) = 1; @@ -694,8 +628,7 @@ void test_spmv_struct_1D(lno_t nx, lno_t leftBC, lno_t rightBC) { mat_structure(0, 2) = 1; } - crsMat_t input_mat = - Test::generate_structured_matrix1D(mat_structure); + crsMat_t input_mat = Test::generate_structured_matrix1D(mat_structure); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); @@ -703,27 +636,21 @@ void test_spmv_struct_1D(lno_t nx, lno_t leftBC, lno_t rightBC) { x_vector_type input_x("x", nc); y_vector_type output_y("y", nr); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(input_x, rand_pool, max_x); Kokkos::fill_random(output_y, rand_pool, max_y); const mag_t max_error = max_y + 3 * max_val * max_x; - Test::check_spmv_struct(input_mat, 1, structure, input_x, output_y, 1.0, 0.0, - max_error); - Test::check_spmv_struct(input_mat, 1, structure, input_x, output_y, 0.0, 1.0, - max_error); - Test::check_spmv_struct(input_mat, 1, structure, input_x, output_y, 1.0, 1.0, - max_error); + Test::check_spmv_struct(input_mat, 1, structure, input_x, 
output_y, 1.0, 0.0, max_error); + Test::check_spmv_struct(input_mat, 1, structure, input_x, output_y, 0.0, 1.0, max_error); + Test::check_spmv_struct(input_mat, 1, structure, input_x, output_y, 1.0, 1.0, max_error); } template -void test_spmv_struct_2D(lno_t nx, lno_t ny, lno_t horizontalBC, - lno_t verticalBC) { - using crsMat_t = typename KokkosSparse::CrsMatrix; +void test_spmv_struct_2D(lno_t nx, lno_t ny, lno_t horizontalBC, lno_t verticalBC) { + using crsMat_t = typename KokkosSparse::CrsMatrix; using scalar_view_t = typename crsMat_t::values_type::non_const_type; using x_vector_type = scalar_view_t; using y_vector_type = scalar_view_t; @@ -735,8 +662,7 @@ void test_spmv_struct_2D(lno_t nx, lno_t ny, lno_t horizontalBC, Kokkos::View structure("Spmv Structure", 2); structure(0) = nx; structure(1) = ny; - Kokkos::View mat_structure("Matrix Structure", - 2); + Kokkos::View mat_structure("Matrix Structure", 2); mat_structure(0, 0) = nx; if (horizontalBC == 1 || horizontalBC == 3) { mat_structure(0, 1) = 1; @@ -752,10 +678,8 @@ void test_spmv_struct_2D(lno_t nx, lno_t ny, lno_t horizontalBC, mat_structure(1, 2) = 1; } - crsMat_t input_mat_FD = - Test::generate_structured_matrix2D("FD", mat_structure); - crsMat_t input_mat_FE = - Test::generate_structured_matrix2D("FE", mat_structure); + crsMat_t input_mat_FD = Test::generate_structured_matrix2D("FD", mat_structure); + crsMat_t input_mat_FE = Test::generate_structured_matrix2D("FE", mat_structure); lno_t nr = input_mat_FD.numRows(); lno_t nc = input_mat_FD.numCols(); @@ -763,8 +687,7 @@ void test_spmv_struct_2D(lno_t nx, lno_t ny, lno_t horizontalBC, x_vector_type input_x("x", nc); y_vector_type output_y("y", nr); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(input_x, rand_pool, max_x); Kokkos::fill_random(output_y, rand_pool, max_y); @@ -772,31 +695,23 @@ void test_spmv_struct_2D(lno_t nx, lno_t ny, lno_t horizontalBC, { constexpr mag_t 
max_val = static_cast(4); constexpr mag_t max_error = max_y + 5 * max_val * max_x; - Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, - 0.0, max_error); - Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 0.0, - 1.0, max_error); - Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, - 1.0, max_error); + Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, 0.0, max_error); + Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 0.0, 1.0, max_error); + Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, 1.0, max_error); } { constexpr mag_t max_val = static_cast(8); constexpr mag_t max_error = max_y + 9 * max_val * max_x; - Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, - 0.0, max_error); - Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 0.0, - 1.0, max_error); - Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, - 1.0, max_error); + Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, 0.0, max_error); + Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 0.0, 1.0, max_error); + Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, 1.0, max_error); } } template -void test_spmv_struct_3D(lno_t nx, lno_t ny, lno_t nz, lno_t horizontal1BC, - lno_t horizontal2BC, lno_t verticalBC) { - using crsMat_t = typename KokkosSparse::CrsMatrix; +void test_spmv_struct_3D(lno_t nx, lno_t ny, lno_t nz, lno_t horizontal1BC, lno_t horizontal2BC, lno_t verticalBC) { + using crsMat_t = typename KokkosSparse::CrsMatrix; using scalar_view_t = typename crsMat_t::values_type::non_const_type; using x_vector_type = scalar_view_t; using y_vector_type = scalar_view_t; @@ -809,8 +724,7 @@ void test_spmv_struct_3D(lno_t nx, lno_t ny, lno_t nz, lno_t horizontal1BC, structure(0) = nx; structure(1) = ny; structure(2) = nz; - 
Kokkos::View mat_structure("Matrix Structure", - 3); + Kokkos::View mat_structure("Matrix Structure", 3); mat_structure(0, 0) = nx; if (horizontal1BC == 1 || horizontal1BC == 3) { mat_structure(0, 1) = 1; @@ -833,10 +747,8 @@ void test_spmv_struct_3D(lno_t nx, lno_t ny, lno_t nz, lno_t horizontal1BC, mat_structure(2, 2) = 1; } - crsMat_t input_mat_FD = - Test::generate_structured_matrix3D("FD", mat_structure); - crsMat_t input_mat_FE = - Test::generate_structured_matrix3D("FE", mat_structure); + crsMat_t input_mat_FD = Test::generate_structured_matrix3D("FD", mat_structure); + crsMat_t input_mat_FE = Test::generate_structured_matrix3D("FE", mat_structure); lno_t nr = input_mat_FD.numRows(); lno_t nc = input_mat_FD.numCols(); @@ -844,8 +756,7 @@ void test_spmv_struct_3D(lno_t nx, lno_t ny, lno_t nz, lno_t horizontal1BC, x_vector_type input_x("x", nc); y_vector_type output_y("y", nr); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(input_x, rand_pool, max_x); Kokkos::fill_random(output_y, rand_pool, max_y); @@ -853,31 +764,23 @@ void test_spmv_struct_3D(lno_t nx, lno_t ny, lno_t nz, lno_t horizontal1BC, { constexpr mag_t max_val = static_cast(6); constexpr mag_t max_error = max_y + 7 * max_val * max_x; - Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, - 0.0, max_error); - Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 0.0, - 1.0, max_error); - Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, - 1.0, max_error); + Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, 0.0, max_error); + Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 0.0, 1.0, max_error); + Test::check_spmv_struct(input_mat_FD, 1, structure, input_x, output_y, 1.0, 1.0, max_error); } { constexpr mag_t max_val = static_cast(26); constexpr mag_t max_error = max_y + 27 * max_val * max_x; - 
Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, - 0.0, max_error); - Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 0.0, - 1.0, max_error); - Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, - 1.0, max_error); + Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, 0.0, max_error); + Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 0.0, 1.0, max_error); + Test::check_spmv_struct(input_mat_FE, 2, structure, input_x, output_y, 1.0, 1.0, max_error); } } -template +template void test_spmv_mv_struct_1D(lno_t nx, int numMV) { - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; using x_multivector_type = Kokkos::View; using y_multivector_type = Kokkos::View; using mag_t = typename Kokkos::ArithTraits::mag_type; @@ -887,14 +790,12 @@ void test_spmv_mv_struct_1D(lno_t nx, int numMV) { Kokkos::View structure("Spmv Structure", 1); structure(0) = nx; - Kokkos::View mat_structure("Matrix Structure", - 1); + Kokkos::View mat_structure("Matrix Structure", 1); mat_structure(0, 0) = nx; mat_structure(0, 1) = 1; mat_structure(0, 2) = 1; - crsMat_t input_mat = - Test::generate_structured_matrix1D(mat_structure); + crsMat_t input_mat = Test::generate_structured_matrix1D(mat_structure); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); @@ -903,8 +804,7 @@ void test_spmv_mv_struct_1D(lno_t nx, int numMV) { y_multivector_type output_y("y", nr, numMV); y_multivector_type output_y_copy("y_copy", nr, numMV); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); Kokkos::fill_random(input_x, rand_pool, max_x); Kokkos::fill_random(output_y, rand_pool, max_y); @@ -913,12 +813,9 @@ void test_spmv_mv_struct_1D(lno_t nx, int numMV) { Kokkos::deep_copy(output_y_copy, output_y); - Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, - 
output_y_copy, 1.0, 0.0, numMV, max_error); - Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, - output_y_copy, 0.0, 1.0, numMV, max_error); - Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, - output_y_copy, 1.0, 1.0, numMV, max_error); + Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, output_y_copy, 1.0, 0.0, numMV, max_error); + Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, output_y_copy, 0.0, 1.0, numMV, max_error); + Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, output_y_copy, 1.0, 1.0, numMV, max_error); } // call it if ordinal int and, scalar float and double are instantiated. @@ -926,12 +823,11 @@ template void test_github_issue_101() { typedef KokkosSparse::CrsMatrix float_matrix_type; typedef KokkosSparse::CrsMatrix double_matrix_type; - static_assert( - std::is_same::value, - "Two KokkosSparse::CrsMatrix types that differ only in the type of " - "matrix values, appear to have two different StaticCrsGraphType " - "typedefs. This should never happen."); + static_assert(std::is_same::value, + "Two KokkosSparse::CrsMatrix types that differ only in the type of " + "matrix values, appear to have two different StaticCrsGraphType " + "typedefs. 
This should never happen."); typedef typename float_matrix_type::StaticCrsGraphType graph_type; constexpr int numRows = 1; @@ -997,8 +893,7 @@ void test_github_issue_101() { Kokkos::deep_copy(y_h, y); const double expectedResult_allDouble = - static_cast(1.0) + - static_cast(EPS_f) / static_cast(2.0); + static_cast(1.0) + static_cast(EPS_f) / static_cast(2.0); EXPECT_NE(expectedResult_allDouble, ZERO_d); EXPECT_EQ(y_h[0], expectedResult_allDouble); @@ -1042,8 +937,7 @@ void test_github_issue_101() { Kokkos::deep_copy(y_h, y); const double expectedResult_mixed = - static_cast(1.0) + - static_cast(EPS_f) / static_cast(2.0); + static_cast(1.0) + static_cast(EPS_f) / static_cast(2.0); EXPECT_NE(expectedResult_mixed, ZERO_d); EXPECT_EQ(y_h[0], expectedResult_mixed); @@ -1066,33 +960,28 @@ void test_github_issue_101() { } } -template +template void test_spmv_all_interfaces_light() { // Using a small matrix, run through the various SpMV interfaces and // make sure they produce the correct results. 
using execution_space = typename DeviceType::execution_space; using mag_t = typename Kokkos::ArithTraits::mag_type; - using crsMat_t = typename KokkosSparse::CrsMatrix; + using crsMat_t = typename KokkosSparse::CrsMatrix; Kokkos::Random_XorShift64_Pool rand_pool(13718); const lno_t m = 111; const lno_t n = 99; const mag_t maxVal = 10.0; const mag_t eps = 10.0 * Kokkos::ArithTraits::eps(); size_type nnz = 600; - crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( - m, n, nnz, 2, lno_t(n * 0.7)); + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnz, 2, lno_t(n * 0.7)); // note: A's values are in range [0, 50) const mag_t maxError = (nnz / m) * 50.0 * maxVal; using multivector_t = Kokkos::View; using vector_t = Kokkos::View; using range1D_t = Kokkos::RangePolicy; - using range2D_t = Kokkos::MDRangePolicy>; - using v_handle_t = - KokkosSparse::SPMVHandle; - using mv_handle_t = KokkosSparse::SPMVHandle; + using range2D_t = Kokkos::MDRangePolicy>; + using v_handle_t = KokkosSparse::SPMVHandle; + using mv_handle_t = KokkosSparse::SPMVHandle; multivector_t x_mv("x_mv", n, 3); vector_t x("x", n); // Randomize x (it won't be modified after that) @@ -1104,25 +993,20 @@ void test_spmv_all_interfaces_light() { multivector_t ygold_mv("ygold_mv", m, 3); vector_t ygold("ygold", m); for (lno_t i = 0; i < 3; i++) - Test::sequential_spmv(A, Kokkos::subview(x_mv, Kokkos::ALL(), i), - Kokkos::subview(ygold_mv, Kokkos::ALL(), i), 1.0, + Test::sequential_spmv(A, Kokkos::subview(x_mv, Kokkos::ALL(), i), Kokkos::subview(ygold_mv, Kokkos::ALL(), i), 1.0, 0.0); Test::sequential_spmv(A, x, ygold, 1.0, 0.0); auto clear_y = [&]() { Kokkos::deep_copy(y_mv, scalar_t(0)); }; auto verify = [&]() { int num_errors = 0; - Kokkos::parallel_reduce( - "KokkosSparse::Test::spmv", range1D_t(0, m), - Test::fSPMV(ygold, y, eps, maxError), num_errors); + Kokkos::parallel_reduce("KokkosSparse::Test::spmv", range1D_t(0, m), + Test::fSPMV(ygold, y, eps, maxError), num_errors); 
EXPECT_EQ(num_errors, 0); }; auto verify_mv = [&]() { int num_errors = 0; - Kokkos::parallel_reduce("KokkosSparse::Test::spmv", - range2D_t({0, 0}, {m, 3}), - Test::fSPMV( - ygold_mv, y_mv, eps, maxError), - num_errors); + Kokkos::parallel_reduce("KokkosSparse::Test::spmv", range2D_t({0, 0}, {m, 3}), + Test::fSPMV(ygold_mv, y_mv, eps, maxError), num_errors); EXPECT_EQ(num_errors, 0); }; // Now run through the interfaces and check results each time. @@ -1170,94 +1054,63 @@ void test_spmv_all_interfaces_light() { clear_y(); } -#define EXECUTE_TEST_ISSUE_101(DEVICE) \ - TEST_F(TestCategory, sparse##_##spmv_issue_101##_##OFFSET##_##DEVICE) { \ - test_github_issue_101(); \ - } - -#define EXECUTE_TEST_FN(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##spmv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spmv_algorithms(1000, 1000 * 3, 200, \ - 10, true); \ - test_spmv_algorithms(1000, 1000 * 3, 100, \ - 10, true); \ - test_spmv_algorithms(1000, 1000 * 20, \ - 100, 5, true); \ - test_spmv_algorithms(50000, 50000 * 3, \ - 20, 10, false); \ - test_spmv_algorithms(50000, 50000 * 3, \ - 100, 10, false); \ - test_spmv_algorithms(10000, 10000 * 2, \ - 100, 5, false); \ +#define EXECUTE_TEST_ISSUE_101(DEVICE) \ + TEST_F(TestCategory, sparse##_##spmv_issue_101##_##OFFSET##_##DEVICE) { test_github_issue_101(); } + +#define EXECUTE_TEST_FN(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spmv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spmv_algorithms(1000, 1000 * 3, 200, 10, true); \ + test_spmv_algorithms(1000, 1000 * 3, 100, 10, true); \ + test_spmv_algorithms(1000, 1000 * 20, 100, 5, true); \ + test_spmv_algorithms(50000, 50000 * 3, 20, 10, false); \ + test_spmv_algorithms(50000, 50000 * 3, 100, 10, false); \ + test_spmv_algorithms(10000, 10000 * 2, 100, 5, false); \ } -#define EXECUTE_TEST_INTERFACES(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ - TEST_F( \ - TestCategory, \ - 
sparse_spmv_interfaces_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - test_spmv_all_interfaces_light(); \ +#define EXECUTE_TEST_INTERFACES(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ + TEST_F(TestCategory, sparse_spmv_interfaces_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ + test_spmv_all_interfaces_light(); \ } -#define EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##spmv_mv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - test_spmv_mv( \ - 1001, 1001 * 3, 200, 10, true, 1); \ - test_spmv_mv( \ - 999, 999 * 3, 100, 10, true, 5); \ - test_spmv_mv( \ - 1003, 1003 * 2, 100, 5, true, 10); \ - test_spmv_mv( \ - 50007, 50007 * 3, 20, 10, false, 1); \ - test_spmv_mv( \ - 50002, 50002 * 3, 100, 10, false, 1); \ - test_spmv_mv( \ - 10000, 10000 * 2, 100, 5, false, 5); \ - test_spmv_mv_heavy(204, 201, 204 * 10, 60, 4, 30); \ - test_spmv_mv_heavy(2, 3, 5, 3, 1, 10); \ +#define EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ + TEST_F(TestCategory, sparse##_##spmv_mv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ + test_spmv_mv(1001, 1001 * 3, 200, 10, true, 1); \ + test_spmv_mv(999, 999 * 3, 100, 10, true, 5); \ + test_spmv_mv(1003, 1003 * 2, 100, 5, true, 10); \ + test_spmv_mv(50007, 50007 * 3, 20, 10, false, 1); \ + test_spmv_mv(50002, 50002 * 3, 100, 10, false, 1); \ + test_spmv_mv(10000, 10000 * 2, 100, 5, false, 5); \ + test_spmv_mv_heavy(204, 201, 204 * 10, 60, 4, \ + 30); \ + test_spmv_mv_heavy(2, 3, 5, 3, 1, 10); \ } -#define EXECUTE_TEST_MV_MIXED_LAYOUT(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##spmv_mv_mixed_layout##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - test_spmv_mv_heavy(99, 101, 100 * 15, 40, 4, \ - 20); \ +#define EXECUTE_TEST_MV_MIXED_LAYOUT(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spmv_mv_mixed_layout##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) 
{ \ + test_spmv_mv_heavy(99, 101, 100 * 15, \ + 40, 4, 20); \ } -#define EXECUTE_TEST_STRUCT(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##spmv_struct##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spmv_struct_1D(10, 1, 1); \ - test_spmv_struct_2D(25, 21, 3, 3); \ - test_spmv_struct_2D(20, 25, 3, 3); \ - test_spmv_struct_2D(22, 22, 3, 3); \ - test_spmv_struct_3D(20, 20, 20, 3, 3, 3); \ - test_spmv_struct_3D(22, 22, 22, 3, 3, 3); \ - test_spmv_struct_3D(25, 10, 20, 3, 3, 3); \ - test_spmv_struct_3D(10, 20, 25, 3, 3, 3); \ - test_spmv_struct_3D(10, 24, 20, 3, 3, 3); \ +#define EXECUTE_TEST_STRUCT(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##spmv_struct##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spmv_struct_1D(10, 1, 1); \ + test_spmv_struct_2D(25, 21, 3, 3); \ + test_spmv_struct_2D(20, 25, 3, 3); \ + test_spmv_struct_2D(22, 22, 3, 3); \ + test_spmv_struct_3D(20, 20, 20, 3, 3, 3); \ + test_spmv_struct_3D(22, 22, 22, 3, 3, 3); \ + test_spmv_struct_3D(25, 10, 20, 3, 3, 3); \ + test_spmv_struct_3D(10, 20, 25, 3, 3, 3); \ + test_spmv_struct_3D(10, 24, 20, 3, 3, 3); \ } -#define EXECUTE_TEST_MV_STRUCT(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##spmv_mv_struct##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - test_spmv_mv_struct_1D( \ - 10, 1); \ - test_spmv_mv_struct_1D( \ - 10, 2); \ +#define EXECUTE_TEST_MV_STRUCT(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ + TEST_F(TestCategory, sparse##_##spmv_mv_struct##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ + test_spmv_mv_struct_1D(10, 1); \ + test_spmv_mv_struct_1D(10, 2); \ } -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST_ISSUE_101(TestDevice) #endif @@ -1270,8 +1123,7 @@ EXECUTE_TEST_ISSUE_101(TestDevice) #undef 
KOKKOSKERNELS_EXECUTE_TEST #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LayoutLeft, TestDevice) \ @@ -1285,8 +1137,7 @@ EXECUTE_TEST_ISSUE_101(TestDevice) #endif // defined(KOKKOSKERNELS_INST_LAYOUTLEFT) #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LayoutRight, TestDevice) \ @@ -1299,8 +1150,7 @@ EXECUTE_TEST_ISSUE_101(TestDevice) // Test that requires mixing LayoutLeft and LayoutRight (never an ETI'd // combination) -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ EXECUTE_TEST_MV_MIXED_LAYOUT(SCALAR, ORDINAL, OFFSET, TestDevice) diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv_bsr.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv_bsr.hpp index 6482d33d8a9a..dbc1fb7e027c 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv_bsr.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv_bsr.hpp @@ -41,6 +41,7 @@ #include #include #include "KokkosKernels_default_types.hpp" +#include #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_BsrMatrix.hpp" @@ -79,9 +80,7 @@ constexpr T max_y() { } /*! 
\brief whether the mode transposes the matrix*/ -inline bool mode_is_transpose(const char *mode) { - return mode[0] == 'T' || mode[0] == 'H'; -} +inline bool mode_is_transpose(const char *mode) { return mode[0] == 'T' || mode[0] == 'H'; } /*! \brief Get the max nonzeros (not max nonzero _blocks_) per row of Op(A) */ template @@ -89,13 +88,11 @@ inline size_t opMaxNnzPerRow(const Bsr &A, bool trans) { if (trans) { auto At = KokkosSparse::Impl::transpose_bsr_matrix(A); return At.blockDim() * - (size_t)KokkosSparse::Impl::graph_max_degree< - typename Bsr::execution_space, typename Bsr::ordinal_type>( + (size_t)KokkosSparse::Impl::graph_max_degree( At.graph.row_map); } else { return A.blockDim() * - (size_t)KokkosSparse::Impl::graph_max_degree< - typename Bsr::execution_space, typename Bsr::ordinal_type>( + (size_t)KokkosSparse::Impl::graph_max_degree( A.graph.row_map); } } @@ -123,20 +120,15 @@ Bsr bsr_random(const int blockSize, const int blockRows, const int blockCols) { using scalar_type = typename Bsr::non_const_value_type; using ordinal_type = typename Bsr::non_const_ordinal_type; using size_type = typename Bsr::non_const_size_type; - using Crs = - KokkosSparse::CrsMatrix; - using Graph = typename Crs::staticcrsgraph_type; + using Crs = KokkosSparse::CrsMatrix; + using Graph = typename Crs::staticcrsgraph_type; // construct a random Crs Matrix - Test::RandCsMatrix - rcs(blockRows, blockCols, scalar_type(0), max_a()); - - const auto colids = Kokkos::subview( - rcs.get_ids(), Kokkos::make_pair(size_t(0), rcs.get_nnz())); - const auto vals = Kokkos::subview( - rcs.get_vals(), Kokkos::make_pair(size_t(0), rcs.get_nnz())); + Test::RandCsMatrix rcs( + blockRows, blockCols, scalar_type(0), max_a()); + + const auto colids = Kokkos::subview(rcs.get_ids(), Kokkos::make_pair(size_type(0), rcs.get_nnz())); + const auto vals = Kokkos::subview(rcs.get_vals(), Kokkos::make_pair(size_type(0), rcs.get_nnz())); Graph graph(colids, rcs.get_map()); Crs crs("crs", blockCols, vals, 
graph); @@ -147,11 +139,9 @@ Bsr bsr_random(const int blockSize, const int blockRows, const int blockCols) { /*! \brief test a specific spmv */ -template -void test_spmv(Handle *handle, const char *mode, const Alpha &alpha, - const Beta &beta, const Bsr &a, const Crs &acrs, +template +void test_spmv(Handle *handle, const char *mode, const Alpha &alpha, const Beta &beta, const Bsr &a, const Crs &acrs, size_t maxNnzPerRow, const XVector &x, const YVector &y) { using scalar_type = typename Bsr::non_const_value_type; using ordinal_type = typename Bsr::non_const_ordinal_type; @@ -181,10 +171,9 @@ void test_spmv(Handle *handle, const char *mode, const Alpha &alpha, 10x means same order of magnitude */ - const mag_type tolerance = - KATS::eps() * KATS::abs(beta) * KATS::abs(max_y()) + - 10 * KATS::eps() * maxNnzPerRow * KATS::abs(alpha) * - KATS::abs(max_a()) * KATS::abs(max_x()); + const mag_type tolerance = KATS::eps() * KATS::abs(beta) * KATS::abs(max_y()) + + 10 * KATS::eps() * maxNnzPerRow * KATS::abs(alpha) * KATS::abs(max_a()) * + KATS::abs(max_x()); std::vector errIdx; @@ -195,15 +184,12 @@ void test_spmv(Handle *handle, const char *mode, const Alpha &alpha, } if (!errIdx.empty()) { - std::string alg = - KokkosSparse::get_spmv_algorithm_name(handle->get_algorithm()); + std::string alg = KokkosSparse::get_spmv_algorithm_name(handle->get_algorithm()); - std::cerr << __FILE__ << ":" << __LINE__ << " BsrMatrix SpMV failure!" - << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << " BsrMatrix SpMV failure!" 
<< std::endl; std::cerr << "alg: " << alg << std::endl; std::cerr << "mode: " << mode << std::endl; - std::cerr << "A: " << a.numRows() << "x" << a.numCols() - << std::endl; + std::cerr << "A: " << a.numRows() << "x" << a.numCols() << std::endl; std::cerr << "A blockdim: " << a.blockDim() << std::endl; std::cerr << "alpha: " << alpha << std::endl; std::cerr << "beta: " << beta << std::endl; @@ -229,14 +215,12 @@ void test_spmv(Handle *handle, const char *mode, const Alpha &alpha, template struct VectorTypeFor { - using type = Kokkos::View; + using type = Kokkos::View; }; template -std::tuple::type, - typename VectorTypeFor::type> -spmv_corner_case_0_by_0(const char * /*mode*/, const int blockSize) { +std::tuple::type, typename VectorTypeFor::type> spmv_corner_case_0_by_0( + const char * /*mode*/, const int blockSize) { using vector_type = typename VectorTypeFor::type; Bsr a = bsr_corner_case_0_by_0(blockSize); vector_type x("x", 0); @@ -245,9 +229,8 @@ spmv_corner_case_0_by_0(const char * /*mode*/, const int blockSize) { } template -std::tuple::type, - typename VectorTypeFor::type> -spmv_corner_case_0_by_1(const char *mode, const int blockSize) { +std::tuple::type, typename VectorTypeFor::type> spmv_corner_case_0_by_1( + const char *mode, const int blockSize) { using vector_type = typename VectorTypeFor::type; using execution_space = typename Bsr::execution_space; using scalar_type = typename Bsr::non_const_value_type; @@ -269,9 +252,8 @@ spmv_corner_case_0_by_1(const char *mode, const int blockSize) { } template -std::tuple::type, - typename VectorTypeFor::type> -spmv_corner_case_1_by_0(const char *mode, const int blockSize) { +std::tuple::type, typename VectorTypeFor::type> spmv_corner_case_1_by_0( + const char *mode, const int blockSize) { using vector_type = typename VectorTypeFor::type; using execution_space = typename Bsr::execution_space; using scalar_type = typename Bsr::non_const_value_type; @@ -296,10 +278,10 @@ spmv_corner_case_1_by_0(const char *mode, 
const int blockSize) { */ template -std::tuple::type, - typename VectorTypeFor::type> -spmv_random(const char *mode, const int blockSize, const int blockRows, - const int blockCols) { +std::tuple::type, typename VectorTypeFor::type> spmv_random(const char *mode, + const int blockSize, + const int blockRows, + const int blockCols) { using scalar_type = typename Bsr::non_const_value_type; // expand to Bsr matrix @@ -327,10 +309,12 @@ spmv_random(const char *mode, const int blockSize, const int blockRows, /*! \brief create random x and y multivectors for a given matrix and spmv mode */ template -auto random_vecs_for_spmv(const char *mode, const Bsr &a) { +auto random_vecs_for_spmv(const char *mode, const Bsr &a, const bool nans = false) + -> std::tuple::type, typename VectorTypeFor::type> { using scalar_type = typename Bsr::non_const_value_type; using vector_type = typename VectorTypeFor::type; using execution_space = typename Bsr::execution_space; + using policy_type = Kokkos::RangePolicy; size_t nx = a.numCols() * a.blockDim(); size_t ny = a.numRows() * a.blockDim(); @@ -344,26 +328,41 @@ auto random_vecs_for_spmv(const char *mode, const Bsr &a) { Kokkos::fill_random(x, random, max_x()); Kokkos::fill_random(y, random, max_y()); + if (nans) { + Kokkos::parallel_for( + policy_type(0, x.extent(0)), KOKKOS_LAMBDA(size_t i) { + if (0 == (i % 17)) { + x(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); + Kokkos::parallel_for( + policy_type(0, y.extent(0)), KOKKOS_LAMBDA(size_t i) { + if (0 == (i % 17)) { + y(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); + } + return std::make_tuple(x, y); } /*! 
\brief test all combos of the provided matrix */ template -void test_spmv_combos(const char *mode, const Bsr &a, const Crs &acrs, - size_t maxNnzPerRow) { +void test_spmv_combos(const char *mode, const Bsr &a, const Crs &acrs, size_t maxNnzPerRow) { using namespace KokkosSparse; using scalar_type = typename Bsr::non_const_value_type; using execution_space = typename Bsr::execution_space; - auto [x, y] = random_vecs_for_spmv(mode, a); + auto [x, y] = random_vecs_for_spmv(mode, a); + auto [x_with_nans, y_with_nans] = random_vecs_for_spmv(mode, a, true); using handle_t = SPMVHandle; // cover a variety of algorithms - std::vector handles; + std::vector> handles; for (SPMVAlgorithm algo : {SPMV_DEFAULT, SPMV_NATIVE, SPMV_BSR_V41}) - handles.push_back(new handle_t(algo)); + handles.push_back(std::make_unique(algo)); // Tensor core algorithm temporarily disabled, fails on V100 /* @@ -383,12 +382,13 @@ void test_spmv_combos(const char *mode, const Bsr &a, const Crs &acrs, } */ - for (handle_t *handle : handles) { - for (scalar_type alpha : - {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(3.7)}) { - for (scalar_type beta : {scalar_type(0), scalar_type(1), scalar_type(-1), - scalar_type(-1.5)}) { - test_spmv(handle, mode, alpha, beta, a, acrs, maxNnzPerRow, x, y); + for (std::unique_ptr &handle : handles) { + for (scalar_type alpha : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(3.7)}) { + for (scalar_type beta : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(-1.5)}) { + test_spmv(handle.get(), mode, alpha, beta, a, acrs, maxNnzPerRow, x, y); + if (beta == scalar_type(0)) { + test_spmv(handle.get(), mode, alpha, beta, a, acrs, maxNnzPerRow, x_with_nans, y_with_nans); + } } } } @@ -398,8 +398,7 @@ void test_spmv_combos(const char *mode, const Bsr &a, const Crs &acrs, */ template void test_spmv_corner_cases() { - using Bsr = KokkosSparse::Experimental::BsrMatrix; + using Bsr = KokkosSparse::Experimental::BsrMatrix; using Crs = 
KokkosSparse::CrsMatrix; for (auto mode : {"N", "T", "C", "H"}) { for (int bs : {1, 2, 5, 9}) { @@ -424,8 +423,7 @@ void test_spmv_corner_cases() { template void test_spmv_random() { - using Bsr = KokkosSparse::Experimental::BsrMatrix; + using Bsr = KokkosSparse::Experimental::BsrMatrix; using Crs = KokkosSparse::CrsMatrix; // thoroughly test smaller matrices std::vector> shapes = {{10, 10}, {10, 50}, {50, 10}}; @@ -436,9 +434,7 @@ void test_spmv_random() { size_t maxNnzPerRow = opMaxNnzPerRow(A, false); size_t maxNnzPerRowTrans = opMaxNnzPerRow(A, true); for (auto mode : {"N", "T", "C", "H"}) { - test_spmv_combos( - mode, A, Acrs, - mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); + test_spmv_combos(mode, A, Acrs, mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); } } } @@ -448,14 +444,12 @@ void test_spmv_random() { constexpr int blockSizePrime = 7; constexpr int smallPrime = 11; constexpr int largePrime = 499; - auto A = bsr_random(blockSizePrime, smallPrime, largePrime); - auto Acrs = KokkosSparse::Impl::bsr_to_crs(A); - size_t maxNnzPerRow = opMaxNnzPerRow(A, false); - size_t maxNnzPerRowTrans = opMaxNnzPerRow(A, true); + auto A = bsr_random(blockSizePrime, smallPrime, largePrime); + auto Acrs = KokkosSparse::Impl::bsr_to_crs(A); + size_t maxNnzPerRow = opMaxNnzPerRow(A, false); + size_t maxNnzPerRowTrans = opMaxNnzPerRow(A, true); for (auto mode : {"N", "T"}) { - test_spmv_combos( - mode, A, Acrs, - mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); + test_spmv_combos(mode, A, Acrs, mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); } } } @@ -472,10 +466,9 @@ void test_spmv() { // Note: if mode_is_transpose(mode), then maxNnzPerRow is for A^T. Otherwise, // it's for A. 
-template -void test_spm_mv(Handle *handle, const char *mode, const Alpha &alpha, - const Beta &beta, const Bsr &a, const Crs &acrs, +template +void test_spm_mv(Handle *handle, const char *mode, const Alpha &alpha, const Beta &beta, const Bsr &a, const Crs &acrs, size_t maxNnzPerRow, const XVector &x, const YVector &y) { using scalar_type = typename Bsr::non_const_value_type; using ordinal_type = typename Bsr::non_const_ordinal_type; @@ -503,10 +496,9 @@ void test_spm_mv(Handle *handle, const char *mode, const Alpha &alpha, scaling y is one op dot product of x is two ops per entry (mul and add) */ - const mag_type tolerance = - KATS::eps() * KATS::abs(beta) * KATS::abs(max_y()) + - 10 * KATS::eps() * maxNnzPerRow * KATS::abs(alpha) * - KATS::abs(max_a()) * KATS::abs(max_x()); + const mag_type tolerance = KATS::eps() * KATS::abs(beta) * KATS::abs(max_y()) + + 10 * KATS::eps() * maxNnzPerRow * KATS::abs(alpha) * KATS::abs(max_a()) * + KATS::abs(max_x()); std::vector> errIdx; @@ -519,15 +511,12 @@ void test_spm_mv(Handle *handle, const char *mode, const Alpha &alpha, } if (!errIdx.empty()) { - std::string alg = - KokkosSparse::get_spmv_algorithm_name(handle->get_algorithm()); + std::string alg = KokkosSparse::get_spmv_algorithm_name(handle->get_algorithm()); - std::cerr << __FILE__ << ":" << __LINE__ << " BsrMatrix SpMMV failure!" - << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << " BsrMatrix SpMMV failure!" 
<< std::endl; std::cerr << "alg: " << alg << std::endl; std::cerr << "mode: " << mode << std::endl; - std::cerr << "A: " << a.numRows() << "x" << a.numCols() - << std::endl; + std::cerr << "A: " << a.numRows() << "x" << a.numCols() << std::endl; std::cerr << "A blockdim: " << a.blockDim() << std::endl; std::cerr << "alpha: " << alpha << std::endl; std::cerr << "beta: " << beta << std::endl; @@ -555,18 +544,18 @@ void test_spm_mv(Handle *handle, const char *mode, const Alpha &alpha, template struct MultiVectorTypeFor { - using type = Kokkos::View; + using type = Kokkos::View; }; /*! \brief create random x and y multivectors for a given matrix and spmv mode */ template -auto random_multivecs_for_spm_mv(const char *mode, const Bsr &a, - const size_t numVecs) { +auto random_multivecs_for_spm_mv(const char *mode, const Bsr &a, const size_t numVecs, const bool nans = false) + -> std::tuple::type, typename MultiVectorTypeFor::type> { using scalar_type = typename Bsr::non_const_value_type; using vector_type = typename MultiVectorTypeFor::type; using execution_space = typename Bsr::execution_space; + using policy_type = Kokkos::RangePolicy; size_t nx = a.numCols() * a.blockDim(); size_t ny = a.numRows() * a.blockDim(); @@ -580,23 +569,41 @@ auto random_multivecs_for_spm_mv(const char *mode, const Bsr &a, Kokkos::fill_random(x, random, max_x()); Kokkos::fill_random(y, random, max_y()); + // sprinkle some "random" NaNs in + if (nans) { + Kokkos::parallel_for( + policy_type(0, x.extent(0)), KOKKOS_LAMBDA(size_t i) { + for (size_t j = 0; j < x.extent(1); ++j) { + if (0 == ((i * x.extent(1) + j) % 13)) { + x(i, j) = KokkosKernels::Impl::quiet_NaN(); + } + } + }); + Kokkos::parallel_for( + policy_type(0, y.extent(0)), KOKKOS_LAMBDA(size_t i) { + for (size_t j = 0; j < y.extent(1); ++j) { + if (0 == ((i * y.extent(1) + j) % 17)) { + y(i, j) = KokkosKernels::Impl::quiet_NaN(); + } + } + }); + } + return std::make_tuple(x, y); } template -void test_spm_mv_combos(const char *mode, 
const Bsr &a, const Crs &acrs, - size_t maxNnzPerRow) { +void test_spm_mv_combos(const char *mode, const Bsr &a, const Crs &acrs, size_t maxNnzPerRow) { using namespace KokkosSparse; using execution_space = typename Bsr::execution_space; using scalar_type = typename Bsr::non_const_value_type; using multivector_t = typename MultiVectorTypeFor::type; - using handle_t = - SPMVHandle; + using handle_t = SPMVHandle; // cover a variety of algorithms - std::vector handles; + std::vector> handles; for (SPMVAlgorithm algo : {SPMV_DEFAULT, SPMV_NATIVE, SPMV_BSR_V41}) - handles.push_back(new handle_t(algo)); + handles.push_back(std::make_unique(algo)); // Tensor core algorithm temporarily disabled, fails on V100 /* @@ -617,13 +624,15 @@ void test_spm_mv_combos(const char *mode, const Bsr &a, const Crs &acrs, */ for (size_t numVecs : {1, 7}) { // num multivecs - auto [x, y] = random_multivecs_for_spm_mv(mode, a, numVecs); - for (handle_t *handle : handles) { - for (scalar_type alpha : {scalar_type(0), scalar_type(1), scalar_type(-1), - scalar_type(3.7)}) { - for (scalar_type beta : {scalar_type(0), scalar_type(1), - scalar_type(-1), scalar_type(-1.5)}) { - test_spm_mv(handle, mode, alpha, beta, a, acrs, maxNnzPerRow, x, y); + auto [x, y] = random_multivecs_for_spm_mv(mode, a, numVecs); + auto [x_with_nans, y_with_nans] = random_multivecs_for_spm_mv(mode, a, numVecs, true); + for (std::unique_ptr &handle : handles) { + for (scalar_type alpha : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(3.7)}) { + for (scalar_type beta : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(-1.5)}) { + test_spm_mv(handle.get(), mode, alpha, beta, a, acrs, maxNnzPerRow, x, y); + if (beta == scalar_type(0)) { + test_spm_mv(handle.get(), mode, alpha, beta, a, acrs, maxNnzPerRow, x_with_nans, y_with_nans); + } } } } @@ -632,11 +641,9 @@ void test_spm_mv_combos(const char *mode, const Bsr &a, const Crs &acrs, /*! 
\brief test all combos of all matrices with different block sizes */ -template +template void test_spm_mv_corner_cases() { - using Bsr = KokkosSparse::Experimental::BsrMatrix; + using Bsr = KokkosSparse::Experimental::BsrMatrix; using Crs = KokkosSparse::CrsMatrix; for (auto mode : {"N", "T", "C", "H"}) { for (int bs : {1, 2, 5, 9}) { @@ -659,11 +666,9 @@ void test_spm_mv_corner_cases() { } } -template +template void test_spm_mv_random() { - using Bsr = KokkosSparse::Experimental::BsrMatrix; + using Bsr = KokkosSparse::Experimental::BsrMatrix; using Crs = KokkosSparse::CrsMatrix; // thoroughly test smaller matrices std::vector> shapes = {{10, 10}, {10, 50}, {50, 10}}; @@ -674,9 +679,7 @@ void test_spm_mv_random() { size_t maxNnzPerRow = opMaxNnzPerRow(A, false); size_t maxNnzPerRowTrans = opMaxNnzPerRow(A, true); for (auto mode : {"N", "T", "C", "H"}) { - test_spm_mv_combos( - mode, A, Acrs, - mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); + test_spm_mv_combos(mode, A, Acrs, mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); } } } @@ -686,20 +689,17 @@ void test_spm_mv_random() { constexpr int blockSizePrime = 7; constexpr int smallPrime = 11; constexpr int largePrime = 499; - auto A = bsr_random(blockSizePrime, smallPrime, largePrime); - auto Acrs = KokkosSparse::Impl::bsr_to_crs(A); - size_t maxNnzPerRow = opMaxNnzPerRow(A, false); - size_t maxNnzPerRowTrans = opMaxNnzPerRow(A, true); + auto A = bsr_random(blockSizePrime, smallPrime, largePrime); + auto Acrs = KokkosSparse::Impl::bsr_to_crs(A); + size_t maxNnzPerRow = opMaxNnzPerRow(A, false); + size_t maxNnzPerRowTrans = opMaxNnzPerRow(A, true); for (auto mode : {"N", "T"}) { - test_spm_mv_combos( - mode, A, Acrs, - mode_is_transpose(mode) ? maxNnzPerRowTrans : maxNnzPerRow); + test_spm_mv_combos(mode, A, Acrs, mode_is_transpose(mode) ? 
maxNnzPerRowTrans : maxNnzPerRow); } } } -template +template void test_spm_mv() { test_spm_mv_corner_cases(); test_spm_mv_random(); @@ -709,10 +709,9 @@ void test_spm_mv() { ////////////////////////// -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##bsr_spmv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - Test_Spmv_Bsr::test_spmv(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##bsr_spmv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + Test_Spmv_Bsr::test_spmv(); \ } #include @@ -721,12 +720,9 @@ void test_spm_mv() { ////////////////////////// -#define EXECUTE_BSR_TIMES_MVEC_TEST(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##bsr_spmmv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - Test_Spmv_Bsr::test_spm_mv(); \ +#define EXECUTE_BSR_TIMES_MVEC_TEST(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ + TEST_F(TestCategory, sparse##_##bsr_spmmv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ + Test_Spmv_Bsr::test_spm_mv(); \ } #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_sptrsv.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_sptrsv.hpp index b8b35bc422be..385367bca2df 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_sptrsv.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_sptrsv.hpp @@ -43,20 +43,16 @@ using kokkos_complex_float = Kokkos::complex; namespace Test { -template +template struct SptrsvTest { // Define useful types - using RowMapType = Kokkos::View; - using EntriesType = Kokkos::View; - using ValuesType = Kokkos::View; - using RowMapType_hostmirror = typename RowMapType::HostMirror; - using EntriesType_hostmirror = typename EntriesType::HostMirror; - using ValuesType_hostmirror = typename ValuesType::HostMirror; - using execution_space = typename 
device::execution_space; - using memory_space = typename device::memory_space; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, execution_space, memory_space, memory_space>; + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using execution_space = typename device::execution_space; + using memory_space = typename device::memory_space; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; using Crs = CrsMatrix; using Bsr = BsrMatrix; @@ -74,6 +70,13 @@ struct SptrsvTest { return A; } + static std::vector> get_6x6_ut_ones_fixture() { + std::vector> A = {{1.00, 1.00, 0.00, 0.00, 0.00, 0.00}, {0.00, 1.00, 0.00, 0.00, 0.00, 1.00}, + {0.00, 0.00, 1.00, 1.00, 0.00, 1.00}, {0.00, 0.00, 0.00, 1.00, 0.00, 1.00}, + {0.00, 0.00, 0.00, 0.00, 1.00, 1.00}, {0.00, 0.00, 0.00, 0.00, 0.00, 1.00}}; + return A; + } + static std::vector> get_5x5_ut_fixture() { const auto KZ = KEEP_ZERO(); std::vector> A = {{5.00, 1.00, 1.00, 0.00, KZ}, @@ -103,6 +106,22 @@ struct SptrsvTest { return A; } + static std::vector> get_6x6_lt_ones_fixture() { + std::vector> A = {{1.00, 0.00, 0.00, 0.00, 0.00, 0.00}, {1.00, 1.00, 0.00, 0.00, 0.00, 0.00}, + {0.00, 0.00, 1.00, 0.00, 0.00, 0.00}, {0.00, 0.00, 0.00, 1.00, 0.00, 0.00}, + {0.00, 0.00, 0.00, 1.00, 1.00, 0.00}, {0.00, 1.00, 1.00, 1.00, 1.00, 1.00}}; + return A; + } + + static bool do_cusparse() { +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + return (std::is_same::value && std::is_same::value && + std::is_same::value); +#else + return false; +#endif + } + struct ReductionCheck { ValuesType lhs; @@ -112,17 +131,83 @@ struct SptrsvTest { void operator()(lno_t i, scalar_t &tsum) const { tsum += lhs(i); } }; - static void run_test_sptrsv() { - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); + static std::tuple create_crs_lhs_rhs(const std::vector> &fixture) { + RowMapType row_map; + EntriesType entries; + ValuesType 
values; + + compress_matrix(row_map, entries, values, fixture); + const auto nrows = row_map.size() - 1; + const auto nnz = values.size(); + + // Create known_lhs, generate rhs, then solve for lhs to compare to + // known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, scalar_t(1)); + + // Solution to find + ValuesType lhs("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + ValuesType rhs("rhs", nrows); + + Crs triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); + KokkosSparse::spmv("N", scalar_t(1), triMtx, known_lhs, scalar_t(0), rhs); + + return std::make_tuple(triMtx, lhs, rhs); + } + + template + static void basic_check(const SpMatrix &triMtx, const ValuesType &lhs, const ValuesType &rhs, const bool is_lower, + const size_type block_size = 0) { + // FIXME Issues with some integral type combos for SEQLVLSCHED_TP2, + // currently unavailable + std::vector algs = {SPTRSVAlgorithm::SEQLVLSCHD_RP, SPTRSVAlgorithm::SEQLVLSCHD_TP1}; + if (block_size == 0) { + // SEQLVLSCHD_TP1CHAIN and SPTRSV_CUSPARSE are not supported for blocks + algs.push_back(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN); + if (do_cusparse()) { + algs.push_back(SPTRSVAlgorithm::SPTRSV_CUSPARSE); + } + } + + auto row_map = triMtx.graph.row_map; + auto entries = triMtx.graph.entries; + auto values = triMtx.values; + const size_type nrows = row_map.size() - 1; + + for (auto alg : algs) { + KernelHandle kh; + kh.create_sptrsv_handle(alg, nrows, is_lower, block_size); + if (alg == SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) { + auto chain_threshold = 1; + kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); + } + + sptrsv_symbolic(&kh, row_map, entries, values); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), ReductionCheck(lhs), sum); + EXPECT_EQ(sum, 
lhs.extent(0)); + + Kokkos::deep_copy(lhs, scalar_t(0)); + + kh.destroy_sptrsv_handle(); + } + } + + static void run_test_sptrsv() { const size_type nrows = 5; - const size_type nnz = 10; #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) - using host_crsmat_t = - typename KernelHandle::SPTRSVHandleType::host_crsmat_t; - using host_graph_t = typename host_crsmat_t::StaticCrsGraphType; + using host_crsmat_t = typename KernelHandle::SPTRSVHandleType::host_crsmat_t; + using host_graph_t = typename host_crsmat_t::StaticCrsGraphType; using row_map_view_t = typename host_graph_t::row_map_type::non_const_type; using cols_view_t = typename host_graph_t::entries_type::non_const_type; @@ -142,120 +227,11 @@ struct SptrsvTest { // Upper tri { - RowMapType row_map; - EntriesType entries; - ValuesType values; - - auto fixture = get_5x5_ut_ones_fixture(); - - compress_matrix(row_map, entries, values, fixture); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - ValuesType lhs("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - ValuesType rhs("rhs", nrows); - - Crs triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs); - - { - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - 
Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - // FIXME Issues with various integral type combos - algorithm currently - // unavailable and commented out until fixed - /* - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0) ); - */ - - kh.destroy_sptrsv_handle(); - } - { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN, nrows, - is_lower_tri); - auto chain_threshold = 1; - kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - kh.destroy_sptrsv_handle(); - } - -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (std::is_same::value && - std::is_same::value && - std::is_same::value) { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries, values); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); + const auto [triMtx, lhs, rhs] = create_crs_lhs_rhs(get_5x5_ut_ones_fixture()); - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - kh.destroy_sptrsv_handle(); + basic_check(triMtx, lhs, rhs, false); } -#endif #if 
defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) const scalar_t FIVE = scalar_t(5); @@ -281,8 +257,7 @@ struct SptrsvTest { // create handle for Supernodal Sptrsv bool is_lower_tri = false; - khU.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, - is_lower_tri); + khU.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, is_lower_tri); // X = U*ONES to generate B = A*ONES (on device) { @@ -295,8 +270,8 @@ struct SptrsvTest { Kokkos::deep_copy(Uvalues, hUvalues); Crs mtxU("mtxU", nrows, nrows, nnz_sp, Uvalues, Urowptr, Ucolind); - Kokkos::deep_copy(B, ONE); - KokkosSparse::spmv("N", ONE, mtxU, B, ZERO, X); + Kokkos::deep_copy(B, scalar_t(1)); + KokkosSparse::spmv("N", scalar_t(1), mtxU, B, scalar_t(0), X); } } @@ -340,23 +315,23 @@ struct SptrsvTest { // values // first column (first supernode) hUvalues(0) = FIVE; - hUvalues(1) = ZERO; + hUvalues(1) = scalar_t(0); // second column (first supernode) - hUvalues(2) = ONE; + hUvalues(2) = scalar_t(1); hUvalues(3) = FIVE; // third column (second supernode) hUvalues(4) = FIVE; - hUvalues(5) = ONE; - hUvalues(6) = ZERO; + hUvalues(5) = scalar_t(1); + hUvalues(6) = scalar_t(0); // fourth column (third supernode) hUvalues(7) = FIVE; - hUvalues(8) = ONE; + hUvalues(8) = scalar_t(1); // fifth column (fourth supernode) hUvalues(9) = FIVE; - hUvalues(10) = ZERO; - hUvalues(11) = ONE; - hUvalues(12) = ONE; - hUvalues(13) = ONE; + hUvalues(10) = scalar_t(0); + hUvalues(11) = scalar_t(1); + hUvalues(12) = scalar_t(1); + hUvalues(13) = scalar_t(1); // store Ut in crsmat host_graph_t static_graph(hUrowind, hUcolptr); @@ -367,119 +342,11 @@ struct SptrsvTest { // Lower tri { - auto fixture = get_5x5_lt_ones_fixture(); - RowMapType row_map; - EntriesType entries; - ValuesType values; - - compress_matrix(row_map, entries, values, fixture); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - 
Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - ValuesType lhs("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - ValuesType rhs("rhs", nrows); - - Crs triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs); - - { - KernelHandle kh; - bool is_lower_tri = true; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - // FIXME Issues with various integral type combos - algorithm currently - // unavailable and commented out until fixed - /* - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce( range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ( sum, lhs.extent(0) ); - */ - - kh.destroy_sptrsv_handle(); - } - { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = true; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN, nrows, - is_lower_tri); - auto chain_threshold = 1; - kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - 
Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - kh.destroy_sptrsv_handle(); - } - -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (std::is_same::value && - std::is_same::value && - std::is_same::value) { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = true; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries, values); - Kokkos::fence(); + const auto [triMtx, lhs, rhs] = create_crs_lhs_rhs(get_5x5_lt_ones_fixture()); - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - kh.destroy_sptrsv_handle(); + basic_check(triMtx, lhs, rhs, true); } -#endif #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) { @@ -503,8 +370,7 @@ struct SptrsvTest { L = host_crsmat_t("CrsMatrixL", nrows, hLvalues, static_graph); bool is_lower_tri = true; - khL.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, - is_lower_tri); + khL.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, is_lower_tri); // generate B = A*ONES = L*(U*ONES), where X = U*ONES (on device) { @@ -517,7 +383,7 @@ struct SptrsvTest { Kokkos::deep_copy(Lvalues, hLvalues); Crs mtxL("mtxL", nrows, nrows, nnz_sp, Lvalues, Lcolptr, Lrowind); - KokkosSparse::spmv("T", ONE, mtxL, X, ZERO, B); + KokkosSparse::spmv("T", scalar_t(1), mtxL, X, scalar_t(0), B); } } @@ -525,8 +391,7 @@ struct SptrsvTest { // unit-test for supernode SpTrsv (default) // > set up supernodes (block size = one) size_type nsupers = 4; - Kokkos::View supercols("supercols", - 1 + nsupers); + Kokkos::View supercols("supercols", 1 + nsupers); supercols(0) = 0; supercols(1) = 2; // two columns supercols(2) = 3; // one column @@ -540,8 +405,7 @@ struct SptrsvTest { 
khU.set_sptrsv_invert_diagonal(invert_diag); // > symbolic (on host) - sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, - &khL, U.graph, &khU); + sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, &khL, U.graph, &khU); // > numeric (on host) sptrsv_compute(&khL, L); sptrsv_compute(&khU, U); @@ -550,15 +414,13 @@ struct SptrsvTest { // > solve ValuesType b("b", nrows); Kokkos::deep_copy(b, B); - Kokkos::deep_copy(X, ZERO); + Kokkos::deep_copy(X, scalar_t(0)); sptrsv_solve(&khL, &khU, X, b); Kokkos::fence(); // > check scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, X.extent(0)), - ReductionCheck(X), sum); - EXPECT_EQ(sum, lhs.extent(0)); + Kokkos::parallel_reduce(range_policy_t(0, X.extent(0)), ReductionCheck(X), sum); EXPECT_EQ(sum, X.extent(0)); khL.destroy_sptrsv_handle(); @@ -569,8 +431,7 @@ struct SptrsvTest { // unit-test for supernode SpTrsv (running TRMM on device for compute) // > set up supernodes size_type nsupers = 4; - Kokkos::View supercols("supercols", - 1 + nsupers); + Kokkos::View supercols("supercols", 1 + nsupers); supercols(0) = 0; supercols(1) = 2; // two columns supercols(2) = 3; // one column @@ -582,8 +443,7 @@ struct SptrsvTest { KernelHandle khLd; KernelHandle khUd; khLd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, true); - khUd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, - false); + khUd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, false); // > invert diagonal blocks bool invert_diag = true; @@ -601,8 +461,7 @@ struct SptrsvTest { khUd.set_sptrsv_diag_supernode_sizes(1, 1); // > symbolic (on host) - sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, - &khLd, Ut.graph, &khUd); + sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, &khLd, Ut.graph, &khUd); // > numeric (on host) sptrsv_compute(&khLd, L); sptrsv_compute(&khUd, Ut); @@ -611,15 +470,13 @@ struct SptrsvTest { // > solve ValuesType b("b", 
nrows); Kokkos::deep_copy(b, B); - Kokkos::deep_copy(X, ZERO); + Kokkos::deep_copy(X, scalar_t(0)); sptrsv_solve(&khLd, &khUd, X, b); Kokkos::fence(); // > check scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, X.extent(0)), - ReductionCheck(X), sum); - EXPECT_EQ(sum, lhs.extent(0)); + Kokkos::parallel_reduce(range_policy_t(0, X.extent(0)), ReductionCheck(X), sum); EXPECT_EQ(sum, X.extent(0)); khLd.destroy_sptrsv_handle(); @@ -629,7 +486,22 @@ struct SptrsvTest { } } - static void run_test_sptrsv_streams(int test_algo, int nstreams) { + static void run_test_sptrsv_blocks_impl(const bool is_lower, const size_type block_size) { + auto fixture = is_lower ? get_6x6_lt_ones_fixture() : get_6x6_ut_ones_fixture(); + const auto [triMtx_crs, lhs, rhs] = create_crs_lhs_rhs(fixture); + + Bsr triMtx(triMtx_crs, block_size); + basic_check(triMtx, lhs, rhs, is_lower, block_size); + } + + static void run_test_sptrsv_blocks() { + for (size_type block_size : {1, 2, 3}) { + run_test_sptrsv_blocks_impl(true, block_size); + run_test_sptrsv_blocks_impl(false, block_size); + } + } + + static void run_test_sptrsv_streams(SPTRSVAlgorithm test_algo, int nstreams, const bool is_lower) { // Workaround for OpenMP: skip tests if concurrency < nstreams because of // not enough resource to partition bool run_streams_test = true; @@ -638,21 +510,16 @@ struct SptrsvTest { int exec_concurrency = execution_space().concurrency(); if (exec_concurrency < nstreams) { run_streams_test = false; - std::cout << " Skip stream test: concurrency = " << exec_concurrency - << std::endl; + std::cout << " Skip stream test: concurrency = " << exec_concurrency << std::endl; } } #endif if (!run_streams_test) return; - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - const size_type nrows = 5; const size_type nnz = 10; - auto instances = Kokkos::Experimental::partition_space( - execution_space(), std::vector(nstreams, 1)); + auto instances = 
Kokkos::Experimental::partition_space(execution_space(), std::vector(nstreams, 1)); std::vector kh_v(nstreams); std::vector kh_ptr_v(nstreams); @@ -662,193 +529,93 @@ struct SptrsvTest { std::vector rhs_v(nstreams); std::vector lhs_v(nstreams); - RowMapType_hostmirror hrow_map; - EntriesType_hostmirror hentries; - ValuesType_hostmirror hvalues; - - // Upper tri - { - auto fixture = get_5x5_ut_ones_fixture(); - compress_matrix(hrow_map, hentries, hvalues, fixture); - - for (int i = 0; i < nstreams; i++) { - // Allocate U - row_map_v[i] = RowMapType("row_map", nrows + 1); - entries_v[i] = EntriesType("entries", nnz); - values_v[i] = ValuesType("values", nnz); - - // Copy from host to device - Kokkos::deep_copy(row_map_v[i], hrow_map); - Kokkos::deep_copy(entries_v[i], hentries); - Kokkos::deep_copy(values_v[i], hvalues); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - lhs_v[i] = ValuesType("lhs", nrows); + auto fixture = is_lower ? 
get_5x5_lt_ones_fixture() : get_5x5_ut_ones_fixture(); + const auto [triMtx, lhs, rhs] = create_crs_lhs_rhs(fixture); - // A*known_lhs generates rhs: rhs is dense, use spmv - rhs_v[i] = ValuesType("rhs", nrows); + auto row_map = triMtx.graph.row_map; + auto entries = triMtx.graph.entries; + auto values = triMtx.values; - Crs triMtx("triMtx", nrows, nrows, nnz, values_v[i], row_map_v[i], - entries_v[i]); + for (int i = 0; i < nstreams; i++) { + // Allocate + row_map_v[i] = RowMapType("row_map", nrows + 1); + entries_v[i] = EntriesType("entries", nnz); + values_v[i] = ValuesType("values", nnz); - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs_v[i]); - Kokkos::fence(); - - // Create handle - kh_v[i] = KernelHandle(); - bool is_lower_tri = false; - if (test_algo == 0) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, - is_lower_tri); - else if (test_algo == 1) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - else - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - kh_ptr_v[i] = &kh_v[i]; - - // Symbolic phase - sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); - Kokkos::fence(); - } // Done handle creation and sptrsv_symbolic on all streams - - // Solve phase - sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, - rhs_v, lhs_v); - - for (int i = 0; i < nstreams; i++) instances[i].fence(); - - // Checking - for (int i = 0; i < nstreams; i++) { - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs_v[i].extent(0)), - ReductionCheck(lhs_v[i]), sum); - EXPECT_EQ(sum, lhs_v[i].extent(0)); - - kh_v[i].destroy_sptrsv_handle(); - } - } + // Copy + Kokkos::deep_copy(row_map_v[i], row_map); + Kokkos::deep_copy(entries_v[i], entries); + Kokkos::deep_copy(values_v[i], values); - // Lower tri - { - auto fixture = get_5x5_lt_ones_fixture(); - compress_matrix(hrow_map, hentries, hvalues, fixture); - - for (int i = 
0; i < nstreams; i++) { - // Allocate L - row_map_v[i] = RowMapType("row_map", nrows + 1); - entries_v[i] = EntriesType("entries", nnz); - values_v[i] = ValuesType("values", nnz); - - // Copy from host to device - Kokkos::deep_copy(row_map_v[i], hrow_map); - Kokkos::deep_copy(entries_v[i], hentries); - Kokkos::deep_copy(values_v[i], hvalues); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - lhs_v[i] = ValuesType("lhs", nrows); + // Create known_lhs, generate rhs, then solve for lhs to compare to + // known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, scalar_t(1)); - // A*known_lhs generates rhs: rhs is dense, use spmv - rhs_v[i] = ValuesType("rhs", nrows); + // Solution to find + lhs_v[i] = ValuesType("lhs", nrows); - Crs triMtx("triMtx", nrows, nrows, nnz, values_v[i], row_map_v[i], - entries_v[i]); + // A*known_lhs generates rhs: rhs is dense, use spmv + rhs_v[i] = ValuesType("rhs", nrows); - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs_v[i]); - Kokkos::fence(); + KokkosSparse::spmv("N", scalar_t(1), triMtx, known_lhs, scalar_t(0), rhs_v[i]); + Kokkos::fence(); - // Create handle - kh_v[i] = KernelHandle(); - bool is_lower_tri = true; - if (test_algo == 0) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, - is_lower_tri); - else if (test_algo == 1) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - else - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - kh_ptr_v[i] = &kh_v[i]; - - // Symbolic phase - sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); - Kokkos::fence(); - } // Done handle creation and sptrsv_symbolic on all streams + // Create handle + kh_v[i] = 
KernelHandle(); + kh_v[i].create_sptrsv_handle(test_algo, nrows, is_lower); + kh_ptr_v[i] = &kh_v[i]; - // Solve phase - sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, - rhs_v, lhs_v); + // Symbolic phase + sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); + Kokkos::fence(); + } // Done handle creation and sptrsv_symbolic on all streams - for (int i = 0; i < nstreams; i++) instances[i].fence(); + // Solve phase + sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, rhs_v, lhs_v); - // Checking - for (int i = 0; i < nstreams; i++) { - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs_v[i].extent(0)), - ReductionCheck(lhs_v[i]), sum); - EXPECT_EQ(sum, lhs_v[i].extent(0)); + for (int i = 0; i < nstreams; i++) instances[i].fence(); - kh_v[i].destroy_sptrsv_handle(); - } + // Checking + for (int i = 0; i < nstreams; i++) { + scalar_t sum = 0.0; + Kokkos::parallel_reduce(range_policy_t(0, lhs_v[i].extent(0)), ReductionCheck(lhs_v[i]), sum); + EXPECT_EQ(sum, lhs_v[i].extent(0)); + kh_v[i].destroy_sptrsv_handle(); } } }; } // namespace Test -template +template void test_sptrsv() { using TestStruct = Test::SptrsvTest; TestStruct::run_test_sptrsv(); + TestStruct::run_test_sptrsv_blocks(); } -template +template void test_sptrsv_streams() { - using TestStruct = Test::SptrsvTest; + using TestStruct = Test::SptrsvTest; + std::vector algs = {SPTRSVAlgorithm::SEQLVLSCHD_RP, SPTRSVAlgorithm::SEQLVLSCHD_TP1}; + if (TestStruct::do_cusparse()) { + algs.push_back(SPTRSVAlgorithm::SPTRSV_CUSPARSE); + } - TestStruct::run_test_sptrsv_streams(0, 1); - TestStruct::run_test_sptrsv_streams(0, 2); - TestStruct::run_test_sptrsv_streams(0, 3); - TestStruct::run_test_sptrsv_streams(0, 4); - TestStruct::run_test_sptrsv_streams(1, 1); - TestStruct::run_test_sptrsv_streams(1, 2); - TestStruct::run_test_sptrsv_streams(1, 3); - TestStruct::run_test_sptrsv_streams(1, 4); - -#if defined(KOKKOS_ENABLE_CUDA) && 
defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) - if (std::is_same::value && - std::is_same::value) { - TestStruct::run_test_sptrsv_streams(2, 1); - TestStruct::run_test_sptrsv_streams(2, 2); - TestStruct::run_test_sptrsv_streams(2, 3); - TestStruct::run_test_sptrsv_streams(2, 4); + for (auto alg : algs) { + for (int nstreams = 1; nstreams <= 4; ++nstreams) { + TestStruct::run_test_sptrsv_streams(alg, nstreams, true); + TestStruct::run_test_sptrsv_streams(alg, nstreams, false); + } } -#endif } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##sptrsv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_sptrsv(); \ - test_sptrsv_streams(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, sparse##_##sptrsv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_sptrsv(); \ + test_sptrsv_streams(); \ } #include diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_trsv.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_trsv.hpp index 8fb4763d7121..bffdfef757ff 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_trsv.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_trsv.hpp @@ -15,8 +15,7 @@ //@HEADER #if !defined(TEST_HIP_SPARSE_CPP) && !defined(TEST_SYCL_SPARSE_CPP) && \ !defined(TEST_OPENMPTARGET_BATCHED_DENSE_CPP) && \ - (!defined(TEST_CUDA_SPARSE_CPP) || \ - (defined(TEST_CUDA_SPARSE_CPP) && defined(KOKKOS_ENABLE_CUDA_UVM))) + (!defined(TEST_CUDA_SPARSE_CPP) || (defined(TEST_CUDA_SPARSE_CPP) && defined(KOKKOS_ENABLE_CUDA_UVM))) #include #include @@ -35,25 +34,20 @@ typedef Kokkos::complex kokkos_complex_float; namespace Test { -template < - typename Crs, typename LUType, typename size_type, - typename std::enable_if::value>::type* = nullptr> -LUType get_LU(char l_or_u, int n, size_type& nnz, int row_size_variance, - int bandwidth, int) { - auto LU = KokkosSparse::Impl::kk_generate_triangular_sparse_matrix( - 
l_or_u, n, n, nnz, row_size_variance, bandwidth); +template ::value>::type* = nullptr> +LUType get_LU(char l_or_u, int n, size_type& nnz, int row_size_variance, int bandwidth, int) { + auto LU = + KokkosSparse::Impl::kk_generate_triangular_sparse_matrix(l_or_u, n, n, nnz, row_size_variance, bandwidth); return LU; } -template < - typename Crs, typename LUType, typename size_type, - typename std::enable_if::value>::type* = nullptr> -LUType get_LU(char l_or_u, int n, size_type& nnz, int row_size_variance, - int bandwidth, int block_size) { +template ::value>::type* = nullptr> +LUType get_LU(char l_or_u, int n, size_type& nnz, int row_size_variance, int bandwidth, int block_size) { auto LU_unblocked = - KokkosSparse::Impl::kk_generate_triangular_sparse_matrix( - l_or_u, n, n, nnz, row_size_variance, bandwidth); + KokkosSparse::Impl::kk_generate_triangular_sparse_matrix(l_or_u, n, n, nnz, row_size_variance, bandwidth); // Convert to BSR LUType LU(LU_unblocked, block_size); @@ -61,8 +55,7 @@ LUType get_LU(char l_or_u, int n, size_type& nnz, int row_size_variance, return LU; } -template +template struct TrsvTest { using View2D = Kokkos::View; using execution_space = typename device::execution_space; @@ -73,15 +66,13 @@ struct TrsvTest { // TODO: remove this once MD develop branch is merge. // The below functionolity exists in SparseUtils. template - static void check_trsv_mv(sp_matrix_type input_mat, View2D x, View2D b, - View2D expected_x, int numMV, const char uplo[], - const char trans[]) { - double eps = (std::is_same::value - ? 2 * 1e-2 - : (std::is_same>::value || - std::is_same>::value) - ? 2 * 1e-1 - : 1e-7); + static void check_trsv_mv(sp_matrix_type input_mat, View2D x, View2D b, View2D expected_x, int numMV, + const char uplo[], const char trans[]) { + double eps = + (std::is_same::value ? 2 * 1e-2 + : (std::is_same>::value || std::is_same>::value) + ? 
2 * 1e-1 + : 1e-7); Kokkos::fence(); KokkosSparse::trsv(uplo, trans, "N", input_mat, b, x); @@ -96,8 +87,7 @@ struct TrsvTest { } template - static void test_trsv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, - lno_t row_size_variance, int numMV) { + static void test_trsv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_size_variance, int numMV) { using sp_matrix_type = std::conditional_t; constexpr auto block_size = UseBlocks ? 10 : 1; @@ -115,12 +105,10 @@ struct TrsvTest { scalar_t beta = 0; // this function creates a dense lower and upper triangular matrix. - auto lower_part = get_LU( - 'L', numRows, nnz, row_size_variance, bandwidth, block_size); + auto lower_part = + get_LU('L', numRows, nnz, row_size_variance, bandwidth, block_size); - Test::shuffleMatrixEntries(lower_part.graph.row_map, - lower_part.graph.entries, lower_part.values, - block_size); + Test::shuffleMatrixEntries(lower_part.graph.row_map, lower_part.graph.entries, lower_part.values, block_size); KokkosSparse::spmv("N", alpha, lower_part, b_x_copy, beta, b_y); check_trsv_mv(lower_part, b_x, b_y, b_x_copy, numMV, "L", "N"); @@ -130,12 +118,10 @@ struct TrsvTest { check_trsv_mv(lower_part, b_x, b_y, b_x_copy, numMV, "L", "T"); } - auto upper_part = get_LU( - 'U', numRows, nnz, row_size_variance, bandwidth, block_size); + auto upper_part = + get_LU('U', numRows, nnz, row_size_variance, bandwidth, block_size); - Test::shuffleMatrixEntries(upper_part.graph.row_map, - upper_part.graph.entries, upper_part.values, - block_size); + Test::shuffleMatrixEntries(upper_part.graph.row_map, upper_part.graph.entries, upper_part.values, block_size); KokkosSparse::spmv("N", alpha, upper_part, b_x_copy, beta, b_y); check_trsv_mv(upper_part, b_x, b_y, b_x_copy, numMV, "U", "N"); @@ -149,8 +135,7 @@ struct TrsvTest { } // namespace Test -template +template void test_trsv_mv() { using TestStruct = Test::TrsvTest; TestStruct::template test_trsv_mv(1000, 1000 * 30, 200, 10, 1); @@ -164,16 +149,13 @@ void 
test_trsv_mv() { // Note BMK 7-22: the matrix generator used by this test always // generates a dense triangle. It ignores bandwidth, nnz and row size variance. -#define EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##trsv_mv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - test_trsv_mv(); \ +#define EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ + TEST_F(TestCategory, sparse##_##trsv_mv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ + test_trsv_mv(); \ } #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LayoutLeft, TestDevice) @@ -185,8 +167,7 @@ void test_trsv_mv() { #endif // KOKKOSKERNELS_INST_LAYOUTLEFT #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LayoutRight, TestDevice) diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_vector_fixtures.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_vector_fixtures.hpp index 2037a5485e9f..514290d1694a 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_vector_fixtures.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_vector_fixtures.hpp @@ -33,10 +33,8 @@ scalar_t KEEP_ZERO() { } template -void compress_matrix( - MapT& map, EntriesT& entries, ValuesT& values, - const std::vector>& - fixture) { +void compress_matrix(MapT& map, EntriesT& entries, ValuesT& values, + const std::vector>& fixture) { using size_type = typename MapT::non_const_value_type; 
using scalar_t = typename ValuesT::non_const_value_type; @@ -90,11 +88,10 @@ void compress_matrix( Kokkos::deep_copy(values, hvalues); } -template -std::vector> -decompress_matrix(const RowMapT& row_map, const EntriesT& entries, - const ValuesT& values) { +template +std::vector> decompress_matrix(const RowMapT& row_map, + const EntriesT& entries, + const ValuesT& values) { using size_type = typename RowMapT::non_const_value_type; using scalar_t = typename ValuesT::non_const_value_type; @@ -132,10 +129,9 @@ decompress_matrix(const RowMapT& row_map, const EntriesT& entries, } template -std::vector> -decompress_matrix(const RowMapT& row_map, const EntriesT& entries, - const ValuesT& values, - typename RowMapT::const_value_type block_size) { +std::vector> decompress_matrix( + const RowMapT& row_map, const EntriesT& entries, const ValuesT& values, + typename RowMapT::const_value_type block_size) { using size_type = typename RowMapT::non_const_value_type; using scalar_t = typename ValuesT::non_const_value_type; @@ -165,9 +161,8 @@ decompress_matrix(const RowMapT& row_map, const EntriesT& entries, for (size_type i = 0; i < block_size; ++i) { const size_type unc_row_idx = row_idx * block_size + i; for (size_type j = 0; j < block_size; ++j) { - const size_type unc_col_idx = col_idx * block_size + j; - result[unc_row_idx][unc_col_idx] = - hvalues(row_nnz * block_items + i * block_size + j); + const size_type unc_col_idx = col_idx * block_size + j; + result[unc_row_idx][unc_col_idx] = hvalues(row_nnz * block_items + i * block_size + j); } } } @@ -177,11 +172,8 @@ decompress_matrix(const RowMapT& row_map, const EntriesT& entries, } template -void check_matrix( - const std::string& name, const RowMapT& row_map, const EntriesT& entries, - const ValuesT& values, - const std::vector>& - expected) { +void check_matrix(const std::string& name, const RowMapT& row_map, const EntriesT& entries, const ValuesT& values, + const std::vector>& expected) { using size_type = typename 
RowMapT::non_const_value_type; const auto decompressed_mtx = decompress_matrix(row_map, entries, values); @@ -189,10 +181,8 @@ void check_matrix( const size_type nrows = row_map.size() - 1; for (size_type row_idx = 0; row_idx < nrows; ++row_idx) { for (size_type col_idx = 0; col_idx < nrows; ++col_idx) { - EXPECT_NEAR(expected[row_idx][col_idx], - decompressed_mtx[row_idx][col_idx], 0.01) - << "Failed check is: " << name << "[" << row_idx << "][" << col_idx - << "]"; + EXPECT_NEAR(expected[row_idx][col_idx], decompressed_mtx[row_idx][col_idx], 0.01) + << "Failed check is: " << name << "[" << row_idx << "][" << col_idx << "]"; } } } diff --git a/packages/kokkos-kernels/sparse/unit_test/matrixIssue402.hpp b/packages/kokkos-kernels/sparse/unit_test/matrixIssue402.hpp index 124e59108c32..1035ec682440 100644 --- a/packages/kokkos-kernels/sparse/unit_test/matrixIssue402.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/matrixIssue402.hpp @@ -17,2685 +17,1809 @@ #define MATRIX_ISSUE_402 namespace MatrixIssue402 { static double values[11156] = { - 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, 0.000048, -0.000046, - 0.000034, -0.000034, 0.000018, -0.000001, -0.000013, 0.000013, -0.000017, - 0.000028, -0.000021, -0.000005, 0.000028, -0.000020, 0.000020, -0.000001, - 0.000048, -0.000046, 0.000034, -0.000034, 0.000078, -0.000004, 0.000055, - -0.000074, -0.000055, 0.000048, -0.000046, 0.000034, -0.000034, 0.000270, - -0.000015, -0.000074, 0.000055, -0.000099, -0.000099, 0.000034, -0.000002, - -0.000032, 0.000024, -0.000024, 0.000028, -0.000020, -0.000001, 0.000020, - 0.000044, -0.000017, -0.000078, 0.000042, 0.000029, -0.000029, 0.000018, - -0.000001, -0.000017, 0.000013, -0.000013, 0.000028, -0.000021, -0.000005, - 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, 0.000018, -0.000001, - -0.000017, -0.000013, 0.000013, 0.000018, -0.000001, -0.000017, 0.000013, - -0.000013, 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, 0.000018, - -0.000001, 0.000013, 
-0.000013, -0.000017, 0.000028, -0.000021, -0.000005, - -0.000017, 0.000134, -0.000007, -0.000127, -0.000047, 0.000047, 0.000060, - -0.000047, 0.000028, -0.000021, -0.000005, -0.000074, 0.000270, -0.000015, - 0.000055, -0.000099, -0.000099, 0.000124, -0.000059, -0.000059, -0.000044, - 0.000044, -0.000003, -0.000044, 0.000041, 0.000018, -0.000001, -0.000013, - -0.000017, 0.000013, 0.000034, -0.000002, -0.000024, 0.000024, -0.000032, - 0.000028, 0.000020, -0.000020, -0.000001, 0.000028, -0.000020, 0.000020, - -0.000001, 0.000028, -0.000021, -0.000005, 0.000034, -0.000002, -0.000032, - 0.000024, -0.000024, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, - 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, 0.000048, -0.000046, - 0.000034, -0.000034, 0.000098, -0.000017, -0.000175, 0.000078, 0.000065, - -0.000065, 0.000028, -0.000020, -0.000001, 0.000020, 0.000018, -0.000001, - -0.000017, -0.000013, 0.000013, 0.000044, -0.000017, 0.000029, 0.000042, - -0.000078, -0.000029, 0.000028, -0.000001, 0.000020, -0.000020, 0.000124, - -0.000059, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, - 0.000065, -0.000021, -0.000033, 0.000028, -0.000020, 0.000020, -0.000001, - 0.000065, -0.000021, -0.000033, 0.000065, -0.000021, -0.000033, 0.000078, - -0.000004, 0.000055, -0.000074, -0.000055, 0.000024, 0.000017, -0.000023, - -0.000017, 0.000048, -0.000002, 0.000034, -0.000034, -0.000046, 0.000044, - -0.000017, 0.000029, -0.000078, 0.000042, -0.000029, 0.000098, -0.000175, - -0.000017, 0.000078, 0.000065, -0.000065, 0.000024, 0.000017, -0.000023, - -0.000017, 0.000124, -0.000059, -0.000059, -0.000003, -0.000044, 0.000044, - -0.000044, 0.000041, 0.000028, -0.000021, -0.000005, 0.000018, -0.000001, - -0.000013, -0.000017, 0.000013, 0.000048, -0.000046, 0.000034, -0.000034, - 0.000298, -0.000027, -0.000266, 0.000120, 0.000099, -0.000209, -0.000017, - 0.000185, -0.000175, -0.000009, 0.000078, 0.000065, -0.000065, -0.000065, - 0.000024, -0.000023, 0.000017, -0.000017, 
0.000024, -0.000017, -0.000023, - 0.000017, 0.000028, -0.000020, -0.000001, 0.000020, 0.000018, -0.000001, - 0.000013, -0.000013, -0.000017, -0.000046, 0.000283, -0.000266, -0.000015, - 0.000099, 0.000134, -0.000099, -0.000099, -0.000017, -0.000009, 0.000185, - -0.000065, 0.000078, -0.000175, 0.000065, -0.000065, 0.000298, -0.000027, - -0.000266, 0.000120, 0.000099, -0.000209, 0.000028, -0.000021, -0.000005, - 0.000028, 0.000020, -0.000020, -0.000001, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000024, -0.000017, -0.000023, 0.000017, 0.000028, -0.000020, - 0.000020, -0.000001, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, - -0.000059, -0.000059, 0.000028, -0.000021, -0.000005, 0.000048, -0.000046, - 0.000034, -0.000034, 0.000028, -0.000021, -0.000005, 0.000018, -0.000001, - 0.000013, -0.000017, -0.000013, -0.000001, 0.000018, -0.000013, 0.000013, - -0.000017, 0.000028, -0.000001, 0.000020, -0.000020, -0.000027, -0.000027, - 0.000298, -0.000266, 0.000099, 0.000120, -0.000209, 0.000028, -0.000020, - -0.000001, 0.000020, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, - 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000028, -0.000020, 0.000020, -0.000001, 0.000065, -0.000021, -0.000033, - 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000124, -0.000059, -0.000059, -0.000044, 0.000044, -0.000003, -0.000044, - 0.000041, 0.000124, -0.000059, -0.000059, -0.000044, 0.000044, -0.000003, - -0.000044, 0.000041, 0.000024, 0.000017, -0.000023, -0.000017, 0.000028, - -0.000001, -0.000027, -0.000020, 0.000020, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, 0.000078, -0.000004, - 0.000055, -0.000055, -0.000074, -0.000074, 0.000270, -0.000015, 0.000055, - -0.000099, -0.000099, 0.000034, -0.000002, -0.000024, 
0.000024, -0.000032, - -0.000032, 0.000283, -0.000266, -0.000015, -0.000099, 0.000123, 0.000099, - -0.000099, 0.000028, 0.000020, -0.000020, -0.000001, 0.000298, -0.000266, - -0.000027, 0.000099, 0.000120, -0.000209, 0.000024, -0.000017, -0.000023, - 0.000017, 0.000065, -0.000021, -0.000033, -0.000074, 0.000270, -0.000015, - -0.000099, 0.000055, -0.000099, 0.000124, -0.000003, -0.000003, -0.000059, - -0.000059, 0.000044, -0.000044, 0.000028, 0.000020, -0.000020, -0.000001, - 0.000124, -0.000059, -0.000059, -0.000044, 0.000044, -0.000003, -0.000044, - 0.000041, 0.000048, -0.000046, -0.000034, 0.000034, 0.000048, -0.000046, - 0.000034, -0.000034, 0.000098, -0.000175, -0.000017, 0.000065, 0.000078, - -0.000065, 0.000024, 0.000017, -0.000023, -0.000017, 0.000124, -0.000003, - -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000065, -0.000021, - -0.000033, 0.000034, -0.000002, -0.000024, 0.000024, -0.000032, 0.000048, - -0.000046, 0.000034, -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, - 0.000065, -0.000021, -0.000033, 0.000124, -0.000003, -0.000003, -0.000059, - -0.000059, 0.000044, -0.000044, 0.000124, -0.000059, -0.000059, -0.000044, - -0.000003, 0.000044, -0.000044, 0.000041, 0.000124, -0.000059, -0.000059, - -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, 0.000138, -0.000095, - -0.000099, 0.000071, 0.000048, -0.000046, -0.000034, 0.000034, 0.000124, - -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, 0.000028, - -0.000021, -0.000005, 0.000028, -0.000021, -0.000005, 0.000028, -0.000021, - -0.000005, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, - -0.000044, 0.000048, -0.000046, 0.000034, -0.000034, 0.000124, -0.000003, - -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, - -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000138, -0.000095, - 0.000071, -0.000099, 0.000028, -0.000021, -0.000005, 0.000124, -0.000003, - -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000065, 
-0.000021, - -0.000033, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, 0.000028, -0.000021, -0.000005, 0.000071, -0.000127, -0.000017, - 0.000047, 0.000060, -0.000047, 0.000028, 0.000020, -0.000020, -0.000001, - 0.000048, -0.000046, 0.000034, -0.000034, -0.000266, 0.000283, -0.000015, - -0.000046, -0.000099, -0.000099, 0.000134, 0.000099, -0.000032, -0.000015, - -0.000266, 0.000283, -0.000099, 0.000123, 0.000099, -0.000099, 0.000241, - -0.000013, 0.000161, -0.000161, -0.000432, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000048, -0.000046, 0.000034, - -0.000034, -0.000059, -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, - -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000044, 0.000044, - -0.000003, -0.000044, 0.000041, 0.000028, -0.000020, 0.000020, -0.000001, - 0.000138, -0.000095, -0.000099, 0.000071, 0.000028, -0.000021, -0.000005, - 0.000124, -0.000059, -0.000059, -0.000003, 0.000044, -0.000044, -0.000003, - 0.000124, -0.000003, -0.000003, -0.000059, 0.000044, -0.000044, -0.000059, - -0.000044, 0.000044, -0.000059, 0.000124, -0.000059, -0.000044, -0.000003, - 0.000044, -0.000044, 0.000041, -0.000059, 0.000124, -0.000059, -0.000044, - -0.000003, 0.000044, -0.000044, 0.000041, 0.000065, -0.000021, -0.000033, - 0.000138, -0.000095, -0.000099, 0.000071, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000028, -0.000001, 0.000020, -0.000027, -0.000020, -0.000059, - -0.000059, 0.000124, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, - 0.000138, -0.000095, -0.000099, 0.000071, -0.000001, 0.000018, 0.000013, - -0.000013, -0.000017, 0.000048, -0.000046, 0.000034, -0.000034, 0.000048, - -0.000046, 0.000034, -0.000034, 0.000044, -0.000017, -0.000078, 0.000029, - 0.000042, -0.000029, -0.000059, -0.000059, 0.000124, -0.000044, -0.000003, - 0.000044, -0.000044, 0.000041, 0.000071, -0.000127, -0.000017, 0.000060, - 
0.000047, -0.000047, -0.000017, 0.000134, -0.000007, -0.000127, 0.000047, - 0.000060, -0.000047, -0.000047, 0.000124, -0.000003, -0.000003, 0.000044, - -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, - -0.000044, -0.000059, -0.000059, 0.000138, -0.000099, 0.000071, -0.000095, - -0.000059, 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, - 0.000041, -0.000002, 0.000048, -0.000046, -0.000034, 0.000034, 0.000018, - -0.000001, 0.000013, -0.000013, -0.000017, -0.000007, -0.000017, 0.000134, - 0.000047, 0.000060, -0.000047, -0.000047, -0.000127, -0.000001, 0.000018, - -0.000013, 0.000013, -0.000017, -0.000032, 0.000283, -0.000015, -0.000266, - 0.000099, 0.000123, -0.000099, -0.000099, -0.000027, -0.000266, 0.000298, - -0.000209, 0.000120, 0.000099, 0.000018, -0.000001, -0.000017, -0.000013, - 0.000013, 0.000138, -0.000095, -0.000099, 0.000071, 0.000138, -0.000095, - -0.000099, 0.000071, -0.000015, 0.000283, -0.000046, -0.000266, 0.000134, - 0.000099, -0.000099, -0.000099, 0.000028, 0.000020, -0.000020, -0.000001, - 0.000028, -0.000021, -0.000005, 0.000065, -0.000021, 0.000016, -0.000048, - -0.000266, -0.000015, 0.000283, -0.000046, -0.000099, -0.000099, 0.000134, - 0.000099, -0.000266, 0.000283, -0.000015, -0.000046, 0.000134, 0.000099, - -0.000099, -0.000099, -0.000032, -0.000015, -0.000266, 0.000283, -0.000099, - -0.000099, 0.000099, 0.000123, 0.000185, -0.000009, -0.000017, 0.000065, - -0.000175, 0.000078, -0.000065, -0.000065, -0.000002, 0.000048, -0.000034, - -0.000046, 0.000034, -0.000001, 0.000018, 0.000013, -0.000013, -0.000017, - 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, -0.000009, -0.000175, - -0.000017, 0.000185, -0.000065, -0.000065, 0.000078, 0.000065, -0.000001, - 0.000018, 0.000013, -0.000013, -0.000017, 0.000028, -0.000021, -0.000005, - 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, - -0.000059, 
0.000124, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, - 0.000041, -0.000059, 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, - -0.000044, 0.000041, -0.000002, 0.000048, -0.000034, -0.000046, 0.000034, - 0.000138, 0.000071, -0.000095, -0.000099, -0.000001, 0.000018, 0.000013, - -0.000013, -0.000017, 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, - 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, 0.000065, -0.000021, - -0.000033, -0.000001, 0.000028, -0.000020, -0.000027, 0.000020, 0.000138, - -0.000099, 0.000071, -0.000095, 0.000141, -0.000007, 0.000099, -0.000099, - -0.000133, 0.000141, -0.000007, -0.000133, -0.000099, 0.000099, 0.000065, - -0.000021, -0.000033, 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, - -0.000127, -0.000017, 0.000071, 0.000060, 0.000047, -0.000047, 0.000018, - -0.000001, 0.000013, -0.000013, -0.000017, 0.000065, -0.000021, -0.000033, - 0.000060, -0.000003, 0.000034, -0.000041, 0.000042, 0.000138, -0.000095, - -0.000099, 0.000071, 0.000185, -0.000009, -0.000017, -0.000175, 0.000065, - 0.000078, -0.000065, -0.000065, 0.000018, -0.000001, -0.000013, 0.000013, - -0.000017, -0.000009, -0.000175, -0.000017, 0.000185, -0.000065, -0.000065, - 0.000078, 0.000065, 0.000141, -0.000007, 0.000099, -0.000133, -0.000099, - 0.000141, -0.000007, -0.000133, -0.000099, 0.000099, -0.000133, 0.000297, - 0.000099, 0.000099, -0.000133, -0.000209, 0.000124, -0.000003, -0.000003, - -0.000059, -0.000059, 0.000044, -0.000044, -0.000001, 0.000018, -0.000013, - 0.000013, -0.000017, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, - 0.000044, -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000003, - -0.000044, 0.000044, -0.000044, 0.000041, 0.000034, -0.000002, -0.000032, - -0.000024, 0.000024, -0.000032, 0.000283, -0.000015, -0.000266, -0.000099, - -0.000099, 0.000123, 0.000099, 0.000048, -0.000002, 0.000034, -0.000034, - -0.000046, -0.000001, 
0.000018, 0.000013, -0.000013, -0.000017, 0.000028, - -0.000021, -0.000005, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, - 0.000044, -0.000044, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, - 0.000044, -0.000044, 0.000138, 0.000071, -0.000095, -0.000099, 0.000065, - -0.000021, -0.000033, 0.000141, -0.000007, 0.000099, -0.000133, -0.000099, - 0.000141, -0.000007, -0.000099, -0.000133, 0.000099, 0.000138, -0.000095, - -0.000099, 0.000071, 0.000138, -0.000095, 0.000071, -0.000099, -0.000017, - 0.000134, -0.000007, -0.000047, 0.000060, -0.000127, 0.000047, -0.000047, - 0.000028, -0.000021, -0.000005, 0.000141, -0.000099, 0.000099, -0.000133, - -0.000007, -0.000017, 0.000071, -0.000127, 0.000047, 0.000060, -0.000047, - 0.000141, -0.000007, -0.000133, -0.000099, 0.000099, -0.000133, 0.000297, - 0.000099, -0.000133, 0.000099, -0.000209, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, 0.000124, -0.000059, - -0.000003, -0.000044, -0.000044, 0.000044, 0.000041, -0.000059, 0.000124, - -0.000059, -0.000003, -0.000044, -0.000044, 0.000044, 0.000041, -0.000017, - -0.000007, 0.000134, -0.000127, -0.000047, 0.000060, -0.000047, 0.000047, - -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, - 0.000041, 0.000138, 0.000071, -0.000099, -0.000095, -0.000059, -0.000059, - 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, 0.000028, - -0.000021, -0.000005, 0.000138, -0.000099, -0.000095, 0.000071, 0.000141, - -0.000099, 0.000099, -0.000007, -0.000133, 0.000141, -0.000007, -0.000133, - 0.000099, -0.000099, 0.000065, -0.000021, -0.000033, 0.000028, -0.000021, - -0.000005, 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, -0.000017, - 0.000071, -0.000127, 0.000060, 0.000047, -0.000047, 0.000124, -0.000059, - -0.000059, -0.000003, 0.000044, -0.000044, -0.000044, 0.000041, -0.000059, - -0.000059, 0.000124, -0.000003, 
-0.000044, 0.000044, -0.000044, 0.000041, - -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, - 0.000041, 0.000078, -0.000004, -0.000055, 0.000055, -0.000074, -0.000015, - -0.000074, 0.000270, -0.000099, -0.000099, 0.000055, 0.000141, -0.000099, - -0.000007, -0.000133, 0.000099, 0.000141, -0.000007, -0.000099, -0.000133, - 0.000099, -0.000133, 0.000297, 0.000099, -0.000133, 0.000099, -0.000209, - 0.000018, -0.000001, -0.000017, 0.000013, -0.000013, -0.000017, 0.000098, - -0.000175, -0.000065, 0.000065, 0.000078, 0.000018, -0.000001, -0.000013, - 0.000013, -0.000017, -0.000127, -0.000017, 0.000071, 0.000060, 0.000047, - -0.000047, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, -0.000059, - -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, - -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, - 0.000041, 0.000124, -0.000059, -0.000059, 0.000044, -0.000044, -0.000003, - -0.000044, 0.000041, 0.000138, 0.000071, -0.000095, -0.000099, 0.000141, - 0.000099, -0.000007, -0.000133, -0.000099, 0.000138, -0.000095, 0.000071, - -0.000099, 0.000028, -0.000001, 0.000020, -0.000020, -0.000027, 0.000141, - -0.000007, 0.000099, -0.000099, -0.000133, -0.000133, 0.000297, 0.000099, - 0.000099, -0.000133, -0.000209, 0.000028, -0.000021, -0.000005, 0.000034, - -0.000002, -0.000032, 0.000024, -0.000024, -0.000015, -0.000032, 0.000283, - -0.000266, -0.000099, 0.000099, 0.000123, -0.000099, -0.000133, 0.000297, - 0.000099, -0.000133, 0.000099, -0.000209, -0.000059, 0.000124, -0.000059, - -0.000044, 0.000044, -0.000003, 0.000041, -0.000044, -0.000017, 0.000185, - -0.000175, -0.000009, -0.000065, -0.000065, 0.000078, 0.000065, 0.000141, - -0.000099, 0.000099, -0.000133, -0.000007, 0.000141, -0.000007, -0.000133, - -0.000099, 0.000099, -0.000017, -0.000009, 0.000185, 0.000065, -0.000175, - 0.000078, -0.000065, -0.000065, 0.000048, 0.000034, -0.000046, -0.000034, - 0.000048, 0.000034, -0.000046, -0.000034, 0.000138, 
-0.000099, 0.000071, - -0.000095, -0.000001, 0.000018, -0.000017, 0.000013, -0.000013, 0.000065, - -0.000021, -0.000033, 0.000028, -0.000001, 0.000020, -0.000020, -0.000027, - -0.000266, -0.000027, 0.000298, 0.000099, 0.000120, -0.000209, -0.000003, - 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, - 0.000124, -0.000059, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, - 0.000065, -0.000021, -0.000033, 0.000228, -0.000013, -0.000161, -0.000216, - -0.000216, -0.000216, 0.000390, 0.000161, -0.000271, 0.000161, 0.000028, - -0.000021, -0.000005, 0.000065, -0.000021, -0.000033, -0.000003, 0.000124, - -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000044, -0.000044, - 0.000028, -0.000021, -0.000005, -0.000017, 0.000185, -0.000009, -0.000065, - -0.000175, -0.000065, 0.000078, 0.000065, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, - 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, - 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, 0.000138, 0.000071, - -0.000099, -0.000095, -0.000059, -0.000059, 0.000124, 0.000044, -0.000044, - -0.000003, -0.000044, 0.000041, -0.000003, 0.000124, -0.000003, 0.000044, - -0.000044, -0.000059, -0.000059, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000124, -0.000003, 0.000044, -0.000044, -0.000003, -0.000059, -0.000059, - 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, -0.000017, - 0.000185, -0.000009, -0.000175, -0.000065, 0.000078, -0.000065, 0.000065, - -0.000009, -0.000017, -0.000175, 0.000185, -0.000065, 0.000065, -0.000065, - 0.000078, 0.000028, -0.000020, 0.000020, -0.000001, -0.000059, -0.000059, - 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, 0.000048, - -0.000046, 0.000034, -0.000034, -0.000266, -0.000027, 0.000298, 0.000099, - 0.000120, -0.000209, -0.000002, 0.000048, -0.000046, -0.000034, 0.000034, - 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, -0.000175, 
-0.000017, - 0.000098, 0.000078, 0.000065, -0.000065, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000028, -0.000020, -0.000001, 0.000020, 0.000078, -0.000004, - -0.000055, -0.000074, 0.000055, -0.000015, -0.000074, 0.000270, -0.000099, - 0.000055, -0.000099, 0.000028, -0.000021, -0.000005, 0.000138, 0.000071, - -0.000095, -0.000099, 0.000028, -0.000021, -0.000005, -0.000001, 0.000028, - -0.000020, 0.000020, -0.000027, -0.000133, 0.000297, 0.000099, -0.000133, - 0.000099, -0.000209, 0.000065, -0.000021, -0.000048, 0.000016, 0.000018, - -0.000001, -0.000013, 0.000013, -0.000017, -0.000175, -0.000017, 0.000098, - 0.000078, 0.000065, -0.000065, 0.000018, -0.000001, -0.000013, -0.000017, - 0.000013, -0.000127, -0.000017, 0.000071, 0.000060, 0.000047, -0.000047, - -0.000133, 0.000297, 0.000099, 0.000099, -0.000133, -0.000209, -0.000001, - 0.000018, -0.000017, -0.000013, 0.000013, 0.000065, -0.000021, -0.000033, - -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000138, 0.000071, -0.000099, -0.000095, 0.000138, -0.000095, -0.000099, - 0.000071, -0.000003, -0.000003, 0.000124, -0.000059, 0.000044, -0.000044, - -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, -0.000059, -0.000059, 0.000124, -0.000003, 0.000044, -0.000044, - -0.000044, 0.000041, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, - -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000044, - -0.000003, -0.000044, 0.000041, -0.000017, 0.000185, -0.000009, -0.000175, - 0.000065, 0.000078, -0.000065, -0.000065, -0.000017, -0.000175, 0.000098, - 0.000078, 0.000065, -0.000065, -0.000059, -0.000059, 0.000124, 0.000044, - -0.000003, -0.000044, -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, - 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, -0.000059, -0.000059, - 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, 0.000138, - 
0.000071, -0.000099, -0.000095, 0.000065, -0.000021, 0.000016, -0.000048, - -0.000017, -0.000009, 0.000185, -0.000175, -0.000065, -0.000065, 0.000078, - 0.000065, 0.000065, -0.000021, -0.000033, 0.000065, -0.000021, -0.000033, - -0.000003, -0.000003, 0.000124, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000065, -0.000021, -0.000033, 0.000138, -0.000095, 0.000071, -0.000099, - 0.000124, -0.000059, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, - 0.000041, 0.000028, 0.000020, -0.000001, -0.000020, 0.000065, -0.000021, - -0.000033, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, - -0.000044, 0.000041, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, - -0.000059, -0.000059, 0.000138, -0.000095, -0.000099, 0.000071, -0.000003, - 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, - 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, - -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, - 0.000041, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, - -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, - 0.000044, -0.000044, 0.000041, 0.000270, -0.000015, 0.000055, -0.000074, - -0.000099, -0.000099, -0.000003, -0.000003, 0.000124, 0.000044, -0.000044, - -0.000059, -0.000059, -0.000017, 0.000044, 0.000029, -0.000078, 0.000042, - -0.000029, 0.000044, -0.000017, 0.000042, 0.000029, -0.000078, -0.000029, - 0.000065, -0.000021, -0.000033, 0.000028, -0.000021, -0.000005, 0.000048, - -0.000002, -0.000046, -0.000034, 0.000034, -0.000015, -0.000266, -0.000046, - 0.000283, -0.000099, -0.000099, 0.000134, 0.000099, -0.000059, -0.000059, - 0.000124, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, -0.000059, - -0.000059, 0.000124, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, - 0.000138, 
-0.000095, -0.000099, 0.000071, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, -0.000003, 0.000124, - -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, - 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000028, - -0.000021, -0.000005, -0.000004, 0.000078, 0.000055, -0.000055, -0.000074, - -0.000032, 0.000283, -0.000015, -0.000266, 0.000123, -0.000099, 0.000099, - -0.000099, -0.000027, -0.000266, 0.000298, 0.000099, 0.000120, -0.000209, - 0.000028, 0.000020, -0.000020, -0.000001, 0.000078, -0.000004, -0.000055, - -0.000074, 0.000055, 0.000028, -0.000021, -0.000005, 0.000048, -0.000046, - 0.000034, -0.000034, -0.000074, 0.000270, -0.000015, -0.000099, 0.000055, - -0.000099, -0.000046, -0.000015, -0.000266, 0.000283, -0.000099, 0.000099, - -0.000099, 0.000134, 0.000018, -0.000001, -0.000017, 0.000013, -0.000013, - -0.000175, -0.000017, 0.000098, 0.000065, 0.000078, -0.000065, -0.000133, - 0.000297, -0.000133, 0.000099, -0.000209, 0.000099, -0.000003, -0.000003, - 0.000124, -0.000059, 0.000044, -0.000044, -0.000059, -0.000003, 0.000124, - -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, -0.000003, -0.000003, - 0.000124, -0.000059, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, - 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, -0.000059, - -0.000059, 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, - -0.000003, 0.000124, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, - -0.000059, -0.000059, 0.000124, -0.000044, -0.000003, 0.000044, -0.000044, - 0.000041, 0.000065, -0.000021, -0.000033, 0.000138, -0.000095, 0.000071, - -0.000099, 0.000065, -0.000021, -0.000033, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, 0.000065, -0.000021, - -0.000033, 0.000065, -0.000021, -0.000033, -0.000003, -0.000003, 0.000124, - -0.000059, 0.000044, -0.000044, -0.000059, 0.000028, -0.000021, -0.000005, - -0.000017, 0.000134, 
-0.000007, -0.000127, 0.000047, 0.000060, -0.000047, - -0.000047, -0.000017, -0.000127, 0.000071, 0.000060, 0.000047, -0.000047, - -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000034, -0.000002, -0.000032, -0.000024, 0.000024, -0.000015, -0.000266, - -0.000032, 0.000283, -0.000099, -0.000099, 0.000123, 0.000099, 0.000065, - -0.000021, -0.000033, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, - -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, -0.000044, -0.000003, - 0.000044, -0.000044, 0.000041, -0.000003, 0.000124, -0.000003, 0.000044, - -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, - -0.000044, -0.000003, -0.000044, 0.000041, 0.000065, -0.000021, -0.000033, - 0.000138, -0.000095, -0.000099, 0.000071, 0.000048, -0.000046, 0.000034, - -0.000034, 0.000138, -0.000095, 0.000071, -0.000099, 0.000028, -0.000020, - -0.000001, 0.000020, 0.000028, -0.000020, -0.000001, 0.000020, -0.000003, - 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000028, - -0.000020, -0.000001, 0.000020, -0.000003, -0.000003, 0.000124, 0.000044, - -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, - -0.000044, -0.000059, -0.000059, 0.000044, -0.000044, 0.000028, -0.000021, - -0.000005, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, - 0.000044, -0.000044, -0.000003, -0.000001, 0.000028, 0.000020, -0.000020, - -0.000027, -0.000017, 0.000185, -0.000009, -0.000175, -0.000065, 0.000065, - 0.000078, -0.000065, -0.000017, -0.000009, -0.000175, 0.000185, 0.000078, - 0.000065, -0.000065, -0.000065, -0.000001, 0.000018, -0.000013, 0.000013, - -0.000017, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, - -0.000059, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, - -0.000044, 0.000041, 0.000124, 
-0.000003, -0.000003, 0.000044, -0.000044, - -0.000059, -0.000059, 0.000028, -0.000021, -0.000005, -0.000059, -0.000059, - 0.000124, -0.000003, 0.000044, -0.000044, -0.000003, -0.000003, -0.000003, - 0.000124, -0.000059, -0.000059, 0.000044, -0.000044, -0.000059, -0.000059, - 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000138, - -0.000099, 0.000071, -0.000095, 0.000048, -0.000034, -0.000046, 0.000034, - 0.000048, -0.000034, -0.000046, 0.000034, -0.000003, 0.000124, -0.000003, - -0.000059, 0.000044, -0.000044, -0.000059, -0.000044, 0.000044, -0.000059, - -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, 0.000041, -0.000044, - -0.000017, 0.000044, 0.000042, -0.000078, 0.000029, -0.000029, -0.000003, - -0.000003, 0.000124, -0.000059, -0.000059, 0.000044, -0.000044, 0.000124, - -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, - -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, - -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, - -0.000059, -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, - 0.000041, 0.000018, -0.000001, -0.000013, 0.000013, -0.000017, -0.000007, - -0.000017, 0.000134, -0.000127, -0.000047, -0.000047, 0.000060, 0.000047, - -0.000001, 0.000018, 0.000013, -0.000013, -0.000017, -0.000059, -0.000059, - 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, 0.000048, - -0.000002, -0.000046, -0.000034, 0.000034, -0.000266, -0.000015, -0.000046, - 0.000283, -0.000099, -0.000099, 0.000134, 0.000099, 0.000001, 1.000000, - 0.000001, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 0.000045, -0.000003, -0.000014, -0.000006, - 0.000027, 0.000003, -0.000008, 0.000027, 0.000003, -0.000008, 0.000027, - 0.000003, -0.000008, 0.000047, -0.000043, -0.000005, 0.000077, 0.000402, - -0.000358, 0.000047, -0.000043, -0.000005, 0.000077, 0.000402, -0.000358, - 0.000027, 0.000003, -0.000008, 0.000022, 0.000021, 
-0.000006, 0.000022, - 0.000021, -0.000006, 0.000045, -0.000004, -0.000003, -0.000014, -0.000006, - 0.000027, 0.000003, -0.000008, 0.000022, 0.000021, -0.000006, 0.000047, - -0.000043, -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, - 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, 0.000027, 0.000003, - -0.000008, 0.000138, -0.000099, -0.000095, 0.000071, 0.000027, 0.000003, - -0.000008, 0.000100, -0.000005, -0.000071, 0.000071, 0.000047, 0.000045, - -0.000021, -0.000021, 0.000016, 0.000016, -0.000013, 0.000027, 0.000003, - -0.000008, 0.000236, -0.000010, -0.000010, -0.000113, 0.000036, -0.000080, - -0.000080, 0.000032, 0.000045, -0.000003, -0.000004, -0.000014, -0.000006, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, - 0.000021, -0.000006, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, - -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, - -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, - 0.000022, 0.000021, -0.000006, -0.000059, -0.000059, 0.000066, 0.000044, - 0.000044, 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, 0.000132, - 0.000675, -0.000594, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, - -0.000018, 0.000067, -0.000113, 0.000036, 0.000008, 0.000044, -0.000007, - -0.000005, -0.000044, 0.000322, -0.000044, -0.000117, -0.000046, -0.000024, - 0.000045, -0.000003, -0.000014, -0.000006, 0.000022, 0.000021, -0.000006, - 0.000202, -0.000010, -0.000007, -0.000072, -0.000070, -0.000021, 0.000100, - -0.000005, -0.000071, 0.000071, 0.000047, -0.000043, -0.000005, 0.000022, - 0.000021, -0.000006, -0.000010, 0.000227, -0.000080, -0.000080, -0.000010, - -0.000024, 0.000022, 0.000021, -0.000006, 0.000055, -0.000088, 0.000034, - 0.000028, 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, 0.000047, - -0.000043, -0.000005, 0.000075, -0.000044, -0.000067, 0.000044, -0.000008, - 0.000100, 
-0.000071, -0.000005, 0.000071, 0.000013, 0.000012, -0.000004, - 0.000047, -0.000043, -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, - -0.000008, 0.000027, 0.000003, -0.000008, 0.000132, 0.000675, -0.000594, - 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, 0.000027, 0.000003, - -0.000008, -0.000113, 0.000067, 0.000036, 0.000044, 0.000008, -0.000007, - 0.000022, 0.000021, -0.000006, 0.000077, 0.000402, -0.000358, 0.000047, - -0.000043, -0.000005, 0.000009, -0.000054, 0.000050, 0.000014, 0.000012, - -0.000007, 0.000109, -0.000036, -0.000007, -0.000024, -0.000059, -0.000059, - 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, -0.000059, -0.000059, - 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, 0.000027, 0.000003, - -0.000008, -0.000003, -0.000003, 0.000137, -0.000013, -0.000044, -0.000044, - -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, - 0.000236, -0.000010, -0.000010, 0.000036, -0.000113, -0.000080, -0.000080, - 0.000032, -0.000009, 0.000199, -0.000088, -0.000065, -0.000088, 0.000065, - -0.000065, 0.000065, -0.000010, -0.000005, 0.000022, 0.000021, -0.000006, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000113, - 0.000067, 0.000044, 0.000036, 0.000008, -0.000007, 0.000047, -0.000043, - -0.000005, -0.000059, -0.000059, 0.000066, 0.000044, 0.000079, -0.000018, - -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000047, - -0.000043, -0.000005, 0.000075, -0.000044, -0.000067, 0.000044, -0.000008, - 0.000236, -0.000080, -0.000080, -0.000010, 0.000036, -0.000010, -0.000113, - 0.000032, 0.000027, 0.000003, -0.000008, -0.000005, 0.000125, -0.000005, - -0.000065, -0.000021, -0.000012, 0.000022, 0.000021, -0.000006, 0.000022, - 0.000021, -0.000006, 0.000054, 0.000049, -0.000063, 0.000023, 0.000075, - -0.000067, 0.000044, -0.000044, -0.000008, -0.000070, -0.000072, 0.000202, - -0.000007, -0.000010, -0.000021, -0.000001, 
-0.000001, 0.000127, -0.000005, - -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000018, - -0.000013, -0.000001, -0.000017, 0.000013, 0.000067, -0.000113, 0.000008, - 0.000036, -0.000007, 0.000060, 0.000035, -0.000018, 0.000022, 0.000021, - -0.000006, -0.000002, 0.000137, -0.000063, -0.000011, -0.000090, 0.000028, - 0.000016, -0.000009, 0.000022, 0.000021, -0.000006, -0.000095, 0.000056, - 0.000186, -0.000165, 0.000100, -0.000071, -0.000005, 0.000071, 0.000027, - 0.000003, -0.000008, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, - -0.000002, -0.000008, 0.000118, -0.000063, -0.000078, 0.000016, 0.000028, - -0.000005, -0.000003, -0.000003, 0.000137, -0.000044, -0.000013, -0.000044, - -0.000006, 0.000071, 0.000173, -0.000018, 0.000022, 0.000021, -0.000006, - 0.000045, 0.000045, -0.000013, 0.000008, 0.000067, -0.000113, 0.000036, - 0.000044, -0.000007, -0.000010, 0.000236, 0.000036, -0.000113, -0.000010, - -0.000080, -0.000080, 0.000032, 0.000045, 0.000045, -0.000013, 0.000069, - 0.000035, -0.000113, 0.000036, 0.000044, -0.000007, 0.000022, 0.000021, - -0.000006, 0.000027, 0.000003, -0.000008, -0.000007, -0.000017, 0.000134, - 0.000060, -0.000127, -0.000047, -0.000047, 0.000047, -0.000003, -0.000003, - -0.000013, 0.000137, -0.000044, -0.000044, -0.000006, 0.000027, 0.000003, - -0.000008, 0.000077, 0.000402, -0.000358, -0.000055, 0.000022, -0.000094, - 0.000062, 0.000080, -0.000026, 0.000139, 0.000710, -0.000636, 0.000022, - 0.000021, -0.000006, 0.000027, 0.000003, -0.000008, 0.000018, -0.000013, - -0.000017, -0.000001, 0.000013, 0.000022, 0.000021, -0.000006, 0.000054, - -0.000063, 0.000049, 0.000028, 0.000016, -0.000005, 0.000022, 0.000021, - -0.000006, -0.000059, -0.000059, 0.000066, 0.000044, 0.000079, -0.000018, - 0.000057, 0.000049, -0.000100, 0.000036, 0.000034, -0.000002, -0.000059, - -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000103, - -0.000011, -0.000013, -0.000043, -0.000018, 0.000027, 0.000003, -0.000008, - 
0.000022, 0.000021, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, - 0.000044, 0.000035, -0.000018, 0.000027, 0.000003, -0.000008, -0.000161, - 0.000509, 0.000161, -0.000178, -0.000216, -0.000013, -0.000026, -0.000021, - 0.000047, -0.000043, -0.000005, 0.000027, 0.000003, -0.000008, 0.000077, - 0.000402, -0.000358, -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, - 0.000035, -0.000018, 0.000100, -0.000005, -0.000071, 0.000071, 0.000027, - -0.000005, 0.000054, 0.000049, -0.000063, 0.000023, 0.000022, 0.000021, - -0.000006, 0.000028, 0.000019, 0.000013, 0.000012, -0.000004, -0.000005, - 0.000322, -0.000044, -0.000046, -0.000044, -0.000117, -0.000024, 0.000028, - 0.000027, -0.000008, -0.000012, 0.000107, -0.000062, 0.000062, -0.000094, - -0.000007, 0.000148, -0.000063, -0.000047, -0.000063, 0.000047, -0.000047, - 0.000047, -0.000010, -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, - -0.000008, 0.000022, 0.000021, -0.000006, -0.000010, 0.000227, -0.000080, - -0.000010, -0.000024, 0.000036, 0.000236, -0.000080, -0.000080, -0.000010, - -0.000113, -0.000010, 0.000032, 0.000047, -0.000043, -0.000005, 0.000100, - -0.000071, -0.000005, 0.000071, -0.000006, -0.000006, 0.000285, -0.000015, - -0.000055, -0.000056, -0.000044, -0.000044, -0.000037, 0.000675, 0.000132, - -0.000594, 0.000027, 0.000003, -0.000008, 0.000027, 0.000003, -0.000008, - 0.000027, 0.000003, -0.000008, 0.000044, 0.000079, 0.000089, -0.000113, - 0.000024, 0.000027, 0.000003, -0.000008, 0.000075, -0.000067, 0.000044, - -0.000044, -0.000008, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, 0.000044, 0.000089, 0.000036, -0.000113, 0.000079, -0.000012, - 0.000022, 0.000021, -0.000006, 0.000027, 0.000003, -0.000008, -0.000009, - 0.000199, 0.000065, -0.000088, 0.000065, -0.000065, -0.000065, -0.000088, - -0.000010, -0.000005, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, 0.000016, 0.000014, -0.000005, 0.000047, -0.000043, -0.000005, - 0.000030, -0.000006, -0.000002, 
-0.000007, -0.000004, -0.000085, 0.000096, - 0.000056, -0.000056, -0.000011, 0.000022, 0.000021, -0.000006, 0.000022, - 0.000021, -0.000006, 0.000092, -0.000015, -0.000046, -0.000012, -0.000013, - -0.000005, -0.000044, 0.000322, -0.000044, -0.000046, -0.000117, -0.000024, - 0.000027, 0.000003, -0.000008, -0.000095, 0.000065, 0.000186, -0.000113, - 0.000044, 0.000036, -0.000153, 0.000022, 0.000021, -0.000006, -0.000002, - -0.000008, 0.000118, -0.000063, -0.000078, 0.000028, 0.000016, -0.000005, - -0.000006, 0.000285, -0.000006, -0.000015, -0.000055, -0.000056, -0.000044, - -0.000044, -0.000037, -0.000002, -0.000008, -0.000063, 0.000118, -0.000078, - 0.000016, 0.000028, -0.000005, 0.000054, 0.000049, -0.000063, 0.000028, - 0.000016, -0.000005, -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, - -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000027, 0.000003, - -0.000008, 0.000027, 0.000003, -0.000008, 0.000013, 0.000012, -0.000004, - 0.000045, 0.000045, -0.000013, 0.000069, 0.000036, 0.000035, -0.000113, - 0.000044, -0.000007, 0.000227, -0.000010, -0.000010, -0.000080, -0.000080, - -0.000024, -0.000007, 0.000164, -0.000047, -0.000047, -0.000005, -0.000021, - -0.000012, -0.000085, 0.000096, -0.000056, 0.000056, -0.000011, 0.000089, - 0.000044, -0.000113, 0.000036, 0.000079, -0.000012, 0.000045, 0.000045, - -0.000013, -0.000080, 0.000236, -0.000080, -0.000010, -0.000010, -0.000113, - 0.000036, 0.000032, 0.000069, 0.000036, -0.000113, 0.000044, 0.000035, - -0.000007, 0.000047, -0.000043, -0.000005, -0.000070, 0.000202, -0.000007, - -0.000072, -0.000010, -0.000021, -0.000059, -0.000059, 0.000066, 0.000044, - 0.000044, 0.000035, -0.000018, 0.000100, -0.000071, -0.000005, 0.000071, - 0.000057, 0.000049, 0.000036, -0.000100, 0.000034, -0.000002, 0.000045, - 0.000045, -0.000013, 0.000069, 0.000036, -0.000113, 0.000044, 0.000035, - -0.000007, 0.000047, -0.000043, -0.000005, -0.000059, -0.000059, 0.000044, - 0.000066, 0.000044, 0.000035, -0.000018, 0.000044, 
0.000036, 0.000089, - 0.000079, -0.000113, -0.000012, 0.000027, 0.000003, -0.000008, 0.000236, - -0.000010, -0.000080, -0.000080, 0.000036, -0.000010, -0.000113, 0.000032, - -0.000002, -0.000008, 0.000118, -0.000063, -0.000078, 0.000028, 0.000016, - -0.000005, -0.000002, -0.000008, 0.000118, -0.000063, -0.000078, 0.000016, - 0.000028, -0.000005, 0.000022, 0.000021, -0.000006, -0.000133, -0.000007, - 0.000155, 0.000099, -0.000099, -0.000010, -0.000005, -0.000002, 0.000118, - -0.000008, -0.000078, -0.000063, 0.000028, 0.000016, -0.000005, -0.000067, - 0.000075, 0.000044, -0.000044, -0.000008, 0.000075, -0.000067, 0.000044, - -0.000044, -0.000008, -0.000002, 0.000118, -0.000078, -0.000008, -0.000063, - 0.000016, 0.000028, -0.000005, 0.000027, 0.000003, -0.000008, 0.000047, - -0.000043, -0.000005, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, 0.000010, 0.000001, -0.000003, -0.000059, -0.000059, 0.000044, - 0.000066, 0.000044, 0.000035, -0.000018, -0.000159, 0.000117, 0.000055, - 0.000056, 0.000154, -0.000011, 0.000055, 0.000049, 0.000034, 0.000028, - -0.000088, -0.000005, 0.000022, 0.000021, -0.000006, 0.000054, 0.000049, - 0.000028, -0.000063, 0.000016, -0.000005, -0.000009, 0.000213, -0.000065, - -0.000005, -0.000065, -0.000021, -0.000012, 0.000044, -0.000113, 0.000089, - 0.000036, 0.000079, -0.000012, -0.000003, -0.000003, 0.000137, -0.000013, - -0.000044, -0.000044, -0.000006, -0.000003, -0.000003, -0.000013, 0.000137, - -0.000044, -0.000044, -0.000006, -0.000004, -0.000055, 0.000078, -0.000074, - 0.000055, -0.000004, 0.000096, -0.000039, -0.000029, 0.000029, -0.000029, - 0.000029, -0.000039, -0.000010, -0.000005, 0.000077, 0.000402, -0.000358, - 0.000027, 0.000003, -0.000008, 0.000013, 0.000012, -0.000004, -0.000043, - 0.000027, 0.000028, 0.000021, 0.000003, 0.000130, -0.000125, 0.000041, - -0.000075, 0.000041, -0.000013, 0.000022, 0.000021, -0.000006, 0.000089, - 0.000079, 0.000044, 0.000036, -0.000113, -0.000012, 0.000022, 0.000021, - 
-0.000006, 0.000027, 0.000003, -0.000008, 0.000077, 0.000402, -0.000358, - -0.000005, -0.000044, 0.000322, -0.000117, -0.000044, -0.000046, -0.000024, - -0.000113, 0.000044, 0.000036, 0.000089, 0.000079, -0.000012, 0.000054, - 0.000049, 0.000028, -0.000063, 0.000016, -0.000005, 0.000022, 0.000021, - -0.000006, 0.000055, -0.000088, 0.000034, 0.000028, 0.000049, -0.000005, - 0.000016, 0.000010, -0.000113, 0.000067, 0.000008, 0.000036, 0.000044, - -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, - -0.000006, 0.000027, 0.000003, -0.000008, 0.000067, 0.000036, 0.000044, - 0.000008, -0.000113, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, - -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, - 0.000137, -0.000044, -0.000006, -0.000007, 0.000148, 0.000047, -0.000063, - -0.000063, 0.000047, -0.000047, -0.000047, -0.000010, -0.000005, 0.000139, - 0.000710, -0.000636, 0.000027, 0.000003, -0.000008, 0.000013, 0.000012, - -0.000004, 0.000013, 0.000012, -0.000004, 0.000055, 0.000049, -0.000088, - 0.000034, 0.000028, -0.000005, 0.000022, 0.000021, -0.000006, 0.000045, - -0.000003, -0.000014, -0.000006, 0.000117, -0.000159, 0.000055, 0.000056, - 0.000154, -0.000011, 0.000227, -0.000010, -0.000080, -0.000080, -0.000010, - -0.000024, 0.000027, 0.000003, -0.000008, 0.000027, 0.000003, -0.000008, - 0.000285, -0.000015, -0.000006, -0.000006, -0.000044, -0.000044, -0.000055, - -0.000056, -0.000037, 0.000055, 0.000049, -0.000088, 0.000034, 0.000028, - -0.000005, 0.000071, 0.000173, -0.000018, -0.000133, -0.000007, 0.000155, - -0.000099, 0.000099, -0.000010, -0.000005, 0.000022, 0.000021, -0.000006, - 0.000022, 0.000021, -0.000006, 0.000100, -0.000071, -0.000005, 0.000071, - 0.000027, 0.000003, -0.000008, -0.000003, -0.000003, -0.000044, 0.000137, - -0.000013, -0.000044, -0.000006, 0.000045, 0.000045, -0.000013, 0.000036, - 0.000067, -0.000113, 0.000044, 0.000008, -0.000007, 0.000075, -0.000067, - 0.000044, -0.000044, -0.000008, 
-0.000003, -0.000003, -0.000044, 0.000137, - -0.000013, -0.000044, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, - 0.000021, -0.000006, -0.000080, -0.000010, 0.000227, -0.000080, -0.000010, - -0.000024, 0.000035, 0.000036, 0.000069, 0.000044, -0.000113, -0.000007, - 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, 0.000075, -0.000067, - 0.000044, -0.000044, -0.000008, 0.000034, 0.000028, -0.000088, 0.000055, - 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, 0.000028, 0.000019, - 0.000027, 0.000028, -0.000043, 0.000021, 0.000003, -0.000060, 0.000176, - -0.000010, -0.000005, -0.000084, -0.000007, 0.000202, -0.000070, -0.000072, - -0.000010, -0.000021, -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, - 0.000035, -0.000018, 0.000045, -0.000004, -0.000003, -0.000014, -0.000006, - 0.000027, 0.000003, -0.000008, -0.000095, 0.000065, -0.000113, 0.000044, - 0.000036, 0.000186, -0.000153, -0.000059, -0.000059, 0.000066, 0.000044, - 0.000044, 0.000035, -0.000018, 0.000067, -0.000113, 0.000036, 0.000044, - 0.000008, -0.000007, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, - 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, 0.000036, -0.000113, - -0.000010, 0.000236, -0.000010, -0.000080, -0.000080, 0.000032, -0.000113, - 0.000067, 0.000044, 0.000036, 0.000008, -0.000007, -0.000012, 0.000092, - -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, - 0.000003, 0.000022, 0.000021, -0.000006, -0.000113, 0.000036, 0.000044, - 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, - 0.000137, -0.000044, -0.000006, 0.000132, 0.000675, -0.000594, 0.000054, - 0.000028, 0.000016, -0.000063, 0.000049, -0.000005, 0.000047, 0.000045, - -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000100, -0.000071, - -0.000005, 0.000071, 0.000027, 0.000003, -0.000008, -0.000059, -0.000059, - 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000057, 0.000036, - 0.000049, -0.000100, 0.000034, -0.000002, 0.000022, 0.000021, 
-0.000006, - -0.000029, 0.000099, -0.000004, -0.000005, -0.000010, -0.000036, -0.000017, - 0.000042, -0.000078, 0.000044, -0.000029, 0.000029, 0.000022, 0.000021, - -0.000006, -0.000070, 0.000202, -0.000007, -0.000072, -0.000021, 0.000034, - -0.000088, 0.000055, 0.000049, 0.000028, -0.000005, 0.000045, 0.000045, - -0.000013, 0.000017, 0.000023, -0.000136, 0.000017, 0.000017, 0.000106, - 0.000029, -0.000019, 0.000155, -0.000136, 0.000088, -0.000088, -0.000000, - 0.000088, 0.000155, -0.000019, -0.000088, -0.000136, -0.000019, -0.000136, - 0.000155, 0.000088, -0.000088, -0.000000, 0.000044, 0.000089, 0.000079, - -0.000113, 0.000036, -0.000012, 0.000022, 0.000021, -0.000006, 0.000022, - 0.000021, -0.000006, 0.000047, -0.000043, -0.000005, 0.000013, 0.000012, - -0.000004, -0.000005, -0.000044, -0.000117, -0.000044, 0.000322, -0.000046, - -0.000024, -0.000026, 0.000029, -0.000018, -0.000003, -0.000070, 0.000202, - -0.000072, -0.000010, -0.000007, -0.000021, 0.000067, 0.000044, 0.000036, - 0.000008, -0.000113, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, - -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, - 0.000137, -0.000044, -0.000006, 0.000044, -0.000113, 0.000089, 0.000079, - 0.000036, -0.000012, -0.000010, 0.000176, -0.000060, -0.000005, -0.000084, - 0.000010, 0.000019, 0.000034, -0.000029, 0.000011, -0.000001, -0.000001, - -0.000001, -0.000001, -0.000003, 0.000087, -0.000013, -0.000013, -0.000013, - -0.000021, -0.000012, -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, - 0.000117, 0.000055, 0.000056, -0.000159, 0.000154, -0.000011, -0.000002, - -0.000001, -0.000002, -0.000020, -0.000004, 0.000216, -0.000005, -0.000024, - -0.000034, -0.000021, -0.000068, 0.000027, 0.000003, -0.000008, 0.000019, - 0.000010, 0.000011, 0.000034, -0.000029, -0.000001, 0.000022, 0.000021, - -0.000006, 0.000028, 0.000019, -0.000001, -0.000001, 0.000127, -0.000005, - -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000227, - 
-0.000010, -0.000010, -0.000080, -0.000080, -0.000024, 0.000022, 0.000021, - -0.000006, 0.000092, -0.000015, -0.000046, -0.000012, -0.000013, -0.000043, - 0.000027, 0.000028, 0.000021, 0.000003, -0.000005, -0.000044, 0.000322, - -0.000046, -0.000044, -0.000117, -0.000024, -0.000113, 0.000089, 0.000044, - 0.000079, 0.000036, -0.000012, 0.000027, 0.000003, -0.000008, -0.000113, - 0.000089, 0.000044, 0.000036, 0.000079, -0.000012, 0.000027, 0.000028, - -0.000043, 0.000021, 0.000003, 0.000027, 0.000028, -0.000043, 0.000021, - 0.000003, -0.000027, 0.000010, 0.000016, 0.000036, 0.000056, -0.000029, - 0.000011, -0.000082, 0.000055, 0.000049, -0.000088, 0.000034, 0.000028, - -0.000005, 0.000022, 0.000021, -0.000006, -0.000009, 0.000164, -0.000052, - -0.000006, -0.000063, -0.000021, -0.000113, 0.000069, 0.000044, 0.000035, - 0.000036, -0.000007, 0.000027, -0.000043, 0.000021, 0.000028, 0.000003, - -0.000007, 0.000164, -0.000047, -0.000005, -0.000047, -0.000021, -0.000012, - -0.000006, -0.000006, 0.000285, -0.000015, -0.000055, -0.000056, -0.000044, - -0.000044, -0.000037, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, - -0.000011, -0.000001, -0.000001, -0.000003, 0.000133, -0.000013, -0.000047, - -0.000013, -0.000003, -0.000021, -0.000012, 0.000013, 0.000012, -0.000004, - 0.000202, -0.000072, -0.000007, -0.000070, -0.000010, -0.000021, -0.000133, - -0.000007, 0.000155, -0.000099, 0.000099, -0.000010, -0.000005, 0.000045, - 0.000047, -0.000021, -0.000021, 0.000016, 0.000016, -0.000013, 0.000045, - 0.000045, -0.000013, 0.000069, -0.000113, 0.000044, 0.000036, 0.000035, - -0.000007, -0.000001, -0.000001, 0.000070, -0.000013, -0.000013, -0.000003, - -0.000021, -0.000012, -0.000009, 0.000213, -0.000065, -0.000065, -0.000005, - -0.000021, -0.000012, 0.000044, 0.000089, 0.000036, -0.000113, 0.000079, - -0.000012, 0.000176, -0.000010, -0.000060, -0.000005, -0.000084, 0.000045, - 0.000045, -0.000013, 0.000036, -0.000113, 0.000044, 0.000069, 0.000035, - -0.000007, -0.000001, 
-0.000001, 0.000127, -0.000005, -0.000005, -0.000088, - -0.000019, -0.000043, 0.000047, -0.000005, 0.000028, 0.000019, 0.000126, - -0.000043, -0.000080, 0.000028, -0.000032, 0.000008, -0.000050, 0.000046, - 0.000024, 0.000126, -0.000043, -0.000052, -0.000032, 0.000055, 0.000049, - -0.000088, 0.000034, 0.000028, -0.000005, 0.000045, -0.000003, -0.000014, - -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, - -0.000006, 0.000036, 0.000069, -0.000113, 0.000044, 0.000035, -0.000007, - 0.000067, -0.000113, 0.000036, 0.000044, 0.000008, -0.000007, -0.000113, - -0.000010, 0.000236, -0.000080, -0.000080, 0.000036, -0.000010, 0.000032, - 0.000034, -0.000088, 0.000028, 0.000055, 0.000049, -0.000005, 0.000022, - 0.000021, -0.000006, -0.000043, 0.000126, -0.000052, -0.000032, -0.000026, - 0.000029, -0.000018, -0.000003, 0.000044, 0.000036, 0.000089, 0.000079, - -0.000113, -0.000012, 0.000027, 0.000021, 0.000028, -0.000043, 0.000003, - 0.000028, 0.000027, 0.000021, -0.000043, 0.000003, -0.000007, -0.000007, - -0.000104, -0.000099, 0.000327, -0.000036, -0.000021, -0.000054, 0.000036, - 0.000013, -0.000031, 0.000100, -0.000071, -0.000005, 0.000071, -0.000002, - -0.000008, 0.000118, -0.000078, -0.000063, 0.000028, 0.000016, -0.000005, - 0.000027, 0.000003, -0.000008, -0.000002, -0.000008, 0.000118, -0.000078, - -0.000063, 0.000016, 0.000028, -0.000005, 0.000057, 0.000049, -0.000100, - 0.000032, 0.000092, -0.000015, -0.000012, -0.000046, -0.000013, -0.000043, - 0.000027, 0.000021, 0.000028, 0.000003, 0.000092, -0.000015, -0.000012, - -0.000046, -0.000013, 0.000012, 0.000013, -0.000004, -0.000001, 0.000036, - -0.000003, -0.000013, -0.000010, -0.000006, -0.000070, -0.000010, 0.000202, - -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, 0.000008, 0.000036, - 0.000067, -0.000007, 0.000034, -0.000088, 0.000028, 0.000055, 0.000049, - -0.000005, 0.000285, -0.000015, -0.000006, -0.000055, -0.000006, -0.000056, - -0.000044, -0.000044, -0.000037, 0.000402, 
0.000077, -0.000358, 0.000013, - 0.000012, -0.000004, -0.000005, 0.000322, -0.000044, -0.000046, -0.000044, - -0.000117, -0.000024, -0.000113, 0.000089, 0.000079, 0.000044, 0.000036, - -0.000012, 0.000202, -0.000072, -0.000007, -0.000070, -0.000010, -0.000021, - 0.000176, -0.000010, -0.000060, -0.000005, -0.000084, 0.000028, 0.000027, - -0.000008, -0.000010, -0.000007, 0.000202, -0.000070, -0.000072, -0.000021, - -0.000004, 0.000029, 0.000096, -0.000039, 0.000029, -0.000039, -0.000029, - -0.000029, -0.000010, -0.000005, -0.000002, 0.000118, -0.000008, -0.000063, - 0.000028, 0.000016, -0.000078, -0.000005, -0.000067, 0.000075, -0.000044, - 0.000044, -0.000008, 0.000055, 0.000049, 0.000034, 0.000028, -0.000088, - -0.000005, 0.000022, 0.000021, -0.000006, -0.000002, 0.000118, -0.000008, - -0.000063, 0.000016, 0.000028, -0.000078, -0.000005, 0.000045, 0.000045, - -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, -0.000007, - -0.000113, -0.000080, -0.000010, 0.000236, -0.000010, 0.000036, -0.000080, - 0.000032, 0.000008, 0.000044, -0.000113, 0.000067, 0.000036, -0.000007, - 0.000047, -0.000043, -0.000005, 0.000022, 0.000021, -0.000006, -0.000003, - -0.000003, 0.000137, -0.000013, -0.000044, -0.000044, -0.000006, -0.000003, - -0.000003, -0.000013, 0.000137, -0.000044, -0.000044, -0.000006, 0.000054, - 0.000049, -0.000063, 0.000028, 0.000016, -0.000005, 0.000092, -0.000015, - -0.000012, -0.000046, -0.000013, 0.000041, 0.000130, -0.000125, -0.000075, - 0.000041, -0.000013, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, - 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, 0.000079, 0.000089, 0.000044, 0.000036, -0.000113, -0.000012, - 0.000045, 0.000045, -0.000013, 0.000067, -0.000113, 0.000008, 0.000036, - 0.000044, -0.000007, 0.000022, 0.000021, -0.000006, -0.000010, 0.000227, - -0.000010, -0.000080, -0.000080, -0.000024, 0.000022, 0.000021, -0.000006, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 
-0.000059, - -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, 0.000045, - 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000027, - 0.000003, -0.000008, -0.000113, 0.000036, 0.000044, 0.000067, 0.000008, - -0.000007, 0.000027, 0.000003, -0.000008, 0.000089, 0.000079, -0.000113, - 0.000044, 0.000036, -0.000012, -0.000029, -0.000002, 0.000077, -0.000005, - -0.000021, -0.000012, -0.000113, 0.000067, 0.000008, 0.000044, 0.000036, - -0.000007, -0.000026, 0.000029, 0.000018, -0.000018, -0.000003, -0.000027, - -0.000029, 0.000010, 0.000056, 0.000016, 0.000011, 0.000036, -0.000082, - 0.000013, 0.000012, -0.000004, 0.000034, -0.000088, 0.000055, 0.000049, - 0.000028, -0.000005, 0.000027, 0.000003, -0.000008, -0.000002, -0.000008, - 0.000118, -0.000078, 0.000028, 0.000016, -0.000063, -0.000005, -0.000002, - 0.000118, -0.000078, -0.000008, 0.000016, 0.000028, -0.000063, -0.000005, - 0.000092, -0.000015, -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, - 0.000021, 0.000028, 0.000003, 0.000092, -0.000046, -0.000015, -0.000012, - -0.000013, -0.000029, 0.000019, 0.000010, 0.000011, 0.000034, -0.000001, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000013, - 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000117, -0.000046, - 0.000322, -0.000024, 0.000022, 0.000021, -0.000006, -0.000010, -0.000070, - 0.000202, -0.000007, -0.000072, -0.000021, -0.000001, -0.000001, -0.000005, - 0.000127, -0.000005, -0.000088, -0.000019, 0.000016, 0.000014, -0.000005, - 0.000045, 0.000045, -0.000013, 0.000117, -0.000159, 0.000055, 0.000056, - 0.000154, -0.000011, 0.000027, -0.000043, 0.000028, 0.000021, 0.000003, - 0.000126, -0.000043, -0.000052, -0.000032, 0.000126, -0.000043, -0.000052, - -0.000032, -0.000004, -0.000029, 0.000099, -0.000029, -0.000005, -0.000010, - -0.000007, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, - -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, -0.000001, -0.000001, - -0.000005, 
0.000127, -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, - -0.000005, 0.000045, 0.000045, -0.000013, 0.000036, 0.000035, 0.000069, - -0.000113, 0.000044, -0.000007, 0.000036, 0.000044, 0.000067, 0.000008, - -0.000113, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, - -0.000044, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, - 0.000035, -0.000018, -0.000009, -0.000065, 0.000213, -0.000005, -0.000065, - -0.000021, -0.000012, -0.000027, -0.000029, 0.000010, 0.000056, 0.000016, - 0.000011, 0.000036, -0.000082, -0.000005, -0.000044, 0.000322, -0.000117, - -0.000044, -0.000046, -0.000024, -0.000113, 0.000067, 0.000008, 0.000036, - 0.000044, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, - -0.000044, -0.000006, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, - -0.000011, 0.000126, -0.000043, -0.000052, -0.000032, -0.000043, 0.000126, - -0.000052, -0.000032, 0.000022, 0.000021, -0.000006, -0.000133, -0.000007, - 0.000099, 0.000155, -0.000099, -0.000010, -0.000005, 0.000027, 0.000028, - 0.000021, -0.000043, 0.000003, -0.000027, 0.000056, 0.000010, 0.000016, - 0.000011, -0.000029, 0.000036, -0.000082, -0.000059, -0.000059, 0.000044, - 0.000066, 0.000044, 0.000035, -0.000018, -0.000007, 0.000164, -0.000047, - -0.000047, -0.000021, -0.000005, -0.000012, -0.000088, 0.000055, 0.000049, - 0.000028, 0.000034, -0.000005, 0.000022, 0.000021, -0.000006, 0.000092, - -0.000015, -0.000012, -0.000046, -0.000013, -0.000001, -0.000001, -0.000001, - 0.000087, -0.000003, -0.000013, -0.000013, -0.000021, -0.000025, 0.000028, - 0.000019, -0.000060, 0.000176, -0.000010, -0.000005, -0.000084, 0.000027, - 0.000003, -0.000008, 0.000044, 0.000036, 0.000089, -0.000113, 0.000079, - -0.000012, 0.000161, -0.000161, 0.000509, -0.000178, -0.000216, -0.000013, - -0.000026, -0.000021, 0.000036, 0.000057, -0.000100, 0.000034, 0.000049, - -0.000002, 0.000096, 0.000056, -0.000085, -0.000056, -0.000011, 0.000012, - 0.000013, -0.000004, -0.000001, 
-0.000002, 0.000092, -0.000003, -0.000029, - -0.000013, -0.000021, -0.000012, -0.000001, -0.000001, -0.000003, 0.000133, - -0.000003, -0.000047, -0.000013, -0.000013, -0.000021, -0.000012, 0.000010, - 0.000034, 0.000019, -0.000029, 0.000011, -0.000001, 0.000022, 0.000021, - -0.000006, -0.000002, 0.000118, -0.000078, -0.000008, -0.000063, 0.000028, - 0.000016, -0.000005, -0.000015, 0.000092, -0.000046, -0.000012, -0.000013, - 0.000022, 0.000021, -0.000006, -0.000067, 0.000044, 0.000075, -0.000044, - -0.000008, 0.000022, 0.000021, -0.000006, 0.000008, 0.000067, -0.000113, - 0.000036, 0.000044, -0.000007, 0.000045, 0.000047, -0.000021, 0.000016, - -0.000021, 0.000016, -0.000013, -0.000002, 0.000118, -0.000078, -0.000008, - -0.000063, 0.000016, 0.000028, -0.000005, -0.000015, -0.000046, 0.000092, - -0.000012, -0.000013, 0.000036, -0.000113, -0.000010, 0.000236, -0.000010, - -0.000080, -0.000080, 0.000032, -0.000004, -0.000001, -0.000002, -0.000002, - -0.000055, 0.000221, -0.000024, -0.000005, -0.000003, -0.000034, -0.000020, - -0.000021, -0.000014, 0.000047, -0.000021, 0.000016, 0.000045, -0.000021, - 0.000016, -0.000013, -0.000001, -0.000041, 0.000171, -0.000015, -0.000059, - -0.000017, -0.000027, -0.000005, -0.000044, -0.000117, 0.000322, -0.000044, - -0.000046, -0.000024, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, - -0.000044, -0.000006, -0.000088, 0.000028, 0.000055, 0.000034, 0.000049, - -0.000005, 0.000022, 0.000021, -0.000006, -0.000007, 0.000202, -0.000070, - -0.000010, -0.000072, -0.000021, -0.000043, 0.000027, 0.000021, 0.000028, - 0.000003, 0.000045, 0.000016, -0.000021, 0.000047, -0.000021, 0.000016, - -0.000013, -0.000067, 0.000075, -0.000044, 0.000044, -0.000008, 0.000045, - 0.000045, -0.000013, 0.000036, 0.000067, -0.000113, 0.000044, 0.000008, - -0.000007, -0.000080, -0.000010, 0.000227, -0.000080, -0.000010, -0.000024, - -0.000113, 0.000036, 0.000069, 0.000044, 0.000035, -0.000007, 0.000044, - -0.000113, 0.000036, 0.000067, 0.000008, 
-0.000007, 0.000034, -0.000088, - 0.000055, 0.000028, 0.000049, -0.000005, 0.000057, 0.000157, -0.000014, - 0.000036, -0.000113, 0.000044, 0.000067, 0.000008, -0.000007, -0.000003, - -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000071, - 0.000173, -0.000018, 0.000022, 0.000021, -0.000006, 0.000013, 0.000012, - -0.000004, 0.000045, 0.000045, -0.000013, -0.000003, -0.000047, -0.000005, - 0.000101, -0.000021, -0.000012, 0.000054, 0.000049, -0.000063, 0.000028, - 0.000016, -0.000005, 0.000022, 0.000021, -0.000006, -0.000002, 0.000118, - -0.000078, -0.000008, -0.000063, 0.000028, 0.000016, -0.000005, -0.000015, - -0.000046, 0.000092, -0.000012, -0.000013, -0.000067, 0.000044, 0.000075, - -0.000044, -0.000008, -0.000002, 0.000118, -0.000078, -0.000063, -0.000008, - 0.000016, 0.000028, -0.000005, -0.000015, -0.000046, 0.000092, -0.000012, - -0.000013, 0.000117, -0.000159, 0.000055, 0.000056, 0.000154, -0.000011, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000027, - 0.000003, -0.000008, -0.000006, -0.000006, 0.000285, -0.000044, -0.000044, - -0.000015, -0.000055, -0.000056, -0.000037, 0.000402, 0.000077, -0.000358, - 0.000027, 0.000003, -0.000008, 0.000079, -0.000113, 0.000044, 0.000036, - 0.000089, -0.000012, 0.000100, -0.000005, -0.000071, 0.000071, -0.000004, - -0.000029, -0.000029, 0.000099, -0.000005, -0.000010, -0.000007, -0.000026, - 0.000018, 0.000029, -0.000018, -0.000003, -0.000005, -0.000065, 0.000125, - -0.000005, -0.000021, -0.000012, -0.000010, -0.000060, 0.000176, -0.000005, - -0.000084, 0.000045, 0.000045, -0.000013, -0.000113, 0.000036, 0.000044, - 0.000035, 0.000069, -0.000007, 0.000012, 0.000013, -0.000004, 0.000044, - -0.000113, 0.000036, 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, - -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000022, 0.000021, - -0.000006, -0.000007, 0.000202, -0.000070, -0.000072, -0.000010, -0.000021, - 0.000045, 0.000045, -0.000013, 0.000036, 0.000067, -0.000113, 
0.000044, - 0.000008, -0.000007, 0.000044, 0.000067, -0.000113, 0.000036, 0.000008, - -0.000007, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, - -0.000080, -0.000080, -0.000010, 0.000227, -0.000010, -0.000024, 0.000022, - 0.000021, -0.000006, -0.000113, 0.000067, 0.000036, 0.000044, 0.000008, - -0.000007, -0.000113, 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, - -0.000003, -0.000003, 0.000137, -0.000044, -0.000013, -0.000044, -0.000006, - -0.000001, -0.000059, -0.000015, 0.000171, -0.000017, -0.000041, -0.000027, - -0.000005, -0.000044, -0.000117, 0.000322, -0.000044, -0.000046, -0.000024, - 0.000132, 0.000675, -0.000594, 0.000022, 0.000021, -0.000006, 0.000126, - -0.000043, -0.000052, -0.000032, 0.000045, 0.000047, -0.000021, -0.000021, - 0.000016, 0.000016, -0.000013, 0.000044, 0.000036, -0.000113, 0.000067, - 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, - -0.000044, -0.000006, -0.000001, -0.000001, -0.000005, -0.000005, 0.000127, - -0.000088, -0.000019, 0.000045, 0.000045, -0.000013, 0.000036, 0.000069, - -0.000113, 0.000044, 0.000035, -0.000007, 0.000027, 0.000021, -0.000043, - 0.000028, 0.000003, 0.000161, 0.000026, 0.000258, -0.000178, -0.000255, - 0.000000, -0.000001, -0.000015, 0.000171, -0.000059, -0.000017, -0.000041, - -0.000027, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, - -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, - -0.000006, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, - -0.000043, 0.000027, 0.000028, 0.000003, -0.000009, -0.000065, 0.000213, - -0.000005, -0.000065, -0.000021, -0.000012, -0.000027, 0.000036, 0.000010, - -0.000029, 0.000056, 0.000016, 0.000011, -0.000082, -0.000005, -0.000044, - 0.000322, -0.000046, -0.000044, -0.000117, -0.000024, 0.000079, -0.000113, - 0.000089, 0.000044, 0.000036, -0.000012, 0.000100, -0.000005, -0.000071, - 0.000071, 0.000057, 0.000049, -0.000100, 0.000032, 0.000036, 0.000044, - -0.000113, 
0.000069, 0.000035, -0.000007, -0.000001, -0.000001, -0.000005, - 0.000127, -0.000005, -0.000088, -0.000019, 0.000028, 0.000019, -0.000133, - -0.000007, 0.000099, 0.000155, -0.000099, -0.000010, -0.000005, -0.000005, - 0.000322, -0.000044, -0.000117, -0.000044, -0.000046, -0.000024, 0.000027, - 0.000021, -0.000043, 0.000028, 0.000003, -0.000021, 0.000016, 0.000045, - 0.000047, -0.000021, 0.000016, -0.000013, 0.000027, 0.000003, -0.000008, - -0.000095, -0.000113, 0.000065, 0.000044, 0.000036, 0.000186, -0.000153, - -0.000006, 0.000164, -0.000009, -0.000063, -0.000052, -0.000021, 0.000034, - 0.000028, 0.000055, 0.000049, -0.000088, -0.000005, 0.000022, 0.000021, - -0.000006, -0.000003, -0.000003, 0.000137, -0.000044, -0.000013, -0.000044, - -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, - -0.000018, -0.000113, 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, - 0.000049, 0.000034, 0.000028, 0.000055, -0.000088, -0.000005, 0.000041, - -0.000125, 0.000130, 0.000041, -0.000075, -0.000013, 0.000022, 0.000021, - -0.000006, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, -0.000021, - -0.000021, 0.000047, 0.000016, 0.000045, 0.000016, -0.000013, 0.000077, - 0.000402, -0.000358, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, - 0.000035, -0.000018, 0.000027, 0.000003, -0.000008, 0.000044, 0.000079, - -0.000113, 0.000036, 0.000089, -0.000012, -0.000001, -0.000001, -0.000005, - 0.000127, -0.000005, -0.000088, -0.000019, 0.000057, 0.000049, -0.000100, - 0.000034, 0.000036, -0.000002, -0.000015, -0.000046, -0.000012, 0.000092, - -0.000013, -0.000026, 0.000018, 0.000029, -0.000018, -0.000003, -0.000001, - -0.000059, 0.000171, -0.000041, -0.000015, -0.000017, -0.000027, -0.000161, - 0.000161, -0.000178, 0.000509, -0.000013, -0.000216, -0.000026, -0.000021, - 0.000041, -0.000125, 0.000041, 0.000130, -0.000075, -0.000013, 0.000044, - -0.000113, 0.000036, 0.000089, 0.000079, -0.000012, -0.000080, -0.000080, - 0.000227, -0.000010, -0.000010, 
-0.000024, 0.000044, 0.000089, 0.000036, - -0.000113, 0.000079, -0.000012, -0.000001, -0.000001, 0.000127, -0.000005, - -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, -0.000003, - -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, - -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000001, - -0.000001, 0.000070, -0.000003, -0.000013, -0.000021, -0.000025, 0.000011, - -0.000029, 0.000010, 0.000034, 0.000019, -0.000001, 0.000022, 0.000021, - -0.000006, -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, 0.000092, - -0.000015, -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, 0.000021, - 0.000028, 0.000003, -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, - 0.000035, -0.000018, -0.000004, -0.000029, 0.000099, -0.000029, -0.000005, - -0.000010, -0.000007, -0.000007, -0.000007, -0.000099, 0.000327, -0.000104, - -0.000036, -0.000021, 0.000028, 0.000019, 0.000021, 0.000027, -0.000043, - 0.000028, 0.000003, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, - 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, 0.000019, 0.000010, - 0.000011, 0.000034, -0.000029, -0.000001, -0.000067, 0.000044, 0.000075, - -0.000044, -0.000008, 0.000055, 0.000049, 0.000034, 0.000028, -0.000088, - -0.000005, 0.000022, 0.000021, -0.000006, -0.000002, -0.000004, -0.000001, - -0.000002, -0.000034, -0.000055, -0.000005, -0.000020, 0.000221, -0.000003, - -0.000024, -0.000021, -0.000014, 0.000008, 0.000005, -0.000001, -0.000010, - -0.000070, 0.000202, -0.000072, -0.000007, -0.000021, 0.000044, -0.000113, - 0.000036, 0.000008, 0.000067, -0.000007, 0.000047, 0.000045, -0.000021, - 0.000016, -0.000021, 0.000016, -0.000013, 0.000044, 0.000036, 0.000089, - 0.000079, -0.000113, -0.000012, -0.000080, -0.000080, 0.000227, -0.000010, - -0.000010, -0.000024, 0.000028, 0.000027, 0.000021, -0.000043, 0.000003, - 0.000045, -0.000021, 0.000016, 0.000047, 0.000016, -0.000021, -0.000013, - 0.000126, -0.000043, -0.000052, -0.000032, 0.000013, 
0.000012, -0.000004, - -0.000005, -0.000044, -0.000117, 0.000322, -0.000044, -0.000046, -0.000024, - 0.000027, 0.000003, -0.000008, 0.000079, 0.000044, 0.000036, -0.000113, - 0.000089, -0.000012, -0.000007, 0.000030, -0.000006, -0.000002, -0.000004, - 0.000173, 0.000071, -0.000018, -0.000013, -0.000020, 0.000013, 0.000022, - -0.000002, -0.000100, 0.000057, 0.000049, 0.000034, 0.000036, -0.000002, - 0.000126, -0.000043, -0.000052, -0.000032, -0.000043, 0.000126, -0.000052, - -0.000032, -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, -0.000088, - -0.000019, -0.000043, 0.000047, -0.000005, -0.000003, -0.000003, -0.000044, - 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, - -0.000013, 0.000137, -0.000044, -0.000006, -0.000003, -0.000003, 0.000137, - -0.000013, -0.000044, -0.000044, -0.000006, -0.000003, -0.000003, -0.000013, - 0.000137, -0.000044, -0.000044, -0.000006, -0.000006, -0.000006, 0.000285, - -0.000015, -0.000044, -0.000044, -0.000055, -0.000056, -0.000037, -0.000159, - 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, 0.000022, 0.000021, - -0.000006, -0.000003, -0.000003, 0.000137, -0.000013, -0.000044, -0.000044, - -0.000006, -0.000003, -0.000003, -0.000013, 0.000137, -0.000044, -0.000044, - -0.000006, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000113, - -0.000095, 0.000065, 0.000044, 0.000036, 0.000186, -0.000153, 0.000126, - -0.000043, -0.000052, -0.000032, -0.000005, -0.000044, -0.000046, -0.000044, - -0.000117, 0.000322, -0.000024, -0.000001, -0.000001, -0.000005, 0.000127, - -0.000005, -0.000088, -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, - 0.000202, -0.000070, -0.000072, -0.000007, -0.000010, -0.000021, 0.000028, - 0.000019, 0.000132, 0.000675, -0.000594, 0.000022, 0.000021, -0.000006, - -0.000007, -0.000047, 0.000164, -0.000021, -0.000005, -0.000060, 0.000022, - 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 
0.000022, 0.000021, - -0.000006, -0.000010, 0.000176, -0.000060, -0.000005, -0.000084, 0.000045, - 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000008, - 0.000067, -0.000113, 0.000036, 0.000044, -0.000007, -0.000010, 0.000236, - -0.000010, 0.000036, -0.000113, -0.000080, -0.000080, 0.000032, 0.000008, - -0.000113, 0.000067, 0.000044, 0.000036, -0.000007, 0.000055, 0.000049, - -0.000088, 0.000028, 0.000034, -0.000005, 0.000022, 0.000021, -0.000006, - -0.000007, -0.000047, 0.000164, -0.000047, -0.000005, -0.000021, -0.000012, - -0.000026, 0.000029, -0.000018, -0.000003, 0.000126, -0.000043, -0.000052, - -0.000032, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, - -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, - -0.000006, 0.000022, 0.000021, -0.000006, -0.000071, 0.000100, -0.000005, - 0.000071, -0.000010, 0.000202, -0.000007, -0.000072, -0.000070, -0.000021, - 0.000049, -0.000088, 0.000055, 0.000028, 0.000034, -0.000005, -0.000009, - -0.000065, 0.000213, -0.000005, -0.000065, -0.000021, -0.000012, -0.000027, - -0.000029, 0.000010, 0.000011, 0.000056, 0.000016, 0.000036, -0.000082, - 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, - 0.000021, -0.000006, -0.000012, -0.000015, 0.000092, -0.000046, -0.000013, - 0.000027, 0.000003, -0.000008, -0.000001, -0.000001, -0.000005, 0.000127, - -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000176, - -0.000060, -0.000010, -0.000005, -0.000084, 0.000027, 0.000003, -0.000008, - 0.000044, 0.000079, 0.000089, 0.000036, -0.000113, -0.000012, 0.000013, - 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000117, -0.000046, - 0.000322, -0.000024, 0.000028, 0.000019, 0.000022, 0.000021, -0.000006, - -0.000003, -0.000003, 0.000137, -0.000013, -0.000044, -0.000044, -0.000006, - 0.000117, -0.000159, 0.000055, 0.000056, 0.000154, -0.000011, 0.000045, - 0.000045, -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, - 
-0.000007, -0.000113, 0.000044, 0.000036, 0.000067, 0.000008, -0.000007, - -0.000113, 0.000036, -0.000080, -0.000010, 0.000236, -0.000080, -0.000010, - 0.000032, 0.000035, 0.000036, 0.000069, 0.000044, -0.000113, -0.000007, - 0.000008, 0.000044, -0.000113, 0.000036, 0.000067, -0.000007, 0.000139, - 0.000710, -0.000636, -0.000088, 0.000028, 0.000055, 0.000049, 0.000034, - -0.000005, 0.000022, 0.000021, -0.000006, 0.000139, 0.000710, -0.000636, - -0.000006, 0.000164, -0.000063, -0.000009, -0.000052, -0.000021, 0.000710, - 0.000139, -0.000636, -0.000006, 0.000164, -0.000063, -0.000009, -0.000052, - -0.000021, 0.000710, 0.000139, -0.000636, 0.000139, 0.000710, -0.000636, - 0.000047, 0.000045, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, - 0.000013, 0.000012, -0.000004, 0.000139, 0.000710, -0.000636, -0.000009, - -0.000052, 0.000164, -0.000006, -0.000063, -0.000021, 0.000710, 0.000139, - -0.000636, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, - -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, - -0.000006, -0.000133, -0.000007, 0.000099, 0.000155, -0.000099, -0.000010, - -0.000005, -0.000001, -0.000013, -0.000003, 0.000036, -0.000010, -0.000006, - -0.000027, 0.000056, -0.000029, 0.000010, 0.000011, 0.000036, 0.000016, - -0.000082, -0.000002, -0.000011, -0.000090, 0.000137, -0.000063, 0.000028, - 0.000016, -0.000009, 0.000022, 0.000021, -0.000006, 0.000092, -0.000015, - -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, 0.000021, 0.000028, - 0.000003, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, 0.000045, - -0.000021, 0.000047, 0.000016, -0.000021, 0.000016, -0.000013, 0.000139, - 0.000710, -0.000636, -0.000009, -0.000052, 0.000164, -0.000063, -0.000006, - -0.000021, 0.000045, 0.000045, -0.000013, 0.000045, 0.000045, -0.000013, - 0.000036, 0.000067, -0.000113, 0.000044, 0.000008, -0.000007, -0.000080, - -0.000010, 0.000227, -0.000080, -0.000010, -0.000024, 0.000035, 0.000036, - 0.000069, -0.000113, 0.000044, 
-0.000007, 0.000021, 0.000027, -0.000043, - 0.000028, 0.000003, -0.000015, -0.000006, 0.000285, -0.000006, -0.000044, - -0.000044, -0.000055, -0.000056, -0.000037, -0.000002, -0.000078, -0.000063, - 0.000118, -0.000008, 0.000016, 0.000028, -0.000005, -0.000012, 0.000092, - -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, - 0.000003, -0.000007, -0.000007, 0.000327, -0.000104, -0.000099, -0.000036, - -0.000021, 0.000173, 0.000071, -0.000018, -0.000095, 0.000186, 0.000044, - 0.000065, 0.000036, -0.000113, -0.000153, 0.000092, -0.000046, -0.000015, - -0.000012, -0.000013, 0.000034, -0.000029, 0.000019, 0.000010, 0.000011, - -0.000001, 0.000011, -0.000029, 0.000010, 0.000019, 0.000034, -0.000001, - 0.000044, -0.000067, 0.000075, -0.000044, -0.000008, -0.000026, 0.000029, - -0.000018, -0.000003, -0.000070, -0.000010, 0.000202, -0.000072, -0.000007, - -0.000021, -0.000113, 0.000008, 0.000044, 0.000036, 0.000067, -0.000007, - 0.000034, 0.000049, -0.000088, 0.000028, 0.000055, -0.000005, -0.000010, - -0.000010, -0.000080, -0.000080, 0.000227, -0.000024, -0.000001, -0.000002, - -0.000029, 0.000092, -0.000013, -0.000003, -0.000021, -0.000012, -0.000063, - 0.000016, 0.000054, 0.000049, 0.000028, -0.000005, -0.000015, -0.000012, - 0.000092, -0.000046, -0.000013, 0.000022, 0.000021, -0.000006, -0.000001, - -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, - 0.000126, -0.000052, -0.000032, 0.000027, 0.000003, -0.000008, -0.000113, - 0.000089, 0.000044, 0.000036, 0.000079, -0.000012, 0.000022, 0.000021, - -0.000006, 0.000022, 0.000021, -0.000006, -0.000043, 0.000028, 0.000027, - 0.000021, 0.000003, 0.000045, -0.000021, -0.000021, 0.000016, 0.000047, - 0.000016, -0.000013, 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, - -0.000044, -0.000046, 0.000322, -0.000117, -0.000024, 0.000027, 0.000003, - -0.000008, 0.000034, -0.000088, 0.000028, 0.000055, 0.000049, -0.000005, - 0.000022, 0.000021, -0.000006, 0.000008, -0.000113, 
0.000067, 0.000044, - 0.000036, -0.000007, 0.000034, 0.000028, 0.000055, 0.000049, -0.000088, - -0.000005, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, - -0.000019, -0.000043, 0.000047, -0.000005, 0.000047, 0.000045, -0.000021, - 0.000016, -0.000021, 0.000016, -0.000013, 0.000044, 0.000036, -0.000113, - 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, - -0.000013, -0.000044, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, - 0.000044, 0.000035, -0.000018, -0.000005, -0.000044, -0.000044, 0.000322, - -0.000046, -0.000117, -0.000024, 0.000044, -0.000113, 0.000089, 0.000036, - 0.000079, -0.000012, 0.000022, 0.000021, -0.000006, -0.000001, -0.000041, - 0.000171, -0.000059, -0.000015, -0.000017, -0.000027, -0.000004, -0.000002, - -0.000001, -0.000002, -0.000020, -0.000024, -0.000055, -0.000005, 0.000216, - -0.000034, -0.000021, -0.000012, 0.000126, -0.000043, -0.000052, -0.000032, - -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, - -0.000043, 0.000047, -0.000005, -0.000002, -0.000001, -0.000002, -0.000004, - -0.000020, -0.000055, -0.000024, 0.000216, -0.000005, -0.000034, -0.000021, - -0.000012, -0.000070, -0.000072, -0.000007, 0.000202, -0.000010, -0.000021, - -0.000095, 0.000044, 0.000186, 0.000065, 0.000036, -0.000113, -0.000153, - 0.000126, -0.000043, -0.000052, -0.000032, 0.000044, 0.000036, -0.000113, - 0.000089, 0.000079, -0.000012, -0.000080, -0.000010, -0.000010, 0.000227, - -0.000080, -0.000024, 0.000092, -0.000046, -0.000015, -0.000012, -0.000013, - -0.000027, 0.000056, -0.000029, 0.000016, 0.000036, 0.000010, 0.000011, - -0.000082, -0.000001, -0.000001, -0.000013, 0.000070, -0.000013, -0.000021, - -0.000003, -0.000012, 0.000027, 0.000003, -0.000008, 0.000044, 0.000036, - -0.000095, 0.000186, 0.000065, -0.000113, -0.000153, -0.000006, 0.000056, - -0.000033, -0.000017, 0.000013, 0.000012, -0.000004, -0.000005, 0.000322, - -0.000044, -0.000117, -0.000044, -0.000046, -0.000024, 
-0.000002, -0.000008, - 0.000118, -0.000078, -0.000063, 0.000028, 0.000016, -0.000005, -0.000002, - -0.000008, 0.000118, -0.000078, -0.000063, 0.000016, 0.000028, -0.000005, - -0.000113, 0.000067, 0.000008, 0.000036, 0.000044, -0.000007, -0.000003, - -0.000003, -0.000013, -0.000044, 0.000137, -0.000044, -0.000006, 0.000044, - 0.000036, -0.000113, 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, - -0.000044, 0.000137, -0.000044, -0.000013, -0.000006, -0.000012, 0.000092, - -0.000046, -0.000015, -0.000013, 0.000022, 0.000011, -0.000029, 0.000018, - 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, - -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, 0.000011, -0.000029, - 0.000018, 0.000010, 0.000022, -0.000001, 0.000021, -0.000043, 0.000028, - 0.000027, 0.000003, 0.000016, 0.000047, 0.000045, 0.000016, -0.000021, - -0.000021, -0.000013, 0.000012, 0.000013, -0.000004, 0.000028, 0.000019, - -0.000025, -0.000013, -0.000161, 0.000683, -0.000003, -0.000021, -0.000012, - 0.000016, 0.000002, -0.000012, -0.000009, -0.000017, 0.000078, 0.000185, - -0.000065, -0.000175, -0.000065, 0.000065, -0.000113, 0.000067, 0.000008, - 0.000036, 0.000044, -0.000007, -0.000003, -0.000003, -0.000013, 0.000137, - -0.000044, -0.000044, -0.000006, 0.000092, -0.000015, -0.000046, -0.000012, - -0.000013, -0.000043, 0.000027, 0.000028, 0.000021, 0.000003, 0.000047, - 0.000045, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, -0.000070, - 0.000202, -0.000007, -0.000072, -0.000010, -0.000021, 0.000049, 0.000034, - -0.000088, 0.000055, 0.000028, -0.000005, -0.000043, 0.000126, -0.000052, - -0.000032, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, -0.000012, - -0.000060, -0.000010, 0.000176, -0.000005, -0.000084, -0.000003, -0.000003, - -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, - -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000007, -0.000070, - -0.000010, -0.000072, 0.000202, -0.000021, -0.000012, -0.000015, 0.000092, 
- -0.000046, -0.000013, -0.000043, 0.000126, -0.000052, -0.000032, 0.000047, - -0.000043, -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, - -0.000001, -0.000001, -0.000001, -0.000005, -0.000003, -0.000013, -0.000013, - 0.000174, -0.000065, -0.000013, -0.000021, -0.000012, -0.000027, 0.000056, - 0.000010, 0.000011, 0.000036, 0.000016, -0.000029, -0.000082, 0.000126, - -0.000043, 0.000028, -0.000080, -0.000032, 0.000045, -0.000013, 0.000028, - 0.000019, 0.000045, 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, - -0.000013, -0.000007, 0.000202, -0.000070, -0.000072, -0.000010, -0.000021, - 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000046, - -0.000117, 0.000322, -0.000024, 0.000045, 0.000045, -0.000013, -0.000113, - 0.000044, 0.000036, 0.000069, 0.000035, -0.000007, -0.000059, -0.000059, - 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000027, 0.000003, - -0.000008, -0.000113, 0.000044, 0.000036, 0.000079, 0.000089, -0.000012, - 0.000045, 0.000045, -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, - 0.000044, -0.000007, -0.000029, 0.000010, 0.000011, 0.000019, 0.000034, - -0.000001, -0.000001, -0.000013, -0.000003, 0.000036, -0.000010, -0.000006, - 0.000117, 0.000055, -0.000159, 0.000056, 0.000154, -0.000011, -0.000070, - -0.000072, -0.000007, 0.000202, -0.000010, -0.000021, 0.000036, -0.000080, - -0.000010, 0.000236, -0.000010, -0.000080, -0.000113, 0.000032, 0.000008, - 0.000044, -0.000113, 0.000067, 0.000036, -0.000007, -0.000002, -0.000001, - -0.000002, -0.000004, -0.000005, 0.000221, -0.000055, -0.000003, -0.000034, - -0.000024, -0.000021, -0.000020, -0.000014, 0.000036, 0.000044, -0.000113, - 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, - 0.000137, -0.000044, -0.000006, 0.000049, 0.000057, -0.000100, 0.000034, - 0.000036, -0.000002, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, - 0.000035, -0.000018, -0.000001, -0.000059, 0.000171, -0.000041, -0.000015, - -0.000017, 
-0.000027, -0.000161, 0.000161, -0.000161, 0.000161, -0.000013, - -0.000178, 0.000737, -0.000216, -0.000216, -0.000013, -0.000026, -0.000021, - 0.000041, -0.000125, 0.000041, 0.000130, -0.000075, -0.000013, 0.000028, - 0.000019, -0.000001, 0.000036, -0.000003, -0.000010, -0.000019, -0.000029, - 0.000010, 0.000011, 0.000019, 0.000034, -0.000001, -0.000001, -0.000001, - -0.000001, -0.000013, -0.000013, -0.000003, 0.000087, -0.000013, -0.000021, - -0.000012, -0.000002, -0.000004, -0.000002, -0.000001, -0.000020, -0.000055, - 0.000216, -0.000034, -0.000005, -0.000021, -0.000024, -0.000012, 0.000022, - 0.000021, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, - 0.000035, -0.000018, 0.000036, 0.000044, -0.000113, 0.000069, 0.000035, - -0.000007, -0.000001, -0.000001, -0.000005, -0.000005, 0.000127, -0.000088, - -0.000019, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, - -0.000043, 0.000027, 0.000028, 0.000003, 0.000034, 0.000011, -0.000029, - 0.000010, 0.000019, -0.000001, -0.000012, 0.000092, -0.000015, -0.000046, - -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, -0.000088, - 0.000034, 0.000028, 0.000055, 0.000049, -0.000005, 0.000022, 0.000021, - -0.000006, -0.000009, 0.000164, -0.000006, -0.000084, 0.000049, -0.000063, - 0.000054, 0.000023, 0.000057, 0.000034, 0.000036, -0.000100, 0.000049, - -0.000002, -0.000027, 0.000056, 0.000011, 0.000010, -0.000029, 0.000016, - 0.000036, -0.000082, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, -0.000006, -0.000006, -0.000015, 0.000285, -0.000044, -0.000044, - -0.000055, -0.000056, -0.000037, 0.000402, 0.000077, -0.000358, 0.000075, - -0.000067, 0.000044, -0.000044, -0.000008, 0.000045, 0.000045, -0.000013, - 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, -0.000007, -0.000080, - -0.000010, 0.000227, -0.000010, -0.000080, -0.000024, 0.000008, 0.000044, - -0.000113, 0.000067, 0.000036, -0.000007, -0.000113, 0.000036, 0.000044, - 0.000069, 0.000035, -0.000007, 
-0.000001, -0.000001, -0.000005, -0.000005, - 0.000127, -0.000088, -0.000019, 0.000013, 0.000012, -0.000004, -0.000005, - -0.000044, -0.000044, -0.000046, 0.000322, -0.000141, -0.000010, -0.000060, - 0.000176, -0.000005, -0.000084, 0.000028, 0.000027, -0.000008, -0.000133, - -0.000007, -0.000099, 0.000155, 0.000099, -0.000010, -0.000005, -0.000043, - 0.000028, 0.000027, 0.000021, 0.000003, -0.000021, -0.000021, 0.000016, - 0.000047, 0.000016, 0.000045, -0.000013, 0.000044, 0.000036, 0.000089, - 0.000079, -0.000113, -0.000012, -0.000010, -0.000060, 0.000176, -0.000005, - -0.000084, -0.000002, -0.000063, 0.000118, -0.000008, 0.000028, 0.000016, - -0.000078, -0.000005, -0.000067, 0.000075, -0.000044, 0.000044, -0.000008, - -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, -0.000001, -0.000001, - -0.000001, -0.000005, -0.000003, -0.000065, -0.000013, 0.000174, -0.000013, - -0.000021, -0.000013, -0.000012, -0.000027, -0.000029, 0.000010, 0.000056, - 0.000011, 0.000036, 0.000016, -0.000082, -0.000002, -0.000063, 0.000118, - -0.000008, 0.000016, 0.000028, -0.000078, -0.000005, -0.000067, 0.000075, - -0.000044, 0.000044, -0.000008, -0.000015, -0.000006, -0.000006, 0.000285, - -0.000055, -0.000056, -0.000044, -0.000044, -0.000037, 0.000402, 0.000077, - -0.000358, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, - -0.000019, -0.000007, -0.000007, -0.000104, -0.000099, 0.000327, -0.000036, - -0.000021, 0.000028, 0.000019, 0.000041, -0.000125, 0.000041, 0.000130, - -0.000075, -0.000013, 1.000000, 0.000045, 0.000045, -0.000013, 0.000036, - 0.000035, 0.000069, -0.000113, 0.000044, -0.000007, -0.000029, 0.000010, - 0.000019, 0.000034, 0.000011, -0.000001, -0.000001, -0.000001, -0.000013, - -0.000013, -0.000003, 0.000070, -0.000021, -0.000012, 0.000027, 0.000003, - -0.000008, 0.000044, 0.000036, -0.000113, 0.000079, 0.000089, -0.000012, - -0.000113, 0.000036, 0.000044, 0.000069, 0.000035, -0.000007, -0.000001, - -0.000001, -0.000005, -0.000005, 0.000127, 
-0.000088, -0.000019, -0.000001, - -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, - 0.000126, -0.000052, -0.000032, 0.000055, 0.000049, -0.000088, 0.000034, - 0.000028, -0.000005, 0.000022, 0.000021, -0.000006, -0.000001, -0.000001, - -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, 0.000126, - -0.000052, -0.000032, 0.000045, 0.000045, -0.000013, 0.000036, 0.000044, - -0.000113, 0.000069, 0.000035, -0.000007, -0.000100, 0.000034, 0.000057, - 0.000049, 0.000036, -0.000002, -0.000015, -0.000012, 0.000092, -0.000046, - -0.000013, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, -0.000012, - -0.000007, -0.000070, -0.000072, 0.000202, -0.000010, -0.000021, -0.000006, - -0.000009, 0.000164, -0.000063, -0.000052, -0.000021, 0.000675, 0.000132, - -0.000594, -0.000007, 0.000202, -0.000070, -0.000072, -0.000010, -0.000021, - 0.000008, 0.000067, -0.000113, 0.000036, 0.000044, -0.000007, -0.000006, - -0.000006, 0.000285, -0.000015, -0.000044, -0.000044, -0.000055, -0.000056, - -0.000037, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, - -0.000010, 0.000227, -0.000010, -0.000080, -0.000080, -0.000024, 0.000008, - -0.000113, 0.000067, 0.000044, 0.000036, -0.000007, 0.000027, 0.000003, - -0.000008, 0.000044, 0.000036, -0.000113, 0.000079, 0.000089, -0.000012, - -0.000009, 0.000164, -0.000006, -0.000063, -0.000052, -0.000021, 0.000049, - -0.000063, 0.000054, 0.000028, 0.000016, -0.000005, -0.000012, 0.000092, - -0.000046, -0.000015, -0.000013, -0.000027, 0.000056, 0.000011, 0.000036, - -0.000029, 0.000016, 0.000010, -0.000082, 0.000021, -0.000043, 0.000028, - 0.000027, 0.000003, 0.000016, -0.000063, 0.000054, 0.000049, 0.000028, - -0.000005, -0.000015, 0.000092, -0.000012, -0.000046, -0.000013, -0.000012, - 0.000092, -0.000046, -0.000015, -0.000013, 0.000011, 0.000034, -0.000029, - 0.000019, 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, - 0.000003, 0.000049, 0.000057, -0.000100, 0.000032, -0.000029, 
0.000010, - 0.000011, 0.000019, 0.000034, -0.000001, -0.000001, -0.000001, -0.000001, - -0.000013, -0.000013, -0.000003, 0.000087, -0.000013, -0.000021, -0.000012, - 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, 0.000045, -0.000013, - -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, -0.000117, -0.000024, - 0.000044, 0.000079, -0.000113, 0.000089, 0.000036, -0.000012, 0.000044, - 0.000079, -0.000113, 0.000036, 0.000089, -0.000012, 0.000013, 0.000012, - -0.000004, -0.000005, -0.000044, -0.000044, -0.000117, -0.000046, 0.000322, - -0.000024, -0.000005, -0.000044, 0.000322, -0.000046, -0.000044, -0.000117, - -0.000024, 0.000079, -0.000113, 0.000089, 0.000044, 0.000036, -0.000012, - -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, - 0.000027, 0.000028, 0.000003, 0.000011, 0.000034, -0.000029, 0.000010, - 0.000019, -0.000001, 0.000071, 0.000173, -0.000018, -0.000007, -0.000007, - -0.000099, -0.000104, 0.000327, -0.000036, -0.000021, -0.000012, 0.000092, - -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, - 0.000003, -0.000027, 0.000011, 0.000036, -0.000029, 0.000010, 0.000016, - 0.000056, -0.000082, -0.000004, -0.000029, 0.000099, -0.000005, -0.000029, - -0.000010, -0.000007, -0.000027, 0.000036, 0.000011, -0.000029, 0.000010, - 0.000056, 0.000016, -0.000082, -0.000043, 0.000126, -0.000052, -0.000032, - 0.000045, 0.000045, -0.000013, 0.000012, 0.000013, -0.000004, 0.000028, - 0.000019, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, - -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, - -0.000006, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, -0.000117, - -0.000024, 0.000044, -0.000113, 0.000089, 0.000036, 0.000079, -0.000012, - 0.000044, 0.000186, -0.000113, 0.000036, 0.000065, -0.000095, -0.000153, - 0.000011, -0.000029, 0.000010, 0.000019, 0.000034, -0.000001, -0.000001, - -0.000013, -0.000003, 0.000036, -0.000010, -0.000006, -0.000071, -0.000005, - 0.000100, 
0.000071, 0.000161, -0.000013, -0.000161, -0.000161, -0.000216, - -0.000013, 0.000725, -0.000178, -0.000026, -0.000021, -0.000002, -0.000008, - 0.000028, 0.000118, -0.000063, 0.000016, -0.000078, -0.000005, -0.000015, - -0.000044, -0.000006, 0.000285, -0.000044, -0.000006, -0.000055, -0.000056, - -0.000037, 0.000088, 0.000064, -0.000018, -0.000002, -0.000008, 0.000016, - -0.000063, 0.000028, 0.000118, -0.000078, -0.000005, -0.000043, 0.000028, - 0.000027, 0.000021, 0.000003, 0.000045, -0.000021, 0.000016, -0.000021, - 0.000016, 0.000047, -0.000013, -0.000007, -0.000047, -0.000047, 0.000164, - -0.000005, -0.000021, -0.000012, -0.000027, 0.000011, -0.000029, 0.000010, - 0.000056, 0.000016, 0.000036, -0.000082, -0.000012, 0.000092, -0.000046, - -0.000015, -0.000013, 0.000022, 0.000011, -0.000029, 0.000018, 0.000010, - -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, 0.000022, - 0.000021, -0.000006, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, - 0.000022, 0.000011, -0.000029, 0.000018, 0.000010, -0.000001, 0.000021, - -0.000043, 0.000028, 0.000027, 0.000003, 0.000044, -0.000113, 0.000036, - 0.000186, 0.000065, -0.000095, -0.000153, -0.000100, 0.000034, 0.000057, - 0.000049, 0.000036, -0.000002, 0.000045, 0.000045, -0.000013, -0.000113, - 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, -0.000003, -0.000003, - -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000070, -0.000010, - 0.000202, -0.000072, -0.000007, -0.000021, 0.000044, -0.000113, 0.000036, - 0.000067, 0.000008, -0.000007, 0.000049, 0.000034, -0.000088, 0.000028, - 0.000055, -0.000005, -0.000003, 0.000101, -0.000047, -0.000005, -0.000021, - -0.000012, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, - 0.000022, 0.000021, -0.000006, 0.000007, 0.000007, -0.000002, 0.000022, - 0.000021, -0.000006, -0.000006, -0.000063, 0.000164, -0.000052, -0.000009, - -0.000021, -0.000020, 0.000055, -0.000018, 0.000049, 0.000034, -0.000100, - 0.000036, 0.000057, -0.000002, 0.000126, 
-0.000043, -0.000052, -0.000032, - -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, - -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, - 0.000010, 0.000001, -0.000003, 0.000173, 0.000071, -0.000018, -0.000001, - -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, - 0.000047, -0.000005, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, 0.000022, 0.000021, -0.000006, 0.000045, 0.000045, -0.000013, - 0.000035, -0.000113, 0.000036, 0.000044, 0.000069, -0.000007, 0.000011, - -0.000029, 0.000010, 0.000019, 0.000034, -0.000001, -0.000001, -0.000001, - -0.000013, -0.000013, -0.000003, 0.000070, -0.000021, -0.000012, 0.000027, - 0.000003, -0.000008, 0.000044, -0.000113, 0.000036, 0.000089, 0.000079, - -0.000012, -0.000010, -0.000060, 0.000176, -0.000005, -0.000084, -0.000113, - 0.000186, 0.000044, 0.000036, 0.000065, -0.000095, -0.000153, -0.000113, - 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, -0.000003, -0.000003, - -0.000013, -0.000044, 0.000137, -0.000044, -0.000006, 0.000013, 0.000013, - -0.000004, -0.000007, -0.000007, -0.000104, -0.000099, 0.000327, -0.000036, - -0.000021, 0.000045, 0.000045, -0.000013, 0.000036, 0.000035, -0.000113, - 0.000044, 0.000069, -0.000007, -0.000006, -0.000009, 0.000164, -0.000084, - 0.000710, 0.000139, -0.000636, -0.000001, -0.000001, -0.000005, -0.000005, - 0.000127, -0.000088, -0.000019, -0.000052, -0.000009, 0.000164, -0.000063, - -0.000006, -0.000021, 0.000082, 0.000033, -0.000008, 0.000049, 0.000016, - -0.000063, 0.000028, 0.000054, -0.000005, -0.000010, -0.000070, 0.000202, - -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, 0.000036, 0.000008, - 0.000067, -0.000007, 0.000049, 0.000034, -0.000088, 0.000028, 0.000055, - -0.000005, -0.000071, 0.000100, -0.000005, 0.000071, 0.000027, 0.000003, - -0.000008, -0.000095, -0.000113, 0.000186, 0.000044, 0.000036, 0.000065, - -0.000153, -0.000002, -0.000008, 0.000118, -0.000078, -0.000063, 
0.000028, - 0.000016, -0.000005, -0.000002, -0.000008, 0.000118, -0.000078, -0.000063, - 0.000016, 0.000028, -0.000005, -0.000012, 0.000092, -0.000046, -0.000015, - -0.000013, -0.000027, 0.000036, 0.000011, 0.000056, -0.000029, 0.000016, - 0.000010, -0.000082, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, - -0.000010, -0.000060, 0.000176, -0.000005, -0.000084, -0.000012, 0.000092, - -0.000046, -0.000015, -0.000013, 0.000034, 0.000011, -0.000029, 0.000019, - 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, - 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000046, - -0.000117, 0.000322, -0.000024, 0.000016, 0.000016, -0.000021, -0.000021, - 0.000047, 0.000045, -0.000013, 0.000013, 0.000012, -0.000004, 0.000022, - 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, - -0.000006, -0.000070, -0.000010, 0.000202, -0.000072, -0.000007, -0.000021, - -0.000113, 0.000044, 0.000036, 0.000008, 0.000067, -0.000007, 0.000034, - 0.000049, -0.000088, 0.000028, 0.000055, -0.000005, -0.000001, -0.000001, - -0.000005, 0.000127, -0.000005, -0.000044, -0.000044, -0.000019, -0.000043, - 0.000047, -0.000005, 0.000045, 0.000045, -0.000013, 0.000012, 0.000013, - -0.000004, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, - -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, -0.000002, -0.000063, - 0.000118, -0.000078, -0.000008, 0.000028, 0.000016, -0.000005, -0.000015, - -0.000012, -0.000046, 0.000092, -0.000013, -0.000067, 0.000044, 0.000075, - -0.000044, -0.000008, 0.000469, 0.000220, -0.000516, -0.000001, -0.000001, - -0.000005, 0.000127, -0.000044, -0.000005, -0.000044, -0.000019, 1.000000, - -0.000043, 0.000028, 0.000126, -0.000080, -0.000032, -0.000002, -0.000063, - 0.000118, -0.000078, -0.000008, 0.000016, 0.000028, -0.000005, -0.000046, - -0.000015, -0.000012, 0.000092, -0.000013, -0.000044, -0.000067, 0.000044, - 0.000075, -0.000008, 0.000027, 0.000003, -0.000008, 0.000186, -0.000095, - -0.000113, 
0.000065, 0.000044, 0.000036, -0.000153, -0.000043, 0.000126, - -0.000052, -0.000032, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, - -0.000117, -0.000024, 0.000079, 0.000044, -0.000113, 0.000089, 0.000036, - -0.000012, 0.000044, 0.000079, -0.000113, 0.000036, 0.000089, -0.000012, - -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, - 0.000027, 0.000028, 0.000003, -0.000012, 0.000092, -0.000015, -0.000046, - -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, -0.000027, - 0.000011, 0.000056, -0.000029, 0.000010, 0.000016, 0.000036, -0.000082, - 0.000016, -0.000021, -0.000021, 0.000016, 0.000047, 0.000045, -0.000013, - 0.000013, 0.000012, -0.000004, -0.000059, -0.000059, 0.000044, 0.000066, - 0.000044, 0.000035, -0.000018, -0.000001, -0.000001, -0.000005, 0.000127, - -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, -0.000001, - -0.000001, -0.000001, -0.000013, -0.000013, -0.000003, -0.000013, 0.000087, - -0.000021, -0.000012, -0.000027, 0.000056, 0.000011, 0.000010, -0.000029, - 0.000036, 0.000016, -0.000082, -0.000113, 0.000044, 0.000067, 0.000008, - -0.000007, -0.000003, -0.000003, -0.000013, -0.000044, 0.000137, -0.000044, - -0.000006, -0.000001, -0.000017, -0.000041, 0.000171, -0.000059, -0.000015, - -0.000027, -0.000002, -0.000002, -0.000001, -0.000004, -0.000020, -0.000034, - -0.000024, -0.000005, 0.000216, -0.000055, -0.000021, -0.000012, -0.000075, - 0.000041, -0.000125, 0.000041, 0.000130, -0.000013, 0.000011, -0.000029, - 0.000010, 0.000019, 0.000034, -0.000001, -0.000001, -0.000013, -0.000003, - 0.000036, -0.000010, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, - 0.000044, 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, 0.000036, - 0.000044, -0.000113, 0.000069, 0.000035, -0.000007, -0.000001, -0.000001, - -0.000005, -0.000005, 0.000127, -0.000044, -0.000044, -0.000019, -0.000001, - -0.000059, -0.000017, 0.000171, -0.000041, -0.000015, -0.000027, -0.000161, - -0.000013, -0.000178, 
0.000496, -0.000026, -0.000021, 0.000041, -0.000075, - -0.000125, 0.000041, 0.000130, -0.000013, -0.000003, -0.000003, -0.000044, - 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, - -0.000013, 0.000137, -0.000044, -0.000006, 0.000050, 0.000036, -0.000010, - 0.000045, 0.000045, -0.000013, -0.000113, 0.000036, 0.000044, 0.000035, - 0.000069, -0.000007, 0.000071, 0.000173, -0.000018, -0.000007, -0.000007, - -0.000099, -0.000104, 0.000327, -0.000036, -0.000021, -0.000043, 0.000126, - -0.000052, -0.000032, -0.000133, -0.000007, 0.000099, 0.000155, -0.000010, - -0.000099, -0.000005, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, - -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, - -0.000044, -0.000006, -0.000015, -0.000006, -0.000006, -0.000055, 0.000285, - -0.000056, -0.000044, -0.000044, -0.000037, 0.000675, 0.000132, -0.000594, - -0.000070, -0.000072, -0.000007, 0.000202, -0.000010, -0.000021, -0.000095, - 0.000044, -0.000113, 0.000036, 0.000186, 0.000065, -0.000153, 0.000022, - 0.000021, -0.000006, -0.000006, -0.000006, -0.000055, 0.000285, -0.000015, - -0.000056, -0.000044, -0.000044, -0.000037, 0.000055, -0.000159, 0.000117, - 0.000056, 0.000154, -0.000011, 0.000022, 0.000021, -0.000006, -0.000059, - -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000028, - 0.000019, -0.000071, -0.000005, 0.000100, 0.000071, -0.000060, 0.000176, - -0.000010, -0.000005, -0.000084, -0.000007, 0.000202, -0.000070, -0.000010, - -0.000072, -0.000021, 0.000045, 0.000045, -0.000013, 0.000035, 0.000036, - -0.000113, 0.000044, 0.000069, -0.000007, -0.000006, -0.000006, -0.000044, - -0.000044, -0.000055, 0.000285, -0.000056, -0.000015, -0.000037, -0.000091, - 0.000346, -0.000121, 0.000055, -0.000159, 0.000056, 0.000117, 0.000154, - -0.000011, -0.000007, -0.000007, -0.000099, 0.000327, -0.000104, -0.000036, - -0.000021, 0.000173, 0.000071, -0.000018, -0.000060, -0.000010, 0.000176, - -0.000005, -0.000084, -0.000007, 
-0.000070, -0.000010, -0.000072, 0.000202, - -0.000021, 0.000044, -0.000095, 0.000036, -0.000113, 0.000186, 0.000065, - -0.000153, 0.000010, 0.000011, -0.000029, 0.000019, 0.000034, -0.000001, - -0.000001, -0.000001, -0.000001, -0.000013, -0.000013, -0.000003, 0.000087, - -0.000013, -0.000021, -0.000012, -0.000043, 0.000126, -0.000052, -0.000032, - 0.000027, 0.000003, -0.000008, -0.000113, 0.000044, 0.000036, 0.000079, - 0.000089, -0.000012, 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, - -0.000044, -0.000117, -0.000046, 0.000322, -0.000024, -0.000070, -0.000010, - 0.000202, -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, 0.000036, - 0.000008, 0.000067, -0.000007, 0.000049, 0.000034, -0.000088, 0.000028, - 0.000055, -0.000005, 0.000045, 0.000045, -0.000013, 0.000008, 0.000036, - 0.000067, -0.000113, 0.000044, -0.000007, 0.000045, 0.000045, -0.000013, - 0.000036, 0.000044, 0.000035, -0.000113, 0.000069, -0.000007, 0.000036, - -0.000113, -0.000080, -0.000010, 0.000236, -0.000080, -0.000010, 0.000032, - 0.000035, -0.000113, 0.000036, 0.000069, 0.000044, -0.000007, 0.000008, - 0.000044, -0.000113, 0.000036, 0.000067, -0.000007, -0.000065, -0.000009, - 0.000213, -0.000005, -0.000021, -0.000078, -0.000027, 0.000036, -0.000029, - 0.000010, 0.000011, 0.000056, 0.000016, -0.000082, -0.000020, 0.000013, - 0.000022, -0.000013, -0.000002, 0.000173, 0.000071, -0.000018, -0.000003, - -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, - -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000004, - -0.000029, -0.000029, 0.000099, -0.000005, -0.000010, -0.000007, -0.000027, - 0.000010, 0.000011, 0.000036, -0.000029, 0.000056, 0.000016, -0.000082, - -0.000043, 0.000126, -0.000052, -0.000032, -0.000071, -0.000005, 0.000100, - 0.000071, 0.000161, -0.000161, -0.000216, -0.000161, -0.000026, -0.000017, - -0.000013, -0.000005, 0.000530, -0.000026, -0.000024, -0.000002, -0.000008, - 0.000028, 0.000118, -0.000063, 0.000016, -0.000078, 
-0.000005, -0.000001, - -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, - 0.000047, -0.000005, -0.000015, -0.000055, -0.000056, -0.000044, -0.000006, - 0.000285, -0.000044, -0.000006, -0.000037, 0.000143, 0.000078, -0.000022, - -0.000002, -0.000008, 0.000016, -0.000063, 0.000028, 0.000118, -0.000078, - -0.000005, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, -0.000027, - 0.000036, 0.000011, 0.000056, -0.000029, 0.000016, 0.000010, -0.000082, - 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, -0.000012, 0.000092, - -0.000046, -0.000015, -0.000013, 0.000034, 0.000011, -0.000029, 0.000019, - 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, - -0.000161, 0.000161, 0.000161, -0.000216, -0.000178, -0.000216, 0.000522, - -0.000026, -0.000021, -0.000021, 0.000016, -0.000021, 0.000047, 0.000045, - 0.000016, -0.000013, 0.000012, 0.000013, -0.000004, -0.000059, -0.000059, - 0.000044, 0.000066, 0.000035, 0.000044, -0.000018, -0.000059, -0.000059, - 1.000000, 0.000044, -0.000063, 0.000024, -0.000063, 0.000024, -0.000063, - 0.000024, -0.000063, -0.000063, 0.000024, 0.000024, 0.000024, -0.000063, - -0.000063, 0.000024, 0.000024, 0.000024, 0.000024, -0.000063, -0.000063, - 0.000024, 0.000024, -0.000063, -0.000063, 0.000024, -0.000063, -0.000063, - 0.000075, 0.000024, -0.000063, 0.000044, 0.000386, -0.000043, 0.000126, - -0.000052, -0.000032, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, - -0.000117, -0.000024, 0.000044, 0.000079, -0.000113, 0.000089, 0.000036, - -0.000012, 0.000044, 0.000079, -0.000113, 0.000036, 0.000089, -0.000012, - 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000046, -0.000044, - -0.000117, 0.000322, -0.000024, 0.000028, 0.000019, -0.000005, -0.000060, - 0.000176, -0.000010, -0.000084, -0.000012, 0.000092, -0.000015, -0.000046, - -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, 0.000011, - 0.000034, -0.000029, 0.000010, 0.000019, -0.000001, -0.000006, 0.000164, - 
-0.000009, -0.000084, 0.000049, -0.000100, 0.000057, 0.000032, -0.000012, - 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, - 0.000028, 0.000003, -0.000027, 0.000011, 0.000056, 0.000036, -0.000029, - 0.000010, 0.000016, -0.000082, -0.000007, 0.000202, -0.000010, -0.000072, - -0.000070, -0.000021, 0.000027, 0.000003, -0.000008, 0.000079, 0.000044, - -0.000113, 0.000036, 0.000089, -0.000012, 0.000022, 0.000021, -0.000006, - 1.000000, 0.000024, -0.000063, 0.000005, 0.000028, -0.000002, -0.000005, - -0.000029, 0.000077, -0.000021, -0.000012, 1.000000, -0.000003, -0.000003, - -0.000044, 0.000137, -0.000044, -0.000013, -0.000006, 0.000016, -0.000063, - 0.000028, 0.000054, 0.000049, -0.000005, -0.000046, -0.000015, -0.000012, - 0.000092, -0.000013, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, - -0.000117, -0.000024, 0.000044, 0.000079, -0.000113, 0.000089, 0.000036, - -0.000012, 0.000044, -0.000095, 0.000186, -0.000113, 0.000036, 0.000065, - -0.000153, 0.000010, -0.000029, 0.000011, 0.000019, 0.000034, -0.000001, - -0.000001, -0.000001, -0.000013, -0.000013, -0.000021, -0.000003, 0.000070, - -0.000012, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, - -0.000043, 0.000027, 0.000028, 0.000003, -0.000027, 0.000011, 0.000036, - 0.000056, -0.000029, 0.000010, 0.000016, -0.000082, 0.000045, 0.000045, - -0.000013, 0.000022, 0.000021, -0.000006, 0.000036, -0.000113, -0.000080, - 0.000236, -0.000080, -0.000010, -0.000010, 0.000032, 0.000035, -0.000113, - 0.000069, 0.000044, 0.000036, -0.000007, 0.000008, 0.000044, -0.000113, - 0.000036, 0.000067, -0.000007, 0.000022, 0.000021, -0.000006, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, 
-0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, - -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, 1.000000, 0.000044, - 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, - 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000024, 0.000044, - 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, - 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, - 0.000044, 0.000044, 0.000044, -0.000063, 0.001236, 0.000044, 0.000283, - 0.000028, -0.000006, -0.000063, 0.000164, -0.000052, -0.000009, -0.000021, - -0.000040, 0.000086, -0.000029, 0.000049, 0.000034, -0.000100, 0.000036, - 0.000057, -0.000002, -0.000006, -0.000009, -0.000063, -0.000052, 0.000164, - -0.000021, 0.000710, 0.000139, -0.000636, 0.000036, -0.000113, 0.000044, - 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, - -0.000044, 0.000137, -0.000006, 0.000045, 0.000045, -0.000013, -0.000113, - 0.000044, 0.000035, 0.000036, 0.000069, -0.000007, -0.000009, 0.000164, - -0.000006, -0.000084, 0.000049, -0.000063, 0.000054, 0.000023, -0.000003, - -0.000003, -0.000044, -0.000044, 0.000137, -0.000013, -0.000006, -0.000003, - -0.000003, -0.000044, -0.000044, -0.000013, 0.000137, -0.000006, -0.000012, - 0.000092, -0.000046, -0.000015, -0.000013, -0.000027, 0.000056, 0.000011, - 0.000036, -0.000029, 0.000016, 0.000010, -0.000082, 0.000021, -0.000043, - 0.000028, 0.000027, 0.000003, 0.000036, 0.000044, -0.000113, 0.000067, - 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, -0.000044, - 0.000137, -0.000006, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, - 0.000011, 0.000034, -0.000029, 0.000019, 0.000010, -0.000001, 0.000021, - -0.000043, 0.000028, 0.000027, 0.000003, 
-0.000005, -0.000044, -0.000044, - 0.000322, -0.000117, -0.000046, -0.000024, 0.000044, 0.000186, -0.000095, - -0.000113, 0.000065, 0.000036, -0.000153, 0.000079, 0.000044, -0.000113, - 0.000036, 0.000089, -0.000012, 1.000000, -0.000000, -0.000025, -0.000025, - -0.000025, -0.000043, -0.000069, -0.000043, -0.000069, -0.000025, -0.000020, - -0.000020, -0.000000, -0.000025, -0.000020, -0.000043, -0.000067, -0.000067, - -0.000025, -0.000025, -0.000013, -0.000025, -0.000000, -0.000000, -0.000020, - -0.000020, -0.000020, -0.000067, -0.000054, -0.000054, -0.000020, -0.000054, - -0.000020, -0.000117, -0.000020, -0.000020, -0.000054, -0.000054, 0.000008, - -0.000000, -0.000020, 0.000046, -0.000043, -0.000020, 0.000046, -0.000020, - -0.000043, -0.000020, -0.000043, -0.000067, 0.000015, -0.000043, -0.000067, - -0.000025, -0.000117, -0.000067, -0.000025, -0.000054, -0.000020, -0.000069, - -0.000043, -0.000000, -0.000000, -0.000054, -0.000054, -0.000025, 0.000030, - -0.000020, -0.000020, -0.000000, -0.000014, -0.000020, -0.000020, -0.000020, - -0.000054, -0.000043, -0.000054, 0.000036, -0.000020, -0.000020, -0.000043, - -0.000067, -0.000000, -0.000025, -0.000074, -0.000020, -0.000020, -0.000043, - -0.000067, -0.000115, 0.000036, -0.000043, -0.000054, -0.000054, -0.000020, - -0.000000, -0.000020, -0.000050, -0.000025, -0.000000, 0.000030, -0.000050, - -0.000020, -0.000040, -0.000054, -0.000000, -0.000040, -0.000028, -0.000020, - -0.000025, 0.000030, -0.000025, -0.000069, -0.000007, -0.000123, 0.000009, - -0.000025, -0.000020, -0.000043, -0.000020, -0.000054, -0.000043, -0.000054, - -0.000000, -0.000025, -0.000020, -0.000054, -0.000025, -0.000065, -0.000043, - -0.000025, -0.000069, -0.000054, -0.000025, -0.000043, -0.000020, -0.000025, - 0.000015, 0.000008, -0.000025, -0.000012, -0.000014, -0.000067, -0.000020, - 0.000046, -0.000000, -0.000043, -0.000477, -0.000117, -0.000025, -0.000025, - 0.000019, -0.000017, -0.000025, -0.000067, -0.000020, -0.000020, -0.000053, - -0.000020, 
-0.000025, -0.000014, -0.000020, -0.000020, -0.000014, -0.000043, - 0.000010, -0.000085, -0.000020, -0.000020, 0.000028, 0.000008, -0.000025, - -0.000033, 0.000009, -0.000000, -0.000477, -0.000000, -0.000043, 0.000036, - -0.000043, -0.000025, -0.000025, 0.000015, -0.000040, -0.000028, 0.000046, - -0.000074, -0.000085, -0.000053, -0.000040, -0.000000, -0.000028, -0.000043, - -0.000115, -0.000054, -0.000043, -0.000040, -0.000028, -0.000043, -0.000054, - -0.000053, -0.000025, -0.000000, -0.000000, -0.000000, 0.000009, -0.000014, - -0.000000, -0.000067, -0.000067, -0.000000, -0.000025, -0.000043, -0.000020, - -0.000020, 0.000001, -0.000054, -0.000085, -0.000043, 0.000009, -0.000043, - -0.000074, -0.000053, 0.000030, 0.000030, -0.000014, -0.000069, -0.000025, - 0.000015, -0.000020, -0.000115, -0.000020, -0.000053, -0.000020, -0.000025, - -0.000069, 0.000008, -0.000053, -0.000043, 0.000009, -0.000043, -0.000085, - -0.000054, 0.000030, -0.000025, -0.000054, 0.000030, 0.000030, -0.000014, - -0.000123, -0.000025, 0.000015, 0.000015, -0.000043, 0.000009, -0.000000, - -0.000085, 0.000046, -0.000025, -0.000025, -0.000289, -0.000043, -0.000037, - -0.000014, -0.000020, -0.000020, -0.000025, 0.000030, -0.000040, -0.000054, - -0.000067, 0.000030, -0.000020, -0.000020, 0.000046, -0.000028, -0.000067, - -0.000067, -0.000043, 0.000009, -0.000025, -0.000020, 0.000046, -0.000115, - -0.000054, -0.000000, -0.000025, -0.000033, -0.000054, -0.000054, -0.000000, - -0.000020, -0.000000, -0.000054, -0.000000, -0.000020, -0.000020, -0.000054, - 0.000030, -0.000117, -0.000043, -0.000013, -0.000025, -0.000054, -0.000043, - 0.000009, -0.000057, 0.000009, 0.000046, -0.000043, -0.000040, -0.000000, - -0.000000, -0.000019, -0.000000, -0.000053, -0.000020, -0.000020, -0.000043, - 0.000015, 0.000008, -0.000026, 0.000046, -0.000054, 0.000030, 0.000030, - -0.000053, 0.000046, -0.000011, -0.000062, -0.000067, -0.000085, -0.000038, - -0.000025, -0.000011, -0.000020, -0.000025, 0.000036, -0.000043, 
0.000046, - 0.000009, 0.000028, -0.000020, 0.000008, -0.000053, -0.000025, -0.000053, - -0.000020, -0.000020, -0.000008, -0.000043, 0.000009, -0.000513, -0.000028, - -0.000020, -0.000074, -0.000289, -0.000085, -0.000062, 0.000015, -0.000115, - -0.000014, -0.000013, -0.000040, -0.000028, -0.000062, -0.000074, -0.000053, - 0.000046, -0.000040, -0.000028, 0.000036, -0.000043, -0.000025, -0.000121, - -0.000000, -0.000121, -0.000043, -0.000000, 0.000030, -0.000028, -0.000054, - -0.000000, -0.000043, -0.000020, -0.000121, -0.000026, -0.000053, -0.000020, - -0.000020, -0.000061, -0.000007, -0.000000, -0.000025, -0.000000, -0.000043, - -0.000000, -0.000020, -0.000000, 0.000015, -0.000045, 0.000046, -0.000054, - -0.000043, -0.000289, -0.000069, 0.000015, 0.000008, -0.000053, 0.000046, - 0.000046, -0.000025, -0.000098, -0.000014, -0.000000, -0.000067, -0.000043, - 0.000009, -0.000000, -0.000040, -0.000054, -0.000000, -0.000054, -0.000043, - 0.000009, 0.000030, 0.000030, -0.000043, 0.000028, -0.000115, -0.000054, - -0.000020, -0.000020, -0.000053, -0.000040, -0.000054, -0.000020, 0.000046, - 0.000009, -0.000020, -0.000020, -0.000054, -0.000013, -0.000025, -0.000054, - -0.000025, -0.000053, -0.000074, -0.000054, -0.000026, -0.000008, 0.000015, - -0.000043, -0.000025, -0.000000, -0.000000, -0.000000, -0.000020, -0.000000, - -0.000011, 0.000009, -0.000020, 0.000015, 0.000008, -0.000020, -0.000115, - 0.000036, -0.000014, 0.000002, -0.000085, -0.000020, -0.000121, -0.000121, - -0.000057, 0.000036, -0.000121, 0.000036, -0.000043, -0.000040, -0.000028, - -0.000054, 0.000030, -0.000054, -0.000074, -0.000008, 0.000008, -0.000054, - 0.000030, -0.000085, -0.000121, -0.000121, -0.000020, -0.000014, -0.000020, - -0.000008, -0.000054, -0.000074, -0.000043, 0.000009, 0.000028, -0.000062, - -0.000025, 0.000046, -0.000025, -0.000053, -0.000048, -0.000043, -0.000085, - 0.000015, -0.000062, -0.000062, -0.000011, 0.000009, -0.000000, -0.000000, - -0.000020, -0.000067, -0.000020, -0.000054, 
-0.000013, -0.000000, -0.000000, - -0.000000, -0.000006, -0.000013, 0.000022, 0.000008, 0.000030, -0.000043, - -0.000020, 0.000046, -0.000020, -0.000013, -0.000067, -0.000040, -0.000054, - 0.000046, -0.000028, -0.000054, -0.000043, 0.000173, -0.000054, 0.000030, - -0.000050, 0.000009, 0.000015, -0.000030, -0.000074, -0.000043, 0.000009, - -0.000000, -0.000000, -0.000067, -0.000000, -0.000000, -0.000085, -0.000020, - -0.000020, -0.000025, -0.000289, -0.000069, -0.000025, -0.000053, -0.000057, - -0.000026, -0.000074, 0.000046, -0.000040, -0.000028, 0.000015, -0.000054, - 0.000030, -0.000020, 0.000046, -0.000040, -0.000054, -0.000054, -0.000085, - 0.000046, 0.000009, -0.000054, -0.000054, 0.000030, 0.000022, 0.000008, - -0.000117, -0.000020, -0.000121, -0.000013, -0.000054, 0.000030, 0.000036, - -0.000040, -0.000028, -0.000020, -0.000025, 0.000022, 0.000030, 0.000030, - 0.000028, -0.000020, -0.000074, -0.000008, 0.000008, -0.000053, -0.000043, - -0.000028, 0.000036, -0.000025, -0.000014, 0.000008, -0.000020, -0.000013, - -0.000025, -0.000033, -0.000477, -0.000043, 0.000009, 0.000030, -0.000054, - -0.000054, -0.000043, -0.000115, -0.000020, -0.000020, -0.000013, -0.000069, - -0.000054, -0.000025, -0.000053, 0.000036, -0.000043, 0.000028, -0.000026, - 0.000022, -0.000065, -0.000115, -0.000053, 0.000046, -0.000053, 0.000036, - -0.000043, 0.000030, 0.000030, -0.000062, -0.000011, -0.000020, -0.000067, - 0.000028, -0.000020, -0.000054, -0.000057, -0.000008, -0.000025, -0.000020, - 0.000028, -0.000020, -0.000011, -0.000067, -0.000043, 0.000009, -0.000078, - -0.000001, -0.000115, -0.000054, -0.000013, -0.000053, 0.000046, -0.000020, - -0.000013, -0.000121, 0.000015, 0.000008, -0.000025, -0.000053, 0.000010, - -0.000045, -0.000020, -0.000043, -0.000121, -0.000121, 0.000036, -0.000043, - 0.000030, 0.000030, 0.000030, 0.000030, -0.000289, -0.000085, -0.000020, - 0.000030, 0.000030, -0.000085, -0.000020, -0.000020, -0.000033, -0.000121, - 0.000008, 0.000036, -0.000121, 
-0.000115, -0.000025, -0.000117, 0.000009, - -0.000074, -0.000020, -0.000020, -0.000020, 0.000046, -0.000013, -0.000054, - -0.000000, -0.000054, -0.000043, 0.000009, -0.000074, -0.000026, -0.000121, - 0.000030, 0.000030, -0.000020, 0.000046, -0.000043, -0.000074, -0.000008, - 0.000009, -0.000020, -0.000020, 0.000028, -0.000025, 0.000036, -0.000043, - 0.000046, -0.000025, -0.000053, 0.000015, 0.000008, -0.000025, -0.000020, - 0.000030, -0.000085, -0.000040, -0.000054, -0.000054, -0.000000, -0.000028, - -0.000054, -0.000123, -0.000043, 0.000009, -0.000123, -0.000513, -0.000123, - -0.000513, -0.000123, -0.000123, -0.000013, 0.000015, -0.000123, -0.000513, - -0.000123, 0.000030, 0.000030, -0.000014, -0.000045, -0.000008, -0.000000, - -0.000020, 0.000028, -0.000020, -0.000020, -0.000013, -0.000123, -0.000513, - 0.000002, -0.000040, -0.000054, 0.000046, -0.000028, -0.000020, -0.000289, - -0.000000, 0.000028, -0.000020, -0.000008, -0.000050, -0.000033, 0.000028, - -0.000011, -0.000011, -0.000067, -0.000026, -0.000115, -0.000054, -0.000043, - 0.000046, -0.000062, -0.000043, 0.000028, 0.000009, 0.000036, -0.000121, - -0.000025, -0.000053, -0.000020, -0.000020, -0.000020, -0.000013, 0.000015, - 0.000008, -0.000025, -0.000043, 0.000009, -0.000054, -0.000043, 0.000036, - -0.000043, -0.000013, -0.000054, 0.000030, -0.000054, 0.000008, -0.000053, - -0.000020, 0.000022, -0.000038, -0.000121, 0.000036, -0.000043, -0.000038, - -0.000115, -0.000033, -0.000121, -0.000053, 0.000046, 0.000028, -0.000008, - -0.000062, -0.000025, -0.000033, -0.000006, 0.000015, 0.000008, -0.000000, - -0.000000, -0.000054, 0.000030, -0.000054, 0.000030, -0.000000, -0.000014, - -0.000020, -0.000000, -0.000014, -0.000020, -0.000013, 0.000015, -0.000025, - -0.000326, 0.000002, -0.000054, 0.000030, 0.000028, -0.000020, -0.000013, - 0.000046, -0.000043, -0.000121, -0.000053, 0.000046, 0.000030, 0.000030, - 0.000046, 0.000028, -0.000121, -0.000043, -0.000067, -0.000062, -0.000008, - -0.000121, -0.000040, 
-0.000025, -0.000013, -0.000115, 0.000015, 0.000008, - -0.000040, -0.000028, -0.000054, -0.000025, -0.000053, -0.000040, -0.000054, - -0.000011, -0.000045, -0.000085, 0.000046, -0.000000, -0.000054, -0.000006, - -0.000054, 0.000030, -0.000043, -0.000054, 0.000022, -0.000106, -0.000115, - -0.000025, -0.000045, -0.000011, -0.000062, -0.000038, 0.000009, -0.000054, - -0.000028, 0.000036, -0.000000, -0.000020, -0.000011, -0.000000, -0.000020, - -0.000043, 0.000009, -0.000513, -0.000043, -0.000043, -0.000008, -0.000020, - -0.000020, -0.000289, -0.000069, -0.000067, -0.000040, -0.000054, 0.000046, - -0.000054, -0.000028, 0.000036, 0.000015, -0.000105, 0.000046, 0.000011, - -0.000014, -0.000020, -0.000013, -0.000053, 0.000046, -0.000000, -0.000067, - -0.000067, -0.000062, -0.000008, -0.000000, -0.000067, -0.000289, -0.000069, - 0.000036, -0.000008, -0.000025, -0.000115, 0.000599, -0.000040, -0.000028, - -0.000011, -0.000062, -0.000025, -0.000053, -0.000028, 0.000036, 0.000036, - -0.000121, -0.000043, 0.000009, 0.000036, -0.000121, -0.000040, -0.000028, - -0.000043, 0.000028, -0.000053, 0.000046, -0.000477, -0.000117, 0.000046, - -0.000054, -0.000289, -0.000085, 0.000046, -0.000054, -0.000025, -0.000053, - -0.000477, -0.000043, 0.000028, -0.000008, -0.000020, -0.000043, 0.000028, - 0.000028, -0.000011, -0.000020, -0.000043, -0.000011, -0.000062, -0.000013, - 0.000008, -0.000053, -0.000053, 0.000015, 0.000008, 0.000008, -0.000053, - -0.000000, -0.000020, -0.000011, -0.000050, -0.000008, -0.000000, -0.000020, - -0.000008, -0.000057, -0.000008, -0.000121, -0.000030, 0.000015, -0.000025, - 0.000030, 0.000030, 0.000008, -0.000053, -0.000033, -0.000011, -0.000045, - -0.000029, -0.000000, -0.000289, -0.000054, -0.000000, -0.000020, -0.000013, - -0.000074, -0.000008, -0.000000, -0.000014, -0.000020, 0.000009, -0.000000, - -0.000014, -0.000020, -0.000033, -0.000043, 0.000002, -0.000054, 0.000030, - 0.000046, -0.000054, -0.000043, -0.000074, -0.000085, -0.000020, -0.000002, - 
-0.000020, -0.000513, -0.000054, -0.000043, -0.000121, 0.000030, 0.000030, - 0.000001, -0.000045, 0.000036, -0.000043, 0.000009, -0.000020, -0.000020, - -0.000040, -0.000028, -0.000011, -0.000062, -0.000025, -0.000053, 0.000046, - -0.000033, -0.000054, 0.000030, -0.000012, 0.000056, -0.000040, -0.000028, - -0.000513, -0.000123, 0.000036, -0.000513, -0.000025, -0.000043, -0.000115, - -0.000054, -0.000043, -0.000025, -0.000033, -0.000000, -0.000000, -0.000000, - -0.000008, -0.000020, 0.000046, -0.000000, -0.000011, -0.000020, 0.000015, - 0.000008, -0.000013, 0.000015, 0.000009, -0.000020, -0.000020, -0.000115, - -0.000054, -0.000043, 0.000036, -0.000043, 0.000002, 0.000015, 0.000036, - -0.000121, -0.000000, -0.000000, -0.000067, 0.001219, 0.000036, 0.000027, - -0.000121, -0.000000, -0.000000, -0.000067, -0.000025, -0.000033, -0.000121, - 0.000008, -0.000053, -0.000053, -0.000000, -0.000020, -0.000000, -0.000020, - -0.000008, -0.000013, 0.000015, -0.000054, 0.000036, -0.000043, -0.000062, - -0.000008, -0.000054, 0.000030, 0.000022, -0.000038, -0.000115, -0.000011, - -0.000045, -0.000054, 0.000009, -0.000028, 0.000036, 0.000022, -0.000065, - -0.000115, 0.000030, 0.000030, 0.000025, -0.000040, -0.000028, -0.000050, - -0.000008, -0.000121, -0.000014, 0.000030, 0.000030, -0.000477, -0.000117, - -0.000115, -0.000033, -0.000020, -0.000477, -0.000085, -0.000020, -0.000054, - -0.000025, 0.000046, -0.000115, -0.000040, -0.000028, -0.000289, -0.000337, - -0.000085, -0.000008, -0.000050, 0.000046, -0.000115, -0.000033, -0.000011, - -0.000062, -0.000121, -0.000025, -0.000053, 0.000015, 0.000008, 0.000046, - -0.000054, -0.000043, -0.000040, -0.000054, -0.000040, -0.000028, -0.000000, - -0.000028, -0.000054, -0.000074, -0.000008, -0.000020, -0.000037, 0.000030, - 0.000030, -0.000057, -0.000008, -0.000121, 0.000000, -0.000000, 0.000036, - -0.000043, -0.000289, -0.000064, -0.000000, -0.000000, -0.000008, -0.000020, - -0.000000, -0.000011, -0.000020, -0.000048, -0.000013, 0.000015, 
-0.000054, - 0.001682, -0.000121, 0.000008, -0.000053, -0.000053, 0.000015, 0.000008, - -0.000025, 0.000046, -0.000000, -0.000020, -0.000011, -0.000513, -0.000043, - -0.000000, -0.000020, -0.000008, -0.000115, -0.000025, -0.000053, 0.000009, - 0.000096, -0.000074, 0.001684, 0.000030, -0.000043, 0.000028, 0.000008, - -0.000053, -0.000033, -0.000011, -0.000062, 0.000028, -0.000020, -0.000008, - -0.000040, -0.000020, -0.000000, -0.000028, -0.000054, -0.000020, 0.005404, - -0.000477, -0.000085, -0.000043, -0.000513, -0.000123, -0.000054, 0.000030, - -0.000040, -0.000028, -0.000513, -0.000043, 0.000030, 0.000030, 0.000028, - -0.000008, -0.000020, -0.000054, 0.000030, 0.000028, -0.000011, -0.000020, - 0.000008, -0.000033, -0.000053, 1.000000, 0.035299}; + 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, 0.000048, -0.000046, 0.000034, -0.000034, 0.000018, + -0.000001, -0.000013, 0.000013, -0.000017, 0.000028, -0.000021, -0.000005, 0.000028, -0.000020, 0.000020, + -0.000001, 0.000048, -0.000046, 0.000034, -0.000034, 0.000078, -0.000004, 0.000055, -0.000074, -0.000055, + 0.000048, -0.000046, 0.000034, -0.000034, 0.000270, -0.000015, -0.000074, 0.000055, -0.000099, -0.000099, + 0.000034, -0.000002, -0.000032, 0.000024, -0.000024, 0.000028, -0.000020, -0.000001, 0.000020, 0.000044, + -0.000017, -0.000078, 0.000042, 0.000029, -0.000029, 0.000018, -0.000001, -0.000017, 0.000013, -0.000013, + 0.000028, -0.000021, -0.000005, 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, 0.000018, -0.000001, + -0.000017, -0.000013, 0.000013, 0.000018, -0.000001, -0.000017, 0.000013, -0.000013, 0.000018, -0.000001, + -0.000017, -0.000013, 0.000013, 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, 0.000028, -0.000021, + -0.000005, -0.000017, 0.000134, -0.000007, -0.000127, -0.000047, 0.000047, 0.000060, -0.000047, 0.000028, + -0.000021, -0.000005, -0.000074, 0.000270, -0.000015, 0.000055, -0.000099, -0.000099, 0.000124, -0.000059, + -0.000059, -0.000044, 0.000044, -0.000003, 
-0.000044, 0.000041, 0.000018, -0.000001, -0.000013, -0.000017, + 0.000013, 0.000034, -0.000002, -0.000024, 0.000024, -0.000032, 0.000028, 0.000020, -0.000020, -0.000001, + 0.000028, -0.000020, 0.000020, -0.000001, 0.000028, -0.000021, -0.000005, 0.000034, -0.000002, -0.000032, + 0.000024, -0.000024, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, 0.000018, -0.000001, -0.000017, + -0.000013, 0.000013, 0.000048, -0.000046, 0.000034, -0.000034, 0.000098, -0.000017, -0.000175, 0.000078, + 0.000065, -0.000065, 0.000028, -0.000020, -0.000001, 0.000020, 0.000018, -0.000001, -0.000017, -0.000013, + 0.000013, 0.000044, -0.000017, 0.000029, 0.000042, -0.000078, -0.000029, 0.000028, -0.000001, 0.000020, + -0.000020, 0.000124, -0.000059, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, 0.000065, + -0.000021, -0.000033, 0.000028, -0.000020, 0.000020, -0.000001, 0.000065, -0.000021, -0.000033, 0.000065, + -0.000021, -0.000033, 0.000078, -0.000004, 0.000055, -0.000074, -0.000055, 0.000024, 0.000017, -0.000023, + -0.000017, 0.000048, -0.000002, 0.000034, -0.000034, -0.000046, 0.000044, -0.000017, 0.000029, -0.000078, + 0.000042, -0.000029, 0.000098, -0.000175, -0.000017, 0.000078, 0.000065, -0.000065, 0.000024, 0.000017, + -0.000023, -0.000017, 0.000124, -0.000059, -0.000059, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, + 0.000028, -0.000021, -0.000005, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, 0.000048, -0.000046, + 0.000034, -0.000034, 0.000298, -0.000027, -0.000266, 0.000120, 0.000099, -0.000209, -0.000017, 0.000185, + -0.000175, -0.000009, 0.000078, 0.000065, -0.000065, -0.000065, 0.000024, -0.000023, 0.000017, -0.000017, + 0.000024, -0.000017, -0.000023, 0.000017, 0.000028, -0.000020, -0.000001, 0.000020, 0.000018, -0.000001, + 0.000013, -0.000013, -0.000017, -0.000046, 0.000283, -0.000266, -0.000015, 0.000099, 0.000134, -0.000099, + -0.000099, -0.000017, -0.000009, 0.000185, -0.000065, 0.000078, -0.000175, 0.000065, -0.000065, 
0.000298, + -0.000027, -0.000266, 0.000120, 0.000099, -0.000209, 0.000028, -0.000021, -0.000005, 0.000028, 0.000020, + -0.000020, -0.000001, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, + -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000048, -0.000046, 0.000034, -0.000034, + 0.000024, -0.000017, -0.000023, 0.000017, 0.000028, -0.000020, 0.000020, -0.000001, 0.000124, -0.000003, + -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000028, -0.000021, -0.000005, 0.000048, -0.000046, + 0.000034, -0.000034, 0.000028, -0.000021, -0.000005, 0.000018, -0.000001, 0.000013, -0.000017, -0.000013, + -0.000001, 0.000018, -0.000013, 0.000013, -0.000017, 0.000028, -0.000001, 0.000020, -0.000020, -0.000027, + -0.000027, 0.000298, -0.000266, 0.000099, 0.000120, -0.000209, 0.000028, -0.000020, -0.000001, 0.000020, + 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, + -0.000059, -0.000059, 0.000028, -0.000020, 0.000020, -0.000001, 0.000065, -0.000021, -0.000033, 0.000124, + -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000059, -0.000059, -0.000044, + 0.000044, -0.000003, -0.000044, 0.000041, 0.000124, -0.000059, -0.000059, -0.000044, 0.000044, -0.000003, + -0.000044, 0.000041, 0.000024, 0.000017, -0.000023, -0.000017, 0.000028, -0.000001, -0.000027, -0.000020, + 0.000020, 0.000048, -0.000046, 0.000034, -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, 0.000078, + -0.000004, 0.000055, -0.000055, -0.000074, -0.000074, 0.000270, -0.000015, 0.000055, -0.000099, -0.000099, + 0.000034, -0.000002, -0.000024, 0.000024, -0.000032, -0.000032, 0.000283, -0.000266, -0.000015, -0.000099, + 0.000123, 0.000099, -0.000099, 0.000028, 0.000020, -0.000020, -0.000001, 0.000298, -0.000266, -0.000027, + 0.000099, 0.000120, -0.000209, 0.000024, -0.000017, -0.000023, 0.000017, 0.000065, -0.000021, -0.000033, + -0.000074, 0.000270, -0.000015, -0.000099, 
0.000055, -0.000099, 0.000124, -0.000003, -0.000003, -0.000059, + -0.000059, 0.000044, -0.000044, 0.000028, 0.000020, -0.000020, -0.000001, 0.000124, -0.000059, -0.000059, + -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000048, -0.000046, -0.000034, 0.000034, 0.000048, + -0.000046, 0.000034, -0.000034, 0.000098, -0.000175, -0.000017, 0.000065, 0.000078, -0.000065, 0.000024, + 0.000017, -0.000023, -0.000017, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, + 0.000065, -0.000021, -0.000033, 0.000034, -0.000002, -0.000024, 0.000024, -0.000032, 0.000048, -0.000046, + 0.000034, -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, 0.000065, -0.000021, -0.000033, 0.000124, + -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, 0.000124, -0.000059, -0.000059, -0.000044, + -0.000003, 0.000044, -0.000044, 0.000041, 0.000124, -0.000059, -0.000059, -0.000044, -0.000003, 0.000044, + -0.000044, 0.000041, 0.000138, -0.000095, -0.000099, 0.000071, 0.000048, -0.000046, -0.000034, 0.000034, + 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, 0.000028, -0.000021, -0.000005, + 0.000028, -0.000021, -0.000005, 0.000028, -0.000021, -0.000005, 0.000124, -0.000003, -0.000003, -0.000059, + -0.000059, 0.000044, -0.000044, 0.000048, -0.000046, 0.000034, -0.000034, 0.000124, -0.000003, -0.000003, + 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, 0.000138, -0.000095, 0.000071, -0.000099, 0.000028, -0.000021, -0.000005, 0.000124, -0.000003, + -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000065, -0.000021, -0.000033, 0.000124, -0.000003, + -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000028, -0.000021, -0.000005, 0.000071, -0.000127, + -0.000017, 0.000047, 0.000060, -0.000047, 0.000028, 0.000020, -0.000020, -0.000001, 0.000048, -0.000046, + 0.000034, -0.000034, -0.000266, 0.000283, -0.000015, -0.000046, -0.000099, -0.000099, 
0.000134, 0.000099, + -0.000032, -0.000015, -0.000266, 0.000283, -0.000099, 0.000123, 0.000099, -0.000099, 0.000241, -0.000013, + 0.000161, -0.000161, -0.000432, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, + 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000048, -0.000046, 0.000034, + -0.000034, -0.000059, -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, -0.000059, + -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000028, -0.000020, 0.000020, + -0.000001, 0.000138, -0.000095, -0.000099, 0.000071, 0.000028, -0.000021, -0.000005, 0.000124, -0.000059, + -0.000059, -0.000003, 0.000044, -0.000044, -0.000003, 0.000124, -0.000003, -0.000003, -0.000059, 0.000044, + -0.000044, -0.000059, -0.000044, 0.000044, -0.000059, 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, + -0.000044, 0.000041, -0.000059, 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, + 0.000065, -0.000021, -0.000033, 0.000138, -0.000095, -0.000099, 0.000071, 0.000048, -0.000046, 0.000034, + -0.000034, 0.000028, -0.000001, 0.000020, -0.000027, -0.000020, -0.000059, -0.000059, 0.000124, -0.000044, + -0.000003, 0.000044, -0.000044, 0.000041, 0.000138, -0.000095, -0.000099, 0.000071, -0.000001, 0.000018, + 0.000013, -0.000013, -0.000017, 0.000048, -0.000046, 0.000034, -0.000034, 0.000048, -0.000046, 0.000034, + -0.000034, 0.000044, -0.000017, -0.000078, 0.000029, 0.000042, -0.000029, -0.000059, -0.000059, 0.000124, + -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, 0.000071, -0.000127, -0.000017, 0.000060, 0.000047, + -0.000047, -0.000017, 0.000134, -0.000007, -0.000127, 0.000047, 0.000060, -0.000047, -0.000047, 0.000124, + -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, + -0.000044, -0.000059, -0.000059, 0.000138, -0.000099, 0.000071, -0.000095, -0.000059, 0.000124, -0.000059, + -0.000044, -0.000003, 0.000044, 
-0.000044, 0.000041, -0.000002, 0.000048, -0.000046, -0.000034, 0.000034, + 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, -0.000007, -0.000017, 0.000134, 0.000047, 0.000060, + -0.000047, -0.000047, -0.000127, -0.000001, 0.000018, -0.000013, 0.000013, -0.000017, -0.000032, 0.000283, + -0.000015, -0.000266, 0.000099, 0.000123, -0.000099, -0.000099, -0.000027, -0.000266, 0.000298, -0.000209, + 0.000120, 0.000099, 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, 0.000138, -0.000095, -0.000099, + 0.000071, 0.000138, -0.000095, -0.000099, 0.000071, -0.000015, 0.000283, -0.000046, -0.000266, 0.000134, + 0.000099, -0.000099, -0.000099, 0.000028, 0.000020, -0.000020, -0.000001, 0.000028, -0.000021, -0.000005, + 0.000065, -0.000021, 0.000016, -0.000048, -0.000266, -0.000015, 0.000283, -0.000046, -0.000099, -0.000099, + 0.000134, 0.000099, -0.000266, 0.000283, -0.000015, -0.000046, 0.000134, 0.000099, -0.000099, -0.000099, + -0.000032, -0.000015, -0.000266, 0.000283, -0.000099, -0.000099, 0.000099, 0.000123, 0.000185, -0.000009, + -0.000017, 0.000065, -0.000175, 0.000078, -0.000065, -0.000065, -0.000002, 0.000048, -0.000034, -0.000046, + 0.000034, -0.000001, 0.000018, 0.000013, -0.000013, -0.000017, 0.000018, -0.000001, -0.000017, -0.000013, + 0.000013, -0.000009, -0.000175, -0.000017, 0.000185, -0.000065, -0.000065, 0.000078, 0.000065, -0.000001, + 0.000018, 0.000013, -0.000013, -0.000017, 0.000028, -0.000021, -0.000005, 0.000124, -0.000003, -0.000003, + 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, -0.000059, 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, -0.000059, + 0.000124, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, -0.000002, 0.000048, -0.000034, + -0.000046, 0.000034, 0.000138, 0.000071, -0.000095, -0.000099, -0.000001, 0.000018, 0.000013, -0.000013, + -0.000017, 0.000018, -0.000001, 0.000013, -0.000013, -0.000017, 0.000018, -0.000001, 
-0.000017, -0.000013, + 0.000013, 0.000065, -0.000021, -0.000033, -0.000001, 0.000028, -0.000020, -0.000027, 0.000020, 0.000138, + -0.000099, 0.000071, -0.000095, 0.000141, -0.000007, 0.000099, -0.000099, -0.000133, 0.000141, -0.000007, + -0.000133, -0.000099, 0.000099, 0.000065, -0.000021, -0.000033, 0.000018, -0.000001, -0.000017, -0.000013, + 0.000013, -0.000127, -0.000017, 0.000071, 0.000060, 0.000047, -0.000047, 0.000018, -0.000001, 0.000013, + -0.000013, -0.000017, 0.000065, -0.000021, -0.000033, 0.000060, -0.000003, 0.000034, -0.000041, 0.000042, + 0.000138, -0.000095, -0.000099, 0.000071, 0.000185, -0.000009, -0.000017, -0.000175, 0.000065, 0.000078, + -0.000065, -0.000065, 0.000018, -0.000001, -0.000013, 0.000013, -0.000017, -0.000009, -0.000175, -0.000017, + 0.000185, -0.000065, -0.000065, 0.000078, 0.000065, 0.000141, -0.000007, 0.000099, -0.000133, -0.000099, + 0.000141, -0.000007, -0.000133, -0.000099, 0.000099, -0.000133, 0.000297, 0.000099, 0.000099, -0.000133, + -0.000209, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, -0.000001, 0.000018, + -0.000013, 0.000013, -0.000017, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, + -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, -0.000059, -0.000059, + 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, 0.000034, -0.000002, -0.000032, -0.000024, + 0.000024, -0.000032, 0.000283, -0.000015, -0.000266, -0.000099, -0.000099, 0.000123, 0.000099, 0.000048, + -0.000002, 0.000034, -0.000034, -0.000046, -0.000001, 0.000018, 0.000013, -0.000013, -0.000017, 0.000028, + -0.000021, -0.000005, 0.000124, -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, 0.000124, + -0.000003, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, 0.000138, 0.000071, -0.000095, -0.000099, + 0.000065, -0.000021, -0.000033, 0.000141, -0.000007, 0.000099, -0.000133, -0.000099, 0.000141, -0.000007, + -0.000099, -0.000133, 
0.000099, 0.000138, -0.000095, -0.000099, 0.000071, 0.000138, -0.000095, 0.000071, + -0.000099, -0.000017, 0.000134, -0.000007, -0.000047, 0.000060, -0.000127, 0.000047, -0.000047, 0.000028, + -0.000021, -0.000005, 0.000141, -0.000099, 0.000099, -0.000133, -0.000007, -0.000017, 0.000071, -0.000127, + 0.000047, 0.000060, -0.000047, 0.000141, -0.000007, -0.000133, -0.000099, 0.000099, -0.000133, 0.000297, + 0.000099, -0.000133, 0.000099, -0.000209, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, 0.000124, + -0.000059, -0.000003, -0.000044, -0.000044, 0.000044, 0.000041, -0.000059, 0.000124, -0.000059, -0.000003, + -0.000044, -0.000044, 0.000044, 0.000041, -0.000017, -0.000007, 0.000134, -0.000127, -0.000047, 0.000060, + -0.000047, 0.000047, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, + 0.000138, 0.000071, -0.000099, -0.000095, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, + -0.000044, 0.000041, 0.000028, -0.000021, -0.000005, 0.000138, -0.000099, -0.000095, 0.000071, 0.000141, + -0.000099, 0.000099, -0.000007, -0.000133, 0.000141, -0.000007, -0.000133, 0.000099, -0.000099, 0.000065, + -0.000021, -0.000033, 0.000028, -0.000021, -0.000005, 0.000018, -0.000001, -0.000017, -0.000013, 0.000013, + -0.000017, 0.000071, -0.000127, 0.000060, 0.000047, -0.000047, 0.000124, -0.000059, -0.000059, -0.000003, + 0.000044, -0.000044, -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, + -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, + 0.000078, -0.000004, -0.000055, 0.000055, -0.000074, -0.000015, -0.000074, 0.000270, -0.000099, -0.000099, + 0.000055, 0.000141, -0.000099, -0.000007, -0.000133, 0.000099, 0.000141, -0.000007, -0.000099, -0.000133, + 0.000099, -0.000133, 0.000297, 0.000099, -0.000133, 0.000099, -0.000209, 
0.000018, -0.000001, -0.000017, + 0.000013, -0.000013, -0.000017, 0.000098, -0.000175, -0.000065, 0.000065, 0.000078, 0.000018, -0.000001, + -0.000013, 0.000013, -0.000017, -0.000127, -0.000017, 0.000071, 0.000060, 0.000047, -0.000047, 0.000018, + -0.000001, -0.000013, -0.000017, 0.000013, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, + -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, + 0.000124, -0.000059, -0.000059, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, 0.000138, 0.000071, + -0.000095, -0.000099, 0.000141, 0.000099, -0.000007, -0.000133, -0.000099, 0.000138, -0.000095, 0.000071, + -0.000099, 0.000028, -0.000001, 0.000020, -0.000020, -0.000027, 0.000141, -0.000007, 0.000099, -0.000099, + -0.000133, -0.000133, 0.000297, 0.000099, 0.000099, -0.000133, -0.000209, 0.000028, -0.000021, -0.000005, + 0.000034, -0.000002, -0.000032, 0.000024, -0.000024, -0.000015, -0.000032, 0.000283, -0.000266, -0.000099, + 0.000099, 0.000123, -0.000099, -0.000133, 0.000297, 0.000099, -0.000133, 0.000099, -0.000209, -0.000059, + 0.000124, -0.000059, -0.000044, 0.000044, -0.000003, 0.000041, -0.000044, -0.000017, 0.000185, -0.000175, + -0.000009, -0.000065, -0.000065, 0.000078, 0.000065, 0.000141, -0.000099, 0.000099, -0.000133, -0.000007, + 0.000141, -0.000007, -0.000133, -0.000099, 0.000099, -0.000017, -0.000009, 0.000185, 0.000065, -0.000175, + 0.000078, -0.000065, -0.000065, 0.000048, 0.000034, -0.000046, -0.000034, 0.000048, 0.000034, -0.000046, + -0.000034, 0.000138, -0.000099, 0.000071, -0.000095, -0.000001, 0.000018, -0.000017, 0.000013, -0.000013, + 0.000065, -0.000021, -0.000033, 0.000028, -0.000001, 0.000020, -0.000020, -0.000027, -0.000266, -0.000027, + 0.000298, 0.000099, 0.000120, -0.000209, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, -0.000059, 0.000124, -0.000059, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000065, + -0.000021, -0.000033, 
0.000228, -0.000013, -0.000161, -0.000216, -0.000216, -0.000216, 0.000390, 0.000161, + -0.000271, 0.000161, 0.000028, -0.000021, -0.000005, 0.000065, -0.000021, -0.000033, -0.000003, 0.000124, + -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000044, -0.000044, 0.000028, -0.000021, -0.000005, + -0.000017, 0.000185, -0.000009, -0.000065, -0.000175, -0.000065, 0.000078, 0.000065, 0.000124, -0.000003, + -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, + -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, + 0.000138, 0.000071, -0.000099, -0.000095, -0.000059, -0.000059, 0.000124, 0.000044, -0.000044, -0.000003, + -0.000044, 0.000041, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000044, + -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, 0.000044, -0.000044, -0.000003, -0.000059, -0.000059, + 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, -0.000017, 0.000185, -0.000009, -0.000175, + -0.000065, 0.000078, -0.000065, 0.000065, -0.000009, -0.000017, -0.000175, 0.000185, -0.000065, 0.000065, + -0.000065, 0.000078, 0.000028, -0.000020, 0.000020, -0.000001, -0.000059, -0.000059, 0.000124, 0.000044, + -0.000003, -0.000044, -0.000044, 0.000041, 0.000048, -0.000046, 0.000034, -0.000034, -0.000266, -0.000027, + 0.000298, 0.000099, 0.000120, -0.000209, -0.000002, 0.000048, -0.000046, -0.000034, 0.000034, 0.000018, + -0.000001, -0.000017, -0.000013, 0.000013, -0.000175, -0.000017, 0.000098, 0.000078, 0.000065, -0.000065, + 0.000048, -0.000046, 0.000034, -0.000034, 0.000028, -0.000020, -0.000001, 0.000020, 0.000078, -0.000004, + -0.000055, -0.000074, 0.000055, -0.000015, -0.000074, 0.000270, -0.000099, 0.000055, -0.000099, 0.000028, + -0.000021, -0.000005, 0.000138, 0.000071, -0.000095, -0.000099, 0.000028, -0.000021, -0.000005, -0.000001, + 0.000028, -0.000020, 0.000020, -0.000027, -0.000133, 0.000297, 0.000099, 
-0.000133, 0.000099, -0.000209, + 0.000065, -0.000021, -0.000048, 0.000016, 0.000018, -0.000001, -0.000013, 0.000013, -0.000017, -0.000175, + -0.000017, 0.000098, 0.000078, 0.000065, -0.000065, 0.000018, -0.000001, -0.000013, -0.000017, 0.000013, + -0.000127, -0.000017, 0.000071, 0.000060, 0.000047, -0.000047, -0.000133, 0.000297, 0.000099, 0.000099, + -0.000133, -0.000209, -0.000001, 0.000018, -0.000017, -0.000013, 0.000013, 0.000065, -0.000021, -0.000033, + -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000138, 0.000071, -0.000099, + -0.000095, 0.000138, -0.000095, -0.000099, 0.000071, -0.000003, -0.000003, 0.000124, -0.000059, 0.000044, + -0.000044, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000003, + 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, -0.000003, + 0.000044, -0.000044, -0.000044, 0.000041, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, -0.000017, + 0.000185, -0.000009, -0.000175, 0.000065, 0.000078, -0.000065, -0.000065, -0.000017, -0.000175, 0.000098, + 0.000078, 0.000065, -0.000065, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, + 0.000041, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, -0.000059, + -0.000059, 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, 0.000138, 0.000071, -0.000099, + -0.000095, 0.000065, -0.000021, 0.000016, -0.000048, -0.000017, -0.000009, 0.000185, -0.000175, -0.000065, + -0.000065, 0.000078, 0.000065, 0.000065, -0.000021, -0.000033, 0.000065, -0.000021, -0.000033, -0.000003, + -0.000003, 0.000124, 0.000044, -0.000044, -0.000059, -0.000059, 0.000065, -0.000021, -0.000033, 0.000138, + -0.000095, 0.000071, -0.000099, 0.000124, -0.000059, -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, + 0.000041, 
0.000028, 0.000020, -0.000001, -0.000020, 0.000065, -0.000021, -0.000033, 0.000124, -0.000003, + -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, + -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, + -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000138, -0.000095, -0.000099, + 0.000071, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, 0.000124, + -0.000059, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000003, + -0.000044, 0.000044, -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, + -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, + 0.000270, -0.000015, 0.000055, -0.000074, -0.000099, -0.000099, -0.000003, -0.000003, 0.000124, 0.000044, + -0.000044, -0.000059, -0.000059, -0.000017, 0.000044, 0.000029, -0.000078, 0.000042, -0.000029, 0.000044, + -0.000017, 0.000042, 0.000029, -0.000078, -0.000029, 0.000065, -0.000021, -0.000033, 0.000028, -0.000021, + -0.000005, 0.000048, -0.000002, -0.000046, -0.000034, 0.000034, -0.000015, -0.000266, -0.000046, 0.000283, + -0.000099, -0.000099, 0.000134, 0.000099, -0.000059, -0.000059, 0.000124, -0.000044, -0.000003, 0.000044, + -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, + 0.000138, -0.000095, -0.000099, 0.000071, 0.000048, -0.000046, 0.000034, -0.000034, 0.000048, -0.000046, + 0.000034, -0.000034, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, + -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000028, -0.000021, -0.000005, + -0.000004, 0.000078, 0.000055, -0.000055, -0.000074, -0.000032, 0.000283, -0.000015, -0.000266, 0.000123, + -0.000099, 0.000099, -0.000099, -0.000027, -0.000266, 
0.000298, 0.000099, 0.000120, -0.000209, 0.000028, + 0.000020, -0.000020, -0.000001, 0.000078, -0.000004, -0.000055, -0.000074, 0.000055, 0.000028, -0.000021, + -0.000005, 0.000048, -0.000046, 0.000034, -0.000034, -0.000074, 0.000270, -0.000015, -0.000099, 0.000055, + -0.000099, -0.000046, -0.000015, -0.000266, 0.000283, -0.000099, 0.000099, -0.000099, 0.000134, 0.000018, + -0.000001, -0.000017, 0.000013, -0.000013, -0.000175, -0.000017, 0.000098, 0.000065, 0.000078, -0.000065, + -0.000133, 0.000297, -0.000133, 0.000099, -0.000209, 0.000099, -0.000003, -0.000003, 0.000124, -0.000059, + 0.000044, -0.000044, -0.000059, -0.000003, 0.000124, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, + -0.000003, -0.000003, 0.000124, -0.000059, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, 0.000124, + 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, 0.000044, -0.000044, + -0.000003, -0.000044, 0.000041, -0.000003, 0.000124, -0.000003, -0.000059, -0.000059, 0.000044, -0.000044, + -0.000059, -0.000059, 0.000124, -0.000044, -0.000003, 0.000044, -0.000044, 0.000041, 0.000065, -0.000021, + -0.000033, 0.000138, -0.000095, 0.000071, -0.000099, 0.000065, -0.000021, -0.000033, 0.000048, -0.000046, + 0.000034, -0.000034, 0.000048, -0.000046, 0.000034, -0.000034, 0.000065, -0.000021, -0.000033, 0.000065, + -0.000021, -0.000033, -0.000003, -0.000003, 0.000124, -0.000059, 0.000044, -0.000044, -0.000059, 0.000028, + -0.000021, -0.000005, -0.000017, 0.000134, -0.000007, -0.000127, 0.000047, 0.000060, -0.000047, -0.000047, + -0.000017, -0.000127, 0.000071, 0.000060, 0.000047, -0.000047, -0.000003, 0.000124, -0.000003, 0.000044, + -0.000044, -0.000059, -0.000059, 0.000034, -0.000002, -0.000032, -0.000024, 0.000024, -0.000015, -0.000266, + -0.000032, 0.000283, -0.000099, -0.000099, 0.000123, 0.000099, 0.000065, -0.000021, -0.000033, -0.000003, + 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, 
-0.000044, + -0.000003, 0.000044, -0.000044, 0.000041, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000044, -0.000003, -0.000044, 0.000041, 0.000065, + -0.000021, -0.000033, 0.000138, -0.000095, -0.000099, 0.000071, 0.000048, -0.000046, 0.000034, -0.000034, + 0.000138, -0.000095, 0.000071, -0.000099, 0.000028, -0.000020, -0.000001, 0.000020, 0.000028, -0.000020, + -0.000001, 0.000020, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000028, + -0.000020, -0.000001, 0.000020, -0.000003, -0.000003, 0.000124, 0.000044, -0.000044, -0.000059, -0.000059, + 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000044, -0.000044, 0.000028, + -0.000021, -0.000005, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000044, + -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, 0.000044, -0.000044, -0.000003, -0.000001, 0.000028, + 0.000020, -0.000020, -0.000027, -0.000017, 0.000185, -0.000009, -0.000175, -0.000065, 0.000065, 0.000078, + -0.000065, -0.000017, -0.000009, -0.000175, 0.000185, 0.000078, 0.000065, -0.000065, -0.000065, -0.000001, + 0.000018, -0.000013, 0.000013, -0.000017, 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, + -0.000059, -0.000003, 0.000124, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, + 0.000124, -0.000003, -0.000044, 0.000044, -0.000044, 0.000041, 0.000124, -0.000003, -0.000003, 0.000044, + -0.000044, -0.000059, -0.000059, 0.000028, -0.000021, -0.000005, -0.000059, -0.000059, 0.000124, -0.000003, + 0.000044, -0.000044, -0.000003, -0.000003, -0.000003, 0.000124, -0.000059, -0.000059, 0.000044, -0.000044, + -0.000059, -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, 0.000138, -0.000099, + 0.000071, -0.000095, 0.000048, -0.000034, -0.000046, 0.000034, 0.000048, -0.000034, -0.000046, 0.000034, + -0.000003, 0.000124, -0.000003, 
-0.000059, 0.000044, -0.000044, -0.000059, -0.000044, 0.000044, -0.000059, + -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, 0.000041, -0.000044, -0.000017, 0.000044, 0.000042, + -0.000078, 0.000029, -0.000029, -0.000003, -0.000003, 0.000124, -0.000059, -0.000059, 0.000044, -0.000044, + 0.000124, -0.000003, -0.000003, 0.000044, -0.000044, -0.000059, -0.000059, 0.000124, -0.000003, -0.000003, + 0.000044, -0.000044, -0.000059, -0.000059, -0.000059, -0.000059, 0.000124, 0.000044, -0.000003, -0.000044, + -0.000044, 0.000041, -0.000059, -0.000059, 0.000124, -0.000044, 0.000044, -0.000003, -0.000044, 0.000041, + 0.000018, -0.000001, -0.000013, 0.000013, -0.000017, -0.000007, -0.000017, 0.000134, -0.000127, -0.000047, + -0.000047, 0.000060, 0.000047, -0.000001, 0.000018, 0.000013, -0.000013, -0.000017, -0.000059, -0.000059, + 0.000124, 0.000044, -0.000003, -0.000044, -0.000044, 0.000041, 0.000048, -0.000002, -0.000046, -0.000034, + 0.000034, -0.000266, -0.000015, -0.000046, 0.000283, -0.000099, -0.000099, 0.000134, 0.000099, 0.000001, + 1.000000, 0.000001, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 0.000045, -0.000003, -0.000014, -0.000006, 0.000027, 0.000003, -0.000008, 0.000027, 0.000003, + -0.000008, 0.000027, 0.000003, -0.000008, 0.000047, -0.000043, -0.000005, 0.000077, 0.000402, -0.000358, + 0.000047, -0.000043, -0.000005, 0.000077, 0.000402, -0.000358, 0.000027, 0.000003, -0.000008, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000045, -0.000004, -0.000003, -0.000014, -0.000006, + 0.000027, 0.000003, -0.000008, 0.000022, 0.000021, -0.000006, 0.000047, -0.000043, -0.000005, 0.000075, + -0.000067, 0.000044, -0.000044, -0.000008, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, 0.000027, + 0.000003, -0.000008, 0.000138, -0.000099, -0.000095, 0.000071, 0.000027, 0.000003, -0.000008, 0.000100, + -0.000005, -0.000071, 0.000071, 0.000047, 0.000045, -0.000021, -0.000021, 0.000016, 0.000016, 
-0.000013, + 0.000027, 0.000003, -0.000008, 0.000236, -0.000010, -0.000010, -0.000113, 0.000036, -0.000080, -0.000080, + 0.000032, 0.000045, -0.000003, -0.000004, -0.000014, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, + -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, -0.000059, -0.000059, 0.000066, + 0.000044, 0.000044, 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, -0.000059, -0.000059, 0.000066, + 0.000044, 0.000044, 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, 0.000132, 0.000675, -0.000594, + 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000059, -0.000059, 0.000066, 0.000044, + 0.000044, 0.000035, -0.000018, 0.000067, -0.000113, 0.000036, 0.000008, 0.000044, -0.000007, -0.000005, + -0.000044, 0.000322, -0.000044, -0.000117, -0.000046, -0.000024, 0.000045, -0.000003, -0.000014, -0.000006, + 0.000022, 0.000021, -0.000006, 0.000202, -0.000010, -0.000007, -0.000072, -0.000070, -0.000021, 0.000100, + -0.000005, -0.000071, 0.000071, 0.000047, -0.000043, -0.000005, 0.000022, 0.000021, -0.000006, -0.000010, + 0.000227, -0.000080, -0.000080, -0.000010, -0.000024, 0.000022, 0.000021, -0.000006, 0.000055, -0.000088, + 0.000034, 0.000028, 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, 0.000047, -0.000043, -0.000005, + 0.000075, -0.000044, -0.000067, 0.000044, -0.000008, 0.000100, -0.000071, -0.000005, 0.000071, 0.000013, + 0.000012, -0.000004, 0.000047, -0.000043, -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, + 0.000027, 0.000003, -0.000008, 0.000132, 0.000675, -0.000594, 0.000075, -0.000067, 0.000044, -0.000044, + -0.000008, 0.000027, 0.000003, -0.000008, -0.000113, 0.000067, 0.000036, 0.000044, 0.000008, -0.000007, + 0.000022, 0.000021, -0.000006, 0.000077, 0.000402, -0.000358, 0.000047, -0.000043, -0.000005, 0.000009, + -0.000054, 0.000050, 0.000014, 0.000012, -0.000007, 0.000109, 
-0.000036, -0.000007, -0.000024, -0.000059, + -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, -0.000059, -0.000059, 0.000066, 0.000044, + 0.000044, 0.000035, -0.000018, 0.000027, 0.000003, -0.000008, -0.000003, -0.000003, 0.000137, -0.000013, + -0.000044, -0.000044, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000236, + -0.000010, -0.000010, 0.000036, -0.000113, -0.000080, -0.000080, 0.000032, -0.000009, 0.000199, -0.000088, + -0.000065, -0.000088, 0.000065, -0.000065, 0.000065, -0.000010, -0.000005, 0.000022, 0.000021, -0.000006, + 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000113, 0.000067, 0.000044, 0.000036, + 0.000008, -0.000007, 0.000047, -0.000043, -0.000005, -0.000059, -0.000059, 0.000066, 0.000044, 0.000079, + -0.000018, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, 0.000022, 0.000021, + -0.000006, 0.000022, 0.000021, -0.000006, 0.000047, -0.000043, -0.000005, 0.000075, -0.000044, -0.000067, + 0.000044, -0.000008, 0.000236, -0.000080, -0.000080, -0.000010, 0.000036, -0.000010, -0.000113, 0.000032, + 0.000027, 0.000003, -0.000008, -0.000005, 0.000125, -0.000005, -0.000065, -0.000021, -0.000012, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000054, 0.000049, -0.000063, 0.000023, 0.000075, + -0.000067, 0.000044, -0.000044, -0.000008, -0.000070, -0.000072, 0.000202, -0.000007, -0.000010, -0.000021, + -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, + 0.000018, -0.000013, -0.000001, -0.000017, 0.000013, 0.000067, -0.000113, 0.000008, 0.000036, -0.000007, + 0.000060, 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, -0.000002, 0.000137, -0.000063, -0.000011, + -0.000090, 0.000028, 0.000016, -0.000009, 0.000022, 0.000021, -0.000006, -0.000095, 0.000056, 0.000186, + -0.000165, 0.000100, -0.000071, -0.000005, 0.000071, 0.000027, 0.000003, -0.000008, 0.000018, -0.000001, + -0.000013, 
-0.000017, 0.000013, -0.000002, -0.000008, 0.000118, -0.000063, -0.000078, 0.000016, 0.000028, + -0.000005, -0.000003, -0.000003, 0.000137, -0.000044, -0.000013, -0.000044, -0.000006, 0.000071, 0.000173, + -0.000018, 0.000022, 0.000021, -0.000006, 0.000045, 0.000045, -0.000013, 0.000008, 0.000067, -0.000113, + 0.000036, 0.000044, -0.000007, -0.000010, 0.000236, 0.000036, -0.000113, -0.000010, -0.000080, -0.000080, + 0.000032, 0.000045, 0.000045, -0.000013, 0.000069, 0.000035, -0.000113, 0.000036, 0.000044, -0.000007, + 0.000022, 0.000021, -0.000006, 0.000027, 0.000003, -0.000008, -0.000007, -0.000017, 0.000134, 0.000060, + -0.000127, -0.000047, -0.000047, 0.000047, -0.000003, -0.000003, -0.000013, 0.000137, -0.000044, -0.000044, + -0.000006, 0.000027, 0.000003, -0.000008, 0.000077, 0.000402, -0.000358, -0.000055, 0.000022, -0.000094, + 0.000062, 0.000080, -0.000026, 0.000139, 0.000710, -0.000636, 0.000022, 0.000021, -0.000006, 0.000027, + 0.000003, -0.000008, 0.000018, -0.000013, -0.000017, -0.000001, 0.000013, 0.000022, 0.000021, -0.000006, + 0.000054, -0.000063, 0.000049, 0.000028, 0.000016, -0.000005, 0.000022, 0.000021, -0.000006, -0.000059, + -0.000059, 0.000066, 0.000044, 0.000079, -0.000018, 0.000057, 0.000049, -0.000100, 0.000036, 0.000034, + -0.000002, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000103, -0.000011, + -0.000013, -0.000043, -0.000018, 0.000027, 0.000003, -0.000008, 0.000022, 0.000021, -0.000006, -0.000059, + -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000027, 0.000003, -0.000008, -0.000161, + 0.000509, 0.000161, -0.000178, -0.000216, -0.000013, -0.000026, -0.000021, 0.000047, -0.000043, -0.000005, + 0.000027, 0.000003, -0.000008, 0.000077, 0.000402, -0.000358, -0.000059, -0.000059, 0.000066, 0.000044, + 0.000044, 0.000035, -0.000018, 0.000100, -0.000005, -0.000071, 0.000071, 0.000027, -0.000005, 0.000054, + 0.000049, -0.000063, 0.000023, 0.000022, 0.000021, -0.000006, 0.000028, 0.000019, 
0.000013, 0.000012, + -0.000004, -0.000005, 0.000322, -0.000044, -0.000046, -0.000044, -0.000117, -0.000024, 0.000028, 0.000027, + -0.000008, -0.000012, 0.000107, -0.000062, 0.000062, -0.000094, -0.000007, 0.000148, -0.000063, -0.000047, + -0.000063, 0.000047, -0.000047, 0.000047, -0.000010, -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, + -0.000008, 0.000022, 0.000021, -0.000006, -0.000010, 0.000227, -0.000080, -0.000010, -0.000024, 0.000036, + 0.000236, -0.000080, -0.000080, -0.000010, -0.000113, -0.000010, 0.000032, 0.000047, -0.000043, -0.000005, + 0.000100, -0.000071, -0.000005, 0.000071, -0.000006, -0.000006, 0.000285, -0.000015, -0.000055, -0.000056, + -0.000044, -0.000044, -0.000037, 0.000675, 0.000132, -0.000594, 0.000027, 0.000003, -0.000008, 0.000027, + 0.000003, -0.000008, 0.000027, 0.000003, -0.000008, 0.000044, 0.000079, 0.000089, -0.000113, 0.000024, + 0.000027, 0.000003, -0.000008, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, 0.000022, 0.000021, + -0.000006, 0.000022, 0.000021, -0.000006, 0.000044, 0.000089, 0.000036, -0.000113, 0.000079, -0.000012, + 0.000022, 0.000021, -0.000006, 0.000027, 0.000003, -0.000008, -0.000009, 0.000199, 0.000065, -0.000088, + 0.000065, -0.000065, -0.000065, -0.000088, -0.000010, -0.000005, 0.000022, 0.000021, -0.000006, 0.000022, + 0.000021, -0.000006, 0.000016, 0.000014, -0.000005, 0.000047, -0.000043, -0.000005, 0.000030, -0.000006, + -0.000002, -0.000007, -0.000004, -0.000085, 0.000096, 0.000056, -0.000056, -0.000011, 0.000022, 0.000021, + -0.000006, 0.000022, 0.000021, -0.000006, 0.000092, -0.000015, -0.000046, -0.000012, -0.000013, -0.000005, + -0.000044, 0.000322, -0.000044, -0.000046, -0.000117, -0.000024, 0.000027, 0.000003, -0.000008, -0.000095, + 0.000065, 0.000186, -0.000113, 0.000044, 0.000036, -0.000153, 0.000022, 0.000021, -0.000006, -0.000002, + -0.000008, 0.000118, -0.000063, -0.000078, 0.000028, 0.000016, -0.000005, -0.000006, 0.000285, -0.000006, + -0.000015, -0.000055, -0.000056, 
-0.000044, -0.000044, -0.000037, -0.000002, -0.000008, -0.000063, 0.000118, + -0.000078, 0.000016, 0.000028, -0.000005, 0.000054, 0.000049, -0.000063, 0.000028, 0.000016, -0.000005, + -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, + 0.000027, 0.000003, -0.000008, 0.000027, 0.000003, -0.000008, 0.000013, 0.000012, -0.000004, 0.000045, + 0.000045, -0.000013, 0.000069, 0.000036, 0.000035, -0.000113, 0.000044, -0.000007, 0.000227, -0.000010, + -0.000010, -0.000080, -0.000080, -0.000024, -0.000007, 0.000164, -0.000047, -0.000047, -0.000005, -0.000021, + -0.000012, -0.000085, 0.000096, -0.000056, 0.000056, -0.000011, 0.000089, 0.000044, -0.000113, 0.000036, + 0.000079, -0.000012, 0.000045, 0.000045, -0.000013, -0.000080, 0.000236, -0.000080, -0.000010, -0.000010, + -0.000113, 0.000036, 0.000032, 0.000069, 0.000036, -0.000113, 0.000044, 0.000035, -0.000007, 0.000047, + -0.000043, -0.000005, -0.000070, 0.000202, -0.000007, -0.000072, -0.000010, -0.000021, -0.000059, -0.000059, + 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, 0.000100, -0.000071, -0.000005, 0.000071, 0.000057, + 0.000049, 0.000036, -0.000100, 0.000034, -0.000002, 0.000045, 0.000045, -0.000013, 0.000069, 0.000036, + -0.000113, 0.000044, 0.000035, -0.000007, 0.000047, -0.000043, -0.000005, -0.000059, -0.000059, 0.000044, + 0.000066, 0.000044, 0.000035, -0.000018, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, -0.000012, + 0.000027, 0.000003, -0.000008, 0.000236, -0.000010, -0.000080, -0.000080, 0.000036, -0.000010, -0.000113, + 0.000032, -0.000002, -0.000008, 0.000118, -0.000063, -0.000078, 0.000028, 0.000016, -0.000005, -0.000002, + -0.000008, 0.000118, -0.000063, -0.000078, 0.000016, 0.000028, -0.000005, 0.000022, 0.000021, -0.000006, + -0.000133, -0.000007, 0.000155, 0.000099, -0.000099, -0.000010, -0.000005, -0.000002, 0.000118, -0.000008, + -0.000078, -0.000063, 0.000028, 0.000016, -0.000005, -0.000067, 0.000075, 0.000044, -0.000044, 
-0.000008, + 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, -0.000002, 0.000118, -0.000078, -0.000008, -0.000063, + 0.000016, 0.000028, -0.000005, 0.000027, 0.000003, -0.000008, 0.000047, -0.000043, -0.000005, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000010, 0.000001, -0.000003, -0.000059, -0.000059, + 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, + -0.000011, 0.000055, 0.000049, 0.000034, 0.000028, -0.000088, -0.000005, 0.000022, 0.000021, -0.000006, + 0.000054, 0.000049, 0.000028, -0.000063, 0.000016, -0.000005, -0.000009, 0.000213, -0.000065, -0.000005, + -0.000065, -0.000021, -0.000012, 0.000044, -0.000113, 0.000089, 0.000036, 0.000079, -0.000012, -0.000003, + -0.000003, 0.000137, -0.000013, -0.000044, -0.000044, -0.000006, -0.000003, -0.000003, -0.000013, 0.000137, + -0.000044, -0.000044, -0.000006, -0.000004, -0.000055, 0.000078, -0.000074, 0.000055, -0.000004, 0.000096, + -0.000039, -0.000029, 0.000029, -0.000029, 0.000029, -0.000039, -0.000010, -0.000005, 0.000077, 0.000402, + -0.000358, 0.000027, 0.000003, -0.000008, 0.000013, 0.000012, -0.000004, -0.000043, 0.000027, 0.000028, + 0.000021, 0.000003, 0.000130, -0.000125, 0.000041, -0.000075, 0.000041, -0.000013, 0.000022, 0.000021, + -0.000006, 0.000089, 0.000079, 0.000044, 0.000036, -0.000113, -0.000012, 0.000022, 0.000021, -0.000006, + 0.000027, 0.000003, -0.000008, 0.000077, 0.000402, -0.000358, -0.000005, -0.000044, 0.000322, -0.000117, + -0.000044, -0.000046, -0.000024, -0.000113, 0.000044, 0.000036, 0.000089, 0.000079, -0.000012, 0.000054, + 0.000049, 0.000028, -0.000063, 0.000016, -0.000005, 0.000022, 0.000021, -0.000006, 0.000055, -0.000088, + 0.000034, 0.000028, 0.000049, -0.000005, 0.000016, 0.000010, -0.000113, 0.000067, 0.000008, 0.000036, + 0.000044, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, 0.000027, + 0.000003, -0.000008, 0.000067, 0.000036, 0.000044, 0.000008, 
-0.000113, -0.000007, -0.000003, -0.000003, + -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, + -0.000044, -0.000006, -0.000007, 0.000148, 0.000047, -0.000063, -0.000063, 0.000047, -0.000047, -0.000047, + -0.000010, -0.000005, 0.000139, 0.000710, -0.000636, 0.000027, 0.000003, -0.000008, 0.000013, 0.000012, + -0.000004, 0.000013, 0.000012, -0.000004, 0.000055, 0.000049, -0.000088, 0.000034, 0.000028, -0.000005, + 0.000022, 0.000021, -0.000006, 0.000045, -0.000003, -0.000014, -0.000006, 0.000117, -0.000159, 0.000055, + 0.000056, 0.000154, -0.000011, 0.000227, -0.000010, -0.000080, -0.000080, -0.000010, -0.000024, 0.000027, + 0.000003, -0.000008, 0.000027, 0.000003, -0.000008, 0.000285, -0.000015, -0.000006, -0.000006, -0.000044, + -0.000044, -0.000055, -0.000056, -0.000037, 0.000055, 0.000049, -0.000088, 0.000034, 0.000028, -0.000005, + 0.000071, 0.000173, -0.000018, -0.000133, -0.000007, 0.000155, -0.000099, 0.000099, -0.000010, -0.000005, + 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000100, -0.000071, -0.000005, 0.000071, + 0.000027, 0.000003, -0.000008, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, + 0.000045, 0.000045, -0.000013, 0.000036, 0.000067, -0.000113, 0.000044, 0.000008, -0.000007, 0.000075, + -0.000067, 0.000044, -0.000044, -0.000008, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, + -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000080, -0.000010, 0.000227, + -0.000080, -0.000010, -0.000024, 0.000035, 0.000036, 0.000069, 0.000044, -0.000113, -0.000007, 0.000075, + -0.000067, 0.000044, -0.000044, -0.000008, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, 0.000034, + 0.000028, -0.000088, 0.000055, 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, 0.000028, 0.000019, + 0.000027, 0.000028, -0.000043, 0.000021, 0.000003, -0.000060, 0.000176, -0.000010, -0.000005, -0.000084, + -0.000007, 
0.000202, -0.000070, -0.000072, -0.000010, -0.000021, -0.000059, -0.000059, 0.000066, 0.000044, + 0.000044, 0.000035, -0.000018, 0.000045, -0.000004, -0.000003, -0.000014, -0.000006, 0.000027, 0.000003, + -0.000008, -0.000095, 0.000065, -0.000113, 0.000044, 0.000036, 0.000186, -0.000153, -0.000059, -0.000059, + 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, 0.000067, -0.000113, 0.000036, 0.000044, 0.000008, + -0.000007, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, + 0.000003, 0.000036, -0.000113, -0.000010, 0.000236, -0.000010, -0.000080, -0.000080, 0.000032, -0.000113, + 0.000067, 0.000044, 0.000036, 0.000008, -0.000007, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, + 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, 0.000022, 0.000021, -0.000006, -0.000113, 0.000036, + 0.000044, 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, + -0.000006, 0.000132, 0.000675, -0.000594, 0.000054, 0.000028, 0.000016, -0.000063, 0.000049, -0.000005, + 0.000047, 0.000045, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000100, -0.000071, -0.000005, + 0.000071, 0.000027, 0.000003, -0.000008, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, + -0.000018, 0.000057, 0.000036, 0.000049, -0.000100, 0.000034, -0.000002, 0.000022, 0.000021, -0.000006, + -0.000029, 0.000099, -0.000004, -0.000005, -0.000010, -0.000036, -0.000017, 0.000042, -0.000078, 0.000044, + -0.000029, 0.000029, 0.000022, 0.000021, -0.000006, -0.000070, 0.000202, -0.000007, -0.000072, -0.000021, + 0.000034, -0.000088, 0.000055, 0.000049, 0.000028, -0.000005, 0.000045, 0.000045, -0.000013, 0.000017, + 0.000023, -0.000136, 0.000017, 0.000017, 0.000106, 0.000029, -0.000019, 0.000155, -0.000136, 0.000088, + -0.000088, -0.000000, 0.000088, 0.000155, -0.000019, -0.000088, -0.000136, -0.000019, -0.000136, 0.000155, + 0.000088, -0.000088, -0.000000, 0.000044, 0.000089, 0.000079, -0.000113, 0.000036, 
-0.000012, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000047, -0.000043, -0.000005, 0.000013, 0.000012, + -0.000004, -0.000005, -0.000044, -0.000117, -0.000044, 0.000322, -0.000046, -0.000024, -0.000026, 0.000029, + -0.000018, -0.000003, -0.000070, 0.000202, -0.000072, -0.000010, -0.000007, -0.000021, 0.000067, 0.000044, + 0.000036, 0.000008, -0.000113, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, + -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000044, -0.000113, + 0.000089, 0.000079, 0.000036, -0.000012, -0.000010, 0.000176, -0.000060, -0.000005, -0.000084, 0.000010, + 0.000019, 0.000034, -0.000029, 0.000011, -0.000001, -0.000001, -0.000001, -0.000001, -0.000003, 0.000087, + -0.000013, -0.000013, -0.000013, -0.000021, -0.000012, -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, + 0.000117, 0.000055, 0.000056, -0.000159, 0.000154, -0.000011, -0.000002, -0.000001, -0.000002, -0.000020, + -0.000004, 0.000216, -0.000005, -0.000024, -0.000034, -0.000021, -0.000068, 0.000027, 0.000003, -0.000008, + 0.000019, 0.000010, 0.000011, 0.000034, -0.000029, -0.000001, 0.000022, 0.000021, -0.000006, 0.000028, + 0.000019, -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, + -0.000005, 0.000227, -0.000010, -0.000010, -0.000080, -0.000080, -0.000024, 0.000022, 0.000021, -0.000006, + 0.000092, -0.000015, -0.000046, -0.000012, -0.000013, -0.000043, 0.000027, 0.000028, 0.000021, 0.000003, + -0.000005, -0.000044, 0.000322, -0.000046, -0.000044, -0.000117, -0.000024, -0.000113, 0.000089, 0.000044, + 0.000079, 0.000036, -0.000012, 0.000027, 0.000003, -0.000008, -0.000113, 0.000089, 0.000044, 0.000036, + 0.000079, -0.000012, 0.000027, 0.000028, -0.000043, 0.000021, 0.000003, 0.000027, 0.000028, -0.000043, + 0.000021, 0.000003, -0.000027, 0.000010, 0.000016, 0.000036, 0.000056, -0.000029, 0.000011, -0.000082, + 0.000055, 0.000049, -0.000088, 
0.000034, 0.000028, -0.000005, 0.000022, 0.000021, -0.000006, -0.000009, + 0.000164, -0.000052, -0.000006, -0.000063, -0.000021, -0.000113, 0.000069, 0.000044, 0.000035, 0.000036, + -0.000007, 0.000027, -0.000043, 0.000021, 0.000028, 0.000003, -0.000007, 0.000164, -0.000047, -0.000005, + -0.000047, -0.000021, -0.000012, -0.000006, -0.000006, 0.000285, -0.000015, -0.000055, -0.000056, -0.000044, + -0.000044, -0.000037, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, -0.000001, -0.000001, + -0.000003, 0.000133, -0.000013, -0.000047, -0.000013, -0.000003, -0.000021, -0.000012, 0.000013, 0.000012, + -0.000004, 0.000202, -0.000072, -0.000007, -0.000070, -0.000010, -0.000021, -0.000133, -0.000007, 0.000155, + -0.000099, 0.000099, -0.000010, -0.000005, 0.000045, 0.000047, -0.000021, -0.000021, 0.000016, 0.000016, + -0.000013, 0.000045, 0.000045, -0.000013, 0.000069, -0.000113, 0.000044, 0.000036, 0.000035, -0.000007, + -0.000001, -0.000001, 0.000070, -0.000013, -0.000013, -0.000003, -0.000021, -0.000012, -0.000009, 0.000213, + -0.000065, -0.000065, -0.000005, -0.000021, -0.000012, 0.000044, 0.000089, 0.000036, -0.000113, 0.000079, + -0.000012, 0.000176, -0.000010, -0.000060, -0.000005, -0.000084, 0.000045, 0.000045, -0.000013, 0.000036, + -0.000113, 0.000044, 0.000069, 0.000035, -0.000007, -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, + -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000028, 0.000019, 0.000126, -0.000043, -0.000080, + 0.000028, -0.000032, 0.000008, -0.000050, 0.000046, 0.000024, 0.000126, -0.000043, -0.000052, -0.000032, + 0.000055, 0.000049, -0.000088, 0.000034, 0.000028, -0.000005, 0.000045, -0.000003, -0.000014, -0.000006, + -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000036, 0.000069, -0.000113, + 0.000044, 0.000035, -0.000007, 0.000067, -0.000113, 0.000036, 0.000044, 0.000008, -0.000007, -0.000113, + -0.000010, 0.000236, -0.000080, -0.000080, 0.000036, -0.000010, 0.000032, 0.000034, 
-0.000088, 0.000028, + 0.000055, 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, -0.000043, 0.000126, -0.000052, -0.000032, + -0.000026, 0.000029, -0.000018, -0.000003, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, -0.000012, + 0.000027, 0.000021, 0.000028, -0.000043, 0.000003, 0.000028, 0.000027, 0.000021, -0.000043, 0.000003, + -0.000007, -0.000007, -0.000104, -0.000099, 0.000327, -0.000036, -0.000021, -0.000054, 0.000036, 0.000013, + -0.000031, 0.000100, -0.000071, -0.000005, 0.000071, -0.000002, -0.000008, 0.000118, -0.000078, -0.000063, + 0.000028, 0.000016, -0.000005, 0.000027, 0.000003, -0.000008, -0.000002, -0.000008, 0.000118, -0.000078, + -0.000063, 0.000016, 0.000028, -0.000005, 0.000057, 0.000049, -0.000100, 0.000032, 0.000092, -0.000015, + -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, 0.000092, -0.000015, + -0.000012, -0.000046, -0.000013, 0.000012, 0.000013, -0.000004, -0.000001, 0.000036, -0.000003, -0.000013, + -0.000010, -0.000006, -0.000070, -0.000010, 0.000202, -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, + 0.000008, 0.000036, 0.000067, -0.000007, 0.000034, -0.000088, 0.000028, 0.000055, 0.000049, -0.000005, + 0.000285, -0.000015, -0.000006, -0.000055, -0.000006, -0.000056, -0.000044, -0.000044, -0.000037, 0.000402, + 0.000077, -0.000358, 0.000013, 0.000012, -0.000004, -0.000005, 0.000322, -0.000044, -0.000046, -0.000044, + -0.000117, -0.000024, -0.000113, 0.000089, 0.000079, 0.000044, 0.000036, -0.000012, 0.000202, -0.000072, + -0.000007, -0.000070, -0.000010, -0.000021, 0.000176, -0.000010, -0.000060, -0.000005, -0.000084, 0.000028, + 0.000027, -0.000008, -0.000010, -0.000007, 0.000202, -0.000070, -0.000072, -0.000021, -0.000004, 0.000029, + 0.000096, -0.000039, 0.000029, -0.000039, -0.000029, -0.000029, -0.000010, -0.000005, -0.000002, 0.000118, + -0.000008, -0.000063, 0.000028, 0.000016, -0.000078, -0.000005, -0.000067, 0.000075, -0.000044, 0.000044, + -0.000008, 0.000055, 0.000049, 
0.000034, 0.000028, -0.000088, -0.000005, 0.000022, 0.000021, -0.000006, + -0.000002, 0.000118, -0.000008, -0.000063, 0.000016, 0.000028, -0.000078, -0.000005, 0.000045, 0.000045, + -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, -0.000007, -0.000113, -0.000080, -0.000010, + 0.000236, -0.000010, 0.000036, -0.000080, 0.000032, 0.000008, 0.000044, -0.000113, 0.000067, 0.000036, + -0.000007, 0.000047, -0.000043, -0.000005, 0.000022, 0.000021, -0.000006, -0.000003, -0.000003, 0.000137, + -0.000013, -0.000044, -0.000044, -0.000006, -0.000003, -0.000003, -0.000013, 0.000137, -0.000044, -0.000044, + -0.000006, 0.000054, 0.000049, -0.000063, 0.000028, 0.000016, -0.000005, 0.000092, -0.000015, -0.000012, + -0.000046, -0.000013, 0.000041, 0.000130, -0.000125, -0.000075, 0.000041, -0.000013, -0.000059, -0.000059, + 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, + -0.000006, 0.000079, 0.000089, 0.000044, 0.000036, -0.000113, -0.000012, 0.000045, 0.000045, -0.000013, + 0.000067, -0.000113, 0.000008, 0.000036, 0.000044, -0.000007, 0.000022, 0.000021, -0.000006, -0.000010, + 0.000227, -0.000010, -0.000080, -0.000080, -0.000024, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, + -0.000006, 0.000022, 0.000021, -0.000006, -0.000059, -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, + -0.000018, 0.000045, 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000027, 0.000003, + -0.000008, -0.000113, 0.000036, 0.000044, 0.000067, 0.000008, -0.000007, 0.000027, 0.000003, -0.000008, + 0.000089, 0.000079, -0.000113, 0.000044, 0.000036, -0.000012, -0.000029, -0.000002, 0.000077, -0.000005, + -0.000021, -0.000012, -0.000113, 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, -0.000026, 0.000029, + 0.000018, -0.000018, -0.000003, -0.000027, -0.000029, 0.000010, 0.000056, 0.000016, 0.000011, 0.000036, + -0.000082, 0.000013, 0.000012, -0.000004, 0.000034, -0.000088, 0.000055, 0.000049, 0.000028, -0.000005, + 
0.000027, 0.000003, -0.000008, -0.000002, -0.000008, 0.000118, -0.000078, 0.000028, 0.000016, -0.000063, + -0.000005, -0.000002, 0.000118, -0.000078, -0.000008, 0.000016, 0.000028, -0.000063, -0.000005, 0.000092, + -0.000015, -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, 0.000092, + -0.000046, -0.000015, -0.000012, -0.000013, -0.000029, 0.000019, 0.000010, 0.000011, 0.000034, -0.000001, + 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000013, 0.000012, -0.000004, -0.000005, + -0.000044, -0.000044, -0.000117, -0.000046, 0.000322, -0.000024, 0.000022, 0.000021, -0.000006, -0.000010, + -0.000070, 0.000202, -0.000007, -0.000072, -0.000021, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, + -0.000088, -0.000019, 0.000016, 0.000014, -0.000005, 0.000045, 0.000045, -0.000013, 0.000117, -0.000159, + 0.000055, 0.000056, 0.000154, -0.000011, 0.000027, -0.000043, 0.000028, 0.000021, 0.000003, 0.000126, + -0.000043, -0.000052, -0.000032, 0.000126, -0.000043, -0.000052, -0.000032, -0.000004, -0.000029, 0.000099, + -0.000029, -0.000005, -0.000010, -0.000007, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, + -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, + -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000045, 0.000045, -0.000013, 0.000036, 0.000035, + 0.000069, -0.000113, 0.000044, -0.000007, 0.000036, 0.000044, 0.000067, 0.000008, -0.000113, -0.000007, + -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000059, -0.000059, 0.000044, + 0.000066, 0.000044, 0.000035, -0.000018, -0.000009, -0.000065, 0.000213, -0.000005, -0.000065, -0.000021, + -0.000012, -0.000027, -0.000029, 0.000010, 0.000056, 0.000016, 0.000011, 0.000036, -0.000082, -0.000005, + -0.000044, 0.000322, -0.000117, -0.000044, -0.000046, -0.000024, -0.000113, 0.000067, 0.000008, 0.000036, + 0.000044, -0.000007, -0.000003, -0.000003, -0.000044, 
0.000137, -0.000013, -0.000044, -0.000006, -0.000159, + 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, 0.000126, -0.000043, -0.000052, -0.000032, -0.000043, + 0.000126, -0.000052, -0.000032, 0.000022, 0.000021, -0.000006, -0.000133, -0.000007, 0.000099, 0.000155, + -0.000099, -0.000010, -0.000005, 0.000027, 0.000028, 0.000021, -0.000043, 0.000003, -0.000027, 0.000056, + 0.000010, 0.000016, 0.000011, -0.000029, 0.000036, -0.000082, -0.000059, -0.000059, 0.000044, 0.000066, + 0.000044, 0.000035, -0.000018, -0.000007, 0.000164, -0.000047, -0.000047, -0.000021, -0.000005, -0.000012, + -0.000088, 0.000055, 0.000049, 0.000028, 0.000034, -0.000005, 0.000022, 0.000021, -0.000006, 0.000092, + -0.000015, -0.000012, -0.000046, -0.000013, -0.000001, -0.000001, -0.000001, 0.000087, -0.000003, -0.000013, + -0.000013, -0.000021, -0.000025, 0.000028, 0.000019, -0.000060, 0.000176, -0.000010, -0.000005, -0.000084, + 0.000027, 0.000003, -0.000008, 0.000044, 0.000036, 0.000089, -0.000113, 0.000079, -0.000012, 0.000161, + -0.000161, 0.000509, -0.000178, -0.000216, -0.000013, -0.000026, -0.000021, 0.000036, 0.000057, -0.000100, + 0.000034, 0.000049, -0.000002, 0.000096, 0.000056, -0.000085, -0.000056, -0.000011, 0.000012, 0.000013, + -0.000004, -0.000001, -0.000002, 0.000092, -0.000003, -0.000029, -0.000013, -0.000021, -0.000012, -0.000001, + -0.000001, -0.000003, 0.000133, -0.000003, -0.000047, -0.000013, -0.000013, -0.000021, -0.000012, 0.000010, + 0.000034, 0.000019, -0.000029, 0.000011, -0.000001, 0.000022, 0.000021, -0.000006, -0.000002, 0.000118, + -0.000078, -0.000008, -0.000063, 0.000028, 0.000016, -0.000005, -0.000015, 0.000092, -0.000046, -0.000012, + -0.000013, 0.000022, 0.000021, -0.000006, -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, 0.000022, + 0.000021, -0.000006, 0.000008, 0.000067, -0.000113, 0.000036, 0.000044, -0.000007, 0.000045, 0.000047, + -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, -0.000002, 0.000118, -0.000078, -0.000008, -0.000063, + 
0.000016, 0.000028, -0.000005, -0.000015, -0.000046, 0.000092, -0.000012, -0.000013, 0.000036, -0.000113, + -0.000010, 0.000236, -0.000010, -0.000080, -0.000080, 0.000032, -0.000004, -0.000001, -0.000002, -0.000002, + -0.000055, 0.000221, -0.000024, -0.000005, -0.000003, -0.000034, -0.000020, -0.000021, -0.000014, 0.000047, + -0.000021, 0.000016, 0.000045, -0.000021, 0.000016, -0.000013, -0.000001, -0.000041, 0.000171, -0.000015, + -0.000059, -0.000017, -0.000027, -0.000005, -0.000044, -0.000117, 0.000322, -0.000044, -0.000046, -0.000024, + -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000088, 0.000028, 0.000055, + 0.000034, 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, -0.000007, 0.000202, -0.000070, -0.000010, + -0.000072, -0.000021, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, 0.000045, 0.000016, -0.000021, + 0.000047, -0.000021, 0.000016, -0.000013, -0.000067, 0.000075, -0.000044, 0.000044, -0.000008, 0.000045, + 0.000045, -0.000013, 0.000036, 0.000067, -0.000113, 0.000044, 0.000008, -0.000007, -0.000080, -0.000010, + 0.000227, -0.000080, -0.000010, -0.000024, -0.000113, 0.000036, 0.000069, 0.000044, 0.000035, -0.000007, + 0.000044, -0.000113, 0.000036, 0.000067, 0.000008, -0.000007, 0.000034, -0.000088, 0.000055, 0.000028, + 0.000049, -0.000005, 0.000057, 0.000157, -0.000014, 0.000036, -0.000113, 0.000044, 0.000067, 0.000008, + -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000071, 0.000173, + -0.000018, 0.000022, 0.000021, -0.000006, 0.000013, 0.000012, -0.000004, 0.000045, 0.000045, -0.000013, + -0.000003, -0.000047, -0.000005, 0.000101, -0.000021, -0.000012, 0.000054, 0.000049, -0.000063, 0.000028, + 0.000016, -0.000005, 0.000022, 0.000021, -0.000006, -0.000002, 0.000118, -0.000078, -0.000008, -0.000063, + 0.000028, 0.000016, -0.000005, -0.000015, -0.000046, 0.000092, -0.000012, -0.000013, -0.000067, 0.000044, + 0.000075, -0.000044, -0.000008, -0.000002, 0.000118, 
-0.000078, -0.000063, -0.000008, 0.000016, 0.000028, + -0.000005, -0.000015, -0.000046, 0.000092, -0.000012, -0.000013, 0.000117, -0.000159, 0.000055, 0.000056, + 0.000154, -0.000011, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000027, 0.000003, + -0.000008, -0.000006, -0.000006, 0.000285, -0.000044, -0.000044, -0.000015, -0.000055, -0.000056, -0.000037, + 0.000402, 0.000077, -0.000358, 0.000027, 0.000003, -0.000008, 0.000079, -0.000113, 0.000044, 0.000036, + 0.000089, -0.000012, 0.000100, -0.000005, -0.000071, 0.000071, -0.000004, -0.000029, -0.000029, 0.000099, + -0.000005, -0.000010, -0.000007, -0.000026, 0.000018, 0.000029, -0.000018, -0.000003, -0.000005, -0.000065, + 0.000125, -0.000005, -0.000021, -0.000012, -0.000010, -0.000060, 0.000176, -0.000005, -0.000084, 0.000045, + 0.000045, -0.000013, -0.000113, 0.000036, 0.000044, 0.000035, 0.000069, -0.000007, 0.000012, 0.000013, + -0.000004, 0.000044, -0.000113, 0.000036, 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, + -0.000013, 0.000137, -0.000044, -0.000006, 0.000022, 0.000021, -0.000006, -0.000007, 0.000202, -0.000070, + -0.000072, -0.000010, -0.000021, 0.000045, 0.000045, -0.000013, 0.000036, 0.000067, -0.000113, 0.000044, + 0.000008, -0.000007, 0.000044, 0.000067, -0.000113, 0.000036, 0.000008, -0.000007, -0.000159, 0.000117, + 0.000055, 0.000056, 0.000154, -0.000011, -0.000080, -0.000080, -0.000010, 0.000227, -0.000010, -0.000024, + 0.000022, 0.000021, -0.000006, -0.000113, 0.000067, 0.000036, 0.000044, 0.000008, -0.000007, -0.000113, + 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, -0.000003, -0.000003, 0.000137, -0.000044, -0.000013, + -0.000044, -0.000006, -0.000001, -0.000059, -0.000015, 0.000171, -0.000017, -0.000041, -0.000027, -0.000005, + -0.000044, -0.000117, 0.000322, -0.000044, -0.000046, -0.000024, 0.000132, 0.000675, -0.000594, 0.000022, + 0.000021, -0.000006, 0.000126, -0.000043, -0.000052, -0.000032, 0.000045, 0.000047, -0.000021, -0.000021, + 
0.000016, 0.000016, -0.000013, 0.000044, 0.000036, -0.000113, 0.000067, 0.000008, -0.000007, -0.000003, + -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000001, -0.000001, -0.000005, -0.000005, + 0.000127, -0.000088, -0.000019, 0.000045, 0.000045, -0.000013, 0.000036, 0.000069, -0.000113, 0.000044, + 0.000035, -0.000007, 0.000027, 0.000021, -0.000043, 0.000028, 0.000003, 0.000161, 0.000026, 0.000258, + -0.000178, -0.000255, 0.000000, -0.000001, -0.000015, 0.000171, -0.000059, -0.000017, -0.000041, -0.000027, + -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, + -0.000013, 0.000137, -0.000044, -0.000006, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, + -0.000043, 0.000027, 0.000028, 0.000003, -0.000009, -0.000065, 0.000213, -0.000005, -0.000065, -0.000021, + -0.000012, -0.000027, 0.000036, 0.000010, -0.000029, 0.000056, 0.000016, 0.000011, -0.000082, -0.000005, + -0.000044, 0.000322, -0.000046, -0.000044, -0.000117, -0.000024, 0.000079, -0.000113, 0.000089, 0.000044, + 0.000036, -0.000012, 0.000100, -0.000005, -0.000071, 0.000071, 0.000057, 0.000049, -0.000100, 0.000032, + 0.000036, 0.000044, -0.000113, 0.000069, 0.000035, -0.000007, -0.000001, -0.000001, -0.000005, 0.000127, + -0.000005, -0.000088, -0.000019, 0.000028, 0.000019, -0.000133, -0.000007, 0.000099, 0.000155, -0.000099, + -0.000010, -0.000005, -0.000005, 0.000322, -0.000044, -0.000117, -0.000044, -0.000046, -0.000024, 0.000027, + 0.000021, -0.000043, 0.000028, 0.000003, -0.000021, 0.000016, 0.000045, 0.000047, -0.000021, 0.000016, + -0.000013, 0.000027, 0.000003, -0.000008, -0.000095, -0.000113, 0.000065, 0.000044, 0.000036, 0.000186, + -0.000153, -0.000006, 0.000164, -0.000009, -0.000063, -0.000052, -0.000021, 0.000034, 0.000028, 0.000055, + 0.000049, -0.000088, -0.000005, 0.000022, 0.000021, -0.000006, -0.000003, -0.000003, 0.000137, -0.000044, + -0.000013, -0.000044, -0.000006, -0.000059, -0.000059, 
0.000044, 0.000066, 0.000044, 0.000035, -0.000018, + -0.000113, 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, 0.000049, 0.000034, 0.000028, 0.000055, + -0.000088, -0.000005, 0.000041, -0.000125, 0.000130, 0.000041, -0.000075, -0.000013, 0.000022, 0.000021, + -0.000006, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, -0.000021, -0.000021, 0.000047, 0.000016, + 0.000045, 0.000016, -0.000013, 0.000077, 0.000402, -0.000358, -0.000059, -0.000059, 0.000044, 0.000066, + 0.000044, 0.000035, -0.000018, 0.000027, 0.000003, -0.000008, 0.000044, 0.000079, -0.000113, 0.000036, + 0.000089, -0.000012, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, 0.000057, + 0.000049, -0.000100, 0.000034, 0.000036, -0.000002, -0.000015, -0.000046, -0.000012, 0.000092, -0.000013, + -0.000026, 0.000018, 0.000029, -0.000018, -0.000003, -0.000001, -0.000059, 0.000171, -0.000041, -0.000015, + -0.000017, -0.000027, -0.000161, 0.000161, -0.000178, 0.000509, -0.000013, -0.000216, -0.000026, -0.000021, + 0.000041, -0.000125, 0.000041, 0.000130, -0.000075, -0.000013, 0.000044, -0.000113, 0.000036, 0.000089, + 0.000079, -0.000012, -0.000080, -0.000080, 0.000227, -0.000010, -0.000010, -0.000024, 0.000044, 0.000089, + 0.000036, -0.000113, 0.000079, -0.000012, -0.000001, -0.000001, 0.000127, -0.000005, -0.000005, -0.000088, + -0.000019, -0.000043, 0.000047, -0.000005, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, + -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000001, -0.000001, + 0.000070, -0.000003, -0.000013, -0.000021, -0.000025, 0.000011, -0.000029, 0.000010, 0.000034, 0.000019, + -0.000001, 0.000022, 0.000021, -0.000006, -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, 0.000092, + -0.000015, -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, -0.000059, + -0.000059, 0.000066, 0.000044, 0.000044, 0.000035, -0.000018, -0.000004, -0.000029, 0.000099, -0.000029, + 
-0.000005, -0.000010, -0.000007, -0.000007, -0.000007, -0.000099, 0.000327, -0.000104, -0.000036, -0.000021, + 0.000028, 0.000019, 0.000021, 0.000027, -0.000043, 0.000028, 0.000003, -0.000012, 0.000092, -0.000015, + -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, 0.000019, 0.000010, 0.000011, + 0.000034, -0.000029, -0.000001, -0.000067, 0.000044, 0.000075, -0.000044, -0.000008, 0.000055, 0.000049, + 0.000034, 0.000028, -0.000088, -0.000005, 0.000022, 0.000021, -0.000006, -0.000002, -0.000004, -0.000001, + -0.000002, -0.000034, -0.000055, -0.000005, -0.000020, 0.000221, -0.000003, -0.000024, -0.000021, -0.000014, + 0.000008, 0.000005, -0.000001, -0.000010, -0.000070, 0.000202, -0.000072, -0.000007, -0.000021, 0.000044, + -0.000113, 0.000036, 0.000008, 0.000067, -0.000007, 0.000047, 0.000045, -0.000021, 0.000016, -0.000021, + 0.000016, -0.000013, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, -0.000012, -0.000080, -0.000080, + 0.000227, -0.000010, -0.000010, -0.000024, 0.000028, 0.000027, 0.000021, -0.000043, 0.000003, 0.000045, + -0.000021, 0.000016, 0.000047, 0.000016, -0.000021, -0.000013, 0.000126, -0.000043, -0.000052, -0.000032, + 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000117, 0.000322, -0.000044, -0.000046, -0.000024, + 0.000027, 0.000003, -0.000008, 0.000079, 0.000044, 0.000036, -0.000113, 0.000089, -0.000012, -0.000007, + 0.000030, -0.000006, -0.000002, -0.000004, 0.000173, 0.000071, -0.000018, -0.000013, -0.000020, 0.000013, + 0.000022, -0.000002, -0.000100, 0.000057, 0.000049, 0.000034, 0.000036, -0.000002, 0.000126, -0.000043, + -0.000052, -0.000032, -0.000043, 0.000126, -0.000052, -0.000032, -0.000001, -0.000001, 0.000127, -0.000005, + -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, -0.000003, -0.000003, -0.000044, 0.000137, + -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, + -0.000003, -0.000003, 0.000137, -0.000013, -0.000044, 
-0.000044, -0.000006, -0.000003, -0.000003, -0.000013, + 0.000137, -0.000044, -0.000044, -0.000006, -0.000006, -0.000006, 0.000285, -0.000015, -0.000044, -0.000044, + -0.000055, -0.000056, -0.000037, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, 0.000022, + 0.000021, -0.000006, -0.000003, -0.000003, 0.000137, -0.000013, -0.000044, -0.000044, -0.000006, -0.000003, + -0.000003, -0.000013, 0.000137, -0.000044, -0.000044, -0.000006, -0.000159, 0.000117, 0.000055, 0.000056, + 0.000154, -0.000011, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000113, -0.000095, + 0.000065, 0.000044, 0.000036, 0.000186, -0.000153, 0.000126, -0.000043, -0.000052, -0.000032, -0.000005, + -0.000044, -0.000046, -0.000044, -0.000117, 0.000322, -0.000024, -0.000001, -0.000001, -0.000005, 0.000127, + -0.000005, -0.000088, -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, 0.000202, -0.000070, -0.000072, + -0.000007, -0.000010, -0.000021, 0.000028, 0.000019, 0.000132, 0.000675, -0.000594, 0.000022, 0.000021, + -0.000006, -0.000007, -0.000047, 0.000164, -0.000021, -0.000005, -0.000060, 0.000022, 0.000021, -0.000006, + 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000010, 0.000176, -0.000060, -0.000005, + -0.000084, 0.000045, 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000008, 0.000067, + -0.000113, 0.000036, 0.000044, -0.000007, -0.000010, 0.000236, -0.000010, 0.000036, -0.000113, -0.000080, + -0.000080, 0.000032, 0.000008, -0.000113, 0.000067, 0.000044, 0.000036, -0.000007, 0.000055, 0.000049, + -0.000088, 0.000028, 0.000034, -0.000005, 0.000022, 0.000021, -0.000006, -0.000007, -0.000047, 0.000164, + -0.000047, -0.000005, -0.000021, -0.000012, -0.000026, 0.000029, -0.000018, -0.000003, 0.000126, -0.000043, + -0.000052, -0.000032, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, + -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000022, 0.000021, -0.000006, 
-0.000071, + 0.000100, -0.000005, 0.000071, -0.000010, 0.000202, -0.000007, -0.000072, -0.000070, -0.000021, 0.000049, + -0.000088, 0.000055, 0.000028, 0.000034, -0.000005, -0.000009, -0.000065, 0.000213, -0.000005, -0.000065, + -0.000021, -0.000012, -0.000027, -0.000029, 0.000010, 0.000011, 0.000056, 0.000016, 0.000036, -0.000082, + 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000012, + -0.000015, 0.000092, -0.000046, -0.000013, 0.000027, 0.000003, -0.000008, -0.000001, -0.000001, -0.000005, + 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000176, -0.000060, -0.000010, + -0.000005, -0.000084, 0.000027, 0.000003, -0.000008, 0.000044, 0.000079, 0.000089, 0.000036, -0.000113, + -0.000012, 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000117, -0.000046, 0.000322, + -0.000024, 0.000028, 0.000019, 0.000022, 0.000021, -0.000006, -0.000003, -0.000003, 0.000137, -0.000013, + -0.000044, -0.000044, -0.000006, 0.000117, -0.000159, 0.000055, 0.000056, 0.000154, -0.000011, 0.000045, + 0.000045, -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, -0.000007, -0.000113, 0.000044, + 0.000036, 0.000067, 0.000008, -0.000007, -0.000113, 0.000036, -0.000080, -0.000010, 0.000236, -0.000080, + -0.000010, 0.000032, 0.000035, 0.000036, 0.000069, 0.000044, -0.000113, -0.000007, 0.000008, 0.000044, + -0.000113, 0.000036, 0.000067, -0.000007, 0.000139, 0.000710, -0.000636, -0.000088, 0.000028, 0.000055, + 0.000049, 0.000034, -0.000005, 0.000022, 0.000021, -0.000006, 0.000139, 0.000710, -0.000636, -0.000006, + 0.000164, -0.000063, -0.000009, -0.000052, -0.000021, 0.000710, 0.000139, -0.000636, -0.000006, 0.000164, + -0.000063, -0.000009, -0.000052, -0.000021, 0.000710, 0.000139, -0.000636, 0.000139, 0.000710, -0.000636, + 0.000047, 0.000045, -0.000021, 0.000016, -0.000021, 0.000016, -0.000013, 0.000013, 0.000012, -0.000004, + 0.000139, 0.000710, -0.000636, -0.000009, -0.000052, 
0.000164, -0.000006, -0.000063, -0.000021, 0.000710, + 0.000139, -0.000636, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, + -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000133, -0.000007, 0.000099, 0.000155, + -0.000099, -0.000010, -0.000005, -0.000001, -0.000013, -0.000003, 0.000036, -0.000010, -0.000006, -0.000027, + 0.000056, -0.000029, 0.000010, 0.000011, 0.000036, 0.000016, -0.000082, -0.000002, -0.000011, -0.000090, + 0.000137, -0.000063, 0.000028, 0.000016, -0.000009, 0.000022, 0.000021, -0.000006, 0.000092, -0.000015, + -0.000012, -0.000046, -0.000013, -0.000043, 0.000027, 0.000021, 0.000028, 0.000003, -0.000043, 0.000027, + 0.000021, 0.000028, 0.000003, 0.000045, -0.000021, 0.000047, 0.000016, -0.000021, 0.000016, -0.000013, + 0.000139, 0.000710, -0.000636, -0.000009, -0.000052, 0.000164, -0.000063, -0.000006, -0.000021, 0.000045, + 0.000045, -0.000013, 0.000045, 0.000045, -0.000013, 0.000036, 0.000067, -0.000113, 0.000044, 0.000008, + -0.000007, -0.000080, -0.000010, 0.000227, -0.000080, -0.000010, -0.000024, 0.000035, 0.000036, 0.000069, + -0.000113, 0.000044, -0.000007, 0.000021, 0.000027, -0.000043, 0.000028, 0.000003, -0.000015, -0.000006, + 0.000285, -0.000006, -0.000044, -0.000044, -0.000055, -0.000056, -0.000037, -0.000002, -0.000078, -0.000063, + 0.000118, -0.000008, 0.000016, 0.000028, -0.000005, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, + 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, -0.000007, -0.000007, 0.000327, -0.000104, -0.000099, + -0.000036, -0.000021, 0.000173, 0.000071, -0.000018, -0.000095, 0.000186, 0.000044, 0.000065, 0.000036, + -0.000113, -0.000153, 0.000092, -0.000046, -0.000015, -0.000012, -0.000013, 0.000034, -0.000029, 0.000019, + 0.000010, 0.000011, -0.000001, 0.000011, -0.000029, 0.000010, 0.000019, 0.000034, -0.000001, 0.000044, + -0.000067, 0.000075, -0.000044, -0.000008, -0.000026, 0.000029, -0.000018, -0.000003, -0.000070, -0.000010, + 
0.000202, -0.000072, -0.000007, -0.000021, -0.000113, 0.000008, 0.000044, 0.000036, 0.000067, -0.000007, + 0.000034, 0.000049, -0.000088, 0.000028, 0.000055, -0.000005, -0.000010, -0.000010, -0.000080, -0.000080, + 0.000227, -0.000024, -0.000001, -0.000002, -0.000029, 0.000092, -0.000013, -0.000003, -0.000021, -0.000012, + -0.000063, 0.000016, 0.000054, 0.000049, 0.000028, -0.000005, -0.000015, -0.000012, 0.000092, -0.000046, + -0.000013, 0.000022, 0.000021, -0.000006, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, + -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, 0.000027, 0.000003, -0.000008, -0.000113, 0.000089, + 0.000044, 0.000036, 0.000079, -0.000012, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, + -0.000043, 0.000028, 0.000027, 0.000021, 0.000003, 0.000045, -0.000021, -0.000021, 0.000016, 0.000047, + 0.000016, -0.000013, 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000046, 0.000322, + -0.000117, -0.000024, 0.000027, 0.000003, -0.000008, 0.000034, -0.000088, 0.000028, 0.000055, 0.000049, + -0.000005, 0.000022, 0.000021, -0.000006, 0.000008, -0.000113, 0.000067, 0.000044, 0.000036, -0.000007, + 0.000034, 0.000028, 0.000055, 0.000049, -0.000088, -0.000005, -0.000001, -0.000001, -0.000005, 0.000127, + -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000047, 0.000045, -0.000021, 0.000016, + -0.000021, 0.000016, -0.000013, 0.000044, 0.000036, -0.000113, 0.000067, 0.000008, -0.000007, -0.000003, + -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, + 0.000044, 0.000035, -0.000018, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, -0.000117, -0.000024, + 0.000044, -0.000113, 0.000089, 0.000036, 0.000079, -0.000012, 0.000022, 0.000021, -0.000006, -0.000001, + -0.000041, 0.000171, -0.000059, -0.000015, -0.000017, -0.000027, -0.000004, -0.000002, -0.000001, -0.000002, + -0.000020, -0.000024, -0.000055, -0.000005, 0.000216, 
-0.000034, -0.000021, -0.000012, 0.000126, -0.000043, + -0.000052, -0.000032, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, + 0.000047, -0.000005, -0.000002, -0.000001, -0.000002, -0.000004, -0.000020, -0.000055, -0.000024, 0.000216, + -0.000005, -0.000034, -0.000021, -0.000012, -0.000070, -0.000072, -0.000007, 0.000202, -0.000010, -0.000021, + -0.000095, 0.000044, 0.000186, 0.000065, 0.000036, -0.000113, -0.000153, 0.000126, -0.000043, -0.000052, + -0.000032, 0.000044, 0.000036, -0.000113, 0.000089, 0.000079, -0.000012, -0.000080, -0.000010, -0.000010, + 0.000227, -0.000080, -0.000024, 0.000092, -0.000046, -0.000015, -0.000012, -0.000013, -0.000027, 0.000056, + -0.000029, 0.000016, 0.000036, 0.000010, 0.000011, -0.000082, -0.000001, -0.000001, -0.000013, 0.000070, + -0.000013, -0.000021, -0.000003, -0.000012, 0.000027, 0.000003, -0.000008, 0.000044, 0.000036, -0.000095, + 0.000186, 0.000065, -0.000113, -0.000153, -0.000006, 0.000056, -0.000033, -0.000017, 0.000013, 0.000012, + -0.000004, -0.000005, 0.000322, -0.000044, -0.000117, -0.000044, -0.000046, -0.000024, -0.000002, -0.000008, + 0.000118, -0.000078, -0.000063, 0.000028, 0.000016, -0.000005, -0.000002, -0.000008, 0.000118, -0.000078, + -0.000063, 0.000016, 0.000028, -0.000005, -0.000113, 0.000067, 0.000008, 0.000036, 0.000044, -0.000007, + -0.000003, -0.000003, -0.000013, -0.000044, 0.000137, -0.000044, -0.000006, 0.000044, 0.000036, -0.000113, + 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, 0.000137, -0.000044, -0.000013, -0.000006, + -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, 0.000022, 0.000011, -0.000029, 0.000018, 0.000010, + -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, -0.000012, 0.000092, -0.000046, -0.000015, + -0.000013, 0.000011, -0.000029, 0.000018, 0.000010, 0.000022, -0.000001, 0.000021, -0.000043, 0.000028, + 0.000027, 0.000003, 0.000016, 0.000047, 0.000045, 0.000016, -0.000021, -0.000021, -0.000013, 0.000012, 
+ 0.000013, -0.000004, 0.000028, 0.000019, -0.000025, -0.000013, -0.000161, 0.000683, -0.000003, -0.000021, + -0.000012, 0.000016, 0.000002, -0.000012, -0.000009, -0.000017, 0.000078, 0.000185, -0.000065, -0.000175, + -0.000065, 0.000065, -0.000113, 0.000067, 0.000008, 0.000036, 0.000044, -0.000007, -0.000003, -0.000003, + -0.000013, 0.000137, -0.000044, -0.000044, -0.000006, 0.000092, -0.000015, -0.000046, -0.000012, -0.000013, + -0.000043, 0.000027, 0.000028, 0.000021, 0.000003, 0.000047, 0.000045, -0.000021, 0.000016, -0.000021, + 0.000016, -0.000013, -0.000070, 0.000202, -0.000007, -0.000072, -0.000010, -0.000021, 0.000049, 0.000034, + -0.000088, 0.000055, 0.000028, -0.000005, -0.000043, 0.000126, -0.000052, -0.000032, 0.000044, 0.000036, + 0.000089, 0.000079, -0.000113, -0.000012, -0.000060, -0.000010, 0.000176, -0.000005, -0.000084, -0.000003, + -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, + 0.000137, -0.000044, -0.000006, -0.000007, -0.000070, -0.000010, -0.000072, 0.000202, -0.000021, -0.000012, + -0.000015, 0.000092, -0.000046, -0.000013, -0.000043, 0.000126, -0.000052, -0.000032, 0.000047, -0.000043, + -0.000005, 0.000075, -0.000067, 0.000044, -0.000044, -0.000008, -0.000001, -0.000001, -0.000001, -0.000005, + -0.000003, -0.000013, -0.000013, 0.000174, -0.000065, -0.000013, -0.000021, -0.000012, -0.000027, 0.000056, + 0.000010, 0.000011, 0.000036, 0.000016, -0.000029, -0.000082, 0.000126, -0.000043, 0.000028, -0.000080, + -0.000032, 0.000045, -0.000013, 0.000028, 0.000019, 0.000045, 0.000047, -0.000021, 0.000016, -0.000021, + 0.000016, -0.000013, -0.000007, 0.000202, -0.000070, -0.000072, -0.000010, -0.000021, 0.000013, 0.000012, + -0.000004, -0.000005, -0.000044, -0.000044, -0.000046, -0.000117, 0.000322, -0.000024, 0.000045, 0.000045, + -0.000013, -0.000113, 0.000044, 0.000036, 0.000069, 0.000035, -0.000007, -0.000059, -0.000059, 0.000044, + 0.000066, 0.000044, 0.000035, -0.000018, 
0.000027, 0.000003, -0.000008, -0.000113, 0.000044, 0.000036, + 0.000079, 0.000089, -0.000012, 0.000045, 0.000045, -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, + 0.000044, -0.000007, -0.000029, 0.000010, 0.000011, 0.000019, 0.000034, -0.000001, -0.000001, -0.000013, + -0.000003, 0.000036, -0.000010, -0.000006, 0.000117, 0.000055, -0.000159, 0.000056, 0.000154, -0.000011, + -0.000070, -0.000072, -0.000007, 0.000202, -0.000010, -0.000021, 0.000036, -0.000080, -0.000010, 0.000236, + -0.000010, -0.000080, -0.000113, 0.000032, 0.000008, 0.000044, -0.000113, 0.000067, 0.000036, -0.000007, + -0.000002, -0.000001, -0.000002, -0.000004, -0.000005, 0.000221, -0.000055, -0.000003, -0.000034, -0.000024, + -0.000021, -0.000020, -0.000014, 0.000036, 0.000044, -0.000113, 0.000067, 0.000008, -0.000007, -0.000003, + -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000049, 0.000057, -0.000100, 0.000034, + 0.000036, -0.000002, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, -0.000001, + -0.000059, 0.000171, -0.000041, -0.000015, -0.000017, -0.000027, -0.000161, 0.000161, -0.000161, 0.000161, + -0.000013, -0.000178, 0.000737, -0.000216, -0.000216, -0.000013, -0.000026, -0.000021, 0.000041, -0.000125, + 0.000041, 0.000130, -0.000075, -0.000013, 0.000028, 0.000019, -0.000001, 0.000036, -0.000003, -0.000010, + -0.000019, -0.000029, 0.000010, 0.000011, 0.000019, 0.000034, -0.000001, -0.000001, -0.000001, -0.000001, + -0.000013, -0.000013, -0.000003, 0.000087, -0.000013, -0.000021, -0.000012, -0.000002, -0.000004, -0.000002, + -0.000001, -0.000020, -0.000055, 0.000216, -0.000034, -0.000005, -0.000021, -0.000024, -0.000012, 0.000022, + 0.000021, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000036, + 0.000044, -0.000113, 0.000069, 0.000035, -0.000007, -0.000001, -0.000001, -0.000005, -0.000005, 0.000127, + -0.000088, -0.000019, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 
0.000027, + 0.000028, 0.000003, 0.000034, 0.000011, -0.000029, 0.000010, 0.000019, -0.000001, -0.000012, 0.000092, + -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, -0.000088, 0.000034, + 0.000028, 0.000055, 0.000049, -0.000005, 0.000022, 0.000021, -0.000006, -0.000009, 0.000164, -0.000006, + -0.000084, 0.000049, -0.000063, 0.000054, 0.000023, 0.000057, 0.000034, 0.000036, -0.000100, 0.000049, + -0.000002, -0.000027, 0.000056, 0.000011, 0.000010, -0.000029, 0.000016, 0.000036, -0.000082, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000006, -0.000006, -0.000015, 0.000285, -0.000044, + -0.000044, -0.000055, -0.000056, -0.000037, 0.000402, 0.000077, -0.000358, 0.000075, -0.000067, 0.000044, + -0.000044, -0.000008, 0.000045, 0.000045, -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, + -0.000007, -0.000080, -0.000010, 0.000227, -0.000010, -0.000080, -0.000024, 0.000008, 0.000044, -0.000113, + 0.000067, 0.000036, -0.000007, -0.000113, 0.000036, 0.000044, 0.000069, 0.000035, -0.000007, -0.000001, + -0.000001, -0.000005, -0.000005, 0.000127, -0.000088, -0.000019, 0.000013, 0.000012, -0.000004, -0.000005, + -0.000044, -0.000044, -0.000046, 0.000322, -0.000141, -0.000010, -0.000060, 0.000176, -0.000005, -0.000084, + 0.000028, 0.000027, -0.000008, -0.000133, -0.000007, -0.000099, 0.000155, 0.000099, -0.000010, -0.000005, + -0.000043, 0.000028, 0.000027, 0.000021, 0.000003, -0.000021, -0.000021, 0.000016, 0.000047, 0.000016, + 0.000045, -0.000013, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, -0.000012, -0.000010, -0.000060, + 0.000176, -0.000005, -0.000084, -0.000002, -0.000063, 0.000118, -0.000008, 0.000028, 0.000016, -0.000078, + -0.000005, -0.000067, 0.000075, -0.000044, 0.000044, -0.000008, -0.000067, 0.000044, 0.000075, -0.000044, + -0.000008, -0.000001, -0.000001, -0.000001, -0.000005, -0.000003, -0.000065, -0.000013, 0.000174, -0.000013, + -0.000021, -0.000013, -0.000012, -0.000027, -0.000029, 
0.000010, 0.000056, 0.000011, 0.000036, 0.000016, + -0.000082, -0.000002, -0.000063, 0.000118, -0.000008, 0.000016, 0.000028, -0.000078, -0.000005, -0.000067, + 0.000075, -0.000044, 0.000044, -0.000008, -0.000015, -0.000006, -0.000006, 0.000285, -0.000055, -0.000056, + -0.000044, -0.000044, -0.000037, 0.000402, 0.000077, -0.000358, -0.000001, -0.000001, -0.000005, 0.000127, + -0.000005, -0.000088, -0.000019, -0.000007, -0.000007, -0.000104, -0.000099, 0.000327, -0.000036, -0.000021, + 0.000028, 0.000019, 0.000041, -0.000125, 0.000041, 0.000130, -0.000075, -0.000013, 1.000000, 0.000045, + 0.000045, -0.000013, 0.000036, 0.000035, 0.000069, -0.000113, 0.000044, -0.000007, -0.000029, 0.000010, + 0.000019, 0.000034, 0.000011, -0.000001, -0.000001, -0.000001, -0.000013, -0.000013, -0.000003, 0.000070, + -0.000021, -0.000012, 0.000027, 0.000003, -0.000008, 0.000044, 0.000036, -0.000113, 0.000079, 0.000089, + -0.000012, -0.000113, 0.000036, 0.000044, 0.000069, 0.000035, -0.000007, -0.000001, -0.000001, -0.000005, + -0.000005, 0.000127, -0.000088, -0.000019, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, + -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, 0.000055, 0.000049, -0.000088, 0.000034, 0.000028, + -0.000005, 0.000022, 0.000021, -0.000006, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, + -0.000019, -0.000043, 0.000126, -0.000052, -0.000032, 0.000045, 0.000045, -0.000013, 0.000036, 0.000044, + -0.000113, 0.000069, 0.000035, -0.000007, -0.000100, 0.000034, 0.000057, 0.000049, 0.000036, -0.000002, + -0.000015, -0.000012, 0.000092, -0.000046, -0.000013, 0.000044, 0.000036, 0.000089, 0.000079, -0.000113, + -0.000012, -0.000007, -0.000070, -0.000072, 0.000202, -0.000010, -0.000021, -0.000006, -0.000009, 0.000164, + -0.000063, -0.000052, -0.000021, 0.000675, 0.000132, -0.000594, -0.000007, 0.000202, -0.000070, -0.000072, + -0.000010, -0.000021, 0.000008, 0.000067, -0.000113, 0.000036, 0.000044, -0.000007, -0.000006, -0.000006, + 
0.000285, -0.000015, -0.000044, -0.000044, -0.000055, -0.000056, -0.000037, -0.000159, 0.000117, 0.000055, + 0.000056, 0.000154, -0.000011, -0.000010, 0.000227, -0.000010, -0.000080, -0.000080, -0.000024, 0.000008, + -0.000113, 0.000067, 0.000044, 0.000036, -0.000007, 0.000027, 0.000003, -0.000008, 0.000044, 0.000036, + -0.000113, 0.000079, 0.000089, -0.000012, -0.000009, 0.000164, -0.000006, -0.000063, -0.000052, -0.000021, + 0.000049, -0.000063, 0.000054, 0.000028, 0.000016, -0.000005, -0.000012, 0.000092, -0.000046, -0.000015, + -0.000013, -0.000027, 0.000056, 0.000011, 0.000036, -0.000029, 0.000016, 0.000010, -0.000082, 0.000021, + -0.000043, 0.000028, 0.000027, 0.000003, 0.000016, -0.000063, 0.000054, 0.000049, 0.000028, -0.000005, + -0.000015, 0.000092, -0.000012, -0.000046, -0.000013, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, + 0.000011, 0.000034, -0.000029, 0.000019, 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, + 0.000003, 0.000049, 0.000057, -0.000100, 0.000032, -0.000029, 0.000010, 0.000011, 0.000019, 0.000034, + -0.000001, -0.000001, -0.000001, -0.000001, -0.000013, -0.000013, -0.000003, 0.000087, -0.000013, -0.000021, + -0.000012, 0.000047, -0.000021, 0.000016, -0.000021, 0.000016, 0.000045, -0.000013, -0.000005, -0.000044, + -0.000044, 0.000322, -0.000046, -0.000117, -0.000024, 0.000044, 0.000079, -0.000113, 0.000089, 0.000036, + -0.000012, 0.000044, 0.000079, -0.000113, 0.000036, 0.000089, -0.000012, 0.000013, 0.000012, -0.000004, + -0.000005, -0.000044, -0.000044, -0.000117, -0.000046, 0.000322, -0.000024, -0.000005, -0.000044, 0.000322, + -0.000046, -0.000044, -0.000117, -0.000024, 0.000079, -0.000113, 0.000089, 0.000044, 0.000036, -0.000012, + -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, + 0.000011, 0.000034, -0.000029, 0.000010, 0.000019, -0.000001, 0.000071, 0.000173, -0.000018, -0.000007, + -0.000007, -0.000099, -0.000104, 0.000327, -0.000036, -0.000021, 
-0.000012, 0.000092, -0.000015, -0.000046, + -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, -0.000027, 0.000011, 0.000036, -0.000029, + 0.000010, 0.000016, 0.000056, -0.000082, -0.000004, -0.000029, 0.000099, -0.000005, -0.000029, -0.000010, + -0.000007, -0.000027, 0.000036, 0.000011, -0.000029, 0.000010, 0.000056, 0.000016, -0.000082, -0.000043, + 0.000126, -0.000052, -0.000032, 0.000045, 0.000045, -0.000013, 0.000012, 0.000013, -0.000004, 0.000028, + 0.000019, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, + -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, + -0.000117, -0.000024, 0.000044, -0.000113, 0.000089, 0.000036, 0.000079, -0.000012, 0.000044, 0.000186, + -0.000113, 0.000036, 0.000065, -0.000095, -0.000153, 0.000011, -0.000029, 0.000010, 0.000019, 0.000034, + -0.000001, -0.000001, -0.000013, -0.000003, 0.000036, -0.000010, -0.000006, -0.000071, -0.000005, 0.000100, + 0.000071, 0.000161, -0.000013, -0.000161, -0.000161, -0.000216, -0.000013, 0.000725, -0.000178, -0.000026, + -0.000021, -0.000002, -0.000008, 0.000028, 0.000118, -0.000063, 0.000016, -0.000078, -0.000005, -0.000015, + -0.000044, -0.000006, 0.000285, -0.000044, -0.000006, -0.000055, -0.000056, -0.000037, 0.000088, 0.000064, + -0.000018, -0.000002, -0.000008, 0.000016, -0.000063, 0.000028, 0.000118, -0.000078, -0.000005, -0.000043, + 0.000028, 0.000027, 0.000021, 0.000003, 0.000045, -0.000021, 0.000016, -0.000021, 0.000016, 0.000047, + -0.000013, -0.000007, -0.000047, -0.000047, 0.000164, -0.000005, -0.000021, -0.000012, -0.000027, 0.000011, + -0.000029, 0.000010, 0.000056, 0.000016, 0.000036, -0.000082, -0.000012, 0.000092, -0.000046, -0.000015, + -0.000013, 0.000022, 0.000011, -0.000029, 0.000018, 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, + 0.000027, 0.000003, 0.000022, 0.000021, -0.000006, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, + 0.000022, 
0.000011, -0.000029, 0.000018, 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, + 0.000003, 0.000044, -0.000113, 0.000036, 0.000186, 0.000065, -0.000095, -0.000153, -0.000100, 0.000034, + 0.000057, 0.000049, 0.000036, -0.000002, 0.000045, 0.000045, -0.000013, -0.000113, 0.000067, 0.000008, + 0.000044, 0.000036, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, + -0.000070, -0.000010, 0.000202, -0.000072, -0.000007, -0.000021, 0.000044, -0.000113, 0.000036, 0.000067, + 0.000008, -0.000007, 0.000049, 0.000034, -0.000088, 0.000028, 0.000055, -0.000005, -0.000003, 0.000101, + -0.000047, -0.000005, -0.000021, -0.000012, -0.000159, 0.000117, 0.000055, 0.000056, 0.000154, -0.000011, + 0.000022, 0.000021, -0.000006, 0.000007, 0.000007, -0.000002, 0.000022, 0.000021, -0.000006, -0.000006, + -0.000063, 0.000164, -0.000052, -0.000009, -0.000021, -0.000020, 0.000055, -0.000018, 0.000049, 0.000034, + -0.000100, 0.000036, 0.000057, -0.000002, 0.000126, -0.000043, -0.000052, -0.000032, -0.000003, -0.000003, + -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, + -0.000044, -0.000006, 0.000010, 0.000001, -0.000003, 0.000173, 0.000071, -0.000018, -0.000001, -0.000001, + -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, 0.000022, 0.000021, + -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000045, 0.000045, -0.000013, + 0.000035, -0.000113, 0.000036, 0.000044, 0.000069, -0.000007, 0.000011, -0.000029, 0.000010, 0.000019, + 0.000034, -0.000001, -0.000001, -0.000001, -0.000013, -0.000013, -0.000003, 0.000070, -0.000021, -0.000012, + 0.000027, 0.000003, -0.000008, 0.000044, -0.000113, 0.000036, 0.000089, 0.000079, -0.000012, -0.000010, + -0.000060, 0.000176, -0.000005, -0.000084, -0.000113, 0.000186, 0.000044, 0.000036, 0.000065, -0.000095, + -0.000153, -0.000113, 0.000067, 0.000008, 0.000044, 0.000036, -0.000007, 
-0.000003, -0.000003, -0.000013, + -0.000044, 0.000137, -0.000044, -0.000006, 0.000013, 0.000013, -0.000004, -0.000007, -0.000007, -0.000104, + -0.000099, 0.000327, -0.000036, -0.000021, 0.000045, 0.000045, -0.000013, 0.000036, 0.000035, -0.000113, + 0.000044, 0.000069, -0.000007, -0.000006, -0.000009, 0.000164, -0.000084, 0.000710, 0.000139, -0.000636, + -0.000001, -0.000001, -0.000005, -0.000005, 0.000127, -0.000088, -0.000019, -0.000052, -0.000009, 0.000164, + -0.000063, -0.000006, -0.000021, 0.000082, 0.000033, -0.000008, 0.000049, 0.000016, -0.000063, 0.000028, + 0.000054, -0.000005, -0.000010, -0.000070, 0.000202, -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, + 0.000036, 0.000008, 0.000067, -0.000007, 0.000049, 0.000034, -0.000088, 0.000028, 0.000055, -0.000005, + -0.000071, 0.000100, -0.000005, 0.000071, 0.000027, 0.000003, -0.000008, -0.000095, -0.000113, 0.000186, + 0.000044, 0.000036, 0.000065, -0.000153, -0.000002, -0.000008, 0.000118, -0.000078, -0.000063, 0.000028, + 0.000016, -0.000005, -0.000002, -0.000008, 0.000118, -0.000078, -0.000063, 0.000016, 0.000028, -0.000005, + -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, -0.000027, 0.000036, 0.000011, 0.000056, -0.000029, + 0.000016, 0.000010, -0.000082, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, -0.000010, -0.000060, + 0.000176, -0.000005, -0.000084, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, 0.000034, 0.000011, + -0.000029, 0.000019, 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, 0.000013, + 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000046, -0.000117, 0.000322, -0.000024, 0.000016, + 0.000016, -0.000021, -0.000021, 0.000047, 0.000045, -0.000013, 0.000013, 0.000012, -0.000004, 0.000022, + 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, 0.000022, 0.000021, -0.000006, -0.000070, -0.000010, + 0.000202, -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, 0.000036, 0.000008, 0.000067, -0.000007, + 0.000034, 0.000049, -0.000088, 
0.000028, 0.000055, -0.000005, -0.000001, -0.000001, -0.000005, 0.000127, + -0.000005, -0.000044, -0.000044, -0.000019, -0.000043, 0.000047, -0.000005, 0.000045, 0.000045, -0.000013, + 0.000012, 0.000013, -0.000004, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, -0.000019, + -0.000043, 0.000126, -0.000052, -0.000032, -0.000002, -0.000063, 0.000118, -0.000078, -0.000008, 0.000028, + 0.000016, -0.000005, -0.000015, -0.000012, -0.000046, 0.000092, -0.000013, -0.000067, 0.000044, 0.000075, + -0.000044, -0.000008, 0.000469, 0.000220, -0.000516, -0.000001, -0.000001, -0.000005, 0.000127, -0.000044, + -0.000005, -0.000044, -0.000019, 1.000000, -0.000043, 0.000028, 0.000126, -0.000080, -0.000032, -0.000002, + -0.000063, 0.000118, -0.000078, -0.000008, 0.000016, 0.000028, -0.000005, -0.000046, -0.000015, -0.000012, + 0.000092, -0.000013, -0.000044, -0.000067, 0.000044, 0.000075, -0.000008, 0.000027, 0.000003, -0.000008, + 0.000186, -0.000095, -0.000113, 0.000065, 0.000044, 0.000036, -0.000153, -0.000043, 0.000126, -0.000052, + -0.000032, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, -0.000117, -0.000024, 0.000079, 0.000044, + -0.000113, 0.000089, 0.000036, -0.000012, 0.000044, 0.000079, -0.000113, 0.000036, 0.000089, -0.000012, + -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, + -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, + -0.000027, 0.000011, 0.000056, -0.000029, 0.000010, 0.000016, 0.000036, -0.000082, 0.000016, -0.000021, + -0.000021, 0.000016, 0.000047, 0.000045, -0.000013, 0.000013, 0.000012, -0.000004, -0.000059, -0.000059, + 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, + -0.000088, -0.000019, -0.000043, 0.000047, -0.000005, -0.000001, -0.000001, -0.000001, -0.000013, -0.000013, + -0.000003, -0.000013, 0.000087, -0.000021, -0.000012, -0.000027, 0.000056, 0.000011, 
0.000010, -0.000029, + 0.000036, 0.000016, -0.000082, -0.000113, 0.000044, 0.000067, 0.000008, -0.000007, -0.000003, -0.000003, + -0.000013, -0.000044, 0.000137, -0.000044, -0.000006, -0.000001, -0.000017, -0.000041, 0.000171, -0.000059, + -0.000015, -0.000027, -0.000002, -0.000002, -0.000001, -0.000004, -0.000020, -0.000034, -0.000024, -0.000005, + 0.000216, -0.000055, -0.000021, -0.000012, -0.000075, 0.000041, -0.000125, 0.000041, 0.000130, -0.000013, + 0.000011, -0.000029, 0.000010, 0.000019, 0.000034, -0.000001, -0.000001, -0.000013, -0.000003, 0.000036, + -0.000010, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000022, + 0.000021, -0.000006, 0.000036, 0.000044, -0.000113, 0.000069, 0.000035, -0.000007, -0.000001, -0.000001, + -0.000005, -0.000005, 0.000127, -0.000044, -0.000044, -0.000019, -0.000001, -0.000059, -0.000017, 0.000171, + -0.000041, -0.000015, -0.000027, -0.000161, -0.000013, -0.000178, 0.000496, -0.000026, -0.000021, 0.000041, + -0.000075, -0.000125, 0.000041, 0.000130, -0.000013, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, + -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, 0.000050, + 0.000036, -0.000010, 0.000045, 0.000045, -0.000013, -0.000113, 0.000036, 0.000044, 0.000035, 0.000069, + -0.000007, 0.000071, 0.000173, -0.000018, -0.000007, -0.000007, -0.000099, -0.000104, 0.000327, -0.000036, + -0.000021, -0.000043, 0.000126, -0.000052, -0.000032, -0.000133, -0.000007, 0.000099, 0.000155, -0.000010, + -0.000099, -0.000005, -0.000003, -0.000003, -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, + -0.000003, -0.000044, -0.000013, 0.000137, -0.000044, -0.000006, -0.000015, -0.000006, -0.000006, -0.000055, + 0.000285, -0.000056, -0.000044, -0.000044, -0.000037, 0.000675, 0.000132, -0.000594, -0.000070, -0.000072, + -0.000007, 0.000202, -0.000010, -0.000021, -0.000095, 0.000044, -0.000113, 0.000036, 0.000186, 0.000065, + -0.000153, 
0.000022, 0.000021, -0.000006, -0.000006, -0.000006, -0.000055, 0.000285, -0.000015, -0.000056, + -0.000044, -0.000044, -0.000037, 0.000055, -0.000159, 0.000117, 0.000056, 0.000154, -0.000011, 0.000022, + 0.000021, -0.000006, -0.000059, -0.000059, 0.000044, 0.000066, 0.000044, 0.000035, -0.000018, 0.000028, + 0.000019, -0.000071, -0.000005, 0.000100, 0.000071, -0.000060, 0.000176, -0.000010, -0.000005, -0.000084, + -0.000007, 0.000202, -0.000070, -0.000010, -0.000072, -0.000021, 0.000045, 0.000045, -0.000013, 0.000035, + 0.000036, -0.000113, 0.000044, 0.000069, -0.000007, -0.000006, -0.000006, -0.000044, -0.000044, -0.000055, + 0.000285, -0.000056, -0.000015, -0.000037, -0.000091, 0.000346, -0.000121, 0.000055, -0.000159, 0.000056, + 0.000117, 0.000154, -0.000011, -0.000007, -0.000007, -0.000099, 0.000327, -0.000104, -0.000036, -0.000021, + 0.000173, 0.000071, -0.000018, -0.000060, -0.000010, 0.000176, -0.000005, -0.000084, -0.000007, -0.000070, + -0.000010, -0.000072, 0.000202, -0.000021, 0.000044, -0.000095, 0.000036, -0.000113, 0.000186, 0.000065, + -0.000153, 0.000010, 0.000011, -0.000029, 0.000019, 0.000034, -0.000001, -0.000001, -0.000001, -0.000001, + -0.000013, -0.000013, -0.000003, 0.000087, -0.000013, -0.000021, -0.000012, -0.000043, 0.000126, -0.000052, + -0.000032, 0.000027, 0.000003, -0.000008, -0.000113, 0.000044, 0.000036, 0.000079, 0.000089, -0.000012, + 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000044, -0.000117, -0.000046, 0.000322, -0.000024, + -0.000070, -0.000010, 0.000202, -0.000072, -0.000007, -0.000021, -0.000113, 0.000044, 0.000036, 0.000008, + 0.000067, -0.000007, 0.000049, 0.000034, -0.000088, 0.000028, 0.000055, -0.000005, 0.000045, 0.000045, + -0.000013, 0.000008, 0.000036, 0.000067, -0.000113, 0.000044, -0.000007, 0.000045, 0.000045, -0.000013, + 0.000036, 0.000044, 0.000035, -0.000113, 0.000069, -0.000007, 0.000036, -0.000113, -0.000080, -0.000010, + 0.000236, -0.000080, -0.000010, 0.000032, 0.000035, -0.000113, 
0.000036, 0.000069, 0.000044, -0.000007, + 0.000008, 0.000044, -0.000113, 0.000036, 0.000067, -0.000007, -0.000065, -0.000009, 0.000213, -0.000005, + -0.000021, -0.000078, -0.000027, 0.000036, -0.000029, 0.000010, 0.000011, 0.000056, 0.000016, -0.000082, + -0.000020, 0.000013, 0.000022, -0.000013, -0.000002, 0.000173, 0.000071, -0.000018, -0.000003, -0.000003, + -0.000044, 0.000137, -0.000013, -0.000044, -0.000006, -0.000003, -0.000003, -0.000044, -0.000013, 0.000137, + -0.000044, -0.000006, -0.000004, -0.000029, -0.000029, 0.000099, -0.000005, -0.000010, -0.000007, -0.000027, + 0.000010, 0.000011, 0.000036, -0.000029, 0.000056, 0.000016, -0.000082, -0.000043, 0.000126, -0.000052, + -0.000032, -0.000071, -0.000005, 0.000100, 0.000071, 0.000161, -0.000161, -0.000216, -0.000161, -0.000026, + -0.000017, -0.000013, -0.000005, 0.000530, -0.000026, -0.000024, -0.000002, -0.000008, 0.000028, 0.000118, + -0.000063, 0.000016, -0.000078, -0.000005, -0.000001, -0.000001, -0.000005, 0.000127, -0.000005, -0.000088, + -0.000019, -0.000043, 0.000047, -0.000005, -0.000015, -0.000055, -0.000056, -0.000044, -0.000006, 0.000285, + -0.000044, -0.000006, -0.000037, 0.000143, 0.000078, -0.000022, -0.000002, -0.000008, 0.000016, -0.000063, + 0.000028, 0.000118, -0.000078, -0.000005, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, -0.000027, + 0.000036, 0.000011, 0.000056, -0.000029, 0.000016, 0.000010, -0.000082, 0.000021, -0.000043, 0.000028, + 0.000027, 0.000003, -0.000012, 0.000092, -0.000046, -0.000015, -0.000013, 0.000034, 0.000011, -0.000029, + 0.000019, 0.000010, -0.000001, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, -0.000161, 0.000161, + 0.000161, -0.000216, -0.000178, -0.000216, 0.000522, -0.000026, -0.000021, -0.000021, 0.000016, -0.000021, + 0.000047, 0.000045, 0.000016, -0.000013, 0.000012, 0.000013, -0.000004, -0.000059, -0.000059, 0.000044, + 0.000066, 0.000035, 0.000044, -0.000018, -0.000059, -0.000059, 1.000000, 0.000044, -0.000063, 0.000024, + -0.000063, 
0.000024, -0.000063, 0.000024, -0.000063, -0.000063, 0.000024, 0.000024, 0.000024, -0.000063, + -0.000063, 0.000024, 0.000024, 0.000024, 0.000024, -0.000063, -0.000063, 0.000024, 0.000024, -0.000063, + -0.000063, 0.000024, -0.000063, -0.000063, 0.000075, 0.000024, -0.000063, 0.000044, 0.000386, -0.000043, + 0.000126, -0.000052, -0.000032, -0.000005, -0.000044, -0.000044, 0.000322, -0.000046, -0.000117, -0.000024, + 0.000044, 0.000079, -0.000113, 0.000089, 0.000036, -0.000012, 0.000044, 0.000079, -0.000113, 0.000036, + 0.000089, -0.000012, 0.000013, 0.000012, -0.000004, -0.000005, -0.000044, -0.000046, -0.000044, -0.000117, + 0.000322, -0.000024, 0.000028, 0.000019, -0.000005, -0.000060, 0.000176, -0.000010, -0.000084, -0.000012, + 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, 0.000027, 0.000028, 0.000003, 0.000011, + 0.000034, -0.000029, 0.000010, 0.000019, -0.000001, -0.000006, 0.000164, -0.000009, -0.000084, 0.000049, + -0.000100, 0.000057, 0.000032, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, + 0.000027, 0.000028, 0.000003, -0.000027, 0.000011, 0.000056, 0.000036, -0.000029, 0.000010, 0.000016, + -0.000082, -0.000007, 0.000202, -0.000010, -0.000072, -0.000070, -0.000021, 0.000027, 0.000003, -0.000008, + 0.000079, 0.000044, -0.000113, 0.000036, 0.000089, -0.000012, 0.000022, 0.000021, -0.000006, 1.000000, + 0.000024, -0.000063, 0.000005, 0.000028, -0.000002, -0.000005, -0.000029, 0.000077, -0.000021, -0.000012, + 1.000000, -0.000003, -0.000003, -0.000044, 0.000137, -0.000044, -0.000013, -0.000006, 0.000016, -0.000063, + 0.000028, 0.000054, 0.000049, -0.000005, -0.000046, -0.000015, -0.000012, 0.000092, -0.000013, -0.000005, + -0.000044, -0.000044, 0.000322, -0.000046, -0.000117, -0.000024, 0.000044, 0.000079, -0.000113, 0.000089, + 0.000036, -0.000012, 0.000044, -0.000095, 0.000186, -0.000113, 0.000036, 0.000065, -0.000153, 0.000010, + -0.000029, 0.000011, 0.000019, 0.000034, -0.000001, -0.000001, -0.000001, 
-0.000013, -0.000013, -0.000021, + -0.000003, 0.000070, -0.000012, -0.000012, 0.000092, -0.000015, -0.000046, -0.000013, 0.000021, -0.000043, + 0.000027, 0.000028, 0.000003, -0.000027, 0.000011, 0.000036, 0.000056, -0.000029, 0.000010, 0.000016, + -0.000082, 0.000045, 0.000045, -0.000013, 0.000022, 0.000021, -0.000006, 0.000036, -0.000113, -0.000080, + 0.000236, -0.000080, -0.000010, -0.000010, 0.000032, 0.000035, -0.000113, 0.000069, 0.000044, 0.000036, + -0.000007, 0.000008, 0.000044, -0.000113, 0.000036, 0.000067, -0.000007, 0.000022, 0.000021, -0.000006, + -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, + -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, + -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, + -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, + -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, + -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, -0.000059, + -0.000059, -0.000059, 1.000000, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, + 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000024, 0.000044, 0.000044, 0.000044, + 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, + 0.000044, 0.000044, 0.000044, 0.000044, 0.000044, -0.000063, 0.001236, 0.000044, 0.000283, 0.000028, + -0.000006, -0.000063, 0.000164, -0.000052, -0.000009, -0.000021, -0.000040, 0.000086, -0.000029, 0.000049, + 0.000034, -0.000100, 0.000036, 0.000057, -0.000002, -0.000006, -0.000009, -0.000063, -0.000052, 0.000164, + -0.000021, 0.000710, 0.000139, -0.000636, 0.000036, -0.000113, 0.000044, 0.000067, 0.000008, -0.000007, + -0.000003, -0.000003, 
-0.000044, -0.000013, -0.000044, 0.000137, -0.000006, 0.000045, 0.000045, -0.000013, + -0.000113, 0.000044, 0.000035, 0.000036, 0.000069, -0.000007, -0.000009, 0.000164, -0.000006, -0.000084, + 0.000049, -0.000063, 0.000054, 0.000023, -0.000003, -0.000003, -0.000044, -0.000044, 0.000137, -0.000013, + -0.000006, -0.000003, -0.000003, -0.000044, -0.000044, -0.000013, 0.000137, -0.000006, -0.000012, 0.000092, + -0.000046, -0.000015, -0.000013, -0.000027, 0.000056, 0.000011, 0.000036, -0.000029, 0.000016, 0.000010, + -0.000082, 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, 0.000036, 0.000044, -0.000113, 0.000067, + 0.000008, -0.000007, -0.000003, -0.000003, -0.000044, -0.000013, -0.000044, 0.000137, -0.000006, -0.000012, + 0.000092, -0.000046, -0.000015, -0.000013, 0.000011, 0.000034, -0.000029, 0.000019, 0.000010, -0.000001, + 0.000021, -0.000043, 0.000028, 0.000027, 0.000003, -0.000005, -0.000044, -0.000044, 0.000322, -0.000117, + -0.000046, -0.000024, 0.000044, 0.000186, -0.000095, -0.000113, 0.000065, 0.000036, -0.000153, 0.000079, + 0.000044, -0.000113, 0.000036, 0.000089, -0.000012, 1.000000, -0.000000, -0.000025, -0.000025, -0.000025, + -0.000043, -0.000069, -0.000043, -0.000069, -0.000025, -0.000020, -0.000020, -0.000000, -0.000025, -0.000020, + -0.000043, -0.000067, -0.000067, -0.000025, -0.000025, -0.000013, -0.000025, -0.000000, -0.000000, -0.000020, + -0.000020, -0.000020, -0.000067, -0.000054, -0.000054, -0.000020, -0.000054, -0.000020, -0.000117, -0.000020, + -0.000020, -0.000054, -0.000054, 0.000008, -0.000000, -0.000020, 0.000046, -0.000043, -0.000020, 0.000046, + -0.000020, -0.000043, -0.000020, -0.000043, -0.000067, 0.000015, -0.000043, -0.000067, -0.000025, -0.000117, + -0.000067, -0.000025, -0.000054, -0.000020, -0.000069, -0.000043, -0.000000, -0.000000, -0.000054, -0.000054, + -0.000025, 0.000030, -0.000020, -0.000020, -0.000000, -0.000014, -0.000020, -0.000020, -0.000020, -0.000054, + -0.000043, -0.000054, 0.000036, -0.000020, -0.000020, 
-0.000043, -0.000067, -0.000000, -0.000025, -0.000074, + -0.000020, -0.000020, -0.000043, -0.000067, -0.000115, 0.000036, -0.000043, -0.000054, -0.000054, -0.000020, + -0.000000, -0.000020, -0.000050, -0.000025, -0.000000, 0.000030, -0.000050, -0.000020, -0.000040, -0.000054, + -0.000000, -0.000040, -0.000028, -0.000020, -0.000025, 0.000030, -0.000025, -0.000069, -0.000007, -0.000123, + 0.000009, -0.000025, -0.000020, -0.000043, -0.000020, -0.000054, -0.000043, -0.000054, -0.000000, -0.000025, + -0.000020, -0.000054, -0.000025, -0.000065, -0.000043, -0.000025, -0.000069, -0.000054, -0.000025, -0.000043, + -0.000020, -0.000025, 0.000015, 0.000008, -0.000025, -0.000012, -0.000014, -0.000067, -0.000020, 0.000046, + -0.000000, -0.000043, -0.000477, -0.000117, -0.000025, -0.000025, 0.000019, -0.000017, -0.000025, -0.000067, + -0.000020, -0.000020, -0.000053, -0.000020, -0.000025, -0.000014, -0.000020, -0.000020, -0.000014, -0.000043, + 0.000010, -0.000085, -0.000020, -0.000020, 0.000028, 0.000008, -0.000025, -0.000033, 0.000009, -0.000000, + -0.000477, -0.000000, -0.000043, 0.000036, -0.000043, -0.000025, -0.000025, 0.000015, -0.000040, -0.000028, + 0.000046, -0.000074, -0.000085, -0.000053, -0.000040, -0.000000, -0.000028, -0.000043, -0.000115, -0.000054, + -0.000043, -0.000040, -0.000028, -0.000043, -0.000054, -0.000053, -0.000025, -0.000000, -0.000000, -0.000000, + 0.000009, -0.000014, -0.000000, -0.000067, -0.000067, -0.000000, -0.000025, -0.000043, -0.000020, -0.000020, + 0.000001, -0.000054, -0.000085, -0.000043, 0.000009, -0.000043, -0.000074, -0.000053, 0.000030, 0.000030, + -0.000014, -0.000069, -0.000025, 0.000015, -0.000020, -0.000115, -0.000020, -0.000053, -0.000020, -0.000025, + -0.000069, 0.000008, -0.000053, -0.000043, 0.000009, -0.000043, -0.000085, -0.000054, 0.000030, -0.000025, + -0.000054, 0.000030, 0.000030, -0.000014, -0.000123, -0.000025, 0.000015, 0.000015, -0.000043, 0.000009, + -0.000000, -0.000085, 0.000046, -0.000025, -0.000025, -0.000289, 
-0.000043, -0.000037, -0.000014, -0.000020, + -0.000020, -0.000025, 0.000030, -0.000040, -0.000054, -0.000067, 0.000030, -0.000020, -0.000020, 0.000046, + -0.000028, -0.000067, -0.000067, -0.000043, 0.000009, -0.000025, -0.000020, 0.000046, -0.000115, -0.000054, + -0.000000, -0.000025, -0.000033, -0.000054, -0.000054, -0.000000, -0.000020, -0.000000, -0.000054, -0.000000, + -0.000020, -0.000020, -0.000054, 0.000030, -0.000117, -0.000043, -0.000013, -0.000025, -0.000054, -0.000043, + 0.000009, -0.000057, 0.000009, 0.000046, -0.000043, -0.000040, -0.000000, -0.000000, -0.000019, -0.000000, + -0.000053, -0.000020, -0.000020, -0.000043, 0.000015, 0.000008, -0.000026, 0.000046, -0.000054, 0.000030, + 0.000030, -0.000053, 0.000046, -0.000011, -0.000062, -0.000067, -0.000085, -0.000038, -0.000025, -0.000011, + -0.000020, -0.000025, 0.000036, -0.000043, 0.000046, 0.000009, 0.000028, -0.000020, 0.000008, -0.000053, + -0.000025, -0.000053, -0.000020, -0.000020, -0.000008, -0.000043, 0.000009, -0.000513, -0.000028, -0.000020, + -0.000074, -0.000289, -0.000085, -0.000062, 0.000015, -0.000115, -0.000014, -0.000013, -0.000040, -0.000028, + -0.000062, -0.000074, -0.000053, 0.000046, -0.000040, -0.000028, 0.000036, -0.000043, -0.000025, -0.000121, + -0.000000, -0.000121, -0.000043, -0.000000, 0.000030, -0.000028, -0.000054, -0.000000, -0.000043, -0.000020, + -0.000121, -0.000026, -0.000053, -0.000020, -0.000020, -0.000061, -0.000007, -0.000000, -0.000025, -0.000000, + -0.000043, -0.000000, -0.000020, -0.000000, 0.000015, -0.000045, 0.000046, -0.000054, -0.000043, -0.000289, + -0.000069, 0.000015, 0.000008, -0.000053, 0.000046, 0.000046, -0.000025, -0.000098, -0.000014, -0.000000, + -0.000067, -0.000043, 0.000009, -0.000000, -0.000040, -0.000054, -0.000000, -0.000054, -0.000043, 0.000009, + 0.000030, 0.000030, -0.000043, 0.000028, -0.000115, -0.000054, -0.000020, -0.000020, -0.000053, -0.000040, + -0.000054, -0.000020, 0.000046, 0.000009, -0.000020, -0.000020, -0.000054, -0.000013, 
-0.000025, -0.000054, + -0.000025, -0.000053, -0.000074, -0.000054, -0.000026, -0.000008, 0.000015, -0.000043, -0.000025, -0.000000, + -0.000000, -0.000000, -0.000020, -0.000000, -0.000011, 0.000009, -0.000020, 0.000015, 0.000008, -0.000020, + -0.000115, 0.000036, -0.000014, 0.000002, -0.000085, -0.000020, -0.000121, -0.000121, -0.000057, 0.000036, + -0.000121, 0.000036, -0.000043, -0.000040, -0.000028, -0.000054, 0.000030, -0.000054, -0.000074, -0.000008, + 0.000008, -0.000054, 0.000030, -0.000085, -0.000121, -0.000121, -0.000020, -0.000014, -0.000020, -0.000008, + -0.000054, -0.000074, -0.000043, 0.000009, 0.000028, -0.000062, -0.000025, 0.000046, -0.000025, -0.000053, + -0.000048, -0.000043, -0.000085, 0.000015, -0.000062, -0.000062, -0.000011, 0.000009, -0.000000, -0.000000, + -0.000020, -0.000067, -0.000020, -0.000054, -0.000013, -0.000000, -0.000000, -0.000000, -0.000006, -0.000013, + 0.000022, 0.000008, 0.000030, -0.000043, -0.000020, 0.000046, -0.000020, -0.000013, -0.000067, -0.000040, + -0.000054, 0.000046, -0.000028, -0.000054, -0.000043, 0.000173, -0.000054, 0.000030, -0.000050, 0.000009, + 0.000015, -0.000030, -0.000074, -0.000043, 0.000009, -0.000000, -0.000000, -0.000067, -0.000000, -0.000000, + -0.000085, -0.000020, -0.000020, -0.000025, -0.000289, -0.000069, -0.000025, -0.000053, -0.000057, -0.000026, + -0.000074, 0.000046, -0.000040, -0.000028, 0.000015, -0.000054, 0.000030, -0.000020, 0.000046, -0.000040, + -0.000054, -0.000054, -0.000085, 0.000046, 0.000009, -0.000054, -0.000054, 0.000030, 0.000022, 0.000008, + -0.000117, -0.000020, -0.000121, -0.000013, -0.000054, 0.000030, 0.000036, -0.000040, -0.000028, -0.000020, + -0.000025, 0.000022, 0.000030, 0.000030, 0.000028, -0.000020, -0.000074, -0.000008, 0.000008, -0.000053, + -0.000043, -0.000028, 0.000036, -0.000025, -0.000014, 0.000008, -0.000020, -0.000013, -0.000025, -0.000033, + -0.000477, -0.000043, 0.000009, 0.000030, -0.000054, -0.000054, -0.000043, -0.000115, -0.000020, -0.000020, + 
-0.000013, -0.000069, -0.000054, -0.000025, -0.000053, 0.000036, -0.000043, 0.000028, -0.000026, 0.000022, + -0.000065, -0.000115, -0.000053, 0.000046, -0.000053, 0.000036, -0.000043, 0.000030, 0.000030, -0.000062, + -0.000011, -0.000020, -0.000067, 0.000028, -0.000020, -0.000054, -0.000057, -0.000008, -0.000025, -0.000020, + 0.000028, -0.000020, -0.000011, -0.000067, -0.000043, 0.000009, -0.000078, -0.000001, -0.000115, -0.000054, + -0.000013, -0.000053, 0.000046, -0.000020, -0.000013, -0.000121, 0.000015, 0.000008, -0.000025, -0.000053, + 0.000010, -0.000045, -0.000020, -0.000043, -0.000121, -0.000121, 0.000036, -0.000043, 0.000030, 0.000030, + 0.000030, 0.000030, -0.000289, -0.000085, -0.000020, 0.000030, 0.000030, -0.000085, -0.000020, -0.000020, + -0.000033, -0.000121, 0.000008, 0.000036, -0.000121, -0.000115, -0.000025, -0.000117, 0.000009, -0.000074, + -0.000020, -0.000020, -0.000020, 0.000046, -0.000013, -0.000054, -0.000000, -0.000054, -0.000043, 0.000009, + -0.000074, -0.000026, -0.000121, 0.000030, 0.000030, -0.000020, 0.000046, -0.000043, -0.000074, -0.000008, + 0.000009, -0.000020, -0.000020, 0.000028, -0.000025, 0.000036, -0.000043, 0.000046, -0.000025, -0.000053, + 0.000015, 0.000008, -0.000025, -0.000020, 0.000030, -0.000085, -0.000040, -0.000054, -0.000054, -0.000000, + -0.000028, -0.000054, -0.000123, -0.000043, 0.000009, -0.000123, -0.000513, -0.000123, -0.000513, -0.000123, + -0.000123, -0.000013, 0.000015, -0.000123, -0.000513, -0.000123, 0.000030, 0.000030, -0.000014, -0.000045, + -0.000008, -0.000000, -0.000020, 0.000028, -0.000020, -0.000020, -0.000013, -0.000123, -0.000513, 0.000002, + -0.000040, -0.000054, 0.000046, -0.000028, -0.000020, -0.000289, -0.000000, 0.000028, -0.000020, -0.000008, + -0.000050, -0.000033, 0.000028, -0.000011, -0.000011, -0.000067, -0.000026, -0.000115, -0.000054, -0.000043, + 0.000046, -0.000062, -0.000043, 0.000028, 0.000009, 0.000036, -0.000121, -0.000025, -0.000053, -0.000020, + -0.000020, -0.000020, -0.000013, 
0.000015, 0.000008, -0.000025, -0.000043, 0.000009, -0.000054, -0.000043, + 0.000036, -0.000043, -0.000013, -0.000054, 0.000030, -0.000054, 0.000008, -0.000053, -0.000020, 0.000022, + -0.000038, -0.000121, 0.000036, -0.000043, -0.000038, -0.000115, -0.000033, -0.000121, -0.000053, 0.000046, + 0.000028, -0.000008, -0.000062, -0.000025, -0.000033, -0.000006, 0.000015, 0.000008, -0.000000, -0.000000, + -0.000054, 0.000030, -0.000054, 0.000030, -0.000000, -0.000014, -0.000020, -0.000000, -0.000014, -0.000020, + -0.000013, 0.000015, -0.000025, -0.000326, 0.000002, -0.000054, 0.000030, 0.000028, -0.000020, -0.000013, + 0.000046, -0.000043, -0.000121, -0.000053, 0.000046, 0.000030, 0.000030, 0.000046, 0.000028, -0.000121, + -0.000043, -0.000067, -0.000062, -0.000008, -0.000121, -0.000040, -0.000025, -0.000013, -0.000115, 0.000015, + 0.000008, -0.000040, -0.000028, -0.000054, -0.000025, -0.000053, -0.000040, -0.000054, -0.000011, -0.000045, + -0.000085, 0.000046, -0.000000, -0.000054, -0.000006, -0.000054, 0.000030, -0.000043, -0.000054, 0.000022, + -0.000106, -0.000115, -0.000025, -0.000045, -0.000011, -0.000062, -0.000038, 0.000009, -0.000054, -0.000028, + 0.000036, -0.000000, -0.000020, -0.000011, -0.000000, -0.000020, -0.000043, 0.000009, -0.000513, -0.000043, + -0.000043, -0.000008, -0.000020, -0.000020, -0.000289, -0.000069, -0.000067, -0.000040, -0.000054, 0.000046, + -0.000054, -0.000028, 0.000036, 0.000015, -0.000105, 0.000046, 0.000011, -0.000014, -0.000020, -0.000013, + -0.000053, 0.000046, -0.000000, -0.000067, -0.000067, -0.000062, -0.000008, -0.000000, -0.000067, -0.000289, + -0.000069, 0.000036, -0.000008, -0.000025, -0.000115, 0.000599, -0.000040, -0.000028, -0.000011, -0.000062, + -0.000025, -0.000053, -0.000028, 0.000036, 0.000036, -0.000121, -0.000043, 0.000009, 0.000036, -0.000121, + -0.000040, -0.000028, -0.000043, 0.000028, -0.000053, 0.000046, -0.000477, -0.000117, 0.000046, -0.000054, + -0.000289, -0.000085, 0.000046, -0.000054, -0.000025, 
-0.000053, -0.000477, -0.000043, 0.000028, -0.000008, + -0.000020, -0.000043, 0.000028, 0.000028, -0.000011, -0.000020, -0.000043, -0.000011, -0.000062, -0.000013, + 0.000008, -0.000053, -0.000053, 0.000015, 0.000008, 0.000008, -0.000053, -0.000000, -0.000020, -0.000011, + -0.000050, -0.000008, -0.000000, -0.000020, -0.000008, -0.000057, -0.000008, -0.000121, -0.000030, 0.000015, + -0.000025, 0.000030, 0.000030, 0.000008, -0.000053, -0.000033, -0.000011, -0.000045, -0.000029, -0.000000, + -0.000289, -0.000054, -0.000000, -0.000020, -0.000013, -0.000074, -0.000008, -0.000000, -0.000014, -0.000020, + 0.000009, -0.000000, -0.000014, -0.000020, -0.000033, -0.000043, 0.000002, -0.000054, 0.000030, 0.000046, + -0.000054, -0.000043, -0.000074, -0.000085, -0.000020, -0.000002, -0.000020, -0.000513, -0.000054, -0.000043, + -0.000121, 0.000030, 0.000030, 0.000001, -0.000045, 0.000036, -0.000043, 0.000009, -0.000020, -0.000020, + -0.000040, -0.000028, -0.000011, -0.000062, -0.000025, -0.000053, 0.000046, -0.000033, -0.000054, 0.000030, + -0.000012, 0.000056, -0.000040, -0.000028, -0.000513, -0.000123, 0.000036, -0.000513, -0.000025, -0.000043, + -0.000115, -0.000054, -0.000043, -0.000025, -0.000033, -0.000000, -0.000000, -0.000000, -0.000008, -0.000020, + 0.000046, -0.000000, -0.000011, -0.000020, 0.000015, 0.000008, -0.000013, 0.000015, 0.000009, -0.000020, + -0.000020, -0.000115, -0.000054, -0.000043, 0.000036, -0.000043, 0.000002, 0.000015, 0.000036, -0.000121, + -0.000000, -0.000000, -0.000067, 0.001219, 0.000036, 0.000027, -0.000121, -0.000000, -0.000000, -0.000067, + -0.000025, -0.000033, -0.000121, 0.000008, -0.000053, -0.000053, -0.000000, -0.000020, -0.000000, -0.000020, + -0.000008, -0.000013, 0.000015, -0.000054, 0.000036, -0.000043, -0.000062, -0.000008, -0.000054, 0.000030, + 0.000022, -0.000038, -0.000115, -0.000011, -0.000045, -0.000054, 0.000009, -0.000028, 0.000036, 0.000022, + -0.000065, -0.000115, 0.000030, 0.000030, 0.000025, -0.000040, -0.000028, 
-0.000050, -0.000008, -0.000121, + -0.000014, 0.000030, 0.000030, -0.000477, -0.000117, -0.000115, -0.000033, -0.000020, -0.000477, -0.000085, + -0.000020, -0.000054, -0.000025, 0.000046, -0.000115, -0.000040, -0.000028, -0.000289, -0.000337, -0.000085, + -0.000008, -0.000050, 0.000046, -0.000115, -0.000033, -0.000011, -0.000062, -0.000121, -0.000025, -0.000053, + 0.000015, 0.000008, 0.000046, -0.000054, -0.000043, -0.000040, -0.000054, -0.000040, -0.000028, -0.000000, + -0.000028, -0.000054, -0.000074, -0.000008, -0.000020, -0.000037, 0.000030, 0.000030, -0.000057, -0.000008, + -0.000121, 0.000000, -0.000000, 0.000036, -0.000043, -0.000289, -0.000064, -0.000000, -0.000000, -0.000008, + -0.000020, -0.000000, -0.000011, -0.000020, -0.000048, -0.000013, 0.000015, -0.000054, 0.001682, -0.000121, + 0.000008, -0.000053, -0.000053, 0.000015, 0.000008, -0.000025, 0.000046, -0.000000, -0.000020, -0.000011, + -0.000513, -0.000043, -0.000000, -0.000020, -0.000008, -0.000115, -0.000025, -0.000053, 0.000009, 0.000096, + -0.000074, 0.001684, 0.000030, -0.000043, 0.000028, 0.000008, -0.000053, -0.000033, -0.000011, -0.000062, + 0.000028, -0.000020, -0.000008, -0.000040, -0.000020, -0.000000, -0.000028, -0.000054, -0.000020, 0.005404, + -0.000477, -0.000085, -0.000043, -0.000513, -0.000123, -0.000054, 0.000030, -0.000040, -0.000028, -0.000513, + -0.000043, 0.000030, 0.000030, 0.000028, -0.000008, -0.000020, -0.000054, 0.000030, 0.000028, -0.000011, + -0.000020, 0.000008, -0.000033, -0.000053, 1.000000, 0.035299}; static int rowmap[1814] = { - 0, 5, 9, 14, 17, 21, 25, 30, 34, 40, 45, 49, - 55, 60, 63, 68, 73, 78, 83, 88, 91, 99, 102, 108, - 116, 121, 126, 130, 134, 137, 142, 147, 152, 156, 162, 166, - 171, 177, 181, 189, 192, 196, 199, 202, 207, 211, 216, 222, - 228, 232, 240, 243, 248, 252, 258, 266, 270, 274, 278, 283, - 291, 299, 305, 308, 312, 319, 326, 330, 334, 338, 345, 348, - 352, 355, 360, 365, 370, 376, 380, 385, 392, 396, 399, 406, - 414, 422, 426, 431, 435, 439, 444, 
450, 455, 463, 467, 473, - 477, 480, 486, 493, 497, 505, 509, 513, 519, 523, 530, 533, - 538, 542, 546, 549, 556, 564, 572, 576, 580, 587, 590, 593, - 596, 603, 607, 614, 621, 625, 628, 635, 638, 645, 648, 654, - 658, 662, 670, 678, 683, 690, 697, 701, 709, 717, 721, 725, - 728, 735, 744, 752, 760, 763, 767, 771, 776, 784, 788, 793, - 797, 801, 807, 815, 821, 829, 836, 843, 847, 855, 860, 865, - 873, 878, 886, 892, 897, 901, 905, 913, 917, 920, 924, 932, - 940, 948, 956, 961, 966, 971, 979, 984, 987, 994, 1001, 1009, - 1017, 1022, 1026, 1031, 1036, 1041, 1044, 1049, 1053, 1058, 1063, 1066, - 1071, 1077, 1082, 1085, 1090, 1094, 1102, 1107, 1115, 1120, 1125, 1131, - 1138, 1143, 1150, 1158, 1166, 1171, 1179, 1184, 1189, 1192, 1199, 1206, - 1210, 1213, 1218, 1223, 1227, 1231, 1239, 1242, 1247, 1253, 1258, 1264, - 1271, 1278, 1286, 1294, 1302, 1310, 1314, 1322, 1325, 1329, 1334, 1339, - 1342, 1345, 1350, 1356, 1364, 1372, 1380, 1385, 1391, 1396, 1401, 1407, - 1412, 1418, 1423, 1429, 1434, 1442, 1450, 1458, 1462, 1467, 1471, 1476, - 1481, 1487, 1490, 1495, 1503, 1509, 1517, 1525, 1530, 1535, 1543, 1547, - 1551, 1555, 1560, 1563, 1568, 1574, 1581, 1589, 1592, 1596, 1602, 1605, - 1608, 1617, 1620, 1628, 1635, 1642, 1650, 1654, 1662, 1671, 1678, 1686, - 1694, 1702, 1706, 1714, 1718, 1724, 1729, 1734, 1740, 1744, 1748, 1753, - 1759, 1762, 1766, 1769, 1774, 1780, 1784, 1789, 1795, 1800, 1806, 1812, - 1817, 1820, 1827, 1831, 1835, 1842, 1849, 1856, 1864, 1871, 1879, 1887, - 1893, 1901, 1909, 1917, 1921, 1925, 1933, 1936, 1939, 1946, 1949, 1953, - 1961, 1965, 1968, 1975, 1982, 1990, 1997, 2001, 2008, 2016, 2024, 2032, - 2040, 2046, 2053, 2059, 2065, 2068, 2071, 2076, 2084, 2092, 2100, 2104, - 2108, 2112, 2119, 2127, 2130, 2135, 2143, 2149, 2153, 2158, 2161, 2165, - 2171, 2179, 2184, 2190, 2196, 2203, 2210, 2217, 2225, 2233, 2240, 2248, - 2251, 2255, 2258, 2262, 2266, 2269, 2272, 2279, 2282, 2290, 2296, 2303, - 2308, 2316, 2319, 2326, 2334, 2341, 2349, 2352, 2356, 2360, 2364, 
2368, - 2372, 2379, 2383, 2390, 2399, 2402, 2411, 2418, 2423, 2431, 2439, 2444, - 2451, 2458, 2466, 2473, 2476, 2483, 2490, 2498, 2502, 2506, 2510, 2519, - 2527, 2533, 2540, 2547, 2554, 2562, 2570, 2575, 2583, 2588, 2596, 2601, - 2609, 2611, 2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2621, 2625, - 2628, 2631, 2634, 2637, 2640, 2643, 2646, 2649, 2652, 2655, 2660, 2663, - 2666, 2669, 2674, 2679, 2682, 2686, 2689, 2693, 2700, 2703, 2711, 2716, - 2719, 2722, 2725, 2730, 2737, 2744, 2747, 2754, 2757, 2760, 2763, 2766, - 2773, 2779, 2786, 2790, 2793, 2799, 2803, 2806, 2809, 2815, 2818, 2824, - 2827, 2830, 2835, 2839, 2842, 2845, 2850, 2853, 2856, 2861, 2864, 2870, - 2873, 2876, 2879, 2884, 2889, 2896, 2903, 2906, 2913, 2916, 2919, 2927, - 2937, 2940, 2943, 2946, 2952, 2955, 2961, 2968, 2971, 2974, 2977, 2982, - 2990, 2993, 2999, 3002, 3005, 3009, 3014, 3020, 3027, 3030, 3035, 3040, - 3043, 3046, 3054, 3057, 3061, 3065, 3068, 3073, 3081, 3088, 3091, 3094, - 3097, 3103, 3111, 3114, 3120, 3123, 3126, 3134, 3141, 3144, 3147, 3153, - 3156, 3159, 3162, 3167, 3170, 3176, 3179, 3185, 3191, 3198, 3203, 3206, - 3209, 3216, 3219, 3227, 3230, 3233, 3236, 3243, 3247, 3249, 3253, 3256, - 3258, 3261, 3268, 3271, 3276, 3286, 3291, 3294, 3299, 3307, 3310, 3314, - 3323, 3326, 3329, 3332, 3335, 3340, 3343, 3348, 3351, 3354, 3360, 3363, - 3366, 3376, 3379, 3382, 3385, 3388, 3393, 3398, 3401, 3404, 3409, 3416, - 3419, 3426, 3429, 3437, 3446, 3454, 3460, 3467, 3470, 3473, 3476, 3479, - 3482, 3488, 3494, 3501, 3506, 3512, 3515, 3523, 3529, 3532, 3538, 3545, - 3549, 3555, 3558, 3564, 3567, 3574, 3580, 3583, 3591, 3599, 3607, 3610, - 3617, 3625, 3630, 3635, 3643, 3646, 3649, 3652, 3655, 3658, 3665, 3671, - 3677, 3680, 3686, 3693, 3699, 3706, 3713, 3718, 3728, 3731, 3734, 3737, - 3742, 3748, 3751, 3757, 3760, 3763, 3766, 3773, 3779, 3785, 3788, 3794, - 3796, 3802, 3809, 3812, 3818, 3825, 3832, 3842, 3845, 3848, 3851, 3854, - 3860, 3863, 3867, 3873, 3879, 3882, 3885, 3894, 3900, 3903, 3910, 
3913, - 3916, 3920, 3923, 3930, 3933, 3939, 3944, 3951, 3954, 3957, 3963, 3969, - 3974, 3979, 3985, 3988, 3990, 3995, 4000, 4006, 4013, 4018, 4021, 4028, - 4035, 4041, 4046, 4051, 4059, 4065, 4070, 4075, 4078, 4084, 4091, 4094, - 4100, 4107, 4111, 4114, 4121, 4127, 4130, 4136, 4142, 4145, 4150, 4156, - 4159, 4166, 4172, 4177, 4183, 4189, 4192, 4195, 4198, 4201, 4208, 4212, - 4218, 4224, 4231, 4238, 4244, 4249, 4255, 4265, 4270, 4276, 4287, 4290, - 4296, 4299, 4301, 4308, 4311, 4317, 4320, 4325, 4330, 4337, 4343, 4346, - 4352, 4357, 4362, 4370, 4376, 4379, 4385, 4391, 4396, 4403, 4412, 4418, - 4428, 4431, 4437, 4444, 4451, 4454, 4460, 4468, 4475, 4481, 4486, 4489, - 4495, 4502, 4505, 4507, 4512, 4516, 4520, 4526, 4530, 4537, 4543, 4549, - 4557, 4563, 4566, 4570, 4574, 4580, 4585, 4590, 4597, 4601, 4605, 4613, - 4616, 4624, 4628, 4633, 4638, 4643, 4646, 4652, 4658, 4664, 4670, 4679, - 4682, 4685, 4692, 4698, 4704, 4709, 4712, 4718, 4728, 4736, 4741, 4747, - 4750, 4758, 4761, 4767, 4775, 4781, 4784, 4787, 4794, 4801, 4807, 4812, - 4818, 4825, 4828, 4831, 4837, 4840, 4846, 4849, 4855, 4858, 4861, 4864, - 4871, 4878, 4881, 4887, 4890, 4896, 4902, 4908, 4913, 4921, 4924, 4930, - 4933, 4941, 4949, 4954, 4959, 4964, 4970, 4973, 4976, 4979, 4986, 4989, - 4995, 5002, 5005, 5008, 5014, 5019, 5023, 5027, 5034, 5041, 5045, 5052, - 5055, 5058, 5064, 5070, 5077, 5084, 5091, 5099, 5106, 5112, 5119, 5125, - 5129, 5133, 5136, 5143, 5148, 5156, 5163, 5170, 5176, 5179, 5184, 5193, - 5195, 5200, 5203, 5209, 5217, 5223, 5228, 5231, 5239, 5249, 5255, 5258, - 5266, 5271, 5274, 5279, 5282, 5288, 5295, 5303, 5308, 5316, 5329, 5336, - 5343, 5350, 5357, 5363, 5366, 5372, 5377, 5384, 5389, 5392, 5398, 5404, - 5410, 5416, 5422, 5425, 5431, 5438, 5441, 5444, 5447, 5450, 5456, 5462, - 5465, 5473, 5478, 5483, 5491, 5496, 5502, 5505, 5508, 5511, 5520, 5523, - 5526, 5532, 5536, 5543, 5548, 5554, 5559, 5562, 5568, 5571, 5577, 5584, - 5587, 5593, 5596, 5602, 5608, 5614, 5620, 5623, 5629, 5635, 5642, 
5649, - 5656, 5659, 5662, 5666, 5673, 5679, 5686, 5693, 5696, 5702, 5707, 5713, - 5720, 5727, 5734, 5739, 5744, 5751, 5759, 5766, 5772, 5776, 5780, 5786, - 5793, 5795, 5802, 5809, 5814, 5821, 5824, 5831, 5837, 5843, 5846, 5853, - 5860, 5866, 5872, 5878, 5881, 5886, 5893, 5896, 5903, 5906, 5912, 5919, - 5925, 5930, 5935, 5942, 5950, 5956, 5962, 5968, 5974, 5981, 5984, 5991, - 5998, 6005, 6011, 6014, 6019, 6024, 6029, 6036, 6043, 6050, 6052, 6057, - 6062, 6067, 6073, 6078, 6084, 6087, 6100, 6103, 6109, 6115, 6122, 6128, - 6134, 6139, 6146, 6150, 6153, 6160, 6163, 6169, 6174, 6177, 6182, 6188, - 6192, 6196, 6203, 6206, 6213, 6220, 6227, 6234, 6243, 6249, 6252, 6259, - 6266, 6272, 6275, 6278, 6285, 6289, 6296, 6303, 6307, 6313, 6315, 6318, - 6321, 6327, 6330, 6333, 6336, 6341, 6348, 6354, 6362, 6368, 6374, 6377, - 6384, 6388, 6392, 6399, 6406, 6409, 6413, 6419, 6425, 6432, 6440, 6443, - 6446, 6449, 6454, 6457, 6464, 6467, 6472, 6475, 6481, 6484, 6491, 6493, - 6496, 6503, 6509, 6512, 6518, 6524, 6532, 6538, 6544, 6547, 6553, 6556, - 6559, 6565, 6568, 6574, 6577, 6580, 6587, 6590, 6593, 6599, 6602, 6609, - 6616, 6623, 6629, 6637, 6645, 6648, 6653, 6658, 6663, 6670, 6673, 6679, - 6682, 6685, 6691, 6697, 6703, 6708, 6717, 6725, 6730, 6735, 6742, 6745, - 6752, 6757, 6763, 6769, 6774, 6778, 6784, 6790, 6796, 6802, 6810, 6816, - 6821, 6824, 6831, 6835, 6838, 6844, 6847, 6850, 6855, 6862, 6865, 6872, - 6875, 6881, 6884, 6890, 6896, 6903, 6906, 6913, 6919, 6926, 6933, 6940, - 6946, 6949, 6956, 6968, 6972, 6979, 6982, 6994, 7000, 7007, 7011, 7017, - 7023, 7028, 7036, 7044, 7047, 7054, 7058, 7061, 7068, 7076, 7084, 7090, - 7097, 7103, 7110, 7115, 7121, 7126, 7131, 7137, 7142, 7149, 7152, 7154, - 7161, 7164, 7172, 7178, 7185, 7190, 7195, 7202, 7208, 7214, 7218, 7224, - 7229, 7236, 7243, 7249, 7254, 7258, 7261, 7266, 7278, 7286, 7291, 7293, - 7295, 7302, 7308, 7311, 7318, 7321, 7327, 7334, 7337, 7343, 7346, 7352, - 7358, 7364, 7370, 7376, 7384, 7390, 7403, 7409, 7416, 7422, 7429, 
7436, - 7448, 7454, 7456, 7461, 7467, 7477, 7489, 7492, 7499, 7505, 7512, 7517, - 7522, 7528, 7533, 7538, 7544, 7547, 7551, 7555, 7561, 7569, 7572, 7575, - 7584, 7587, 7592, 7595, 7601, 7607, 7613, 7619, 7626, 7629, 7635, 7640, - 7643, 7650, 7655, 7662, 7668, 7673, 7681, 7686, 7691, 7703, 7711, 7719, - 7724, 7733, 7736, 7743, 7750, 7752, 7758, 7759, 7762, 7768, 7774, 7782, - 7785, 7791, 7797, 7804, 7811, 7815, 7821, 7824, 7831, 7835, 7838, 7844, - 7850, 7855, 7861, 7867, 7873, 7876, 7882, 7888, 7897, 7903, 7909, 7915, - 7918, 7924, 7930, 7936, 7941, 7949, 7954, 7960, 7965, 7970, 7976, 7981, - 7985, 7991, 8001, 8008, 8015, 8021, 8027, 8030, 8037, 8044, 8050, 8055, - 8060, 8066, 8069, 8076, 8081, 8086, 8094, 8101, 8109, 8113, 8116, 8119, - 8121, 8128, 8135, 8142, 8148, 8155, 8161, 8167, 8171, 8181, 8189, 8198, - 8201, 8209, 8214, 8221, 8228, 8236, 8241, 8247, 8252, 8255, 8260, 8266, - 8271, 8278, 8284, 8287, 8293, 8300, 8306, 8312, 8318, 8324, 8330, 8333, - 8336, 8339, 8345, 8348, 8354, 8358, 8365, 8372, 8375, 8378, 8385, 8388, - 8391, 8394, 8397, 8400, 8406, 8412, 8420, 8423, 8429, 8434, 8441, 8447, - 8454, 8457, 8464, 8467, 8473, 8477, 8480, 8487, 8493, 8496, 8502, 8508, - 8514, 8520, 8524, 8527, 8534, 8542, 8550, 8555, 8563, 8568, 8573, 8578, - 8584, 8589, 8592, 8599, 8606, 8609, 8612, 8615, 8618, 8624, 8630, 8636, - 8644, 8647, 8650, 8653, 8660, 8664, 8672, 8677, 8682, 8685, 8693, 8694, - 8699, 8707, 8712, 8717, 8720, 8727, 8731, 8738, 8744, 8750, 8755, 8760, - 8765, 8770, 8778, 8785, 8788, 8795, 8802, 8805, 8815, 8823, 8828, 8835, - 8842, 8854, 8860, 8866, 8872, 8879, 8882, 8888, 8896, 8903, 8909, 8915, - 8922, 8929, 8932, 8935, 8941, 8944, 8951, 8955, 8962, 8969, 8976, 8985, - 8988, 8994, 9001, 9004, 9013, 9019, 9022, 9029, 9031, 9035, 9040, 9046, - 9049, 9055, 9064, 9067, 9073, 9080, 9083, 9088, 9094, 9101, 9107, 9117, - 9121, 9124, 9130, 9133, 9140, 9146, 9152, 9158, 9161, 9167, 9170, 9176, - 9184, 9190, 9196, 9202, 9210, 9215, 9218, 9225, 9232, 9239, 9247, 
9251, - 9255, 9266, 9274, 9281, 9284, 9293, 9296, 9304, 9309, 9317, 9322, 9327, - 9333, 9338, 9347, 9354, 9357, 9364, 9399, 9403, 9410, 9416, 9422, 9425, - 9432, 9434, 9439, 9444, 9449, 9455, 9459, 9463, 9468, 9473, 9481, 9487, - 9490, 9496, 9499, 9504, 9510, 9511, 9518, 9524, 9529, 9536, 9542, 9549, - 9555, 9563, 9568, 9573, 9581, 9584, 9587, 9595, 9601, 9607, 9610, 9710, - 9716, 9719, 9725, 9731, 9734, 9740, 9747, 9750, 9756, 9760, 9764, 9771, - 9778, 9783, 9791, 9796, 9802, 9809, 9814, 9820, 9825, 9832, 9839, 9845, - 9846, 11156}; + 0, 5, 9, 14, 17, 21, 25, 30, 34, 40, 45, 49, 55, 60, 63, 68, 73, 78, 83, + 88, 91, 99, 102, 108, 116, 121, 126, 130, 134, 137, 142, 147, 152, 156, 162, 166, 171, 177, + 181, 189, 192, 196, 199, 202, 207, 211, 216, 222, 228, 232, 240, 243, 248, 252, 258, 266, 270, + 274, 278, 283, 291, 299, 305, 308, 312, 319, 326, 330, 334, 338, 345, 348, 352, 355, 360, 365, + 370, 376, 380, 385, 392, 396, 399, 406, 414, 422, 426, 431, 435, 439, 444, 450, 455, 463, 467, + 473, 477, 480, 486, 493, 497, 505, 509, 513, 519, 523, 530, 533, 538, 542, 546, 549, 556, 564, + 572, 576, 580, 587, 590, 593, 596, 603, 607, 614, 621, 625, 628, 635, 638, 645, 648, 654, 658, + 662, 670, 678, 683, 690, 697, 701, 709, 717, 721, 725, 728, 735, 744, 752, 760, 763, 767, 771, + 776, 784, 788, 793, 797, 801, 807, 815, 821, 829, 836, 843, 847, 855, 860, 865, 873, 878, 886, + 892, 897, 901, 905, 913, 917, 920, 924, 932, 940, 948, 956, 961, 966, 971, 979, 984, 987, 994, + 1001, 1009, 1017, 1022, 1026, 1031, 1036, 1041, 1044, 1049, 1053, 1058, 1063, 1066, 1071, 1077, 1082, 1085, 1090, + 1094, 1102, 1107, 1115, 1120, 1125, 1131, 1138, 1143, 1150, 1158, 1166, 1171, 1179, 1184, 1189, 1192, 1199, 1206, + 1210, 1213, 1218, 1223, 1227, 1231, 1239, 1242, 1247, 1253, 1258, 1264, 1271, 1278, 1286, 1294, 1302, 1310, 1314, + 1322, 1325, 1329, 1334, 1339, 1342, 1345, 1350, 1356, 1364, 1372, 1380, 1385, 1391, 1396, 1401, 1407, 1412, 1418, + 1423, 1429, 1434, 1442, 1450, 1458, 1462, 1467, 
1471, 1476, 1481, 1487, 1490, 1495, 1503, 1509, 1517, 1525, 1530, + 1535, 1543, 1547, 1551, 1555, 1560, 1563, 1568, 1574, 1581, 1589, 1592, 1596, 1602, 1605, 1608, 1617, 1620, 1628, + 1635, 1642, 1650, 1654, 1662, 1671, 1678, 1686, 1694, 1702, 1706, 1714, 1718, 1724, 1729, 1734, 1740, 1744, 1748, + 1753, 1759, 1762, 1766, 1769, 1774, 1780, 1784, 1789, 1795, 1800, 1806, 1812, 1817, 1820, 1827, 1831, 1835, 1842, + 1849, 1856, 1864, 1871, 1879, 1887, 1893, 1901, 1909, 1917, 1921, 1925, 1933, 1936, 1939, 1946, 1949, 1953, 1961, + 1965, 1968, 1975, 1982, 1990, 1997, 2001, 2008, 2016, 2024, 2032, 2040, 2046, 2053, 2059, 2065, 2068, 2071, 2076, + 2084, 2092, 2100, 2104, 2108, 2112, 2119, 2127, 2130, 2135, 2143, 2149, 2153, 2158, 2161, 2165, 2171, 2179, 2184, + 2190, 2196, 2203, 2210, 2217, 2225, 2233, 2240, 2248, 2251, 2255, 2258, 2262, 2266, 2269, 2272, 2279, 2282, 2290, + 2296, 2303, 2308, 2316, 2319, 2326, 2334, 2341, 2349, 2352, 2356, 2360, 2364, 2368, 2372, 2379, 2383, 2390, 2399, + 2402, 2411, 2418, 2423, 2431, 2439, 2444, 2451, 2458, 2466, 2473, 2476, 2483, 2490, 2498, 2502, 2506, 2510, 2519, + 2527, 2533, 2540, 2547, 2554, 2562, 2570, 2575, 2583, 2588, 2596, 2601, 2609, 2611, 2613, 2614, 2615, 2616, 2617, + 2618, 2619, 2620, 2621, 2625, 2628, 2631, 2634, 2637, 2640, 2643, 2646, 2649, 2652, 2655, 2660, 2663, 2666, 2669, + 2674, 2679, 2682, 2686, 2689, 2693, 2700, 2703, 2711, 2716, 2719, 2722, 2725, 2730, 2737, 2744, 2747, 2754, 2757, + 2760, 2763, 2766, 2773, 2779, 2786, 2790, 2793, 2799, 2803, 2806, 2809, 2815, 2818, 2824, 2827, 2830, 2835, 2839, + 2842, 2845, 2850, 2853, 2856, 2861, 2864, 2870, 2873, 2876, 2879, 2884, 2889, 2896, 2903, 2906, 2913, 2916, 2919, + 2927, 2937, 2940, 2943, 2946, 2952, 2955, 2961, 2968, 2971, 2974, 2977, 2982, 2990, 2993, 2999, 3002, 3005, 3009, + 3014, 3020, 3027, 3030, 3035, 3040, 3043, 3046, 3054, 3057, 3061, 3065, 3068, 3073, 3081, 3088, 3091, 3094, 3097, + 3103, 3111, 3114, 3120, 3123, 3126, 3134, 3141, 3144, 3147, 3153, 3156, 
3159, 3162, 3167, 3170, 3176, 3179, 3185, + 3191, 3198, 3203, 3206, 3209, 3216, 3219, 3227, 3230, 3233, 3236, 3243, 3247, 3249, 3253, 3256, 3258, 3261, 3268, + 3271, 3276, 3286, 3291, 3294, 3299, 3307, 3310, 3314, 3323, 3326, 3329, 3332, 3335, 3340, 3343, 3348, 3351, 3354, + 3360, 3363, 3366, 3376, 3379, 3382, 3385, 3388, 3393, 3398, 3401, 3404, 3409, 3416, 3419, 3426, 3429, 3437, 3446, + 3454, 3460, 3467, 3470, 3473, 3476, 3479, 3482, 3488, 3494, 3501, 3506, 3512, 3515, 3523, 3529, 3532, 3538, 3545, + 3549, 3555, 3558, 3564, 3567, 3574, 3580, 3583, 3591, 3599, 3607, 3610, 3617, 3625, 3630, 3635, 3643, 3646, 3649, + 3652, 3655, 3658, 3665, 3671, 3677, 3680, 3686, 3693, 3699, 3706, 3713, 3718, 3728, 3731, 3734, 3737, 3742, 3748, + 3751, 3757, 3760, 3763, 3766, 3773, 3779, 3785, 3788, 3794, 3796, 3802, 3809, 3812, 3818, 3825, 3832, 3842, 3845, + 3848, 3851, 3854, 3860, 3863, 3867, 3873, 3879, 3882, 3885, 3894, 3900, 3903, 3910, 3913, 3916, 3920, 3923, 3930, + 3933, 3939, 3944, 3951, 3954, 3957, 3963, 3969, 3974, 3979, 3985, 3988, 3990, 3995, 4000, 4006, 4013, 4018, 4021, + 4028, 4035, 4041, 4046, 4051, 4059, 4065, 4070, 4075, 4078, 4084, 4091, 4094, 4100, 4107, 4111, 4114, 4121, 4127, + 4130, 4136, 4142, 4145, 4150, 4156, 4159, 4166, 4172, 4177, 4183, 4189, 4192, 4195, 4198, 4201, 4208, 4212, 4218, + 4224, 4231, 4238, 4244, 4249, 4255, 4265, 4270, 4276, 4287, 4290, 4296, 4299, 4301, 4308, 4311, 4317, 4320, 4325, + 4330, 4337, 4343, 4346, 4352, 4357, 4362, 4370, 4376, 4379, 4385, 4391, 4396, 4403, 4412, 4418, 4428, 4431, 4437, + 4444, 4451, 4454, 4460, 4468, 4475, 4481, 4486, 4489, 4495, 4502, 4505, 4507, 4512, 4516, 4520, 4526, 4530, 4537, + 4543, 4549, 4557, 4563, 4566, 4570, 4574, 4580, 4585, 4590, 4597, 4601, 4605, 4613, 4616, 4624, 4628, 4633, 4638, + 4643, 4646, 4652, 4658, 4664, 4670, 4679, 4682, 4685, 4692, 4698, 4704, 4709, 4712, 4718, 4728, 4736, 4741, 4747, + 4750, 4758, 4761, 4767, 4775, 4781, 4784, 4787, 4794, 4801, 4807, 4812, 4818, 4825, 4828, 4831, 
4837, 4840, 4846, + 4849, 4855, 4858, 4861, 4864, 4871, 4878, 4881, 4887, 4890, 4896, 4902, 4908, 4913, 4921, 4924, 4930, 4933, 4941, + 4949, 4954, 4959, 4964, 4970, 4973, 4976, 4979, 4986, 4989, 4995, 5002, 5005, 5008, 5014, 5019, 5023, 5027, 5034, + 5041, 5045, 5052, 5055, 5058, 5064, 5070, 5077, 5084, 5091, 5099, 5106, 5112, 5119, 5125, 5129, 5133, 5136, 5143, + 5148, 5156, 5163, 5170, 5176, 5179, 5184, 5193, 5195, 5200, 5203, 5209, 5217, 5223, 5228, 5231, 5239, 5249, 5255, + 5258, 5266, 5271, 5274, 5279, 5282, 5288, 5295, 5303, 5308, 5316, 5329, 5336, 5343, 5350, 5357, 5363, 5366, 5372, + 5377, 5384, 5389, 5392, 5398, 5404, 5410, 5416, 5422, 5425, 5431, 5438, 5441, 5444, 5447, 5450, 5456, 5462, 5465, + 5473, 5478, 5483, 5491, 5496, 5502, 5505, 5508, 5511, 5520, 5523, 5526, 5532, 5536, 5543, 5548, 5554, 5559, 5562, + 5568, 5571, 5577, 5584, 5587, 5593, 5596, 5602, 5608, 5614, 5620, 5623, 5629, 5635, 5642, 5649, 5656, 5659, 5662, + 5666, 5673, 5679, 5686, 5693, 5696, 5702, 5707, 5713, 5720, 5727, 5734, 5739, 5744, 5751, 5759, 5766, 5772, 5776, + 5780, 5786, 5793, 5795, 5802, 5809, 5814, 5821, 5824, 5831, 5837, 5843, 5846, 5853, 5860, 5866, 5872, 5878, 5881, + 5886, 5893, 5896, 5903, 5906, 5912, 5919, 5925, 5930, 5935, 5942, 5950, 5956, 5962, 5968, 5974, 5981, 5984, 5991, + 5998, 6005, 6011, 6014, 6019, 6024, 6029, 6036, 6043, 6050, 6052, 6057, 6062, 6067, 6073, 6078, 6084, 6087, 6100, + 6103, 6109, 6115, 6122, 6128, 6134, 6139, 6146, 6150, 6153, 6160, 6163, 6169, 6174, 6177, 6182, 6188, 6192, 6196, + 6203, 6206, 6213, 6220, 6227, 6234, 6243, 6249, 6252, 6259, 6266, 6272, 6275, 6278, 6285, 6289, 6296, 6303, 6307, + 6313, 6315, 6318, 6321, 6327, 6330, 6333, 6336, 6341, 6348, 6354, 6362, 6368, 6374, 6377, 6384, 6388, 6392, 6399, + 6406, 6409, 6413, 6419, 6425, 6432, 6440, 6443, 6446, 6449, 6454, 6457, 6464, 6467, 6472, 6475, 6481, 6484, 6491, + 6493, 6496, 6503, 6509, 6512, 6518, 6524, 6532, 6538, 6544, 6547, 6553, 6556, 6559, 6565, 6568, 6574, 6577, 6580, + 6587, 
6590, 6593, 6599, 6602, 6609, 6616, 6623, 6629, 6637, 6645, 6648, 6653, 6658, 6663, 6670, 6673, 6679, 6682, + 6685, 6691, 6697, 6703, 6708, 6717, 6725, 6730, 6735, 6742, 6745, 6752, 6757, 6763, 6769, 6774, 6778, 6784, 6790, + 6796, 6802, 6810, 6816, 6821, 6824, 6831, 6835, 6838, 6844, 6847, 6850, 6855, 6862, 6865, 6872, 6875, 6881, 6884, + 6890, 6896, 6903, 6906, 6913, 6919, 6926, 6933, 6940, 6946, 6949, 6956, 6968, 6972, 6979, 6982, 6994, 7000, 7007, + 7011, 7017, 7023, 7028, 7036, 7044, 7047, 7054, 7058, 7061, 7068, 7076, 7084, 7090, 7097, 7103, 7110, 7115, 7121, + 7126, 7131, 7137, 7142, 7149, 7152, 7154, 7161, 7164, 7172, 7178, 7185, 7190, 7195, 7202, 7208, 7214, 7218, 7224, + 7229, 7236, 7243, 7249, 7254, 7258, 7261, 7266, 7278, 7286, 7291, 7293, 7295, 7302, 7308, 7311, 7318, 7321, 7327, + 7334, 7337, 7343, 7346, 7352, 7358, 7364, 7370, 7376, 7384, 7390, 7403, 7409, 7416, 7422, 7429, 7436, 7448, 7454, + 7456, 7461, 7467, 7477, 7489, 7492, 7499, 7505, 7512, 7517, 7522, 7528, 7533, 7538, 7544, 7547, 7551, 7555, 7561, + 7569, 7572, 7575, 7584, 7587, 7592, 7595, 7601, 7607, 7613, 7619, 7626, 7629, 7635, 7640, 7643, 7650, 7655, 7662, + 7668, 7673, 7681, 7686, 7691, 7703, 7711, 7719, 7724, 7733, 7736, 7743, 7750, 7752, 7758, 7759, 7762, 7768, 7774, + 7782, 7785, 7791, 7797, 7804, 7811, 7815, 7821, 7824, 7831, 7835, 7838, 7844, 7850, 7855, 7861, 7867, 7873, 7876, + 7882, 7888, 7897, 7903, 7909, 7915, 7918, 7924, 7930, 7936, 7941, 7949, 7954, 7960, 7965, 7970, 7976, 7981, 7985, + 7991, 8001, 8008, 8015, 8021, 8027, 8030, 8037, 8044, 8050, 8055, 8060, 8066, 8069, 8076, 8081, 8086, 8094, 8101, + 8109, 8113, 8116, 8119, 8121, 8128, 8135, 8142, 8148, 8155, 8161, 8167, 8171, 8181, 8189, 8198, 8201, 8209, 8214, + 8221, 8228, 8236, 8241, 8247, 8252, 8255, 8260, 8266, 8271, 8278, 8284, 8287, 8293, 8300, 8306, 8312, 8318, 8324, + 8330, 8333, 8336, 8339, 8345, 8348, 8354, 8358, 8365, 8372, 8375, 8378, 8385, 8388, 8391, 8394, 8397, 8400, 8406, + 8412, 8420, 8423, 8429, 8434, 
8441, 8447, 8454, 8457, 8464, 8467, 8473, 8477, 8480, 8487, 8493, 8496, 8502, 8508, + 8514, 8520, 8524, 8527, 8534, 8542, 8550, 8555, 8563, 8568, 8573, 8578, 8584, 8589, 8592, 8599, 8606, 8609, 8612, + 8615, 8618, 8624, 8630, 8636, 8644, 8647, 8650, 8653, 8660, 8664, 8672, 8677, 8682, 8685, 8693, 8694, 8699, 8707, + 8712, 8717, 8720, 8727, 8731, 8738, 8744, 8750, 8755, 8760, 8765, 8770, 8778, 8785, 8788, 8795, 8802, 8805, 8815, + 8823, 8828, 8835, 8842, 8854, 8860, 8866, 8872, 8879, 8882, 8888, 8896, 8903, 8909, 8915, 8922, 8929, 8932, 8935, + 8941, 8944, 8951, 8955, 8962, 8969, 8976, 8985, 8988, 8994, 9001, 9004, 9013, 9019, 9022, 9029, 9031, 9035, 9040, + 9046, 9049, 9055, 9064, 9067, 9073, 9080, 9083, 9088, 9094, 9101, 9107, 9117, 9121, 9124, 9130, 9133, 9140, 9146, + 9152, 9158, 9161, 9167, 9170, 9176, 9184, 9190, 9196, 9202, 9210, 9215, 9218, 9225, 9232, 9239, 9247, 9251, 9255, + 9266, 9274, 9281, 9284, 9293, 9296, 9304, 9309, 9317, 9322, 9327, 9333, 9338, 9347, 9354, 9357, 9364, 9399, 9403, + 9410, 9416, 9422, 9425, 9432, 9434, 9439, 9444, 9449, 9455, 9459, 9463, 9468, 9473, 9481, 9487, 9490, 9496, 9499, + 9504, 9510, 9511, 9518, 9524, 9529, 9536, 9542, 9549, 9555, 9563, 9568, 9573, 9581, 9584, 9587, 9595, 9601, 9607, + 9610, 9710, 9716, 9719, 9725, 9731, 9734, 9740, 9747, 9750, 9756, 9760, 9764, 9771, 9778, 9783, 9791, 9796, 9802, + 9809, 9814, 9820, 9825, 9832, 9839, 9845, 9846, 11156}; static int entries[11156] = { - 0, 346, 711, 727, 1408, 1, 696, 785, 1694, 2, 353, 839, - 1056, 1408, 3, 666, 1786, 4, 565, 1375, 1376, 5, 699, 781, - 1694, 6, 22, 660, 1017, 1464, 7, 1044, 1045, 1694, 8, 133, - 388, 1170, 1748, 1749, 9, 180, 1409, 1809, 1810, 10, 1220, 1221, - 1501, 11, 335, 715, 1319, 1681, 1768, 12, 20, 1004, 1328, 1443, - 13, 1466, 1786, 14, 160, 940, 1170, 1776, 15, 243, 851, 1319, - 1681, 16, 236, 851, 1191, 1579, 17, 282, 1702, 1748, 1749, 18, - 285, 940, 1170, 1702, 19, 830, 1786, 12, 20, 204, 738, 1300, - 1328, 1443, 1599, 21, 864, 1786, 6, 22, 467, 
1464, 1527, 1528, - 23, 293, 356, 554, 723, 924, 1745, 1786, 24, 233, 837, 858, - 1548, 25, 134, 927, 998, 1326, 26, 858, 1211, 1603, 27, 859, - 1651, 1652, 28, 1126, 1786, 29, 169, 1397, 1636, 1705, 30, 54, - 819, 822, 1581, 31, 60, 822, 837, 1548, 32, 583, 651, 1694, - 33, 74, 312, 722, 1336, 1768, 34, 1539, 1540, 1577, 35, 302, - 994, 1509, 1764, 36, 183, 1135, 1162, 1724, 1768, 37, 842, 1004, - 1041, 38, 367, 373, 566, 688, 810, 1745, 1786, 39, 1623, 1786, - 40, 967, 1261, 1262, 41, 1454, 1786, 42, 1467, 1786, 43, 97, - 1328, 1409, 1443, 44, 1409, 1664, 1665, 45, 59, 711, 727, 1157, - 46, 154, 660, 1147, 1464, 1768, 47, 211, 216, 860, 880, 1768, - 48, 1017, 1130, 1131, 49, 432, 444, 544, 567, 926, 1745, 1786, - 50, 1136, 1786, 51, 441, 1300, 1448, 1599, 52, 662, 1417, 1694, - 53, 327, 420, 1099, 1125, 1768, 30, 54, 60, 398, 819, 1581, - 1636, 1705, 55, 1091, 1157, 1728, 56, 1532, 1655, 1656, 57, 1058, - 1424, 1530, 58, 440, 1328, 1443, 1448, 45, 59, 259, 420, 711, - 727, 1504, 1505, 31, 54, 60, 819, 837, 1096, 1548, 1581, 61, - 198, 279, 808, 1700, 1768, 62, 1177, 1786, 63, 894, 1518, 1519, - 64, 139, 140, 933, 934, 981, 1552, 65, 139, 140, 933, 934, - 981, 1552, 66, 1047, 1048, 1694, 67, 614, 1019, 1397, 68, 990, - 1334, 1801, 69, 146, 147, 955, 958, 1179, 1180, 70, 970, 1786, - 71, 692, 1515, 1694, 72, 1227, 1786, 73, 312, 1191, 1375, 1579, - 33, 74, 722, 1336, 1375, 75, 76, 902, 1133, 1326, 75, 76, - 134, 902, 1133, 1768, 77, 1147, 1645, 1659, 78, 311, 1319, 1375, - 1681, 79, 152, 158, 1425, 1426, 1676, 1677, 80, 941, 1293, 1761, - 81, 558, 1786, 82, 164, 423, 1009, 1011, 1368, 1369, 83, 340, - 402, 586, 1051, 1686, 1745, 1786, 84, 340, 402, 586, 1051, 1686, - 1745, 1786, 85, 1326, 1402, 1403, 86, 170, 1397, 1773, 1774, 87, - 908, 1643, 1694, 88, 912, 1641, 1694, 89, 90, 1135, 1162, 1322, - 89, 90, 178, 1162, 1639, 1640, 91, 92, 860, 880, 1322, 91, - 92, 178, 316, 676, 860, 880, 1330, 93, 851, 1556, 1736, 94, - 180, 439, 1283, 1366, 1768, 95, 999, 1321, 
1322, 96, 1297, 1786, - 43, 97, 179, 1300, 1443, 1599, 98, 190, 191, 1181, 1182, 1558, - 1560, 99, 994, 1718, 1719, 100, 365, 385, 519, 764, 1386, 1745, - 1786, 101, 1445, 1694, 1759, 102, 664, 1414, 1694, 103, 185, 186, - 1110, 1388, 1768, 104, 825, 1078, 1741, 105, 152, 158, 1425, 1426, - 1676, 1677, 106, 1413, 1786, 107, 389, 722, 1336, 1656, 108, 1007, - 1008, 1694, 109, 1014, 1015, 1694, 110, 1435, 1786, 111, 218, 219, - 919, 920, 1682, 1685, 112, 400, 414, 592, 1123, 1190, 1745, 1786, - 113, 400, 414, 592, 1123, 1190, 1745, 1786, 114, 1098, 1099, 1125, - 115, 1450, 1694, 1754, 116, 218, 219, 919, 920, 1682, 1685, 117, - 1312, 1786, 118, 1324, 1786, 119, 1569, 1786, 120, 241, 242, 1186, - 1187, 1781, 1785, 121, 1625, 1626, 1694, 122, 244, 246, 1201, 1202, - 1565, 1566, 123, 244, 246, 1201, 1202, 1565, 1566, 124, 726, 902, - 1133, 125, 1619, 1786, 126, 256, 257, 1223, 1224, 1722, 1723, 127, - 571, 1786, 128, 256, 257, 1223, 1224, 1722, 1723, 129, 1649, 1786, - 130, 463, 464, 711, 727, 1768, 131, 1392, 1724, 1725, 132, 1632, - 1633, 1694, 8, 133, 134, 165, 927, 998, 1748, 1749, 25, 76, - 133, 134, 902, 927, 998, 1133, 135, 297, 598, 905, 1355, 136, - 268, 269, 487, 488, 817, 818, 137, 268, 269, 487, 488, 817, - 818, 138, 886, 890, 1694, 64, 65, 139, 787, 934, 1648, 1745, - 1786, 64, 65, 140, 787, 934, 1648, 1745, 1786, 141, 1060, 1448, - 1449, 142, 1772, 1773, 1774, 143, 1730, 1786, 144, 300, 308, 557, - 578, 985, 1786, 145, 281, 455, 1114, 1301, 1302, 1654, 1745, 1766, - 69, 146, 434, 603, 795, 958, 1745, 1786, 69, 147, 434, 603, - 795, 958, 1745, 1786, 148, 1471, 1786, 149, 1638, 1639, 1640, 150, - 577, 1521, 1694, 151, 316, 676, 1322, 1330, 79, 105, 152, 1023, - 1317, 1426, 1745, 1786, 153, 836, 837, 1548, 46, 154, 660, 1464, - 1659, 155, 948, 950, 1694, 156, 949, 952, 1694, 157, 289, 907, - 1328, 1443, 1768, 79, 105, 158, 1023, 1317, 1426, 1745, 1786, 159, - 167, 168, 1527, 1528, 1768, 14, 160, 266, 848, 940, 1170, 1748, - 1749, 161, 307, 310, 509, 511, 
1524, 1525, 162, 307, 310, 509, - 511, 1524, 1525, 163, 1231, 1477, 1610, 82, 164, 422, 527, 705, - 1011, 1745, 1786, 133, 165, 1326, 1748, 1749, 166, 167, 660, 1464, - 1577, 159, 166, 167, 660, 1464, 1527, 1528, 1539, 159, 168, 1527, - 1528, 1577, 29, 169, 170, 174, 1636, 1705, 1773, 1774, 86, 169, - 170, 1768, 1773, 1774, 171, 347, 1408, 1504, 1505, 172, 1526, 1527, - 1528, 173, 1503, 1504, 1505, 169, 174, 182, 323, 819, 1581, 1636, - 1705, 175, 1140, 1200, 1779, 176, 1454, 1786, 177, 1629, 1630, 1786, - 90, 92, 178, 317, 860, 880, 1639, 1640, 97, 179, 180, 192, - 1300, 1599, 1809, 1810, 9, 94, 179, 180, 1283, 1366, 1809, 1810, - 181, 185, 194, 660, 1220, 1464, 1527, 1528, 174, 182, 819, 1397, - 1581, 36, 183, 1135, 1162, 1392, 184, 185, 1501, 1527, 1528, 103, - 181, 184, 185, 1110, 1388, 1527, 1528, 103, 186, 1110, 1388, 1501, - 187, 1467, 1786, 188, 348, 349, 523, 525, 1798, 1799, 189, 348, - 349, 523, 525, 1798, 1799, 98, 190, 449, 605, 612, 1560, 1745, - 1786, 98, 191, 449, 605, 612, 1560, 1745, 1786, 179, 192, 1300, - 1409, 1599, 193, 778, 1020, 1488, 181, 194, 660, 1464, 1501, 195, - 416, 1135, 1162, 1334, 196, 417, 1334, 1639, 1640, 197, 666, 1786, - 61, 198, 808, 1017, 1700, 199, 927, 998, 1193, 200, 695, 1437, - 1508, 1673, 201, 334, 695, 957, 1610, 202, 1086, 1786, 203, 204, - 1004, 1300, 1599, 20, 203, 204, 1300, 1599, 1768, 205, 374, 940, - 1170, 1261, 206, 1103, 1786, 207, 598, 883, 1157, 1403, 208, 1318, - 1319, 1681, 209, 211, 223, 859, 1135, 1162, 1639, 1640, 210, 211, - 1639, 1640, 1651, 47, 209, 210, 211, 860, 880, 1639, 1640, 212, - 753, 1233, 1585, 1638, 213, 214, 753, 813, 1383, 213, 214, 813, - 1037, 1585, 1768, 215, 369, 371, 712, 713, 1572, 1573, 47, 216, - 860, 880, 1651, 217, 369, 371, 712, 713, 1572, 1573, 111, 116, - 218, 935, 1067, 1685, 1745, 1786, 111, 116, 219, 935, 1067, 1685, - 1745, 1786, 220, 221, 825, 1231, 1477, 220, 221, 292, 379, 645, - 689, 1231, 1477, 222, 396, 1191, 1579, 1656, 209, 223, 1135, 1162, - 1651, 224, 1623, 
1786, 225, 380, 381, 1236, 1359, 1614, 1615, 226, - 380, 381, 1236, 1359, 1614, 1615, 227, 940, 1079, 1170, 228, 1466, - 1786, 229, 854, 1079, 1696, 1747, 230, 280, 726, 854, 1193, 231, - 901, 1300, 1599, 232, 1339, 1636, 1705, 24, 233, 333, 819, 837, - 1211, 1548, 1581, 234, 1194, 1786, 235, 517, 625, 883, 986, 16, - 236, 243, 1191, 1579, 1768, 237, 238, 986, 1098, 1503, 237, 238, - 585, 883, 1098, 1768, 239, 403, 404, 548, 549, 1769, 1793, 240, - 403, 404, 548, 549, 1769, 1793, 120, 241, 457, 507, 611, 1766, - 1785, 1786, 120, 242, 457, 507, 611, 1766, 1785, 1786, 15, 236, - 243, 628, 1191, 1319, 1579, 1681, 122, 123, 244, 775, 828, 1202, - 1745, 1786, 245, 1135, 1162, 1233, 122, 123, 246, 775, 828, 1202, - 1745, 1786, 247, 960, 1786, 248, 808, 1306, 1700, 249, 658, 836, - 1105, 1148, 250, 399, 1105, 1339, 1772, 251, 1177, 1786, 252, 968, - 1786, 253, 254, 1140, 1509, 1764, 253, 254, 594, 1509, 1764, 1768, - 255, 337, 342, 510, 755, 1264, 1745, 1786, 126, 128, 256, 515, - 622, 1224, 1745, 1786, 126, 128, 257, 515, 622, 1224, 1745, 1786, - 258, 259, 839, 1056, 1157, 59, 258, 259, 711, 727, 839, 260, - 979, 1260, 1281, 1318, 261, 262, 1020, 1260, 1751, 261, 262, 1020, - 1281, 1282, 1768, 263, 264, 1448, 1809, 1810, 263, 264, 441, 1768, - 1809, 1810, 265, 266, 1748, 1749, 1776, 160, 265, 266, 1748, 1749, - 1768, 267, 456, 837, 894, 1548, 136, 137, 268, 488, 682, 1081, - 1745, 1786, 136, 137, 269, 488, 682, 1081, 1745, 1786, 270, 418, - 424, 656, 1320, 1411, 1745, 1786, 271, 660, 1168, 1464, 272, 1168, - 1440, 1455, 1526, 273, 517, 711, 727, 274, 390, 778, 1488, 1656, - 275, 276, 1106, 1306, 1440, 275, 276, 752, 1306, 1455, 1768, 277, - 1297, 1786, 278, 279, 1017, 1110, 1388, 61, 278, 279, 467, 808, - 1110, 1388, 1700, 230, 280, 726, 1696, 1697, 1768, 145, 281, 454, - 539, 1302, 1744, 1745, 1786, 17, 282, 285, 331, 927, 998, 1748, - 1749, 283, 901, 1507, 1514, 1675, 284, 328, 1675, 1707, 1808, 18, - 282, 285, 940, 977, 1170, 1748, 1749, 286, 961, 1278, 1694, 287, - 
1040, 1263, 1694, 288, 722, 1336, 1751, 157, 289, 1293, 1328, 1443, - 290, 960, 1786, 291, 292, 645, 689, 825, 221, 291, 292, 645, - 689, 1768, 23, 293, 294, 721, 723, 733, 872, 293, 294, 356, - 554, 723, 924, 1745, 1786, 295, 968, 1786, 296, 297, 598, 1355, - 135, 296, 297, 598, 868, 905, 298, 1413, 1786, 299, 1569, 1786, - 144, 300, 309, 576, 578, 736, 737, 1630, 1786, 301, 1435, 1786, - 35, 302, 319, 1231, 1357, 1477, 1509, 1764, 303, 305, 314, 503, - 504, 975, 1085, 304, 305, 314, 503, 504, 975, 1085, 303, 304, - 305, 504, 1146, 1185, 1745, 1786, 306, 1283, 1366, 1707, 161, 162, - 307, 511, 630, 1401, 1745, 1786, 144, 308, 309, 576, 578, 736, - 737, 1630, 1786, 300, 308, 309, 557, 578, 985, 1786, 161, 162, - 310, 511, 630, 1401, 1745, 1786, 78, 311, 312, 551, 1191, 1319, - 1579, 1681, 33, 73, 311, 312, 722, 1191, 1336, 1579, 313, 798, - 1406, 1492, 303, 304, 314, 504, 1146, 1185, 1745, 1786, 315, 693, - 1510, 1694, 92, 151, 316, 676, 1330, 1768, 178, 317, 1322, 1639, - 1640, 318, 319, 994, 1231, 1477, 302, 318, 319, 1231, 1477, 1768, - 320, 1340, 1346, 1694, 321, 710, 988, 1408, 322, 323, 837, 1397, - 1548, 174, 322, 323, 819, 837, 1581, 324, 558, 1786, 325, 676, - 813, 1330, 326, 1663, 1745, 53, 327, 1099, 1125, 1157, 284, 328, - 1513, 1514, 1707, 1768, 329, 1663, 1745, 1766, 330, 331, 927, 998, - 1702, 282, 330, 331, 927, 998, 1768, 332, 333, 819, 858, 1581, - 233, 332, 333, 819, 1581, 1768, 201, 334, 957, 1672, 1673, 1768, - 11, 335, 1003, 1319, 1681, 336, 830, 1786, 255, 337, 343, 754, - 755, 1092, 1093, 338, 1328, 1443, 1507, 339, 658, 819, 1581, 83, - 84, 340, 584, 1050, 1051, 1583, 341, 345, 350, 513, 514, 1667, - 1668, 255, 342, 343, 754, 755, 1092, 1093, 337, 342, 343, 510, - 755, 1264, 1745, 1786, 344, 345, 350, 513, 514, 1667, 1668, 341, - 344, 345, 514, 930, 976, 1745, 1786, 0, 346, 347, 353, 711, - 727, 1504, 1505, 171, 346, 347, 1504, 1505, 1768, 188, 189, 348, - 525, 545, 1119, 1745, 1786, 188, 189, 349, 525, 545, 1119, 1745, - 1786, 341, 344, 
350, 514, 930, 976, 1745, 1786, 351, 645, 689, - 957, 352, 1619, 1630, 1786, 2, 346, 353, 710, 711, 727, 839, - 1056, 354, 1730, 1786, 355, 1649, 1786, 23, 294, 356, 721, 723, - 733, 872, 357, 864, 1786, 358, 1106, 1110, 1388, 359, 401, 405, - 505, 608, 650, 1745, 1786, 360, 1003, 1517, 1767, 361, 1136, 1786, - 362, 364, 370, 643, 646, 758, 1036, 363, 364, 370, 643, 646, - 758, 1036, 362, 363, 364, 646, 779, 1203, 1745, 1786, 100, 365, - 386, 763, 764, 1316, 1399, 366, 1508, 1509, 1764, 38, 367, 368, - 809, 810, 1258, 1259, 367, 368, 373, 566, 688, 810, 1745, 1786, - 215, 217, 369, 508, 877, 1573, 1745, 1786, 362, 363, 370, 646, - 779, 1203, 1745, 1786, 215, 217, 371, 508, 877, 1573, 1745, 1786, - 372, 379, 640, 714, 1509, 1764, 38, 368, 373, 809, 810, 1258, - 1259, 205, 374, 940, 967, 1170, 1768, 375, 442, 839, 1056, 1058, - 1768, 376, 1126, 1786, 377, 1086, 1786, 378, 379, 825, 1509, 1764, - 221, 372, 378, 379, 1231, 1477, 1509, 1764, 225, 226, 380, 644, - 989, 1615, 1745, 1786, 225, 226, 381, 644, 989, 1615, 1745, 1786, - 382, 625, 839, 1056, 383, 1533, 1541, 1694, 384, 1536, 1545, 1694, - 100, 385, 386, 763, 764, 1316, 1399, 365, 385, 386, 519, 764, - 1386, 1745, 1786, 387, 1103, 1786, 8, 388, 940, 1170, 1326, 107, - 389, 390, 396, 722, 778, 1336, 1488, 274, 389, 390, 778, 1488, - 1768, 391, 822, 1096, 1097, 392, 395, 1319, 1656, 1681, 393, 1471, - 1786, 394, 888, 892, 1694, 392, 395, 396, 1191, 1319, 1579, 222, - 389, 395, 396, 722, 1191, 1336, 1579, 397, 398, 822, 1636, 1705, - 54, 397, 398, 1636, 1705, 1768, 250, 399, 1148, 1721, 1768, 1772, - 112, 113, 400, 1077, 1189, 1190, 1343, 359, 401, 406, 547, 595, - 649, 650, 83, 84, 402, 584, 1050, 1051, 1583, 239, 240, 403, - 549, 1142, 1660, 1745, 1786, 239, 240, 404, 549, 1142, 1660, 1745, - 1786, 359, 405, 406, 547, 595, 649, 650, 401, 405, 406, 505, - 608, 650, 1745, 1786, 407, 1227, 1786, 408, 979, 1191, 1579, 409, - 1312, 1786, 410, 1600, 1602, 1694, 411, 1601, 1606, 1694, 412, 1590, - 1786, 413, 970, 1786, 
112, 113, 414, 1077, 1189, 1190, 1343, 415, - 1590, 1786, 195, 416, 417, 990, 1135, 1162, 1639, 1640, 196, 416, - 417, 1639, 1640, 1768, 270, 418, 425, 655, 656, 1345, 1804, 419, - 420, 1157, 1504, 1505, 53, 59, 419, 420, 1099, 1125, 1504, 1505, - 421, 1324, 1786, 164, 422, 423, 1009, 1011, 1368, 1369, 82, 422, - 423, 527, 705, 1011, 1745, 1786, 270, 424, 425, 655, 656, 1345, - 1804, 418, 424, 425, 656, 1320, 1411, 1745, 1786, 426, 1194, 1786, - 427, 1747, 1748, 1749, 428, 1341, 1349, 1694, 429, 1808, 1809, 1810, - 430, 977, 978, 1702, 431, 848, 944, 1776, 49, 432, 445, 925, - 926, 1138, 1139, 433, 674, 1333, 1462, 146, 147, 434, 955, 958, - 1179, 1180, 435, 438, 448, 702, 703, 762, 789, 1630, 1786, 436, - 571, 1786, 437, 438, 448, 702, 703, 762, 789, 1630, 1786, 435, - 437, 438, 606, 703, 1216, 1786, 94, 439, 1283, 1366, 1409, 58, - 440, 441, 648, 1300, 1328, 1443, 1599, 51, 264, 440, 441, 1300, - 1599, 1809, 1810, 375, 442, 839, 1056, 1530, 443, 450, 461, 552, - 553, 1021, 1066, 49, 444, 445, 925, 926, 1138, 1139, 432, 444, - 445, 544, 567, 926, 1745, 1786, 446, 450, 461, 552, 553, 1021, - 1066, 447, 1629, 1786, 435, 437, 448, 606, 703, 1216, 1786, 190, - 191, 449, 1181, 1182, 1558, 1560, 443, 446, 450, 491, 553, 1115, - 1745, 1786, 451, 860, 880, 1383, 452, 1694, 1729, 1735, 453, 1694, - 1734, 1738, 281, 454, 455, 1114, 1301, 1302, 1654, 1745, 1766, 145, - 454, 455, 539, 1302, 1744, 1745, 1786, 267, 456, 837, 1518, 1548, - 1768, 241, 242, 457, 1186, 1187, 1781, 1785, 458, 460, 465, 559, - 560, 1214, 1215, 459, 460, 465, 559, 560, 1214, 1215, 458, 459, - 460, 560, 618, 1235, 1745, 1786, 443, 446, 461, 491, 553, 1115, - 1745, 1786, 462, 463, 839, 1056, 1462, 130, 462, 463, 674, 711, - 727, 839, 1056, 130, 464, 711, 727, 1462, 458, 459, 465, 560, - 618, 1235, 1745, 1786, 466, 467, 1017, 1527, 1528, 22, 279, 466, - 467, 1110, 1388, 1527, 1528, 468, 476, 469, 477, 1786, 1745, 1630, - 1768, 1766, 1458, 468, 469, 478, 1212, 1455, 1812, 479, 837, 1812, - 480, 860, 1812, 
481, 1099, 1812, 482, 1454, 1812, 483, 750, 1812, - 484, 1467, 1812, 485, 1732, 1812, 486, 645, 1812, 487, 1786, 1812, - 488, 1786, 1812, 489, 585, 1059, 1148, 1812, 490, 1639, 1812, 491, - 1745, 1812, 492, 558, 1812, 493, 662, 1417, 1628, 1812, 494, 664, - 1414, 1628, 1812, 495, 727, 1812, 496, 640, 1437, 1812, 497, 1528, - 1812, 498, 579, 1594, 1812, 499, 531, 781, 785, 1540, 1576, 1812, - 500, 1505, 1812, 501, 538, 555, 820, 829, 928, 1102, 1812, 502, - 943, 1584, 1696, 1812, 503, 1786, 1812, 504, 1786, 1812, 505, 1745, - 1812, 506, 1533, 1541, 1628, 1812, 241, 242, 507, 611, 1766, 1786, - 1812, 369, 371, 508, 877, 1745, 1786, 1812, 509, 1786, 1812, 255, - 343, 510, 1264, 1745, 1786, 1812, 511, 1786, 1812, 512, 1787, 1812, - 513, 1786, 1812, 514, 1786, 1812, 256, 257, 515, 622, 1745, 1786, - 1812, 516, 524, 587, 712, 873, 1812, 273, 495, 517, 669, 711, - 727, 1812, 518, 814, 1673, 1812, 519, 1745, 1812, 520, 846, 991, - 1344, 1803, 1812, 521, 660, 1159, 1812, 522, 1194, 1812, 523, 1786, - 1812, 516, 524, 587, 873, 896, 1812, 525, 1786, 1812, 526, 774, - 780, 784, 1765, 1812, 527, 1745, 1812, 528, 960, 1812, 529, 1628, - 1729, 1735, 1812, 530, 1289, 1599, 1812, 531, 1106, 1812, 532, 968, - 1812, 533, 583, 651, 1628, 1812, 534, 1640, 1812, 535, 663, 1812, - 536, 1536, 1545, 1628, 1812, 537, 1191, 1812, 501, 538, 928, 1102, - 1799, 1812, 539, 1786, 1812, 540, 1277, 1812, 541, 1297, 1812, 542, - 543, 731, 883, 1812, 542, 543, 731, 884, 1812, 49, 445, 544, - 567, 1745, 1786, 1812, 348, 349, 545, 1119, 1745, 1786, 1812, 546, - 676, 1812, 401, 405, 547, 595, 649, 1786, 1812, 548, 1786, 1812, - 549, 1786, 1812, 550, 1076, 1342, 1354, 1438, 1794, 1795, 1812, 311, - 551, 565, 1319, 1375, 1568, 1681, 1721, 1768, 1812, 552, 1786, 1812, - 553, 1786, 1812, 554, 1745, 1812, 501, 555, 928, 1102, 1798, 1812, - 556, 1413, 1812, 144, 309, 557, 985, 1786, 1812, 81, 324, 492, - 558, 1372, 1786, 1812, 559, 1786, 1812, 560, 1786, 1812, 561, 1435, - 1812, 562, 1628, 1734, 1738, 1812, 
563, 685, 686, 938, 1104, 1240, - 1605, 1812, 564, 927, 1812, 551, 565, 1376, 1721, 1768, 1812, 566, - 1745, 1812, 567, 1745, 1812, 568, 1496, 1588, 1812, 569, 886, 890, - 1628, 1812, 538, 555, 570, 769, 1102, 1812, 127, 436, 571, 572, - 984, 1786, 1812, 571, 572, 1812, 573, 640, 799, 1406, 1812, 574, - 631, 1114, 1378, 1812, 575, 1619, 1812, 576, 1630, 1812, 150, 577, - 635, 654, 1521, 1728, 1733, 1812, 578, 1786, 1812, 498, 579, 1594, - 1812, 580, 774, 1283, 1812, 581, 1748, 1812, 582, 594, 640, 1140, - 1812, 32, 533, 583, 635, 651, 1728, 1733, 1812, 340, 402, 584, - 1050, 1583, 1786, 1812, 585, 883, 1812, 586, 1745, 1812, 587, 873, - 1812, 584, 588, 589, 1586, 1587, 1812, 588, 589, 866, 1061, 1582, - 1586, 1587, 1812, 590, 591, 1812, 591, 830, 906, 974, 1084, 1812, - 592, 1745, 1812, 593, 1328, 1812, 254, 582, 594, 640, 1200, 1509, - 1764, 1812, 401, 405, 547, 595, 649, 1786, 1812, 596, 1581, 1812, - 597, 1693, 1812, 207, 598, 627, 883, 1157, 1812, 599, 845, 1812, - 600, 1621, 1812, 601, 689, 1812, 602, 640, 994, 1357, 1812, 603, - 1745, 1812, 604, 1111, 1360, 1669, 1788, 1812, 605, 1745, 1812, 438, - 448, 606, 1216, 1786, 1812, 607, 657, 845, 999, 1592, 1812, 359, - 406, 505, 608, 1745, 1786, 1812, 609, 675, 1585, 1696, 1812, 610, - 1283, 1812, 611, 1766, 1812, 190, 191, 605, 612, 1745, 1786, 1812, - 613, 1749, 1812, 489, 614, 776, 1019, 1532, 1728, 1768, 1812, 615, - 1471, 1812, 616, 1810, 1812, 617, 1183, 1812, 460, 465, 618, 1235, - 1745, 1786, 1812, 619, 778, 1616, 1812, 620, 1812, 621, 661, 1757, - 1812, 622, 1745, 1812, 623, 1812, 624, 625, 1812, 382, 625, 838, - 839, 1055, 1056, 1812, 626, 731, 1812, 598, 627, 883, 1403, 1812, - 243, 628, 851, 1319, 1556, 1568, 1681, 1721, 1768, 1812, 629, 692, - 1515, 1628, 1812, 630, 1745, 1812, 574, 631, 1378, 1653, 1812, 623, - 632, 671, 672, 788, 1367, 1595, 1812, 633, 1590, 1812, 634, 1327, - 1328, 1812, 577, 583, 635, 706, 1628, 1694, 1728, 1733, 1812, 635, - 636, 1812, 637, 1443, 1812, 638, 902, 1812, 639, 640, 
1812, 620, - 631, 640, 1437, 1812, 641, 808, 1812, 642, 693, 1510, 1628, 1812, - 643, 1786, 1812, 644, 1745, 1812, 601, 645, 689, 957, 1024, 1812, - 646, 1786, 1812, 647, 1477, 1812, 440, 648, 752, 1060, 1172, 1328, - 1443, 1448, 1768, 1812, 649, 1786, 1812, 650, 1786, 1812, 651, 1521, - 1812, 652, 1629, 1812, 653, 1568, 1720, 1721, 1812, 577, 654, 1521, - 1628, 1812, 655, 1786, 1812, 656, 1786, 1812, 657, 719, 978, 1108, - 1812, 339, 596, 658, 748, 819, 1581, 1812, 659, 660, 1812, 521, - 660, 1159, 1168, 1463, 1464, 1812, 661, 962, 1812, 52, 493, 662, - 663, 1417, 1728, 1733, 1812, 662, 663, 664, 982, 1628, 1694, 1728, - 1733, 1812, 102, 494, 663, 664, 1414, 1728, 1733, 1812, 665, 1151, - 1479, 1669, 1788, 1812, 3, 197, 666, 667, 878, 1786, 1812, 666, - 667, 1812, 668, 1527, 1812, 669, 711, 1812, 670, 1098, 1812, 671, - 672, 1812, 672, 788, 960, 1594, 1595, 1812, 673, 942, 1358, 1384, - 1385, 1812, 463, 674, 839, 1056, 1333, 1768, 1812, 609, 675, 1585, - 1696, 1812, 676, 749, 813, 1330, 1481, 1812, 677, 679, 1812, 677, - 678, 679, 1035, 1160, 1204, 1405, 1812, 679, 1035, 1159, 1160, 1649, - 1812, 680, 1086, 1812, 588, 681, 802, 1582, 1587, 1812, 268, 269, - 682, 1081, 1745, 1786, 1812, 683, 1680, 1681, 1812, 684, 834, 999, - 1256, 1592, 1812, 685, 686, 1812, 686, 938, 959, 1240, 1312, 1812, - 687, 1103, 1812, 38, 368, 566, 688, 1745, 1786, 1812, 486, 645, - 689, 747, 957, 1812, 690, 1700, 1812, 691, 732, 1087, 1088, 1149, - 1290, 1444, 1812, 71, 629, 692, 849, 1515, 1728, 1733, 1812, 315, - 642, 693, 849, 1510, 1728, 1733, 1812, 694, 1786, 1812, 200, 201, - 695, 1508, 1610, 1768, 1812, 1, 696, 697, 785, 1183, 1532, 1535, - 1812, 696, 697, 785, 1628, 1812, 698, 888, 892, 1628, 1812, 5, - 699, 781, 823, 1183, 1532, 1535, 1812, 700, 722, 1812, 701, 1466, - 1812, 702, 1630, 1812, 703, 1786, 1812, 704, 1172, 1812, 164, 423, - 527, 705, 1745, 1786, 1812, 635, 706, 1628, 1694, 1745, 1812, 707, - 708, 760, 1065, 1370, 1812, 708, 1745, 1812, 709, 1279, 1439, 1561, - 1562, 
1812, 353, 710, 839, 988, 1056, 1768, 1812, 495, 517, 711, - 727, 1553, 1812, 215, 217, 712, 713, 1572, 1786, 1812, 215, 217, - 712, 713, 1572, 1786, 1812, 372, 640, 714, 825, 1812, 11, 715, - 1003, 1319, 1568, 1681, 1721, 1767, 1768, 1812, 716, 1534, 1812, 717, - 1319, 1812, 718, 1526, 1812, 657, 719, 978, 1108, 1812, 720, 1091, - 1157, 1458, 1728, 1812, 721, 1786, 1812, 722, 861, 1335, 1336, 1751, - 1812, 723, 1786, 1812, 724, 1773, 1812, 725, 849, 1812, 124, 638, - 726, 902, 1052, 1133, 1812, 517, 669, 711, 727, 931, 1812, 728, - 729, 1669, 1787, 1788, 1812, 729, 1271, 1812, 730, 1068, 1070, 1071, - 1199, 1812, 731, 1812, 691, 732, 733, 1087, 1088, 1812, 293, 356, - 721, 733, 872, 1786, 1812, 734, 1135, 1812, 735, 862, 863, 1179, - 1292, 1812, 300, 308, 576, 736, 737, 1630, 1812, 300, 308, 576, - 736, 737, 1630, 1812, 20, 738, 752, 1004, 1041, 1172, 1328, 1443, - 1768, 1812, 739, 1757, 1812, 740, 1681, 1812, 741, 1638, 1812, 742, - 979, 1812, 743, 744, 1481, 1482, 1486, 1812, 744, 1745, 1812, 745, - 1288, 1514, 1812, 746, 1534, 1628, 1694, 1745, 1812, 747, 1075, 1459, - 1460, 1709, 1812, 748, 819, 1812, 749, 1330, 1812, 750, 1072, 1445, - 1450, 1532, 1535, 1628, 1694, 1812, 751, 800, 1381, 1390, 1396, 1812, - 752, 1455, 1812, 212, 213, 753, 1383, 1638, 1768, 1812, 754, 1786, - 1812, 755, 1786, 1812, 756, 1196, 1336, 1812, 757, 880, 1812, 362, - 363, 643, 758, 1036, 1786, 1812, 759, 766, 1812, 759, 760, 765, - 766, 1021, 1812, 761, 948, 950, 1628, 1812, 435, 437, 702, 762, - 789, 1630, 1812, 763, 1786, 1812, 764, 1786, 1812, 759, 760, 765, - 766, 1065, 1812, 571, 760, 766, 1065, 1370, 1812, 767, 1340, 1346, - 1628, 1812, 768, 1341, 1349, 1628, 1812, 538, 555, 570, 769, 770, - 1812, 770, 1745, 1812, 771, 1812, 772, 821, 1015, 1646, 1812, 771, - 773, 783, 1766, 1812, 526, 774, 780, 784, 1063, 1812, 244, 246, - 775, 828, 1745, 1786, 1812, 776, 804, 1129, 1281, 1812, 777, 778, - 1812, 619, 778, 1020, 1487, 1488, 1616, 1812, 364, 370, 779, 1203, - 1745, 1786, 1812, 
780, 783, 1062, 1063, 1186, 1812, 499, 781, 782, - 1576, 1812, 499, 781, 782, 1576, 1812, 771, 773, 780, 783, 784, - 1062, 1063, 1812, 783, 784, 1062, 1063, 1187, 1812, 499, 785, 786, - 1540, 1812, 499, 785, 786, 1540, 1812, 787, 1745, 1812, 632, 671, - 672, 788, 789, 1812, 435, 437, 702, 762, 789, 1630, 1812, 790, - 1489, 1812, 791, 1439, 1562, 1790, 1805, 1812, 792, 900, 1754, 1756, - 1759, 1761, 1812, 793, 1699, 1700, 1812, 794, 1504, 1812, 146, 147, - 603, 795, 1745, 1786, 1812, 796, 999, 1144, 1270, 1592, 1812, 797, - 1745, 1812, 640, 798, 799, 1492, 1768, 1812, 573, 640, 798, 799, - 1768, 1812, 800, 1745, 1812, 574, 801, 946, 1653, 1812, 588, 681, - 802, 1038, 1582, 1812, 803, 846, 1812, 731, 804, 805, 883, 1148, - 1281, 1812, 804, 805, 807, 1148, 1281, 1812, 731, 806, 807, 883, - 1812, 805, 806, 807, 883, 1148, 1812, 690, 808, 904, 1306, 1700, - 1812, 809, 1786, 1812, 810, 1786, 1812, 811, 1663, 1812, 812, 813, - 1812, 325, 546, 676, 749, 813, 1330, 1812, 518, 814, 1673, 1812, - 735, 815, 816, 863, 876, 1812, 816, 862, 863, 1180, 1292, 1812, - 136, 137, 487, 817, 818, 1786, 1812, 136, 137, 487, 817, 818, - 1786, 1812, 596, 658, 819, 820, 1581, 1812, 501, 820, 829, 1745, - 1812, 772, 821, 822, 1015, 1646, 1812, 30, 31, 397, 821, 822, - 1548, 1581, 1636, 1768, 1812, 699, 781, 823, 1628, 1812, 824, 1628, - 1694, 1732, 1745, 1812, 220, 291, 378, 645, 714, 825, 1078, 1477, - 1764, 1768, 1812, 826, 1162, 1812, 827, 882, 1352, 1375, 1633, 1812, - 828, 1745, 1812, 829, 1812, 19, 336, 830, 831, 1176, 1786, 1812, - 830, 831, 1812, 832, 980, 1551, 1691, 1692, 1812, 833, 1745, 1812, - 834, 835, 944, 1742, 1812, 834, 835, 944, 1742, 1812, 153, 479, - 836, 837, 887, 1548, 1812, 836, 837, 887, 1061, 1548, 1812, 838, - 839, 1812, 625, 839, 1055, 1056, 1274, 1812, 840, 1005, 1048, 1611, - 1812, 841, 842, 1045, 1611, 1812, 37, 841, 842, 1004, 1041, 1045, - 1611, 1812, 843, 844, 906, 974, 1084, 1812, 844, 1745, 1812, 607, - 845, 999, 1294, 1592, 1812, 520, 846, 1344, 1471, 
1803, 1812, 847, - 1495, 1742, 1775, 1812, 160, 848, 940, 944, 1170, 1768, 1812, 692, - 693, 849, 850, 1628, 1694, 1728, 1733, 1812, 849, 850, 1628, 1694, - 1745, 1812, 15, 16, 628, 851, 1191, 1568, 1681, 1739, 1768, 1812, - 852, 1508, 1812, 853, 1076, 1245, 1342, 1795, 1812, 229, 230, 854, - 1193, 1747, 1768, 1812, 718, 855, 1641, 1643, 1645, 1658, 1812, 856, - 857, 1812, 857, 1218, 1315, 1398, 1730, 1812, 24, 332, 858, 1548, - 1581, 1607, 1768, 1812, 209, 859, 1135, 1162, 1652, 1768, 1812, 757, - 860, 880, 1383, 1485, 1812, 861, 875, 1456, 1745, 1812, 862, 863, - 1812, 735, 815, 816, 863, 1136, 1812, 21, 357, 864, 865, 1365, - 1786, 1812, 864, 865, 1812, 866, 1812, 867, 1663, 1745, 1766, 1812, - 868, 1337, 1458, 1812, 869, 1413, 1786, 1812, 870, 1006, 1024, 1029, - 1032, 1812, 871, 879, 1585, 1812, 293, 356, 721, 733, 872, 1786, - 1812, 516, 873, 895, 896, 1227, 1812, 874, 875, 972, 973, 1181, - 1812, 861, 874, 875, 972, 973, 1456, 1617, 1812, 735, 815, 816, - 876, 954, 1812, 877, 1745, 1812, 666, 878, 1786, 1812, 871, 879, - 1585, 1812, 480, 860, 880, 895, 1383, 1812, 881, 1352, 1376, 1626, - 1812, 827, 882, 1352, 1633, 1812, 235, 238, 585, 625, 883, 1768, - 1812, 543, 883, 884, 1812, 885, 1381, 1774, 1812, 138, 569, 886, - 890, 1427, 1532, 1535, 1812, 887, 1548, 1812, 394, 698, 888, 892, - 1427, 1532, 1535, 1812, 889, 1265, 1421, 1812, 890, 891, 1121, 1221, - 1812, 890, 891, 1121, 1221, 1812, 892, 1120, 1121, 1500, 1812, 658, - 893, 1812, 267, 894, 1416, 1548, 1768, 1812, 516, 873, 895, 896, - 897, 1812, 524, 587, 713, 873, 896, 1812, 516, 895, 896, 897, - 1571, 1812, 898, 1188, 1625, 1628, 1632, 1694, 1728, 1733, 1812, 898, - 899, 1812, 900, 901, 1812, 231, 901, 1299, 1300, 1598, 1599, 1812, - 726, 902, 903, 1052, 1133, 1812, 903, 942, 1022, 1358, 1385, 1812, - 904, 915, 1197, 1745, 1812, 905, 906, 1812, 591, 843, 906, 974, - 1084, 1812, 157, 752, 907, 941, 1172, 1293, 1328, 1443, 1768, 1812, - 87, 908, 909, 1483, 1532, 1535, 1643, 1812, 908, 909, 1628, 1643, 
- 1812, 910, 911, 1431, 1433, 1478, 1812, 911, 1786, 1812, 88, 912, - 1027, 1483, 1532, 1535, 1641, 1812, 913, 1412, 1812, 818, 913, 914, - 915, 1412, 1812, 904, 913, 914, 915, 916, 1197, 1412, 1812, 817, - 913, 915, 916, 1412, 1812, 917, 1126, 1812, 918, 1745, 1812, 111, - 116, 919, 920, 1682, 1786, 1812, 111, 116, 919, 920, 1682, 1786, - 1812, 921, 922, 1250, 1439, 1562, 1812, 922, 1267, 1268, 1391, 1812, - 825, 923, 1078, 1458, 1741, 1812, 23, 294, 554, 924, 1745, 1786, - 1812, 925, 1786, 1812, 926, 1786, 1812, 673, 927, 997, 998, 1193, - 1812, 928, 1102, 1812, 929, 931, 1215, 1473, 1474, 1812, 930, 1745, - 1812, 929, 931, 1116, 1473, 1474, 1812, 932, 1745, 1812, 933, 1786, - 1812, 934, 1786, 1812, 218, 219, 935, 1067, 1745, 1786, 1812, 742, - 936, 1510, 1512, 1515, 1517, 1812, 937, 1809, 1812, 563, 685, 686, - 938, 1676, 1812, 939, 940, 1812, 940, 1068, 1079, 1169, 1170, 1812, - 752, 907, 941, 1761, 1768, 1812, 673, 942, 1236, 1384, 1385, 1812, - 502, 943, 1584, 1696, 1812, 431, 834, 835, 848, 944, 1742, 1776, - 1812, 945, 1747, 1812, 574, 801, 946, 1296, 1653, 1812, 947, 1774, - 1812, 155, 761, 948, 950, 1532, 1535, 1693, 1812, 156, 949, 952, - 1154, 1532, 1535, 1693, 1812, 950, 951, 1442, 1449, 1812, 950, 951, - 1442, 1449, 1812, 952, 953, 1441, 1442, 1812, 952, 953, 1441, 1442, - 1448, 1812, 954, 1745, 1812, 955, 1786, 1812, 956, 957, 1812, 351, - 486, 601, 645, 689, 957, 1812, 958, 1786, 1812, 686, 938, 959, - 1033, 1240, 1812, 247, 290, 528, 960, 1746, 1786, 1812, 961, 1040, - 1812, 962, 1436, 1812, 963, 1427, 1628, 1694, 1745, 1812, 964, 1008, - 1097, 1646, 1812, 965, 1227, 1786, 1812, 966, 970, 1786, 1812, 374, - 940, 967, 1170, 1262, 1768, 1812, 252, 295, 532, 968, 969, 1786, - 1812, 968, 969, 1786, 1812, 70, 413, 966, 970, 971, 1786, 1812, - 970, 971, 1812, 972, 973, 1812, 874, 968, 973, 1616, 1617, 1812, - 590, 591, 974, 975, 1073, 1812, 303, 304, 503, 975, 1085, 1786, - 1812, 345, 350, 930, 976, 1745, 1786, 1812, 285, 940, 977, 978, - 1170, 1768, 
1812, 430, 657, 719, 977, 978, 1108, 1702, 1812, 408, - 537, 979, 1191, 1578, 1579, 1812, 832, 980, 981, 1691, 1692, 1812, - 64, 65, 933, 981, 1552, 1786, 1812, 663, 982, 1628, 1694, 1745, - 1812, 983, 1569, 1786, 1812, 571, 984, 1786, 1812, 985, 1786, 1812, - 235, 237, 517, 986, 1503, 1768, 1812, 987, 988, 1026, 1128, 1812, - 321, 710, 987, 988, 1026, 1128, 1408, 1812, 380, 381, 644, 989, - 1745, 1786, 1812, 416, 990, 1135, 1162, 1768, 1801, 1812, 520, 991, - 992, 1344, 1803, 1812, 992, 1745, 1812, 993, 1025, 1026, 1407, 1812, - 35, 318, 602, 994, 1285, 1477, 1764, 1768, 1812, 995, 1812, 995, - 996, 1016, 1745, 1812, 997, 998, 1812, 564, 927, 998, 1193, 1363, - 1812, 502, 871, 999, 1321, 1403, 1741, 1768, 1812, 999, 1000, 1591, - 1592, 1621, 1812, 1001, 1040, 1263, 1628, 1812, 726, 1002, 1812, 335, - 715, 1003, 1512, 1568, 1681, 1768, 1812, 12, 203, 738, 1004, 1005, - 1172, 1328, 1599, 1768, 1812, 840, 1004, 1005, 1048, 1611, 1812, 1006, - 1745, 1812, 108, 1007, 1008, 1010, 1683, 1728, 1733, 1812, 964, 1008, - 1097, 1646, 1812, 1009, 1786, 1812, 1007, 1008, 1010, 1628, 1812, 1011, - 1786, 1812, 920, 1012, 1016, 1713, 1714, 1812, 893, 1013, 1414, 1416, - 1417, 1519, 1812, 109, 1014, 1015, 1143, 1683, 1728, 1733, 1812, 772, - 821, 1015, 1646, 1812, 995, 996, 1012, 1016, 1310, 1713, 1714, 1812, - 6, 198, 278, 466, 660, 1017, 1110, 1130, 1172, 1528, 1700, 1768, - 1812, 1018, 1094, 1141, 1609, 1777, 1779, 1812, 67, 614, 1019, 1118, - 1397, 1458, 1812, 193, 777, 778, 1020, 1487, 1488, 1812, 443, 446, - 552, 1021, 1066, 1786, 1812, 903, 942, 1022, 1358, 1613, 1812, 1023, - 1745, 1812, 870, 1024, 1029, 1031, 1032, 1812, 993, 1025, 1026, 1407, - 1812, 670, 988, 993, 1026, 1128, 1407, 1812, 912, 1027, 1628, 1641, - 1812, 1028, 1031, 1812, 1028, 1029, 1030, 1031, 1369, 1812, 1028, 1029, - 1030, 1031, 1032, 1812, 1024, 1029, 1031, 1032, 1629, 1812, 1028, 1030, - 1031, 1032, 1368, 1812, 938, 959, 1033, 1240, 1410, 1812, 1034, 1355, - 1812, 677, 678, 679, 1035, 1036, 1812, 362, 
363, 643, 758, 1036, - 1786, 1812, 1037, 1585, 1812, 1038, 1745, 1812, 1039, 1503, 1812, 1040, - 1167, 1812, 738, 752, 842, 1041, 1768, 1812, 1042, 1043, 1248, 1439, - 1562, 1812, 1043, 1550, 1812, 7, 1044, 1045, 1046, 1053, 1532, 1535, - 1812, 841, 842, 1045, 1611, 1812, 1044, 1045, 1046, 1628, 1812, 66, - 1047, 1048, 1053, 1447, 1532, 1535, 1812, 840, 1005, 1048, 1611, 1812, - 1049, 1452, 1628, 1694, 1745, 1812, 1050, 1786, 1812, 1051, 1786, 1812, - 1052, 1133, 1812, 1044, 1047, 1053, 1532, 1535, 1557, 1628, 1694, 1812, - 1053, 1054, 1812, 1055, 1056, 1812, 520, 625, 838, 839, 1056, 1812, - 1057, 1191, 1690, 1812, 375, 839, 1056, 1058, 1424, 1768, 1812, 489, - 585, 1059, 1148, 1812, 648, 752, 1060, 1449, 1768, 1812, 589, 866, - 1061, 1745, 1812, 1062, 1063, 1812, 774, 780, 784, 970, 1063, 1812, - 517, 1064, 1812, 759, 765, 766, 1065, 1066, 1812, 443, 446, 552, - 1021, 1066, 1786, 1812, 1067, 1745, 1812, 730, 1068, 1070, 1071, 1465, - 1812, 1069, 1465, 1812, 1069, 1070, 1331, 1465, 1565, 1812, 1069, 1071, - 1331, 1465, 1566, 1812, 750, 1072, 1628, 1694, 1745, 1812, 590, 591, - 974, 1073, 1084, 1812, 1074, 1745, 1812, 747, 1075, 1459, 1460, 1667, - 1812, 550, 1076, 1077, 1794, 1795, 1812, 400, 414, 1077, 1189, 1343, - 1786, 1812, 104, 825, 923, 1078, 1458, 1741, 1812, 227, 939, 940, - 1079, 1169, 1170, 1812, 1080, 1111, 1812, 1081, 1745, 1812, 1082, 1086, - 1786, 1812, 1064, 1083, 1151, 1225, 1424, 1529, 1812, 590, 591, 1073, - 1084, 1085, 1812, 303, 304, 503, 975, 1085, 1786, 1812, 202, 377, - 680, 1082, 1086, 1786, 1812, 1087, 1088, 1812, 732, 1088, 1289, 1290, - 1454, 1812, 1089, 1268, 1476, 1725, 1812, 626, 731, 1090, 1091, 1728, - 1812, 55, 720, 1091, 1157, 1458, 1728, 1812, 337, 342, 754, 1092, - 1093, 1786, 1812, 337, 342, 754, 1092, 1093, 1786, 1812, 1018, 1094, - 1095, 1141, 1812, 1018, 1094, 1095, 1141, 1812, 60, 837, 1096, 1097, - 1548, 1768, 1812, 391, 822, 964, 1008, 1096, 1097, 1646, 1812, 114, - 481, 1098, 1099, 1124, 1125, 1812, 1073, 1098, 1099, 
1124, 1125, 1812, - 1100, 1110, 1762, 1812, 1101, 1777, 1796, 1812, 538, 555, 570, 1102, - 1103, 1812, 206, 387, 687, 1103, 1703, 1786, 1812, 1104, 1812, 249, - 250, 658, 1105, 1339, 1768, 1812, 358, 1106, 1109, 1110, 1387, 1388, - 1812, 1107, 1108, 1295, 1701, 1812, 657, 978, 1002, 1108, 1295, 1701, - 1812, 1109, 1110, 1812, 1100, 1106, 1110, 1387, 1388, 1762, 1812, 604, - 1111, 1174, 1669, 1788, 1812, 914, 916, 1112, 1113, 1327, 1812, 1113, - 1745, 1812, 145, 454, 1114, 1301, 1654, 1766, 1812, 450, 461, 491, - 1115, 1745, 1786, 1812, 931, 1116, 1214, 1473, 1474, 1812, 918, 929, - 1116, 1117, 1394, 1812, 614, 1019, 1118, 1397, 1458, 1812, 1119, 1745, - 1812, 892, 1120, 1121, 1500, 1812, 890, 892, 1121, 1221, 1305, 1500, - 1812, 1122, 1483, 1812, 112, 113, 592, 1123, 1745, 1786, 1812, 1124, - 1125, 1812, 481, 815, 1098, 1099, 1125, 1812, 28, 376, 917, 1126, - 1520, 1786, 1812, 1127, 1128, 1489, 1669, 1788, 1812, 987, 988, 1026, - 1128, 1812, 776, 804, 1129, 1281, 1812, 48, 1017, 1130, 1131, 1132, - 1458, 1812, 478, 745, 1130, 1131, 1532, 1665, 1768, 1812, 1017, 1130, - 1131, 1132, 1458, 1812, 638, 726, 902, 1133, 1134, 1812, 803, 846, - 1134, 1344, 1803, 1812, 826, 1135, 1162, 1233, 1708, 1812, 50, 361, - 1136, 1137, 1674, 1786, 1812, 1136, 1137, 1812, 432, 444, 925, 1138, - 1139, 1786, 1812, 432, 444, 925, 1138, 1139, 1786, 1812, 253, 582, - 1140, 1141, 1764, 1768, 1812, 1018, 1094, 1095, 1140, 1141, 1812, 1142, - 1745, 1812, 1014, 1015, 1143, 1628, 1812, 1144, 1145, 1165, 1262, 1812, - 1144, 1145, 1165, 1262, 1812, 305, 314, 1146, 1185, 1745, 1786, 1812, - 46, 660, 1147, 1464, 1645, 1768, 1812, 249, 399, 836, 1148, 1721, - 1768, 1812, 1149, 1812, 1083, 1150, 1225, 1424, 1812, 1083, 1151, 1152, - 1529, 1812, 1083, 1151, 1152, 1529, 1812, 1153, 1164, 1165, 1261, 1771, - 1812, 949, 952, 1154, 1628, 1812, 1155, 1156, 1239, 1243, 1699, 1812, - 1156, 1745, 1812, 45, 258, 327, 419, 711, 1056, 1091, 1125, 1157, - 1158, 1505, 1768, 1812, 1157, 1158, 1812, 679, 1035, 1159, 
1160, 1308, - 1812, 677, 678, 679, 758, 1160, 1812, 1161, 1338, 1602, 1603, 1606, - 1607, 1812, 734, 1135, 1162, 1163, 1233, 1812, 856, 857, 1163, 1315, - 1398, 1812, 1153, 1164, 1165, 1771, 1812, 945, 1144, 1153, 1165, 1262, - 1771, 1812, 1166, 1312, 1786, 1812, 1167, 1168, 1812, 271, 659, 660, - 1168, 1463, 1464, 1812, 1169, 1170, 1812, 765, 939, 940, 1079, 1170, - 1812, 752, 1171, 1172, 1173, 1812, 1017, 1172, 1812, 752, 1171, 1172, - 1173, 1812, 1111, 1174, 1332, 1669, 1788, 1812, 1175, 1590, 1786, 1812, - 830, 1176, 1786, 1812, 62, 251, 1177, 1178, 1726, 1786, 1812, 1177, - 1178, 1812, 69, 434, 955, 1179, 1180, 1786, 1812, 69, 434, 955, - 1179, 1180, 1786, 1812, 98, 449, 1181, 1182, 1558, 1786, 1812, 98, - 449, 1181, 1182, 1558, 1786, 1812, 696, 699, 1183, 1184, 1532, 1535, - 1628, 1694, 1812, 1183, 1184, 1628, 1694, 1745, 1812, 1185, 1745, 1812, - 120, 457, 1186, 1187, 1781, 1786, 1812, 120, 457, 1186, 1187, 1781, - 1786, 1812, 898, 1188, 1628, 1694, 1745, 1812, 1189, 1786, 1812, 1190, - 1786, 1812, 979, 1057, 1191, 1578, 1579, 1690, 1812, 1192, 1324, 1786, - 1812, 199, 564, 927, 997, 998, 1193, 1812, 234, 426, 522, 1194, - 1195, 1786, 1812, 1194, 1195, 1786, 1812, 1196, 1206, 1208, 1419, 1671, - 1812, 1197, 1812, 1198, 1683, 1812, 1199, 1745, 1812, 594, 640, 1200, - 1768, 1779, 1812, 1201, 1786, 1812, 1202, 1786, 1812, 1203, 1745, 1812, - 678, 1204, 1405, 1745, 1812, 812, 1205, 1279, 1286, 1371, 1652, 1812, - 1093, 1206, 1207, 1670, 1671, 1812, 1206, 1207, 1208, 1523, 1580, 1670, - 1671, 1812, 1092, 1207, 1208, 1670, 1671, 1812, 1209, 1210, 1762, 1784, - 1792, 1812, 1210, 1745, 1812, 233, 837, 1211, 1548, 1603, 1768, 1812, - 478, 1212, 1455, 1812, 1213, 1466, 1786, 1812, 458, 459, 559, 1214, - 1215, 1786, 1812, 458, 459, 559, 1214, 1215, 1786, 1812, 1216, 1786, - 1812, 570, 1217, 1636, 1812, 857, 1218, 1219, 1315, 1398, 1812, 797, - 1218, 1219, 1315, 1398, 1812, 181, 660, 1220, 1221, 1464, 1768, 1812, - 10, 890, 891, 1121, 1220, 1221, 1501, 1812, 1222, 1745, 
1812, 1223, - 1786, 1812, 1224, 1786, 1812, 1083, 1150, 1225, 1424, 1812, 1226, 1366, - 1812, 72, 407, 965, 1227, 1228, 1786, 1812, 1227, 1228, 1812, 1229, - 1234, 1241, 1745, 1812, 1230, 1231, 1812, 647, 1030, 1231, 1477, 1610, - 1812, 1232, 1233, 1812, 245, 734, 826, 1135, 1162, 1233, 1812, 1234, - 1812, 1235, 1745, 1812, 225, 226, 1236, 1359, 1614, 1786, 1812, 1237, - 1277, 1628, 1694, 1745, 1812, 1238, 1242, 1812, 1139, 1238, 1239, 1241, - 1242, 1812, 563, 685, 686, 1240, 1677, 1812, 1229, 1234, 1238, 1239, - 1241, 1242, 1243, 1812, 1086, 1239, 1242, 1243, 1699, 1812, 1138, 1238, - 1241, 1242, 1243, 1812, 1244, 1796, 1812, 853, 1076, 1245, 1246, 1342, - 1812, 1246, 1745, 1812, 1247, 1421, 1812, 1042, 1248, 1439, 1549, 1562, - 1812, 1248, 1249, 1812, 921, 1250, 1439, 1475, 1562, 1812, 1250, 1251, - 1812, 1252, 1591, 1812, 1253, 1254, 1346, 1347, 1349, 1350, 1812, 1254, - 1318, 1812, 1255, 1270, 1812, 684, 999, 1256, 1494, 1592, 1812, 1256, - 1257, 1812, 367, 373, 809, 1258, 1259, 1786, 1812, 367, 373, 809, - 1258, 1259, 1786, 1812, 260, 261, 979, 1260, 1751, 1768, 1812, 205, - 940, 1153, 1261, 1768, 1812, 40, 967, 1144, 1145, 1165, 1261, 1262, - 1812, 287, 1001, 1040, 1263, 1277, 1532, 1535, 1812, 1264, 1745, 1812, - 1265, 1266, 1304, 1719, 1812, 1265, 1266, 1304, 1719, 1812, 922, 1267, - 1268, 1391, 1812, 741, 922, 1268, 1391, 1476, 1725, 1812, 1269, 1561, - 1812, 796, 999, 1270, 1592, 1770, 1812, 624, 1271, 1812, 1272, 1275, - 1812, 1272, 1273, 1274, 1275, 1723, 1812, 1272, 1273, 1274, 1275, 1554, - 1812, 1194, 1273, 1275, 1553, 1554, 1812, 1205, 1276, 1371, 1652, 1812, - 1237, 1263, 1277, 1278, 1532, 1535, 1628, 1694, 1812, 286, 961, 1277, - 1278, 1287, 1532, 1535, 1812, 1205, 1279, 1280, 1286, 1812, 1205, 1279, - 1280, 1286, 1812, 260, 262, 1281, 1282, 1318, 1768, 1812, 1281, 1282, - 1812, 580, 774, 1226, 1283, 1366, 1707, 1812, 1284, 1285, 1303, 1304, - 1812, 994, 1284, 1285, 1303, 1304, 1812, 1205, 1279, 1280, 1286, 1651, - 1812, 961, 1278, 1287, 1628, 
1812, 745, 1288, 1514, 1812, 732, 1088, - 1289, 1290, 1291, 1812, 691, 872, 1087, 1088, 1290, 1812, 732, 1074, - 1289, 1290, 1291, 1812, 735, 816, 862, 863, 1292, 1812, 289, 907, - 1172, 1293, 1328, 1756, 1768, 1812, 845, 999, 1294, 1295, 1592, 1812, - 1107, 1108, 1295, 1701, 1812, 1296, 1786, 1812, 96, 277, 541, 1297, - 1298, 1786, 1812, 1297, 1298, 1786, 1812, 1299, 1300, 1812, 901, 1300, - 1598, 1599, 1605, 1812, 1301, 1766, 1812, 1302, 1745, 1812, 1284, 1285, - 1303, 1304, 1812, 956, 1265, 1284, 1285, 1304, 1719, 1812, 1305, 1306, - 1812, 248, 641, 690, 808, 1306, 1700, 1812, 1307, 1509, 1812, 1035, - 1159, 1160, 1308, 1309, 1812, 1309, 1745, 1812, 919, 1016, 1310, 1713, - 1714, 1812, 1012, 1310, 1311, 1661, 1680, 1812, 117, 409, 1166, 1312, - 1313, 1786, 1812, 1312, 1313, 1812, 1314, 1382, 1800, 1801, 1805, 1806, - 1812, 856, 857, 1163, 1315, 1316, 1812, 365, 385, 763, 1316, 1399, - 1786, 1812, 152, 158, 1023, 1317, 1745, 1786, 1812, 208, 717, 740, - 1318, 1319, 1681, 1812, 740, 1318, 1319, 1681, 1689, 1812, 1320, 1745, - 1812, 95, 999, 1321, 1322, 1457, 1458, 1812, 89, 91, 151, 317, - 676, 880, 1135, 1321, 1322, 1640, 1768, 1812, 1323, 1730, 1786, 1812, - 118, 421, 1192, 1324, 1325, 1786, 1812, 1324, 1325, 1812, 25, 75, - 165, 388, 902, 940, 998, 1326, 1402, 1749, 1768, 1812, 914, 916, - 1112, 1327, 1412, 1812, 634, 637, 1327, 1328, 1443, 1507, 1812, 1329, - 1435, 1786, 1812, 546, 676, 813, 1330, 1331, 1812, 1069, 1070, 1071, - 1331, 1465, 1812, 1332, 1333, 1537, 1538, 1812, 433, 674, 1332, 1333, - 1462, 1537, 1538, 1812, 195, 196, 1135, 1334, 1640, 1768, 1806, 1812, - 1335, 1336, 1812, 700, 722, 756, 1196, 1336, 1751, 1812, 868, 1337, - 1458, 1812, 1338, 1339, 1812, 232, 1339, 1635, 1636, 1704, 1705, 1812, - 320, 767, 1340, 1346, 1452, 1728, 1733, 1812, 428, 768, 1341, 1349, - 1452, 1728, 1733, 1812, 550, 1342, 1343, 1794, 1795, 1812, 400, 414, - 1077, 1189, 1343, 1786, 1812, 803, 846, 1134, 1344, 1345, 1812, 418, - 424, 655, 1345, 1786, 1804, 1812, 1253, 
1346, 1347, 1348, 1812, 653, - 1253, 1346, 1347, 1348, 1812, 1253, 1346, 1347, 1348, 1812, 1253, 1349, - 1350, 1351, 1812, 1253, 1349, 1350, 1351, 1567, 1812, 1253, 1349, 1350, - 1351, 1812, 827, 1352, 1353, 1376, 1626, 1633, 1812, 1020, 1353, 1812, - 1354, 1812, 135, 296, 598, 1355, 1356, 1768, 1812, 1355, 1356, 1812, - 302, 602, 640, 1357, 1509, 1718, 1764, 1812, 673, 1358, 1359, 1384, - 1385, 1812, 225, 226, 1236, 1359, 1614, 1786, 1812, 1360, 1361, 1461, - 1538, 1812, 1360, 1361, 1461, 1538, 1812, 1362, 1506, 1541, 1542, 1545, - 1546, 1812, 980, 1363, 1364, 1551, 1692, 1812, 932, 980, 1363, 1364, - 1551, 1812, 864, 1365, 1786, 1812, 610, 1283, 1366, 1367, 1707, 1812, - 623, 632, 1367, 1786, 1812, 82, 422, 1009, 1368, 1369, 1786, 1812, - 82, 422, 1009, 1368, 1369, 1786, 1812, 707, 760, 766, 1065, 1370, - 1812, 1205, 1276, 1371, 1652, 1812, 558, 1372, 1786, 1812, 1373, 1623, - 1812, 1374, 1600, 1602, 1628, 1812, 73, 74, 78, 551, 827, 1191, - 1336, 1375, 1568, 1681, 1768, 1812, 4, 565, 881, 1352, 1375, 1376, - 1626, 1812, 1377, 1619, 1630, 1786, 1812, 1378, 1812, 1379, 1812, 852, - 1380, 1491, 1492, 1496, 1497, 1812, 751, 1381, 1390, 1396, 1434, 1812, - 1382, 1383, 1812, 451, 480, 757, 860, 880, 1383, 1812, 1384, 1385, - 1812, 903, 942, 1358, 1385, 1623, 1812, 100, 386, 519, 1386, 1745, - 1786, 1812, 1387, 1388, 1812, 1106, 1109, 1110, 1229, 1388, 1812, 1389, - 1434, 1812, 1259, 1389, 1390, 1395, 1434, 1812, 922, 1267, 1268, 1391, - 1392, 1812, 183, 1135, 1391, 1392, 1768, 1812, 1393, 1628, 1678, 1694, - 1745, 1812, 929, 1116, 1117, 1394, 1474, 1812, 1379, 1389, 1390, 1395, - 1396, 1434, 1698, 1812, 1258, 1389, 1395, 1396, 1434, 1812, 29, 86, - 182, 322, 1019, 1397, 1548, 1568, 1581, 1636, 1768, 1774, 1812, 856, - 857, 1163, 1398, 1399, 1812, 365, 385, 763, 1316, 1399, 1786, 1812, - 1225, 1400, 1479, 1669, 1788, 1812, 307, 310, 630, 1401, 1745, 1786, - 1812, 85, 1326, 1402, 1403, 1404, 1458, 1812, 502, 542, 609, 804, - 999, 1402, 1403, 1532, 1728, 1741, 1768, 
1812, 1326, 1402, 1403, 1404, - 1458, 1812, 1405, 1812, 573, 1406, 1497, 1768, 1812, 993, 1025, 1026, - 1407, 1408, 1812, 0, 2, 171, 711, 1056, 1407, 1408, 1505, 1768, - 1812, 9, 43, 192, 439, 1283, 1328, 1409, 1599, 1664, 1768, 1809, - 1812, 1410, 1745, 1812, 270, 425, 1320, 1411, 1745, 1786, 1812, 914, - 916, 1327, 1412, 1413, 1812, 106, 298, 556, 869, 1413, 1786, 1812, - 1013, 1414, 1415, 1416, 1812, 1013, 1414, 1415, 1416, 1812, 894, 1013, - 1414, 1415, 1416, 1812, 1013, 1417, 1418, 1519, 1812, 1013, 1417, 1418, - 1519, 1812, 1196, 1206, 1208, 1419, 1420, 1812, 1420, 1745, 1812, 889, - 1421, 1422, 1812, 1284, 1421, 1422, 1812, 1423, 1439, 1562, 1790, 1800, - 1812, 57, 1058, 1083, 1150, 1225, 1424, 1530, 1812, 1425, 1786, 1812, - 1426, 1786, 1812, 886, 888, 963, 1427, 1532, 1535, 1628, 1694, 1812, - 1427, 1428, 1812, 1429, 1601, 1606, 1628, 1812, 1430, 1662, 1812, 736, - 1430, 1431, 1432, 1662, 1812, 1430, 1431, 1432, 1433, 1662, 1812, 737, - 1430, 1432, 1433, 1662, 1812, 1381, 1390, 1396, 1434, 1435, 1812, 110, - 301, 561, 1329, 1435, 1786, 1812, 1436, 1437, 1812, 496, 620, 639, - 640, 1437, 1812, 550, 1354, 1438, 1745, 1812, 1439, 1741, 1812, 272, - 275, 1106, 1440, 1526, 1768, 1812, 952, 953, 1441, 1442, 1812, 950, - 952, 953, 1442, 1449, 1706, 1812, 593, 1328, 1443, 1444, 1507, 1812, - 691, 1149, 1444, 1745, 1812, 101, 750, 1445, 1446, 1532, 1535, 1759, - 1812, 1445, 1446, 1628, 1759, 1812, 1047, 1048, 1447, 1628, 1812, 51, - 58, 263, 648, 953, 1172, 1328, 1448, 1599, 1768, 1809, 1812, 141, - 950, 951, 1060, 1442, 1448, 1449, 1812, 115, 750, 1450, 1451, 1532, - 1535, 1754, 1812, 1450, 1451, 1628, 1754, 1812, 1049, 1340, 1341, 1452, - 1628, 1694, 1728, 1733, 1812, 1452, 1453, 1812, 41, 176, 482, 1454, - 1637, 1786, 1812, 272, 276, 752, 1168, 1455, 1768, 1812, 1456, 1812, - 999, 1321, 1322, 1457, 1458, 1812, 475, 1459, 1460, 1812, 1075, 1297, - 1460, 1708, 1709, 1812, 1360, 1361, 1461, 1462, 1538, 1812, 462, 464, - 711, 1056, 1461, 1462, 1768, 1812, 1463, 
1464, 1812, 659, 660, 1168, - 1438, 1464, 1812, 1068, 1070, 1071, 1465, 1466, 1812, 13, 228, 701, - 1213, 1466, 1786, 1812, 42, 187, 484, 1467, 1468, 1786, 1812, 1467, - 1468, 1786, 1812, 1469, 1470, 1690, 1712, 1717, 1812, 1470, 1745, 1812, - 148, 393, 615, 1471, 1472, 1786, 1812, 1471, 1472, 1786, 1812, 1473, - 1474, 1812, 929, 1116, 1394, 1474, 1569, 1812, 1250, 1439, 1475, 1476, - 1562, 1812, 1089, 1268, 1476, 1725, 1812, 1230, 1231, 1477, 1478, 1610, - 1812, 910, 1431, 1433, 1478, 1662, 1812, 665, 1400, 1479, 1669, 1788, - 1812, 1479, 1480, 1812, 743, 1481, 1482, 1486, 1575, 1812, 547, 1482, - 1485, 1574, 1575, 1812, 908, 912, 1483, 1484, 1532, 1535, 1628, 1694, - 1812, 1483, 1484, 1628, 1694, 1745, 1812, 1482, 1485, 1486, 1574, 1575, - 1812, 595, 1485, 1486, 1574, 1575, 1812, 1487, 1488, 1812, 777, 778, - 1020, 1204, 1488, 1812, 1127, 1489, 1490, 1669, 1788, 1812, 993, 1489, - 1490, 1669, 1788, 1812, 1380, 1491, 1492, 1493, 1812, 313, 798, 1380, - 1406, 1491, 1492, 1493, 1812, 1380, 1491, 1492, 1493, 1812, 999, 1256, - 1494, 1495, 1592, 1812, 847, 1495, 1742, 1775, 1812, 1380, 1496, 1497, - 1498, 1812, 1380, 1406, 1496, 1497, 1498, 1812, 1380, 1496, 1497, 1498, - 1812, 1491, 1499, 1588, 1812, 892, 1120, 1121, 1500, 1501, 1812, 184, - 186, 194, 660, 1110, 1500, 1501, 1528, 1768, 1812, 1502, 1735, 1736, - 1738, 1739, 1750, 1812, 173, 500, 794, 1503, 1504, 1505, 1812, 500, - 1292, 1503, 1504, 1505, 1812, 794, 1394, 1503, 1504, 1505, 1812, 1506, - 1507, 1812, 338, 593, 637, 1328, 1443, 1507, 1812, 366, 1307, 1508, - 1509, 1763, 1764, 1812, 1432, 1508, 1509, 1763, 1764, 1812, 936, 1510, - 1511, 1512, 1812, 936, 1510, 1511, 1512, 1812, 936, 1003, 1510, 1511, - 1512, 1812, 1513, 1514, 1812, 283, 328, 1507, 1513, 1514, 1768, 1812, - 936, 1515, 1516, 1517, 1812, 936, 1515, 1516, 1517, 1812, 360, 936, - 1003, 1515, 1516, 1517, 1767, 1812, 456, 837, 1518, 1519, 1548, 1768, - 1812, 63, 894, 1013, 1417, 1418, 1518, 1519, 1812, 1126, 1520, 1786, - 1812, 1521, 1522, 1812, 
836, 1522, 1812, 1523, 1812, 161, 162, 509, - 1524, 1525, 1786, 1812, 161, 162, 509, 1524, 1525, 1786, 1812, 172, - 497, 668, 1526, 1527, 1528, 1812, 497, 1526, 1527, 1528, 1753, 1812, - 668, 853, 1526, 1527, 1528, 1531, 1812, 1083, 1151, 1152, 1529, 1530, - 1812, 442, 1056, 1529, 1530, 1768, 1812, 853, 1528, 1531, 1812, 478, - 614, 776, 804, 1131, 1403, 1532, 1655, 1768, 1812, 383, 506, 1532, - 1533, 1534, 1535, 1541, 1812, 746, 1532, 1533, 1534, 1535, 1536, 1628, - 1694, 1812, 1532, 1535, 1812, 384, 536, 1532, 1534, 1535, 1536, 1545, - 1812, 1332, 1333, 1537, 1538, 1812, 1039, 1332, 1333, 1360, 1461, 1538, - 1812, 167, 660, 1464, 1539, 1540, 1768, 1812, 34, 499, 785, 786, - 1539, 1540, 1577, 1812, 1362, 1541, 1542, 1543, 1812, 1171, 1362, 1541, - 1542, 1543, 1812, 1362, 1541, 1542, 1543, 1812, 1544, 1745, 1812, 1362, - 1545, 1546, 1547, 1812, 704, 1362, 1545, 1546, 1547, 1812, 1362, 1545, - 1546, 1547, 1812, 479, 836, 837, 906, 1548, 1688, 1812, 1248, 1439, - 1549, 1550, 1562, 1812, 1232, 1550, 1812, 832, 1551, 1552, 1691, 1692, - 1812, 64, 65, 933, 981, 1552, 1786, 1812, 1273, 1275, 1553, 1554, - 1555, 1812, 1272, 1274, 1275, 1554, 1722, 1812, 1222, 1273, 1553, 1554, - 1555, 1812, 628, 1556, 1721, 1736, 1768, 1812, 1053, 1557, 1628, 1694, - 1766, 1812, 1558, 1786, 1812, 1559, 1728, 1812, 1560, 1786, 1812, 709, - 1439, 1561, 1562, 1563, 1812, 1439, 1562, 1812, 1371, 1439, 1561, 1562, - 1563, 1812, 1564, 1649, 1786, 1812, 122, 123, 1201, 1565, 1566, 1786, - 1812, 122, 123, 1201, 1565, 1566, 1786, 1812, 1567, 1568, 1812, 1397, - 1568, 1812, 119, 299, 983, 1569, 1570, 1786, 1812, 1569, 1570, 1812, - 1571, 1745, 1812, 1572, 1786, 1812, 1573, 1786, 1812, 1574, 1575, 1812, - 1126, 1481, 1482, 1486, 1575, 1812, 499, 781, 782, 1576, 1577, 1812, - 166, 168, 660, 1528, 1576, 1577, 1768, 1812, 1578, 1579, 1812, 537, - 979, 1191, 1579, 1580, 1812, 1207, 1523, 1580, 1745, 1812, 658, 681, - 748, 819, 1581, 1727, 1812, 589, 1582, 1583, 1586, 1587, 1812, 340, - 402, 584, 1050, 
1583, 1786, 1812, 1584, 1585, 1812, 212, 214, 1037, - 1233, 1585, 1768, 1812, 1586, 1587, 1812, 588, 666, 681, 1582, 1587, - 1812, 568, 1499, 1588, 1812, 1588, 1589, 1812, 412, 415, 633, 1175, - 1590, 1786, 1812, 999, 1000, 1591, 1592, 1593, 1812, 999, 1592, 1812, - 600, 999, 1591, 1592, 1593, 1812, 672, 788, 1594, 1595, 1596, 1812, - 632, 671, 672, 762, 1595, 1812, 694, 788, 1594, 1595, 1596, 1812, - 959, 1597, 1809, 1812, 1598, 1599, 1812, 530, 901, 1289, 1299, 1300, - 1599, 1812, 410, 1374, 1600, 1602, 1678, 1728, 1733, 1812, 411, 1429, - 1601, 1606, 1678, 1728, 1733, 1812, 1161, 1602, 1603, 1604, 1812, 26, - 858, 1161, 1211, 1602, 1603, 1604, 1812, 1161, 1602, 1603, 1604, 1812, - 563, 1104, 1605, 1745, 1812, 1161, 1606, 1607, 1608, 1812, 858, 1161, - 1606, 1607, 1608, 1812, 1161, 1606, 1607, 1608, 1812, 1609, 1610, 1812, - 163, 647, 1230, 1231, 1477, 1610, 1812, 842, 1005, 1045, 1048, 1611, - 1612, 1812, 1612, 1808, 1812, 1613, 1745, 1812, 1614, 1786, 1812, 1615, - 1786, 1812, 874, 973, 1616, 1617, 1618, 1812, 875, 972, 973, 1182, - 1617, 1812, 874, 1544, 1616, 1617, 1618, 1812, 125, 352, 1377, 1619, - 1620, 1630, 1786, 1812, 1619, 1620, 1812, 1621, 1622, 1812, 1079, 1622, - 1812, 39, 224, 1373, 1623, 1624, 1786, 1812, 1623, 1624, 1786, 1812, - 121, 898, 1625, 1626, 1627, 1728, 1733, 1812, 881, 1352, 1376, 1626, - 1812, 1625, 1626, 1627, 1628, 1812, 1355, 1628, 1812, 177, 447, 652, - 1629, 1630, 1631, 1786, 1812, 472, 1629, 1630, 1631, 1786, 1812, 132, - 898, 1632, 1633, 1634, 1728, 1733, 1812, 827, 882, 1352, 1633, 1812, - 1628, 1632, 1633, 1634, 1812, 1635, 1636, 1812, 570, 1217, 1339, 1636, - 1704, 1705, 1812, 1454, 1637, 1786, 1812, 149, 490, 534, 1638, 1639, - 1640, 1812, 524, 534, 1638, 1639, 1640, 1812, 490, 1218, 1638, 1639, - 1640, 1812, 855, 1641, 1642, 1658, 1812, 855, 1641, 1642, 1658, 1812, - 855, 1643, 1644, 1645, 1812, 855, 1643, 1644, 1645, 1812, 77, 855, - 1147, 1643, 1644, 1645, 1659, 1812, 821, 1008, 1015, 1097, 1646, 1647, - 1812, 1647, 
1772, 1812, 139, 140, 787, 1648, 1745, 1786, 1812, 129, - 355, 1564, 1649, 1650, 1786, 1812, 1649, 1650, 1812, 210, 216, 223, - 880, 1135, 1286, 1640, 1651, 1768, 1812, 27, 859, 1205, 1276, 1371, - 1651, 1652, 1812, 631, 1378, 1653, 1654, 1812, 145, 454, 1114, 1301, - 1654, 1766, 1812, 56, 1458, 1532, 1655, 1656, 1657, 1812, 107, 222, - 274, 392, 778, 1191, 1336, 1655, 1656, 1681, 1768, 1812, 1458, 1532, - 1655, 1656, 1657, 1812, 855, 1641, 1642, 1658, 1659, 1812, 154, 660, - 1658, 1659, 1768, 1812, 403, 404, 1142, 1660, 1745, 1786, 1812, 1661, - 1745, 1812, 1431, 1433, 1478, 1662, 1663, 1812, 326, 329, 811, 867, - 1663, 1745, 1766, 1812, 44, 1409, 1458, 1664, 1665, 1666, 1812, 745, - 1131, 1664, 1665, 1768, 1812, 1409, 1458, 1664, 1665, 1666, 1812, 341, - 344, 513, 1667, 1668, 1786, 1812, 341, 344, 513, 1667, 1668, 1786, - 1812, 1403, 1669, 1812, 1670, 1671, 1812, 1196, 1206, 1208, 1590, 1671, - 1812, 1672, 1673, 1812, 200, 334, 1437, 1672, 1673, 1768, 1812, 1136, - 1674, 1786, 1812, 283, 284, 901, 1675, 1768, 1808, 1812, 79, 105, - 1425, 1676, 1677, 1786, 1812, 79, 105, 1425, 1676, 1677, 1786, 1812, - 1393, 1600, 1601, 1628, 1678, 1694, 1728, 1733, 1812, 1678, 1679, 1812, - 1012, 1310, 1311, 1680, 1714, 1812, 683, 717, 1318, 1319, 1680, 1681, - 1812, 1682, 1786, 1812, 1007, 1014, 1628, 1683, 1684, 1694, 1728, 1733, - 1812, 1628, 1683, 1684, 1694, 1745, 1812, 1685, 1786, 1812, 83, 84, - 586, 1686, 1745, 1786, 1812, 1687, 1812, 906, 1548, 1688, 1812, 1687, - 1689, 1715, 1745, 1812, 1469, 1690, 1712, 1716, 1717, 1812, 1691, 1692, - 1812, 558, 980, 1363, 1551, 1692, 1812, 948, 949, 1532, 1535, 1628, - 1693, 1694, 1695, 1812, 1034, 1694, 1812, 1628, 1693, 1694, 1695, 1786, - 1812, 229, 280, 1079, 1696, 1697, 1768, 1812, 1696, 1697, 1812, 1379, - 1395, 1698, 1745, 1812, 1155, 1239, 1242, 1243, 1699, 1812, 641, 793, - 808, 1306, 1699, 1700, 1812, 1107, 1108, 1295, 1701, 1702, 1812, 17, - 18, 330, 940, 998, 1701, 1702, 1749, 1768, 1812, 1103, 1703, 1786, - 1812, 1704, 
1705, 1812, 1339, 1635, 1636, 1698, 1705, 1812, 1706, 1707, - 1812, 306, 610, 1226, 1283, 1366, 1707, 1812, 1075, 1460, 1708, 1709, - 1710, 1812, 747, 1459, 1460, 1668, 1709, 1812, 833, 1075, 1708, 1709, - 1710, 1812, 1711, 1716, 1812, 1525, 1711, 1712, 1715, 1716, 1812, 1713, - 1714, 1812, 1012, 1310, 1467, 1680, 1714, 1812, 1687, 1689, 1711, 1712, - 1715, 1716, 1717, 1812, 1324, 1690, 1712, 1716, 1717, 1812, 1524, 1711, - 1715, 1716, 1717, 1812, 640, 1357, 1718, 1719, 1768, 1812, 99, 994, - 1265, 1266, 1304, 1718, 1719, 1812, 653, 1568, 1720, 1721, 1812, 1148, - 1721, 1812, 126, 128, 1223, 1722, 1723, 1786, 1812, 126, 128, 1223, - 1722, 1723, 1786, 1812, 36, 1135, 1162, 1724, 1725, 1768, 1812, 131, - 1089, 1268, 1392, 1476, 1724, 1725, 1812, 1177, 1726, 1786, 1812, 681, - 1581, 1727, 1812, 489, 542, 614, 626, 731, 1090, 1403, 1559, 1728, - 1768, 1812, 452, 529, 1728, 1729, 1732, 1733, 1735, 1812, 143, 354, - 1323, 1730, 1731, 1786, 1812, 1730, 1731, 1812, 824, 1628, 1694, 1728, - 1729, 1732, 1733, 1734, 1812, 1728, 1733, 1812, 453, 562, 1728, 1732, - 1733, 1734, 1738, 1812, 1502, 1735, 1736, 1737, 1812, 93, 851, 1502, - 1556, 1735, 1736, 1737, 1812, 1502, 1735, 1736, 1737, 1812, 1502, 1738, - 1739, 1740, 1812, 851, 1502, 1738, 1739, 1740, 1812, 1502, 1738, 1739, - 1740, 1812, 518, 609, 871, 999, 1078, 1403, 1741, 1768, 1812, 834, - 944, 1495, 1742, 1743, 1775, 1812, 1193, 1743, 1812, 281, 455, 539, - 1744, 1745, 1786, 1812, 281, 455, 471, 539, 820, 829, 861, 866, - 904, 995, 996, 1061, 1104, 1149, 1197, 1204, 1229, 1234, 1354, 1379, - 1405, 1438, 1444, 1456, 1523, 1580, 1605, 1687, 1689, 1698, 1745, 1752, - 1753, 1786, 1812, 960, 1746, 1786, 1812, 427, 581, 613, 1747, 1748, - 1749, 1812, 613, 832, 1747, 1748, 1749, 1812, 581, 1370, 1747, 1748, - 1749, 1812, 1750, 1751, 1812, 288, 700, 722, 1335, 1336, 1751, 1812, - 1752, 1812, 1745, 1752, 1753, 1782, 1812, 792, 1754, 1755, 1756, 1812, - 792, 1754, 1755, 1756, 1812, 792, 1293, 1754, 1755, 1756, 1812, 621, - 1757, 
1758, 1812, 962, 1757, 1758, 1812, 792, 1759, 1760, 1761, 1812, - 792, 1759, 1760, 1761, 1812, 80, 792, 941, 1293, 1759, 1760, 1761, - 1812, 1209, 1762, 1783, 1784, 1792, 1812, 1763, 1764, 1812, 801, 1307, - 1508, 1509, 1764, 1812, 1765, 1766, 1812, 474, 771, 773, 1766, 1812, - 715, 1517, 1721, 1767, 1768, 1812, 473, 239, 240, 548, 1769, 1786, - 1793, 1812, 999, 1270, 1592, 1770, 1771, 1812, 1153, 1164, 1165, 1771, - 1812, 142, 724, 947, 1772, 1773, 1774, 1812, 947, 996, 1772, 1773, - 1774, 1812, 724, 885, 1381, 1772, 1773, 1774, 1812, 847, 1495, 1742, - 1775, 1776, 1812, 14, 265, 940, 1749, 1768, 1775, 1776, 1812, 1018, - 1777, 1778, 1779, 1812, 1018, 1777, 1778, 1779, 1812, 175, 1018, 1140, - 1200, 1777, 1778, 1779, 1812, 1780, 1783, 1812, 1781, 1786, 1812, 1752, - 1753, 1780, 1782, 1783, 1784, 1792, 1812, 864, 1762, 1783, 1784, 1792, - 1812, 1769, 1780, 1782, 1783, 1784, 1812, 1785, 1786, 1812, 23, 38, - 49, 83, 84, 100, 112, 113, 139, 140, 144, 146, 147, 152, - 158, 164, 190, 191, 218, 219, 241, 242, 244, 246, 255, 256, - 257, 268, 269, 270, 294, 305, 307, 309, 310, 314, 343, 345, - 348, 349, 350, 359, 364, 368, 369, 370, 371, 380, 381, 386, - 403, 404, 406, 423, 425, 438, 445, 448, 450, 460, 461, 465, - 470, 491, 505, 519, 527, 554, 566, 567, 586, 592, 603, 605, - 611, 622, 623, 630, 644, 787, 828, 877, 930, 985, 1023, 1067, - 1081, 1119, 1142, 1185, 1203, 1216, 1235, 1264, 1320, 1367, 1745, 1766, - 1786, 1812, 728, 1669, 1787, 1788, 1789, 1812, 1669, 1788, 1812, 1271, - 1669, 1787, 1788, 1789, 1812, 791, 1423, 1439, 1562, 1790, 1812, 1790, - 1791, 1812, 1780, 1782, 1783, 1792, 1793, 1812, 239, 240, 548, 1769, - 1786, 1793, 1812, 1794, 1795, 1812, 853, 1076, 1177, 1342, 1795, 1812, - 1101, 1796, 1797, 1812, 1094, 1796, 1797, 1812, 188, 189, 523, 1786, - 1798, 1799, 1812, 188, 189, 523, 1786, 1798, 1799, 1812, 1314, 1800, - 1801, 1802, 1812, 68, 990, 1314, 1334, 1800, 1801, 1802, 1812, 1314, - 1800, 1801, 1802, 1812, 803, 846, 1134, 1803, 1804, 1812, 418, 424, 
- 655, 1345, 1786, 1804, 1812, 1314, 1805, 1806, 1807, 1812, 1314, 1334, - 1805, 1806, 1807, 1812, 1314, 1805, 1806, 1807, 1812, 429, 616, 937, - 1808, 1809, 1810, 1812, 616, 959, 1597, 1808, 1809, 1810, 1812, 773, - 937, 1808, 1809, 1810, 1812, 1812, 478, 479, 480, 481, 482, 483, - 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, - 497, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, - 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 522, - 523, 524, 525, 526, 527, 528, 529, 531, 532, 533, 534, 535, - 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, - 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, - 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, - 572, 574, 575, 576, 577, 578, 579, 581, 583, 584, 585, 586, - 587, 588, 589, 590, 591, 592, 593, 595, 596, 597, 598, 599, - 600, 601, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, - 613, 614, 615, 616, 617, 618, 620, 621, 622, 623, 624, 625, - 626, 627, 628, 629, 630, 631, 632, 633, 635, 636, 637, 638, - 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, - 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, - 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, - 675, 676, 677, 678, 679, 680, 681, 682, 684, 685, 686, 687, - 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, - 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, - 712, 713, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, - 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, - 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, - 749, 750, 751, 752, 753, 754, 755, 757, 758, 759, 760, 761, - 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, - 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, - 786, 787, 788, 789, 790, 791, 792, 794, 795, 796, 797, 798, - 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, - 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, - 824, 825, 826, 827, 828, 829, 830, 831, 
832, 833, 834, 835, - 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, - 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, - 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, - 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, - 884, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, - 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, - 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, - 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, - 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, - 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, - 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, - 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, - 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, - 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, - 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, - 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, - 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, - 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, - 1053, 1054, 1055, 1056, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, - 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, - 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, - 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1101, 1102, - 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, - 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, - 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, - 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, - 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, - 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, - 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 
1186, - 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, - 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, - 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, - 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, - 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, - 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, - 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, - 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, - 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, - 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, - 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, - 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, - 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, - 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, - 1356, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, - 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, - 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, - 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, - 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, - 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, - 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, - 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, - 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, - 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, - 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, - 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, - 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 
1512, - 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, 1523, 1524, - 1525, 1526, 1527, 1528, 1529, 1530, 1532, 1533, 1534, 1535, 1536, 1537, - 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, - 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, - 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, - 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, - 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1598, - 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, - 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, - 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, - 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, - 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, - 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, - 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, - 1683, 1684, 1685, 1686, 1687, 1689, 1690, 1691, 1692, 1693, 1694, 1695, - 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, - 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, - 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1728, 1729, 1730, 1731, 1732, - 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, - 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, - 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, - 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, - 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791, 1792, - 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, - 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812}; + 0, 346, 711, 727, 1408, 1, 696, 785, 1694, 2, 353, 839, 1056, 1408, 3, 666, 1786, 4, 565, + 1375, 1376, 5, 699, 781, 1694, 6, 22, 660, 1017, 1464, 7, 1044, 1045, 
1694, 8, 133, 388, 1170, + 1748, 1749, 9, 180, 1409, 1809, 1810, 10, 1220, 1221, 1501, 11, 335, 715, 1319, 1681, 1768, 12, 20, + 1004, 1328, 1443, 13, 1466, 1786, 14, 160, 940, 1170, 1776, 15, 243, 851, 1319, 1681, 16, 236, 851, + 1191, 1579, 17, 282, 1702, 1748, 1749, 18, 285, 940, 1170, 1702, 19, 830, 1786, 12, 20, 204, 738, + 1300, 1328, 1443, 1599, 21, 864, 1786, 6, 22, 467, 1464, 1527, 1528, 23, 293, 356, 554, 723, 924, + 1745, 1786, 24, 233, 837, 858, 1548, 25, 134, 927, 998, 1326, 26, 858, 1211, 1603, 27, 859, 1651, + 1652, 28, 1126, 1786, 29, 169, 1397, 1636, 1705, 30, 54, 819, 822, 1581, 31, 60, 822, 837, 1548, + 32, 583, 651, 1694, 33, 74, 312, 722, 1336, 1768, 34, 1539, 1540, 1577, 35, 302, 994, 1509, 1764, + 36, 183, 1135, 1162, 1724, 1768, 37, 842, 1004, 1041, 38, 367, 373, 566, 688, 810, 1745, 1786, 39, + 1623, 1786, 40, 967, 1261, 1262, 41, 1454, 1786, 42, 1467, 1786, 43, 97, 1328, 1409, 1443, 44, 1409, + 1664, 1665, 45, 59, 711, 727, 1157, 46, 154, 660, 1147, 1464, 1768, 47, 211, 216, 860, 880, 1768, + 48, 1017, 1130, 1131, 49, 432, 444, 544, 567, 926, 1745, 1786, 50, 1136, 1786, 51, 441, 1300, 1448, + 1599, 52, 662, 1417, 1694, 53, 327, 420, 1099, 1125, 1768, 30, 54, 60, 398, 819, 1581, 1636, 1705, + 55, 1091, 1157, 1728, 56, 1532, 1655, 1656, 57, 1058, 1424, 1530, 58, 440, 1328, 1443, 1448, 45, 59, + 259, 420, 711, 727, 1504, 1505, 31, 54, 60, 819, 837, 1096, 1548, 1581, 61, 198, 279, 808, 1700, + 1768, 62, 1177, 1786, 63, 894, 1518, 1519, 64, 139, 140, 933, 934, 981, 1552, 65, 139, 140, 933, + 934, 981, 1552, 66, 1047, 1048, 1694, 67, 614, 1019, 1397, 68, 990, 1334, 1801, 69, 146, 147, 955, + 958, 1179, 1180, 70, 970, 1786, 71, 692, 1515, 1694, 72, 1227, 1786, 73, 312, 1191, 1375, 1579, 33, + 74, 722, 1336, 1375, 75, 76, 902, 1133, 1326, 75, 76, 134, 902, 1133, 1768, 77, 1147, 1645, 1659, + 78, 311, 1319, 1375, 1681, 79, 152, 158, 1425, 1426, 1676, 1677, 80, 941, 1293, 1761, 81, 558, 1786, + 82, 164, 423, 1009, 1011, 1368, 1369, 83, 340, 402, 
586, 1051, 1686, 1745, 1786, 84, 340, 402, 586, + 1051, 1686, 1745, 1786, 85, 1326, 1402, 1403, 86, 170, 1397, 1773, 1774, 87, 908, 1643, 1694, 88, 912, + 1641, 1694, 89, 90, 1135, 1162, 1322, 89, 90, 178, 1162, 1639, 1640, 91, 92, 860, 880, 1322, 91, + 92, 178, 316, 676, 860, 880, 1330, 93, 851, 1556, 1736, 94, 180, 439, 1283, 1366, 1768, 95, 999, + 1321, 1322, 96, 1297, 1786, 43, 97, 179, 1300, 1443, 1599, 98, 190, 191, 1181, 1182, 1558, 1560, 99, + 994, 1718, 1719, 100, 365, 385, 519, 764, 1386, 1745, 1786, 101, 1445, 1694, 1759, 102, 664, 1414, 1694, + 103, 185, 186, 1110, 1388, 1768, 104, 825, 1078, 1741, 105, 152, 158, 1425, 1426, 1676, 1677, 106, 1413, + 1786, 107, 389, 722, 1336, 1656, 108, 1007, 1008, 1694, 109, 1014, 1015, 1694, 110, 1435, 1786, 111, 218, + 219, 919, 920, 1682, 1685, 112, 400, 414, 592, 1123, 1190, 1745, 1786, 113, 400, 414, 592, 1123, 1190, + 1745, 1786, 114, 1098, 1099, 1125, 115, 1450, 1694, 1754, 116, 218, 219, 919, 920, 1682, 1685, 117, 1312, + 1786, 118, 1324, 1786, 119, 1569, 1786, 120, 241, 242, 1186, 1187, 1781, 1785, 121, 1625, 1626, 1694, 122, + 244, 246, 1201, 1202, 1565, 1566, 123, 244, 246, 1201, 1202, 1565, 1566, 124, 726, 902, 1133, 125, 1619, + 1786, 126, 256, 257, 1223, 1224, 1722, 1723, 127, 571, 1786, 128, 256, 257, 1223, 1224, 1722, 1723, 129, + 1649, 1786, 130, 463, 464, 711, 727, 1768, 131, 1392, 1724, 1725, 132, 1632, 1633, 1694, 8, 133, 134, + 165, 927, 998, 1748, 1749, 25, 76, 133, 134, 902, 927, 998, 1133, 135, 297, 598, 905, 1355, 136, + 268, 269, 487, 488, 817, 818, 137, 268, 269, 487, 488, 817, 818, 138, 886, 890, 1694, 64, 65, + 139, 787, 934, 1648, 1745, 1786, 64, 65, 140, 787, 934, 1648, 1745, 1786, 141, 1060, 1448, 1449, 142, + 1772, 1773, 1774, 143, 1730, 1786, 144, 300, 308, 557, 578, 985, 1786, 145, 281, 455, 1114, 1301, 1302, + 1654, 1745, 1766, 69, 146, 434, 603, 795, 958, 1745, 1786, 69, 147, 434, 603, 795, 958, 1745, 1786, + 148, 1471, 1786, 149, 1638, 1639, 1640, 150, 577, 1521, 1694, 151, 316, 
676, 1322, 1330, 79, 105, 152, + 1023, 1317, 1426, 1745, 1786, 153, 836, 837, 1548, 46, 154, 660, 1464, 1659, 155, 948, 950, 1694, 156, + 949, 952, 1694, 157, 289, 907, 1328, 1443, 1768, 79, 105, 158, 1023, 1317, 1426, 1745, 1786, 159, 167, + 168, 1527, 1528, 1768, 14, 160, 266, 848, 940, 1170, 1748, 1749, 161, 307, 310, 509, 511, 1524, 1525, + 162, 307, 310, 509, 511, 1524, 1525, 163, 1231, 1477, 1610, 82, 164, 422, 527, 705, 1011, 1745, 1786, + 133, 165, 1326, 1748, 1749, 166, 167, 660, 1464, 1577, 159, 166, 167, 660, 1464, 1527, 1528, 1539, 159, + 168, 1527, 1528, 1577, 29, 169, 170, 174, 1636, 1705, 1773, 1774, 86, 169, 170, 1768, 1773, 1774, 171, + 347, 1408, 1504, 1505, 172, 1526, 1527, 1528, 173, 1503, 1504, 1505, 169, 174, 182, 323, 819, 1581, 1636, + 1705, 175, 1140, 1200, 1779, 176, 1454, 1786, 177, 1629, 1630, 1786, 90, 92, 178, 317, 860, 880, 1639, + 1640, 97, 179, 180, 192, 1300, 1599, 1809, 1810, 9, 94, 179, 180, 1283, 1366, 1809, 1810, 181, 185, + 194, 660, 1220, 1464, 1527, 1528, 174, 182, 819, 1397, 1581, 36, 183, 1135, 1162, 1392, 184, 185, 1501, + 1527, 1528, 103, 181, 184, 185, 1110, 1388, 1527, 1528, 103, 186, 1110, 1388, 1501, 187, 1467, 1786, 188, + 348, 349, 523, 525, 1798, 1799, 189, 348, 349, 523, 525, 1798, 1799, 98, 190, 449, 605, 612, 1560, + 1745, 1786, 98, 191, 449, 605, 612, 1560, 1745, 1786, 179, 192, 1300, 1409, 1599, 193, 778, 1020, 1488, + 181, 194, 660, 1464, 1501, 195, 416, 1135, 1162, 1334, 196, 417, 1334, 1639, 1640, 197, 666, 1786, 61, + 198, 808, 1017, 1700, 199, 927, 998, 1193, 200, 695, 1437, 1508, 1673, 201, 334, 695, 957, 1610, 202, + 1086, 1786, 203, 204, 1004, 1300, 1599, 20, 203, 204, 1300, 1599, 1768, 205, 374, 940, 1170, 1261, 206, + 1103, 1786, 207, 598, 883, 1157, 1403, 208, 1318, 1319, 1681, 209, 211, 223, 859, 1135, 1162, 1639, 1640, + 210, 211, 1639, 1640, 1651, 47, 209, 210, 211, 860, 880, 1639, 1640, 212, 753, 1233, 1585, 1638, 213, + 214, 753, 813, 1383, 213, 214, 813, 1037, 1585, 1768, 215, 369, 371, 712, 
713, 1572, 1573, 47, 216, + 860, 880, 1651, 217, 369, 371, 712, 713, 1572, 1573, 111, 116, 218, 935, 1067, 1685, 1745, 1786, 111, + 116, 219, 935, 1067, 1685, 1745, 1786, 220, 221, 825, 1231, 1477, 220, 221, 292, 379, 645, 689, 1231, + 1477, 222, 396, 1191, 1579, 1656, 209, 223, 1135, 1162, 1651, 224, 1623, 1786, 225, 380, 381, 1236, 1359, + 1614, 1615, 226, 380, 381, 1236, 1359, 1614, 1615, 227, 940, 1079, 1170, 228, 1466, 1786, 229, 854, 1079, + 1696, 1747, 230, 280, 726, 854, 1193, 231, 901, 1300, 1599, 232, 1339, 1636, 1705, 24, 233, 333, 819, + 837, 1211, 1548, 1581, 234, 1194, 1786, 235, 517, 625, 883, 986, 16, 236, 243, 1191, 1579, 1768, 237, + 238, 986, 1098, 1503, 237, 238, 585, 883, 1098, 1768, 239, 403, 404, 548, 549, 1769, 1793, 240, 403, + 404, 548, 549, 1769, 1793, 120, 241, 457, 507, 611, 1766, 1785, 1786, 120, 242, 457, 507, 611, 1766, + 1785, 1786, 15, 236, 243, 628, 1191, 1319, 1579, 1681, 122, 123, 244, 775, 828, 1202, 1745, 1786, 245, + 1135, 1162, 1233, 122, 123, 246, 775, 828, 1202, 1745, 1786, 247, 960, 1786, 248, 808, 1306, 1700, 249, + 658, 836, 1105, 1148, 250, 399, 1105, 1339, 1772, 251, 1177, 1786, 252, 968, 1786, 253, 254, 1140, 1509, + 1764, 253, 254, 594, 1509, 1764, 1768, 255, 337, 342, 510, 755, 1264, 1745, 1786, 126, 128, 256, 515, + 622, 1224, 1745, 1786, 126, 128, 257, 515, 622, 1224, 1745, 1786, 258, 259, 839, 1056, 1157, 59, 258, + 259, 711, 727, 839, 260, 979, 1260, 1281, 1318, 261, 262, 1020, 1260, 1751, 261, 262, 1020, 1281, 1282, + 1768, 263, 264, 1448, 1809, 1810, 263, 264, 441, 1768, 1809, 1810, 265, 266, 1748, 1749, 1776, 160, 265, + 266, 1748, 1749, 1768, 267, 456, 837, 894, 1548, 136, 137, 268, 488, 682, 1081, 1745, 1786, 136, 137, + 269, 488, 682, 1081, 1745, 1786, 270, 418, 424, 656, 1320, 1411, 1745, 1786, 271, 660, 1168, 1464, 272, + 1168, 1440, 1455, 1526, 273, 517, 711, 727, 274, 390, 778, 1488, 1656, 275, 276, 1106, 1306, 1440, 275, + 276, 752, 1306, 1455, 1768, 277, 1297, 1786, 278, 279, 1017, 1110, 1388, 61, 
278, 279, 467, 808, 1110, + 1388, 1700, 230, 280, 726, 1696, 1697, 1768, 145, 281, 454, 539, 1302, 1744, 1745, 1786, 17, 282, 285, + 331, 927, 998, 1748, 1749, 283, 901, 1507, 1514, 1675, 284, 328, 1675, 1707, 1808, 18, 282, 285, 940, + 977, 1170, 1748, 1749, 286, 961, 1278, 1694, 287, 1040, 1263, 1694, 288, 722, 1336, 1751, 157, 289, 1293, + 1328, 1443, 290, 960, 1786, 291, 292, 645, 689, 825, 221, 291, 292, 645, 689, 1768, 23, 293, 294, + 721, 723, 733, 872, 293, 294, 356, 554, 723, 924, 1745, 1786, 295, 968, 1786, 296, 297, 598, 1355, + 135, 296, 297, 598, 868, 905, 298, 1413, 1786, 299, 1569, 1786, 144, 300, 309, 576, 578, 736, 737, + 1630, 1786, 301, 1435, 1786, 35, 302, 319, 1231, 1357, 1477, 1509, 1764, 303, 305, 314, 503, 504, 975, + 1085, 304, 305, 314, 503, 504, 975, 1085, 303, 304, 305, 504, 1146, 1185, 1745, 1786, 306, 1283, 1366, + 1707, 161, 162, 307, 511, 630, 1401, 1745, 1786, 144, 308, 309, 576, 578, 736, 737, 1630, 1786, 300, + 308, 309, 557, 578, 985, 1786, 161, 162, 310, 511, 630, 1401, 1745, 1786, 78, 311, 312, 551, 1191, + 1319, 1579, 1681, 33, 73, 311, 312, 722, 1191, 1336, 1579, 313, 798, 1406, 1492, 303, 304, 314, 504, + 1146, 1185, 1745, 1786, 315, 693, 1510, 1694, 92, 151, 316, 676, 1330, 1768, 178, 317, 1322, 1639, 1640, + 318, 319, 994, 1231, 1477, 302, 318, 319, 1231, 1477, 1768, 320, 1340, 1346, 1694, 321, 710, 988, 1408, + 322, 323, 837, 1397, 1548, 174, 322, 323, 819, 837, 1581, 324, 558, 1786, 325, 676, 813, 1330, 326, + 1663, 1745, 53, 327, 1099, 1125, 1157, 284, 328, 1513, 1514, 1707, 1768, 329, 1663, 1745, 1766, 330, 331, + 927, 998, 1702, 282, 330, 331, 927, 998, 1768, 332, 333, 819, 858, 1581, 233, 332, 333, 819, 1581, + 1768, 201, 334, 957, 1672, 1673, 1768, 11, 335, 1003, 1319, 1681, 336, 830, 1786, 255, 337, 343, 754, + 755, 1092, 1093, 338, 1328, 1443, 1507, 339, 658, 819, 1581, 83, 84, 340, 584, 1050, 1051, 1583, 341, + 345, 350, 513, 514, 1667, 1668, 255, 342, 343, 754, 755, 1092, 1093, 337, 342, 343, 510, 755, 1264, + 
1745, 1786, 344, 345, 350, 513, 514, 1667, 1668, 341, 344, 345, 514, 930, 976, 1745, 1786, 0, 346, + 347, 353, 711, 727, 1504, 1505, 171, 346, 347, 1504, 1505, 1768, 188, 189, 348, 525, 545, 1119, 1745, + 1786, 188, 189, 349, 525, 545, 1119, 1745, 1786, 341, 344, 350, 514, 930, 976, 1745, 1786, 351, 645, + 689, 957, 352, 1619, 1630, 1786, 2, 346, 353, 710, 711, 727, 839, 1056, 354, 1730, 1786, 355, 1649, + 1786, 23, 294, 356, 721, 723, 733, 872, 357, 864, 1786, 358, 1106, 1110, 1388, 359, 401, 405, 505, + 608, 650, 1745, 1786, 360, 1003, 1517, 1767, 361, 1136, 1786, 362, 364, 370, 643, 646, 758, 1036, 363, + 364, 370, 643, 646, 758, 1036, 362, 363, 364, 646, 779, 1203, 1745, 1786, 100, 365, 386, 763, 764, + 1316, 1399, 366, 1508, 1509, 1764, 38, 367, 368, 809, 810, 1258, 1259, 367, 368, 373, 566, 688, 810, + 1745, 1786, 215, 217, 369, 508, 877, 1573, 1745, 1786, 362, 363, 370, 646, 779, 1203, 1745, 1786, 215, + 217, 371, 508, 877, 1573, 1745, 1786, 372, 379, 640, 714, 1509, 1764, 38, 368, 373, 809, 810, 1258, + 1259, 205, 374, 940, 967, 1170, 1768, 375, 442, 839, 1056, 1058, 1768, 376, 1126, 1786, 377, 1086, 1786, + 378, 379, 825, 1509, 1764, 221, 372, 378, 379, 1231, 1477, 1509, 1764, 225, 226, 380, 644, 989, 1615, + 1745, 1786, 225, 226, 381, 644, 989, 1615, 1745, 1786, 382, 625, 839, 1056, 383, 1533, 1541, 1694, 384, + 1536, 1545, 1694, 100, 385, 386, 763, 764, 1316, 1399, 365, 385, 386, 519, 764, 1386, 1745, 1786, 387, + 1103, 1786, 8, 388, 940, 1170, 1326, 107, 389, 390, 396, 722, 778, 1336, 1488, 274, 389, 390, 778, + 1488, 1768, 391, 822, 1096, 1097, 392, 395, 1319, 1656, 1681, 393, 1471, 1786, 394, 888, 892, 1694, 392, + 395, 396, 1191, 1319, 1579, 222, 389, 395, 396, 722, 1191, 1336, 1579, 397, 398, 822, 1636, 1705, 54, + 397, 398, 1636, 1705, 1768, 250, 399, 1148, 1721, 1768, 1772, 112, 113, 400, 1077, 1189, 1190, 1343, 359, + 401, 406, 547, 595, 649, 650, 83, 84, 402, 584, 1050, 1051, 1583, 239, 240, 403, 549, 1142, 1660, + 1745, 1786, 239, 240, 404, 
549, 1142, 1660, 1745, 1786, 359, 405, 406, 547, 595, 649, 650, 401, 405, + 406, 505, 608, 650, 1745, 1786, 407, 1227, 1786, 408, 979, 1191, 1579, 409, 1312, 1786, 410, 1600, 1602, + 1694, 411, 1601, 1606, 1694, 412, 1590, 1786, 413, 970, 1786, 112, 113, 414, 1077, 1189, 1190, 1343, 415, + 1590, 1786, 195, 416, 417, 990, 1135, 1162, 1639, 1640, 196, 416, 417, 1639, 1640, 1768, 270, 418, 425, + 655, 656, 1345, 1804, 419, 420, 1157, 1504, 1505, 53, 59, 419, 420, 1099, 1125, 1504, 1505, 421, 1324, + 1786, 164, 422, 423, 1009, 1011, 1368, 1369, 82, 422, 423, 527, 705, 1011, 1745, 1786, 270, 424, 425, + 655, 656, 1345, 1804, 418, 424, 425, 656, 1320, 1411, 1745, 1786, 426, 1194, 1786, 427, 1747, 1748, 1749, + 428, 1341, 1349, 1694, 429, 1808, 1809, 1810, 430, 977, 978, 1702, 431, 848, 944, 1776, 49, 432, 445, + 925, 926, 1138, 1139, 433, 674, 1333, 1462, 146, 147, 434, 955, 958, 1179, 1180, 435, 438, 448, 702, + 703, 762, 789, 1630, 1786, 436, 571, 1786, 437, 438, 448, 702, 703, 762, 789, 1630, 1786, 435, 437, + 438, 606, 703, 1216, 1786, 94, 439, 1283, 1366, 1409, 58, 440, 441, 648, 1300, 1328, 1443, 1599, 51, + 264, 440, 441, 1300, 1599, 1809, 1810, 375, 442, 839, 1056, 1530, 443, 450, 461, 552, 553, 1021, 1066, + 49, 444, 445, 925, 926, 1138, 1139, 432, 444, 445, 544, 567, 926, 1745, 1786, 446, 450, 461, 552, + 553, 1021, 1066, 447, 1629, 1786, 435, 437, 448, 606, 703, 1216, 1786, 190, 191, 449, 1181, 1182, 1558, + 1560, 443, 446, 450, 491, 553, 1115, 1745, 1786, 451, 860, 880, 1383, 452, 1694, 1729, 1735, 453, 1694, + 1734, 1738, 281, 454, 455, 1114, 1301, 1302, 1654, 1745, 1766, 145, 454, 455, 539, 1302, 1744, 1745, 1786, + 267, 456, 837, 1518, 1548, 1768, 241, 242, 457, 1186, 1187, 1781, 1785, 458, 460, 465, 559, 560, 1214, + 1215, 459, 460, 465, 559, 560, 1214, 1215, 458, 459, 460, 560, 618, 1235, 1745, 1786, 443, 446, 461, + 491, 553, 1115, 1745, 1786, 462, 463, 839, 1056, 1462, 130, 462, 463, 674, 711, 727, 839, 1056, 130, + 464, 711, 727, 1462, 458, 459, 465, 
560, 618, 1235, 1745, 1786, 466, 467, 1017, 1527, 1528, 22, 279, + 466, 467, 1110, 1388, 1527, 1528, 468, 476, 469, 477, 1786, 1745, 1630, 1768, 1766, 1458, 468, 469, 478, + 1212, 1455, 1812, 479, 837, 1812, 480, 860, 1812, 481, 1099, 1812, 482, 1454, 1812, 483, 750, 1812, 484, + 1467, 1812, 485, 1732, 1812, 486, 645, 1812, 487, 1786, 1812, 488, 1786, 1812, 489, 585, 1059, 1148, 1812, + 490, 1639, 1812, 491, 1745, 1812, 492, 558, 1812, 493, 662, 1417, 1628, 1812, 494, 664, 1414, 1628, 1812, + 495, 727, 1812, 496, 640, 1437, 1812, 497, 1528, 1812, 498, 579, 1594, 1812, 499, 531, 781, 785, 1540, + 1576, 1812, 500, 1505, 1812, 501, 538, 555, 820, 829, 928, 1102, 1812, 502, 943, 1584, 1696, 1812, 503, + 1786, 1812, 504, 1786, 1812, 505, 1745, 1812, 506, 1533, 1541, 1628, 1812, 241, 242, 507, 611, 1766, 1786, + 1812, 369, 371, 508, 877, 1745, 1786, 1812, 509, 1786, 1812, 255, 343, 510, 1264, 1745, 1786, 1812, 511, + 1786, 1812, 512, 1787, 1812, 513, 1786, 1812, 514, 1786, 1812, 256, 257, 515, 622, 1745, 1786, 1812, 516, + 524, 587, 712, 873, 1812, 273, 495, 517, 669, 711, 727, 1812, 518, 814, 1673, 1812, 519, 1745, 1812, + 520, 846, 991, 1344, 1803, 1812, 521, 660, 1159, 1812, 522, 1194, 1812, 523, 1786, 1812, 516, 524, 587, + 873, 896, 1812, 525, 1786, 1812, 526, 774, 780, 784, 1765, 1812, 527, 1745, 1812, 528, 960, 1812, 529, + 1628, 1729, 1735, 1812, 530, 1289, 1599, 1812, 531, 1106, 1812, 532, 968, 1812, 533, 583, 651, 1628, 1812, + 534, 1640, 1812, 535, 663, 1812, 536, 1536, 1545, 1628, 1812, 537, 1191, 1812, 501, 538, 928, 1102, 1799, + 1812, 539, 1786, 1812, 540, 1277, 1812, 541, 1297, 1812, 542, 543, 731, 883, 1812, 542, 543, 731, 884, + 1812, 49, 445, 544, 567, 1745, 1786, 1812, 348, 349, 545, 1119, 1745, 1786, 1812, 546, 676, 1812, 401, + 405, 547, 595, 649, 1786, 1812, 548, 1786, 1812, 549, 1786, 1812, 550, 1076, 1342, 1354, 1438, 1794, 1795, + 1812, 311, 551, 565, 1319, 1375, 1568, 1681, 1721, 1768, 1812, 552, 1786, 1812, 553, 1786, 1812, 554, 1745, + 1812, 
501, 555, 928, 1102, 1798, 1812, 556, 1413, 1812, 144, 309, 557, 985, 1786, 1812, 81, 324, 492, + 558, 1372, 1786, 1812, 559, 1786, 1812, 560, 1786, 1812, 561, 1435, 1812, 562, 1628, 1734, 1738, 1812, 563, + 685, 686, 938, 1104, 1240, 1605, 1812, 564, 927, 1812, 551, 565, 1376, 1721, 1768, 1812, 566, 1745, 1812, + 567, 1745, 1812, 568, 1496, 1588, 1812, 569, 886, 890, 1628, 1812, 538, 555, 570, 769, 1102, 1812, 127, + 436, 571, 572, 984, 1786, 1812, 571, 572, 1812, 573, 640, 799, 1406, 1812, 574, 631, 1114, 1378, 1812, + 575, 1619, 1812, 576, 1630, 1812, 150, 577, 635, 654, 1521, 1728, 1733, 1812, 578, 1786, 1812, 498, 579, + 1594, 1812, 580, 774, 1283, 1812, 581, 1748, 1812, 582, 594, 640, 1140, 1812, 32, 533, 583, 635, 651, + 1728, 1733, 1812, 340, 402, 584, 1050, 1583, 1786, 1812, 585, 883, 1812, 586, 1745, 1812, 587, 873, 1812, + 584, 588, 589, 1586, 1587, 1812, 588, 589, 866, 1061, 1582, 1586, 1587, 1812, 590, 591, 1812, 591, 830, + 906, 974, 1084, 1812, 592, 1745, 1812, 593, 1328, 1812, 254, 582, 594, 640, 1200, 1509, 1764, 1812, 401, + 405, 547, 595, 649, 1786, 1812, 596, 1581, 1812, 597, 1693, 1812, 207, 598, 627, 883, 1157, 1812, 599, + 845, 1812, 600, 1621, 1812, 601, 689, 1812, 602, 640, 994, 1357, 1812, 603, 1745, 1812, 604, 1111, 1360, + 1669, 1788, 1812, 605, 1745, 1812, 438, 448, 606, 1216, 1786, 1812, 607, 657, 845, 999, 1592, 1812, 359, + 406, 505, 608, 1745, 1786, 1812, 609, 675, 1585, 1696, 1812, 610, 1283, 1812, 611, 1766, 1812, 190, 191, + 605, 612, 1745, 1786, 1812, 613, 1749, 1812, 489, 614, 776, 1019, 1532, 1728, 1768, 1812, 615, 1471, 1812, + 616, 1810, 1812, 617, 1183, 1812, 460, 465, 618, 1235, 1745, 1786, 1812, 619, 778, 1616, 1812, 620, 1812, + 621, 661, 1757, 1812, 622, 1745, 1812, 623, 1812, 624, 625, 1812, 382, 625, 838, 839, 1055, 1056, 1812, + 626, 731, 1812, 598, 627, 883, 1403, 1812, 243, 628, 851, 1319, 1556, 1568, 1681, 1721, 1768, 1812, 629, + 692, 1515, 1628, 1812, 630, 1745, 1812, 574, 631, 1378, 1653, 1812, 623, 632, 671, 
672, 788, 1367, 1595, + 1812, 633, 1590, 1812, 634, 1327, 1328, 1812, 577, 583, 635, 706, 1628, 1694, 1728, 1733, 1812, 635, 636, + 1812, 637, 1443, 1812, 638, 902, 1812, 639, 640, 1812, 620, 631, 640, 1437, 1812, 641, 808, 1812, 642, + 693, 1510, 1628, 1812, 643, 1786, 1812, 644, 1745, 1812, 601, 645, 689, 957, 1024, 1812, 646, 1786, 1812, + 647, 1477, 1812, 440, 648, 752, 1060, 1172, 1328, 1443, 1448, 1768, 1812, 649, 1786, 1812, 650, 1786, 1812, + 651, 1521, 1812, 652, 1629, 1812, 653, 1568, 1720, 1721, 1812, 577, 654, 1521, 1628, 1812, 655, 1786, 1812, + 656, 1786, 1812, 657, 719, 978, 1108, 1812, 339, 596, 658, 748, 819, 1581, 1812, 659, 660, 1812, 521, + 660, 1159, 1168, 1463, 1464, 1812, 661, 962, 1812, 52, 493, 662, 663, 1417, 1728, 1733, 1812, 662, 663, + 664, 982, 1628, 1694, 1728, 1733, 1812, 102, 494, 663, 664, 1414, 1728, 1733, 1812, 665, 1151, 1479, 1669, + 1788, 1812, 3, 197, 666, 667, 878, 1786, 1812, 666, 667, 1812, 668, 1527, 1812, 669, 711, 1812, 670, + 1098, 1812, 671, 672, 1812, 672, 788, 960, 1594, 1595, 1812, 673, 942, 1358, 1384, 1385, 1812, 463, 674, + 839, 1056, 1333, 1768, 1812, 609, 675, 1585, 1696, 1812, 676, 749, 813, 1330, 1481, 1812, 677, 679, 1812, + 677, 678, 679, 1035, 1160, 1204, 1405, 1812, 679, 1035, 1159, 1160, 1649, 1812, 680, 1086, 1812, 588, 681, + 802, 1582, 1587, 1812, 268, 269, 682, 1081, 1745, 1786, 1812, 683, 1680, 1681, 1812, 684, 834, 999, 1256, + 1592, 1812, 685, 686, 1812, 686, 938, 959, 1240, 1312, 1812, 687, 1103, 1812, 38, 368, 566, 688, 1745, + 1786, 1812, 486, 645, 689, 747, 957, 1812, 690, 1700, 1812, 691, 732, 1087, 1088, 1149, 1290, 1444, 1812, + 71, 629, 692, 849, 1515, 1728, 1733, 1812, 315, 642, 693, 849, 1510, 1728, 1733, 1812, 694, 1786, 1812, + 200, 201, 695, 1508, 1610, 1768, 1812, 1, 696, 697, 785, 1183, 1532, 1535, 1812, 696, 697, 785, 1628, + 1812, 698, 888, 892, 1628, 1812, 5, 699, 781, 823, 1183, 1532, 1535, 1812, 700, 722, 1812, 701, 1466, + 1812, 702, 1630, 1812, 703, 1786, 1812, 704, 1172, 
1812, 164, 423, 527, 705, 1745, 1786, 1812, 635, 706, + 1628, 1694, 1745, 1812, 707, 708, 760, 1065, 1370, 1812, 708, 1745, 1812, 709, 1279, 1439, 1561, 1562, 1812, + 353, 710, 839, 988, 1056, 1768, 1812, 495, 517, 711, 727, 1553, 1812, 215, 217, 712, 713, 1572, 1786, + 1812, 215, 217, 712, 713, 1572, 1786, 1812, 372, 640, 714, 825, 1812, 11, 715, 1003, 1319, 1568, 1681, + 1721, 1767, 1768, 1812, 716, 1534, 1812, 717, 1319, 1812, 718, 1526, 1812, 657, 719, 978, 1108, 1812, 720, + 1091, 1157, 1458, 1728, 1812, 721, 1786, 1812, 722, 861, 1335, 1336, 1751, 1812, 723, 1786, 1812, 724, 1773, + 1812, 725, 849, 1812, 124, 638, 726, 902, 1052, 1133, 1812, 517, 669, 711, 727, 931, 1812, 728, 729, + 1669, 1787, 1788, 1812, 729, 1271, 1812, 730, 1068, 1070, 1071, 1199, 1812, 731, 1812, 691, 732, 733, 1087, + 1088, 1812, 293, 356, 721, 733, 872, 1786, 1812, 734, 1135, 1812, 735, 862, 863, 1179, 1292, 1812, 300, + 308, 576, 736, 737, 1630, 1812, 300, 308, 576, 736, 737, 1630, 1812, 20, 738, 752, 1004, 1041, 1172, + 1328, 1443, 1768, 1812, 739, 1757, 1812, 740, 1681, 1812, 741, 1638, 1812, 742, 979, 1812, 743, 744, 1481, + 1482, 1486, 1812, 744, 1745, 1812, 745, 1288, 1514, 1812, 746, 1534, 1628, 1694, 1745, 1812, 747, 1075, 1459, + 1460, 1709, 1812, 748, 819, 1812, 749, 1330, 1812, 750, 1072, 1445, 1450, 1532, 1535, 1628, 1694, 1812, 751, + 800, 1381, 1390, 1396, 1812, 752, 1455, 1812, 212, 213, 753, 1383, 1638, 1768, 1812, 754, 1786, 1812, 755, + 1786, 1812, 756, 1196, 1336, 1812, 757, 880, 1812, 362, 363, 643, 758, 1036, 1786, 1812, 759, 766, 1812, + 759, 760, 765, 766, 1021, 1812, 761, 948, 950, 1628, 1812, 435, 437, 702, 762, 789, 1630, 1812, 763, + 1786, 1812, 764, 1786, 1812, 759, 760, 765, 766, 1065, 1812, 571, 760, 766, 1065, 1370, 1812, 767, 1340, + 1346, 1628, 1812, 768, 1341, 1349, 1628, 1812, 538, 555, 570, 769, 770, 1812, 770, 1745, 1812, 771, 1812, + 772, 821, 1015, 1646, 1812, 771, 773, 783, 1766, 1812, 526, 774, 780, 784, 1063, 1812, 244, 246, 775, + 828, 1745, 
1786, 1812, 776, 804, 1129, 1281, 1812, 777, 778, 1812, 619, 778, 1020, 1487, 1488, 1616, 1812, + 364, 370, 779, 1203, 1745, 1786, 1812, 780, 783, 1062, 1063, 1186, 1812, 499, 781, 782, 1576, 1812, 499, + 781, 782, 1576, 1812, 771, 773, 780, 783, 784, 1062, 1063, 1812, 783, 784, 1062, 1063, 1187, 1812, 499, + 785, 786, 1540, 1812, 499, 785, 786, 1540, 1812, 787, 1745, 1812, 632, 671, 672, 788, 789, 1812, 435, + 437, 702, 762, 789, 1630, 1812, 790, 1489, 1812, 791, 1439, 1562, 1790, 1805, 1812, 792, 900, 1754, 1756, + 1759, 1761, 1812, 793, 1699, 1700, 1812, 794, 1504, 1812, 146, 147, 603, 795, 1745, 1786, 1812, 796, 999, + 1144, 1270, 1592, 1812, 797, 1745, 1812, 640, 798, 799, 1492, 1768, 1812, 573, 640, 798, 799, 1768, 1812, + 800, 1745, 1812, 574, 801, 946, 1653, 1812, 588, 681, 802, 1038, 1582, 1812, 803, 846, 1812, 731, 804, + 805, 883, 1148, 1281, 1812, 804, 805, 807, 1148, 1281, 1812, 731, 806, 807, 883, 1812, 805, 806, 807, + 883, 1148, 1812, 690, 808, 904, 1306, 1700, 1812, 809, 1786, 1812, 810, 1786, 1812, 811, 1663, 1812, 812, + 813, 1812, 325, 546, 676, 749, 813, 1330, 1812, 518, 814, 1673, 1812, 735, 815, 816, 863, 876, 1812, + 816, 862, 863, 1180, 1292, 1812, 136, 137, 487, 817, 818, 1786, 1812, 136, 137, 487, 817, 818, 1786, + 1812, 596, 658, 819, 820, 1581, 1812, 501, 820, 829, 1745, 1812, 772, 821, 822, 1015, 1646, 1812, 30, + 31, 397, 821, 822, 1548, 1581, 1636, 1768, 1812, 699, 781, 823, 1628, 1812, 824, 1628, 1694, 1732, 1745, + 1812, 220, 291, 378, 645, 714, 825, 1078, 1477, 1764, 1768, 1812, 826, 1162, 1812, 827, 882, 1352, 1375, + 1633, 1812, 828, 1745, 1812, 829, 1812, 19, 336, 830, 831, 1176, 1786, 1812, 830, 831, 1812, 832, 980, + 1551, 1691, 1692, 1812, 833, 1745, 1812, 834, 835, 944, 1742, 1812, 834, 835, 944, 1742, 1812, 153, 479, + 836, 837, 887, 1548, 1812, 836, 837, 887, 1061, 1548, 1812, 838, 839, 1812, 625, 839, 1055, 1056, 1274, + 1812, 840, 1005, 1048, 1611, 1812, 841, 842, 1045, 1611, 1812, 37, 841, 842, 1004, 1041, 1045, 1611, 
1812, + 843, 844, 906, 974, 1084, 1812, 844, 1745, 1812, 607, 845, 999, 1294, 1592, 1812, 520, 846, 1344, 1471, + 1803, 1812, 847, 1495, 1742, 1775, 1812, 160, 848, 940, 944, 1170, 1768, 1812, 692, 693, 849, 850, 1628, + 1694, 1728, 1733, 1812, 849, 850, 1628, 1694, 1745, 1812, 15, 16, 628, 851, 1191, 1568, 1681, 1739, 1768, + 1812, 852, 1508, 1812, 853, 1076, 1245, 1342, 1795, 1812, 229, 230, 854, 1193, 1747, 1768, 1812, 718, 855, + 1641, 1643, 1645, 1658, 1812, 856, 857, 1812, 857, 1218, 1315, 1398, 1730, 1812, 24, 332, 858, 1548, 1581, + 1607, 1768, 1812, 209, 859, 1135, 1162, 1652, 1768, 1812, 757, 860, 880, 1383, 1485, 1812, 861, 875, 1456, + 1745, 1812, 862, 863, 1812, 735, 815, 816, 863, 1136, 1812, 21, 357, 864, 865, 1365, 1786, 1812, 864, + 865, 1812, 866, 1812, 867, 1663, 1745, 1766, 1812, 868, 1337, 1458, 1812, 869, 1413, 1786, 1812, 870, 1006, + 1024, 1029, 1032, 1812, 871, 879, 1585, 1812, 293, 356, 721, 733, 872, 1786, 1812, 516, 873, 895, 896, + 1227, 1812, 874, 875, 972, 973, 1181, 1812, 861, 874, 875, 972, 973, 1456, 1617, 1812, 735, 815, 816, + 876, 954, 1812, 877, 1745, 1812, 666, 878, 1786, 1812, 871, 879, 1585, 1812, 480, 860, 880, 895, 1383, + 1812, 881, 1352, 1376, 1626, 1812, 827, 882, 1352, 1633, 1812, 235, 238, 585, 625, 883, 1768, 1812, 543, + 883, 884, 1812, 885, 1381, 1774, 1812, 138, 569, 886, 890, 1427, 1532, 1535, 1812, 887, 1548, 1812, 394, + 698, 888, 892, 1427, 1532, 1535, 1812, 889, 1265, 1421, 1812, 890, 891, 1121, 1221, 1812, 890, 891, 1121, + 1221, 1812, 892, 1120, 1121, 1500, 1812, 658, 893, 1812, 267, 894, 1416, 1548, 1768, 1812, 516, 873, 895, + 896, 897, 1812, 524, 587, 713, 873, 896, 1812, 516, 895, 896, 897, 1571, 1812, 898, 1188, 1625, 1628, + 1632, 1694, 1728, 1733, 1812, 898, 899, 1812, 900, 901, 1812, 231, 901, 1299, 1300, 1598, 1599, 1812, 726, + 902, 903, 1052, 1133, 1812, 903, 942, 1022, 1358, 1385, 1812, 904, 915, 1197, 1745, 1812, 905, 906, 1812, + 591, 843, 906, 974, 1084, 1812, 157, 752, 907, 941, 1172, 1293, 
1328, 1443, 1768, 1812, 87, 908, 909, + 1483, 1532, 1535, 1643, 1812, 908, 909, 1628, 1643, 1812, 910, 911, 1431, 1433, 1478, 1812, 911, 1786, 1812, + 88, 912, 1027, 1483, 1532, 1535, 1641, 1812, 913, 1412, 1812, 818, 913, 914, 915, 1412, 1812, 904, 913, + 914, 915, 916, 1197, 1412, 1812, 817, 913, 915, 916, 1412, 1812, 917, 1126, 1812, 918, 1745, 1812, 111, + 116, 919, 920, 1682, 1786, 1812, 111, 116, 919, 920, 1682, 1786, 1812, 921, 922, 1250, 1439, 1562, 1812, + 922, 1267, 1268, 1391, 1812, 825, 923, 1078, 1458, 1741, 1812, 23, 294, 554, 924, 1745, 1786, 1812, 925, + 1786, 1812, 926, 1786, 1812, 673, 927, 997, 998, 1193, 1812, 928, 1102, 1812, 929, 931, 1215, 1473, 1474, + 1812, 930, 1745, 1812, 929, 931, 1116, 1473, 1474, 1812, 932, 1745, 1812, 933, 1786, 1812, 934, 1786, 1812, + 218, 219, 935, 1067, 1745, 1786, 1812, 742, 936, 1510, 1512, 1515, 1517, 1812, 937, 1809, 1812, 563, 685, + 686, 938, 1676, 1812, 939, 940, 1812, 940, 1068, 1079, 1169, 1170, 1812, 752, 907, 941, 1761, 1768, 1812, + 673, 942, 1236, 1384, 1385, 1812, 502, 943, 1584, 1696, 1812, 431, 834, 835, 848, 944, 1742, 1776, 1812, + 945, 1747, 1812, 574, 801, 946, 1296, 1653, 1812, 947, 1774, 1812, 155, 761, 948, 950, 1532, 1535, 1693, + 1812, 156, 949, 952, 1154, 1532, 1535, 1693, 1812, 950, 951, 1442, 1449, 1812, 950, 951, 1442, 1449, 1812, + 952, 953, 1441, 1442, 1812, 952, 953, 1441, 1442, 1448, 1812, 954, 1745, 1812, 955, 1786, 1812, 956, 957, + 1812, 351, 486, 601, 645, 689, 957, 1812, 958, 1786, 1812, 686, 938, 959, 1033, 1240, 1812, 247, 290, + 528, 960, 1746, 1786, 1812, 961, 1040, 1812, 962, 1436, 1812, 963, 1427, 1628, 1694, 1745, 1812, 964, 1008, + 1097, 1646, 1812, 965, 1227, 1786, 1812, 966, 970, 1786, 1812, 374, 940, 967, 1170, 1262, 1768, 1812, 252, + 295, 532, 968, 969, 1786, 1812, 968, 969, 1786, 1812, 70, 413, 966, 970, 971, 1786, 1812, 970, 971, + 1812, 972, 973, 1812, 874, 968, 973, 1616, 1617, 1812, 590, 591, 974, 975, 1073, 1812, 303, 304, 503, + 975, 1085, 1786, 1812, 345, 
350, 930, 976, 1745, 1786, 1812, 285, 940, 977, 978, 1170, 1768, 1812, 430, + 657, 719, 977, 978, 1108, 1702, 1812, 408, 537, 979, 1191, 1578, 1579, 1812, 832, 980, 981, 1691, 1692, + 1812, 64, 65, 933, 981, 1552, 1786, 1812, 663, 982, 1628, 1694, 1745, 1812, 983, 1569, 1786, 1812, 571, + 984, 1786, 1812, 985, 1786, 1812, 235, 237, 517, 986, 1503, 1768, 1812, 987, 988, 1026, 1128, 1812, 321, + 710, 987, 988, 1026, 1128, 1408, 1812, 380, 381, 644, 989, 1745, 1786, 1812, 416, 990, 1135, 1162, 1768, + 1801, 1812, 520, 991, 992, 1344, 1803, 1812, 992, 1745, 1812, 993, 1025, 1026, 1407, 1812, 35, 318, 602, + 994, 1285, 1477, 1764, 1768, 1812, 995, 1812, 995, 996, 1016, 1745, 1812, 997, 998, 1812, 564, 927, 998, + 1193, 1363, 1812, 502, 871, 999, 1321, 1403, 1741, 1768, 1812, 999, 1000, 1591, 1592, 1621, 1812, 1001, 1040, + 1263, 1628, 1812, 726, 1002, 1812, 335, 715, 1003, 1512, 1568, 1681, 1768, 1812, 12, 203, 738, 1004, 1005, + 1172, 1328, 1599, 1768, 1812, 840, 1004, 1005, 1048, 1611, 1812, 1006, 1745, 1812, 108, 1007, 1008, 1010, 1683, + 1728, 1733, 1812, 964, 1008, 1097, 1646, 1812, 1009, 1786, 1812, 1007, 1008, 1010, 1628, 1812, 1011, 1786, 1812, + 920, 1012, 1016, 1713, 1714, 1812, 893, 1013, 1414, 1416, 1417, 1519, 1812, 109, 1014, 1015, 1143, 1683, 1728, + 1733, 1812, 772, 821, 1015, 1646, 1812, 995, 996, 1012, 1016, 1310, 1713, 1714, 1812, 6, 198, 278, 466, + 660, 1017, 1110, 1130, 1172, 1528, 1700, 1768, 1812, 1018, 1094, 1141, 1609, 1777, 1779, 1812, 67, 614, 1019, + 1118, 1397, 1458, 1812, 193, 777, 778, 1020, 1487, 1488, 1812, 443, 446, 552, 1021, 1066, 1786, 1812, 903, + 942, 1022, 1358, 1613, 1812, 1023, 1745, 1812, 870, 1024, 1029, 1031, 1032, 1812, 993, 1025, 1026, 1407, 1812, + 670, 988, 993, 1026, 1128, 1407, 1812, 912, 1027, 1628, 1641, 1812, 1028, 1031, 1812, 1028, 1029, 1030, 1031, + 1369, 1812, 1028, 1029, 1030, 1031, 1032, 1812, 1024, 1029, 1031, 1032, 1629, 1812, 1028, 1030, 1031, 1032, 1368, + 1812, 938, 959, 1033, 1240, 1410, 1812, 1034, 
1355, 1812, 677, 678, 679, 1035, 1036, 1812, 362, 363, 643, + 758, 1036, 1786, 1812, 1037, 1585, 1812, 1038, 1745, 1812, 1039, 1503, 1812, 1040, 1167, 1812, 738, 752, 842, + 1041, 1768, 1812, 1042, 1043, 1248, 1439, 1562, 1812, 1043, 1550, 1812, 7, 1044, 1045, 1046, 1053, 1532, 1535, + 1812, 841, 842, 1045, 1611, 1812, 1044, 1045, 1046, 1628, 1812, 66, 1047, 1048, 1053, 1447, 1532, 1535, 1812, + 840, 1005, 1048, 1611, 1812, 1049, 1452, 1628, 1694, 1745, 1812, 1050, 1786, 1812, 1051, 1786, 1812, 1052, 1133, + 1812, 1044, 1047, 1053, 1532, 1535, 1557, 1628, 1694, 1812, 1053, 1054, 1812, 1055, 1056, 1812, 520, 625, 838, + 839, 1056, 1812, 1057, 1191, 1690, 1812, 375, 839, 1056, 1058, 1424, 1768, 1812, 489, 585, 1059, 1148, 1812, + 648, 752, 1060, 1449, 1768, 1812, 589, 866, 1061, 1745, 1812, 1062, 1063, 1812, 774, 780, 784, 970, 1063, + 1812, 517, 1064, 1812, 759, 765, 766, 1065, 1066, 1812, 443, 446, 552, 1021, 1066, 1786, 1812, 1067, 1745, + 1812, 730, 1068, 1070, 1071, 1465, 1812, 1069, 1465, 1812, 1069, 1070, 1331, 1465, 1565, 1812, 1069, 1071, 1331, + 1465, 1566, 1812, 750, 1072, 1628, 1694, 1745, 1812, 590, 591, 974, 1073, 1084, 1812, 1074, 1745, 1812, 747, + 1075, 1459, 1460, 1667, 1812, 550, 1076, 1077, 1794, 1795, 1812, 400, 414, 1077, 1189, 1343, 1786, 1812, 104, + 825, 923, 1078, 1458, 1741, 1812, 227, 939, 940, 1079, 1169, 1170, 1812, 1080, 1111, 1812, 1081, 1745, 1812, + 1082, 1086, 1786, 1812, 1064, 1083, 1151, 1225, 1424, 1529, 1812, 590, 591, 1073, 1084, 1085, 1812, 303, 304, + 503, 975, 1085, 1786, 1812, 202, 377, 680, 1082, 1086, 1786, 1812, 1087, 1088, 1812, 732, 1088, 1289, 1290, + 1454, 1812, 1089, 1268, 1476, 1725, 1812, 626, 731, 1090, 1091, 1728, 1812, 55, 720, 1091, 1157, 1458, 1728, + 1812, 337, 342, 754, 1092, 1093, 1786, 1812, 337, 342, 754, 1092, 1093, 1786, 1812, 1018, 1094, 1095, 1141, + 1812, 1018, 1094, 1095, 1141, 1812, 60, 837, 1096, 1097, 1548, 1768, 1812, 391, 822, 964, 1008, 1096, 1097, + 1646, 1812, 114, 481, 1098, 1099, 1124, 
1125, 1812, 1073, 1098, 1099, 1124, 1125, 1812, 1100, 1110, 1762, 1812, + 1101, 1777, 1796, 1812, 538, 555, 570, 1102, 1103, 1812, 206, 387, 687, 1103, 1703, 1786, 1812, 1104, 1812, + 249, 250, 658, 1105, 1339, 1768, 1812, 358, 1106, 1109, 1110, 1387, 1388, 1812, 1107, 1108, 1295, 1701, 1812, + 657, 978, 1002, 1108, 1295, 1701, 1812, 1109, 1110, 1812, 1100, 1106, 1110, 1387, 1388, 1762, 1812, 604, 1111, + 1174, 1669, 1788, 1812, 914, 916, 1112, 1113, 1327, 1812, 1113, 1745, 1812, 145, 454, 1114, 1301, 1654, 1766, + 1812, 450, 461, 491, 1115, 1745, 1786, 1812, 931, 1116, 1214, 1473, 1474, 1812, 918, 929, 1116, 1117, 1394, + 1812, 614, 1019, 1118, 1397, 1458, 1812, 1119, 1745, 1812, 892, 1120, 1121, 1500, 1812, 890, 892, 1121, 1221, + 1305, 1500, 1812, 1122, 1483, 1812, 112, 113, 592, 1123, 1745, 1786, 1812, 1124, 1125, 1812, 481, 815, 1098, + 1099, 1125, 1812, 28, 376, 917, 1126, 1520, 1786, 1812, 1127, 1128, 1489, 1669, 1788, 1812, 987, 988, 1026, + 1128, 1812, 776, 804, 1129, 1281, 1812, 48, 1017, 1130, 1131, 1132, 1458, 1812, 478, 745, 1130, 1131, 1532, + 1665, 1768, 1812, 1017, 1130, 1131, 1132, 1458, 1812, 638, 726, 902, 1133, 1134, 1812, 803, 846, 1134, 1344, + 1803, 1812, 826, 1135, 1162, 1233, 1708, 1812, 50, 361, 1136, 1137, 1674, 1786, 1812, 1136, 1137, 1812, 432, + 444, 925, 1138, 1139, 1786, 1812, 432, 444, 925, 1138, 1139, 1786, 1812, 253, 582, 1140, 1141, 1764, 1768, + 1812, 1018, 1094, 1095, 1140, 1141, 1812, 1142, 1745, 1812, 1014, 1015, 1143, 1628, 1812, 1144, 1145, 1165, 1262, + 1812, 1144, 1145, 1165, 1262, 1812, 305, 314, 1146, 1185, 1745, 1786, 1812, 46, 660, 1147, 1464, 1645, 1768, + 1812, 249, 399, 836, 1148, 1721, 1768, 1812, 1149, 1812, 1083, 1150, 1225, 1424, 1812, 1083, 1151, 1152, 1529, + 1812, 1083, 1151, 1152, 1529, 1812, 1153, 1164, 1165, 1261, 1771, 1812, 949, 952, 1154, 1628, 1812, 1155, 1156, + 1239, 1243, 1699, 1812, 1156, 1745, 1812, 45, 258, 327, 419, 711, 1056, 1091, 1125, 1157, 1158, 1505, 1768, + 1812, 1157, 1158, 1812, 679, 
1035, 1159, 1160, 1308, 1812, 677, 678, 679, 758, 1160, 1812, 1161, 1338, 1602, + 1603, 1606, 1607, 1812, 734, 1135, 1162, 1163, 1233, 1812, 856, 857, 1163, 1315, 1398, 1812, 1153, 1164, 1165, + 1771, 1812, 945, 1144, 1153, 1165, 1262, 1771, 1812, 1166, 1312, 1786, 1812, 1167, 1168, 1812, 271, 659, 660, + 1168, 1463, 1464, 1812, 1169, 1170, 1812, 765, 939, 940, 1079, 1170, 1812, 752, 1171, 1172, 1173, 1812, 1017, + 1172, 1812, 752, 1171, 1172, 1173, 1812, 1111, 1174, 1332, 1669, 1788, 1812, 1175, 1590, 1786, 1812, 830, 1176, + 1786, 1812, 62, 251, 1177, 1178, 1726, 1786, 1812, 1177, 1178, 1812, 69, 434, 955, 1179, 1180, 1786, 1812, + 69, 434, 955, 1179, 1180, 1786, 1812, 98, 449, 1181, 1182, 1558, 1786, 1812, 98, 449, 1181, 1182, 1558, + 1786, 1812, 696, 699, 1183, 1184, 1532, 1535, 1628, 1694, 1812, 1183, 1184, 1628, 1694, 1745, 1812, 1185, 1745, + 1812, 120, 457, 1186, 1187, 1781, 1786, 1812, 120, 457, 1186, 1187, 1781, 1786, 1812, 898, 1188, 1628, 1694, + 1745, 1812, 1189, 1786, 1812, 1190, 1786, 1812, 979, 1057, 1191, 1578, 1579, 1690, 1812, 1192, 1324, 1786, 1812, + 199, 564, 927, 997, 998, 1193, 1812, 234, 426, 522, 1194, 1195, 1786, 1812, 1194, 1195, 1786, 1812, 1196, + 1206, 1208, 1419, 1671, 1812, 1197, 1812, 1198, 1683, 1812, 1199, 1745, 1812, 594, 640, 1200, 1768, 1779, 1812, + 1201, 1786, 1812, 1202, 1786, 1812, 1203, 1745, 1812, 678, 1204, 1405, 1745, 1812, 812, 1205, 1279, 1286, 1371, + 1652, 1812, 1093, 1206, 1207, 1670, 1671, 1812, 1206, 1207, 1208, 1523, 1580, 1670, 1671, 1812, 1092, 1207, 1208, + 1670, 1671, 1812, 1209, 1210, 1762, 1784, 1792, 1812, 1210, 1745, 1812, 233, 837, 1211, 1548, 1603, 1768, 1812, + 478, 1212, 1455, 1812, 1213, 1466, 1786, 1812, 458, 459, 559, 1214, 1215, 1786, 1812, 458, 459, 559, 1214, + 1215, 1786, 1812, 1216, 1786, 1812, 570, 1217, 1636, 1812, 857, 1218, 1219, 1315, 1398, 1812, 797, 1218, 1219, + 1315, 1398, 1812, 181, 660, 1220, 1221, 1464, 1768, 1812, 10, 890, 891, 1121, 1220, 1221, 1501, 1812, 1222, + 1745, 1812, 
1223, 1786, 1812, 1224, 1786, 1812, 1083, 1150, 1225, 1424, 1812, 1226, 1366, 1812, 72, 407, 965, + 1227, 1228, 1786, 1812, 1227, 1228, 1812, 1229, 1234, 1241, 1745, 1812, 1230, 1231, 1812, 647, 1030, 1231, 1477, + 1610, 1812, 1232, 1233, 1812, 245, 734, 826, 1135, 1162, 1233, 1812, 1234, 1812, 1235, 1745, 1812, 225, 226, + 1236, 1359, 1614, 1786, 1812, 1237, 1277, 1628, 1694, 1745, 1812, 1238, 1242, 1812, 1139, 1238, 1239, 1241, 1242, + 1812, 563, 685, 686, 1240, 1677, 1812, 1229, 1234, 1238, 1239, 1241, 1242, 1243, 1812, 1086, 1239, 1242, 1243, + 1699, 1812, 1138, 1238, 1241, 1242, 1243, 1812, 1244, 1796, 1812, 853, 1076, 1245, 1246, 1342, 1812, 1246, 1745, + 1812, 1247, 1421, 1812, 1042, 1248, 1439, 1549, 1562, 1812, 1248, 1249, 1812, 921, 1250, 1439, 1475, 1562, 1812, + 1250, 1251, 1812, 1252, 1591, 1812, 1253, 1254, 1346, 1347, 1349, 1350, 1812, 1254, 1318, 1812, 1255, 1270, 1812, + 684, 999, 1256, 1494, 1592, 1812, 1256, 1257, 1812, 367, 373, 809, 1258, 1259, 1786, 1812, 367, 373, 809, + 1258, 1259, 1786, 1812, 260, 261, 979, 1260, 1751, 1768, 1812, 205, 940, 1153, 1261, 1768, 1812, 40, 967, + 1144, 1145, 1165, 1261, 1262, 1812, 287, 1001, 1040, 1263, 1277, 1532, 1535, 1812, 1264, 1745, 1812, 1265, 1266, + 1304, 1719, 1812, 1265, 1266, 1304, 1719, 1812, 922, 1267, 1268, 1391, 1812, 741, 922, 1268, 1391, 1476, 1725, + 1812, 1269, 1561, 1812, 796, 999, 1270, 1592, 1770, 1812, 624, 1271, 1812, 1272, 1275, 1812, 1272, 1273, 1274, + 1275, 1723, 1812, 1272, 1273, 1274, 1275, 1554, 1812, 1194, 1273, 1275, 1553, 1554, 1812, 1205, 1276, 1371, 1652, + 1812, 1237, 1263, 1277, 1278, 1532, 1535, 1628, 1694, 1812, 286, 961, 1277, 1278, 1287, 1532, 1535, 1812, 1205, + 1279, 1280, 1286, 1812, 1205, 1279, 1280, 1286, 1812, 260, 262, 1281, 1282, 1318, 1768, 1812, 1281, 1282, 1812, + 580, 774, 1226, 1283, 1366, 1707, 1812, 1284, 1285, 1303, 1304, 1812, 994, 1284, 1285, 1303, 1304, 1812, 1205, + 1279, 1280, 1286, 1651, 1812, 961, 1278, 1287, 1628, 1812, 745, 1288, 1514, 1812, 
732, 1088, 1289, 1290, 1291, + 1812, 691, 872, 1087, 1088, 1290, 1812, 732, 1074, 1289, 1290, 1291, 1812, 735, 816, 862, 863, 1292, 1812, + 289, 907, 1172, 1293, 1328, 1756, 1768, 1812, 845, 999, 1294, 1295, 1592, 1812, 1107, 1108, 1295, 1701, 1812, + 1296, 1786, 1812, 96, 277, 541, 1297, 1298, 1786, 1812, 1297, 1298, 1786, 1812, 1299, 1300, 1812, 901, 1300, + 1598, 1599, 1605, 1812, 1301, 1766, 1812, 1302, 1745, 1812, 1284, 1285, 1303, 1304, 1812, 956, 1265, 1284, 1285, + 1304, 1719, 1812, 1305, 1306, 1812, 248, 641, 690, 808, 1306, 1700, 1812, 1307, 1509, 1812, 1035, 1159, 1160, + 1308, 1309, 1812, 1309, 1745, 1812, 919, 1016, 1310, 1713, 1714, 1812, 1012, 1310, 1311, 1661, 1680, 1812, 117, + 409, 1166, 1312, 1313, 1786, 1812, 1312, 1313, 1812, 1314, 1382, 1800, 1801, 1805, 1806, 1812, 856, 857, 1163, + 1315, 1316, 1812, 365, 385, 763, 1316, 1399, 1786, 1812, 152, 158, 1023, 1317, 1745, 1786, 1812, 208, 717, + 740, 1318, 1319, 1681, 1812, 740, 1318, 1319, 1681, 1689, 1812, 1320, 1745, 1812, 95, 999, 1321, 1322, 1457, + 1458, 1812, 89, 91, 151, 317, 676, 880, 1135, 1321, 1322, 1640, 1768, 1812, 1323, 1730, 1786, 1812, 118, + 421, 1192, 1324, 1325, 1786, 1812, 1324, 1325, 1812, 25, 75, 165, 388, 902, 940, 998, 1326, 1402, 1749, + 1768, 1812, 914, 916, 1112, 1327, 1412, 1812, 634, 637, 1327, 1328, 1443, 1507, 1812, 1329, 1435, 1786, 1812, + 546, 676, 813, 1330, 1331, 1812, 1069, 1070, 1071, 1331, 1465, 1812, 1332, 1333, 1537, 1538, 1812, 433, 674, + 1332, 1333, 1462, 1537, 1538, 1812, 195, 196, 1135, 1334, 1640, 1768, 1806, 1812, 1335, 1336, 1812, 700, 722, + 756, 1196, 1336, 1751, 1812, 868, 1337, 1458, 1812, 1338, 1339, 1812, 232, 1339, 1635, 1636, 1704, 1705, 1812, + 320, 767, 1340, 1346, 1452, 1728, 1733, 1812, 428, 768, 1341, 1349, 1452, 1728, 1733, 1812, 550, 1342, 1343, + 1794, 1795, 1812, 400, 414, 1077, 1189, 1343, 1786, 1812, 803, 846, 1134, 1344, 1345, 1812, 418, 424, 655, + 1345, 1786, 1804, 1812, 1253, 1346, 1347, 1348, 1812, 653, 1253, 1346, 1347, 
1348, 1812, 1253, 1346, 1347, 1348, + 1812, 1253, 1349, 1350, 1351, 1812, 1253, 1349, 1350, 1351, 1567, 1812, 1253, 1349, 1350, 1351, 1812, 827, 1352, + 1353, 1376, 1626, 1633, 1812, 1020, 1353, 1812, 1354, 1812, 135, 296, 598, 1355, 1356, 1768, 1812, 1355, 1356, + 1812, 302, 602, 640, 1357, 1509, 1718, 1764, 1812, 673, 1358, 1359, 1384, 1385, 1812, 225, 226, 1236, 1359, + 1614, 1786, 1812, 1360, 1361, 1461, 1538, 1812, 1360, 1361, 1461, 1538, 1812, 1362, 1506, 1541, 1542, 1545, 1546, + 1812, 980, 1363, 1364, 1551, 1692, 1812, 932, 980, 1363, 1364, 1551, 1812, 864, 1365, 1786, 1812, 610, 1283, + 1366, 1367, 1707, 1812, 623, 632, 1367, 1786, 1812, 82, 422, 1009, 1368, 1369, 1786, 1812, 82, 422, 1009, + 1368, 1369, 1786, 1812, 707, 760, 766, 1065, 1370, 1812, 1205, 1276, 1371, 1652, 1812, 558, 1372, 1786, 1812, + 1373, 1623, 1812, 1374, 1600, 1602, 1628, 1812, 73, 74, 78, 551, 827, 1191, 1336, 1375, 1568, 1681, 1768, + 1812, 4, 565, 881, 1352, 1375, 1376, 1626, 1812, 1377, 1619, 1630, 1786, 1812, 1378, 1812, 1379, 1812, 852, + 1380, 1491, 1492, 1496, 1497, 1812, 751, 1381, 1390, 1396, 1434, 1812, 1382, 1383, 1812, 451, 480, 757, 860, + 880, 1383, 1812, 1384, 1385, 1812, 903, 942, 1358, 1385, 1623, 1812, 100, 386, 519, 1386, 1745, 1786, 1812, + 1387, 1388, 1812, 1106, 1109, 1110, 1229, 1388, 1812, 1389, 1434, 1812, 1259, 1389, 1390, 1395, 1434, 1812, 922, + 1267, 1268, 1391, 1392, 1812, 183, 1135, 1391, 1392, 1768, 1812, 1393, 1628, 1678, 1694, 1745, 1812, 929, 1116, + 1117, 1394, 1474, 1812, 1379, 1389, 1390, 1395, 1396, 1434, 1698, 1812, 1258, 1389, 1395, 1396, 1434, 1812, 29, + 86, 182, 322, 1019, 1397, 1548, 1568, 1581, 1636, 1768, 1774, 1812, 856, 857, 1163, 1398, 1399, 1812, 365, + 385, 763, 1316, 1399, 1786, 1812, 1225, 1400, 1479, 1669, 1788, 1812, 307, 310, 630, 1401, 1745, 1786, 1812, + 85, 1326, 1402, 1403, 1404, 1458, 1812, 502, 542, 609, 804, 999, 1402, 1403, 1532, 1728, 1741, 1768, 1812, + 1326, 1402, 1403, 1404, 1458, 1812, 1405, 1812, 573, 1406, 1497, 
1768, 1812, 993, 1025, 1026, 1407, 1408, 1812, + 0, 2, 171, 711, 1056, 1407, 1408, 1505, 1768, 1812, 9, 43, 192, 439, 1283, 1328, 1409, 1599, 1664, + 1768, 1809, 1812, 1410, 1745, 1812, 270, 425, 1320, 1411, 1745, 1786, 1812, 914, 916, 1327, 1412, 1413, 1812, + 106, 298, 556, 869, 1413, 1786, 1812, 1013, 1414, 1415, 1416, 1812, 1013, 1414, 1415, 1416, 1812, 894, 1013, + 1414, 1415, 1416, 1812, 1013, 1417, 1418, 1519, 1812, 1013, 1417, 1418, 1519, 1812, 1196, 1206, 1208, 1419, 1420, + 1812, 1420, 1745, 1812, 889, 1421, 1422, 1812, 1284, 1421, 1422, 1812, 1423, 1439, 1562, 1790, 1800, 1812, 57, + 1058, 1083, 1150, 1225, 1424, 1530, 1812, 1425, 1786, 1812, 1426, 1786, 1812, 886, 888, 963, 1427, 1532, 1535, + 1628, 1694, 1812, 1427, 1428, 1812, 1429, 1601, 1606, 1628, 1812, 1430, 1662, 1812, 736, 1430, 1431, 1432, 1662, + 1812, 1430, 1431, 1432, 1433, 1662, 1812, 737, 1430, 1432, 1433, 1662, 1812, 1381, 1390, 1396, 1434, 1435, 1812, + 110, 301, 561, 1329, 1435, 1786, 1812, 1436, 1437, 1812, 496, 620, 639, 640, 1437, 1812, 550, 1354, 1438, + 1745, 1812, 1439, 1741, 1812, 272, 275, 1106, 1440, 1526, 1768, 1812, 952, 953, 1441, 1442, 1812, 950, 952, + 953, 1442, 1449, 1706, 1812, 593, 1328, 1443, 1444, 1507, 1812, 691, 1149, 1444, 1745, 1812, 101, 750, 1445, + 1446, 1532, 1535, 1759, 1812, 1445, 1446, 1628, 1759, 1812, 1047, 1048, 1447, 1628, 1812, 51, 58, 263, 648, + 953, 1172, 1328, 1448, 1599, 1768, 1809, 1812, 141, 950, 951, 1060, 1442, 1448, 1449, 1812, 115, 750, 1450, + 1451, 1532, 1535, 1754, 1812, 1450, 1451, 1628, 1754, 1812, 1049, 1340, 1341, 1452, 1628, 1694, 1728, 1733, 1812, + 1452, 1453, 1812, 41, 176, 482, 1454, 1637, 1786, 1812, 272, 276, 752, 1168, 1455, 1768, 1812, 1456, 1812, + 999, 1321, 1322, 1457, 1458, 1812, 475, 1459, 1460, 1812, 1075, 1297, 1460, 1708, 1709, 1812, 1360, 1361, 1461, + 1462, 1538, 1812, 462, 464, 711, 1056, 1461, 1462, 1768, 1812, 1463, 1464, 1812, 659, 660, 1168, 1438, 1464, + 1812, 1068, 1070, 1071, 1465, 1466, 1812, 13, 228, 701, 
1213, 1466, 1786, 1812, 42, 187, 484, 1467, 1468, + 1786, 1812, 1467, 1468, 1786, 1812, 1469, 1470, 1690, 1712, 1717, 1812, 1470, 1745, 1812, 148, 393, 615, 1471, + 1472, 1786, 1812, 1471, 1472, 1786, 1812, 1473, 1474, 1812, 929, 1116, 1394, 1474, 1569, 1812, 1250, 1439, 1475, + 1476, 1562, 1812, 1089, 1268, 1476, 1725, 1812, 1230, 1231, 1477, 1478, 1610, 1812, 910, 1431, 1433, 1478, 1662, + 1812, 665, 1400, 1479, 1669, 1788, 1812, 1479, 1480, 1812, 743, 1481, 1482, 1486, 1575, 1812, 547, 1482, 1485, + 1574, 1575, 1812, 908, 912, 1483, 1484, 1532, 1535, 1628, 1694, 1812, 1483, 1484, 1628, 1694, 1745, 1812, 1482, + 1485, 1486, 1574, 1575, 1812, 595, 1485, 1486, 1574, 1575, 1812, 1487, 1488, 1812, 777, 778, 1020, 1204, 1488, + 1812, 1127, 1489, 1490, 1669, 1788, 1812, 993, 1489, 1490, 1669, 1788, 1812, 1380, 1491, 1492, 1493, 1812, 313, + 798, 1380, 1406, 1491, 1492, 1493, 1812, 1380, 1491, 1492, 1493, 1812, 999, 1256, 1494, 1495, 1592, 1812, 847, + 1495, 1742, 1775, 1812, 1380, 1496, 1497, 1498, 1812, 1380, 1406, 1496, 1497, 1498, 1812, 1380, 1496, 1497, 1498, + 1812, 1491, 1499, 1588, 1812, 892, 1120, 1121, 1500, 1501, 1812, 184, 186, 194, 660, 1110, 1500, 1501, 1528, + 1768, 1812, 1502, 1735, 1736, 1738, 1739, 1750, 1812, 173, 500, 794, 1503, 1504, 1505, 1812, 500, 1292, 1503, + 1504, 1505, 1812, 794, 1394, 1503, 1504, 1505, 1812, 1506, 1507, 1812, 338, 593, 637, 1328, 1443, 1507, 1812, + 366, 1307, 1508, 1509, 1763, 1764, 1812, 1432, 1508, 1509, 1763, 1764, 1812, 936, 1510, 1511, 1512, 1812, 936, + 1510, 1511, 1512, 1812, 936, 1003, 1510, 1511, 1512, 1812, 1513, 1514, 1812, 283, 328, 1507, 1513, 1514, 1768, + 1812, 936, 1515, 1516, 1517, 1812, 936, 1515, 1516, 1517, 1812, 360, 936, 1003, 1515, 1516, 1517, 1767, 1812, + 456, 837, 1518, 1519, 1548, 1768, 1812, 63, 894, 1013, 1417, 1418, 1518, 1519, 1812, 1126, 1520, 1786, 1812, + 1521, 1522, 1812, 836, 1522, 1812, 1523, 1812, 161, 162, 509, 1524, 1525, 1786, 1812, 161, 162, 509, 1524, + 1525, 1786, 1812, 172, 497, 
668, 1526, 1527, 1528, 1812, 497, 1526, 1527, 1528, 1753, 1812, 668, 853, 1526, + 1527, 1528, 1531, 1812, 1083, 1151, 1152, 1529, 1530, 1812, 442, 1056, 1529, 1530, 1768, 1812, 853, 1528, 1531, + 1812, 478, 614, 776, 804, 1131, 1403, 1532, 1655, 1768, 1812, 383, 506, 1532, 1533, 1534, 1535, 1541, 1812, + 746, 1532, 1533, 1534, 1535, 1536, 1628, 1694, 1812, 1532, 1535, 1812, 384, 536, 1532, 1534, 1535, 1536, 1545, + 1812, 1332, 1333, 1537, 1538, 1812, 1039, 1332, 1333, 1360, 1461, 1538, 1812, 167, 660, 1464, 1539, 1540, 1768, + 1812, 34, 499, 785, 786, 1539, 1540, 1577, 1812, 1362, 1541, 1542, 1543, 1812, 1171, 1362, 1541, 1542, 1543, + 1812, 1362, 1541, 1542, 1543, 1812, 1544, 1745, 1812, 1362, 1545, 1546, 1547, 1812, 704, 1362, 1545, 1546, 1547, + 1812, 1362, 1545, 1546, 1547, 1812, 479, 836, 837, 906, 1548, 1688, 1812, 1248, 1439, 1549, 1550, 1562, 1812, + 1232, 1550, 1812, 832, 1551, 1552, 1691, 1692, 1812, 64, 65, 933, 981, 1552, 1786, 1812, 1273, 1275, 1553, + 1554, 1555, 1812, 1272, 1274, 1275, 1554, 1722, 1812, 1222, 1273, 1553, 1554, 1555, 1812, 628, 1556, 1721, 1736, + 1768, 1812, 1053, 1557, 1628, 1694, 1766, 1812, 1558, 1786, 1812, 1559, 1728, 1812, 1560, 1786, 1812, 709, 1439, + 1561, 1562, 1563, 1812, 1439, 1562, 1812, 1371, 1439, 1561, 1562, 1563, 1812, 1564, 1649, 1786, 1812, 122, 123, + 1201, 1565, 1566, 1786, 1812, 122, 123, 1201, 1565, 1566, 1786, 1812, 1567, 1568, 1812, 1397, 1568, 1812, 119, + 299, 983, 1569, 1570, 1786, 1812, 1569, 1570, 1812, 1571, 1745, 1812, 1572, 1786, 1812, 1573, 1786, 1812, 1574, + 1575, 1812, 1126, 1481, 1482, 1486, 1575, 1812, 499, 781, 782, 1576, 1577, 1812, 166, 168, 660, 1528, 1576, + 1577, 1768, 1812, 1578, 1579, 1812, 537, 979, 1191, 1579, 1580, 1812, 1207, 1523, 1580, 1745, 1812, 658, 681, + 748, 819, 1581, 1727, 1812, 589, 1582, 1583, 1586, 1587, 1812, 340, 402, 584, 1050, 1583, 1786, 1812, 1584, + 1585, 1812, 212, 214, 1037, 1233, 1585, 1768, 1812, 1586, 1587, 1812, 588, 666, 681, 1582, 1587, 1812, 568, + 1499, 
1588, 1812, 1588, 1589, 1812, 412, 415, 633, 1175, 1590, 1786, 1812, 999, 1000, 1591, 1592, 1593, 1812, + 999, 1592, 1812, 600, 999, 1591, 1592, 1593, 1812, 672, 788, 1594, 1595, 1596, 1812, 632, 671, 672, 762, + 1595, 1812, 694, 788, 1594, 1595, 1596, 1812, 959, 1597, 1809, 1812, 1598, 1599, 1812, 530, 901, 1289, 1299, + 1300, 1599, 1812, 410, 1374, 1600, 1602, 1678, 1728, 1733, 1812, 411, 1429, 1601, 1606, 1678, 1728, 1733, 1812, + 1161, 1602, 1603, 1604, 1812, 26, 858, 1161, 1211, 1602, 1603, 1604, 1812, 1161, 1602, 1603, 1604, 1812, 563, + 1104, 1605, 1745, 1812, 1161, 1606, 1607, 1608, 1812, 858, 1161, 1606, 1607, 1608, 1812, 1161, 1606, 1607, 1608, + 1812, 1609, 1610, 1812, 163, 647, 1230, 1231, 1477, 1610, 1812, 842, 1005, 1045, 1048, 1611, 1612, 1812, 1612, + 1808, 1812, 1613, 1745, 1812, 1614, 1786, 1812, 1615, 1786, 1812, 874, 973, 1616, 1617, 1618, 1812, 875, 972, + 973, 1182, 1617, 1812, 874, 1544, 1616, 1617, 1618, 1812, 125, 352, 1377, 1619, 1620, 1630, 1786, 1812, 1619, + 1620, 1812, 1621, 1622, 1812, 1079, 1622, 1812, 39, 224, 1373, 1623, 1624, 1786, 1812, 1623, 1624, 1786, 1812, + 121, 898, 1625, 1626, 1627, 1728, 1733, 1812, 881, 1352, 1376, 1626, 1812, 1625, 1626, 1627, 1628, 1812, 1355, + 1628, 1812, 177, 447, 652, 1629, 1630, 1631, 1786, 1812, 472, 1629, 1630, 1631, 1786, 1812, 132, 898, 1632, + 1633, 1634, 1728, 1733, 1812, 827, 882, 1352, 1633, 1812, 1628, 1632, 1633, 1634, 1812, 1635, 1636, 1812, 570, + 1217, 1339, 1636, 1704, 1705, 1812, 1454, 1637, 1786, 1812, 149, 490, 534, 1638, 1639, 1640, 1812, 524, 534, + 1638, 1639, 1640, 1812, 490, 1218, 1638, 1639, 1640, 1812, 855, 1641, 1642, 1658, 1812, 855, 1641, 1642, 1658, + 1812, 855, 1643, 1644, 1645, 1812, 855, 1643, 1644, 1645, 1812, 77, 855, 1147, 1643, 1644, 1645, 1659, 1812, + 821, 1008, 1015, 1097, 1646, 1647, 1812, 1647, 1772, 1812, 139, 140, 787, 1648, 1745, 1786, 1812, 129, 355, + 1564, 1649, 1650, 1786, 1812, 1649, 1650, 1812, 210, 216, 223, 880, 1135, 1286, 1640, 1651, 1768, 1812, 
27, + 859, 1205, 1276, 1371, 1651, 1652, 1812, 631, 1378, 1653, 1654, 1812, 145, 454, 1114, 1301, 1654, 1766, 1812, + 56, 1458, 1532, 1655, 1656, 1657, 1812, 107, 222, 274, 392, 778, 1191, 1336, 1655, 1656, 1681, 1768, 1812, + 1458, 1532, 1655, 1656, 1657, 1812, 855, 1641, 1642, 1658, 1659, 1812, 154, 660, 1658, 1659, 1768, 1812, 403, + 404, 1142, 1660, 1745, 1786, 1812, 1661, 1745, 1812, 1431, 1433, 1478, 1662, 1663, 1812, 326, 329, 811, 867, + 1663, 1745, 1766, 1812, 44, 1409, 1458, 1664, 1665, 1666, 1812, 745, 1131, 1664, 1665, 1768, 1812, 1409, 1458, + 1664, 1665, 1666, 1812, 341, 344, 513, 1667, 1668, 1786, 1812, 341, 344, 513, 1667, 1668, 1786, 1812, 1403, + 1669, 1812, 1670, 1671, 1812, 1196, 1206, 1208, 1590, 1671, 1812, 1672, 1673, 1812, 200, 334, 1437, 1672, 1673, + 1768, 1812, 1136, 1674, 1786, 1812, 283, 284, 901, 1675, 1768, 1808, 1812, 79, 105, 1425, 1676, 1677, 1786, + 1812, 79, 105, 1425, 1676, 1677, 1786, 1812, 1393, 1600, 1601, 1628, 1678, 1694, 1728, 1733, 1812, 1678, 1679, + 1812, 1012, 1310, 1311, 1680, 1714, 1812, 683, 717, 1318, 1319, 1680, 1681, 1812, 1682, 1786, 1812, 1007, 1014, + 1628, 1683, 1684, 1694, 1728, 1733, 1812, 1628, 1683, 1684, 1694, 1745, 1812, 1685, 1786, 1812, 83, 84, 586, + 1686, 1745, 1786, 1812, 1687, 1812, 906, 1548, 1688, 1812, 1687, 1689, 1715, 1745, 1812, 1469, 1690, 1712, 1716, + 1717, 1812, 1691, 1692, 1812, 558, 980, 1363, 1551, 1692, 1812, 948, 949, 1532, 1535, 1628, 1693, 1694, 1695, + 1812, 1034, 1694, 1812, 1628, 1693, 1694, 1695, 1786, 1812, 229, 280, 1079, 1696, 1697, 1768, 1812, 1696, 1697, + 1812, 1379, 1395, 1698, 1745, 1812, 1155, 1239, 1242, 1243, 1699, 1812, 641, 793, 808, 1306, 1699, 1700, 1812, + 1107, 1108, 1295, 1701, 1702, 1812, 17, 18, 330, 940, 998, 1701, 1702, 1749, 1768, 1812, 1103, 1703, 1786, + 1812, 1704, 1705, 1812, 1339, 1635, 1636, 1698, 1705, 1812, 1706, 1707, 1812, 306, 610, 1226, 1283, 1366, 1707, + 1812, 1075, 1460, 1708, 1709, 1710, 1812, 747, 1459, 1460, 1668, 1709, 1812, 833, 1075, 
1708, 1709, 1710, 1812, + 1711, 1716, 1812, 1525, 1711, 1712, 1715, 1716, 1812, 1713, 1714, 1812, 1012, 1310, 1467, 1680, 1714, 1812, 1687, + 1689, 1711, 1712, 1715, 1716, 1717, 1812, 1324, 1690, 1712, 1716, 1717, 1812, 1524, 1711, 1715, 1716, 1717, 1812, + 640, 1357, 1718, 1719, 1768, 1812, 99, 994, 1265, 1266, 1304, 1718, 1719, 1812, 653, 1568, 1720, 1721, 1812, + 1148, 1721, 1812, 126, 128, 1223, 1722, 1723, 1786, 1812, 126, 128, 1223, 1722, 1723, 1786, 1812, 36, 1135, + 1162, 1724, 1725, 1768, 1812, 131, 1089, 1268, 1392, 1476, 1724, 1725, 1812, 1177, 1726, 1786, 1812, 681, 1581, + 1727, 1812, 489, 542, 614, 626, 731, 1090, 1403, 1559, 1728, 1768, 1812, 452, 529, 1728, 1729, 1732, 1733, + 1735, 1812, 143, 354, 1323, 1730, 1731, 1786, 1812, 1730, 1731, 1812, 824, 1628, 1694, 1728, 1729, 1732, 1733, + 1734, 1812, 1728, 1733, 1812, 453, 562, 1728, 1732, 1733, 1734, 1738, 1812, 1502, 1735, 1736, 1737, 1812, 93, + 851, 1502, 1556, 1735, 1736, 1737, 1812, 1502, 1735, 1736, 1737, 1812, 1502, 1738, 1739, 1740, 1812, 851, 1502, + 1738, 1739, 1740, 1812, 1502, 1738, 1739, 1740, 1812, 518, 609, 871, 999, 1078, 1403, 1741, 1768, 1812, 834, + 944, 1495, 1742, 1743, 1775, 1812, 1193, 1743, 1812, 281, 455, 539, 1744, 1745, 1786, 1812, 281, 455, 471, + 539, 820, 829, 861, 866, 904, 995, 996, 1061, 1104, 1149, 1197, 1204, 1229, 1234, 1354, 1379, 1405, 1438, + 1444, 1456, 1523, 1580, 1605, 1687, 1689, 1698, 1745, 1752, 1753, 1786, 1812, 960, 1746, 1786, 1812, 427, 581, + 613, 1747, 1748, 1749, 1812, 613, 832, 1747, 1748, 1749, 1812, 581, 1370, 1747, 1748, 1749, 1812, 1750, 1751, + 1812, 288, 700, 722, 1335, 1336, 1751, 1812, 1752, 1812, 1745, 1752, 1753, 1782, 1812, 792, 1754, 1755, 1756, + 1812, 792, 1754, 1755, 1756, 1812, 792, 1293, 1754, 1755, 1756, 1812, 621, 1757, 1758, 1812, 962, 1757, 1758, + 1812, 792, 1759, 1760, 1761, 1812, 792, 1759, 1760, 1761, 1812, 80, 792, 941, 1293, 1759, 1760, 1761, 1812, + 1209, 1762, 1783, 1784, 1792, 1812, 1763, 1764, 1812, 801, 1307, 1508, 
1509, 1764, 1812, 1765, 1766, 1812, 474, + 771, 773, 1766, 1812, 715, 1517, 1721, 1767, 1768, 1812, 473, 239, 240, 548, 1769, 1786, 1793, 1812, 999, + 1270, 1592, 1770, 1771, 1812, 1153, 1164, 1165, 1771, 1812, 142, 724, 947, 1772, 1773, 1774, 1812, 947, 996, + 1772, 1773, 1774, 1812, 724, 885, 1381, 1772, 1773, 1774, 1812, 847, 1495, 1742, 1775, 1776, 1812, 14, 265, + 940, 1749, 1768, 1775, 1776, 1812, 1018, 1777, 1778, 1779, 1812, 1018, 1777, 1778, 1779, 1812, 175, 1018, 1140, + 1200, 1777, 1778, 1779, 1812, 1780, 1783, 1812, 1781, 1786, 1812, 1752, 1753, 1780, 1782, 1783, 1784, 1792, 1812, + 864, 1762, 1783, 1784, 1792, 1812, 1769, 1780, 1782, 1783, 1784, 1812, 1785, 1786, 1812, 23, 38, 49, 83, + 84, 100, 112, 113, 139, 140, 144, 146, 147, 152, 158, 164, 190, 191, 218, 219, 241, 242, 244, + 246, 255, 256, 257, 268, 269, 270, 294, 305, 307, 309, 310, 314, 343, 345, 348, 349, 350, 359, + 364, 368, 369, 370, 371, 380, 381, 386, 403, 404, 406, 423, 425, 438, 445, 448, 450, 460, 461, + 465, 470, 491, 505, 519, 527, 554, 566, 567, 586, 592, 603, 605, 611, 622, 623, 630, 644, 787, + 828, 877, 930, 985, 1023, 1067, 1081, 1119, 1142, 1185, 1203, 1216, 1235, 1264, 1320, 1367, 1745, 1766, 1786, + 1812, 728, 1669, 1787, 1788, 1789, 1812, 1669, 1788, 1812, 1271, 1669, 1787, 1788, 1789, 1812, 791, 1423, 1439, + 1562, 1790, 1812, 1790, 1791, 1812, 1780, 1782, 1783, 1792, 1793, 1812, 239, 240, 548, 1769, 1786, 1793, 1812, + 1794, 1795, 1812, 853, 1076, 1177, 1342, 1795, 1812, 1101, 1796, 1797, 1812, 1094, 1796, 1797, 1812, 188, 189, + 523, 1786, 1798, 1799, 1812, 188, 189, 523, 1786, 1798, 1799, 1812, 1314, 1800, 1801, 1802, 1812, 68, 990, + 1314, 1334, 1800, 1801, 1802, 1812, 1314, 1800, 1801, 1802, 1812, 803, 846, 1134, 1803, 1804, 1812, 418, 424, + 655, 1345, 1786, 1804, 1812, 1314, 1805, 1806, 1807, 1812, 1314, 1334, 1805, 1806, 1807, 1812, 1314, 1805, 1806, + 1807, 1812, 429, 616, 937, 1808, 1809, 1810, 1812, 616, 959, 1597, 1808, 1809, 1810, 1812, 773, 937, 1808, + 1809, 
1810, 1812, 1812, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, + 493, 494, 495, 497, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, + 514, 515, 516, 517, 518, 519, 520, 522, 523, 524, 525, 526, 527, 528, 529, 531, 532, 533, 534, + 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, + 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, + 574, 575, 576, 577, 578, 579, 581, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 595, + 596, 597, 598, 599, 600, 601, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, + 616, 617, 618, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 635, 636, + 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, + 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, + 675, 676, 677, 678, 679, 680, 681, 682, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, + 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, + 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, + 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, + 753, 754, 755, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, + 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, + 792, 794, 795, 796, 797, 798, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, + 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, + 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, + 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, + 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 
882, 883, 884, 886, 887, 888, 889, + 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, + 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, + 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, + 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, + 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, + 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, + 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, + 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, + 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1058, 1059, 1060, 1061, + 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, + 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, + 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, + 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, + 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, + 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, + 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, + 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, + 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 
1232, 1233, 1234, + 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, + 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, + 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, + 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, + 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, + 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, + 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, + 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, + 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, + 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, + 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, + 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, + 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, + 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, + 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, + 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1532, 1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540, + 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, + 1560, 
1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, + 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1598, + 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, + 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, + 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, + 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, + 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1689, 1690, 1691, 1692, 1693, 1694, + 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, + 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1728, 1729, 1730, 1731, 1732, 1733, + 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, + 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, + 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, + 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, + 1810, 1811, 1812}; } // namespace MatrixIssue402 #endif // MATRIX_ISSUE_402 diff --git a/packages/kokkos-kernels/test_common/KokkosKernels_MatrixConverter.cpp b/packages/kokkos-kernels/test_common/KokkosKernels_MatrixConverter.cpp index ece082143cec..ad69fd3aeb51 100644 --- a/packages/kokkos-kernels/test_common/KokkosKernels_MatrixConverter.cpp +++ b/packages/kokkos-kernels/test_common/KokkosKernels_MatrixConverter.cpp @@ -33,8 +33,7 @@ int main(int argc, char *argv[]) { for (int i = 1; i < 
argc; ++i) { if (0 == Test::string_compare_no_case(argv[i], "--symmetrize")) { symmetrize = true; - } else if (0 == - Test::string_compare_no_case(argv[i], "--remove_diagonal")) { + } else if (0 == Test::string_compare_no_case(argv[i], "--remove_diagonal")) { remove_diagonal = true; } else if (0 == Test::string_compare_no_case(argv[i], "--transpose")) { transpose = true; @@ -76,9 +75,7 @@ int main(int argc, char *argv[]) { { typedef Kokkos::DefaultHostExecutionSpace MyExecSpace; - typedef - typename KokkosSparse::CrsMatrix - crstmat_t; + typedef typename KokkosSparse::CrsMatrix crstmat_t; typedef typename crstmat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; typedef typename graph_t::entries_type::non_const_type cols_view_t; @@ -88,8 +85,7 @@ int main(int argc, char *argv[]) { typedef typename graph_t::entries_type::const_type c_cols_view_t; typedef typename crstmat_t::values_type::const_type c_values_view_t; - crstmat_t a_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix(in_mtx); + crstmat_t a_crsmat = KokkosKernels::Impl::read_kokkos_crst_matrix(in_mtx); c_row_map_view_t orm = a_crsmat.graph.row_map; c_cols_view_t oentries = a_crsmat.graph.entries; @@ -150,8 +146,7 @@ int main(int argc, char *argv[]) { } } if (obegin != nrm[i + 1]) { - std::cout << "i:" << i << " nrm[i+1]:" << nrm[i + 1] - << " obegin:" << obegin << std::endl; + std::cout << "i:" << i << " nrm[i+1]:" << nrm[i + 1] << " obegin:" << obegin << std::endl; exit(1); } } @@ -171,8 +166,7 @@ int main(int argc, char *argv[]) { } graph_t transpose_graph(new_entries, new_rowmap); - crstmat_t transpose_matrix("transpose", numrows, new_values, - transpose_graph); + crstmat_t transpose_matrix("transpose", numrows, new_values, transpose_graph); a_crsmat = transpose_matrix; orm = a_crsmat.graph.row_map; @@ -192,18 +186,16 @@ int main(int argc, char *argv[]) { row_map_view_t new_rowmap; cols_view_t new_entries; - 
KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap< - c_row_map_view_t, c_cols_view_t, row_map_view_t, cols_view_t, - MyExecSpace>(numrows, orm, oentries, new_rowmap, new_entries); + KokkosKernels::Impl::symmetrize_graph_symbolic_hashmap(numrows, orm, oentries, + new_rowmap, new_entries); values_view_t new_values("new_values", new_entries.extent(0)); - KokkosKernels::sort_crs_matrix(new_rowmap, new_entries, - new_values); + KokkosKernels::sort_crs_matrix(new_rowmap, new_entries, + new_values); graph_t symmetric_graph(new_entries, new_rowmap); - crstmat_t symmetric_marix("transpose", numrows, new_values, - symmetric_graph); + crstmat_t symmetric_marix("transpose", numrows, new_values, symmetric_graph); a_crsmat = symmetric_marix; orm = a_crsmat.graph.row_map; @@ -223,19 +215,16 @@ int main(int argc, char *argv[]) { cols_view_t new_entries("new_rowmap", a_crsmat.nnz()); values_view_t new_values("new_rowmap", a_crsmat.nnz()); - KokkosSparse::Impl::transpose_matrix< - c_row_map_view_t, c_cols_view_t, c_values_view_t, row_map_view_t, - cols_view_t, values_view_t, row_map_view_t, MyExecSpace>( - a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.graph.row_map, - a_crsmat.graph.entries, a_crsmat.values, new_rowmap, new_entries, - new_values); + KokkosSparse::Impl::transpose_matrix( + a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.graph.row_map, a_crsmat.graph.entries, a_crsmat.values, + new_rowmap, new_entries, new_values); std::cout << 1 << std::endl; std::cout << 2 << std::endl; - KokkosKernels::sort_crs_matrix(new_rowmap, new_entries, - new_values); + KokkosKernels::sort_crs_matrix(new_rowmap, new_entries, + new_values); std::cout << 3 << std::endl; MyExecSpace().fence(); @@ -243,8 +232,7 @@ int main(int argc, char *argv[]) { KokkosKernels::Impl::kk_print_1Dview(new_values); graph_t transpose_graph(new_entries, new_rowmap); - crstmat_t transpose_matrix("transpose", a_crsmat.numRows(), new_values, - transpose_graph); + crstmat_t transpose_matrix("transpose", 
a_crsmat.numRows(), new_values, transpose_graph); a_crsmat = transpose_matrix; orm = a_crsmat.graph.row_map; diff --git a/packages/kokkos-kernels/test_common/KokkosKernels_MyCRSMatrix.hpp b/packages/kokkos-kernels/test_common/KokkosKernels_MyCRSMatrix.hpp index 3a70aa6ef6f8..1ac36e1716af 100644 --- a/packages/kokkos-kernels/test_common/KokkosKernels_MyCRSMatrix.hpp +++ b/packages/kokkos-kernels/test_common/KokkosKernels_MyCRSMatrix.hpp @@ -17,8 +17,7 @@ #include "KokkosKernels_Utils.hpp" namespace MyKokkosSparse { -template +template class StaticCrsGraph { public: typedef OrdinalType data_type; @@ -37,15 +36,12 @@ class StaticCrsGraph { StaticCrsGraph() : entries(), row_map(), num_cols() {} //! Copy constructor (shallow copy). - StaticCrsGraph(const StaticCrsGraph& rhs) - : entries(rhs.entries), row_map(rhs.row_map), num_cols(rhs.num_cols) {} + StaticCrsGraph(const StaticCrsGraph& rhs) : entries(rhs.entries), row_map(rhs.row_map), num_cols(rhs.num_cols) {} template - StaticCrsGraph(const EntriesType& entries_, const RowMapType& row_map_) - : entries(entries_), row_map(row_map_) {} + StaticCrsGraph(const EntriesType& entries_, const RowMapType& row_map_) : entries(entries_), row_map(row_map_) {} template - StaticCrsGraph(const EntriesType& entries_, const RowMapType& row_map_, - OrdinalType numCols_) + StaticCrsGraph(const EntriesType& entries_, const RowMapType& row_map_, OrdinalType numCols_) : entries(entries_), row_map(row_map_), num_cols(numCols_) {} /** \brief Assign to a view of the rhs array. * If the old view is the last view @@ -66,19 +62,14 @@ class StaticCrsGraph { ~StaticCrsGraph() {} KOKKOS_INLINE_FUNCTION data_type numRows() const { - return (row_map.extent(0) != 0) - ? row_map.extent(0) - static_cast(1) - : static_cast(0); + return (row_map.extent(0) != 0) ? 
row_map.extent(0) - static_cast(1) : static_cast(0); } }; -template +template class CrsMatrix { public: - typedef - typename Kokkos::ViewTraits::host_mirror_space host_mirror_space; + typedef typename Kokkos::ViewTraits::host_mirror_space host_mirror_space; typedef typename Device::execution_space execution_space; typedef typename Device::memory_space memory_space; @@ -88,49 +79,38 @@ class CrsMatrix { typedef MemoryTraits memory_traits; typedef SizeType size_type; - typedef StaticCrsGraph - StaticCrsGraphType; + typedef StaticCrsGraph StaticCrsGraphType; typedef typename StaticCrsGraphType::entries_type index_type; typedef typename index_type::non_const_value_type const_ordinal_type; typedef typename index_type::non_const_value_type non_const_ordinal_type; typedef typename StaticCrsGraphType::row_map_type row_map_type; - typedef Kokkos::View - values_type; - typedef CrsMatrix - HostMirror; + typedef Kokkos::View values_type; + typedef CrsMatrix HostMirror; StaticCrsGraphType graph; values_type values; CrsMatrix() : numCols_(0) {} - CrsMatrix(const std::string& label, const OrdinalType& ncols, - const values_type& vals, const StaticCrsGraphType& graph_) + CrsMatrix(const std::string& label, const OrdinalType& ncols, const values_type& vals, + const StaticCrsGraphType& graph_) : graph(graph_), values(vals), numCols_(ncols) {} //! The number of rows in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numRows() const { - return graph.numRows(); - } + KOKKOS_INLINE_FUNCTION ordinal_type numRows() const { return graph.numRows(); } //! The number of columns in the sparse matrix. KOKKOS_INLINE_FUNCTION ordinal_type numCols() const { return numCols_; } //! The number of stored entries in the sparse matrix. 
- KOKKOS_INLINE_FUNCTION size_type nnz() const { - return graph.entries.extent(0); - } + KOKKOS_INLINE_FUNCTION size_type nnz() const { return graph.entries.extent(0); } ordinal_type numCols_; }; template -crsMat_t get_crsmat( - typename crsMat_t::row_map_type::non_const_type::value_type* xadj, - typename crsMat_t::index_type::non_const_type::value_type* adj, - typename crsMat_t::values_type::non_const_type::value_type* ew, - typename crsMat_t::row_map_type::non_const_type::value_type ne, - typename crsMat_t::index_type::non_const_type::value_type nv, - int is_one_based) { +crsMat_t get_crsmat(typename crsMat_t::row_map_type::non_const_type::value_type* xadj, + typename crsMat_t::index_type::non_const_type::value_type* adj, + typename crsMat_t::values_type::non_const_type::value_type* ew, + typename crsMat_t::row_map_type::non_const_type::value_type ne, + typename crsMat_t::index_type::non_const_type::value_type nv, int is_one_based) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename crsMat_t::row_map_type::non_const_type row_map_view_t; typedef typename crsMat_t::index_type::non_const_type cols_view_t; @@ -144,27 +124,21 @@ crsMat_t get_crsmat( cols_view_t columns_view("colsmap_view", ne); values_view_t values_view("values_view", ne); - KokkosKernels::Impl::copy_vector( - ne, ew, values_view); - KokkosKernels::Impl::copy_vector( - ne, adj, columns_view); - KokkosKernels::Impl::copy_vector( - nv + 1, xadj, rowmap_view); + KokkosKernels::Impl::copy_vector(ne, ew, values_view); + KokkosKernels::Impl::copy_vector(ne, adj, columns_view); + KokkosKernels::Impl::copy_vector(nv + 1, xadj, rowmap_view); size_type ncols = 0; - KokkosKernels::Impl::view_reduce_max( - ne, columns_view, ncols); + KokkosKernels::Impl::view_reduce_max(ne, columns_view, ncols); ncols += 1; if (is_one_based) { // if algorithm is mkl_csrmultcsr convert to 1 base so that we dont // dublicate the memory at the experiments/ - KokkosKernels::Impl::kk_a_times_x_plus_b( - nv + 1, 
rowmap_view, rowmap_view, 1, 1); - KokkosKernels::Impl::kk_a_times_x_plus_b(ne, columns_view, - columns_view, 1, 1); + KokkosKernels::Impl::kk_a_times_x_plus_b(nv + 1, rowmap_view, + rowmap_view, 1, 1); + KokkosKernels::Impl::kk_a_times_x_plus_b(ne, columns_view, + columns_view, 1, 1); } graph_t static_graph(columns_view, rowmap_view); diff --git a/packages/kokkos-kernels/test_common/KokkosKernels_TestUtils.hpp b/packages/kokkos-kernels/test_common/KokkosKernels_TestUtils.hpp index 232b66242a27..24c07925e547 100644 --- a/packages/kokkos-kernels/test_common/KokkosKernels_TestUtils.hpp +++ b/packages/kokkos-kernels/test_common/KokkosKernels_TestUtils.hpp @@ -27,20 +27,16 @@ #include "../tpls/gtest/gtest/gtest.h" //for EXPECT_** // Simplify ETI macros -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) #define KOKKOSKERNELS_TEST_ALL_TYPES #endif -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - defined(KOKKOSKERNELS_TEST_ALL_TYPES) +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || defined(KOKKOSKERNELS_TEST_ALL_TYPES) #define KOKKOSKERNELS_TEST_LAYOUTLEFT #endif -#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - defined(KOKKOSKERNELS_TEST_ALL_TYPES) +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || defined(KOKKOSKERNELS_TEST_ALL_TYPES) #define KOKKOSKERNELS_TEST_LAYOUTRIGHT #endif -#if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || \ - defined(KOKKOSKERNELS_TEST_ALL_TYPES) +#if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || defined(KOKKOSKERNELS_TEST_ALL_TYPES) #define KOKKOSKERNELS_TEST_LAYOUTSTRIDE #endif #if defined(KOKKOSKERNELS_INST_FLOAT) || defined(KOKKOSKERNELS_TEST_ALL_TYPES) @@ -52,12 +48,10 @@ #if defined(KOKKOSKERNELS_INST_INT) || defined(KOKKOSKERNELS_TEST_ALL_TYPES) #define KOKKOSKERNELS_TEST_INT #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - defined(KOKKOSKERNELS_TEST_ALL_TYPES) +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) 
|| defined(KOKKOSKERNELS_TEST_ALL_TYPES) #define KOKKOSKERNELS_TEST_COMPLEX_FLOAT #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - defined(KOKKOSKERNELS_TEST_ALL_TYPES) +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || defined(KOKKOSKERNELS_TEST_ALL_TYPES) #define KOKKOSKERNELS_TEST_COMPLEX_DOUBLE #endif @@ -86,13 +80,10 @@ namespace Test { // - 'false', then this utility class will use Kokkos::create_mirror() template struct view_stride_adapter { - static_assert(Kokkos::is_view_v, - "view_stride_adapter: ViewType must be a Kokkos::View"); - static_assert(ViewType::rank >= 1 && ViewType::rank <= 2, - "view_stride_adapter: ViewType must be rank 1 or rank 2"); + static_assert(Kokkos::is_view_v, "view_stride_adapter: ViewType must be a Kokkos::View"); + static_assert(ViewType::rank >= 1 && ViewType::rank <= 2, "view_stride_adapter: ViewType must be rank 1 or rank 2"); - static constexpr bool strided = std::is_same::value; + static constexpr bool strided = std::is_same::value; static constexpr int rank = ViewType::rank; using DView = ViewType; @@ -101,40 +92,31 @@ struct view_stride_adapter { // But if strided, the base views have one additional dimension, so that // d_view/h_view have stride > 1 between consecutive elements. using DViewBase = std::conditional_t< - strided, - Kokkos::View, - DView>; + strided, Kokkos::View, DView>; using HViewBase = typename DViewBase::HostMirror; view_stride_adapter(const std::string& label, int m, int n = 1) { if constexpr (rank == 1) { if constexpr (strided) { d_base = DViewBase(label, m, 2); - h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) - : Kokkos::create_mirror(d_base); + h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) : Kokkos::create_mirror(d_base); d_view = Kokkos::subview(d_base, Kokkos::ALL(), 0); h_view = Kokkos::subview(h_base, Kokkos::ALL(), 0); } else { d_base = DViewBase(label, m); - h_base = createMirrorView ? 
Kokkos::create_mirror_view(d_base) - : Kokkos::create_mirror(d_base); + h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) : Kokkos::create_mirror(d_base); d_view = d_base; h_view = h_base; } } else { if constexpr (strided) { d_base = DViewBase(label, m, n, 2); - h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) - : Kokkos::create_mirror(d_base); - d_view = - Kokkos::subview(d_base, Kokkos::ALL(), Kokkos::make_pair(0, n), 0); - h_view = - Kokkos::subview(h_base, Kokkos::ALL(), Kokkos::make_pair(0, n), 0); + h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) : Kokkos::create_mirror(d_base); + d_view = Kokkos::subview(d_base, Kokkos::ALL(), Kokkos::make_pair(0, n), 0); + h_view = Kokkos::subview(h_base, Kokkos::ALL(), Kokkos::make_pair(0, n), 0); } else { d_base = DViewBase(label, m, n); - h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) - : Kokkos::create_mirror(d_base); + h_base = createMirrorView ? Kokkos::create_mirror_view(d_base) : Kokkos::create_mirror(d_base); d_view = d_base; h_view = h_base; } @@ -152,16 +134,14 @@ struct view_stride_adapter { }; template -void EXPECT_NEAR_KK(Scalar1 val1, Scalar2 val2, Scalar3 tol, - std::string msg = "") { +void EXPECT_NEAR_KK(Scalar1 val1, Scalar2 val2, Scalar3 tol, std::string msg = "") { typedef Kokkos::ArithTraits AT1; typedef Kokkos::ArithTraits AT3; EXPECT_LE((double)AT1::abs(val1 - val2), (double)AT3::abs(tol)) << msg; } template -void EXPECT_NEAR_KK_REL(Scalar1 val1, Scalar2 val2, Scalar3 tol, - std::string msg = "") { +void EXPECT_NEAR_KK_REL(Scalar1 val1, Scalar2 val2, Scalar3 tol, std::string msg = "") { typedef typename std::remove_reference::type hv1_type; typedef typename std::remove_reference::type hv2_type; const auto ahv1 = Kokkos::ArithTraits::abs(val1); @@ -171,10 +151,9 @@ void EXPECT_NEAR_KK_REL(Scalar1 val1, Scalar2 val2, Scalar3 tol, // Special overload for accurate value by value SIMD vectors comparison template -void EXPECT_NEAR_KK_REL( - const 
KokkosBatched::Vector, VecLen>& val1, - const KokkosBatched::Vector, VecLen>& val2, - Tolerance tol, std::string msg = "") { +void EXPECT_NEAR_KK_REL(const KokkosBatched::Vector, VecLen>& val1, + const KokkosBatched::Vector, VecLen>& val2, Tolerance tol, + std::string msg = "") { for (int i = 0; i < VecLen; ++i) { EXPECT_NEAR_KK_REL(val1[i], val2[i], tol, msg); } @@ -219,9 +198,7 @@ void EXPECT_NEAR_KK_REL_1DVIEW(ViewType1 v1, ViewType2 v2, Scalar tol) { /// filename where the failure originated \param func The function where the /// failure originated \param line The line number where the failure originated /// \return a new string containing: " > from file:func:line\n > " -static inline const std::string kk_failure_str(std::string file, - std::string func, - const int line) { +static inline const std::string kk_failure_str(std::string file, std::string func, const int line) { std::string failure_msg = " > from "; failure_msg += (file + ":" + func + ":" + std::to_string(line) + "\n > "); return std::string(failure_msg); @@ -235,8 +212,7 @@ using halfScalarType = Kokkos::Experimental::half_t; using bhalfScalarType = Kokkos::Experimental::bhalf_t; #endif // KOKKOS_BHALF_T_IS_FLOAT -template +template struct SharedVanillaGEMM { bool A_t, B_t, A_c, B_c; int C_rows, C_cols, A_cols; @@ -247,52 +223,43 @@ struct SharedVanillaGEMM { typedef typename ViewTypeA::value_type ScalarA; typedef typename ViewTypeB::value_type ScalarB; typedef typename ViewTypeC::value_type ScalarC; - typedef Kokkos::View - SubviewTypeA; - typedef Kokkos::View - SubviewTypeB; + typedef Kokkos::View SubviewTypeA; + typedef Kokkos::View SubviewTypeB; typedef Kokkos::ArithTraits APT; typedef typename APT::mag_type mag_type; ScalarA alpha; ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, C_rows), [&](const int& i) { - // Give each kokkos thread a vector of A - SubviewTypeA 
a_vec; - if (A_t) - a_vec = Kokkos::subview(A, Kokkos::ALL(), i); - else - a_vec = Kokkos::subview(A, i, Kokkos::ALL()); - - // Have all vector lanes perform the dot product - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, C_cols), [&](const int& j) { - SubviewTypeB b_vec; - if (B_t) - b_vec = Kokkos::subview(B, j, Kokkos::ALL()); - else - b_vec = Kokkos::subview(B, Kokkos::ALL(), j); - ScalarC ab = ScalarC(0); - for (int k = 0; k < A_cols; k++) { - auto a = A_c ? APT::conj(a_vec(k)) : a_vec(k); - auto b = B_c ? APT::conj(b_vec(k)) : b_vec(k); - ab += a * b; - } - C(i, j) = beta * C(i, j) + alpha * ab; - }); - }); + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, C_rows), [&](const int& i) { + // Give each kokkos thread a vector of A + SubviewTypeA a_vec; + if (A_t) + a_vec = Kokkos::subview(A, Kokkos::ALL(), i); + else + a_vec = Kokkos::subview(A, i, Kokkos::ALL()); + + // Have all vector lanes perform the dot product + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, C_cols), [&](const int& j) { + SubviewTypeB b_vec; + if (B_t) + b_vec = Kokkos::subview(B, j, Kokkos::ALL()); + else + b_vec = Kokkos::subview(B, Kokkos::ALL(), j); + ScalarC ab = ScalarC(0); + for (int k = 0; k < A_cols; k++) { + auto a = A_c ? APT::conj(a_vec(k)) : a_vec(k); + auto b = B_c ? 
APT::conj(b_vec(k)) : b_vec(k); + ab += a * b; + } + C(i, j) = beta * C(i, j) + alpha * ab; + }); + }); } }; // C(i,:,:) = alpha * (A(i,:,:) * B(i,:,:)) + beta * C(i,:,:) -template +template struct Functor_BatchedVanillaGEMM { bool A_t, B_t, A_c, B_c, batch_size_last_dim = false; ViewTypeA A; @@ -302,20 +269,15 @@ struct Functor_BatchedVanillaGEMM { using ScalarA = typename ViewTypeA::value_type; using ScalarB = typename ViewTypeB::value_type; using ScalarC = typename ViewTypeC::value_type; - using SubviewTypeA = typename Kokkos::View; - using SubviewTypeB = typename Kokkos::View; - using SubviewTypeC = typename Kokkos::View; + using SubviewTypeA = typename Kokkos::View; + using SubviewTypeB = typename Kokkos::View; + using SubviewTypeC = typename Kokkos::View; ScalarA alpha; ScalarC beta; KOKKOS_INLINE_FUNCTION - void operator()( - const typename Kokkos::TeamPolicy::member_type& team) - const { + void operator()(const typename Kokkos::TeamPolicy::member_type& team) const { int i = team.league_rank(); SubviewTypeA _A; SubviewTypeB _B; @@ -330,41 +292,35 @@ struct Functor_BatchedVanillaGEMM { _B = Kokkos::subview(B, i, Kokkos::ALL(), Kokkos::ALL()); _C = Kokkos::subview(C, i, Kokkos::ALL(), Kokkos::ALL()); } - struct SharedVanillaGEMM - vgemm; + struct SharedVanillaGEMM vgemm; vgemm.A_t = A_t; vgemm.B_t = B_t; vgemm.A_c = A_c; vgemm.B_c = B_c; vgemm.C_rows = batch_size_last_dim ? C.extent(0) : C.extent(1); vgemm.C_cols = batch_size_last_dim ? C.extent(1) : C.extent(2); - vgemm.A_cols = batch_size_last_dim ? (A_t ? A.extent(0) : A.extent(1)) - : (A_t ? A.extent(1) : A.extent(2)); - vgemm.A = _A; - vgemm.B = _B; - vgemm.C = _C; - vgemm.alpha = alpha; - vgemm.beta = beta; + vgemm.A_cols = batch_size_last_dim ? (A_t ? A.extent(0) : A.extent(1)) : (A_t ? 
A.extent(1) : A.extent(2)); + vgemm.A = _A; + vgemm.B = _B; + vgemm.C = _C; + vgemm.alpha = alpha; + vgemm.beta = beta; vgemm(team); } inline void run() { Kokkos::parallel_for( "Test::VanillaGEMM", - Kokkos::TeamPolicy( - batch_size_last_dim ? C.extent(2) : C.extent(0), Kokkos::AUTO, - KokkosKernels::Impl::kk_get_max_vector_size()), + Kokkos::TeamPolicy(batch_size_last_dim ? C.extent(2) : C.extent(0), Kokkos::AUTO, + KokkosKernels::Impl::kk_get_max_vector_size()), *this); } }; // Compute C := alpha * AB + beta * C template -void vanillaGEMM(typename ViewTypeC::non_const_value_type alpha, - const ViewTypeA& A, const ViewTypeB& B, - typename ViewTypeC::non_const_value_type beta, - const ViewTypeC& C) { +void vanillaGEMM(typename ViewTypeC::non_const_value_type alpha, const ViewTypeA& A, const ViewTypeB& B, + typename ViewTypeC::non_const_value_type beta, const ViewTypeC& C) { using value_type = typename ViewTypeC::non_const_value_type; using KAT = Kokkos::ArithTraits; int m = A.extent(0); @@ -381,13 +337,11 @@ void vanillaGEMM(typename ViewTypeC::non_const_value_type alpha, } } -template -KOKKOS_INLINE_FUNCTION void vanillaGEMV(char mode, AlphaType alpha, - const ViewTypeA& A, const ViewTypeX& x, +template +KOKKOS_INLINE_FUNCTION void vanillaGEMV(char mode, AlphaType alpha, const ViewTypeA& A, const ViewTypeX& x, BetaType beta, const ViewTypeY& y) { - using ScalarY = typename ViewTypeY::non_const_value_type; - using KAT_A = Kokkos::ArithTraits; + using ScalarY = typename ViewTypeY::non_const_value_type; + using KAT_A = Kokkos::ArithTraits; const bool transposed = mode == 'T' || mode == 'C'; const bool conjugated = mode == 'C'; const bool has_beta = beta != Kokkos::ArithTraits::zero(); @@ -411,42 +365,18 @@ class epsilon { constexpr static double value = std::numeric_limits::epsilon(); }; -#if KOKKOS_VERSION < 40199 -// explicit epsilon specializations -#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT -template <> -class epsilon { - public: - constexpr 
static double value = 0.0009765625F; -}; -#endif // KOKKOS_HALF_T_IS_FLOAT - -// explicit epsilon specializations -#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT -template <> -class epsilon { - public: - constexpr static double value = 0.0078125F; -}; -#endif // KOKKOS_HALF_T_IS_FLOAT -#endif // KOKKOS_VERSION < 40199 - using KokkosKernels::Impl::getRandomBounds; -template +template crsMat_t symmetrize(crsMat_t A) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; typedef typename graph_t::row_map_type::non_const_type lno_view_t; typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; - auto host_rowmap = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto host_entries = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); - auto host_values = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); - lno_t numRows = A.numRows(); + auto host_rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto host_entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto host_values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + lno_t numRows = A.numRows(); // symmetrize as input_mat + input_mat^T, to still have a diagonally dominant // matrix typedef std::map Row; @@ -469,18 +399,15 @@ crsMat_t symmetrize(crsMat_t A) { } } // Count entries - Kokkos::View - new_host_rowmap("Rowmap", numRows + 1); + Kokkos::View new_host_rowmap("Rowmap", numRows + 1); size_t accum = 0; for (lno_t r = 0; r <= numRows; r++) { new_host_rowmap(r) = accum; if (r < numRows) accum += symRows[r].size(); } // Allocate new entries/values - Kokkos::View new_host_entries( - "Entries", accum); - Kokkos::View - new_host_values("Values", accum); + Kokkos::View new_host_entries("Entries", accum); + Kokkos::View new_host_values("Values", accum); 
for (lno_t r = 0; r < numRows; r++) { auto rowIt = symRows[r].begin(); for (size_type i = new_host_rowmap(r); i < new_host_rowmap(r + 1); i++) { @@ -495,8 +422,7 @@ crsMat_t symmetrize(crsMat_t A) { Kokkos::deep_copy(new_rowmap, new_host_rowmap); Kokkos::deep_copy(new_entries, new_host_entries); Kokkos::deep_copy(new_values, new_host_values); - return crsMat_t("SymA", numRows, numRows, accum, new_values, new_rowmap, - new_entries); + return crsMat_t("SymA", numRows, numRows, accum, new_values, new_rowmap, new_entries); } // create_random_x_vector and create_random_y_vector can be used together to @@ -507,8 +433,7 @@ vec_t create_random_x_vector(vec_t& kok_x, double max_value = 10.0) { auto h_x = Kokkos::create_mirror_view(kok_x); for (size_t j = 0; j < h_x.extent(1); ++j) { for (size_t i = 0; i < h_x.extent(0); ++i) { - scalar_t r = static_cast(rand()) / - static_cast(RAND_MAX / max_value); + scalar_t r = static_cast(rand()) / static_cast(RAND_MAX / max_value); h_x.access(i, j) = r; } } @@ -563,10 +488,8 @@ std::string value_type_name>() { int string_compare_no_case(const char* str1, const char* str2) { std::string str1_s(str1); std::string str2_s(str2); - for (size_t i = 0; i < str1_s.size(); i++) - str1_s[i] = std::tolower(str1_s[i]); - for (size_t i = 0; i < str2_s.size(); i++) - str2_s[i] = std::tolower(str2_s[i]); + for (size_t i = 0; i < str1_s.size(); i++) str1_s[i] = std::tolower(str1_s[i]); + for (size_t i = 0; i < str2_s.size(); i++) str2_s[i] = std::tolower(str2_s[i]); return strcmp(str1_s.c_str(), str2_s.c_str()); } @@ -590,8 +513,7 @@ class RandCooMat { template T __getter_copy_helper(T src) { - T dst(std::string("RandCooMat.") + typeid(T).name() + " copy", - src.extent(0)); + T dst(std::string("RandCooMat.") + typeid(T).name() + " copy", src.extent(0)); Kokkos::deep_copy(dst, src); ExeSpaceType().fence(); return dst; @@ -605,15 +527,11 @@ class RandCooMat { /// \param n_tuples The number of tuples. 
/// \param min_val The minimum scalar value in the matrix. /// \param max_val The maximum scalar value in the matrix. - RandCooMat(int64_t m, int64_t n, int64_t n_tuples, ScalarType min_val, - ScalarType max_val) { - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; - - info = std::string(std::string("RandCooMat<") + typeid(ScalarType).name() + - ", " + typeid(LayoutType).name() + ", " + - typeid(ExeSpaceType).name() + std::to_string(n) + + RandCooMat(int64_t m, int64_t n, int64_t n_tuples, ScalarType min_val, ScalarType max_val) { + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; + + info = std::string(std::string("RandCooMat<") + typeid(ScalarType).name() + ", " + typeid(LayoutType).name() + + ", " + typeid(ExeSpaceType).name() + std::to_string(n) + "...): rand seed: " + std::to_string(ticks) + "\n"); Kokkos::Random_XorShift64_Pool random(ticks); @@ -642,10 +560,8 @@ class RandCooMat { /// \tparam ScalarType /// \tparam LayoutType /// \tparam Device -template ::size_type> +template class RandCsMatrix { public: using value_type = ScalarType; @@ -705,19 +621,14 @@ class RandCsMatrix { // Copy to device Kokkos::deep_copy(__map_d, __map); - IdViewTypeD tight_ids(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "RandCsMatrix.IdViewTypeD"), - __nnz); - Kokkos::deep_copy( - tight_ids, - Kokkos::subview(__ids, Kokkos::make_pair(0, static_cast(__nnz)))); + IdViewTypeD tight_ids(Kokkos::view_alloc(Kokkos::WithoutInitializing, "RandCsMatrix.IdViewTypeD"), __nnz); + Kokkos::deep_copy(tight_ids, Kokkos::subview(__ids, Kokkos::make_pair(0, static_cast(__nnz)))); __ids_d = tight_ids; } template T __getter_copy_helper(T src) { - T dst(std::string("RandCsMatrix.") + typeid(T).name() + " copy", - src.extent(0)); + T dst(std::string("RandCsMatrix.") + typeid(T).name() + " copy", src.extent(0)); Kokkos::deep_copy(dst, src); return dst; } @@ -729,27 +640,22 @@ class RandCsMatrix 
{ /// \param dim2 The second dimension: columns for Crs or rows for Ccs /// \param min_val The minimum scalar value in the matrix. /// \param max_val The maximum scalar value in the matrix. - RandCsMatrix(Ordinal dim1, Ordinal dim2, ScalarType min_val, - ScalarType max_val, bool fully_sparse = false) { + RandCsMatrix(Ordinal dim1, Ordinal dim2, ScalarType min_val, ScalarType max_val, bool fully_sparse = false) { __dim1 = dim1; __dim2 = dim2; __fully_sparse = fully_sparse; __map_d = MapViewTypeD("RandCsMatrix.ColMapViewType", __dim1 + 1); __map = Kokkos::create_mirror_view(__map_d); __ids_d = IdViewTypeD("RandCsMatrix.RowIdViewType", - dim2 * dim1 + 1); // over-allocated + dim2 * dim1 + 1); // over-allocated __ids = Kokkos::create_mirror_view(__ids_d); - uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count() % - UINT32_MAX; + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count() % UINT32_MAX; - info = std::string( - std::string("RandCsMatrix<") + typeid(ScalarType).name() + ", " + - typeid(LayoutType).name() + ", " + execution_space().name() + ">(" + - std::to_string(dim2) + ", " + std::to_string(dim1) + - "...): rand seed: " + std::to_string(ticks) + - ", fully sparse: " + (__fully_sparse ? "true" : "false") + "\n"); + info = std::string(std::string("RandCsMatrix<") + typeid(ScalarType).name() + ", " + typeid(LayoutType).name() + + ", " + execution_space().name() + ">(" + std::to_string(dim2) + ", " + std::to_string(dim1) + + "...): rand seed: " + std::to_string(ticks) + + ", fully sparse: " + (__fully_sparse ? "true" : "false") + "\n"); Kokkos::Random_XorShift64_Pool random(ticks); __populate_random_cs_mat(ticks); @@ -765,7 +671,7 @@ class RandCsMatrix { // O(c), where c is a constant. 
ScalarType operator()(Size idx) { return __vals(idx); } - size_t get_nnz() { return size_t(__nnz); } + Size get_nnz() { return __nnz; } // dimension2: This is either columns for a Crs matrix or rows for a Ccs // matrix. Ordinal get_dim2() { return __dim2; } @@ -779,18 +685,13 @@ class RandCsMatrix { /// \brief Randomly shuffle the entries in each row (col) of a Crs (Ccs) or Bsr /// matrix. template -void shuffleMatrixEntries(Rowptrs rowptrs, Entries entries, Values values, - const size_t block_size = 1) { - using size_type = typename Rowptrs::non_const_value_type; - using ordinal_type = typename Entries::value_type; - auto rowptrsHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowptrs); - auto entriesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entries); - auto valuesHost = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), values); - ordinal_type numRows = - rowptrsHost.extent(0) ? (rowptrsHost.extent(0) - 1) : 0; +void shuffleMatrixEntries(Rowptrs rowptrs, Entries entries, Values values, const size_t block_size = 1) { + using size_type = typename Rowptrs::non_const_value_type; + using ordinal_type = typename Entries::value_type; + auto rowptrsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rowptrs); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entries); + auto valuesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), values); + ordinal_type numRows = rowptrsHost.extent(0) ? 
(rowptrsHost.extent(0) - 1) : 0; const size_t block_items = block_size * block_size; for (ordinal_type i = 0; i < numRows; i++) { size_type rowBegin = rowptrsHost(i); @@ -799,8 +700,7 @@ void shuffleMatrixEntries(Rowptrs rowptrs, Entries entries, Values values, ordinal_type swapRange = rowEnd - j; size_type swapOffset = j + (rand() % swapRange); std::swap(entriesHost(j), entriesHost(swapOffset)); - std::swap_ranges(valuesHost.data() + j * block_items, - valuesHost.data() + (j + 1) * block_items, + std::swap_ranges(valuesHost.data() + j * block_items, valuesHost.data() + (j + 1) * block_items, valuesHost.data() + swapOffset * block_items); } } diff --git a/packages/kokkos-kernels/test_common/KokkosKernels_Test_Structured_Matrix.hpp b/packages/kokkos-kernels/test_common/KokkosKernels_Test_Structured_Matrix.hpp index 65579896d00b..33a72966f297 100644 --- a/packages/kokkos-kernels/test_common/KokkosKernels_Test_Structured_Matrix.hpp +++ b/packages/kokkos-kernels/test_common/KokkosKernels_Test_Structured_Matrix.hpp @@ -49,10 +49,8 @@ struct fill_1D_matrix_functor { cols_view_t columns; scalar_view_t values; - fill_1D_matrix_functor(const ordinal_type numNodes_, const int leftBC_, - const int rightBC_, const row_map_view_t rowmap_, - const cols_view_t columns_, - const scalar_view_t values_) + fill_1D_matrix_functor(const ordinal_type numNodes_, const int leftBC_, const int rightBC_, + const row_map_view_t rowmap_, const cols_view_t columns_, const scalar_view_t values_) : numNodes(numNodes_), leftBC(leftBC_), rightBC(rightBC_), @@ -76,24 +74,19 @@ struct fill_1D_matrix_functor { void compute() { // Fill interior points if (0 < numInterior) { - Kokkos::RangePolicy interiorPolicy( - 0, numInterior); - Kokkos::parallel_for("Fill 1D matrix: interior points", interiorPolicy, - *this); + Kokkos::RangePolicy interiorPolicy(0, numInterior); + Kokkos::parallel_for("Fill 1D matrix: interior points", interiorPolicy, *this); } // Fill exterior points a.k.a. 
boundary points Kokkos::RangePolicy exteriorPolicy(0, 1); - Kokkos::parallel_for("Fill 1D matrix: exterior points", exteriorPolicy, - *this); + Kokkos::parallel_for("Fill 1D matrix: exterior points", exteriorPolicy, *this); } KOKKOS_INLINE_FUNCTION void operator()(const interiorTag&, const ordinal_type idx) const { - const ordinal_type rowIdx = - idx + 1; // Offset by one since first node has BC - const size_type rowOffset = - size_type(rowIdx) * interiorStencilLength + cornerStencilLength; + const ordinal_type rowIdx = idx + 1; // Offset by one since first node has BC + const size_type rowOffset = size_type(rowIdx) * interiorStencilLength + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; @@ -155,16 +148,14 @@ CrsMatrix_t generate_structured_matrix1D(const mat_structure& structure) { const ordinal_type numInterior = (nx - leftBC - rightBC); const ordinal_type numCorner = leftBC + rightBC; const ordinal_type interiorStencilLength = 3, cornerStencilLength = 2; - const size_type numEntries = - numInterior * interiorStencilLength + numCorner * cornerStencilLength; + const size_type numEntries = numInterior * interiorStencilLength + numCorner * cornerStencilLength; // Create matrix data row_map_view_t rowmap_view("rowmap_view", numNodes + 1); cols_view_t columns_view("colsmap_view", numEntries); scalar_view_t values_view("values_view", numEntries); - fill_1D_matrix_functor fill_matrix( - numNodes, leftBC, rightBC, rowmap_view, columns_view, values_view); + fill_1D_matrix_functor fill_matrix(numNodes, leftBC, rightBC, rowmap_view, columns_view, values_view); fill_matrix.compute(); graph_t static_graph(columns_view, rowmap_view); @@ -219,12 +210,9 @@ struct fill_2D_matrix_functor { ordinal_type numEntriesBottomRow; size_type numEntries; - fill_2D_matrix_functor(const int stencil_type_, const ordinal_type nx_, - const ordinal_type ny_, const int leftBC_, - const int rightBC_, const int bottomBC_, - const int topBC_, const row_map_view_t rowmap_, - const cols_view_t 
columns_, - const scalar_view_t values_) + fill_2D_matrix_functor(const int stencil_type_, const ordinal_type nx_, const ordinal_type ny_, const int leftBC_, + const int rightBC_, const int bottomBC_, const int topBC_, const row_map_view_t rowmap_, + const cols_view_t columns_, const scalar_view_t values_) : stencil_type(stencil_type_), nx(nx_), ny(ny_), @@ -258,14 +246,11 @@ struct fill_2D_matrix_functor { numYEdge = ny - 2; numCorner = 4; - numEntriesPerGridRow = - (nx - 2) * interiorStencilLength + 2 * edgeStencilLength; + numEntriesPerGridRow = (nx - 2) * interiorStencilLength + 2 * edgeStencilLength; - numEntriesBottomRow = - (nx - 2) * edgeStencilLength + 2 * cornerStencilLength; + numEntriesBottomRow = (nx - 2) * edgeStencilLength + 2 * cornerStencilLength; - numEntries = numInterior * interiorStencilLength + - (2 * numXEdge + 2 * numYEdge) * edgeStencilLength + + numEntries = numInterior * interiorStencilLength + (2 * numXEdge + 2 * numYEdge) * edgeStencilLength + numCorner * cornerStencilLength; } @@ -273,15 +258,11 @@ struct fill_2D_matrix_functor { // Fill interior points if (0 < numInterior) { if (stencil_type == FD) { - Kokkos::RangePolicy policy(0, - numInterior); - Kokkos::parallel_for("Fill 2D FD matrix: interior points", policy, - *this); + Kokkos::RangePolicy policy(0, numInterior); + Kokkos::parallel_for("Fill 2D FD matrix: interior points", policy, *this); } else if (stencil_type == FE) { - Kokkos::RangePolicy policy(0, - numInterior); - Kokkos::parallel_for("Fill 2D FE matrix: interior points", policy, - *this); + Kokkos::RangePolicy policy(0, numInterior); + Kokkos::parallel_for("Fill 2D FE matrix: interior points", policy, *this); } } @@ -329,9 +310,8 @@ struct fill_2D_matrix_functor { const ordinal_type rowIdx = (j + 1) * nx + i + 1; // Compute rowOffset - const size_type rowOffset = - size_type(j) * numEntriesPerGridRow + numEntriesBottomRow + - size_type(i + 1) * interiorStencilLength + edgeStencilLength; + const size_type rowOffset = 
size_type(j) * numEntriesPerGridRow + numEntriesBottomRow + + size_type(i + 1) * interiorStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; @@ -356,8 +336,7 @@ struct fill_2D_matrix_functor { /* Bottom edge */ /***************/ ordinal_type rowIdx = idx + 1; - size_type rowOffset = - size_type(idx + 1) * edgeStencilLength + cornerStencilLength; + size_type rowOffset = size_type(idx + 1) * edgeStencilLength + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; @@ -415,8 +394,7 @@ struct fill_2D_matrix_functor { /* Left edge */ /*************/ ordinal_type rowIdx = (idx + 1) * nx; - size_type rowOffset = size_type(idx) * numEntriesPerGridRow + - numEntriesBottomRow + edgeStencilLength; + size_type rowOffset = size_type(idx) * numEntriesPerGridRow + numEntriesBottomRow + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; @@ -442,8 +420,8 @@ struct fill_2D_matrix_functor { /**************/ /* Right edge */ /**************/ - rowIdx = (idx + 2) * nx - 1; - rowOffset = size_type(idx + 1) * numEntriesPerGridRow + numEntriesBottomRow; + rowIdx = (idx + 2) * nx - 1; + rowOffset = size_type(idx + 1) * numEntriesPerGridRow + numEntriesBottomRow; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -505,9 +483,8 @@ struct fill_2D_matrix_functor { } // Top-left corner - rowIdx = (ny - 1) * nx; - rowOffset = size_type(ny - 2) * numEntriesPerGridRow + numEntriesBottomRow + - cornerStencilLength; + rowIdx = (ny - 1) * nx; + rowOffset = size_type(ny - 2) * numEntriesPerGridRow + numEntriesBottomRow + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; columns(rowOffset - 3) = rowIdx - nx; @@ -552,9 +529,8 @@ struct fill_2D_matrix_functor { const ordinal_type rowIdx = (j + 1) * nx + i + 1; // Compute rowOffset - const size_type rowOffset = - size_type(j) * numEntriesPerGridRow + numEntriesBottomRow + - size_type(i + 1) * interiorStencilLength + edgeStencilLength; + const size_type rowOffset = size_type(j) * numEntriesPerGridRow + numEntriesBottomRow + + 
size_type(i + 1) * interiorStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -586,9 +562,8 @@ struct fill_2D_matrix_functor { /* Bottom edge */ /***************/ ordinal_type rowIdx = idx + 1; - size_type rowOffset = - size_type(idx + 1) * edgeStencilLength + cornerStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(idx + 1) * edgeStencilLength + cornerStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 6) = rowIdx - 1; @@ -655,9 +630,8 @@ struct fill_2D_matrix_functor { /* Left edge */ /*************/ ordinal_type rowIdx = (idx + 1) * nx; - size_type rowOffset = size_type(idx) * numEntriesPerGridRow + - numEntriesBottomRow + edgeStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(idx) * numEntriesPerGridRow + numEntriesBottomRow + edgeStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 6) = rowIdx - nx; @@ -687,8 +661,8 @@ struct fill_2D_matrix_functor { /**************/ /* Right edge */ /**************/ - rowIdx = (idx + 2) * nx - 1; - rowOffset = size_type(idx + 1) * numEntriesPerGridRow + numEntriesBottomRow; + rowIdx = (idx + 2) * nx - 1; + rowOffset = size_type(idx + 1) * numEntriesPerGridRow + numEntriesBottomRow; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -762,9 +736,8 @@ struct fill_2D_matrix_functor { } // Top-left corner - rowIdx = (ny - 1) * nx; - rowOffset = size_type(ny - 2) * numEntriesPerGridRow + numEntriesBottomRow + - cornerStencilLength; + rowIdx = (ny - 1) * nx; + rowOffset = size_type(ny - 2) * numEntriesPerGridRow + numEntriesBottomRow + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; columns(rowOffset - 4) = rowIdx - nx; @@ -807,8 +780,7 @@ struct fill_2D_matrix_functor { }; template -CrsMatrix_t generate_structured_matrix2D(const std::string stencil, - const mat_structure& structure) { +CrsMatrix_t generate_structured_matrix2D(const 
std::string stencil, const mat_structure& structure) { typedef typename CrsMatrix_t::StaticCrsGraphType graph_t; typedef typename CrsMatrix_t::row_map_type::non_const_type row_map_view_t; typedef typename CrsMatrix_t::index_type::non_const_type cols_view_t; @@ -840,8 +812,7 @@ CrsMatrix_t generate_structured_matrix2D(const std::string stencil, const ordinal_type numInterior = (nx - 2) * (ny - 2); const ordinal_type numEdge = 2 * (nx - 2) + 2 * (ny - 2); const ordinal_type numCorner = 4; - ordinal_type interiorStencilLength = 0, edgeStencilLength = 0, - cornerStencilLength = 0; + ordinal_type interiorStencilLength = 0, edgeStencilLength = 0, cornerStencilLength = 0; if (stencil_type == FD) { interiorStencilLength = 5; @@ -853,18 +824,16 @@ CrsMatrix_t generate_structured_matrix2D(const std::string stencil, cornerStencilLength = 4; } - const size_type numEntries = numInterior * interiorStencilLength + - numEdge * edgeStencilLength + - numCorner * cornerStencilLength; + const size_type numEntries = + numInterior * interiorStencilLength + numEdge * edgeStencilLength + numCorner * cornerStencilLength; // Create matrix data row_map_view_t rowmap_view("rowmap_view", numNodes + 1); cols_view_t columns_view("colsmap_view", numEntries); scalar_view_t values_view("values_view", numEntries); - fill_2D_matrix_functor fill_2D_matrix( - stencil_type, nx, ny, leftBC, rightBC, bottomBC, topBC, rowmap_view, - columns_view, values_view); + fill_2D_matrix_functor fill_2D_matrix(stencil_type, nx, ny, leftBC, rightBC, bottomBC, topBC, + rowmap_view, columns_view, values_view); fill_2D_matrix.compute(); @@ -945,14 +914,10 @@ struct fill_3D_matrix_functor { ordinal_type numEntriesFrontRow; ordinal_type numEntriesBottomFrontRow; - fill_3D_matrix_functor(const int stencil_type_, const ordinal_type nx_, - const ordinal_type ny_, const ordinal_type nz_, - const int leftBC_, const int rightBC_, - const int frontBC_, const int backBC_, - const int bottomBC_, const int topBC_, - const 
row_map_view_t rowmap_, - const cols_view_t columns_, - const scalar_view_t values_) + fill_3D_matrix_functor(const int stencil_type_, const ordinal_type nx_, const ordinal_type ny_, + const ordinal_type nz_, const int leftBC_, const int rightBC_, const int frontBC_, + const int backBC_, const int bottomBC_, const int topBC_, const row_map_view_t rowmap_, + const cols_view_t columns_, const scalar_view_t values_) : stencil_type(stencil_type_), nx(nx_), ny(ny_), @@ -986,38 +951,28 @@ struct fill_3D_matrix_functor { numYEdge = ny - 2; numZEdge = nz - 2; - numEntries = numInterior * interiorStencilLength + - 2 * (numXFace + numYFace + numZFace) * faceStencilLength + - 4 * (numXEdge + numYEdge + numZEdge) * edgeStencilLength + - 8 * cornerStencilLength; - numEntriesPerGridPlane = - numZFace * interiorStencilLength + 2 * numXEdge * faceStencilLength + - 2 * numYEdge * faceStencilLength + 4 * edgeStencilLength; + numEntries = numInterior * interiorStencilLength + 2 * (numXFace + numYFace + numZFace) * faceStencilLength + + 4 * (numXEdge + numYEdge + numZEdge) * edgeStencilLength + 8 * cornerStencilLength; + numEntriesPerGridPlane = numZFace * interiorStencilLength + 2 * numXEdge * faceStencilLength + + 2 * numYEdge * faceStencilLength + 4 * edgeStencilLength; ; - numEntriesBottomPlane = - numZFace * faceStencilLength + 2 * numXEdge * edgeStencilLength + - 2 * numYEdge * edgeStencilLength + 4 * cornerStencilLength; + numEntriesBottomPlane = numZFace * faceStencilLength + 2 * numXEdge * edgeStencilLength + + 2 * numYEdge * edgeStencilLength + 4 * cornerStencilLength; ; - numEntriesPerGridRow = - numXEdge * interiorStencilLength + 2 * faceStencilLength; - numEntriesFrontRow = numXEdge * faceStencilLength + 2 * edgeStencilLength; - numEntriesBottomFrontRow = - numXEdge * edgeStencilLength + 2 * cornerStencilLength; + numEntriesPerGridRow = numXEdge * interiorStencilLength + 2 * faceStencilLength; + numEntriesFrontRow = numXEdge * faceStencilLength + 2 * edgeStencilLength; + 
numEntriesBottomFrontRow = numXEdge * edgeStencilLength + 2 * cornerStencilLength; } void compute() { // Fill interior points if (0 < numInterior) { if (stencil_type == FD) { - Kokkos::RangePolicy policy(0, - numInterior); - Kokkos::parallel_for("Fill 3D FD matrix: interior points", policy, - *this); + Kokkos::RangePolicy policy(0, numInterior); + Kokkos::parallel_for("Fill 3D FD matrix: interior points", policy, *this); } else if (stencil_type == FE) { - Kokkos::RangePolicy policy(0, - numInterior); - Kokkos::parallel_for("Fill 3D FE matrix: interior points", policy, - *this); + Kokkos::RangePolicy policy(0, numInterior); + Kokkos::parallel_for("Fill 3D FE matrix: interior points", policy, *this); } } @@ -1106,10 +1061,9 @@ struct fill_3D_matrix_functor { const ordinal_type rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - const size_type rowOffset = - size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - size_type(i + 1) * interiorStencilLength + faceStencilLength; + const size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + + size_type(i + 1) * interiorStencilLength + faceStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1143,10 +1097,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i; // Compute rowOffset - size_type rowOffset = size_type(k) * numEntriesPerGridPlane + - numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + - numEntriesFrontRow + faceStencilLength; + size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + faceStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1225,9 +1177,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (k + 1) * ny * nx + i + 1; // Compute 
rowOffset - size_type rowOffset = - size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1265,9 +1216,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesPerGridRow + + numEntriesFrontRow + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1307,9 +1257,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (j + 1) * nx + i + 1; // Compute rowOffset - size_type rowOffset = - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + size_type rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1347,9 +1296,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesFrontRow + + numEntriesBottomFrontRow + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ 
-1385,9 +1333,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = i + 1; // Compute rowOffset - size_type rowOffset = - size_type(i + 1) * edgeStencilLength + cornerStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 5) = rowIdx - 1; @@ -1417,8 +1364,8 @@ struct fill_3D_matrix_functor { rowIdx = (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + size_type(i + 1) * edgeStencilLength + + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1449,8 +1396,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(i + 1) * edgeStencilLength + + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1482,9 +1429,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesFrontRow + + numEntriesBottomFrontRow + size_type(i + 1) * edgeStencilLength + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1517,9 +1463,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (j + 1) * nx; // Compute rowOffset - size_type 
rowOffset = size_type(j) * numEntriesFrontRow + - numEntriesBottomFrontRow + edgeStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + edgeStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 5) = rowIdx - nx; @@ -1549,8 +1494,7 @@ struct fill_3D_matrix_functor { rowIdx = (j + 1) * nx + i; // Compute rowOffset - rowOffset = - size_type(j + 1) * numEntriesFrontRow + numEntriesBottomFrontRow; + rowOffset = size_type(j + 1) * numEntriesFrontRow + numEntriesBottomFrontRow; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1581,9 +1525,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesFrontRow + + numEntriesBottomFrontRow + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1615,8 +1558,7 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j + 1) * numEntriesFrontRow + + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j + 1) * numEntriesFrontRow + numEntriesBottomFrontRow; rowmap(rowIdx + 1) = rowOffset; @@ -1650,9 +1592,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (k + 1) * ny * nx; // Compute rowOffset - size_type rowOffset = size_type(k) * numEntriesPerGridPlane + - numEntriesBottomPlane + edgeStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + edgeStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices 
columns(rowOffset - 5) = rowIdx - ny * nx; @@ -1682,8 +1623,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(i) * faceStencilLength + 2 * edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(i) * faceStencilLength + + 2 * edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1714,9 +1655,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesPerGridRow + + numEntriesFrontRow + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1748,9 +1688,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - size_type(i) * faceStencilLength + 2 * edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesPerGridRow + + numEntriesFrontRow + size_type(i) * faceStencilLength + 2 * edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1825,9 +1764,8 @@ struct fill_3D_matrix_functor { values(rowOffset - 1) = -1.0; } - rowIdx = (ny - 1) * nx; - rowOffset = size_type(ny - 2) * numEntriesFrontRow + - numEntriesBottomFrontRow + cornerStencilLength; + rowIdx = (ny - 1) * nx; + rowOffset = size_type(ny - 2) * numEntriesFrontRow + numEntriesBottomFrontRow + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1872,9 +1810,8 @@ struct 
fill_3D_matrix_functor { values(rowOffset - 1) = -1.0; } - rowIdx = (nz - 1) * ny * nx; - rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + - numEntriesBottomPlane + cornerStencilLength; + rowIdx = (nz - 1) * ny * nx; + rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + numEntriesBottomPlane + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1896,9 +1833,8 @@ struct fill_3D_matrix_functor { values(rowOffset - 1) = -1.0; } - rowIdx = (nz - 1) * ny * nx + nx - 1; - rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + - numEntriesBottomPlane + numEntriesBottomFrontRow; + rowIdx = (nz - 1) * ny * nx + nx - 1; + rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + numEntriesBottomPlane + numEntriesBottomFrontRow; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -1921,8 +1857,7 @@ struct fill_3D_matrix_functor { } rowIdx = nz * ny * nx - nx; - rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + - numEntriesBottomPlane + (ny - 2) * numEntriesFrontRow + + rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + numEntriesBottomPlane + (ny - 2) * numEntriesFrontRow + numEntriesBottomFrontRow + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; @@ -1979,10 +1914,9 @@ struct fill_3D_matrix_functor { const ordinal_type rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - const size_type rowOffset = - size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - size_type(i + 1) * interiorStencilLength + faceStencilLength; + const size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + + size_type(i + 1) * interiorStencilLength + faceStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2056,10 +1990,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i; // Compute rowOffset - 
size_type rowOffset = size_type(k) * numEntriesPerGridPlane + - numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + - numEntriesFrontRow + faceStencilLength; + size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + faceStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2210,9 +2142,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (k + 1) * ny * nx + i + 1; // Compute rowOffset - size_type rowOffset = - size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2286,9 +2217,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesPerGridRow + + numEntriesFrontRow + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2364,9 +2294,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (j + 1) * nx + i + 1; // Compute rowOffset - size_type rowOffset = - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + size_type rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2440,9 +2369,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + 
(j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * faceStencilLength + edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesFrontRow + + numEntriesBottomFrontRow + size_type(i + 1) * faceStencilLength + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2514,9 +2442,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = i + 1; // Compute rowOffset - size_type rowOffset = - size_type(i + 1) * edgeStencilLength + cornerStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 12) = rowIdx - 1; @@ -2567,8 +2494,8 @@ struct fill_3D_matrix_functor { rowIdx = (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + size_type(i + 1) * edgeStencilLength + + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2620,8 +2547,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(i + 1) * edgeStencilLength + + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2674,9 +2601,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - 
size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - size_type(i + 1) * edgeStencilLength + cornerStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesFrontRow + + numEntriesBottomFrontRow + size_type(i + 1) * edgeStencilLength + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2730,9 +2656,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (j + 1) * nx; // Compute rowOffset - size_type rowOffset = size_type(j) * numEntriesFrontRow + - numEntriesBottomFrontRow + edgeStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + edgeStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 12) = rowIdx - nx; @@ -2783,8 +2708,7 @@ struct fill_3D_matrix_functor { rowIdx = (j + 1) * nx + i; // Compute rowOffset - rowOffset = - size_type(j + 1) * numEntriesFrontRow + numEntriesBottomFrontRow; + rowOffset = size_type(j + 1) * numEntriesFrontRow + numEntriesBottomFrontRow; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2836,9 +2760,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesFrontRow + numEntriesBottomFrontRow + - edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesFrontRow + + numEntriesBottomFrontRow + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -2891,8 +2814,7 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j + 1) * numEntriesFrontRow + + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j + 1) * 
numEntriesFrontRow + numEntriesBottomFrontRow; rowmap(rowIdx + 1) = rowOffset; @@ -2947,9 +2869,8 @@ struct fill_3D_matrix_functor { ordinal_type rowIdx = (k + 1) * ny * nx; // Compute rowOffset - size_type rowOffset = size_type(k) * numEntriesPerGridPlane + - numEntriesBottomPlane + edgeStencilLength; - rowmap(rowIdx + 1) = rowOffset; + size_type rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + edgeStencilLength; + rowmap(rowIdx + 1) = rowOffset; // Fill column indices columns(rowOffset - 12) = rowIdx - nx * ny; @@ -3000,8 +2921,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(i) * faceStencilLength + 2 * edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(i) * faceStencilLength + + 2 * edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3053,9 +2974,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesPerGridRow + + numEntriesFrontRow + edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3108,9 +3028,8 @@ struct fill_3D_matrix_functor { rowIdx = (k + 1) * ny * nx + (j + 1) * nx + i + 1; // Compute rowOffset - rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + - size_type(j) * numEntriesPerGridRow + numEntriesFrontRow + - size_type(i) * faceStencilLength + 2 * edgeStencilLength; + rowOffset = size_type(k) * numEntriesPerGridPlane + numEntriesBottomPlane + size_type(j) * numEntriesPerGridRow + + numEntriesFrontRow + size_type(i) * faceStencilLength + 2 * 
edgeStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3230,9 +3149,8 @@ struct fill_3D_matrix_functor { values(rowOffset - 1) = -1.0; } - rowIdx = (ny - 1) * nx; - rowOffset = size_type(ny - 2) * numEntriesFrontRow + - numEntriesBottomFrontRow + cornerStencilLength; + rowIdx = (ny - 1) * nx; + rowOffset = size_type(ny - 2) * numEntriesFrontRow + numEntriesBottomFrontRow + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3302,9 +3220,8 @@ struct fill_3D_matrix_functor { } // Top corners - rowIdx = (nz - 1) * ny * nx; - rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + - numEntriesBottomPlane + cornerStencilLength; + rowIdx = (nz - 1) * ny * nx; + rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + numEntriesBottomPlane + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3338,9 +3255,8 @@ struct fill_3D_matrix_functor { values(rowOffset - 1) = -1.0; } - rowIdx = (nz - 1) * ny * nx + nx - 1; - rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + - numEntriesBottomPlane + numEntriesBottomFrontRow; + rowIdx = (nz - 1) * ny * nx + nx - 1; + rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + numEntriesBottomPlane + numEntriesBottomFrontRow; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3375,9 +3291,8 @@ struct fill_3D_matrix_functor { } rowIdx = nz * ny * nx - nx; - rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + - numEntriesBottomPlane + size_type(ny - 2) * numEntriesFrontRow + - numEntriesBottomFrontRow + cornerStencilLength; + rowOffset = size_type(nz - 2) * numEntriesPerGridPlane + numEntriesBottomPlane + + size_type(ny - 2) * numEntriesFrontRow + numEntriesBottomFrontRow + cornerStencilLength; rowmap(rowIdx + 1) = rowOffset; // Fill column indices @@ -3449,8 +3364,7 @@ struct fill_3D_matrix_functor { }; template -CrsMatrix_t generate_structured_matrix3D(const std::string stencil, - const mat_structure& structure) { +CrsMatrix_t 
generate_structured_matrix3D(const std::string stencil, const mat_structure& structure) { typedef typename CrsMatrix_t::StaticCrsGraphType graph_t; typedef typename CrsMatrix_t::row_map_type::non_const_type row_map_view_t; typedef typename CrsMatrix_t::index_type::non_const_type cols_view_t; @@ -3482,29 +3396,18 @@ CrsMatrix_t generate_structured_matrix3D(const std::string stencil, const ordinal_type backBC = structure(1, 2); const ordinal_type bottomBC = structure(2, 1); const ordinal_type topBC = structure(2, 2); - const ordinal_type numInterior = (nx - leftBC - rightBC) * - (ny - frontBC - backBC) * - (nz - bottomBC - topBC); - const ordinal_type numFace = - (leftBC + rightBC) * (ny - frontBC - backBC) * (nz - bottomBC - topBC) + - (frontBC + backBC) * (nx - leftBC - rightBC) * (nz - bottomBC - topBC) + - (bottomBC + topBC) * (nx - leftBC - rightBC) * (ny - frontBC - backBC); - const ordinal_type numEdge = (frontBC * bottomBC + frontBC * topBC + - backBC * bottomBC + backBC * topBC) * - (nx - leftBC - rightBC) + - (leftBC * bottomBC + leftBC * topBC + - rightBC * bottomBC + rightBC * topBC) * - (ny - frontBC - backBC) + - (leftBC * frontBC + leftBC * backBC + - rightBC * frontBC + rightBC * backBC) * - (nz - bottomBC - topBC); - const ordinal_type numCorner = - leftBC * frontBC * bottomBC + rightBC * frontBC * bottomBC + - leftBC * backBC * bottomBC + rightBC * backBC * bottomBC + - leftBC * frontBC * topBC + rightBC * frontBC * topBC + - leftBC * backBC * topBC + rightBC * backBC * topBC; - ordinal_type interiorStencilLength = 0, faceStencilLength = 0, - edgeStencilLength = 0, cornerStencilLength = 0; + const ordinal_type numInterior = (nx - leftBC - rightBC) * (ny - frontBC - backBC) * (nz - bottomBC - topBC); + const ordinal_type numFace = (leftBC + rightBC) * (ny - frontBC - backBC) * (nz - bottomBC - topBC) + + (frontBC + backBC) * (nx - leftBC - rightBC) * (nz - bottomBC - topBC) + + (bottomBC + topBC) * (nx - leftBC - rightBC) * (ny - frontBC - backBC); + 
const ordinal_type numEdge = + (frontBC * bottomBC + frontBC * topBC + backBC * bottomBC + backBC * topBC) * (nx - leftBC - rightBC) + + (leftBC * bottomBC + leftBC * topBC + rightBC * bottomBC + rightBC * topBC) * (ny - frontBC - backBC) + + (leftBC * frontBC + leftBC * backBC + rightBC * frontBC + rightBC * backBC) * (nz - bottomBC - topBC); + const ordinal_type numCorner = leftBC * frontBC * bottomBC + rightBC * frontBC * bottomBC + + leftBC * backBC * bottomBC + rightBC * backBC * bottomBC + leftBC * frontBC * topBC + + rightBC * frontBC * topBC + leftBC * backBC * topBC + rightBC * backBC * topBC; + ordinal_type interiorStencilLength = 0, faceStencilLength = 0, edgeStencilLength = 0, cornerStencilLength = 0; if (stencil_type == FD) { interiorStencilLength = 7; @@ -3518,9 +3421,8 @@ CrsMatrix_t generate_structured_matrix3D(const std::string stencil, cornerStencilLength = 8; } - const size_type numEntries = - numInterior * interiorStencilLength + numFace * faceStencilLength + - numEdge * edgeStencilLength + numCorner * cornerStencilLength; + const size_type numEntries = numInterior * interiorStencilLength + numFace * faceStencilLength + + numEdge * edgeStencilLength + numCorner * cornerStencilLength; // Create matrix data row_map_view_t rowmap_view("rowmap_view", numNodes + 1); @@ -3531,9 +3433,8 @@ CrsMatrix_t generate_structured_matrix3D(const std::string stencil, // To start simple we construct 2D 5pt stencil Laplacian. // We assume Neumann boundary conditions on the edge of the domain. 
- fill_3D_matrix_functor fill_3D_matrix( - stencil_type, nx, ny, nz, leftBC, rightBC, frontBC, backBC, bottomBC, - topBC, rowmap_view, columns_view, values_view); + fill_3D_matrix_functor fill_3D_matrix(stencil_type, nx, ny, nz, leftBC, rightBC, frontBC, backBC, + bottomBC, topBC, rowmap_view, columns_view, values_view); fill_3D_matrix.compute(); diff --git a/packages/kokkos-kernels/test_common/KokkosKernels_WriteBinaryFromBinSrcDst.cpp b/packages/kokkos-kernels/test_common/KokkosKernels_WriteBinaryFromBinSrcDst.cpp index 1c9ddca09a01..0f2da76c8e05 100644 --- a/packages/kokkos-kernels/test_common/KokkosKernels_WriteBinaryFromBinSrcDst.cpp +++ b/packages/kokkos-kernels/test_common/KokkosKernels_WriteBinaryFromBinSrcDst.cpp @@ -34,27 +34,22 @@ int main(int argc, char **argv) { } else if (0 == Test::string_compare_no_case(argv[i], "in_dst")) { in_dst = argv[++i]; } else { - std::cerr << "Usage:" << argv[0] << " in_src srcs.bin in_dst dsts.bin" - << std::endl; + std::cerr << "Usage:" << argv[0] << " in_src srcs.bin in_dst dsts.bin" << std::endl; exit(1); } } if (in_src == NULL || in_dst == NULL) { - std::cerr << "Usage:" << argv[0] << " in_src srcs.bin in_dst dsts.bin" - << std::endl; + std::cerr << "Usage:" << argv[0] << " in_src srcs.bin in_dst dsts.bin" << std::endl; exit(1); } size_t numEdges = 0; size_t *srcs, *dst; // this type is hard coded - KokkosKernels::Impl::buildEdgeListFromBinSrcTarg_undirected( - in_src, in_dst, numEdges, &srcs, &dst); + KokkosKernels::Impl::buildEdgeListFromBinSrcTarg_undirected(in_src, in_dst, numEdges, &srcs, &dst); std::cout << "read numEdges:" << numEdges << std::endl; size_t num_vertex = 0; for (size_t i = 0; i < numEdges; ++i) { - if (srcs[i] == 0 || dst[i] == 0) - std::cout << "i:" << i << " src:" << srcs[i] << " dst:" << dst[i] - << std::endl; + if (srcs[i] == 0 || dst[i] == 0) std::cout << "i:" << i << " src:" << srcs[i] << " dst:" << dst[i] << std::endl; if (num_vertex < srcs[i]) num_vertex = srcs[i]; if (num_vertex < dst[i]) 
num_vertex = dst[i]; } @@ -75,8 +70,7 @@ int main(int argc, char **argv) { KokkosKernels::Impl::md_malloc(&adj, ne); std::cout << "converting" << std::endl; - KokkosKernels::Impl::convert_undirected_edge_list_to_csr( + KokkosKernels::Impl::convert_undirected_edge_list_to_csr( nv, numEdges, // numEdges should be num undirected edges. srcs, dst, xadj, adj); delete[] srcs; @@ -97,41 +91,34 @@ int main(int argc, char **argv) { KokkosKernels::Impl::write_graph_bin(nv, ne, xadj, adj, ew, "actual.bin"); std::cout << "calculating incidence transpose" << std::endl; - KokkosKernels::Impl::kk_sequential_create_incidence_matrix_transpose( - nv, ne, xadj, adj, - &(i_xadj[0]), // output. preallocated - &(i_adj[0]) // output. preallocated + KokkosKernels::Impl::kk_sequential_create_incidence_matrix_transpose(nv, ne, xadj, adj, + &(i_xadj[0]), // output. preallocated + &(i_adj[0]) // output. preallocated ); std::cout << "writing bin incidence transpose" << std::endl; - KokkosKernels::Impl::write_graph_bin(lno_t(ne / 2), ne, &(i_xadj[0]), - &(i_adj[0]), ew, - "incidence-transpose.bin"); + KokkosKernels::Impl::write_graph_bin(lno_t(ne / 2), ne, &(i_xadj[0]), &(i_adj[0]), ew, "incidence-transpose.bin"); size_type *i_adj2; KokkosKernels::Impl::md_malloc(&i_adj2, ne); std::cout << "calculating incidence " << std::endl; - KokkosKernels::Impl::kk_sequential_create_incidence_matrix( - nv, xadj, adj, - &(i_adj2[0]) // output. preallocated + KokkosKernels::Impl::kk_sequential_create_incidence_matrix(nv, xadj, adj, + &(i_adj2[0]) // output. 
preallocated ); std::cout << "writing bin incidence" << std::endl; - KokkosKernels::Impl::write_graph_bin(nv, ne, xadj, i_adj2, ew, - "incidence.bin"); + KokkosKernels::Impl::write_graph_bin(nv, ne, xadj, i_adj2, ew, "incidence.bin"); lno_t average_degree = ne / nv; std::vector row_sizes(nv, 0); for (lno_t i = 0; i < nv; ++i) { size_type row_s = xadj[i + 1] - xadj[i]; if (row_s > 1000) - std::cout << "row:" << i << " size:" << row_s - << " average_degree:" << average_degree << std::endl; + std::cout << "row:" << i << " size:" << row_s << " average_degree:" << average_degree << std::endl; row_sizes[row_s] += 1; } for (lno_t i = 0; i < nv; ++i) { if (row_sizes[i] != 0) { - std::cout << row_sizes[i] << " rows has " << i << " nonzeroes" - << std::endl; + std::cout << row_sizes[i] << " rows has " << i << " nonzeroes" << std::endl; } } delete[] i_xadj; diff --git a/packages/kokkos-kernels/test_common/Kokkos_Performance.hpp b/packages/kokkos-kernels/test_common/Kokkos_Performance.hpp index c4ff95654cff..648f7c53563d 100644 --- a/packages/kokkos-kernels/test_common/Kokkos_Performance.hpp +++ b/packages/kokkos-kernels/test_common/Kokkos_Performance.hpp @@ -80,11 +80,9 @@ class Performance { * \param tolerance_low [in] Lower bound of tolerance. * \param tolerance_high [in] Upper bound of tolerance. 
*/ - void set_result(const std::string& name, double val, double tolerance_low, - double tolerance_high) { + void set_result(const std::string& name, double val, double tolerance_low, double tolerance_high) { validate_input_result_name(name); - results_node[name] = - Performance::Tolerance(val, tolerance_low, tolerance_high).as_string(); + results_node[name] = Performance::Tolerance(val, tolerance_low, tolerance_high).as_string(); } /** @@ -113,16 +111,7 @@ class Performance { /** * \brief Result codes after creating/comparing a test entry */ - enum Result { - Failed, - Passed, - NewMachine, - NewConfiguration, - NewTest, - NewTestConfiguration, - UpdatedTest, - Unknown - }; + enum Result { Failed, Passed, NewMachine, NewConfiguration, NewTest, NewTestConfiguration, UpdatedTest, Unknown }; /** * \brief Processes the test and update the yaml archive @@ -162,8 +151,7 @@ class Performance { * This will only happen if all the old result values are present in * the new ones, and are within their respective tolerances. */ - Result run(const std::string& archive_name, const std::string& test_name, - const std::string& host_name = "") const; + Result run(const std::string& archive_name, const std::string& test_name, const std::string& host_name = "") const; /** * \brief print_archive will std::cout the yaml archive for inspection. @@ -246,8 +234,7 @@ Performance::node_t Performance::get_machine_configuration() const { } if (line.find("physical id") < line.size()) { unsigned int socketid = atoi(line.substr(line.find(":") + 2).c_str()); - highest_socketid = - highest_socketid > socketid ? highest_socketid : socketid; + highest_socketid = highest_socketid > socketid ? 
highest_socketid : socketid; } if (line.find("cpu cores") < line.size()) { cores_per_socket = atoi(line.substr(line.find(":") + 2).c_str()); @@ -258,9 +245,8 @@ Performance::node_t Performance::get_machine_configuration() const { int compiler_version = 0; #if defined __clang__ - compiler_name = "Clang"; - compiler_version = - __clang_major__ * 100 + __clang_minor__ * 10 + __clang_patchlevel__; + compiler_name = "Clang"; + compiler_version = __clang_major__ * 100 + __clang_minor__ * 10 + __clang_patchlevel__; #endif #if defined __GNUC__ && !defined KOKKOS_COMPILER_NAME && !defined __clang__ @@ -283,8 +269,7 @@ Performance::node_t Performance::get_machine_configuration() const { return machine_config; } -Performance::Result Performance::run(const std::string& archive_name, - const std::string& test_name, +Performance::Result Performance::run(const std::string& archive_name, const std::string& test_name, const std::string& host_name) const { // These are abitrary category names used in the yaml const std::string test_configuration_string = "TestConfiguration"; @@ -293,8 +278,7 @@ Performance::Result Performance::run(const std::string& archive_name, const std::string tests_string = "Tests"; // Now create the test entry - combincation of configuration and times/results - Performance::node_t - new_test_entry; // the entry will have two bits added below + Performance::node_t new_test_entry; // the entry will have two bits added below new_test_entry[test_configuration_string] = test_configuration_node; new_test_entry[test_results_string] = results_node; @@ -325,19 +309,16 @@ Performance::Result Performance::run(const std::string& archive_name, Performance::node_t machine = database[host_setting]; // Find matching machine configuration - for (size_t machine_index = 0; machine_index < machine.size(); - ++machine_index) { + for (size_t machine_index = 0; machine_index < machine.size(); ++machine_index) { Performance::node_t configuration = machine[machine_index]; - if 
(!configuration[machine_configuration_string] || - !configuration[tests_string]) { + if (!configuration[machine_configuration_string] || !configuration[tests_string]) { throw std::logic_error( "Configuration must has child MachineConfiguration and a child " "\"Tests\"."); } - Performance::node_t machine_configuration = - configuration[machine_configuration_string]; - Performance::node_t old_tests = configuration[tests_string]; + Performance::node_t machine_configuration = configuration[machine_configuration_string]; + Performance::node_t old_tests = configuration[tests_string]; if (hasSameElements(machine_configuration, machine_configuration_node)) { is_new_config = false; @@ -345,29 +326,22 @@ Performance::Result Performance::run(const std::string& archive_name, if (old_tests[test_name]) { Performance::node_t old_test_array = old_tests[test_name]; int match_test_index = -1; - for (size_t entry_index = 0; entry_index < old_test_array.size(); - ++entry_index) { + for (size_t entry_index = 0; entry_index < old_test_array.size(); ++entry_index) { Performance::node_t old_test_entry = old_test_array[entry_index]; - if (hasSameElements(old_test_entry[test_configuration_string], - new_test_entry[test_configuration_string])) { + if (hasSameElements(old_test_entry[test_configuration_string], new_test_entry[test_configuration_string])) { match_test_index = static_cast(entry_index); } } if (match_test_index == -1) { - database[host_setting][machine_index][tests_string][test_name] - .push_back(new_test_entry); + database[host_setting][machine_index][tests_string][test_name].push_back(new_test_entry); return_value = Performance::NewTestConfiguration; } else { - bool deviation = false; - Performance::node_t old_test_entry = - old_test_array[match_test_index]; - Performance::node_t old_results = - old_test_entry[test_results_string]; - Performance::node_t new_results = - new_test_entry[test_results_string]; + bool deviation = false; + Performance::node_t old_test_entry = 
old_test_array[match_test_index]; + Performance::node_t old_results = old_test_entry[test_results_string]; + Performance::node_t new_results = new_test_entry[test_results_string]; // Compare all entries - for (YAML::const_iterator old_r = old_results.begin(); - old_r != old_results.end(); ++old_r) { + for (YAML::const_iterator old_r = old_results.begin(); old_r != old_results.end(); ++old_r) { Performance::node_t result_entry = old_r->second; // Finding entry with same name std::string result_name = old_r->first.Scalar(); @@ -377,50 +351,37 @@ Performance::Result Performance::run(const std::string& archive_name, std::string old_test_name = test_name; std::ostringstream new_result_entry_name_stream; new_result_entry_name_stream << new_results[result_name]; - std::string new_result_data = - new_result_entry_name_stream.str(); + std::string new_result_data = new_result_entry_name_stream.str(); // based on name does result use tolerance? // if it has the '*' key character appended it means it's an // exact if (!string_includes_exact_code(result_name)) { Performance::Tolerance old_valtol(oldv_str); - Performance::Tolerance new_valtol( - new_results[result_name].Scalar()); + Performance::Tolerance new_valtol(new_results[result_name].Scalar()); if (old_valtol.use_tolerance) { double diff = old_valtol.value - new_valtol.value; diff *= diff; double normalization = old_valtol.value; normalization *= normalization; - if (normalization == 0 - ? diff > 0 - : diff / normalization > - old_valtol.tolerance * old_valtol.tolerance) { + if (normalization == 0 ? 
diff > 0 + : diff / normalization > old_valtol.tolerance * old_valtol.tolerance) { deviation = true; std::cout << std::endl - << " DeviationA in Test: \"" << old_test_name - << "\" for entry \"" << result_name << "\"" - << std::endl; - std::cout << " Existing Value: \"" << oldv_str << "\"" - << std::endl; - std::cout << " New Value: \"" << new_result_data - << "\"" << std::endl - << std::endl; + << " DeviationA in Test: \"" << old_test_name << "\" for entry \"" << result_name + << "\"" << std::endl; + std::cout << " Existing Value: \"" << oldv_str << "\"" << std::endl; + std::cout << " New Value: \"" << new_result_data << "\"" << std::endl << std::endl; } } else { - if ((old_valtol.lower > new_valtol.value) || - (old_valtol.upper < new_valtol.value)) { + if ((old_valtol.lower > new_valtol.value) || (old_valtol.upper < new_valtol.value)) { deviation = true; std::cout << std::endl - << " DeviationB in Test: \"" << old_test_name - << "\" for entry \"" << result_name << "\"" - << std::endl; - std::cout << " Existing Value: \"" << oldv_str << "\"" - << std::endl; - std::cout << " New Value: \"" << new_result_data - << "\"" << std::endl - << std::endl; + << " DeviationB in Test: \"" << old_test_name << "\" for entry \"" << result_name + << "\"" << std::endl; + std::cout << " Existing Value: \"" << oldv_str << "\"" << std::endl; + std::cout << " New Value: \"" << new_result_data << "\"" << std::endl << std::endl; } } } else { @@ -428,14 +389,10 @@ Performance::Result Performance::run(const std::string& archive_name, if (oldv_str.compare(new_result_data) != 0) { deviation = true; std::cout << std::endl - << " DeviationC in Test: \"" << old_test_name - << "\" for entry \"" << result_name << "\"" - << std::endl; - std::cout << " Existing Value: \"" << oldv_str << "\"" - << std::endl; - std::cout << " New Value: \"" << new_result_data - << "\"" << std::endl + << " DeviationC in Test: \"" << old_test_name << "\" for entry \"" << result_name << "\"" << std::endl; + std::cout << 
" Existing Value: \"" << oldv_str << "\"" << std::endl; + std::cout << " New Value: \"" << new_result_data << "\"" << std::endl << std::endl; } } } @@ -452,8 +409,7 @@ Performance::Result Performance::run(const std::string& archive_name, } else { // Did someone add new values to the test? if (new_results.size() != old_results.size()) { - for (YAML::const_iterator new_r = new_results.begin(); - new_r != new_results.end(); ++new_r) { + for (YAML::const_iterator new_r = new_results.begin(); new_r != new_results.end(); ++new_r) { if (!old_results[new_r->first.Scalar()]) { old_results[new_r->first.Scalar()] = (new_r->second); } @@ -464,8 +420,7 @@ Performance::Result Performance::run(const std::string& archive_name, } } else { // End Test Exists // Add new test if no match was found - database[host_setting][machine_index][tests_string][test_name] - .push_back(new_test_entry); + database[host_setting][machine_index][tests_string][test_name].push_back(new_test_entry); return_value = Performance::NewTest; } } // End MachineConfiguration Exists @@ -495,8 +450,7 @@ Performance::Result Performance::run(const std::string& archive_name, return return_value; } -bool Performance::hasSameElements(const Performance::node_t& a, - const Performance::node_t& b) const { +bool Performance::hasSameElements(const Performance::node_t& a, const Performance::node_t& b) const { if (a.size() != b.size()) { return false; } @@ -525,10 +479,7 @@ bool Performance::string_includes_exact_code(const std::string& name) const { return (name.length() != 0 && name[name.length() - 1] == '*'); } -std::string Performance::mark_name_with_exact_code( - const std::string& name) const { - return name + "*"; -} +std::string Performance::mark_name_with_exact_code(const std::string& name) const { return name + "*"; } void Performance::validate_input_result_name(const std::string& name) const { if (string_includes_exact_code(name)) { @@ -544,9 +495,7 @@ void Performance::print_archive(const std::string& 
archiveName) { std::cout << YAML::LoadFile(archiveName) << std::endl; } -void Performance::erase_archive(const std::string& yamlArchive) { - std::ofstream(yamlArchive) << std::endl; -} +void Performance::erase_archive(const std::string& yamlArchive) { std::ofstream(yamlArchive) << std::endl; } Performance::Tolerance::Tolerance() { value = 0; @@ -575,8 +524,7 @@ Performance::Tolerance::Tolerance(double val, double low, double up) { Performance::Tolerance::Tolerance(std::string str) { from_string(str); } bool Performance::Tolerance::operator==(const Tolerance& rhs) { - return (value == rhs.value) && (tolerance == rhs.tolerance) && - (lower == rhs.lower) && (upper == rhs.upper) && + return (value == rhs.value) && (tolerance == rhs.tolerance) && (lower == rhs.lower) && (upper == rhs.upper) && (use_tolerance == rhs.use_tolerance); } diff --git a/packages/kokkos-kernels/test_common/Test_Common_Test_All_Type_Combos.hpp b/packages/kokkos-kernels/test_common/Test_Common_Test_All_Type_Combos.hpp index a51d79663221..18ce1b1428ec 100644 --- a/packages/kokkos-kernels/test_common/Test_Common_Test_All_Type_Combos.hpp +++ b/packages/kokkos-kernels/test_common/Test_Common_Test_All_Type_Combos.hpp @@ -26,8 +26,7 @@ #error Test_Common_Test_All_Type_Combos.hpp requires KOKKOSKERNELS_EXECUTE_TEST to be set #endif -#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#if (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) // ETI is off, test all possible type combos @@ -57,100 +56,84 @@ KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_float, int64_t, size_t, TestDevice) // ETI is on, only test instantiated type combos -#if (defined(KOKKOSKERNELS_INST_DOUBLE) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_DOUBLE) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(double, int, int, TestDevice) #endif -#if 
(defined(KOKKOSKERNELS_INST_DOUBLE) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_DOUBLE) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(double, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_DOUBLE) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_DOUBLE) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(double, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_DOUBLE) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_DOUBLE) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(double, int64_t, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_FLOAT) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(float, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_FLOAT) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(float, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_FLOAT) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(float, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_FLOAT) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) 
KOKKOSKERNELS_EXECUTE_TEST(float, int64_t, size_t, TestDevice) #endif #if !defined(NO_TEST_COMPLEX) -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_double, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_double, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_double, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_double, int64_t, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_float, int, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && 
defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_INT)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_float, int64_t, int, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_float, int, size_t, TestDevice) #endif -#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ +#if (defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) && \ defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T)) KOKKOSKERNELS_EXECUTE_TEST(kokkos_complex_float, int64_t, size_t, TestDevice) #endif diff --git a/packages/kokkos-kernels/test_common/Test_Cuda.hpp b/packages/kokkos-kernels/test_common/Test_Cuda.hpp index cf1042a2c409..136a53ab2292 100644 --- a/packages/kokkos-kernels/test_common/Test_Cuda.hpp +++ b/packages/kokkos-kernels/test_common/Test_Cuda.hpp @@ -40,16 +40,14 @@ using CudaUVMSpaceDevice = Kokkos::Device; // KOKKOS_ENABLE_CUDA_UVM macro and cmake option is deprecated // But if it is defined, test with CudaUVMSpace. // Make sure it's instantiated first: -#if defined(KOKKOSKERNELS_TEST_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +#if defined(KOKKOSKERNELS_TEST_ETI_ONLY) && !defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) #error \ "Deprecated option KOKKOS_ENABLE_CUDA_UVM is defined, so KokkosKernels will test with CudaUVMSpace. " \ "KokkosKernels_INST_MEMSPACE_CUDAUVMSPACE=ON must be set in configuration." 
#endif #define TestDevice CudaUVMSpaceDevice // Prefer for any testing where only one exec space is used -#elif defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) && \ - !defined(KOKKOSKERNELS_INST_MEMSPACE_CUDASPACE) +#elif defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) && !defined(KOKKOSKERNELS_INST_MEMSPACE_CUDASPACE) #define TestDevice CudaUVMSpaceDevice #else #define TestDevice CudaSpaceDevice diff --git a/packages/kokkos-kernels/test_common/Test_HIP.hpp b/packages/kokkos-kernels/test_common/Test_HIP.hpp index dfb8e1d687b0..44c99c0ad975 100644 --- a/packages/kokkos-kernels/test_common/Test_HIP.hpp +++ b/packages/kokkos-kernels/test_common/Test_HIP.hpp @@ -31,15 +31,13 @@ class hip : public ::testing::Test { static void TearDownTestCase() {} }; -using HIPSpaceDevice = Kokkos::Device; -using HIPManagedSpaceDevice = - Kokkos::Device; +using HIPSpaceDevice = Kokkos::Device; +using HIPManagedSpaceDevice = Kokkos::Device; #define TestCategory hip // Prefer for any testing where only one exec space is used -#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) && \ - !defined(KOKKOSKERNELS_INST_MEMSPACE_HIPSPACE) +#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) && !defined(KOKKOSKERNELS_INST_MEMSPACE_HIPSPACE) #define TestDevice HIPManagedSpaceDevice #else #define TestDevice HIPSpaceDevice From 2095d659506a6dc82e5d8e390410d39aace49bca Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 8 Aug 2024 15:38:48 -0600 Subject: [PATCH 16/37] tpetra: update kokkos supported version to 4.4.0 Signed-off-by: Nathan Ellingwood --- packages/tpetra/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tpetra/CMakeLists.txt b/packages/tpetra/CMakeLists.txt index ca2a83bbcadf..6dbaa6f6485b 100644 --- a/packages/tpetra/CMakeLists.txt +++ b/packages/tpetra/CMakeLists.txt @@ -24,7 +24,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( # Supported Kokkos version in Trilinos # NOTE: When we snapshot Kokkos into Trilinos, we have to update these 
numbers to maintain # compatibility with external Kokkos -SET(Tpetra_SUPPORTED_KOKKOS_VERSION "4.3.1") +SET(Tpetra_SUPPORTED_KOKKOS_VERSION "4.4.0") # Option to allow developers to ignore incompatible Kokkos versions From e6e2c7bfe85890733510fedfb261a1c7d9186208 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 9 Aug 2024 13:47:54 -0600 Subject: [PATCH 17/37] Force Tpetra::CrsMatrix::getLocalMatrixDevice to be inlined on ARM. On Stria, PR 13052 caused a 25% performance regression in the CGSolve performance test that is fixed by forcing getLocalMatrixDevice to always be inlined. Restrict the fix to the specific toolchain where the problem was observed. --- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index 062df9fde96c..e8281eb7bba6 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -2288,6 +2288,18 @@ namespace Tpetra { const crs_graph_type& getCrsGraphRef () const; public: +#if __armclang_major__ == 22 && __armclang_minor__ == 1 + // On Stria, PR 13052 caused a 25% performance regression in the + // CGSolve performance test that is fixed by forcing + // getLocalMatrixDevice to always be inlined. Restrict the fix + // to the specific toolchain where the problem was observed +#define TPETRA_DETAILS_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define TPETRA_DETAILS_ALWAYS_INLINE +#endif + /// \brief The local sparse matrix. + /// + /// \warning It is only valid to call this method under certain /// \brief The local sparse matrix. /// /// \warning It is only valid to call this method under certain @@ -2297,8 +2309,10 @@ namespace Tpetra { /// least once. This method will do no error checking, so you /// are responsible for knowing when it is safe to call this /// method. 
- local_matrix_device_type getLocalMatrixDevice () const; + TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type + getLocalMatrixDevice () const; local_matrix_host_type getLocalMatrixHost () const; +#undef TPETRA_DETAILS_ALWAYS_INLINE #if KOKKOSKERNELS_VERSION < 40299 /// \brief The local sparse matrix operator From bcf8266d381c5962d4031023b913c2ab3ed30a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=92=D0=BB=D0=B0=D0=B4=D0=B8=D1=81=D0=BB=D0=B0=D0=B2=20?= =?UTF-8?q?=D0=A1=D0=B5=D0=BC=D1=8B=D0=BA=D0=B8=D0=BD?= Date: Sat, 10 Aug 2024 13:29:06 +0300 Subject: [PATCH 18/37] Fixing 'syntax errors' when compiling under MSVC. In Windows OS used '__restrict' keyword instead of Linux '__restrict__'. So, all occurrences with '__restrict__' replaced with 'KOKKOS_RESTRICT' macro to provide correct compilation under MSVC --- .../tacho/src/impl/Tacho_Blas_Team.hpp | 118 +++++++++--------- .../src/impl/Tacho_CholSupernodes_Serial.hpp | 8 +- .../impl/Tacho_CholSupernodes_SerialPanel.hpp | 12 +- .../tacho/src/impl/Tacho_LDL_External.hpp | 10 +- .../tacho/src/impl/Tacho_LDL_Internal.hpp | 18 +-- .../tacho/src/impl/Tacho_LDL_OnDevice.hpp | 14 +-- .../tacho/src/impl/Tacho_LDL_Serial.hpp | 10 +- .../tacho/src/impl/Tacho_LU_External.hpp | 8 +- .../tacho/src/impl/Tacho_LU_Internal.hpp | 4 +- .../tacho/src/impl/Tacho_LU_OnDevice.hpp | 4 +- .../tacho/src/impl/Tacho_LU_Serial.hpp | 8 +- .../tacho/src/impl/Tacho_Lapack_Serial.hpp | 24 ++-- .../tacho/src/impl/Tacho_Lapack_Team.hpp | 46 +++---- .../impl/Tacho_TeamFunctor_FactorizeChol.hpp | 4 +- .../impl/Tacho_TeamFunctor_FactorizeLDL.hpp | 4 +- .../impl/Tacho_TeamFunctor_FactorizeLU.hpp | 4 +- 16 files changed, 148 insertions(+), 148 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Blas_Team.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Blas_Team.hpp index 079a088f5c84..56ce47686e3e 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Blas_Team.hpp +++
b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Blas_Team.hpp @@ -23,19 +23,19 @@ template struct BlasTeam { struct Impl { template static KOKKOS_INLINE_FUNCTION void set(MemberType &member, int m, const T alpha, - /* */ T *__restrict__ a, int as0) { + /* */ T *KOKKOS_RESTRICT a, int as0) { Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { a[i * as0] = alpha; }); } template static KOKKOS_INLINE_FUNCTION void scale(MemberType &member, int m, const T alpha, - /* */ T *__restrict__ a, int as0) { + /* */ T *KOKKOS_RESTRICT a, int as0) { Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { a[i * as0] *= alpha; }); } template static KOKKOS_INLINE_FUNCTION void set(MemberType &member, int m, int n, const T alpha, - /* */ T *__restrict__ a, int as0, int as1) { + /* */ T *KOKKOS_RESTRICT a, int as0, int as1) { if (as0 == 1 || as0 < as1) Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), @@ -50,7 +50,7 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void scale(MemberType &member, int m, int n, const T alpha, - /* */ T *__restrict__ a, int as0, int as1) { + /* */ T *KOKKOS_RESTRICT a, int as0, int as1) { if (as0 == 1 || as0 < as1) Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), @@ -65,7 +65,7 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void set_upper(MemberType &member, int m, int n, int offset, const T alpha, - /* */ T *__restrict__ a, int as0, int as1) { + /* */ T *KOKKOS_RESTRICT a, int as0, int as1) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, j + 1 - offset), [&](const int &i) { a[i * as0 + j * as1] = alpha; }); @@ -74,7 +74,7 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void 
scale_upper(MemberType &member, int m, int n, int offset, const T alpha, - /* */ T *__restrict__ a, int as0, int as1) { + /* */ T *KOKKOS_RESTRICT a, int as0, int as1) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, j + 1 - offset), [&](const int &i) { a[i * as0 + j * as1] *= alpha; }); @@ -83,7 +83,7 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void set_lower(MemberType &member, int m, int n, int offset, const T alpha, - /* */ T *__restrict__ a, int as0, int as1) { + /* */ T *KOKKOS_RESTRICT a, int as0, int as1) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { const int jj = j + offset; Kokkos::parallel_for( @@ -96,7 +96,7 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void scale_lower(MemberType &member, int m, int n, int offset, const T alpha, - /* */ T *__restrict__ a, int as0, int as1) { + /* */ T *KOKKOS_RESTRICT a, int as0, int as1) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { const int jj = j + offset; Kokkos::parallel_for( @@ -109,9 +109,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void gemv(MemberType &member, const ConjType &cj, const int m, const int n, - const T alpha, const T *__restrict__ A, const int as0, const int as1, - const T *__restrict__ x, const int xs0, const T beta, - /* */ T *__restrict__ y, const int ys0) { + const T alpha, const T *KOKKOS_RESTRICT A, const int as0, const int as1, + const T *KOKKOS_RESTRICT x, const int xs0, const T beta, + /* */ T *KOKKOS_RESTRICT y, const int ys0) { const T one(1), zero(0); if (beta == zero) @@ -146,18 +146,18 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void trsv_upper(MemberType &member, const ConjType &cjA, const char diag, const int m, - const T *__restrict__ A, const int as0, const int as1, - /* */ T *__restrict__ b, const int bs0) { + const T 
*KOKKOS_RESTRICT A, const int as0, const int as1, + /* */ T *KOKKOS_RESTRICT b, const int bs0) { if (m <= 0) return; const bool use_unit_diag = diag == 'U' || diag == 'u'; - T *__restrict__ b0 = b; + T *KOKKOS_RESTRICT b0 = b; for (int p = (m - 1); p >= 0; --p) { const int iend = p; - const T *__restrict__ a01 = A + p * as1; - /**/ T *__restrict__ beta1 = b + p * bs0; + const T *KOKKOS_RESTRICT a01 = A + p * as1; + /**/ T *KOKKOS_RESTRICT beta1 = b + p * bs0; /// make sure the previous iteration update is done member.team_barrier(); @@ -177,19 +177,19 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void trsv_lower(MemberType &member, const ConjType &cjA, const char diag, const int m, - const T *__restrict__ A, const int as0, const int as1, - /* */ T *__restrict__ b, const int bs0) { + const T *KOKKOS_RESTRICT A, const int as0, const int as1, + /* */ T *KOKKOS_RESTRICT b, const int bs0) { if (m <= 0) return; const bool use_unit_diag = diag == 'U' || diag == 'u'; - // T *__restrict__ b0 = b; + // T *KOKKOS_RESTRICT b0 = b; for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - const T *__restrict__ a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; + const T *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - T *__restrict__ beta1 = b + p * bs0, *__restrict__ b2 = iend ? beta1 + bs0 : NULL; + T *KOKKOS_RESTRICT beta1 = b + p * bs0, *KOKKOS_RESTRICT b2 = iend ? 
beta1 + bs0 : NULL; /// make sure that the previous iteration update is done member.team_barrier(); @@ -209,10 +209,10 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void gemm(MemberType &member, const ConjTypeA &cjA, const ConjTypeB &cjB, const int m, - const int n, const int k, const T alpha, const T *__restrict__ A, - const int as0, const int as1, const T *__restrict__ B, const int bs0, + const int n, const int k, const T alpha, const T *KOKKOS_RESTRICT A, + const int as0, const int as1, const T *KOKKOS_RESTRICT B, const int bs0, const int bs1, const T beta, - /* */ T *__restrict__ C, const int cs0, const int cs1) { + /* */ T *KOKKOS_RESTRICT C, const int cs0, const int cs1) { const T one(1), zero(0); if (beta == zero) @@ -228,7 +228,7 @@ template struct BlasTeam { { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, m), [&](const int &i) { - const T *__restrict__ pA = A + i * as0, *__restrict__ pB = B + j * bs1; + const T *KOKKOS_RESTRICT pA = A + i * as0, *KOKKOS_RESTRICT pB = B + j * bs1; T c(0); for (int p = 0; p < k; ++p) c += cjA(pA[p * as1]) * cjB(pB[p * bs0]); @@ -242,9 +242,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void gemm_upper(MemberType &member, const ConjTypeA &cjA, const ConjTypeB &cjB, const int m, const int n, const int k, const T alpha, - const T *__restrict__ A, const int as0, const int as1, - const T *__restrict__ B, const int bs0, const int bs1, const T beta, - /* */ T *__restrict__ C, const int cs0, const int cs1) { + const T *KOKKOS_RESTRICT A, const int as0, const int as1, + const T *KOKKOS_RESTRICT B, const int bs0, const int bs1, const T beta, + /* */ T *KOKKOS_RESTRICT C, const int cs0, const int cs1) { const T one(1), zero(0); if (beta == zero) @@ -260,7 +260,7 @@ template struct BlasTeam { { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, j + 1), [&](const int &i) { - const T *__restrict__ pA = A + i * as0, *__restrict__ pB = B + j * bs1; + const T *KOKKOS_RESTRICT pA = A + i * as0, *KOKKOS_RESTRICT pB = B + j * bs1; T c(0); for (int p = 0; p < k; ++p) c += cjA(pA[p * as1]) * cjB(pB[p * bs0]); @@ -273,9 +273,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void herk_upper(MemberType &member, const ConjTypeA &cjA, const ConjTypeB &cjB, - const int n, const int k, const T alpha, const T *__restrict__ A, + const int n, const int k, const T alpha, const T *KOKKOS_RESTRICT A, const int as0, const int as1, const T beta, - /* */ T *__restrict__ C, const int cs0, const int cs1) { + /* */ T *KOKKOS_RESTRICT C, const int cs0, const int cs1) { const T one(1), zero(0); if (beta == zero) @@ -290,9 +290,9 @@ template struct BlasTeam { member.team_barrier(); { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { - const T *__restrict__ pA = A + j * as0; + const T *KOKKOS_RESTRICT pA = A + j * as0; Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, j + 1), [&](const int &i) { - const T *__restrict__ pB = A + i * as0; + const T *KOKKOS_RESTRICT pB = A + i * as0; T c(0); for (int p = 0; p < k; ++p) c += cjA(pA[p * as1]) * cjB(pB[p * as1]); @@ -305,9 +305,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void herk_lower(MemberType &member, const ConjTypeA &cjA, const ConjTypeB &cjB, - const int n, const int k, const T alpha, const T *__restrict__ A, + const int n, const int k, const T alpha, const T *KOKKOS_RESTRICT A, const int as0, const int as1, const T beta, - /* */ T *__restrict__ C, const int cs0, const int cs1) { + /* */ T *KOKKOS_RESTRICT C, const int cs0, const int cs1) { const T one(1), zero(0); if (beta == zero) @@ -324,7 +324,7 @@ template struct BlasTeam { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, n - j), [&](const int &i) { const int ii = i + j; - const T *__restrict__ pA = A + j * as0, *__restrict__ pB = A + ii * as0; + const T *KOKKOS_RESTRICT pA = A + j * as0, *KOKKOS_RESTRICT pB = A + ii * as0; T c(0); for (int p = 0; p < k; ++p) c += cjA(pA[p * as1]) * cjB(pB[p * as1]); @@ -337,9 +337,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void trsm_left_lower(MemberType &member, const ConjType &cjA, const char diag, - const int m, const int n, const T alpha, const T *__restrict__ A, + const int m, const int n, const T alpha, const T *KOKKOS_RESTRICT A, const int as0, const int as1, - /* */ T *__restrict__ B, const int bs0, const int bs1) { + /* */ T *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const T one(1), zero(0); if (alpha == zero) @@ -354,9 +354,9 @@ template struct BlasTeam { for (int p = 0; p < m; ++p) { const int iend = m - p - 1, jend = n; - const T *__restrict__ a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; + const T *KOKKOS_RESTRICT a21 = iend ? A + (p + 1) * as0 + p * as1 : NULL; - T *__restrict__ b1t = B + p * bs0, *__restrict__ B2 = iend ? B + (p + 1) * bs0 : NULL; + T *KOKKOS_RESTRICT b1t = B + p * bs0, *KOKKOS_RESTRICT B2 = iend ? 
B + (p + 1) * bs0 : NULL; member.team_barrier(); if (!use_unit_diag) { @@ -375,9 +375,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void trsm_left_upper(MemberType &member, const ConjType &cjA, const char diag, - const int m, const int n, const T alpha, const T *__restrict__ A, + const int m, const int n, const T alpha, const T *KOKKOS_RESTRICT A, const int as0, const int as1, - /* */ T *__restrict__ B, const int bs0, const int bs1) { + /* */ T *KOKKOS_RESTRICT B, const int bs0, const int bs1) { const T one(1.0), zero(0.0); // note that parallel range is different ( m*n vs m-1*n); @@ -390,12 +390,12 @@ template struct BlasTeam { return; const bool use_unit_diag = diag == 'U' || diag == 'u'; - T *__restrict__ B0 = B; + T *KOKKOS_RESTRICT B0 = B; for (int p = (m - 1); p >= 0; --p) { const int iend = p, jend = n; - const T *__restrict__ a01 = A + p * as1; - /**/ T *__restrict__ b1t = B + p * bs0; + const T *KOKKOS_RESTRICT a01 = A + p * as1; + /**/ T *KOKKOS_RESTRICT b1t = B + p * bs0; member.team_barrier(); if (!use_unit_diag) { @@ -415,9 +415,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void gemv(MemberType &member, const char trans, const int m, const int n, const T alpha, - const T *__restrict__ a, const int lda, const T *__restrict__ x, const int xs, + const T *KOKKOS_RESTRICT a, const int lda, const T *KOKKOS_RESTRICT x, const int xs, const T beta, - /* */ T *__restrict__ y, const int ys) { + /* */ T *KOKKOS_RESTRICT y, const int ys) { switch (trans) { case 'N': case 'n': { @@ -444,8 +444,8 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void trsv(MemberType &member, const char uplo, const char trans, const char diag, - const int m, const T *__restrict__ a, const int lda, - /* */ T *__restrict__ b, const int bs) { + const int m, const T *KOKKOS_RESTRICT a, const int lda, + /* */ T *KOKKOS_RESTRICT b, const int bs) { if (uplo == 'U' || uplo == 'u') { switch (trans) { case 'N': @@ -497,9 
+497,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void gemm(MemberType &member, const char transa, const char transb, const int m, - const int n, const int k, const T alpha, const T *__restrict__ a, int lda, - const T *__restrict__ b, int ldb, const T beta, - /* */ T *__restrict__ c, int ldc) { + const int n, const int k, const T alpha, const T *KOKKOS_RESTRICT a, int lda, + const T *KOKKOS_RESTRICT b, int ldb, const T beta, + /* */ T *KOKKOS_RESTRICT c, int ldc) { if (transa == 'N' || transa == 'n') { const NoConjugate cjA; @@ -580,9 +580,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void gemm_upper(MemberType &member, const char transa, const char transb, const int m, - const int n, const int k, const T alpha, const T *__restrict__ a, - int lda, const T *__restrict__ b, int ldb, const T beta, - /* */ T *__restrict__ c, int ldc) { + const int n, const int k, const T alpha, const T *KOKKOS_RESTRICT a, + int lda, const T *KOKKOS_RESTRICT b, int ldb, const T beta, + /* */ T *KOKKOS_RESTRICT c, int ldc) { if (transa == 'N' || transa == 'n') { const NoConjugate cjA; @@ -663,9 +663,9 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void herk(MemberType &member, const char uplo, const char trans, const int n, - const int k, const T alpha, const T *__restrict__ a, const int lda, + const int k, const T alpha, const T *KOKKOS_RESTRICT a, const int lda, const T beta, - /* */ T *__restrict__ c, const int ldc) { + /* */ T *KOKKOS_RESTRICT c, const int ldc) { if (uplo == 'U' || uplo == 'u') switch (trans) { case 'N': @@ -711,8 +711,8 @@ template struct BlasTeam { template static KOKKOS_INLINE_FUNCTION void trsm(MemberType &member, const char side, const char uplo, const char trans, const char diag, const int m, const int n, const T alpha, - const T *__restrict__ a, const int lda, - /* */ T *__restrict__ b, const int ldb) { + const T *KOKKOS_RESTRICT a, const int lda, + /* */ T *KOKKOS_RESTRICT b, const int 
ldb) { /// /// side left /// diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_Serial.hpp index 95a738061369..107294178f14 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_Serial.hpp @@ -129,8 +129,8 @@ template <> struct CholSupernodes { Kokkos::store_fence(); for (ordinal_type j = 0; j < srcsize; ++j) { - const value_type *__restrict__ ss = src + j * srcsize; - /* */ value_type *__restrict__ tt = tgt + j * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + j * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + j * srcsize; const ordinal_type iend = update_lower ? srcsize : j + 1; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll @@ -144,8 +144,8 @@ template <> struct CholSupernodes { Kokkos::load_fence(); } else { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, srcsize), [&](const ordinal_type &j) { - const value_type *__restrict__ ss = src + j * srcsize; - /* */ value_type *__restrict__ tt = tgt + j * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + j * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + j * srcsize; const ordinal_type iend = update_lower ? 
srcsize : j + 1; Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, iend), [&](const ordinal_type &i) { Kokkos::atomic_add(&tt[i], ss[i]); }); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_SerialPanel.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_SerialPanel.hpp index 48fe8ebadfd2..7cdf86d5c1e7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_SerialPanel.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_CholSupernodes_SerialPanel.hpp @@ -157,8 +157,8 @@ template <> struct CholSupernodes { case 1: { for (ordinal_type js = 0; js < nb; ++js) { const ordinal_type jt = js + offn; - const value_type *__restrict__ ss = src + js * srcsize; - /* */ value_type *__restrict__ tt = tgt + jt * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + js * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + jt * srcsize; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -175,8 +175,8 @@ template <> struct CholSupernodes { for (ordinal_type js = 0; js < nb; ++js) { const ordinal_type jt = js + offn; - const value_type *__restrict__ ss = src + js * srcsize; - /* */ value_type *__restrict__ tt = tgt + jt * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + js * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + jt * srcsize; #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #pragma unroll #endif @@ -193,8 +193,8 @@ template <> struct CholSupernodes { } else { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, nb), [&](const ordinal_type &js) { const ordinal_type jt = js + offn; - const value_type *__restrict__ ss = src + js * srcsize; - /* */ value_type *__restrict__ tt = tgt + jt * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + js * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + jt * srcsize; Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, jt + 1), [&](const ordinal_type &i) { Kokkos::atomic_fetch_add(&tt[i], ss[i]); }); }); diff --git 
a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_External.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_External.hpp index 56da9fc54080..04ebf6fa03ea 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_External.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_External.hpp @@ -83,9 +83,9 @@ template <> struct LDL { const ordinal_type m = A.extent(0); if (m > 0) { - value_type *__restrict__ Aptr = A.data(); - ordinal_type *__restrict__ ipiv = P.data(), *__restrict__ fpiv = ipiv + m, *__restrict__ perm = fpiv + m, - *__restrict__ peri = perm + m; + value_type *KOKKOS_RESTRICT Aptr = A.data(); + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(), *KOKKOS_RESTRICT fpiv = ipiv + m, *KOKKOS_RESTRICT perm = fpiv + m, + *KOKKOS_RESTRICT peri = perm + m; const value_type one(1), zero(0); for (ordinal_type i = 0; i < m; ++i) @@ -107,8 +107,8 @@ template <> struct LDL { const ordinal_type fla_pivot = -ipiv[i] - i - 1; fpiv[i] = fla_pivot; if (fla_pivot) { - value_type *__restrict__ src = Aptr + i; - value_type *__restrict__ tgt = src + fla_pivot; + value_type *KOKKOS_RESTRICT src = Aptr + i; + value_type *KOKKOS_RESTRICT tgt = src + fla_pivot; for (ordinal_type j = 0; j < (i - 1); ++j) { const ordinal_type idx = j * m; swap(src[idx], tgt[idx]); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Internal.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Internal.hpp index 6e78a303bc26..af9b36d1e605 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Internal.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Internal.hpp @@ -59,9 +59,9 @@ template <> struct LDL { int r_val = 0; const ordinal_type m = A.extent(0); if (m > 0) { - value_type *__restrict__ Aptr = A.data(); - ordinal_type *__restrict__ ipiv = P.data(), *__restrict__ fpiv = ipiv + m, *__restrict__ perm = fpiv + m, - *__restrict__ peri = perm + m; + value_type *KOKKOS_RESTRICT Aptr = A.data(); + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(), 
*KOKKOS_RESTRICT fpiv = ipiv + m, *KOKKOS_RESTRICT perm = fpiv + m, + *KOKKOS_RESTRICT peri = perm + m; const value_type one(1); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { perm[i] = i; }); member.team_barrier(); @@ -84,8 +84,8 @@ template <> struct LDL { // fpiv[i] = fla_pivot; // } // if (fla_pivot) { - // value_type *__restrict__ src = Aptr + i; - // value_type *__restrict__ tgt = src + fla_pivot; + // value_type *KOKKOS_RESTRICT src = Aptr + i; + // value_type *KOKKOS_RESTRICT tgt = src + fla_pivot; // if (j<(i-1)) { // const ordinal_type idx = j*m; // swap(src[idx], tgt[idx]); @@ -135,10 +135,10 @@ template <> struct LDL { /// no piv version // if (m > 0) { // ordinal_type - // *__restrict__ ipiv = P.data(), - // *__restrict__ fpiv = ipiv + m, - // *__restrict__ perm = fpiv + m, - // *__restrict__ peri = perm + m; + // *KOKKOS_RESTRICT ipiv = P.data(), + // *KOKKOS_RESTRICT fpiv = ipiv + m, + // *KOKKOS_RESTRICT perm = fpiv + m, + // *KOKKOS_RESTRICT peri = perm + m; // const value_type one(1); // Kokkos::parallel_for(Kokkos::TeamVectorRange(member,m),[&](const int &i) { // D(i,0) = A(i,i); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_OnDevice.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_OnDevice.hpp index e7c1ab81933a..35a54142cb7c 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_OnDevice.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_OnDevice.hpp @@ -134,11 +134,11 @@ template <> struct LDL { int r_val(0); if (m > 0) { - value_type *__restrict__ Aptr = A.data(); - ordinal_type *__restrict__ ipiv = P.data(); - ordinal_type *__restrict__ fpiv = ipiv + m; - ordinal_type *__restrict__ perm = fpiv + m; - ordinal_type *__restrict__ peri = perm + m; + value_type *KOKKOS_RESTRICT Aptr = A.data(); + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(); + ordinal_type *KOKKOS_RESTRICT fpiv = ipiv + m; + ordinal_type *KOKKOS_RESTRICT perm = fpiv + m; + ordinal_type *KOKKOS_RESTRICT 
peri = perm + m; const value_type one(1), zero(0); Kokkos::RangePolicy range_policy(exec_instance, 0, m); @@ -169,8 +169,8 @@ template <> struct LDL { fpiv[i] = fla_pivot; } if (fla_pivot) { - value_type *__restrict__ src = Aptr + i; - value_type *__restrict__ tgt = src + fla_pivot; + value_type *KOKKOS_RESTRICT src = Aptr + i; + value_type *KOKKOS_RESTRICT tgt = src + fla_pivot; if (j < (i - 1)) { const ordinal_type idx = j * m; swap(src[idx], tgt[idx]); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Serial.hpp index d50a21fff3b5..d34f41a46d08 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL_Serial.hpp @@ -83,9 +83,9 @@ template <> struct LDL { const ordinal_type m = A.extent(0); if (m > 0) { - value_type *__restrict__ Aptr = A.data(); - ordinal_type *__restrict__ ipiv = P.data(), *__restrict__ fpiv = ipiv + m, *__restrict__ perm = fpiv + m, - *__restrict__ peri = perm + m; + value_type *KOKKOS_RESTRICT Aptr = A.data(); + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(), *KOKKOS_RESTRICT fpiv = ipiv + m, *KOKKOS_RESTRICT perm = fpiv + m, + *KOKKOS_RESTRICT peri = perm + m; const value_type one(1), zero(0); for (ordinal_type i = 0; i < m; ++i) @@ -107,8 +107,8 @@ template <> struct LDL { const ordinal_type fla_pivot = -ipiv[i] - i - 1; fpiv[i] = fla_pivot; if (fla_pivot) { - value_type *__restrict__ src = Aptr + i; - value_type *__restrict__ tgt = src + fla_pivot; + value_type *KOKKOS_RESTRICT src = Aptr + i; + value_type *KOKKOS_RESTRICT tgt = src + fla_pivot; for (ordinal_type j = 0; j < (i - 1); ++j) { const ordinal_type idx = j * m; swap(src[idx], tgt[idx]); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp index 90ae8663fd9f..32f48d3abc48 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp 
+++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp @@ -73,10 +73,10 @@ template <> struct LU { TACHO_TEST_FOR_EXCEPTION(int(P.extent(0)) < 4 * m, std::runtime_error, "P should be 4*m."); if (m > 0) { - ordinal_type *__restrict__ ipiv = P.data(); - ordinal_type *__restrict__ fpiv = ipiv + m; - ordinal_type *__restrict__ perm = fpiv + m; - ordinal_type *__restrict__ peri = perm + m; + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(); + ordinal_type *KOKKOS_RESTRICT fpiv = ipiv + m; + ordinal_type *KOKKOS_RESTRICT perm = fpiv + m; + ordinal_type *KOKKOS_RESTRICT peri = perm + m; for (ordinal_type i = 0; i < m; ++i) perm[i] = i; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp index 4542d84d083a..b30c0c85c34f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp @@ -49,8 +49,8 @@ template <> struct LU { int r_val = 0; if (m > 0) { - ordinal_type *__restrict__ ipiv = P.data(), *__restrict__ fpiv = ipiv + m, *__restrict__ perm = fpiv + m, - *__restrict__ peri = perm + m; + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(), *KOKKOS_RESTRICT fpiv = ipiv + m, *KOKKOS_RESTRICT perm = fpiv + m, + *KOKKOS_RESTRICT peri = perm + m; Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int &i) { perm[i] = i; fpiv[i] = ipiv[i] - i - 1; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_OnDevice.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_OnDevice.hpp index 42f39eb0f4f3..b9def54f4f61 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_OnDevice.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_OnDevice.hpp @@ -125,8 +125,8 @@ template <> struct LU { int r_val(0); if (m > 0) { - ordinal_type *__restrict__ ipiv = P.data(), *__restrict__ fpiv = ipiv + m, *__restrict__ perm = fpiv + m, - *__restrict__ peri = perm + m; + ordinal_type 
*KOKKOS_RESTRICT ipiv = P.data(), *KOKKOS_RESTRICT fpiv = ipiv + m, *KOKKOS_RESTRICT perm = fpiv + m, + *KOKKOS_RESTRICT peri = perm + m; Kokkos::RangePolicy range_policy(exec_instance, 0, m); Kokkos::RangePolicy single_policy(exec_instance, 0, 1); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp index d498587ce064..b0fa4c8d3885 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp @@ -73,10 +73,10 @@ template <> struct LU { TACHO_TEST_FOR_EXCEPTION(int(P.extent(0)) < 4 * m, std::runtime_error, "P should be 4*m."); if (m > 0) { - ordinal_type *__restrict__ ipiv = P.data(); - ordinal_type *__restrict__ fpiv = ipiv + m; - ordinal_type *__restrict__ perm = fpiv + m; - ordinal_type *__restrict__ peri = perm + m; + ordinal_type *KOKKOS_RESTRICT ipiv = P.data(); + ordinal_type *KOKKOS_RESTRICT fpiv = ipiv + m; + ordinal_type *KOKKOS_RESTRICT perm = fpiv + m; + ordinal_type *KOKKOS_RESTRICT peri = perm + m; for (ordinal_type i = 0; i < m; ++i) perm[i] = i; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Serial.hpp index d6fb8a868faf..6a29cda72c5a 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Serial.hpp @@ -23,8 +23,8 @@ template struct LapackSerial { #if 0 struct Impl { template - static KOKKOS_INLINE_FUNCTION void sytrf_lower(const MemberType &member, const int m, T *__restrict__ A, - const int as0, const int as1, int *__restrict__ ipiv, int *info) { + static KOKKOS_INLINE_FUNCTION void sytrf_lower(const MemberType &member, const int m, T *KOKKOS_RESTRICT A, + const int as0, const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { *info = 0; if (m <= 0) return; @@ -40,9 +40,9 @@ template struct LapackSerial { for (int p = 0; p 
< m; ++p) { const int iend = m - p - 1; - T *__restrict__ alpha11 = A + (p)*as0 + (p)*as1, - *__restrict__ a21 = A + (p + 1) * as0 + (p) * as1, - *__restrict__ A22 = A + (p + 1) * as0 + (p + 1) * as1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, + *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p) * as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; mag_type lambda1(0); int idx(0); @@ -125,7 +125,7 @@ template struct LapackSerial { } template - static KOKKOS_INLINE_FUNCTION void sytrf_lower_nopiv(const MemberType &member, const int m, T *__restrict__ A, + static KOKKOS_INLINE_FUNCTION void sytrf_lower_nopiv(const MemberType &member, const int m, T *KOKKOS_RESTRICT A, const int as0, const int as1, int *info) { *info = 0; if (m <= 0) @@ -135,8 +135,8 @@ template struct LapackSerial { for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - T *__restrict__ alpha11 = A + (p)*as0 + (p)*as1, *__restrict__ a21 = A + (p + 1) * as0 + (p)*as1, - *__restrict__ A22 = A + (p + 1) * as0 + (p + 1) * as1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p)*as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; const auto alpha = *alpha11; // arith_traits::real(*alpha11); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, iend), [&](const int &i) { a21[i * as0] /= alpha; }); @@ -155,7 +155,7 @@ template struct LapackSerial { template static KOKKOS_INLINE_FUNCTION void potrf(const MemberType &member, const char uplo, const int m, - /* */ T *__restrict__ A, const int lda, int *info) { + /* */ T *KOKKOS_RESTRICT A, const int lda, int *info) { switch (uplo) { case 'U': case 'u': { @@ -174,9 +174,9 @@ template struct LapackSerial { template static KOKKOS_INLINE_FUNCTION void sytrf(const MemberType &member, const char uplo, const int m, - /* */ T *__restrict__ A, const int lda, - /* */ int *__restrict__ P, - /* */ T *__restrict__ W, int *info) { + /* */ T *KOKKOS_RESTRICT A, const int lda, + /* */ int 
*KOKKOS_RESTRICT P, + /* */ T *KOKKOS_RESTRICT W, int *info) { switch (uplo) { case 'U': case 'u': { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp index c54fd8b78848..cde52b82693a 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp @@ -22,7 +22,7 @@ namespace Tacho { template struct LapackTeam { struct Impl { template - static KOKKOS_INLINE_FUNCTION void potrf_upper(const MemberType &member, const int m, T *__restrict__ A, + static KOKKOS_INLINE_FUNCTION void potrf_upper(const MemberType &member, const int m, T *KOKKOS_RESTRICT A, const int as0, const int as1, int *info) { *info = 0; if (m <= 0) @@ -33,8 +33,8 @@ template struct LapackTeam { for (int p = 0; p < m; ++p) { const int jend = m - p - 1; - T *__restrict__ alpha11 = A + (p)*as0 + (p)*as1, *__restrict__ a12t = A + (p)*as0 + (p + 1) * as1, - *__restrict__ A22 = A + (p + 1) * as0 + (p + 1) * as1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, *KOKKOS_RESTRICT a12t = A + (p)*as0 + (p + 1) * as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; Kokkos::single(Kokkos::PerTeam(member), [&]() { if (*info == 0 && arith_traits::real(*alpha11) <= zero) { @@ -58,8 +58,8 @@ template struct LapackTeam { } template - static KOKKOS_INLINE_FUNCTION void sytrf_lower(const MemberType &member, const int m, T *__restrict__ A, - const int as0, const int as1, int *__restrict__ ipiv, int *info) { + static KOKKOS_INLINE_FUNCTION void sytrf_lower(const MemberType &member, const int m, T *KOKKOS_RESTRICT A, + const int as0, const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { *info = 0; if (m <= 0) return; @@ -75,9 +75,9 @@ template struct LapackTeam { for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - T *__restrict__ alpha11 = A + (p)*as0 + (p)*as1, - *__restrict__ a21 = A + (p + 1) * as0 + (p) * as1, - *__restrict__ 
A22 = A + (p + 1) * as0 + (p + 1) * as1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, + *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p) * as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; mag_type lambda1(0); int idx(0); @@ -160,7 +160,7 @@ template struct LapackTeam { } template - static KOKKOS_INLINE_FUNCTION void sytrf_lower_nopiv(const MemberType &member, const int m, T *__restrict__ A, + static KOKKOS_INLINE_FUNCTION void sytrf_lower_nopiv(const MemberType &member, const int m, T *KOKKOS_RESTRICT A, const int as0, const int as1, int *info) { *info = 0; if (m <= 0) @@ -170,8 +170,8 @@ template struct LapackTeam { for (int p = 0; p < m; ++p) { const int iend = m - p - 1; - T *__restrict__ alpha11 = A + (p)*as0 + (p)*as1, *__restrict__ a21 = A + (p + 1) * as0 + (p)*as1, - *__restrict__ A22 = A + (p + 1) * as0 + (p + 1) * as1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p)*as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; const auto alpha = *alpha11; // arith_traits::real(*alpha11); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, iend), [&](const int &i) { a21[i * as0] /= alpha; }); @@ -190,7 +190,7 @@ template struct LapackTeam { template static KOKKOS_INLINE_FUNCTION void potrf(const MemberType &member, const char uplo, const int m, - /* */ T *__restrict__ A, const int lda, int *info) { + /* */ T *KOKKOS_RESTRICT A, const int lda, int *info) { switch (uplo) { case 'U': case 'u': { @@ -209,9 +209,9 @@ template struct LapackTeam { template static KOKKOS_INLINE_FUNCTION void sytrf(const MemberType &member, const char uplo, const int m, - /* */ T *__restrict__ A, const int lda, - /* */ int *__restrict__ P, - /* */ T *__restrict__ W, int *info) { + /* */ T *KOKKOS_RESTRICT A, const int lda, + /* */ int *KOKKOS_RESTRICT P, + /* */ T *KOKKOS_RESTRICT W, int *info) { switch (uplo) { case 'U': case 'u': { @@ -229,8 +229,8 @@ template struct LapackTeam { } template - static 
KOKKOS_INLINE_FUNCTION void getrf(const MemberType &member, const int m, const int n, T *__restrict__ A, - const int as1, int *__restrict__ ipiv, int *info) { + static KOKKOS_INLINE_FUNCTION void getrf(const MemberType &member, const int m, const int n, T *KOKKOS_RESTRICT A, + const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { if (m <= 0 || n <= 0) return; @@ -241,12 +241,12 @@ template struct LapackTeam { const int as0 = 1; for (int p = 0; p < m; ++p) { const int iend = m - p - 1, jend = n - p - 1; - T *__restrict__ alpha11 = A + (p)*as0 + (p)*as1, // as0 & as1 are leading dimension for rows & cols - *__restrict__ AB = A + (p) * as0, - *__restrict__ ABR = alpha11, - *__restrict__ a21 = A + (p + 1) * as0 + (p) * as1, - *__restrict__ a12 = A + (p) * as0 + (p + 1) * as1, - *__restrict__ A22 = A + (p + 1) * as0 + (p + 1) * as1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, // as0 & as1 are leading dimension for rows & cols + *KOKKOS_RESTRICT AB = A + (p) * as0, + *KOKKOS_RESTRICT ABR = alpha11, + *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p) * as1, + *KOKKOS_RESTRICT a12 = A + (p) * as0 + (p + 1) * as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; { int idx(0); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp index d345880bb59a..5e52ceb61595 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp @@ -225,8 +225,8 @@ template struct TeamFunctor_FactorizeChol { Kokkos::TeamThreadRange(member, srcsize), [&, srcsize, src, tgt](const ordinal_type &j) { // Value capture is a workaround for cuda + gcc-7.2 compiler bug w/c++14 - const value_type *__restrict__ ss = src + j * srcsize; - /* */ value_type *__restrict__ tt = tgt + j * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + j * srcsize; + /* */ value_type 
*KOKKOS_RESTRICT tt = tgt + j * srcsize; Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, j + 1), [&](const ordinal_type &i) { Kokkos::atomic_add(&tt[i], ss[i]); }); }); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp index 3df7a30ee21a..877cb1b45dba 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp @@ -268,8 +268,8 @@ template struct TeamFunctor_FactorizeLDL { Kokkos::TeamThreadRange(member, srcsize), [&, srcsize, src, tgt](const ordinal_type &j) { // Value capture is a workaround for cuda + gcc-7.2 compiler bug w/c++14 - const value_type *__restrict__ ss = src + j * srcsize; - /* */ value_type *__restrict__ tt = tgt + j * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + j * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + j * srcsize; Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, j + 1), [&](const ordinal_type &i) { Kokkos::atomic_add(&tt[i], ss[i]); }); }); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp index b04f8bf8ca4a..3ad435b8e853 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp @@ -255,8 +255,8 @@ template struct TeamFunctor_FactorizeLU { Kokkos::TeamThreadRange(member, srcsize), [&, srcsize, src, tgt](const ordinal_type &j) { // Value capture is a workaround for cuda + gcc-7.2 compiler bug w/c++14 - const value_type *__restrict__ ss = src + j * srcsize; - /* */ value_type *__restrict__ tt = tgt + j * srcsize; + const value_type *KOKKOS_RESTRICT ss = src + j * srcsize; + /* */ value_type *KOKKOS_RESTRICT tt = tgt + j * srcsize; 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, srcsize), [&](const ordinal_type &i) { Kokkos::atomic_add(&tt[i], ss[i]); }); }); From 4e273a8847ea815a9d0916128aefc70ec13e994a Mon Sep 17 00:00:00 2001 From: Vladislav Semykin <34096407+ViNN280801@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:52:25 +0300 Subject: [PATCH 19/37] Return Kokkos_View.hpp to the prev version --- packages/kokkos/core/src/Kokkos_View.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp index 2c5ade5cae4f..484a0e6f62e4 100644 --- a/packages/kokkos/core/src/Kokkos_View.hpp +++ b/packages/kokkos/core/src/Kokkos_View.hpp @@ -944,8 +944,8 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0) && std::is_integral::value && std::is_integral::value), + (Kokkos::Impl::always_true::value && // + (2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0)), reference_type> operator()(I0 i0, I1 i1) const { check_operator_parens_valid_args(i0, i1); @@ -955,8 +955,8 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0) && std::is_integral::value && std::is_integral::value), + (Kokkos::Impl::always_true::value && // + (2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0)), reference_type> operator()(I0 i0, I1 i1) const { check_operator_parens_valid_args(i0, i1); @@ -1088,7 +1088,7 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic == 0) && std::is_integral::value && std::is_integral::value), + is_default_map && 
is_layout_right && (rank_dynamic == 0)), reference_type> access(I0 i0, I1 i1, Is... extra) const { check_access_member_function_valid_args(i0, i1, extra...); @@ -1099,7 +1099,7 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic != 0) && std::is_integral::value && std::is_integral::value), + is_default_map && is_layout_right && (rank_dynamic != 0)), reference_type> access(I0 i0, I1 i1, Is... extra) const { check_access_member_function_valid_args(i0, i1, extra...); From 0bcb0d8fbd4f350e90f48a6c268ad3925ca7bb4f Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Mon, 12 Aug 2024 11:43:49 -0600 Subject: [PATCH 20/37] tacho: match macro names at construction/destruction of handles Use TACHO_HAVE_CUSPARSE in place of KOKKOS_ENABLE_CUDA at cusparse handle destruction --- .../shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 9b98c80ec399..08d77d163d25 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -2069,7 +2069,7 @@ class NumericToolsLevelSet : public NumericToolsBase { #endif } } -#if defined(KOKKOS_ENABLE_CUDA) +#if defined(TACHO_HAVE_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) cusparseDestroy(cusparseHandle); cusparseDestroyDnMat(matL); cusparseDestroyDnVec(vecL); From 80f9352007cbd37f886a06be96fbc87819a10318 Mon Sep 17 00:00:00 2001 From: Curtis Ober Date: Mon, 12 Aug 2024 16:34:00 -0600 Subject: [PATCH 21/37] Update COPYRIGHT --- packages/zoltan/COPYRIGHT | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/zoltan/COPYRIGHT b/packages/zoltan/COPYRIGHT 
index 0dbfc43b8a4b..4e663b5cb739 100644 --- a/packages/zoltan/COPYRIGHT +++ b/packages/zoltan/COPYRIGHT @@ -1,10 +1,9 @@ - ??: description - Copyright (c) 20?? NTESS +Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring + Copyright (c) 2012 NTESS -Copyright 20?? National Technology & Engineering Solutions of Sandia, +Copyright 2012 National Technology & Engineering Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. -Copyright the ?? contributors. - +Copyright the Zoltan contributors. From b4014e0b557e7c993841deb2972a09a4c2cb8225 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 22:55:02 +0000 Subject: [PATCH 22/37] Bump github/codeql-action from 3.25.15 to 3.26.0 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.15 to 3.26.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/afb54ba388a7dca6ecae48f608c4ff05ff4cc77a...eb055d739abdc2e8de2e5f4ba1a8b246daa779aa) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- .github/workflows/scorecards.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index dd2945da790d..d6aceb7c147e 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -62,7 +62,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -85,6 +85,6 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 52b22251f53a..ad742b5ffdff 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 with: sarif_file: results.sarif From a09d908c37189979e5d1fae5a106dbd618132709 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 22:55:04 +0000 Subject: [PATCH 23/37] Bump DoozyX/clang-format-lint-action from 0.17 to 0.18 Bumps [DoozyX/clang-format-lint-action](https://github.com/doozyx/clang-format-lint-action) from 0.17 to 0.18. - [Release notes](https://github.com/doozyx/clang-format-lint-action/releases) - [Commits](https://github.com/doozyx/clang-format-lint-action/compare/11b773b1598aa4ae3b32f023701bca5201c3817d...d7f6a5bada32b7ea520b5918416e92997678e3fd) --- updated-dependencies: - dependency-name: DoozyX/clang-format-lint-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/clang_format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index fdcc82067cab..12b5dbe74c85 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -12,7 +12,7 @@ jobs: steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - uses: DoozyX/clang-format-lint-action@11b773b1598aa4ae3b32f023701bca5201c3817d # v0.17 + - uses: DoozyX/clang-format-lint-action@d7f6a5bada32b7ea520b5918416e92997678e3fd # v0.18 with: source: './packages/muelu ./packages/tempus ./packages/teko ./packages/xpetra' exclude: './packages/tempus/examples' From 7f166e37adb103056ec458518c7e33fd967d2bb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 22:55:09 +0000 Subject: [PATCH 24/37] Bump actions/upload-artifact from 4.3.5 to 4.3.6 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.5 to 4.3.6. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/89ef406dd8d7e03cfd12d9e0a4a378f454709029...834a144ee995460fba8ed112a2fc961b36a5ec5a) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/clang_format.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index fdcc82067cab..a4d9e0436232 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -22,7 +22,7 @@ jobs: - run: git diff HEAD > format_patch.txt - run: if [ "$(cat format_patch.txt)" == "" ] ; then rm format_patch.txt ; else cat format_patch.txt; fi - - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 id: upload-artf if: ${{ hashFiles('format_patch.txt') != '' }} with: diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 52b22251f53a..37015b8ca616 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -58,7 +58,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. 
- name: "Upload artifact" - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 with: name: SARIF file path: results.sarif From a03c5e6b7db16086a194cd9d64ffda97c3481e7f Mon Sep 17 00:00:00 2001 From: Alan Williams Date: Mon, 12 Aug 2024 20:31:39 -0600 Subject: [PATCH 25/37] STK: Snapshot 08-12-24 20:31 from Sierra 5.21.3-99-g30df4ff9 --- packages/krino/.dont_scrape | 0 packages/krino/CMakeLists.txt | 25 +- packages/krino/SierraCmakeCode.cmake | 1161 +++++++++++++++++ packages/krino/cmake/krinoConfig.cmake | 13 + .../krino/cmake_install_test/load_gcc_modules | 38 +- .../krino/cmake_install_test/run_cmake_krino | 1 + .../Akri_DeleteSmallElementsMain.cpp | 1 - .../krino/geometry/Akri_WindingNumber.cpp | 225 ++++ .../krino/geometry/Akri_WindingNumber.hpp | 70 + .../krino_lib/Akri_AdaptiveElementContour.cpp | 430 ++++++ .../krino_lib/Akri_AdaptiveElementContour.hpp | 31 + .../krino_lib/Akri_AdaptivityHelpers.cpp | 2 +- .../krino/krino_lib/Akri_AuxMetaData.cpp | 8 + .../krino/krino_lib/Akri_AuxMetaData.hpp | 1 + .../krino/krino_lib/Akri_BoundingBoxMesh.cpp | 23 +- .../krino/krino_lib/Akri_BoundingBoxMesh.hpp | 1 + .../krino/krino_lib/Akri_CDFEM_Support.cpp | 7 +- .../krino/krino_lib/Akri_CDFEM_Support.hpp | 4 +- .../krino/krino/krino_lib/Akri_CDMesh.cpp | 2 - .../krino/krino/krino_lib/Akri_CDMesh.hpp | 2 +- .../krino_lib/Akri_CDMesh_Refinement.cpp | 12 +- .../krino_lib/Akri_DetermineElementSign.cpp | 3 +- .../krino_lib/Akri_ElementCutterUtils.cpp | 1 + .../krino/krino_lib/Akri_Fast_Marching.cpp | 260 ++-- .../krino/krino_lib/Akri_Fast_Marching.hpp | 28 +- .../krino/krino/krino_lib/Akri_IC_Alg.hpp | 4 + .../krino/krino/krino_lib/Akri_LevelSet.cpp | 144 +- .../krino/krino/krino_lib/Akri_LevelSet.hpp | 14 +- .../Akri_LevelSetInterfaceGeometry.cpp | 2 +- .../Akri_LevelSetSurfaceInterfaceGeometry.cpp | 2 +- .../Akri_LevelSetSurfaceInterfaceGeometry.hpp | 2 + 
.../Akri_MasterElementDeterminer.hpp | 2 +- .../krino/krino_lib/Akri_MeshFromFile.cpp | 1 - .../krino_lib/Akri_NodalSurfaceDistance.cpp | 26 + .../krino_lib/Akri_NodalSurfaceDistance.hpp | 19 +- .../krino/krino_lib/Akri_OutputUtils.cpp | 33 +- .../krino/krino/krino_lib/Akri_PhaseTag.hpp | 4 +- .../krino/krino_lib/Akri_PostProcess.cpp | 80 ++ .../krino/krino_lib/Akri_PostProcess.hpp | 32 + .../krino_lib/Akri_RefinementInterface.cpp | 83 +- .../krino_lib/Akri_RefinementInterface.hpp | 14 +- .../krino/krino_lib/Akri_SemiLagrangian.cpp | 477 +++++++ .../krino/krino_lib/Akri_SemiLagrangian.hpp | 89 ++ .../krino/krino_lib/Akri_SharpFeature.cpp | 3 +- packages/krino/krino/krino_lib/Akri_Snap.cpp | 1 + .../Akri_SubElementChildNodeAncestry.cpp | 1 + .../krino/krino/math_utils/Akri_MathUtil.cpp | 29 + .../krino/krino/math_utils/Akri_MathUtil.hpp | 2 + packages/krino/krino/mesh_utils/Akri_Edge.hpp | 2 +- .../krino/mesh_utils/Akri_EntityIdPool.cpp | 17 +- .../krino/mesh_utils/Akri_EntityIdPool.hpp | 1 + .../krino/krino/mesh_utils/Akri_FieldRef.hpp | 5 + .../krino/mesh_utils/Akri_MeshHelpers.cpp | 145 +- .../krino/mesh_utils/Akri_MeshHelpers.hpp | 34 +- .../krino/krino/mesh_utils/Akri_QuadFace.cpp | 74 ++ .../krino/krino/mesh_utils/Akri_QuadFace.hpp | 76 ++ .../mesh_utils/Akri_SideAttachedElements.cpp | 339 +++++ .../mesh_utils/Akri_SideAttachedElements.hpp | 18 + .../krino/parser/Akri_LevelSet_Parser.cpp | 15 + .../krino/krino/parser/Akri_Region_Parser.cpp | 26 + .../krino/refinement/Akri_HexRefiner.cpp | 91 ++ .../krino/refinement/Akri_HexRefiner.hpp | 26 + .../krino/refinement/Akri_NodeRefiner.cpp | 193 ++- .../krino/refinement/Akri_NodeRefiner.hpp | 23 +- .../krino/refinement/Akri_QuadRefiner.cpp | 94 ++ .../krino/refinement/Akri_QuadRefiner.hpp | 26 + .../krino/refinement/Akri_Refinement.cpp | 458 ++++--- .../krino/refinement/Akri_Refinement.hpp | 57 +- .../krino/refinement/Akri_RefinerUtils.hpp | 31 + .../Akri_TransitionElementEdgeMarker.cpp | 427 ++++-- 
.../Akri_TransitionElementEdgeMarker.hpp | 104 +- .../krino/refinement/Akri_TriRefiner.cpp | 22 +- packages/krino/krino/region/Akri_Region.cpp | 38 +- packages/krino/krino/region/Akri_Region.hpp | 4 + .../krino/krino/region/Akri_Simulation.cpp | 1 + .../krino/krino/region/Akri_Simulation.hpp | 2 + packages/krino/krino/surface/Akri_Facet.cpp | 1 - .../krino/surface/Akri_Faceted_Surface.cpp | 2 - .../krino/surface/Akri_Faceted_Surface.hpp | 6 +- .../Akri_String_Function_Expression.cpp | 8 + .../Akri_String_Function_Expression.hpp | 4 + .../krino/krino/unit_tests/Akri_MeshSpecs.hpp | 162 +++ .../krino/unit_tests/Akri_StkMeshBuilder.cpp | 40 +- .../krino/unit_tests/Akri_StkMeshBuilder.hpp | 3 + .../krino/unit_tests/Akri_StkMeshFixture.hpp | 12 +- .../krino/unit_tests/Akri_UnitMathUtils.cpp | 19 + .../Akri_Unit_DecomposeWithSensitivities.cpp | 147 ++- .../unit_tests/Akri_Unit_FastMarching.cpp | 140 ++ .../unit_tests/Akri_Unit_MeshHelpers.cpp | 1 - .../unit_tests/Akri_Unit_OutputUtils.cpp | 80 ++ .../unit_tests/Akri_Unit_RebalanceUtils.cpp | 5 +- .../unit_tests/Akri_Unit_Refine_Beam.cpp | 65 + .../unit_tests/Akri_Unit_Refine_CDMesh.cpp | 2 +- .../krino/unit_tests/Akri_Unit_Refine_Hex.cpp | 74 ++ .../unit_tests/Akri_Unit_Refine_Quad.cpp | 72 + .../krino/unit_tests/Akri_Unit_Refine_Tet.cpp | 105 +- .../krino/unit_tests/Akri_Unit_Refine_Tri.cpp | 171 ++- .../Akri_Unit_RefinementFixture.hpp | 25 +- .../unit_tests/Akri_Unit_SemiLagrangian.cpp | 107 ++ .../Akri_Unit_SideAttachedElements.cpp | 109 ++ .../Akri_Unit_Single_Element_Fixtures.hpp | 1 - .../unit_tests/Akri_Unit_WindingNumber.cpp | 186 +++ .../krino/krino/unit_tests/Akri_Unit_main.cpp | 2 +- .../krino_mesh_adapt/KrinoMeshAdaptMain.cpp | 1 - .../mesh_adapt_lib/KrinoMeshAdapt.cpp | 2 + packages/krino/tools/trilinos_snapshot.sh | 12 +- packages/percept/src/adapt/Colorer.cpp | 6 +- .../percept/src/adapt/DiscretizeWedge.hpp | 6 +- .../percept/src/adapt/FindValidCentroid.cpp | 4 +- packages/percept/src/adapt/FixSideSets.cpp 
| 173 ++- packages/percept/src/adapt/FixSideSets.hpp | 6 + packages/percept/src/adapt/IEdgeAdapter.cpp | 2 - .../adapt/IElementBasedAdapterPredicate.hpp | 2 +- .../adapt/PredicateBasedElementAdapter.hpp | 1 + .../src/adapt/PredicateTemplateAdapter.hpp | 1 + packages/percept/src/adapt/Refiner.cpp | 127 +- packages/percept/src/adapt/Refiner.hpp | 6 +- .../RefinerPattern_Tet4_Tet4_HangingNode.hpp | 9 - .../src/adapt/RefinerPattern_Wedge6_Het_N.hpp | 26 +- .../percept/src/adapt/RefinerUnrefine.cpp | 1 - packages/percept/src/adapt/RefinerUtil.cpp | 1 - packages/percept/src/adapt/SDCEntityType.hpp | 10 +- packages/percept/src/adapt/SubDimCell.hpp | 10 +- .../src/adapt/TransitionElementAdapter.hpp | 6 + .../src/adapt/UniformRefinerPattern.cpp | 2 +- .../UniformRefinerPattern_Quad4_Tri3_2.hpp | 2 + ...iformRefinerPattern_Tet4_Tet4_8_sierra.hpp | 2 - packages/percept/src/adapt/main/MeshAdapt.cpp | 13 +- .../src/adapt/main/MeshAdaptMemberVarInit.hpp | 4 +- packages/percept/src/adapt/markers/Marker.cpp | 2 +- .../percept/src/percept/GeometryVerifier.cpp | 4 +- packages/percept/src/percept/Percept.hpp | 4 +- packages/percept/src/percept/PerceptMesh.cpp | 211 +-- packages/percept/src/percept/PerceptMesh.hpp | 3 +- .../src/percept/PerceptMeshReadWrite.hpp | 2 +- .../src/percept/eigen_verify/EigenVerify.cpp | 1 - .../src/percept/fixtures/BeamFixture.cpp | 1 - .../percept/fixtures/HeterogeneousFixture.cpp | 1 - .../src/percept/fixtures/PyramidFixture.cpp | 1 - .../src/percept/fixtures/QuadFixture.hpp | 1 - .../src/percept/fixtures/SingleTetFixture.cpp | 1 - .../src/percept/fixtures/TetWedgeFixture.cpp | 1 - .../percept/fixtures/TriQuadSurfaceMesh3D.cpp | 1 - .../src/percept/mesh/gen/SweepMesher.cpp | 1 - .../kernel/GeometryKernelGregoryPatch.cpp | 1 - .../geometry/kernel/GeometryKernelPGEOM.cpp | 8 +- .../stk_geom/3D/FitGregoryPatches.cpp | 3 - .../GenericAlgorithm_total_element_metric.hpp | 4 +- .../mesh/mod/smoother/MeshSmoother.cpp | 3 - 
.../mesh_transfer/RotationTranslation.hpp | 45 +- .../src/percept/norm/IntrepidManager.cpp | 4 +- .../src/percept/norm/IntrepidManager.hpp | 1 - .../percept/stk_rebalance/ZoltanPartition.cpp | 20 +- .../src/percept/uq/main/RFRealizeMain.cpp | 1 - .../src/percept/uq/main/RFSuiteMain.cpp | 1 - .../src/percept/util/GeneralFunction.hpp | 10 +- packages/stk/CHANGELOG.md | 20 + packages/stk/CMakeLists.txt | 45 +- packages/stk/cmake/STK_Trilinos_config.h.in | 6 +- packages/stk/cmake/stkConfig.cmake | 10 + packages/stk/cmake/stkLapackGeneric.cmake | 2 + packages/stk/cmake/stkLapackSierra.cmake | 1 + .../stk_balance/stk_balance/CMakeLists.txt | 23 +- .../stk_balance/internal/Diagnostics.hpp | 2 +- .../stk_balance/internal/OutputMesh.cpp | 1 - .../internal/OutputSerializerBulkData.cpp | 14 +- .../stk_balance/stk_balance/io/BalanceIO.cpp | 3 - .../stk_balance/m2n/M2NOutputMesh.cpp | 1 - .../m2n/M2NOutputSerializerBulkData.cpp | 14 +- .../stk_balance/m2n/m2nRebalance.cpp | 1 - .../stk_balance/setup/LifeCycle.cpp | 1 - packages/stk/stk_coupling/Jamfile | 3 +- .../stk_coupling/stk_coupling/CMakeLists.txt | 2 +- .../stk_balance/howToFixPMR1Violation.cpp | 1 - .../stk_balance/howToUseStkBalance.cpp | 4 +- .../stk_expreval/BasicHostEvaluation.cpp | 2 + .../stk/stk_doc_tests/stk_io/QueryExoVars.cpp | 6 +- .../stk_io/RenamedInputFields.cpp | 3 +- .../addFileContentsToOutputDatabase.cpp | 2 - .../stk_doc_tests/stk_io/appendResults.cpp | 4 +- .../stk_io/handleMissingFieldOnRead.cpp | 2 - .../stk_io/handleMissingFieldOnReadThrow.cpp | 2 - .../howToCreateAndWriteNodesetOrSideset.cpp | 4 +- .../stk_io/howToCreateAssemblies.cpp | 3 - .../stk_doc_tests/stk_io/howToReadWriteQa.cpp | 4 +- .../stk_io/howToUseTextMeshWithStkIO.cpp | 2 - .../stk_doc_tests/stk_io/howToWriteMesh.cpp | 9 +- .../stk_io/howToWriteMeshWithEdges.cpp | 5 +- .../stk_io/howToWriteMeshWithFaces.cpp | 3 - .../howToWriteMeshWithInternalSidesets.cpp | 3 +- .../stk_io/howToWriteRestartWithEdges.cpp | 2 - 
.../stk_io/howToWriteRestartWithFaces.cpp | 2 - .../stk_io/interpolateFieldCyclic.cpp | 2 - .../stk_io/interpolateFieldNegativeTime.cpp | 2 - .../interpolateFieldNonMonotonicTime.cpp | 2 - .../stk_io/interpolateIntegerFieldInvalid.cpp | 1 - .../stk_io/interpolateNodalField.cpp | 2 - .../stk_io/interpolateOutsideRange.cpp | 2 - .../stk_io/interpolateSingleStep.cpp | 2 - .../stk_doc_tests/stk_io/readAttributes.cpp | 3 +- .../stk_io/readInitialCondition.cpp | 2 - .../readInitialConditionMultiSubset.cpp | 1 - .../readInitialConditionNodalSubset.cpp | 1 - .../stk_io/readInitialConditionOnce.cpp | 2 - .../readInitialConditionSpecifiedTime.cpp | 2 - .../stk_io/readInitialConditionSubset.cpp | 1 - .../readInitialConditionTwoFieldSubset.cpp | 1 - .../stk/stk_doc_tests/stk_io/readMesh.cpp | 2 - .../stk_io/readMeshDelayFieldAllocation.cpp | 2 - .../stk_doc_tests/stk_io/replaceBulkData.cpp | 2 - .../stk_io/requestedResultsFieldName.cpp | 2 - .../stk_io/restartInterpolatedField.cpp | 3 - .../stk_doc_tests/stk_io/restartTestUtils.hpp | 1 - .../setOptionToNotCollapseSequencedFields.cpp | 6 +- .../stk_io/singleStepOnRestart.cpp | 1 - .../stk_io/subsettingOutputDB.cpp | 1 - .../stk_io/useNodesetDbVarForNodalField.cpp | 1 - .../stk_doc_tests/stk_io/usingHeartbeat.cpp | 1 - .../usingHeartbeatCSVChangePrecision.cpp | 1 - .../usingHeartbeatOverrideSeparator.cpp | 1 - .../stk_io/usingHeartbeatSpyhisFormat.cpp | 1 - .../stk/stk_doc_tests/stk_io/usingHistory.cpp | 6 - .../stk/stk_doc_tests/stk_io/usingResults.cpp | 1 - .../stk/stk_doc_tests/stk_io/writeResults.cpp | 2 - .../stk_io/writeResultsAndRestart.cpp | 3 - .../writingAndReadingGlobalParameters.cpp | 2 - .../writingAndReadingGlobalParametersAuto.cpp | 2 - .../writingAndReadingGlobalVariables.cpp | 2 - .../stk_io/writingMultipleOutputFiles.cpp | 1 - .../stk_mesh/CreateFacesHexesShells.cpp | 3 - .../stk_mesh/CreateFacesLayeredShellsHex.cpp | 1 - .../stk_mesh/IOSidesetFaceCreation.cpp | 4 +- .../stk_mesh/UnitTestCommMeshCounts.cpp | 6 
+- .../stk_mesh/UnitTestGhostParts.cpp | 3 - .../stk_mesh/changeEntityOwner.cpp | 1 - .../stk_mesh/changeEntityParts.cpp | 1 - .../stk_mesh/communicateFieldData.cpp | 2 +- .../stk_mesh/createFacesEdgesHex.cpp | 5 - .../stk_doc_tests/stk_mesh/createFacesHex.cpp | 1 - .../stk_mesh/createSelectedFaces.cpp | 1 - .../stk_mesh/createSharedNodes.cpp | 3 - .../stk_doc_tests/stk_mesh/createStkMesh.cpp | 1 - .../stk_mesh/createStkMeshAlt1.cpp | 3 - .../stk_doc_tests/stk_mesh/customGhosting.cpp | 2 - .../stk_doc_tests/stk_mesh/entityState.cpp | 6 - .../stk_mesh/generateNewEntities.cpp | 1 - .../stk_mesh/howToDestroyElementsInList.cpp | 1 - .../howToDestroyElementsOfTopology.cpp | 1 - .../stk_mesh/howToEnableMeshDiagnostics.cpp | 1 - .../stk_doc_tests/stk_mesh/howToGetFields.cpp | 1 - .../stk_mesh/howToIterateConnectivity.cpp | 3 - .../stk_mesh/howToIterateEntities.cpp | 2 - .../stk/stk_doc_tests/stk_mesh/howToNgp.cpp | 29 +- .../stk_mesh/howToNgpMultistateFields.cpp | 3 - .../stk_doc_tests/stk_mesh/howToSkinMesh.cpp | 3 - .../stk_mesh/howToSortEntities.cpp | 2 +- .../stk_doc_tests/stk_mesh/howToUseAura.cpp | 1 - .../stk_mesh/howToUseEquivalent.cpp | 2 +- .../stk_mesh/howToUseGenerateNewIds.cpp | 4 +- .../stk_mesh/howToUseNgpFieldAsyncCopy.cpp | 1 - .../stk_mesh/howToUseSelectors.cpp | 5 +- .../stk_mesh/howToVisitEdgeNodes.cpp | 84 ++ .../stk_mesh/setAndGetTopology.cpp | 1 - .../stk_mesh/useAdvancedFields.cpp | 1 - .../stk_doc_tests/stk_mesh/useFieldBLAS.cpp | 3 +- .../stk_mesh/useMultistateFields.cpp | 1 - .../stk_mesh/useSimpleFields.cpp | 3 +- .../howToNgpSearchElemNodeNeighbors.cpp | 38 +- .../stk_search/howToUseCoarseSearch.cpp | 3 +- .../stk_search/howToUseFilterCoarseSearch.cpp | 1 - .../stk_transfer/howToUseCopyTransfer.cpp | 2 - .../howToUseLeastSquaresInterpolation.cpp | 5 - .../stk/stk_doc_tests/stk_util/TimerHowTo.cpp | 6 +- .../stk_util/TimerHowToParallel.cpp | 2 +- .../stk_emend/independent_set/CMakeLists.txt | 1 + packages/stk/stk_expreval/Jamfile | 3 +- 
.../stk_expreval/stk_expreval/CMakeLists.txt | 3 +- .../stk/stk_expreval/stk_expreval/Eval.cpp | 1 + .../stk_expreval/stk_expreval/Function.cpp | 1 + .../stk_expreval/stk_expreval/Function.hpp | 15 + .../stk/stk_expreval/stk_expreval/NgpNode.hpp | 7 + packages/stk/stk_integration_tests/Jamfile | 3 +- .../build_stk_standalone_serial_using_cmake | 2 +- .../cmake_install_test/build_stk_using_cmake | 7 + .../load_aue_serial_modules_no_boost | 6 + .../cmake_install_test/load_gcc_modules | 18 +- .../cmake_install_test/run_cmake_stk | 4 +- .../run_cmake_stk_standalone_serial | 5 +- .../cmake_install_test/spack.cuda.yaml | 156 +++ .../cmake_install_test/spack.gcc.yaml | 156 +++ .../stk_spack_build_test_cuda.sh | 132 ++ .../stk_spack_build_test_gcc.sh | 124 ++ .../stk_test_app/src/test_stk_io.cpp | 1 - .../mock_apps/MockMeshUtils.hpp | 1 - .../mock_apps/mock_aria.cpp | 2 - .../mock_apps/mock_fuego.cpp | 2 - .../IntegrationTestAttributeOrdering.cpp | 2 +- .../IntegrationTestBalanceNodes.cpp | 8 +- .../IntegrationTestBasicLoadBalance.cpp | 2 +- .../IntegrationTestCoincidentElems.cpp | 4 +- .../IntegrationTestIncrementalRebalance.cpp | 2 +- .../IntegrationTestLoadBalance.cpp | 25 +- .../IntegrationTestLoadBalanceActiveOnly.cpp | 4 +- ...egrationTestLoadBalanceEmptyMeshOnProc.cpp | 2 +- ...IntegrationTestLoadBalanceMultiPhysics.cpp | 2 +- ...grationTestLoadBalanceMultipleCriteria.cpp | 2 +- .../IntegrationTestLoadBalanceParentChild.cpp | 1 - .../IntegrationTestLoadBalanceParticles.cpp | 12 +- .../IntegrationTestSpiderElements.cpp | 4 +- .../IntegrationTestTransientFields.cpp | 42 +- .../IntegrationTestUserSupport.cpp | 8 +- .../CheckSupportedInternalSidesetCases.cpp | 2 +- .../stk_io/IntegrationTestStkIo.cpp | 4 +- .../stk_io/WriteSidesetsUsingMetaData.cpp | 2 +- .../IntegrationTestDetectOrphanNodes.cpp | 4 +- .../IntegrationTestElementBlockMembership.cpp | 4 +- .../stk_mesh/IntegrationTestTopology.cpp | 4 +- .../IntegrationTestCheckExposedBoundary.cpp | 4 +- 
.../IntegrationTestSkinAllBoundaries.cpp | 2 +- .../IntegrationTestSkinWithModifications.cpp | 4 +- .../IntegrationTestTicket13009.cpp | 2 +- .../IntegrationTestTicket13227.cpp | 2 +- .../stk_mesh_doc/IntegrationTestBulkData.cpp | 8 +- .../stk_middle_mesh/MeshQualityImprover.cpp | 4 +- .../stk_middle_mesh/NonConformalInterface.cpp | 4 +- .../AperiCMC_NeighborSearchTest.cpp | 476 +++++++ .../stk_search/CMakeLists.txt | 4 - .../stk_search/UnitTestNaluPerformance.cpp | 51 +- .../stk_transfer/IntgTestCopyTransfer.cpp | 6 +- .../test_utils/OptionsForTesting.hpp | 28 +- packages/stk/stk_io/Jamfile | 9 +- .../stk_io/example/io_lowlevel_example.cpp | 1 - packages/stk/stk_io/stk_io/CMakeLists.txt | 2 +- packages/stk/stk_io/stk_io/IossBridge.cpp | 196 +-- packages/stk/stk_io/stk_io/IossBridge.hpp | 3 +- packages/stk/stk_io/stk_io/OutputFile.hpp | 1 + .../stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp | 39 +- .../stk/stk_io/stk_io/SidesetTranslator.hpp | 4 +- .../stk/stk_io/stk_io/StkMeshIoBroker.cpp | 85 +- .../stk/stk_io/stk_io/StkMeshIoBroker.hpp | 31 +- packages/stk/stk_io/stk_io/WriteMesh.cpp | 40 + packages/stk/stk_io/stk_io/WriteMesh.hpp | 11 + .../stk/stk_io/stk_io/util/CMakeLists.txt | 2 +- .../stk_io/util/Gmesh_STKmesh_Fixture.cpp | 10 +- .../stk_io/util/Gmesh_STKmesh_Fixture.hpp | 10 +- packages/stk/stk_math/Jamfile | 3 +- packages/stk/stk_math/stk_math/CMakeLists.txt | 3 +- .../stk/stk_math/stk_math/SideGeometry.cpp | 40 - .../stk/stk_math/stk_math/SideGeometry.hpp | 29 +- packages/stk/stk_mesh/Jamfile | 5 +- .../stk_mesh/base/BoundaryAnalysis.cpp | 3 +- .../stk/stk_mesh/stk_mesh/base/Bucket.cpp | 377 +++--- .../stk/stk_mesh/stk_mesh/base/Bucket.hpp | 28 +- .../stk_mesh/base/BucketConnectivity.hpp | 833 +----------- .../stk/stk_mesh/stk_mesh/base/BulkData.cpp | 744 ++--------- .../stk/stk_mesh/stk_mesh/base/BulkData.hpp | 195 +-- .../stk_mesh/base/BulkModification.cpp | 3 +- .../stk/stk_mesh/stk_mesh/base/CMakeLists.txt | 10 +- .../stk_mesh/base/CoordinateSystems.hpp | 
401 +----- .../stk_mesh/stk_mesh/base/CreateEdges.cpp | 6 +- .../stk_mesh/stk_mesh/base/CreateFaces.cpp | 5 +- .../stk_mesh/base/DestroyRelations.cpp | 93 ++ .../stk_mesh/base/DestroyRelations.hpp | 51 + .../stk_mesh/stk_mesh/base/DeviceField.hpp | 26 +- .../stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp | 8 +- .../stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp | 41 +- .../stk_mesh/stk_mesh/base/DumpMeshInfo.hpp | 2 +- .../stk/stk_mesh/stk_mesh/base/Entity.hpp | 2 + .../stk_mesh/base/EntityCommDatabase.cpp | 2 +- .../stk_mesh/base/EntityCommDatabase.hpp | 2 +- .../stk/stk_mesh/stk_mesh/base/EntityLess.hpp | 88 ++ .../stk_mesh/base/EntityParallelState.hpp | 3 - .../stk_mesh/base/EntitySorterBase.hpp | 2 + .../stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp | 29 +- .../stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp | 6 +- packages/stk/stk_mesh/stk_mesh/base/Field.hpp | 1 - .../stk/stk_mesh/stk_mesh/base/FieldBLAS.hpp | 5 +- .../stk/stk_mesh/stk_mesh/base/FieldBase.cpp | 52 +- .../stk/stk_mesh/stk_mesh/base/FieldBase.hpp | 42 - .../stk_mesh/stk_mesh/base/FieldTraits.hpp | 98 +- .../stk_mesh/base/FindPermutation.cpp | 82 ++ .../stk_mesh/base/FindPermutation.hpp | 61 + .../stk_mesh/stk_mesh/base/GetEntities.cpp | 3 +- .../stk/stk_mesh/stk_mesh/base/HostField.hpp | 24 - .../stk/stk_mesh/stk_mesh/base/HostMesh.hpp | 28 - .../stk_mesh/base/LegacyFieldTraits.hpp | 136 -- .../stk/stk_mesh/stk_mesh/base/MetaData.cpp | 13 +- .../stk/stk_mesh/stk_mesh/base/MetaData.hpp | 375 +----- .../stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp | 160 +-- .../stk_mesh/stk_mesh/base/NgpFieldBase.hpp | 3 - .../stk_mesh/base/NgpForEachEntity.hpp | 151 +-- .../stk_mesh/stk_mesh/base/PolarityUtil.hpp | 2 +- .../stk/stk_mesh/stk_mesh/base/Relation.cpp | 34 +- .../stk/stk_mesh/stk_mesh/base/Relation.hpp | 41 +- .../stk_mesh/stk_mesh/base/SideSetUtil.cpp | 1 + .../stk/stk_mesh/stk_mesh/base/SkinMesh.cpp | 3 +- .../stk_mesh/base/TopologyDimensions.hpp | 72 +- packages/stk/stk_mesh/stk_mesh/base/Types.hpp | 4 +- 
.../stk_mesh/baseImpl/AuraGhosting.cpp | 12 +- .../stk_mesh/baseImpl/BucketConnDynamic.hpp | 509 ++++++++ .../stk_mesh/baseImpl/BucketRepository.cpp | 11 +- .../stk_mesh/baseImpl/ConnectEdgesImpl.cpp | 6 +- .../stk_mesh/baseImpl/DeletedEntityCache.hpp | 1 + .../stk_mesh/baseImpl/ElemDeathImpl.cpp | 179 +++ .../stk_mesh/baseImpl/ElemDeathImpl.hpp | 62 + .../stk_mesh/baseImpl/FieldRepository.cpp | 42 +- .../baseImpl/GlobalIdEntitySorter.cpp | 62 + .../baseImpl/GlobalIdEntitySorter.hpp | 65 + .../stk_mesh/baseImpl/MeshCommImplUtils.cpp | 1 + .../stk_mesh/baseImpl/MeshCommVerify.cpp | 66 + .../stk_mesh/baseImpl/MeshCommVerify.hpp | 14 + .../stk_mesh/baseImpl/MeshImplUtils.cpp | 125 +- .../stk_mesh/baseImpl/MeshImplUtils.hpp | 119 +- .../stk_mesh/baseImpl/MeshModification.cpp | 286 +++- .../stk_mesh/baseImpl/MeshModification.hpp | 5 +- .../stk_mesh/baseImpl/NgpFieldBLASImpl.hpp | 314 +++-- .../stk_mesh/stk_mesh/baseImpl/Partition.cpp | 4 +- .../stk_mesh/baseImpl/SideSetPartImpl.cpp | 86 ++ .../stk_mesh/baseImpl/SideSetPartImpl.hpp | 56 + .../stk_mesh/baseImpl/SideSetUtilImpl.hpp | 2 +- .../stk_mesh/stk_mesh/baseImpl/Visitors.hpp | 1 + packages/stk/stk_middle_mesh/Jamfile | 6 +- .../stk_middle_mesh/CMakeLists.txt | 5 +- .../stk_middle_mesh/mesh_agglomerator.cpp | 2 - packages/stk/stk_middle_mesh_util/Jamfile | 3 +- .../stk_middle_mesh_util/CMakeLists.txt | 2 +- .../stk_middle_mesh_util/create_stk_mesh.hpp | 1 - .../stk_middle_mesh_util/exodus_writer.hpp | 1 - .../stk_middle_mesh_util/stk_interface.hpp | 1 - packages/stk/stk_ngp_test/Jamfile | 3 +- .../stk_ngp_test/stk_ngp_test/CMakeLists.txt | 2 +- .../stk_ngp_test/stk_ngp_test/ngp_test.hpp | 5 +- packages/stk/stk_performance_tests/Jamfile | 3 +- .../stk_balance/balanceHexesEdgesNodes.cpp | 4 +- .../stk_io/perfMeshRead.cpp | 6 +- .../stk_mesh/ChangeEntityPartPerfTest.cpp | 24 +- .../stk_mesh/CommunicateFieldData.cpp | 11 +- .../stk_mesh/GatherGears.cpp | 4 +- .../stk_mesh/GearsSkinning.cpp | 18 +- 
.../ManyBlocksSidesetsPerformance.cpp | 34 +- .../stk_mesh/MeshOperations.cpp | 3 +- .../stk_mesh/NgpFieldAccess.cpp | 10 +- .../stk_mesh/NgpFieldAsync.cpp | 70 +- .../stk_mesh/NgpFieldUpdate.cpp | 28 +- .../stk_mesh/NgpMeshUpdate.cpp | 14 +- .../stk_mesh/NodalFieldPerf.cpp | 10 +- .../stk_mesh/ParallelSum.cpp | 1 - .../stk_mesh/Selector.cpp | 4 +- .../stk_mesh/SkinningLargeCube.cpp | 5 +- .../stk_mesh/TetSTKfaces.cpp | 4 +- .../stk_mesh/entity_sorting/entitySorting.cpp | 7 +- .../stk_mesh/perfCommNeighbors.cpp | 12 +- .../stk_mesh/perfCreateFaces.cpp | 18 +- .../stk_mesh/perfDeleteElementTopology.cpp | 10 +- .../stk_mesh/perfElemGraph.cpp | 28 +- .../stk_mesh/perfExposedBlockBoundary.cpp | 19 +- .../stk_mesh/perfParts.cpp | 6 +- .../stk_mesh/perfSidesetPolarity.cpp | 8 +- .../stk_mesh/perfSkinMesh.cpp | 19 +- .../stk_mesh/perfStressEntityKeyMapping.cpp | 2 +- .../perfMiddleMeshConstruction.cpp | 4 +- .../perfMiddleMeshEntityOps.cpp | 10 +- .../perfMiddleMeshQualityImprover.cpp | 18 +- .../stk_search/SurfaceToSurface.cpp | 198 ++- .../stk_search/VolumeToOne.cpp | 76 +- .../stk_search/VolumeToSurface.cpp | 25 +- .../stk_search/VolumeToVolume.cpp | 8 +- .../stk_util/perfParallelExchange.cpp | 6 +- packages/stk/stk_search/Jamfile | 3 +- .../stk/stk_search/stk_search/BoxIdent.hpp | 3 + .../stk/stk_search/stk_search/CMakeLists.txt | 7 +- .../stk_search/stk_search/CoarseSearch.hpp | 24 +- .../stk_search/CommonSearchUtil.hpp | 113 ++ .../stk_search/stk_search/DeviceMPIUtils.hpp | 338 +++++ .../stk_search/FilterCoarseSearch.hpp | 18 +- .../stk_search/stk_search/HelperTraits.hpp | 187 +++ .../stk_search/LocalCoarseSearch.hpp | 13 +- .../stk_search/arborx/CoarseSearchArborX.hpp | 27 +- .../arborx/LocalCoarseSearchArborX.hpp | 137 +- .../morton_lbvh/CoarseSearchMortonLBVH.hpp | 342 +++-- .../LocalCoarseSearchMortonLBVH.hpp | 276 ++-- .../MortonLBVH_ParallelConsistencyUtils.hpp | 225 +++- .../morton_lbvh/MortonLBVH_Search.hpp | 111 +- .../MortonLBVH_TreeManipulationUtils.hpp | 
175 +-- packages/stk/stk_search_util/Jamfile | 3 +- .../stk_search_util/CMakeLists.txt | 2 +- packages/stk/stk_simd/Jamfile | 3 +- packages/stk/stk_simd/stk_simd/CMakeLists.txt | 2 + packages/stk/stk_tools/Jamfile | 3 +- .../stk/stk_tools/stk_tools/CMakeLists.txt | 8 +- .../block_extractor/ExtractBlocks.cpp | 5 - .../stk_tools/mesh_clone/MeshClone.cpp | 9 +- .../stk_tools/mesh_clone/MeshClone.hpp | 4 +- .../stk_tools/mesh_tools/DisconnectBlocks.cpp | 36 +- .../stk_tools/mesh_tools/DisconnectBlocks.hpp | 25 +- .../mesh_tools/DisconnectBlocksImpl.cpp | 45 +- .../mesh_tools/DisconnectBlocksImpl.hpp | 12 +- .../pmesh_lib/UnitTest/UnitTestPmesh.cpp | 1 - .../stk_tools/pmesh_lib/makeparfiles.cpp | 30 +- .../stk_topology/stk_topology/CMakeLists.txt | 2 +- packages/stk/stk_transfer/Jamfile | 3 +- .../stk_transfer/stk_transfer/CMakeLists.txt | 1 + packages/stk/stk_transfer_util/Jamfile | 3 +- .../stk_transfer_util/CMakeLists.txt | 15 +- .../stk_transfer_util/Patch.hpp | 1 + .../stk_unit_test_utils/BuildMesh.hpp | 2 - .../stk_unit_test_utils/BulkDataTester.hpp | 5 +- .../stk_unit_test_utils/CMakeLists.txt | 15 +- .../stk_unit_test_utils/ConstructedMesh.cpp | 9 +- .../stk_unit_test_utils/ConstructedMesh.hpp | 87 +- .../ElemGraphMultipleSharedSidesUtils.hpp | 34 +- .../ElemGraphTestUtils.hpp | 3 +- .../FaceCreationTestUtils.hpp | 3 +- .../stk_unit_test_utils/FaceTestingUtils.cpp | 26 +- .../stk_unit_test_utils/FaceTestingUtils.hpp | 32 + .../GenerateALefRAMesh.cpp | 420 +++--- .../GenerateALefRAMesh.hpp | 11 + .../GeneratedMeshToFile.cpp | 9 +- .../GeneratedMeshToFile.hpp | 8 +- .../stk_unit_test_utils/GetMeshSpec.cpp | 70 + .../stk_unit_test_utils/GetMeshSpec.hpp | 36 +- .../stk_unit_test_utils/MeshFileFixture.hpp | 7 +- .../stk_unit_test_utils/MeshFixture.hpp | 70 +- .../MeshUtilsForBoundingVolumes.cpp | 383 ++++++ .../MeshUtilsForBoundingVolumes.hpp | 376 +----- .../ParallelGtestOutput.cpp | 10 +- .../ParallelGtestOutput.hpp | 3 + .../stk_unit_test_utils/ParticleUtils.hpp | 
5 +- .../stk_unit_test_utils/PerformanceTester.cpp | 56 + .../stk_unit_test_utils/PerformanceTester.hpp | 106 +- .../stk_unit_test_utils/PrintType.hpp | 2 + .../ReadWriteSidesetTester.cpp | 23 +- .../ReadWriteSidesetTester.hpp | 28 +- .../Search_UnitTestUtils.cpp | 61 + .../Search_UnitTestUtils.hpp | 79 +- .../StkBalanceUnitTestSettings.hpp | 3 +- .../StkMeshFromGeneratedMesh.hpp | 4 +- .../StkReportRedirector.hpp | 5 +- .../stk_unit_test_utils/TextMesh.cpp | 94 +- .../stk_unit_test_utils/TextMesh.hpp | 4 + .../stk_unit_test_utils/TextMeshFixture.cpp | 25 +- .../stk_unit_test_utils/TextMeshFixture.hpp | 6 +- .../TextMeshStkTopologyMapping.hpp | 6 +- .../stk_unit_test_utils/TextMeshToFile.cpp | 1 - .../stk_unit_test_utils/TextMeshToFile.hpp | 3 +- .../stk_unit_test_utils/exampleMeshes.cpp | 25 + .../stk_unit_test_utils/exampleMeshes.h | 17 +- .../stk_unit_test_utils/getOption.cpp | 21 + .../stk_unit_test_utils/getOption.h | 14 +- .../stk_unit_test_utils/ioUtils.cpp | 67 +- .../stk_unit_test_utils/ioUtils.hpp | 15 +- .../meshCreationHelpers.cpp | 8 +- .../meshCreationHelpers.hpp | 3 + .../stk_mesh_fixtures/BoxFixture.cpp | 8 +- .../stk_mesh_fixtures/BoxFixture.hpp | 5 +- .../stk_mesh_fixtures/CMakeLists.txt | 2 +- .../stk_mesh_fixtures/FixtureNodeSharing.hpp | 2 + .../stk_mesh_fixtures/Gear.hpp | 9 +- .../stk_mesh_fixtures/GearsFixture.cpp | 62 +- .../stk_mesh_fixtures/GearsFixture.hpp | 34 +- .../stk_mesh_fixtures/GridFixture.cpp | 10 +- .../stk_mesh_fixtures/GridFixture.hpp | 3 +- .../stk_mesh_fixtures/Hex20Fixture.cpp | 27 +- .../stk_mesh_fixtures/Hex20Fixture.hpp | 9 +- .../stk_mesh_fixtures/Hex27Fixture.cpp | 27 +- .../stk_mesh_fixtures/Hex27Fixture.hpp | 6 +- .../stk_mesh_fixtures/HexFixture.cpp | 42 +- .../stk_mesh_fixtures/HexFixture.hpp | 12 +- .../stk_mesh_fixtures/PyramidFixture.cpp | 27 +- .../stk_mesh_fixtures/PyramidFixture.hpp | 10 +- .../stk_mesh_fixtures/QuadFixture.cpp | 150 +-- .../stk_mesh_fixtures/QuadFixture.hpp | 17 +- 
.../stk_mesh_fixtures/QuadShellFixture.cpp | 33 +- .../stk_mesh_fixtures/QuadShellFixture.hpp | 7 +- .../stk_mesh_fixtures/RingFixture.cpp | 10 +- .../stk_mesh_fixtures/RingFixture.hpp | 3 +- .../stk_mesh_fixtures/SelectorFixture.cpp | 28 +- .../stk_mesh_fixtures/SelectorFixture.hpp | 10 +- .../stk_mesh_fixtures/TestHexFixture.hpp | 24 +- .../stk_mesh_fixtures/Tet10Fixture.cpp | 27 +- .../stk_mesh_fixtures/Tet10Fixture.hpp | 9 +- .../stk_mesh_fixtures/TetFixture.cpp | 27 +- .../stk_mesh_fixtures/TetFixture.hpp | 18 +- .../stk_mesh_fixtures/TriFixture.cpp | 21 +- .../stk_mesh_fixtures/TriFixture.hpp | 12 +- .../stk_mesh_fixtures/WedgeFixture.cpp | 27 +- .../stk_mesh_fixtures/WedgeFixture.hpp | 11 +- .../stk_mesh_fixtures/degenerate_mesh.cpp | 2 +- .../stk_mesh_fixtures/degenerate_mesh.hpp | 4 +- .../stk_mesh_fixtures/heterogeneous_mesh.cpp | 4 +- .../stk_mesh_fixtures/heterogeneous_mesh.hpp | 4 +- .../stk_transfer_fixtures/CMakeLists.txt | 20 +- .../stringAndNumberComparisons.cpp | 5 + .../stringAndNumberComparisons.hpp | 9 +- .../stk_unit_test_utils/timer.hpp | 16 +- .../stk_unit_test_utils/unittestMeshUtils.hpp | 4 + .../stk_balance/MeshFixtureDecomposer.hpp | 2 +- .../stk_balance/MeshFixtureM2NDecomposer.hpp | 2 +- .../stk_balance/MeshFixtureM2NRebalance.hpp | 18 +- .../stk_balance/MeshFixtureRebalance.hpp | 18 +- .../stk_balance/UnitTestBalanceFromField.cpp | 2 +- .../stk_balance/UnitTestBalanceNodes.cpp | 2 +- .../stk_balance/UnitTestBlockWeights.cpp | 2 +- .../stk_balance/UnitTestBoundingBoxSearch.cpp | 64 +- .../UnitTestBoundingBoxSearch2D.cpp | 58 +- .../stk_balance/UnitTestColoring.cpp | 14 +- .../UnitTestCommandLineParsing.cpp | 2 +- .../UnitTestCrossProcessorEdge.cpp | 2 +- .../UnitTestDiagnosticsComputation.cpp | 16 +- .../UnitTestElementConnectivity.cpp | 16 +- .../stk_balance/UnitTestFileNames.cpp | 2 +- .../UnitTestGeometricMethodsWithSelector.cpp | 2 +- .../UnitTestLastStepFieldWriter.cpp | 2 +- .../stk_balance/UnitTestLearningZoltan2.cpp | 2 +- 
.../stk_balance/UnitTestLifeCycle.cpp | 12 +- .../stk_balance/UnitTestLogFile.cpp | 4 +- .../UnitTestM2NCommandLineParsing.cpp | 2 +- .../stk_balance/UnitTestM2NLogFile.cpp | 2 +- .../stk_balance/UnitTestMechanismBuster.cpp | 10 +- .../stk_balance/UnitTestSearchTolerance.cpp | 8 +- .../UnitTestSettingVertexWeights.cpp | 2 +- .../stk_balance/UnitTestSpiderElements.cpp | 4 +- .../stk_balance/UnitTestSpiderMeshSetup.hpp | 15 +- .../UnitTestStkBalanceDecomposition.cpp | 12 +- .../UnitTestStkBalancePartitioning.cpp | 30 +- .../UnitTestTransientFieldTransferById.cpp | 2 +- .../UnitTestZoltanGraphGeneration.cpp | 18 +- .../stk_expreval/UnitTestEvaluator.cpp | 23 + .../stk/stk_unit_tests/stk_io/Assembly.hpp | 1 - .../stk_unit_tests/stk_io/IOMeshFixture.hpp | 2 +- .../stk_io/UnitTestAccessCommSet.cpp | 1 - .../stk_io/UnitTestAttributes.cpp | 4 +- .../stk_io/UnitTestCustomMeshBuilder.cpp | 6 - .../stk_io/UnitTestFieldNames.cpp | 3 - .../stk_io/UnitTestFieldTypes.cpp | 16 +- .../stk_io/UnitTestGlobalVariables.cpp | 12 - .../stk_io/UnitTestGmeshFixture.cpp | 2 +- .../stk_io/UnitTestInvalidCallOrdering.cpp | 1 - .../stk_io/UnitTestMeshData.cpp | 10 +- .../stk_io/UnitTestMeshGroupingEntity.cpp | 6 - .../UnitTestNodeBucketsHaveValidTopology.cpp | 1 - .../stk_io/UnitTestReadFieldData.cpp | 43 +- .../UnitTestReadWriteDistributionFactors.cpp | 3 +- .../stk_io/UnitTestReadWriteEdges.cpp | 1 - .../stk_io/UnitTestReadWriteEdges.hpp | 4 +- .../UnitTestReadWriteEdgesForFieldIO.cpp | 1 - .../stk_io/UnitTestReadWriteFaces.hpp | 4 +- .../stk_io/UnitTestReadWriteSideSets.cpp | 13 +- .../stk_unit_tests/stk_io/UnitTestRestart.cpp | 3 +- .../stk_io/UnitTestResultsOutputMeshMod.cpp | 128 ++ .../stk_unit_tests/stk_io/UnitTestUtils.cpp | 1 - .../stk_io/UnitTestWriteSTKMesh.cpp | 4 +- .../stk_mesh/UnitTest3Tets3Procs.cpp | 6 +- ...tTestAddNodeSharingWithInternalSideset.cpp | 2 +- .../stk_unit_tests/stk_mesh/UnitTestAura.cpp | 4 +- .../stk_mesh/UnitTestBoundaryAnalysis.cpp | 6 +- 
.../stk_mesh/UnitTestBoxFixture.cpp | 2 +- .../stk_mesh/UnitTestBucket.cpp | 17 +- .../stk_mesh/UnitTestBucketConnectivity.cpp | 702 ++++++++-- .../stk_mesh/UnitTestBucketRepository.cpp | 1 - .../stk_mesh/UnitTestBulkData.cpp | 70 +- .../stk_mesh/UnitTestBulkDataAura.cpp | 9 +- .../stk_mesh/UnitTestBulkDataIdMapper.cpp | 2 +- .../UnitTestBulkDataNotifications.cpp | 3 - .../stk_mesh/UnitTestBulkDataSharing.cpp | 5 - .../stk_mesh/UnitTestBulkData_ChangeParts.cpp | 2 +- .../stk_mesh/UnitTestBulkData_Destroy.cpp | 3 +- .../stk_mesh/UnitTestBulkData_new.cpp | 24 +- .../stk_mesh/UnitTestBulkModification.cpp | 2 +- .../stk_unit_tests/stk_mesh/UnitTestCEO.cpp | 16 +- .../stk_unit_tests/stk_mesh/UnitTestCEOME.cpp | 14 +- .../stk_mesh/UnitTestChangeEntityId.cpp | 2 +- .../UnitTestChangeEntityOwnerCommMaps.cpp | 10 +- .../stk_mesh/UnitTestChangeParts.cpp | 9 +- ...UnitTestCheckOwnedOrphanedSidesOrEdges.cpp | 4 +- .../stk_mesh/UnitTestCheckUniqueGlobalIds.cpp | 2 +- .../stk_mesh/UnitTestCommInfoObserver.cpp | 1 - .../stk_mesh/UnitTestCrackMesh.cpp | 6 +- .../UnitTestCreateAdjacentEntities.cpp | 6 +- .../stk_mesh/UnitTestCreateEdges.cpp | 20 +- .../stk_mesh/UnitTestCreateFaces.cpp | 28 +- .../stk_mesh/UnitTestDebugPrinting.cpp | 16 +- .../stk_mesh/UnitTestDeclareElement.cpp | 2 +- .../stk_mesh/UnitTestDeleteEntities.cpp | 14 +- .../stk_mesh/UnitTestDeletedEntityCache.cpp | 2 +- .../stk_mesh/UnitTestDestroyElements.cpp | 18 +- .../UnitTestDistributedIndexWithBulkData.cpp | 20 +- .../UnitTestElemGraphCoincidentElements.cpp | 4 +- .../UnitTestEntitiesNodesHaveInCommon.cpp | 2 +- .../stk_mesh/UnitTestFEMHelper.cpp | 4 +- .../stk_mesh/UnitTestFEMMetaData.cpp | 23 - .../stk_unit_tests/stk_mesh/UnitTestField.cpp | 16 +- .../stk_mesh/UnitTestFieldBLAS.cpp | 2 - .../stk_mesh/UnitTestFieldDataManager.cpp | 11 - .../stk_mesh/UnitTestFieldImpl.cpp | 1 - .../stk_mesh/UnitTestFieldParallel.cpp | 2 +- .../stk_mesh/UnitTestFieldQueryFunctions.cpp | 12 +- 
.../stk_mesh/UnitTestFieldRestriction.cpp | 7 - .../stk_mesh/UnitTestGenIds.cpp | 4 +- .../stk_mesh/UnitTestGetBuckets.cpp | 2 +- .../stk_mesh/UnitTestGetEntities.cpp | 2 +- .../stk_mesh/UnitTestGetFieldByName.cpp | 1 - .../UnitTestGhostingWithModification.cpp | 1 - .../stk_mesh/UnitTestGhostingWithShared.cpp | 3 - .../stk_mesh/UnitTestGloballyShared.cpp | 2 +- .../stk_mesh/UnitTestGridFixture.cpp | 2 +- .../stk_mesh/UnitTestHexFixture.cpp | 22 +- .../stk_mesh/UnitTestInducedPart.cpp | 4 +- .../stk_mesh/UnitTestLocalIds.cpp | 4 +- .../stk_mesh/UnitTestMeshBuilder.cpp | 4 + .../stk_mesh/UnitTestMeshImplUtils.cpp | 6 +- .../stk_mesh/UnitTestMeshModLogObserver.cpp | 1 - .../stk_mesh/UnitTestMetaData.cpp | 52 +- .../stk_mesh/UnitTestModificationEnd.cpp | 14 +- .../stk_mesh/UnitTestModificationEnd.hpp | 1 - .../stk_mesh/UnitTestModificationSummary.cpp | 1 - .../stk_mesh/UnitTestParallelGraphInfo.cpp | 16 +- .../stk_unit_tests/stk_mesh/UnitTestPart.cpp | 2 - .../stk_mesh/UnitTestPartAfterCommit.cpp | 3 - .../stk_mesh/UnitTestPartAlias.cpp | 9 - .../stk_mesh/UnitTestPartRepository.cpp | 1 - .../stk_mesh/UnitTestPartToBucket.cpp | 6 - .../stk_mesh/UnitTestPartitions.cpp | 3 +- .../stk_mesh/UnitTestRelation.cpp | 9 +- .../stk_mesh/UnitTestRingFixture.cpp | 2 +- .../stk_mesh/UnitTestRingFixture.hpp | 2 +- .../stk_mesh/UnitTestRootTopology.cpp | 2 - .../stk_mesh/UnitTestSelector.cpp | 4 +- .../stk_mesh/UnitTestSidePolarity.cpp | 40 +- .../stk_mesh/UnitTestSideSet.cpp | 41 +- .../stk_mesh/UnitTestStkTextMesh.cpp | 2 +- .../stk_mesh/UnitTestTextMeshFixture.hpp | 12 +- .../stk_mesh/UnitTestTopology.cpp | 23 +- .../stk_mesh/UnitTestVisitAura.cpp | 2 +- .../change_parts/CustomGhostEntities.cpp | 4 +- ...geEntityOwnerKeepUnaffectedCustomGhost.cpp | 1 - ...tTestKeepCustomGhostAfterLossOfSharing.cpp | 1 - .../UnitTestRemoveNeededRecvGhost.cpp | 1 - .../edge_creation/UnitTestEdgeConnection.cpp | 12 +- .../entitySorting/UnitTestEntitySorting.cpp | 2 +- 
.../face_creation/FaceCreatorFixture.hpp | 4 +- .../TwQuads2DTwoProcsElemGraph.cpp | 2 +- .../element_graph/ElementGraphAddElements.cpp | 4 +- .../ElementGraphDeleteElements.cpp | 2 +- .../element_graph/UnitTestElemElemGraph.cpp | 38 +- .../UnitTestElemElemGraphChangeOwner.cpp | 5 +- .../UnitTestElemElemGraphDeathIntegration.cpp | 3 +- .../UnitTestElemGraphMultipleSharedSides.cpp | 8 +- .../element_graph/UnitTestElementDeath.cpp | 8 +- .../element_graph/UnitTestSideIdPool.cpp | 2 +- .../UnitTestSkinMeshElementDeath.cpp | 5 +- .../UnitTestRuleThreeViolation.cpp | 2 +- .../skin_mesh/UnitTestSkinIrregular.cpp | 1 - .../skin_mesh/UnitTestSkinMesh.cpp | 20 +- .../UnitTestSkinMeshCoincidentElements.cpp | 2 +- .../skin_mesh/UnitTestSkinMeshCreateEdges.cpp | 4 +- .../skin_mesh/UnitTestSkinMeshCreateFaces.cpp | 4 +- .../skin_mesh/UnitTestSkinMeshDegenerate.cpp | 8 +- .../UnitTestSkinMeshExposedBoundary.cpp | 1 - .../UnitTestSkinMeshHeterogenous.cpp | 8 +- .../UnitTestSkinMeshMultipleSharedSides.cpp | 6 +- .../skin_mesh/UnitTestSkinMeshShell.cpp | 4 +- .../skin_mesh/UnitTestSkinMeshSkinPart.cpp | 2 +- .../skin_mesh/UnitTestSkinning.cpp | 2 +- .../ngp/NgpDebugFieldSync_Fixtures.cpp | 4 + .../ngp/NgpDebugFieldSync_Fixtures.hpp | 2 +- .../stk_mesh/ngp/NgpFieldAsyncTest.cpp | 18 +- .../stk_mesh/ngp/NgpFieldTestUtils.hpp | 33 +- .../stk_mesh/ngp/NgpMeshTest.cpp | 31 +- .../stk_mesh/ngp/NgpParallelSumTest.cpp | 43 +- .../stk_mesh/ngp/NgpUnitTestUtils.hpp | 10 +- .../stk_mesh/ngp/TestNgpMeshUpdate.cpp | 6 +- .../ngp/UnitTestNgpDebugFieldSync.cpp | 4 - ...estNgpDebugFieldSync_PartialAllocation.cpp | 4 - .../stk_mesh/ngp/ngpFieldBLASTest.cpp | 129 +- .../stk_mesh/ngp/ngpFieldTest.cpp | 74 +- .../stk_mesh/ngp/ngpMultiStateFieldTests.cpp | 2 +- .../test_stk_interface.cpp | 4 +- .../stk_search/UnitTestCoarseSearch.cpp | 142 +- ...rchBoxOverlappingEightSurroundingBoxes.cpp | 92 +- ...nitTestCoarseSearchLineOfBoundingBoxes.cpp | 24 +- .../stk_search/UnitTestCoarseSearchTwoBox.cpp | 20 +- 
.../UnitTestCoarseSearchTwoSpheres.cpp | 20 +- .../stk_search/UnitTestDeviceMPIUtils.cpp | 318 +++++ .../stk_search/UnitTestHelperTraits.cpp | 317 +++++ .../UnitTestParallelConsistencyUtils.cpp | 395 ++++++ .../UnitTestSearchWithPeriodicBC.cpp | 22 +- .../stk_simd/UnitTestPrintSimdInfo.cpp | 4 +- .../stk_unit_tests/stk_tools/blockIdQuery.cpp | 6 +- .../block_extractor/UnitTestExtractBlocks.cpp | 4 +- .../stk_unit_tests/stk_tools/createMesh.cpp | 4 +- .../stk_tools/elementExtractor.cpp | 26 +- .../mesh_clone/UnitTestBulkDataClone.cpp | 4 +- .../mesh_clone/UnitTestCloneIntoCommSelf.cpp | 2 - .../mesh_clone/UnitTestMeshClone.cpp | 4 +- .../DisconnectBlocksMeshConstruction.cpp | 140 +- .../mesh_tools/UnitTestCustomAura.hpp | 6 +- .../mesh_tools/UnitTestDetectHinge.cpp | 8 +- .../mesh_tools/UnitTestDisconnectBlocks.cpp | 74 +- .../mesh_tools/UnitTestMechanism.cpp | 2 +- .../stk_topology/utest_a/unit_test_hex.cpp | 3 + .../stk_topology/utest_a/unit_test_node.cpp | 1 + .../utest_a/unit_test_particle.cpp | 1 + .../utest_b/unit_test_pyramid.cpp | 3 + .../stk_topology/utest_b/unit_test_quad.cpp | 4 + .../stk_topology/utest_b/unit_test_spring.cpp | 2 + .../stk_topology/utest_b/unit_test_tet.cpp | 4 + .../utest_c/unit_test_shell_quad.cpp | 5 +- .../utest_c/unit_test_shell_side_beam.cpp | 2 + .../utest_c/unit_test_shell_tri.cpp | 5 +- .../UnitTestLeastSquares.cpp | 16 +- .../stk_transfer_util/UnitTestPatch.cpp | 4 +- .../stk_util/algorithmTimerTest.cpp | 6 +- .../stk_util/diag/UnitTestTimer.cpp | 4 +- .../stk_util/util/UnitTestMCSR.cpp | 8 + packages/stk/stk_util/Jamfile | 8 +- packages/stk/stk_util/stk_util/Version.hpp | 2 +- .../stk_util/command_line/CMakeLists.txt | 2 +- .../stk/stk_util/stk_util/diag/CMakeLists.txt | 2 +- .../stk_util/environment/CMakeLists.txt | 2 +- .../stk/stk_util/stk_util/ngp/CMakeLists.txt | 2 + .../stk_util/stk_util/parallel/CMakeLists.txt | 4 +- .../stk_util/stk_util/parallel/CommBuffer.cpp | 3 +- .../stk_util/stk_util/parallel/CommBuffer.hpp | 48 
+- .../stk_util/stk_util/registry/CMakeLists.txt | 2 +- .../stk_util/registry/ProductRegistry.cpp | 2 +- .../stk_util/registry/ProductRegistry.hpp | 4 +- packages/stk/stk_util/stk_util/stk_config.h | 2 +- .../stk/stk_util/stk_util/util/BlasLapack.hpp | 4 +- .../stk/stk_util/stk_util/util/CMakeLists.txt | 12 +- packages/stk/stk_util/stk_util/util/MCSR.hpp | 39 +- .../stk_util/stk_util/util/StkNgpVector.hpp | 4 +- .../stk_util/util/concat_variable_name.cpp | 45 +- 833 files changed, 18421 insertions(+), 9049 deletions(-) create mode 100644 packages/krino/.dont_scrape create mode 100644 packages/krino/SierraCmakeCode.cmake create mode 100644 packages/krino/cmake/krinoConfig.cmake create mode 100644 packages/krino/krino/geometry/Akri_WindingNumber.cpp create mode 100644 packages/krino/krino/geometry/Akri_WindingNumber.hpp create mode 100644 packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.cpp create mode 100644 packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.hpp create mode 100644 packages/krino/krino/krino_lib/Akri_PostProcess.cpp create mode 100644 packages/krino/krino/krino_lib/Akri_PostProcess.hpp create mode 100644 packages/krino/krino/krino_lib/Akri_SemiLagrangian.cpp create mode 100644 packages/krino/krino/krino_lib/Akri_SemiLagrangian.hpp create mode 100644 packages/krino/krino/mesh_utils/Akri_QuadFace.cpp create mode 100644 packages/krino/krino/mesh_utils/Akri_QuadFace.hpp create mode 100644 packages/krino/krino/mesh_utils/Akri_SideAttachedElements.cpp create mode 100644 packages/krino/krino/mesh_utils/Akri_SideAttachedElements.hpp create mode 100644 packages/krino/krino/refinement/Akri_HexRefiner.cpp create mode 100644 packages/krino/krino/refinement/Akri_HexRefiner.hpp create mode 100644 packages/krino/krino/refinement/Akri_QuadRefiner.cpp create mode 100644 packages/krino/krino/refinement/Akri_QuadRefiner.hpp create mode 100644 packages/krino/krino/refinement/Akri_RefinerUtils.hpp create mode 100644 
packages/krino/krino/unit_tests/Akri_Unit_FastMarching.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_OutputUtils.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_Refine_Beam.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_Refine_Hex.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_Refine_Quad.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_SemiLagrangian.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_SideAttachedElements.cpp create mode 100644 packages/krino/krino/unit_tests/Akri_Unit_WindingNumber.cpp create mode 100644 packages/stk/cmake/stkConfig.cmake create mode 100644 packages/stk/cmake/stkLapackGeneric.cmake create mode 100644 packages/stk/cmake/stkLapackSierra.cmake create mode 100644 packages/stk/stk_doc_tests/stk_mesh/howToVisitEdgeNodes.cpp create mode 100644 packages/stk/stk_integration_tests/cmake_install_test/load_aue_serial_modules_no_boost create mode 100644 packages/stk/stk_integration_tests/cmake_install_test/spack.cuda.yaml create mode 100644 packages/stk/stk_integration_tests/cmake_install_test/spack.gcc.yaml create mode 100755 packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_cuda.sh create mode 100755 packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_gcc.sh create mode 100644 packages/stk/stk_integration_tests/stk_search/AperiCMC_NeighborSearchTest.cpp create mode 100644 packages/stk/stk_mesh/stk_mesh/base/DestroyRelations.cpp create mode 100644 packages/stk/stk_mesh/stk_mesh/base/DestroyRelations.hpp create mode 100644 packages/stk/stk_mesh/stk_mesh/base/FindPermutation.cpp create mode 100644 packages/stk/stk_mesh/stk_mesh/base/FindPermutation.hpp delete mode 100644 packages/stk/stk_mesh/stk_mesh/base/LegacyFieldTraits.hpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.cpp create mode 
100644 packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.hpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.cpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.hpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.cpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.hpp create mode 100644 packages/stk/stk_search/stk_search/DeviceMPIUtils.hpp create mode 100644 packages/stk/stk_search/stk_search/HelperTraits.hpp create mode 100644 packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.cpp create mode 100644 packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp create mode 100644 packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.cpp create mode 100644 packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.cpp create mode 100644 packages/stk/stk_unit_test_utils/stk_unit_test_utils/exampleMeshes.cpp create mode 100644 packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.cpp create mode 100644 packages/stk/stk_unit_tests/stk_io/UnitTestResultsOutputMeshMod.cpp create mode 100644 packages/stk/stk_unit_tests/stk_search/UnitTestDeviceMPIUtils.cpp create mode 100644 packages/stk/stk_unit_tests/stk_search/UnitTestHelperTraits.cpp create mode 100644 packages/stk/stk_unit_tests/stk_search/UnitTestParallelConsistencyUtils.cpp diff --git a/packages/krino/.dont_scrape b/packages/krino/.dont_scrape new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/packages/krino/CMakeLists.txt b/packages/krino/CMakeLists.txt index 27d8dfb2f7ae..fbfb639f106e 100644 --- a/packages/krino/CMakeLists.txt +++ b/packages/krino/CMakeLists.txt @@ -1,13 +1,20 @@ +cmake_minimum_required(VERSION 3.23) -message("Building Krino as a Trilinos package") -TRIBITS_PACKAGE(Krino) +IF(COMMAND TRIBITS_PACKAGE_DECL) + message("Building Krino as a Trilinos package") + TRIBITS_PACKAGE(Krino) 
-TRIBITS_ADD_DEBUG_OPTION() -TRIBITS_ADD_SHOW_DEPRECATED_WARNINGS_OPTION() + TRIBITS_ADD_DEBUG_OPTION() + TRIBITS_ADD_SHOW_DEPRECATED_WARNINGS_OPTION() -if (${${PROJECT_NAME}_ENABLE_Krino}) - add_subdirectory(krino) - add_subdirectory(delete_small_elements) -endif() + if (${${PROJECT_NAME}_ENABLE_Krino}) + add_subdirectory(krino) + add_subdirectory(delete_small_elements) + endif() -TRIBITS_PACKAGE_POSTPROCESS() + TRIBITS_PACKAGE_POSTPROCESS() +ELSE() + MESSAGE("*** Building Krino as a stand-alone cmake package. ***") + + include(SierraCmakeCode.cmake) +ENDIF() diff --git a/packages/krino/SierraCmakeCode.cmake b/packages/krino/SierraCmakeCode.cmake new file mode 100644 index 000000000000..8b15e9e2460d --- /dev/null +++ b/packages/krino/SierraCmakeCode.cmake @@ -0,0 +1,1161 @@ +cmake_minimum_required(VERSION 3.23) +project(krino LANGUAGES C CXX Fortran) +cmake_path(GET CMAKE_CURRENT_LIST_DIR PARENT_PATH SIERRA_SOURCE_DIR) +list(PREPEND CMAKE_MODULE_PATH ${SIERRA_SOURCE_DIR}/modules) +include(${SIERRA_SOURCE_DIR}/modules/addParserCommands.cmake) +add_parser_commands(TARGET krino_commands + XML_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/krino_sierra/xml/Akri_Levelset.xml) +install(FILES ${CMAKE_BINARY_DIR}/krino_commands.xmldb DESTINATION xml) + +add_library(krino_diagwriter) +target_sources(krino_diagwriter PRIVATE krino/diagwriter/Akri_DiagWriter.cpp) +find_package(stk REQUIRED) +target_link_libraries(krino_diagwriter PUBLIC stk::stk_util_diag) +target_include_directories(krino_diagwriter PUBLIC + $ + $ + $ + $) +target_sources(krino_diagwriter PUBLIC + FILE_SET krino_diagwriter_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/diagwriter/Akri_DiagWriter.hpp + krino/diagwriter/Akri_DiagWriter_fwd.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + 
krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_diagwriter PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_diagwriter PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_diagwriter PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_diagwriter PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_diagwriter + EXPORT krinoTargets + FILE_SET krino_diagwriter_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_geometry) +target_sources(krino_geometry PRIVATE krino/geometry/Akri_BoundingBox.cpp + krino/geometry/Akri_BoundingBoxDistance.cpp + krino/geometry/Akri_Plane_Intersections.cpp + krino/geometry/Akri_WindingNumber.cpp) +find_package(stk REQUIRED) +target_link_libraries(krino_geometry PUBLIC stk::stk_math) +target_link_libraries(krino_geometry PUBLIC stk::stk_util_parallel) +target_include_directories(krino_geometry PUBLIC + $ + $ + $ + $) +target_sources(krino_geometry PUBLIC + FILE_SET krino_geometry_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/geometry/Akri_BoundingBox.hpp + krino/geometry/Akri_BoundingBoxDistance.hpp + krino/geometry/Akri_Plane_Intersections.hpp + krino/geometry/Akri_SearchTree.hpp + krino/geometry/Akri_Segment.hpp + krino/geometry/Akri_Triangle.hpp + krino/geometry/Akri_WindingNumber.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + 
krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_geometry PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_geometry PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_geometry PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_geometry PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_geometry + EXPORT krinoTargets + FILE_SET krino_geometry_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_surface) +target_sources(krino_surface PRIVATE krino/surface/Akri_AnalyticSurf.cpp + krino/surface/Akri_Composite_Surface.cpp + krino/surface/Akri_Facet.cpp + krino/surface/Akri_FacetedSurfaceCalcs.cpp + krino/surface/Akri_Faceted_Surface.cpp + krino/surface/Akri_String_Function_Expression.cpp + krino/surface/Akri_Surface.cpp + krino/surface/Akri_SurfaceIntersectionFromSignedDistance.cpp + krino/surface/Akri_Transformation.cpp) +target_link_libraries(krino_surface PUBLIC krino_diagwriter) +target_link_libraries(krino_surface PUBLIC krino_geometry) +target_link_libraries(krino_surface PUBLIC krino_math_utils) +find_package(stk REQUIRED) +target_link_libraries(krino_surface PUBLIC stk::stk_expreval) +target_include_directories(krino_surface PUBLIC + $ + $ + $ + $) +target_sources(krino_surface PUBLIC + FILE_SET krino_surface_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/surface/Akri_AnalyticSurf.hpp + krino/surface/Akri_Composite_Surface.hpp + krino/surface/Akri_Facet.hpp + krino/surface/Akri_FacetedSurfaceCalcs.hpp + krino/surface/Akri_Faceted_Surface.hpp + krino/surface/Akri_String_Function_Expression.hpp + krino/surface/Akri_Surface.hpp + krino/surface/Akri_SurfaceIntersectionFromSignedDistance.hpp + krino/surface/Akri_Transformation.hpp + 
krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_surface PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_surface PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_surface PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_surface PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_surface + EXPORT krinoTargets + FILE_SET krino_surface_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_mesh_surface) +target_sources(krino_mesh_surface PRIVATE krino/mesh_surface/Akri_MeshSurface.cpp) +target_link_libraries(krino_mesh_surface PUBLIC krino_surface) +find_package(stk REQUIRED) +target_link_libraries(krino_mesh_surface PUBLIC stk::stk_mesh_base) +target_link_libraries(krino_mesh_surface PUBLIC stk::stk_tools_lib) +target_link_libraries(krino_mesh_surface PUBLIC stk::stk_topology) +target_include_directories(krino_mesh_surface PUBLIC + $ + $ + $ + $) +target_sources(krino_mesh_surface PUBLIC + FILE_SET krino_mesh_surface_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/mesh_surface/Akri_MeshSurface.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + 
krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_mesh_surface PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_mesh_surface PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_mesh_surface PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_mesh_surface PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_mesh_surface + EXPORT krinoTargets + FILE_SET krino_mesh_surface_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_master_element) +target_sources(krino_master_element PRIVATE krino/master_element/Akri_MasterElementCalc.cpp + krino/master_element/Akri_MasterElementHybrid.cpp + krino/master_element/Akri_MasterElementIntrepid.cpp) +find_package(Intrepid2 REQUIRED) +target_link_libraries(krino_master_element PUBLIC Intrepid2::all_libs) +find_package(stk REQUIRED) +target_link_libraries(krino_master_element PUBLIC stk::stk_mesh_base) +target_link_libraries(krino_master_element PUBLIC stk::stk_topology) +target_include_directories(krino_master_element PUBLIC + $ + $ + $ + $) +target_sources(krino_master_element PUBLIC + FILE_SET krino_master_element_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/master_element/Akri_MasterElement.hpp + krino/master_element/Akri_MasterElementBasis.hpp + krino/master_element/Akri_MasterElementCalc.hpp + krino/master_element/Akri_MasterElementHybrid.hpp + krino/master_element/Akri_MasterElementIntrepid.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + 
krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_master_element PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_master_element PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_master_element PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_master_element PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_master_element + EXPORT krinoTargets + FILE_SET krino_master_element_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_math_utils) +target_sources(krino_math_utils PRIVATE krino/math_utils/Akri_CramersRuleSolver.cpp + krino/math_utils/Akri_CurvatureLeastSquares.cpp + krino/math_utils/Akri_MathUtil.cpp) +target_link_libraries(krino_math_utils PUBLIC krino_diagwriter) +find_package(stk REQUIRED) +target_link_libraries(krino_math_utils PUBLIC stk::stk_math) +target_include_directories(krino_math_utils PUBLIC + $ + $ + $ + $) +target_sources(krino_math_utils PUBLIC + FILE_SET krino_math_utils_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/math_utils/Akri_CramersRuleSolver.hpp + krino/math_utils/Akri_CurvatureLeastSquares.hpp + krino/math_utils/Akri_MathUtil.hpp + krino/math_utils/Akri_MortonIndex.hpp + krino/math_utils/Akri_Sign.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_math_utils PUBLIC 
KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_math_utils PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_math_utils PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_math_utils PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_math_utils + EXPORT krinoTargets + FILE_SET krino_math_utils_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_mesh_utils) +target_sources(krino_mesh_utils PRIVATE krino/mesh_utils/Akri_ChildNodeCreator.cpp + krino/mesh_utils/Akri_Edge.cpp + krino/mesh_utils/Akri_EntityIdPool.cpp + krino/mesh_utils/Akri_FieldRef.cpp + krino/mesh_utils/Akri_MeshHelpers.cpp + krino/mesh_utils/Akri_ParallelErrorMessage.cpp + krino/mesh_utils/Akri_QuadFace.cpp + krino/mesh_utils/Akri_SideAttachedElements.cpp) +target_link_libraries(krino_mesh_utils PUBLIC krino_diagwriter) +find_package(stk REQUIRED) +target_link_libraries(krino_mesh_utils PUBLIC stk::stk_io) +target_link_libraries(krino_mesh_utils PUBLIC stk::stk_math) +target_link_libraries(krino_mesh_utils PUBLIC stk::stk_mesh_base) +target_link_libraries(krino_mesh_utils PUBLIC stk::stk_topology) +target_include_directories(krino_mesh_utils PUBLIC + $ + $ + $ + $) +target_sources(krino_mesh_utils PUBLIC + FILE_SET krino_mesh_utils_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/mesh_utils/Akri_AllReduce.hpp + krino/mesh_utils/Akri_ChildNodeCreator.hpp + krino/mesh_utils/Akri_Edge.hpp + krino/mesh_utils/Akri_EntityIdPool.hpp + krino/mesh_utils/Akri_FieldRef.hpp + krino/mesh_utils/Akri_MeshHelpers.hpp + krino/mesh_utils/Akri_ParallelErrorMessage.hpp + krino/mesh_utils/Akri_QuadFace.hpp + krino/mesh_utils/Akri_ReportHandler.hpp + krino/mesh_utils/Akri_SideAttachedElements.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + 
krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_mesh_utils PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_mesh_utils PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_mesh_utils PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_mesh_utils PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_mesh_utils + EXPORT krinoTargets + FILE_SET krino_mesh_utils_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_quality_metric) +target_sources(krino_quality_metric PRIVATE krino/quality_metric/Akri_QualityMetric.cpp) +find_package(stk REQUIRED) +target_link_libraries(krino_quality_metric PUBLIC stk::stk_math) +target_link_libraries(krino_quality_metric PUBLIC stk::stk_topology) +target_include_directories(krino_quality_metric PUBLIC + $ + $ + $ + $) +target_sources(krino_quality_metric PUBLIC + FILE_SET krino_quality_metric_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/quality_metric/Akri_QualityMetric.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_quality_metric PUBLIC KRINO_BUILT_IN_SIERRA) +if 
(${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_quality_metric PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_quality_metric PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_quality_metric PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_quality_metric + EXPORT krinoTargets + FILE_SET krino_quality_metric_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_refinement) +target_sources(krino_refinement PRIVATE krino/refinement/Akri_HexRefiner.cpp + krino/refinement/Akri_MOAB_TetRefiner.cpp + krino/refinement/Akri_NodeRefiner.cpp + krino/refinement/Akri_QuadRefiner.cpp + krino/refinement/Akri_Refinement.cpp + krino/refinement/Akri_TransitionElementEdgeMarker.cpp + krino/refinement/Akri_TriRefiner.cpp) +target_link_libraries(krino_refinement PUBLIC krino_mesh_utils) +target_link_libraries(krino_refinement PUBLIC krino_quality_metric) +find_package(stk REQUIRED) +target_link_libraries(krino_refinement PUBLIC stk::stk_math) +target_link_libraries(krino_refinement PUBLIC stk::stk_mesh_base) +target_link_libraries(krino_refinement PUBLIC stk::stk_topology) +target_include_directories(krino_refinement PUBLIC + $ + $ + $ + $) +target_sources(krino_refinement PUBLIC + FILE_SET krino_refinement_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/refinement/Akri_HexRefiner.hpp + krino/refinement/Akri_MOAB_TetRefiner.hpp + krino/refinement/Akri_NodeRefiner.hpp + krino/refinement/Akri_QuadRefiner.hpp + krino/refinement/Akri_Refinement.hpp + krino/refinement/Akri_RefinerUtils.hpp + krino/refinement/Akri_TransitionElementEdgeMarker.hpp + krino/refinement/Akri_TriRefiner.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + 
krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_refinement PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_refinement PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_refinement PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_refinement PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_refinement + EXPORT krinoTargets + FILE_SET krino_refinement_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_refinement_rebalance) +target_sources(krino_refinement_rebalance PRIVATE krino/refinement_rebalance/Akri_RefinementRebalance.cpp) +target_link_libraries(krino_refinement_rebalance PUBLIC krino_refinement) +find_package(stk REQUIRED) +target_link_libraries(krino_refinement_rebalance PUBLIC stk::stk_balance_lib) +target_include_directories(krino_refinement_rebalance PUBLIC + $ + $ + $ + $) +target_sources(krino_refinement_rebalance PUBLIC + FILE_SET krino_refinement_rebalance_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/refinement_rebalance/Akri_RefinementRebalance.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_refinement_rebalance PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + 
target_compile_definitions(krino_refinement_rebalance PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_refinement_rebalance PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_refinement_rebalance PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_refinement_rebalance + EXPORT krinoTargets + FILE_SET krino_refinement_rebalance_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_lib) +target_sources(krino_lib PRIVATE krino/krino_lib/Akri_AdaptiveElementContour.cpp + krino/krino_lib/Akri_AdaptivityHelpers.cpp + krino/krino_lib/Akri_AnalyticSurfaceInterfaceGeometry.cpp + krino/krino_lib/Akri_AuxMetaData.cpp + krino/krino_lib/Akri_BoundingBoxMesh.cpp + krino/krino_lib/Akri_BoundingSurface.cpp + krino/krino_lib/Akri_CDFEM_Parent_Edge.cpp + krino/krino_lib/Akri_CDFEM_Parent_Edges.cpp + krino/krino_lib/Akri_CDFEM_Support.cpp + krino/krino_lib/Akri_CDMesh.cpp + krino/krino_lib/Akri_CDMesh_Debug.cpp + krino/krino_lib/Akri_CDMesh_Refinement.cpp + krino/krino_lib/Akri_CDMesh_Utils.cpp + krino/krino_lib/Akri_ChildNodeStencil.cpp + krino/krino_lib/Akri_Compute_Surface_Distance.cpp + krino/krino_lib/Akri_ConformingPhaseParts.cpp + krino/krino_lib/Akri_ContourElement.cpp + krino/krino_lib/Akri_ContourSubElement.cpp + krino/krino_lib/Akri_CreateInterfaceGeometry.cpp + krino/krino_lib/Akri_Cutting_Surface.cpp + krino/krino_lib/Akri_DecompositionHasChanged.cpp + krino/krino_lib/Akri_DetermineElementSign.cpp + krino/krino_lib/Akri_DistanceSweeper.cpp + krino/krino_lib/Akri_Eikonal_Calc.cpp + krino/krino_lib/Akri_Element.cpp + krino/krino_lib/Akri_ElementCutterUtils.cpp + krino/krino_lib/Akri_Element_Cutter.cpp + krino/krino_lib/Akri_Element_Intersections.cpp + krino/krino_lib/Akri_FastIterativeMethod.cpp + krino/krino_lib/Akri_Fast_Marching.cpp + krino/krino_lib/Akri_IC_Alg.cpp + 
krino/krino_lib/Akri_IC_Calculator.cpp + krino/krino_lib/Akri_IO_Helpers.cpp + krino/krino_lib/Akri_Intersection_Points.cpp + krino/krino_lib/Akri_LevelSet.cpp + krino/krino_lib/Akri_LevelSetInterfaceGeometry.cpp + krino/krino_lib/Akri_LevelSetPolicy.cpp + krino/krino_lib/Akri_LevelSetShapeSensitivities.cpp + krino/krino_lib/Akri_LevelSetSurfaceInterfaceGeometry.cpp + krino/krino_lib/Akri_LowerEnvelope.cpp + krino/krino_lib/Akri_MasterElementDeterminer.cpp + krino/krino_lib/Akri_MeshClone.cpp + krino/krino_lib/Akri_MeshDiagnostics.cpp + krino/krino_lib/Akri_MeshFromFile.cpp + krino/krino_lib/Akri_MeshInputOptions.cpp + krino/krino_lib/Akri_NodalBoundingBox.cpp + krino/krino_lib/Akri_NodalSurfaceDistance.cpp + krino/krino_lib/Akri_NodeToCapturedDomains.cpp + krino/krino_lib/Akri_OutputUtils.cpp + krino/krino_lib/Akri_ParentsToChildMapper.cpp + krino/krino_lib/Akri_PatchInterpolator.cpp + krino/krino_lib/Akri_PhaseTag.cpp + krino/krino_lib/Akri_Phase_Support.cpp + krino/krino_lib/Akri_PostProcess.cpp + krino/krino_lib/Akri_ProlongationData.cpp + krino/krino_lib/Akri_Quality.cpp + krino/krino_lib/Akri_RefineNearLevelSets.cpp + krino/krino_lib/Akri_RefinementInterface.cpp + krino/krino_lib/Akri_RefinementSupport.cpp + krino/krino_lib/Akri_SemiLagrangian.cpp + krino/krino_lib/Akri_SharpFeature.cpp + krino/krino_lib/Akri_Snap.cpp + krino/krino_lib/Akri_SnapInfo.cpp + krino/krino_lib/Akri_SnapToNode.cpp + krino/krino_lib/Akri_SubElement.cpp + krino/krino_lib/Akri_SubElementChildNodeAncestry.cpp + krino/krino_lib/Akri_SubElementNodeAncestry.cpp + krino/krino_lib/Akri_Surface_Manager.cpp + krino/krino_lib/Akri_VolumePreservingSnappingLimiter.cpp) +find_package(MPI REQUIRED COMPONENTS C Fortran) +target_link_libraries(krino_lib PUBLIC MPI::MPI_C) +target_link_libraries(krino_lib PUBLIC MPI::MPI_Fortran) +find_package(SEACAS REQUIRED COMPONENTS SEACASIoss) +target_link_libraries(krino_lib PUBLIC SEACASIoss::Ioss) +target_link_libraries(krino_lib PUBLIC krino_geometry) 
+target_link_libraries(krino_lib PUBLIC krino_master_element) +target_link_libraries(krino_lib PUBLIC krino_math_utils) +target_link_libraries(krino_lib PUBLIC krino_mesh_surface) +target_link_libraries(krino_lib PUBLIC krino_mesh_utils) +target_link_libraries(krino_lib PUBLIC krino_quality_metric) +target_link_libraries(krino_lib PUBLIC krino_refinement) +target_link_libraries(krino_lib PUBLIC krino_surface) +find_package(stk REQUIRED) +target_link_libraries(krino_lib PUBLIC stk::stk_emend) +target_link_libraries(krino_lib PUBLIC stk::stk_io) +target_link_libraries(krino_lib PUBLIC stk::stk_math) +target_link_libraries(krino_lib PUBLIC stk::stk_mesh_base) +target_link_libraries(krino_lib PUBLIC stk::stk_search) +target_link_libraries(krino_lib PUBLIC stk::stk_tools_lib) +target_link_libraries(krino_lib PUBLIC stk::stk_util_diag) +target_include_directories(krino_lib PUBLIC + $ + $ + $ + $) +target_sources(krino_lib PUBLIC + FILE_SET krino_lib_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/krino_lib/Akri_AdaptiveElementContour.hpp + krino/krino_lib/Akri_AdaptivityHelpers.hpp + krino/krino_lib/Akri_AnalyticSurfaceInterfaceGeometry.hpp + krino/krino_lib/Akri_AuxMetaData.hpp + krino/krino_lib/Akri_BoundingBoxMesh.hpp + krino/krino_lib/Akri_BoundingSurface.hpp + krino/krino_lib/Akri_CDFEM_Parent_Edge.hpp + krino/krino_lib/Akri_CDFEM_Parent_Edges.hpp + krino/krino_lib/Akri_CDFEM_Snapper.hpp + krino/krino_lib/Akri_CDFEM_Support.hpp + krino/krino_lib/Akri_CDMesh.hpp + krino/krino_lib/Akri_CDMesh_Debug.hpp + krino/krino_lib/Akri_CDMesh_Refinement.hpp + krino/krino_lib/Akri_CDMesh_Utils.hpp + krino/krino_lib/Akri_ChildNodeStencil.hpp + krino/krino_lib/Akri_Compute_Surface_Distance.hpp + krino/krino_lib/Akri_ConformingPhaseParts.hpp + krino/krino_lib/Akri_ContourElement.hpp + krino/krino_lib/Akri_ContourSubElement.hpp + krino/krino_lib/Akri_CreateInterfaceGeometry.hpp + krino/krino_lib/Akri_Cutting_Surface.hpp + 
krino/krino_lib/Akri_DecompositionHasChanged.hpp + krino/krino_lib/Akri_DetermineElementSign.hpp + krino/krino_lib/Akri_DistanceSweeper.hpp + krino/krino_lib/Akri_Eikonal_Calc.hpp + krino/krino_lib/Akri_Element.hpp + krino/krino_lib/Akri_ElementCutterUtils.hpp + krino/krino_lib/Akri_Element_Cutter.hpp + krino/krino_lib/Akri_Element_Intersections.hpp + krino/krino_lib/Akri_FastIterativeMethod.hpp + krino/krino_lib/Akri_Fast_Marching.hpp + krino/krino_lib/Akri_IC_Alg.hpp + krino/krino_lib/Akri_IC_Calculator.hpp + krino/krino_lib/Akri_IO_Helpers.hpp + krino/krino_lib/Akri_InterfaceGeometry.hpp + krino/krino_lib/Akri_InterfaceID.hpp + krino/krino_lib/Akri_Interface_Name_Generator.hpp + krino/krino_lib/Akri_Intersection_Points.hpp + krino/krino_lib/Akri_LevelSet.hpp + krino/krino_lib/Akri_LevelSetInterfaceGeometry.hpp + krino/krino_lib/Akri_LevelSetPolicy.hpp + krino/krino_lib/Akri_LevelSetShapeSensitivities.hpp + krino/krino_lib/Akri_LevelSetSurfaceInterfaceGeometry.hpp + krino/krino_lib/Akri_LowerEnvelope.hpp + krino/krino_lib/Akri_MasterElementDeterminer.hpp + krino/krino_lib/Akri_MeshClone.hpp + krino/krino_lib/Akri_MeshDiagnostics.hpp + krino/krino_lib/Akri_MeshFromFile.hpp + krino/krino_lib/Akri_MeshInputOptions.hpp + krino/krino_lib/Akri_MeshInterface.hpp + krino/krino_lib/Akri_NodalBoundingBox.hpp + krino/krino_lib/Akri_NodalSurfaceDistance.hpp + krino/krino_lib/Akri_NodeToCapturedDomains.hpp + krino/krino_lib/Akri_OrderedIdPair.hpp + krino/krino_lib/Akri_OutputUtils.hpp + krino/krino_lib/Akri_ParentsToChildMapper.hpp + krino/krino_lib/Akri_PatchInterpolator.hpp + krino/krino_lib/Akri_PhaseTag.hpp + krino/krino_lib/Akri_Phase_Support.hpp + krino/krino_lib/Akri_PostProcess.hpp + krino/krino_lib/Akri_ProlongationData.hpp + krino/krino_lib/Akri_Quality.hpp + krino/krino_lib/Akri_RefineNearLevelSets.hpp + krino/krino_lib/Akri_RefinementInterface.hpp + krino/krino_lib/Akri_RefinementSupport.hpp + krino/krino_lib/Akri_SemiLagrangian.hpp + 
krino/krino_lib/Akri_SharpFeature.hpp + krino/krino_lib/Akri_Snap.hpp + krino/krino_lib/Akri_SnapIndependentSetFinder.hpp + krino/krino_lib/Akri_SnapInfo.hpp + krino/krino_lib/Akri_SnapToNode.hpp + krino/krino_lib/Akri_SubElement.hpp + krino/krino_lib/Akri_SubElementChildNodeAncestry.hpp + krino/krino_lib/Akri_SubElementNodeAncestry.hpp + krino/krino_lib/Akri_Surface_Identifier.hpp + krino/krino_lib/Akri_Surface_Manager.hpp + krino/krino_lib/Akri_TopologyData.hpp + krino/krino_lib/Akri_TypeDefs.hpp + krino/krino_lib/Akri_Utility.hpp + krino/krino_lib/Akri_VolumePreservingSnappingLimiter.hpp + krino/krino_lib/Akri_config.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_lib PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_lib PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_lib PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_lib PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_lib + EXPORT krinoTargets + FILE_SET krino_lib_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_rebalance_utils) +target_sources(krino_rebalance_utils PRIVATE krino/rebalance_utils/Akri_RebalanceUtils.cpp + krino/rebalance_utils/Akri_RebalanceUtils_Impl.cpp) +target_link_libraries(krino_rebalance_utils PUBLIC krino_lib) +find_package(stk REQUIRED) +target_link_libraries(krino_rebalance_utils PUBLIC stk::stk_balance_lib) +target_link_libraries(krino_rebalance_utils PUBLIC 
stk::stk_mesh_base) +target_include_directories(krino_rebalance_utils PUBLIC + $ + $ + $ + $) +target_sources(krino_rebalance_utils PUBLIC + FILE_SET krino_rebalance_utils_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/rebalance_utils/Akri_RebalanceUtils.hpp + krino/rebalance_utils/Akri_RebalanceUtils_Impl.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_rebalance_utils PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_rebalance_utils PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_rebalance_utils PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_rebalance_utils PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_rebalance_utils + EXPORT krinoTargets + FILE_SET krino_rebalance_utils_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_region) +target_sources(krino_region PRIVATE krino/region/Akri_Region.cpp + krino/region/Akri_RegisterProduct.cpp + krino/region/Akri_Simulation.cpp + krino/region/Akri_Startup.cpp) +target_link_libraries(krino_region PUBLIC krino_lib) +target_include_directories(krino_region PUBLIC + $ + $ + $ + $) +target_sources(krino_region PUBLIC + FILE_SET krino_region_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/region/Akri_Region.hpp + krino/region/Akri_RegisterProduct.hpp + krino/region/Akri_ResultsOutputOptions.hpp + krino/region/Akri_Simulation.hpp + 
krino/region/Akri_Startup.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_region PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_region PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_region PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_region PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_region + EXPORT krinoTargets + FILE_SET krino_region_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_sierra) +target_sources(krino_sierra PRIVATE krino_sierra/Akri_CDFEM_Options_SierraParser.cpp + krino_sierra/Akri_Events.cpp + krino_sierra/Akri_IC_SierraParser.cpp + krino_sierra/Akri_LevelSet_Sctl.cpp + krino_sierra/Akri_LevelSet_SierraParser.cpp + krino_sierra/Akri_Motion_SierraParser.cpp + krino_sierra/Akri_PerceptRefinementInterface.cpp + krino_sierra/Akri_Phase_SierraParser.cpp + krino_sierra/Akri_RegionInterface.cpp) +target_link_libraries(krino_sierra PUBLIC krino_lib) +find_package(sierra_common REQUIRED) +target_link_libraries(krino_sierra PUBLIC sierra_common::sierra) +target_link_libraries(krino_sierra PUBLIC sierra_common::sierra_util_sctl) +target_link_libraries(krino_sierra PUBLIC sierra_common::sierra_util_user_input_function_parser) +target_link_libraries(krino_sierra PUBLIC sierra_common::sierraparser) +target_include_directories(krino_sierra PUBLIC + $ + $ + $ + $) +target_sources(krino_sierra PUBLIC + FILE_SET 
krino_sierra_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_sierra PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_sierra PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_sierra PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_sierra PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_sierra + EXPORT krinoTargets + FILE_SET krino_sierra_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(krino_parser) +target_sources(krino_parser PRIVATE krino/parser/Akri_CDFEM_Options_Parser.cpp + krino/parser/Akri_IC_Parser.cpp + krino/parser/Akri_LevelSet_Parser.cpp + krino/parser/Akri_MeshInput_Parser.cpp + krino/parser/Akri_Parser.cpp + krino/parser/Akri_Phase_Parser.cpp + krino/parser/Akri_Region_Parser.cpp + krino/parser/Akri_ResultsOutput_Parser.cpp + krino/parser/Akri_Simulation_Parser.cpp + krino/parser/Akri_Surface_Parser.cpp) +target_link_libraries(krino_parser PUBLIC krino_lib) +target_link_libraries(krino_parser PUBLIC krino_region) +find_package(yaml-cpp REQUIRED) +target_link_libraries(krino_parser PUBLIC yaml-cpp) +target_include_directories(krino_parser PUBLIC + $ + $ + $ + $) +target_sources(krino_parser PUBLIC + FILE_SET krino_parser_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/parser/Akri_CDFEM_Options_Parser.hpp + krino/parser/Akri_IC_Parser.hpp + 
krino/parser/Akri_LevelSet_Parser.hpp + krino/parser/Akri_MeshInput_Parser.hpp + krino/parser/Akri_Parser.hpp + krino/parser/Akri_Phase_Parser.hpp + krino/parser/Akri_Region_Parser.hpp + krino/parser/Akri_ResultsOutput_Parser.hpp + krino/parser/Akri_Simulation_Parser.hpp + krino/parser/Akri_Surface_Parser.hpp + krino/parser/Akri_YAML.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_parser PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_parser PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(krino_parser PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(krino_parser PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS krino_parser + EXPORT krinoTargets + FILE_SET krino_parser_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_library(mesh_adapt_lib) +target_sources(mesh_adapt_lib PRIVATE krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdapt.cpp + krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdaptParser.cpp) +target_link_libraries(mesh_adapt_lib PUBLIC krino_refinement) +find_package(stk REQUIRED) +target_link_libraries(mesh_adapt_lib PUBLIC stk::stk_io) +target_link_libraries(mesh_adapt_lib PUBLIC stk::stk_mesh_base) +target_link_libraries(mesh_adapt_lib PUBLIC stk::stk_tools_lib) +target_link_libraries(mesh_adapt_lib PUBLIC stk::stk_util_diag) +target_include_directories(mesh_adapt_lib PUBLIC + $ + $ + $ + $) +target_sources(mesh_adapt_lib PUBLIC + FILE_SET mesh_adapt_lib_headers + TYPE 
HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdapt.hpp + krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdaptAlgorithmParameters.hpp + krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdaptInputData.hpp + krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdaptParser.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(mesh_adapt_lib PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(mesh_adapt_lib PUBLIC Build64) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + target_compile_options(mesh_adapt_lib PUBLIC $<$:-Wshadow -Winconsistent-missing-override>) +endif () +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(mesh_adapt_lib PUBLIC $<$:-Wshadow>) +endif () +install( + TARGETS mesh_adapt_lib + EXPORT krinoTargets + FILE_SET mesh_adapt_lib_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_executable(krino) +target_sources(krino PRIVATE krino/Apps_krino.cpp) +target_link_libraries(krino PUBLIC krino_parser) +target_link_libraries(krino PUBLIC krino_region) +find_package(stk REQUIRED) +target_link_libraries(krino PUBLIC stk::stk_util_registry) +target_include_directories(krino PUBLIC + $ + $) +target_sources(krino PUBLIC + FILE_SET krino_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + 
krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino PUBLIC Build64) +endif () +install( + TARGETS krino + FILE_SET krino_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +if (${SIERRA_DEVELOPER_BUILD}) + add_executable(krino_unit) + target_sources(krino_unit PRIVATE krino/unit_tests/Akri_StkMeshBuilder.cpp + krino/unit_tests/Akri_UnitMathUtils.cpp + krino/unit_tests/Akri_UnitMeshUtils.cpp + krino/unit_tests/Akri_UnitTestUtils.cpp + krino/unit_tests/Akri_Unit_Analytic_CDMesh.cpp + krino/unit_tests/Akri_Unit_BoundingBoxDistance.cpp + krino/unit_tests/Akri_Unit_CDFEM_Parent_Edge.cpp + krino/unit_tests/Akri_Unit_CDMesh.cpp + krino/unit_tests/Akri_Unit_Constrained_Redistance.cpp + krino/unit_tests/Akri_Unit_ContourElement.cpp + krino/unit_tests/Akri_Unit_CurvatureLeastSquares.cpp + krino/unit_tests/Akri_Unit_DecomposeWithSensitivities.cpp + krino/unit_tests/Akri_Unit_Eikonal.cpp + krino/unit_tests/Akri_Unit_Element.cpp + krino/unit_tests/Akri_Unit_Element_Cutter.cpp + krino/unit_tests/Akri_Unit_Explicit_Hamilton_Jacobi.cpp + krino/unit_tests/Akri_Unit_FastMarching.cpp + krino/unit_tests/Akri_Unit_Geometry.cpp + krino/unit_tests/Akri_Unit_InterfaceGeometry.cpp + krino/unit_tests/Akri_Unit_LogRedirecter.cpp + krino/unit_tests/Akri_Unit_LowerEnvelope.cpp + krino/unit_tests/Akri_Unit_MeshHelpers.cpp + krino/unit_tests/Akri_Unit_MortonIndex.cpp + krino/unit_tests/Akri_Unit_OutputUtils.cpp + krino/unit_tests/Akri_Unit_ParallelErrorMessage.cpp + krino/unit_tests/Akri_Unit_Part_Decomposition_Fixture.cpp + krino/unit_tests/Akri_Unit_PatchInterpolator.cpp + krino/unit_tests/Akri_Unit_Phase_Support.cpp + krino/unit_tests/Akri_Unit_RebalanceUtils.cpp + krino/unit_tests/Akri_Unit_RefineInterval.cpp + 
krino/unit_tests/Akri_Unit_Refine_Beam.cpp + krino/unit_tests/Akri_Unit_Refine_CDMesh.cpp + krino/unit_tests/Akri_Unit_Refine_General.cpp + krino/unit_tests/Akri_Unit_Refine_Hex.cpp + krino/unit_tests/Akri_Unit_Refine_Quad.cpp + krino/unit_tests/Akri_Unit_Refine_Tet.cpp + krino/unit_tests/Akri_Unit_Refine_Tri.cpp + krino/unit_tests/Akri_Unit_SearchTree.cpp + krino/unit_tests/Akri_Unit_SemiLagrangian.cpp + krino/unit_tests/Akri_Unit_SideAttachedElements.cpp + krino/unit_tests/Akri_Unit_Single_Element_Fixtures.cpp + krino/unit_tests/Akri_Unit_Snap.cpp + krino/unit_tests/Akri_Unit_TriangleCalcs.cpp + krino/unit_tests/Akri_Unit_WindingNumber.cpp + krino/unit_tests/Akri_Unit_main.cpp) + target_link_libraries(krino_unit PUBLIC krino_math_utils) + target_link_libraries(krino_unit PUBLIC krino_rebalance_utils) + target_link_libraries(krino_unit PUBLIC krino_region) + find_package(stk REQUIRED) + target_link_libraries(krino_unit PUBLIC stk::stk_unit_test_utils) + target_include_directories(krino_unit PUBLIC + $ + $ + $ + $) + target_sources(krino_unit PUBLIC + FILE_SET krino_unit_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino/unit_tests/Akri_MeshSpecs.hpp + krino/unit_tests/Akri_StkMeshBuilder.hpp + krino/unit_tests/Akri_StkMeshFixture.hpp + krino/unit_tests/Akri_UnitMeshUtils.hpp + krino/unit_tests/Akri_UnitTestUtils.hpp + krino/unit_tests/Akri_Unit_BoundingBoxMesh.hpp + krino/unit_tests/Akri_Unit_DecompositionFixture.hpp + krino/unit_tests/Akri_Unit_InterfaceGeometry.hpp + krino/unit_tests/Akri_Unit_LogRedirecter.hpp + krino/unit_tests/Akri_Unit_MeshHelpers.hpp + krino/unit_tests/Akri_Unit_Part_Decomposition_Fixture.hpp + krino/unit_tests/Akri_Unit_RefinementFixture.hpp + krino/unit_tests/Akri_Unit_Single_Element_Fixtures.hpp + krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + 
krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) + target_compile_definitions(krino_unit PUBLIC KRINO_BUILT_IN_SIERRA) + if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_unit PUBLIC Build64) + endif () + install( + TARGETS krino_unit + FILE_SET krino_unit_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino + ) +endif() +add_executable(delete_small_elements) +target_sources(delete_small_elements PRIVATE delete_small_elements/Akri_DeleteSmallElementsMain.cpp) +target_link_libraries(delete_small_elements PUBLIC krino_lib) +find_package(stk REQUIRED) +target_link_libraries(delete_small_elements PUBLIC stk::stk_util_command_line) +target_include_directories(delete_small_elements PUBLIC + $ + $) +target_sources(delete_small_elements PUBLIC + FILE_SET delete_small_elements_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(delete_small_elements PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(delete_small_elements PUBLIC Build64) +endif () +install( + TARGETS delete_small_elements + FILE_SET delete_small_elements_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) +add_executable(krino_mesh_adapt) +target_sources(krino_mesh_adapt PRIVATE krino_mesh_adapt/KrinoMeshAdaptMain.cpp) +find_package(MPI REQUIRED COMPONENTS C Fortran) +target_link_libraries(krino_mesh_adapt PUBLIC MPI::MPI_C) 
+target_link_libraries(krino_mesh_adapt PUBLIC MPI::MPI_Fortran) +target_link_libraries(krino_mesh_adapt PUBLIC mesh_adapt_lib) +find_package(stk REQUIRED) +target_link_libraries(krino_mesh_adapt PUBLIC stk::stk_util_command_line) +target_include_directories(krino_mesh_adapt PUBLIC + $ + $) +target_sources(krino_mesh_adapt PUBLIC + FILE_SET krino_mesh_adapt_headers + TYPE HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES krino_sierra/Akri_CDFEM_Options_SierraParser.hpp + krino_sierra/Akri_Events.hpp + krino_sierra/Akri_IC_SierraParser.hpp + krino_sierra/Akri_LevelSet_Sctl.hpp + krino_sierra/Akri_LevelSet_SierraParser.hpp + krino_sierra/Akri_Motion_SierraParser.hpp + krino_sierra/Akri_PerceptRefinementInterface.hpp + krino_sierra/Akri_Phase_SierraParser.hpp + krino_sierra/Akri_RegionInterface.hpp) +target_compile_definitions(krino_mesh_adapt PUBLIC KRINO_BUILT_IN_SIERRA) +if (${CMAKE_SIZEOF_VOID_P} STREQUAL "8") + target_compile_definitions(krino_mesh_adapt PUBLIC Build64) +endif () +install( + TARGETS krino_mesh_adapt + FILE_SET krino_mesh_adapt_headers + DESTINATION include/krino + INCLUDES DESTINATION include/krino +) + +install( + EXPORT krinoTargets + NAMESPACE krino:: + DESTINATION share/cmake/krino) + +install( + FILES cmake/krinoConfig.cmake + DESTINATION share/cmake/krino + ) + +install( + FILES ${SIERRA_SOURCE_DIR}/modules/createParserTarget.cmake + DESTINATION share/cmake/krino + ) \ No newline at end of file diff --git a/packages/krino/cmake/krinoConfig.cmake b/packages/krino/cmake/krinoConfig.cmake new file mode 100644 index 000000000000..96d330b60f11 --- /dev/null +++ b/packages/krino/cmake/krinoConfig.cmake @@ -0,0 +1,13 @@ +include(CMakeFindDependencyMacro) +find_dependency(Intrepid2 REQUIRED) +find_dependency(MPI REQUIRED) +find_dependency(SEACAS REQUIRED) +find_dependency(sierra_common REQUIRED) +find_dependency(stk REQUIRED) +find_dependency(yaml-cpp REQUIRED) + +include(${CMAKE_CURRENT_LIST_DIR}/createParserTarget.cmake) 
+create_parser_target(TARGET krino_commands_xmldb SOURCES krino_commands.xmldb) + +include("${CMAKE_CURRENT_LIST_DIR}/krinoTargets.cmake") + diff --git a/packages/krino/cmake_install_test/load_gcc_modules b/packages/krino/cmake_install_test/load_gcc_modules index 656de831f022..9f6db3893441 100755 --- a/packages/krino/cmake_install_test/load_gcc_modules +++ b/packages/krino/cmake_install_test/load_gcc_modules @@ -1,30 +1,12 @@ #!/bin/bash -source /projects/sems/modulefiles/utils/sems-modules-init.sh - -module load sems-cmake/3.24.3 -module load sems-gcc/8.3.0 -module load sems-metis/5.1.0 - -export BLAS_LIBRARIES=${BLAS_LIBRARIES:-/usr/lib64/libblas.so} -export LAPACK_LIBRARIES=${LAPACK_LIBRARIES:-/usr/lib64/liblapack.so} - -module load sems-openmpi/1.10.7 -module load sems-hdf5/1.10.7 -module load sems-netcdf-c/4.7.3 -module load sems-parmetis/4.0.3 -module load sems-parallel-netcdf/1.12.1 - - - -### Corresponding CDE v3 modules -- as of 2/6/23 these gave link errors -#module load cde/v3/cmake/3.23.1 -#module load cde/v3/gcc/10.3.0 -#module load cde/v3/openmpi/4.1.2-gcc-10.3.0 -#module load cde/v3/netlib-lapack/3.10.1-gcc-10.3.0 -#module load cde/v3/hdf5/1.10.6-gcc-10.3.0-openmpi-4.1.2 -#module load cde/v3/netcdf-c/4.8.1-gcc-10.3.0-openmpi-4.1.2 -#module load cde/v3/parallel-netcdf/1.12.2-gcc-10.3.0-openmpi-4.1.2 -#module load cde/v3/metis/5.1.0-gcc-10.3.0 -#module load cde/v3/parmetis/4.0.3-gcc-10.3.0-openmpi-4.1.2 - +module load aue/cmake/3.27.7 +module load aue/gcc/10.3.0 +module load aue/metis/5.1.0-gcc-10.3.0 +module load aue/netlib-lapack/3.11.0-gcc-10.3.0 + +module load aue/openmpi/4.1.6-gcc-10.3.0 +module load aue/hdf5/1.14.2-gcc-10.3.0-openmpi-4.1.6 +module load aue/netcdf-c/4.9.2-gcc-10.3.0-openmpi-4.1.6 +module load aue/parmetis/4.0.3-gcc-10.3.0-openmpi-4.1.6 +module load aue/parallel-netcdf/1.12.3-gcc-10.3.0-openmpi-4.1.6 diff --git a/packages/krino/cmake_install_test/run_cmake_krino b/packages/krino/cmake_install_test/run_cmake_krino index 
c92850c8d4db..e0bc4d53d2c7 100755 --- a/packages/krino/cmake_install_test/run_cmake_krino +++ b/packages/krino/cmake_install_test/run_cmake_krino @@ -45,5 +45,6 @@ cmake \ -Dyamlcpp_LIBRARY_DIRS=${yaml_install_dir}/lib \ -DTPL_ENABLE_Netcdf:BOOL=ON \ -DTPL_ENABLE_HDF5:BOOL=ON \ +-DTPL_HDF5_LIBRARIES="-lhdf5" \ ${trilinos_src_dir}/ diff --git a/packages/krino/delete_small_elements/Akri_DeleteSmallElementsMain.cpp b/packages/krino/delete_small_elements/Akri_DeleteSmallElementsMain.cpp index 1b07ed4ee21e..34aec55776d8 100644 --- a/packages/krino/delete_small_elements/Akri_DeleteSmallElementsMain.cpp +++ b/packages/krino/delete_small_elements/Akri_DeleteSmallElementsMain.cpp @@ -108,7 +108,6 @@ static bool delete_small_elements(const DeleteSmallElementsInputData& inputData, { std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh_with_auto_decomp(inputData.meshIn, *bulk); diff --git a/packages/krino/krino/geometry/Akri_WindingNumber.cpp b/packages/krino/krino/geometry/Akri_WindingNumber.cpp new file mode 100644 index 000000000000..fd6bd3d14c6d --- /dev/null +++ b/packages/krino/krino/geometry/Akri_WindingNumber.cpp @@ -0,0 +1,225 @@ +#include +#include +#include +#include + +namespace krino { + +double compute_facet_winding_number(const stk::math::Vector3d & facetX0, const stk::math::Vector3d & facetX1, const stk::math::Vector3d & facetX2, const stk::math::Vector3d & queryLoc) +{ + const stk::math::Vector3d x0 = facetX0 - queryLoc; + const stk::math::Vector3d x1 = facetX1 - queryLoc; + const stk::math::Vector3d x2 = facetX2 - queryLoc; + const double len0 = x0.length(); + const double len1 = x1.length(); + const double len2 = x2.length(); + const double num = Dot(Cross(x0,x1), x2); + const double den = len0*len1*len2 + Dot(x0,x1)*len2 + Dot(x0,x2)*len1 + Dot(x1,x2)*len0; + return std::atan2(num, den) / (2.*M_PI); +} + +void accumulate_cluster_approximation(const 
stk::math::Vector3d & facetX0, + const stk::math::Vector3d & facetX1, + const stk::math::Vector3d & facetX2, + const stk::math::Vector3d & clusterPt, + FacetClusterApproximation & approx) +{ + const stk::math::Vector3d facetAreaNormal = 0.5*Cross(facetX1-facetX0,facetX2-facetX0); + + for (int i=0; i<3; ++i) + approx.areaN[i] += facetAreaNormal[i]; + + const stk::math::Vector3d centroidContrib = ((1./3.) * (facetX0+facetX1+facetX2) - clusterPt); + for (unsigned i=0; i<3; ++i) + for (unsigned j=0; j<3; ++j) + approx.areaNiXj[3*i+j] += facetAreaNormal[i]*centroidContrib[j]; + + const stk::math::Vector3d edgeDiff0 = 0.5*(facetX0+facetX1) - clusterPt; + const stk::math::Vector3d edgeDiff1 = 0.5*(facetX1+facetX2) - clusterPt; + const stk::math::Vector3d edgeDiff2 = 0.5*(facetX2+facetX0) - clusterPt; + + // Note that this only fills in matrix for j>=i to take advantage of symmetry + unsigned index = 0; + for (unsigned i=0; i<3; ++i) + for (unsigned j=i; j<3; ++j) + for (unsigned k=0; k<3; ++k) + approx.areaDijNk[index++] += (1./3.) * (edgeDiff0[i]*edgeDiff0[j] + edgeDiff1[i]*edgeDiff1[j] + edgeDiff2[i]*edgeDiff2[j]) * facetAreaNormal[k]; +} + +double compute_approximate_winding_number(const FacetClusterApproximation & cluster, const stk::math::Vector3d & queryPt) +{ + stk::math::Vector3d dx = cluster.center - queryPt; + const double len = dx.unitize(); + + static constexpr double inv4Pi = 0.25/M_PI; + const double invLen = 1./len; + const double invLen2 = invLen*invLen; + const double mult = inv4Pi * invLen2; + double windingNumber = 0.; + + unsigned index = 0; + for (unsigned i=0; i<3; ++i) + { + windingNumber += mult * (cluster.areaN[i]*dx[i] + invLen * cluster.areaNiXj[3*i+i]); + for (unsigned j=0; j<3; ++j) + { + windingNumber -= 3. * mult * invLen* (cluster.areaNiXj[3*i+j]*dx[i]*dx[j]); + if (j>=i) + { + const double deltaij = (i==j) ? 1. : 0.; + const double symm = (i==j) ? 
1.0 : 2.0; // use i-j symmetry + for (unsigned k=0; k<3; ++k) + { + const double deltajk = (j==k) ? 1. : 0.; + const double deltaik = (i==k) ? 1. : 0.; + double coeff = 7.5 * dx[i]*dx[j]*dx[k] - 1.5*(dx[i]*deltajk + dx[j]*deltaik + dx[k]*deltaij); + windingNumber += mult * symm * invLen2 * coeff * cluster.areaDijNk[index++]; + } + } + } + } + + return windingNumber; +} + +double compute_approximate_winding_number(const ClusterApproximation & approx, const stk::math::Vector3d & queryPt) +{ + stk::math::Vector3d dx = approx.center - queryPt; + const double len = dx.unitize(); + + static constexpr double inv4Pi = 0.25/M_PI; + const double invLen = 1./len; + const double invLen2 = invLen*invLen; + const double mult = inv4Pi * invLen2; + + double approxWindingNumber0 = mult*Dot(dx, approx.areaN); + + const double mult1 = mult*invLen; + double approxWindingNumber1 = 0.; + for (unsigned i=0; i<3; ++i) + { + approxWindingNumber1 += mult1 * approx.areaNiXj[3*i+i]; + for (unsigned j=0; j<3; ++j) + approxWindingNumber1 -= 3. 
* mult1 * approx.areaNiXj[3*i+j] * dx[i]*dx[j]; + } + + const stk::math::Vector3d dx2(dx[0]*dx[0], dx[1]*dx[1], dx[2]*dx[2]); + const stk::math::Vector3d dx3(dx[0]*dx2[0], dx[1]*dx2[1], dx[2]*dx2[2]); + const stk::math::Vector3d deltaFnCoeff( + 3.*approx.areaNDdiag[0] + approx.area2NyDyx_NxDyy + approx.area2NzDzx_NxDzz, + 3.*approx.areaNDdiag[1] + approx.area2NzDzy_NyDzz + approx.area2NxDxy_NyDxx, + 3.*approx.areaNDdiag[2] + approx.area2NxDxz_NzDxx + approx.area2NyDyz_NzDyy + ); + const stk::math::Vector3d tmp( + dx[1]*approx.area2NxDxy_NyDxx + dx[2]*approx.area2NxDxz_NzDxx, + dx[2]*approx.area2NyDyz_NzDyy + dx[0]*approx.area2NyDyx_NxDyy, + dx[0]*approx.area2NzDzx_NxDzz + dx[1]*approx.area2NzDzy_NyDzz + ); + const double approxWindingNumber2 = mult*invLen2* + (7.5*(Dot(dx3, approx.areaNDdiag) + dx[0]*dx[1]*dx[2]*approx.areaNDperm + Dot(dx2, tmp)) - 1.5*Dot(dx, deltaFnCoeff)); + + const double approxWindingNumber = approxWindingNumber0 + approxWindingNumber1 + approxWindingNumber2; + + return approxWindingNumber; +} + +std::array compute_facet3d_edge_contrib(const std::array & facetCoords, const stk::math::Vector3d & centroid) +{ + std::array edgeContrib = {0,0,0,0,0,0}; + const stk::math::Vector3d edgeDiff0 = 0.5*(facetCoords[0]+facetCoords[1]) - centroid; + const stk::math::Vector3d edgeDiff1 = 0.5*(facetCoords[1]+facetCoords[2]) - centroid; + const stk::math::Vector3d edgeDiff2 = 0.5*(facetCoords[2]+facetCoords[0]) - centroid; + + // Note that this only fills in matrix for j>=i to take advantage of symmetry + unsigned index = 0; + for (unsigned i=0; i<3; ++i) + for (unsigned j=i; j<3; ++j) + edgeContrib[index++] = (1./3.)*(edgeDiff0[i]*edgeDiff0[j] + edgeDiff1[i]*edgeDiff1[j] + edgeDiff2[i]*edgeDiff2[j]); + return edgeContrib; +} + +void compute_facet_approximation(const std::array & facetCoords, ClusterApproximation & approx) +{ + approx.center = (1./3.) 
* (facetCoords[0]+facetCoords[1]+facetCoords[2]); + approx.areaN = 0.5*Cross(facetCoords[1]-facetCoords[0],facetCoords[2]-facetCoords[0]); + + std::fill(approx.areaNiXj.begin(), approx.areaNiXj.end(), 0.); + + const stk::math::Vector3d & n = approx.areaN; + std::array edgeContrib = compute_facet3d_edge_contrib(facetCoords, approx.center); + approx.areaNDdiag = stk::math::Vector3d(n[0]*edgeContrib[0], n[1]*edgeContrib[3], n[2]*edgeContrib[5]); + approx.areaNDperm = 2*(n[0]*edgeContrib[4] + n[1]*edgeContrib[2] + n[2]*edgeContrib[1]); + approx.area2NxDxy_NyDxx = 2*n[0]*edgeContrib[1] + n[1]*edgeContrib[0]; + approx.area2NxDxz_NzDxx = 2*n[0]*edgeContrib[2] + n[2]*edgeContrib[0]; + approx.area2NyDyz_NzDyy = 2*n[1]*edgeContrib[4] + n[2]*edgeContrib[3]; + approx.area2NyDyx_NxDyy = 2*n[1]*edgeContrib[1] + n[0]*edgeContrib[3]; + approx.area2NzDzx_NxDzz = 2*n[2]*edgeContrib[2] + n[0]*edgeContrib[5]; + approx.area2NzDzy_NyDzz = 2*n[2]*edgeContrib[4] + n[1]*edgeContrib[5]; +} + +void accumulate_cluster_approximation(const ClusterApproximation & childApprox, ClusterApproximation & approx) +{ + approx.areaN += childApprox.areaN; + + const stk::math::Vector3d dx = childApprox.center - approx.center; + + const stk::math::Vector3d & n = childApprox.areaN; + + for (unsigned i=0; i<3; ++i) + for (unsigned j=0; j<3; ++j) + approx.areaNiXj[3*i+j] += childApprox.areaNiXj[3*i+j] + n[i]*dx[j]; + + const double childNxXx = childApprox.areaNiXj[3*0+0]; + const double childNxXy = childApprox.areaNiXj[3*0+1]; + const double childNxXz = childApprox.areaNiXj[3*0+2]; + const double childNyXx = childApprox.areaNiXj[3*1+0]; + const double childNyXy = childApprox.areaNiXj[3*1+1]; + const double childNyXz = childApprox.areaNiXj[3*1+2]; + const double childNzXx = childApprox.areaNiXj[3*2+0]; + const double childNzXy = childApprox.areaNiXj[3*2+1]; + const double childNzXz = childApprox.areaNiXj[3*2+2]; + + approx.areaNDdiag += childApprox.areaNDdiag + + stk::math::Vector3d(2*dx[0]*childNxXx, 
2*dx[1]*childNyXy, 2*dx[2]*childNzXz) + + stk::math::Vector3d(dx[0]*dx[0]*n[0], dx[1]*dx[1]*n[1], dx[2]*dx[2]*n[2]); + approx.areaNDperm += childApprox.areaNDperm + + (dx[0]*(childNyXz+childNzXy + n[1]*dx[2]+n[2]*dx[1]) + + dx[1]*(childNxXz+childNzXx + n[0]*dx[2]+n[2]*dx[0]) + + dx[2]*(childNxXy+childNyXx + n[0]*dx[1]+n[1]*dx[0])); + approx.area2NxDxy_NyDxx += childApprox.area2NxDxy_NyDxx + 2*(dx[1]*childNxXx + dx[0]*childNxXy + n[0]*dx[0]*dx[1]) + 2*childNyXx*dx[0] + n[1]*dx[0]*dx[0]; + approx.area2NxDxz_NzDxx += childApprox.area2NxDxz_NzDxx + 2*(dx[2]*childNxXx + dx[0]*childNxXz + n[0]*dx[0]*dx[2]) + 2*childNzXx*dx[0] + n[2]*dx[0]*dx[0]; + approx.area2NyDyz_NzDyy += childApprox.area2NyDyz_NzDyy + 2*(dx[2]*childNyXy + dx[1]*childNyXz + n[1]*dx[1]*dx[2]) + 2*childNzXy*dx[1] + n[2]*dx[1]*dx[1]; + approx.area2NyDyx_NxDyy += childApprox.area2NyDyx_NxDyy + 2*(dx[0]*childNyXy + dx[1]*childNyXx + n[1]*dx[1]*dx[0]) + 2*childNxXy*dx[1] + n[0]*dx[1]*dx[1]; + approx.area2NzDzx_NxDzz += childApprox.area2NzDzx_NxDzz + 2*(dx[0]*childNzXz + dx[2]*childNzXx + n[2]*dx[2]*dx[0]) + 2*childNxXz*dx[2] + n[0]*dx[2]*dx[2]; + approx.area2NzDzy_NyDzz += childApprox.area2NzDzy_NyDzz + 2*(dx[1]*childNzXz + dx[2]*childNzXy + n[2]*dx[2]*dx[1]) + 2*childNyXz*dx[2] + n[1]*dx[2]*dx[2]; +} + +void compute_cluster_approximation(const std::vector> & clusterFacets, const stk::math::Vector3d & clusterPt, ClusterApproximation & approx) +{ + approx.clear(); + approx.center = clusterPt; + for (const auto & facetCoords : clusterFacets) + { + ClusterApproximation facetApprox; + compute_facet_approximation(facetCoords, facetApprox); + accumulate_cluster_approximation(facetApprox, approx); + } +} + +void compute_cluster_approximation(const std::vector> & clusterFacets, const stk::math::Vector3d & clusterPt, FacetClusterApproximation & cluster) +{ + cluster.clear(); + cluster.center = clusterPt; + for (const auto & facetCoords : clusterFacets) + accumulate_cluster_approximation(facetCoords[0], 
facetCoords[1], facetCoords[2], cluster.center, cluster); +} + +double compute_faceted_surface_winding_number(const std::vector> & surfFacets, const stk::math::Vector3d & queryLoc) +{ + double windingNumber = 0; + for (const auto & facetCoords : surfFacets) + windingNumber += compute_facet_winding_number(facetCoords[0], facetCoords[1], facetCoords[2], queryLoc); + return windingNumber; +} + +} diff --git a/packages/krino/krino/geometry/Akri_WindingNumber.hpp b/packages/krino/krino/geometry/Akri_WindingNumber.hpp new file mode 100644 index 000000000000..ff332d31e9d0 --- /dev/null +++ b/packages/krino/krino/geometry/Akri_WindingNumber.hpp @@ -0,0 +1,70 @@ +#ifndef KRINO_KRINO_GEOMETRY_AKRI_WINDINGNUMBER_HPP_ +#define KRINO_KRINO_GEOMETRY_AKRI_WINDINGNUMBER_HPP_ + +#include +#include + +namespace krino { + +struct FacetClusterApproximation { + void clear() + { + center = stk::math::Vector3d::ZERO; + std::fill(areaN.begin(), areaN.end(), 0.); + std::fill(areaNiXj.begin(), areaNiXj.end(), 0.); + std::fill(areaDijNk.begin(), areaDijNk.end(), 0.); + } + + stk::math::Vector3d center; + std::array areaN; + std::array areaNiXj; + std::array areaDijNk; +}; + +struct ClusterApproximation +{ + void clear() + { + center = stk::math::Vector3d::ZERO; + areaN = stk::math::Vector3d::ZERO; + std::fill(areaNiXj.begin(), areaNiXj.end(), 0.); + + areaNDdiag = stk::math::Vector3d::ZERO; + areaNDperm = 0.; + area2NxDxy_NyDxx = 0.; + area2NxDxz_NzDxx = 0.; + area2NyDyz_NzDyy = 0.; + area2NyDyx_NxDyy = 0.; + area2NzDzx_NxDzz = 0.; + area2NzDzy_NyDzz = 0.; + } + + stk::math::Vector3d center; + stk::math::Vector3d areaN; + + // data for 1st order term and correction of 2nd order term + std::array areaNiXj; + + // data for 2nd order term + stk::math::Vector3d areaNDdiag; // NxDxx, NyDyy, NzDzz + double areaNDperm; // (NxDyz+NxDzy+NyDzx+NyDxz+NzDxy+NzDyx) = 2*(NxDyz+NyDzx+NzDxy) + double area2NxDxy_NyDxx; // NxDxy+NxDyx+NyDxx = 2NxDxy+NyDxx + double area2NxDxz_NzDxx; // NxDxz+NxDzx+NzDxx = 
2NxDxz+NzDxx + double area2NyDyz_NzDyy; // NyDyz+NyDzy+NzDyy = 2NyDyz+NzDyy + double area2NyDyx_NxDyy; // NyDyx+NyDxy+NxDyy = 2NyDyx+NxDyy + double area2NzDzx_NxDzz; // NzDzx+NzDxz+NxDzz = 2NzDzx+NxDzz + double area2NzDzy_NyDzz; // NzDzy+NzDyz+NyDzz = 2NzDzy+NyDzz +}; + +double compute_facet_winding_number(const stk::math::Vector3d & x0, const stk::math::Vector3d & x1, const stk::math::Vector3d & x2, const stk::math::Vector3d & queryLoc); +double compute_faceted_surface_winding_number(const std::vector> & surfFacets, const stk::math::Vector3d & queryLoc); + +void compute_cluster_approximation(const std::vector> & clusterFacets, const stk::math::Vector3d & clusterPt, FacetClusterApproximation & cluster); +double compute_approximate_winding_number(const FacetClusterApproximation & cluster, const stk::math::Vector3d & queryPt); + +void compute_cluster_approximation(const std::vector> & clusterFacets, const stk::math::Vector3d & clusterPt, ClusterApproximation & approx); +double compute_approximate_winding_number(const ClusterApproximation & cluster, const stk::math::Vector3d & queryPt); + +} + +#endif /* KRINO_KRINO_GEOMETRY_AKRI_WINDINGNUMBER_HPP_ */ diff --git a/packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.cpp b/packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.cpp new file mode 100644 index 000000000000..f748ee46a0c9 --- /dev/null +++ b/packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.cpp @@ -0,0 +1,430 @@ +/* + * Akri_AdaptiveElementContour.cpp + * + * Created on: Mar 20, 2024 + * Author: drnoble + */ + +#include +#include + +#include +#include +#include +#include +#include + +namespace krino { + +constexpr double snapTol = 1.e-6; +constexpr double nonlinearTol = 1.e-2; + +template +bool have_possibly_cut_edge(const std::array & coords, + const std::array & dist) +{ + for (size_t n1=0; n1 +bool have_possibly_cut_edge(const std::array & coords, + const std::array & dist) +{ + return have_possibly_cut_edge(coords, dist); +} + +double 
clip_midedge_distance(const double d0, const double d1, const double d2) +{ + const double d25 = 0.75*d0+0.25*d1; + const double d75 = 0.25*d0+0.75*d1; + if (d0 < d1) + return std::max(std::min(d2, d75), d25); + return std::max(std::min(d2, d25), d75); +} + +std::array apply_tri_snapping_and_clipping(const std::array & unfilteredTri6Dist, const double snapDistTol) +{ + std::array tri6Dist; + for (int n=0; n<3; ++n) + tri6Dist[n] = (std::abs(unfilteredTri6Dist[n]) < snapDistTol) ? 0.0 : unfilteredTri6Dist[n]; + tri6Dist[3] = clip_midedge_distance(tri6Dist[0], tri6Dist[1], unfilteredTri6Dist[3]); + tri6Dist[4] = clip_midedge_distance(tri6Dist[1], tri6Dist[2], unfilteredTri6Dist[4]); + tri6Dist[5] = clip_midedge_distance(tri6Dist[2], tri6Dist[0], unfilteredTri6Dist[5]); + return tri6Dist; +} + +std::array interpolate_subtri(const std::array & tri6Dist) +{ + std::array subTriDist = + {{ + 0.375*tri6Dist[0] + 0.75*tri6Dist[3] - 0.125*tri6Dist[1], + 0.375*tri6Dist[1] + 0.75*tri6Dist[3] - 0.125*tri6Dist[0], + 0.375*tri6Dist[1] + 0.75*tri6Dist[4] - 0.125*tri6Dist[2], + 0.375*tri6Dist[2] + 0.75*tri6Dist[4] - 0.125*tri6Dist[1], + 0.375*tri6Dist[2] + 0.75*tri6Dist[5] - 0.125*tri6Dist[0], + 0.375*tri6Dist[0] + 0.75*tri6Dist[5] - 0.125*tri6Dist[2], + 0.5*tri6Dist[4] + 0.5*tri6Dist[5] - 0.125*tri6Dist[0] + 0.25*tri6Dist[3] - 0.125*tri6Dist[1], + 0.5*tri6Dist[3] + 0.5*tri6Dist[5] - 0.125*tri6Dist[1] + 0.25*tri6Dist[4] - 0.125*tri6Dist[2], + 0.5*tri6Dist[3] + 0.5*tri6Dist[4] - 0.125*tri6Dist[0] + 0.25*tri6Dist[5] - 0.125*tri6Dist[2] + }}; + return subTriDist; +} + +template +void snap_distance(std::array & dist, const double snapDistTol) +{ + for (int n=0; n<9; ++n) + dist[n] = (std::abs(dist[n]) < snapDistTol) ? 
0.0 : dist[n]; +} + +template +std::array get_snapped_distance(const std::array & unsnappedDist, const double snapDistTol) +{ + std::array dist; + for (size_t n=0; n interpolate_and_snap_subtri(const std::array & tri6Dist, const double snapDistTol) +{ + std::array subTriDist = interpolate_subtri(tri6Dist); + for (int n=0; n<9; ++n) + subTriDist[n] = (std::abs(subTriDist[n]) < snapDistTol) ? 0.0 : subTriDist[n]; + return subTriDist; +} + +int compute_node_caseId(const double dist) +{ + return (dist == 0) ? 1 : ((dist < 0) ? 0 : 2); +} + +template +stk::math::Vector3d compute_quadratic_edge_crossing(const std::array & coords, + const std::array & distance, + const unsigned i0, const unsigned i1, const unsigned i2) +{ + const double loc = find_quadratic_crossing(distance[i0], distance[i1], distance[i2]); + return (1.-loc) * coords[i0] + loc * coords[i1]; +} + +void append_facets_for_converged_tri(const std::array & coords, + const std::array & tri6Dist, + const double lengthScale, + FacetedSurfaceBase & facets) +{ + const int caseId = compute_node_caseId(tri6Dist[0]) + 3*compute_node_caseId(tri6Dist[1]) + 9*compute_node_caseId(tri6Dist[2]); + + if (caseId == 0 || // ls[0]<0 && ls[1]<0 && ls[2]<0 + caseId == 26) // ls[0]>0 && ls[1]>0 && ls[2]>0 + return; + + static const unsigned case_permutations[] = + { 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, // 0-9 + 1, 2, 2, 0, 2, 2, 1, 1, 1, 1, // 10-19 + 2, 0, 0, 2, 0, 0, 0 }; // 20-26 + static const unsigned permute_case_ids[] = + { 0, 1, 2, 1, 4, 5, 2,21,24, 1, // 0-9 + 4,21, 4,13,22, 5,22,25, 2, 5, // 10-19 + 24,21,22,25,24,25,26 }; // 20-26 + + stk::topology topo = stk::topology::TRIANGLE_6_2D; + std::vector permute(6); + topo.permutation_node_ordinals(case_permutations[caseId], permute.begin()); + + const int permutedCaseId = permute_case_ids[caseId]; + + const unsigned i0 = permute[0]; + const unsigned i1 = permute[1]; + const unsigned i2 = permute[2]; + const unsigned i3 = permute[3]; + const unsigned i5 = permute[5]; + + switch 
(permutedCaseId) + { + case 1: // ls[0]=0 && ls[1]<0 && ls[2]<0 + case 22: // ls[0]=0 && ls[1]=0 && ls[2]>0 + case 25: // ls[0]=0 && ls[1]>0 && ls[2]>0 + // empty + break; + + case 2: // ls[0]>0 && ls[1]<0 && ls[2]<0 + case 24: // ls[0]<0 && ls[1]>0 && ls[2]>0 + { + const stk::math::Vector3d x3 = compute_quadratic_edge_crossing(coords, tri6Dist, i0, i1, i3); + const stk::math::Vector3d x5 = compute_quadratic_edge_crossing(coords, tri6Dist, i2, i0, i5); + if (permutedCaseId == 2) + facets.emplace_back_2d(x5, x3); + else + facets.emplace_back_2d(x3, x5); + } + break; + + case 4: // ls[0]=0 && ls[1]=0 && ls[2]<0 + { + facets.emplace_back_2d(coords[i0], coords[i1]); + } + break; + + case 5: // ls[0]>0 && ls[1]=0 && ls[2]<0 + case 21: // ls[0]<0 && ls[1]=0 && ls[2]>0 + { + const stk::math::Vector3d x5 = compute_quadratic_edge_crossing(coords, tri6Dist, i2, i0, i5); + if (permutedCaseId == 5) + facets.emplace_back_2d(x5, coords[i1]); + else + facets.emplace_back_2d(coords[i1], x5); + } + break; + + default: ThrowRuntimeError("Subelement decomposition error. 
caseId,permutedCaseId=" << caseId << "," << permutedCaseId); + } +} + +std::array get_tri6_coords_on_tri3(const std::array & tri3Coords) +{ + std::array tri6Coords = + {{ + tri3Coords[0], tri3Coords[1], tri3Coords[2], + 0.5*(tri3Coords[0]+tri3Coords[1]), + 0.5*(tri3Coords[1]+tri3Coords[2]), + 0.5*(tri3Coords[2]+tri3Coords[0]) + }}; + return tri6Coords; +} + +std::array get_tri6_distance_on_tri3(const std::array & tri6Coords, + const std::array & tri3Dist, + const std::function & distance_at_point) +{ + std::array tri6Dist = + {{ + tri3Dist[0], tri3Dist[1], tri3Dist[2], + distance_at_point(tri6Coords[3]), + distance_at_point(tri6Coords[4]), + distance_at_point(tri6Coords[5]), + }}; + return tri6Dist; +} + +template +std::array subarray(const std::array & a, const std::array & indices) +{ + std::array subarray; + for (size_t n=0; n & tri6Coords, + const std::array & tri6Dist, + const std::function & distance_at_point, + const double lengthScale, + FacetedSurfaceBase & facets, + const int currentDepth, + const int minDepth, + const int maxDepth, + const std::array & subElemNodeIndices) +{ + adaptively_append_facets_for_tri_using_semilagrangian_distance(subarray(tri6Coords, subElemNodeIndices), + subarray(tri6Dist, subElemNodeIndices), + distance_at_point, + lengthScale, + facets, + currentDepth+1, + minDepth, + maxDepth); +} + +void append_facets_for_subtri_of_converged_tri(const std::array & tri6Coords, + const std::array & tri6Dist, + const std::array & subTriDist, + const double lengthScale, + FacetedSurfaceBase & facets, + const std::array & subElemVertexIndices, + const std::array & subElemMidsideIndices) +{ + const std::array subTri6Dist = {{ tri6Dist[subElemVertexIndices[0]], tri6Dist[subElemVertexIndices[1]], tri6Dist[subElemVertexIndices[2]], + subTriDist[subElemMidsideIndices[0]], subTriDist[subElemMidsideIndices[1]], subTriDist[subElemMidsideIndices[2]] }}; + append_facets_for_converged_tri(subarray(tri6Coords, subElemVertexIndices), subTri6Dist, 
lengthScale, facets); +} + +void append_facets_for_subtri_of_converged_tri(const std::array & tri6Coords, + const std::array & tri6Dist, + const std::array & subTriDist, + const double lengthScale, + const int currentDepth, + const int maxDepth, + const std::array & subElemVertexIndices, + const std::array & subElemMidsideIndices, + FacetedSurfaceBase & facets) +{ + const std::array subTri6Dist = {{ tri6Dist[subElemVertexIndices[0]], tri6Dist[subElemVertexIndices[1]], tri6Dist[subElemVertexIndices[2]], + subTriDist[subElemMidsideIndices[0]], subTriDist[subElemMidsideIndices[1]], subTriDist[subElemMidsideIndices[2]] }}; + append_facets_for_converged_tri(subarray(tri6Coords, subElemVertexIndices), subTri6Dist, lengthScale, facets); +} + +bool is_edge_converged(const double d0, const double d1, const double d2, const double nonlinearDistTol) +{ + return (std::abs(d2 - 0.5*(d0+d1)) < nonlinearDistTol); +} + +int determine_tri_edge_refinement_case_id(const std::array & tri6Dist, + const double lengthScale, + const int currentDepth, + const int minDepth, + const int maxDepth) +{ + if (currentDepth < minDepth) + return 7; + if (currentDepth == maxDepth) + return 0; + const double nonlinearDistTol = nonlinearTol*lengthScale; + int caseId = 0; + if (!is_edge_converged(tri6Dist[0], tri6Dist[1], tri6Dist[3], nonlinearDistTol)) caseId += 1; + if (!is_edge_converged(tri6Dist[1], tri6Dist[2], tri6Dist[4], nonlinearDistTol)) caseId += 2; + if (!is_edge_converged(tri6Dist[2], tri6Dist[0], tri6Dist[5], nonlinearDistTol)) caseId += 4; + return caseId; +} + +void adaptively_append_facets_for_subtri_using_interpolated_distance(const std::array & tri6Coords, + const std::array & tri6Dist, + const std::array & subTriDist, + const double lengthScale, + const int currentDepth, + const int interpDepth, + FacetedSurfaceBase & facets, + const std::array & subElemVertexIndices, + const std::array & subElemMidsideIndices) +{ + const std::array subTri6Dist = {{ 
tri6Dist[subElemVertexIndices[0]], tri6Dist[subElemVertexIndices[1]], tri6Dist[subElemVertexIndices[2]], + subTriDist[subElemMidsideIndices[0]], subTriDist[subElemMidsideIndices[1]], subTriDist[subElemMidsideIndices[2]] }}; + const std::array subTri6Coords = {{ tri6Coords[subElemVertexIndices[0]], tri6Coords[subElemVertexIndices[1]], tri6Coords[subElemVertexIndices[2]], + 0.5*(tri6Coords[subElemVertexIndices[0]] + tri6Coords[subElemVertexIndices[1]]), + 0.5*(tri6Coords[subElemVertexIndices[1]] + tri6Coords[subElemVertexIndices[2]]) , + 0.5*(tri6Coords[subElemVertexIndices[2]] + tri6Coords[subElemVertexIndices[0]]) }}; + + if (have_possibly_cut_edge<3>(subTri6Coords, subTri6Dist)) + adaptively_append_facets_for_tri_using_interpolated_distance(subTri6Coords, subTri6Dist, lengthScale, currentDepth+1, interpDepth, facets); +} + +void adaptively_append_facets_for_tri_using_interpolated_distance(const std::array & tri6Coords, + const std::array & tri6Dist, + const double lengthScale, + const int currentDepth, + const int interpDepth, + FacetedSurfaceBase & facets) +{ + std::array subTriDist = interpolate_subtri(tri6Dist); + if (currentDepth < interpDepth) + { + adaptively_append_facets_for_subtri_using_interpolated_distance(tri6Coords, tri6Dist, subTriDist, lengthScale, currentDepth, interpDepth, facets, {{0,3,5}}, {{0,7,5}}); + adaptively_append_facets_for_subtri_using_interpolated_distance(tri6Coords, tri6Dist, subTriDist, lengthScale, currentDepth, interpDepth, facets, {{1,4,3}}, {{2,8,1}}); + adaptively_append_facets_for_subtri_using_interpolated_distance(tri6Coords, tri6Dist, subTriDist, lengthScale, currentDepth, interpDepth, facets, {{2,5,4}}, {{4,6,3}}); + adaptively_append_facets_for_subtri_using_interpolated_distance(tri6Coords, tri6Dist, subTriDist, lengthScale, currentDepth, interpDepth, facets, {{3,4,5}}, {{8,6,7}}); + } + else + { + snap_distance(subTriDist, snapTol*lengthScale); + + append_facets_for_subtri_of_converged_tri(tri6Coords, tri6Dist, 
subTriDist, lengthScale, facets, {{0,3,5}}, {{0,7,5}}); + append_facets_for_subtri_of_converged_tri(tri6Coords, tri6Dist, subTriDist, lengthScale, facets, {{1,4,3}}, {{2,8,1}}); + append_facets_for_subtri_of_converged_tri(tri6Coords, tri6Dist, subTriDist, lengthScale, facets, {{2,5,4}}, {{4,6,3}}); + append_facets_for_subtri_of_converged_tri(tri6Coords, tri6Dist, subTriDist, lengthScale, facets, {{3,4,5}}, {{8,6,7}}); + } +} + +void adaptively_append_facets_for_tri_using_semilagrangian_distance(const std::array & coords, + const std::array & distance, + const std::function & distance_at_point, + const double lengthScale, + FacetedSurfaceBase & facets, + const int currentDepth, + const int minDepth, + const int maxDepth) +{ + if (!have_possibly_cut_edge(coords, distance)) + return; + const std::array tri6Coords = get_tri6_coords_on_tri3(coords); + const std::array tri6Dist = get_tri6_distance_on_tri3(tri6Coords, distance, distance_at_point); + + const int refinementCaseId = determine_tri_edge_refinement_case_id(tri6Dist, lengthScale, currentDepth, minDepth, maxDepth); + + switch (refinementCaseId) + { + case 0: + { + const std::array filteredTri6Dist = apply_tri_snapping_and_clipping(tri6Dist, snapTol*lengthScale); + adaptively_append_facets_for_tri_using_interpolated_distance(tri6Coords, filteredTri6Dist, lengthScale, currentDepth, currentDepth+1, facets); + } + break; + + case 1: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,3,2}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{1,2,3}}); + } + break; + + case 2: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,1,4}}); + 
adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,4,2}}); + } + break; + + case 3: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{1,4,3}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,3,2}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{2,3,4}}); + } + break; + + case 4: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{1,2,5}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{1,5,0}}); + } + break; + + case 5: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,3,5}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{2,5,1}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{1,5,3}}); + } + break; + + case 6: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{2,5,4}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, 
facets, currentDepth, minDepth, maxDepth, {{1,4,0}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,4,5}}); + } + break; + + case 7: + { + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{0,3,5}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{1,4,3}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{2,5,4}}); + adaptively_append_facets_for_subtri_using_semilagrangian_distance(tri6Coords, tri6Dist, distance_at_point, lengthScale, facets, currentDepth, minDepth, maxDepth, {{3,4,5}}); + } + break; + + default: ThrowRuntimeError("Missing refinement case id =" << refinementCaseId); + } +} + +} + diff --git a/packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.hpp b/packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.hpp new file mode 100644 index 000000000000..8cec775add44 --- /dev/null +++ b/packages/krino/krino/krino_lib/Akri_AdaptiveElementContour.hpp @@ -0,0 +1,31 @@ +#ifndef KRINO_KRINO_KRINO_LIB_AKRI_ADAPTIVEELEMENTCONTOUR_HPP_ +#define KRINO_KRINO_KRINO_LIB_AKRI_ADAPTIVEELEMENTCONTOUR_HPP_ + +#include + +#include +#include + +namespace krino { + +class FacetedSurfaceBase; + +void adaptively_append_facets_for_tri_using_interpolated_distance(const std::array & tri6Coords, + const std::array & tri6Dist, + const double lengthScale, + const int currentDepth, + const int maxDepth, + FacetedSurfaceBase & facets); + +void adaptively_append_facets_for_tri_using_semilagrangian_distance(const std::array & coords, + const std::array & distance, + const std::function & distance_at_point, + const 
double distTol, + FacetedSurfaceBase & facets, + const int currentDepth, + const int minDepth, + const int maxDepth); + +} + +#endif /* KRINO_KRINO_KRINO_LIB_AKRI_ADAPTIVEELEMENTCONTOUR_HPP_ */ diff --git a/packages/krino/krino/krino_lib/Akri_AdaptivityHelpers.cpp b/packages/krino/krino/krino_lib/Akri_AdaptivityHelpers.cpp index 2d61b7a8cc84..b8cbcb90c848 100644 --- a/packages/krino/krino/krino_lib/Akri_AdaptivityHelpers.cpp +++ b/packages/krino/krino/krino_lib/Akri_AdaptivityHelpers.cpp @@ -63,7 +63,7 @@ void perform_multilevel_adaptivity(RefinementInterface & refinement, const auto & aux_meta = AuxMetaData::get(mesh.mesh_meta_data()); - const FieldRef elem_marker = refinement.get_marker_field(); + const FieldRef elem_marker = refinement.get_marker_field_and_sync_to_host(); const stk::mesh::Selector active_selector = aux_meta.active_part(); const stk::mesh::Selector locally_owned_selector = mesh.mesh_meta_data().locally_owned_part(); diff --git a/packages/krino/krino/krino_lib/Akri_AuxMetaData.cpp b/packages/krino/krino/krino_lib/Akri_AuxMetaData.cpp index cfaacddbf0cd..5afc0b303744 100644 --- a/packages/krino/krino/krino_lib/Akri_AuxMetaData.cpp +++ b/packages/krino/krino/krino_lib/Akri_AuxMetaData.cpp @@ -55,6 +55,14 @@ AuxMetaData::create(stk::mesh::MetaData & stk_meta) return *aux_meta; } +AuxMetaData & +AuxMetaData::get_or_create(stk::mesh::MetaData & stk_meta) +{ + if (AuxMetaData::has(stk_meta)) return AuxMetaData::get(stk_meta); + + return AuxMetaData::create(stk_meta); +} + AuxMetaData::AuxMetaData(stk::mesh::MetaData & stk_meta) : my_meta(stk_meta), is_fmwk(false), diff --git a/packages/krino/krino/krino_lib/Akri_AuxMetaData.hpp b/packages/krino/krino/krino_lib/Akri_AuxMetaData.hpp index 5253a63c0bea..eda0dfd104d7 100644 --- a/packages/krino/krino/krino_lib/Akri_AuxMetaData.hpp +++ b/packages/krino/krino/krino_lib/Akri_AuxMetaData.hpp @@ -63,6 +63,7 @@ class AuxMetaData final static AuxMetaData & get(const stk::mesh::MetaData & stk_meta); static bool 
has(const stk::mesh::MetaData & stk_meta); static AuxMetaData & create(stk::mesh::MetaData & stk_meta); // must be called before calling get + static AuxMetaData & get_or_create(stk::mesh::MetaData & stk_meta); AuxMetaData ( const AuxMetaData & ) = delete; AuxMetaData & operator= ( const AuxMetaData & ) = delete; diff --git a/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.cpp b/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.cpp index 9290a9350249..ecc1505c6a45 100644 --- a/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.cpp +++ b/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.cpp @@ -33,9 +33,10 @@ BoundingBoxMesh::BoundingBoxMesh(stk::topology element_topology, stk::ParallelMa element_topology == stk::topology::TETRAHEDRON_4 || element_topology == stk::topology::HEXAHEDRON_8); + myMeshStructureType = default_structure_type_for_topology(element_topology); + m_meta = stk::mesh::MeshBuilder().set_spatial_dimension(element_topology.dimension()) .create_meta_data(); - m_meta->use_simple_fields(); AuxMetaData & aux_meta = AuxMetaData::create(*m_meta); stk::mesh::Part & block_part = m_meta->declare_part_with_topology( "block_1", element_topology ); @@ -50,6 +51,18 @@ BoundingBoxMesh::BoundingBoxMesh(stk::topology element_topology, stk::ParallelMa stk::mesh::put_field_on_mesh(coordsField, m_meta->universal_part(), m_meta->spatial_dimension(), nullptr); } +BoundingBoxMeshStructureType BoundingBoxMesh::default_structure_type_for_topology(const stk::topology elementTopology) +{ + if (elementTopology == stk::topology::QUADRILATERAL_4_2D || elementTopology == stk::topology::HEXAHEDRON_8) + return CUBIC_BOUNDING_BOX_MESH; + else if (elementTopology == stk::topology::TETRAHEDRON_4) + return FLAT_WALLED_BCC_BOUNDING_BOX_MESH; + else if (elementTopology == stk::topology::TRIANGLE_3_2D) + return FLAT_WALLED_TRIANGULAR_LATTICE_BOUNDING_BOX_MESH; + STK_ThrowRequireMsg(false, "Unsupport topology " << elementTopology.name()); + return CUBIC_BOUNDING_BOX_MESH; +} + 
void BoundingBoxMesh::set_domain(const BoundingBoxType & mesh_bbox, const double mesh_size, const int pad_size) { @@ -396,7 +409,7 @@ BoundingBoxMesh::populate_cell_based_mesh() stk::mesh::FieldBase const* coord_field = m_meta->coordinate_field(); std::vector> hex_cell_node_locations = { {0,0,0}, {1,0,0}, {1,1,0}, {0,1,0}, {0,0,1}, {1,0,1}, {1,1,1}, {0,1,1} }; - std::vector> hex_cell_elem_nodes = {{0, 1, 2, 3, 4, 6, 7}}; + std::vector> hex_cell_elem_nodes = {{0, 1, 2, 3, 4, 5, 6, 7}}; std::vector> tet_even_cell_elem_nodes = {{0, 1, 2, 5}, {0, 2, 7, 5}, @@ -418,12 +431,12 @@ BoundingBoxMesh::populate_cell_based_mesh() const std::vector> & cell_node_locations = (dim == 2) ? quad_cell_node_locations : hex_cell_node_locations; const std::vector> & even_cell_elem_nodes = (m_element_topology == stk::topology::TRIANGLE_3_2D) ? tri_even_cell_elem_nodes : - ((m_element_topology == stk::topology::QUADRILATERAL_4) ? quad_cell_elem_nodes : + ((m_element_topology == stk::topology::QUADRILATERAL_4_2D) ? quad_cell_elem_nodes : ((m_element_topology == stk::topology::TETRAHEDRON_4) ? tet_even_cell_elem_nodes : hex_cell_elem_nodes)); const std::vector> & odd_cell_elem_nodes = (m_element_topology == stk::topology::TRIANGLE_3_2D) ? tri_odd_cell_elem_nodes : - ((m_element_topology == stk::topology::QUADRILATERAL_4) ? quad_cell_elem_nodes : + ((m_element_topology == stk::topology::QUADRILATERAL_4_2D) ? quad_cell_elem_nodes : ((m_element_topology == stk::topology::TETRAHEDRON_4) ? 
tet_odd_cell_elem_nodes : hex_cell_elem_nodes)); @@ -479,7 +492,7 @@ BoundingBoxMesh::populate_cell_based_mesh() for (auto && node_id : elem_nodes) { - stk::mesh::Entity const node = m_mesh->get_entity( stk::topology::NODE_RANK , node_id ); + stk::mesh::Entity const node = m_mesh->get_entity( stk::topology::NODE_RANK, node_id ); m_mesh->change_entity_parts(node, m_node_parts); auto map_it = nodes_to_procs.find(node_id); diff --git a/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.hpp b/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.hpp index 17ab5ffd5a5d..7d4cbe7d0fce 100644 --- a/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.hpp +++ b/packages/krino/krino/krino_lib/Akri_BoundingBoxMesh.hpp @@ -89,6 +89,7 @@ class BoundingBoxMesh : public MeshInterface { void set_mesh_structure_type(BoundingBoxMeshStructureType type) { myMeshStructureType = type; } bool has_flat_boundaries() const { return CUBIC_BOUNDING_BOX_MESH == myMeshStructureType || FLAT_WALLED_BCC_BOUNDING_BOX_MESH == myMeshStructureType || FLAT_WALLED_TRIANGULAR_LATTICE_BOUNDING_BOX_MESH == myMeshStructureType; } private: + static BoundingBoxMeshStructureType default_structure_type_for_topology(const stk::topology elementTopology); void declare_domain_side_parts(const stk::mesh::Part & blockPart); void require_has_flat_boundaries() const; void populate_2D_triangular_lattice_based_mesh(); diff --git a/packages/krino/krino/krino_lib/Akri_CDFEM_Support.cpp b/packages/krino/krino/krino_lib/Akri_CDFEM_Support.cpp index f82b88bb8e18..2998d76492b1 100644 --- a/packages/krino/krino/krino_lib/Akri_CDFEM_Support.cpp +++ b/packages/krino/krino/krino_lib/Akri_CDFEM_Support.cpp @@ -65,7 +65,7 @@ CDFEM_Support::CDFEM_Support(stk::mesh::MetaData & meta) myFlagUseVelocityToEvaluateInterfaceCFL(false), my_timer_cdfem("CDFEM", sierra::Diag::sierraTimer()) { - my_prolongation_model = ALE_NEAREST_POINT; + my_prolongation_model = ALE_CLOSEST_POINT; if (3 == my_meta.spatial_dimension()) 
my_simplex_generation_method = CUT_QUADS_BY_NEAREST_EDGE_CUT; @@ -217,6 +217,8 @@ CDFEM_Support::set_snap_fields() const stk::mesh::FieldState state = static_cast(is); if (state != stk::mesh::StateNew) mySnapFields.erase(lsField.field_state(state)); + else + mySnapFields.insert(lsField.field_state(state)); } } } @@ -231,6 +233,9 @@ CDFEM_Support::set_snap_fields() mySnapFields.erase(cdfemSnapField.field_state(state)); } } + + for (auto & field : mySnapFields) + krinolog << "Snap field " << field.name() << " (" << state_string(field.state()) << ")" << stk::diag::dendl; } void diff --git a/packages/krino/krino/krino_lib/Akri_CDFEM_Support.hpp b/packages/krino/krino/krino_lib/Akri_CDFEM_Support.hpp index c6175db7f930..d79582ad3b54 100644 --- a/packages/krino/krino/krino_lib/Akri_CDFEM_Support.hpp +++ b/packages/krino/krino/krino_lib/Akri_CDFEM_Support.hpp @@ -28,8 +28,8 @@ class RefinementInterface; enum Prolongation_Model { - ALE_NEAREST_NODE=0, - ALE_NEAREST_POINT, + ALE_CLOSEST_POINT=0, + ALE_CLOSEST_NODE, INTERPOLATION, MAX_PROLONGATION_MODEL }; diff --git a/packages/krino/krino/krino_lib/Akri_CDMesh.cpp b/packages/krino/krino/krino_lib/Akri_CDMesh.cpp index 59e7b9b20b3a..eee8628dc5d8 100644 --- a/packages/krino/krino/krino_lib/Akri_CDMesh.cpp +++ b/packages/krino/krino/krino_lib/Akri_CDMesh.cpp @@ -1482,8 +1482,6 @@ CDMesh::find_prolongation_node(const SubElementNode & targetNode) const const std::vector requiredFields = targetNode.prolongation_node_fields(*this); - STK_ThrowRequire(need_facets_for_prolongation()); - const ProlongationFacet * nearestProlongFacet = nullptr; FacetDistanceQuery nearestFacetQuery; diff --git a/packages/krino/krino/krino_lib/Akri_CDMesh.hpp b/packages/krino/krino/krino_lib/Akri_CDMesh.hpp index dfab86ef0bd6..e2d42a53015d 100644 --- a/packages/krino/krino/krino_lib/Akri_CDMesh.hpp +++ b/packages/krino/krino/krino_lib/Akri_CDMesh.hpp @@ -111,7 +111,7 @@ class CDMesh { const CDFEM_Support & get_cdfem_support() const { return 
my_cdfem_support; } CDFEM_Support & get_cdfem_support() { return my_cdfem_support; } bool need_nodes_for_prolongation() const { return INTERPOLATION != get_prolongation_model() && was_mesh_previously_decomposed(); } - bool need_facets_for_prolongation() const { return ALE_NEAREST_POINT == get_prolongation_model() && was_mesh_previously_decomposed(); } + bool need_facets_for_prolongation() const { return ALE_CLOSEST_POINT == get_prolongation_model() && was_mesh_previously_decomposed(); } Prolongation_Model get_prolongation_model() const { return my_cdfem_support.get_prolongation_model(); } Edge_Interpolation_Model get_edge_interpolation_model() const { return my_cdfem_support.get_edge_interpolation_model(); } const std::vector & all_interface_ids(const std::vector & surfaceIdentifiers) const; diff --git a/packages/krino/krino/krino_lib/Akri_CDMesh_Refinement.cpp b/packages/krino/krino/krino_lib/Akri_CDMesh_Refinement.cpp index 7f5b5c0b1be0..29a8b2d69071 100644 --- a/packages/krino/krino/krino_lib/Akri_CDMesh_Refinement.cpp +++ b/packages/krino/krino/krino_lib/Akri_CDMesh_Refinement.cpp @@ -83,7 +83,7 @@ refine_edges_with_multiple_unsnapped_crossings(const stk::mesh::BulkData& mesh, if (alreadyInSet) { std::vector edge_elems; - stk::mesh::get_entities_through_relations(mesh, {edge.nodes[0], edge.nodes[1]}, + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{edge.nodes[0], edge.nodes[1]}, stk::topology::ELEMENT_RANK, edge_elems); for (auto && elem : edge_elems) { @@ -115,7 +115,7 @@ refine_edges_with_nodes_with_multiple_snapped_interfaces(const stk::mesh::BulkDa if (num_node1_interfaces > 1 || num_node2_interfaces > 1) { std::vector edge_elems; - stk::mesh::get_entities_through_relations(mesh, {edge.nodes[0], edge.nodes[1]}, + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{edge.nodes[0], edge.nodes[1]}, stk::topology::ELEMENT_RANK, edge_elems); for (auto && elem : edge_elems) { @@ -234,7 +234,7 @@ static void 
initialize_marker(const stk::mesh::BulkData& mesh, const RefinementInterface & refinement, const bool isDefaultCoarsen) { - const FieldRef elementMarkerField = refinement.get_marker_field(); + const FieldRef elementMarkerField = refinement.get_marker_field_and_sync_to_host(); const int initialVal = isDefaultCoarsen ? static_cast(Refinement_Marker::COARSEN) : static_cast(Refinement_Marker::NOTHING); stk::mesh::field_fill(initialVal, elementMarkerField); } @@ -256,7 +256,7 @@ static void mark_given_elements(const stk::mesh::BulkData& mesh, const int minRefineLevel, const bool isDefaultCoarsen) { - const FieldRef elementMarkerField = refinement.get_marker_field(); + const FieldRef elementMarkerField = refinement.get_marker_field_and_sync_to_host(); constexpr bool doMarkElement = true; for( auto&& elem : elementsToMark ) @@ -386,7 +386,7 @@ mark_interface_elements_for_adaptivity(const stk::mesh::BulkData& mesh, // This refinement strategy cuts elements by the user-specified number of adapt levels // before the conformal decomposition. 
- const FieldRef elementMarkerField = refinement.get_marker_field(); + const FieldRef elementMarkerField = refinement.get_marker_field_and_sync_to_host(); const stk::mesh::Selector locally_owned_selector(mesh.mesh_meta_data().locally_owned_part()); const int interfaceMinRefineLevel = refinementSupport.get_interface_minimum_refinement_level(); @@ -438,7 +438,7 @@ refine_edges_with_unsnappable_nodes(const stk::mesh::BulkData& mesh, { krinolog << "Refining unsnappable edge " << mesh.identifier(edge.nodes[0]) << " " << mesh.identifier(edge.nodes[1]) << " " << debug_output(mesh, edgeIntersection) << stk::diag::dendl; std::vector edge_elems; - stk::mesh::get_entities_through_relations(mesh, {edge.nodes[0], edge.nodes[1]}, + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{edge.nodes[0], edge.nodes[1]}, stk::topology::ELEMENT_RANK, edge_elems); for (auto && elem : edge_elems) { diff --git a/packages/krino/krino/krino_lib/Akri_DetermineElementSign.cpp b/packages/krino/krino/krino_lib/Akri_DetermineElementSign.cpp index a68ae2b8d86a..d261dded6975 100644 --- a/packages/krino/krino/krino_lib/Akri_DetermineElementSign.cpp +++ b/packages/krino/krino/krino_lib/Akri_DetermineElementSign.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace krino { @@ -60,7 +61,7 @@ static void assign_interface_element_sign(const unsigned surfIndex, static void fill_edge_elements(const stk::mesh::BulkData & mesh, const Edge & edge, std::vector & edgeElements) { const std::array & edgeNodes = get_edge_nodes(edge); - stk::mesh::get_entities_through_relations(mesh, {edgeNodes[0], edgeNodes[1]}, stk::topology::ELEMENT_RANK, edgeElements); + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{edgeNodes[0], edgeNodes[1]}, stk::topology::ELEMENT_RANK, edgeElements); } static void check_edge_intersections_to_assign_crossed_elements_and_find_nodes_on_either_side_of_surface(const stk::mesh::BulkData & mesh, diff --git 
a/packages/krino/krino/krino_lib/Akri_ElementCutterUtils.cpp b/packages/krino/krino/krino_lib/Akri_ElementCutterUtils.cpp index 96cd86ec7a4b..47fc952b6a74 100644 --- a/packages/krino/krino/krino_lib/Akri_ElementCutterUtils.cpp +++ b/packages/krino/krino/krino_lib/Akri_ElementCutterUtils.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include namespace krino { diff --git a/packages/krino/krino/krino_lib/Akri_Fast_Marching.cpp b/packages/krino/krino/krino_lib/Akri_Fast_Marching.cpp index f54501057bc5..4f564bbba05e 100644 --- a/packages/krino/krino/krino_lib/Akri_Fast_Marching.cpp +++ b/packages/krino/krino/krino_lib/Akri_Fast_Marching.cpp @@ -43,7 +43,9 @@ Fast_Marching::Fast_Marching(const stk::mesh::BulkData & mesh, { if (fieldSelector(*bucket) && bucket->topology() != stk::topology::TRIANGLE_3_2D && - bucket->topology() != stk::topology::TETRAHEDRON_4) + bucket->topology() != stk::topology::QUADRILATERAL_4_2D && + bucket->topology() != stk::topology::TETRAHEDRON_4 && + bucket->topology() != stk::topology::HEXAHEDRON_8) { err << "Topology " << bucket->topology().name() << " is not supported in Fast_Marching.\n"; } @@ -134,7 +136,7 @@ void Fast_Marching::initialize_nodes_of_crossed_elements() stk::mesh::get_selected_entities( selected_with_field_not_ghost_selector(), myMesh.buckets( stk::topology::ELEMENT_RANK ), field_elems ); for (auto&& elem : field_elems) if (have_crossing(elem)) - initialize_element(elem, get_element_interface_speed(err, elem)); + initialize_element(elem, err); if (myMesh.parallel_size() > 1) { @@ -316,16 +318,13 @@ void Fast_Marching::initialize_node(const stk::mesh::Entity node) } bool -Fast_Marching::have_crossing(const stk::mesh::Entity & elem) const +Fast_Marching::have_crossing(const StkMeshEntities & elemNodes) const { - const unsigned npe = myMesh.bucket(elem).topology().num_nodes(); - STK_ThrowAssert(npe > 0); - - const stk::mesh::Entity * elem_nodes = myMesh.begin(elem, stk::topology::NODE_RANK); - const double dist0 = 
get_node_distance(elem_nodes[0]); - for (unsigned n=1; n 0); + const double dist0 = get_node_distance(elemNodes[0]); + for (unsigned n=1; n build_get_fm_node_coordinates(Fast_Marching * fm) { return [fm](stk::mesh::Entity node) -> const stk::math::Vector3d & @@ -344,8 +350,56 @@ static std::function build_get_f }; } +template +std::array get_simplex_nodes_from_element_nodes(const stk::mesh::Entity* elemNodes, const std::array & simplexNodeIndices) +{ + std::array simplexNodes; + for (size_t n=0; n +void Fast_Marching::initialize_from_simplex(const std::array & simplexNodes, + const double elemSpeed, + const std::function & get_coordinates) +{ + const StkMeshEntities simplexNodeEntities{simplexNodes.data(), simplexNodes.data()+NNODES}; + if (have_crossing(simplexNodeEntities)) + { + const double mag_grad = calculate_gradient_magnitude(NNODES, simplexNodes.data(), myDistance, get_coordinates); + + for (size_t inode=0; inodestatus() != STATUS_UNUSED); + const double elem_node_dist = get_node_distance(simplexNodes[inode]) / (mag_grad * elemSpeed); + const int sign = fm_node->sign(); + fm_node->set_signed_dist(sign * std::min(std::abs(fm_node->signed_dist()), std::abs(elem_node_dist))); + fm_node->set_status(STATUS_INITIAL); + } + } +} + +template +void Fast_Marching::initialize_from_simplices(const stk::mesh::Entity* elemNodes, const std::array,NSIMPLICES> & simplices, + const double elemSpeed, + const std::function & get_coordinates) +{ + for (auto & simplex : simplices) + initialize_from_simplex(get_simplex_nodes_from_element_nodes(elemNodes, simplex), elemSpeed, get_coordinates); +} + +static constexpr std::array,1> triangleSimplices = {{ {{0,1,2}} }}; +static constexpr std::array,4> quadrilateralSimplices = {{ {{0,1,2}}, {{0,2,3}}, {{0,1,3}}, {{1,2,3}} }}; +static constexpr std::array,1> tetrahedronSimplices = {{ {{0,1,2,3}} }}; +static constexpr std::array,10> hexahedronSimplices = {{ + {{0,1,3,4}}, {{1,2,3,6}}, {{3,4,6,7}}, {{1,6,4,5}}, {{1,3,4,6}}, + 
{{0,1,2,5}}, {{0,2,3,7}}, {{0,5,7,4}}, {{2,7,5,6}}, {{0,2,7,5}} +}}; + void -Fast_Marching::initialize_element(const stk::mesh::Entity & elem, const double speed) +Fast_Marching::initialize_element(const stk::mesh::Entity & elem, ParallelErrorMessage& err) { // To start the nodes of elements that have interfaces will be redistanced. // I have tried a few different methods for this distance calculation with varying success. @@ -367,84 +421,123 @@ Fast_Marching::initialize_element(const stk::mesh::Entity & elem, const double s // Initialize using method #5 (element rescaling) - const stk::mesh::Entity * elem_nodes = myMesh.begin(elem, stk::topology::NODE_RANK); - const int npe = myMesh.bucket(elem).topology().num_nodes(); - auto get_coordinates = build_get_fm_node_coordinates(this); - const double mag_grad = calculate_gradient_magnitude(npe, elem_nodes, myDistance, get_coordinates); + const stk::topology elemTopology = myMesh.bucket(elem).topology(); - for (int inode=0; inodestatus() != STATUS_UNUSED); - const double elem_node_dist = get_node_distance(elem_nodes[inode]) / (mag_grad * speed); - const int sign = fm_node->sign(); - fm_node->set_signed_dist(sign * std::min(std::abs(fm_node->signed_dist()), std::abs(elem_node_dist))); - fm_node->set_status(STATUS_INITIAL); + case stk::topology::TRIANGLE_3_2D: + initialize_from_simplices(elemNodes, triangleSimplices, elemSpeed, get_coordinates); + break; + case stk::topology::QUADRILATERAL_4_2D: + initialize_from_simplices(elemNodes, quadrilateralSimplices, elemSpeed, get_coordinates); + break; + case stk::topology::TETRAHEDRON_4: + initialize_from_simplices(elemNodes, tetrahedronSimplices, elemSpeed, get_coordinates); + break; + case stk::topology::HEXAHEDRON_8: + initialize_from_simplices(elemNodes, hexahedronSimplices, elemSpeed, get_coordinates); + break; + default: + err << "Unsupported element topology " << elemTopology.name() << " in initialize_element.\n"; } } -void 
-Fast_Marching::update_neighbors(Fast_Marching_Node & accepted_node, ParallelErrorMessage& err) +static bool do_make_neighbor_a_trial_node(const Fast_Marching_Node & acceptedNode, const Fast_Marching_Node & nbrNode) { - const stk::mesh::Selector elemSelector = selected_with_field_not_ghost_selector(); - - stk::mesh::Entity node = accepted_node.node(); + if (nbrNode.status() == STATUS_FAR) + return true; + if (nbrNode.status() == STATUS_ACCEPTED) + { + const double accepted_node_unsigned_dist = acceptedNode.signed_dist()*acceptedNode.sign(); + const double nbr_unsigned_dist = nbrNode.signed_dist()*nbrNode.sign(); + if(nbr_unsigned_dist > accepted_node_unsigned_dist) + return true; + } + return false; +} - STK_ThrowAssertMsg(STATUS_ACCEPTED == accepted_node.status() || STATUS_INITIAL == accepted_node.status(), "Expected ACCEPTED OR INITIAL status"); +template +void Fast_Marching::update_neighbors_from_simplex(const Fast_Marching_Node & acceptedNode, const std::array & simplexNodes, const double elemSpeed) +{ + int node_to_update = -1; + int num_trial = 0; - const int dim = myMesh.mesh_meta_data().spatial_dimension(); - STK_ThrowAssert(2 == dim || 3 == dim); - const int npe_dist = (2==dim) ? 
3 : 4; - std::vector elem_nodes(npe_dist); + std::array simplexFmNodes; - const unsigned num_node_elems = myMesh.num_elements(node); - const stk::mesh::Entity* node_elems = myMesh.begin_elements(node); - for (unsigned node_elem_index=0; node_elem_indexstatus() || STATUS_ACCEPTED == fm_nbr->status() || STATUS_FAR == fm_nbr->status() || STATUS_TRIAL == fm_nbr->status()), "Unexpected node status."); + simplexFmNodes[i] = fm_nbr; + if (do_make_neighbor_a_trial_node(acceptedNode, *fm_nbr)) + { + add_trial_node(*fm_nbr); + } + if (fm_nbr->status() == STATUS_TRIAL) { - continue; + ++num_trial; + node_to_update = i; } + } - const double speed = get_element_interface_speed(err, elem); + if (1 == num_trial) + { + update_trial_node_from_simplex(simplexFmNodes, node_to_update, elemSpeed); + } +} - int node_to_update = -1; - int num_trial = 0; +template +void Fast_Marching::update_neighbors_from_simplices(const Fast_Marching_Node & acceptedNode, + const stk::mesh::Entity* elemNodes, + const std::array,NSIMPLICES> & simplices, + const double elemSpeed) +{ + for (auto & simplex : simplices) + update_neighbors_from_simplex(acceptedNode, get_simplex_nodes_from_element_nodes(elemNodes, simplex), elemSpeed); +} - const stk::mesh::Entity* nodes = myMesh.begin_nodes(elem); - for ( int i = 0; i < npe_dist; ++i ) +void +Fast_Marching::update_neighbors(Fast_Marching_Node & acceptedNode, ParallelErrorMessage& err) +{ + STK_ThrowAssertMsg(STATUS_ACCEPTED == acceptedNode.status() || STATUS_INITIAL == acceptedNode.status(), "Expected ACCEPTED OR INITIAL status"); + + stk::mesh::Entity node = acceptedNode.node(); + + const stk::mesh::Selector elemSelector = selected_with_field_not_ghost_selector(); + + for (auto elem : StkMeshEntities{myMesh.begin_elements(node), myMesh.end_elements(node)}) + { + if (myMesh.is_valid(elem) && elemSelector(myMesh.bucket(elem))) { - Fast_Marching_Node * fm_nbr = get_fm_node(nodes[i]); - STK_ThrowAssertMsg(nullptr != fm_nbr && (STATUS_INITIAL == fm_nbr->status() 
|| STATUS_ACCEPTED == fm_nbr->status() || STATUS_FAR == fm_nbr->status() || STATUS_TRIAL == fm_nbr->status()), "Unexpected node status."); - elem_nodes[i] = fm_nbr; - bool do_add_trial_node = fm_nbr->status() == STATUS_FAR; - if (fm_nbr->status() == STATUS_ACCEPTED) - { - const double accepted_node_unsigned_dist = accepted_node.signed_dist()*accepted_node.sign(); - const double nbr_unsigned_dist = fm_nbr->signed_dist()*fm_nbr->sign(); - if(nbr_unsigned_dist > accepted_node_unsigned_dist) - { - do_add_trial_node = true; - } - } - if (do_add_trial_node) - { - add_trial_node(*fm_nbr); - } - if (fm_nbr->status() == STATUS_TRIAL) + const double elemSpeed = get_element_interface_speed(err, elem); + const stk::topology elemTopology = myMesh.bucket(elem).topology(); + + const stk::mesh::Entity* elemNodes = myMesh.begin_nodes(elem); + + switch(elemTopology()) { - ++num_trial; - node_to_update = i; + case stk::topology::TRIANGLE_3_2D: + update_neighbors_from_simplices(acceptedNode, elemNodes, triangleSimplices, elemSpeed); + break; + case stk::topology::QUADRILATERAL_4_2D: + update_neighbors_from_simplices(acceptedNode, elemNodes, quadrilateralSimplices, elemSpeed); + break; + case stk::topology::TETRAHEDRON_4: + update_neighbors_from_simplices(acceptedNode, elemNodes, tetrahedronSimplices, elemSpeed); + break; + case stk::topology::HEXAHEDRON_8: + update_neighbors_from_simplices(acceptedNode, elemNodes, hexahedronSimplices, elemSpeed); + break; + default: + err << "Unsupported element topology " << elemTopology.name() << " in initialize_element.\n"; } } - - if (1 == num_trial) - { - update_node(elem_nodes, node_to_update, speed); - } } } @@ -470,35 +563,26 @@ Fast_Marching::update_trial_node(Fast_Marching_Node & trial_node, const double d trial_nodes.insert(&trial_node); } -void -Fast_Marching::update_node(std::vector & elem_nodes, int node_to_update, const double speed) +template +void Fast_Marching::update_trial_node_from_simplex(const std::array & simplexNodes, const int 
nodeToUpdate, const double speed) { - // update distance + static_assert(NNODES == 3 || NNODES == 4); double dist = std::numeric_limits::max(); - const int npe_dist = elem_nodes.size(); - if (3 == npe_dist) - { - dist = update_triangle(elem_nodes, node_to_update, speed); - } - else if (4 == npe_dist) - { - dist = update_tetrahedron(elem_nodes, node_to_update, speed); - } + if constexpr(4 == NNODES) + dist = update_tetrahedron(simplexNodes, nodeToUpdate, speed); else - { - STK_ThrowAssertMsg(false, "Unexpected number of nodes per element: " << npe_dist); - } + dist = update_triangle(simplexNodes, nodeToUpdate, speed); - Fast_Marching_Node & fm_node = *elem_nodes[node_to_update]; - if (dist*fm_node.sign() < fm_node.signed_dist()*fm_node.sign()) + Fast_Marching_Node & fmNode = *simplexNodes[nodeToUpdate]; + if (dist*fmNode.sign() < fmNode.signed_dist()*fmNode.sign()) { - update_trial_node(fm_node, dist); + update_trial_node(fmNode, dist); } } double -Fast_Marching::update_triangle(std::vector & elemNodes, int nodeToUpdate, const double speed) +Fast_Marching::update_triangle(const std::array & elemNodes, const int nodeToUpdate, const double speed) { static constexpr double far = std::numeric_limits::max(); @@ -512,7 +596,7 @@ Fast_Marching::update_triangle(std::vector & elemNodes, in } double -Fast_Marching::update_tetrahedron(std::vector & elemNodes, int nodeToUpdate, const double speed) +Fast_Marching::update_tetrahedron(const std::array & elemNodes, const int nodeToUpdate, const double speed) { static constexpr double far = std::numeric_limits::max(); const std::array lnn = get_oriented_nodes_tetrahedron(nodeToUpdate); diff --git a/packages/krino/krino/krino_lib/Akri_Fast_Marching.hpp b/packages/krino/krino/krino_lib/Akri_Fast_Marching.hpp index 77bed6ad448f..0efd71822e31 100644 --- a/packages/krino/krino/krino_lib/Akri_Fast_Marching.hpp +++ b/packages/krino/krino/krino_lib/Akri_Fast_Marching.hpp @@ -9,6 +9,7 @@ #ifndef Akri_Fast_Marching_h #define 
Akri_Fast_Marching_h +#include #include #include #include @@ -19,6 +20,7 @@ class SubElement; class Mesh_Element; class AuxMetaData; class ParallelErrorMessage; +struct StkMeshEntities; enum Enum_Fast_Marching_Node_Status{STATUS_UNUSED=0, STATUS_INITIAL, STATUS_ACCEPTED, STATUS_TRIAL, STATUS_FAR}; @@ -78,11 +80,11 @@ class Fast_Marching { void update_neighbors(Fast_Marching_Node & accepted_node, ParallelErrorMessage & err); void update_node(std::vector & elem_nodes, int node_to_update, const double speed); + bool have_crossing(const StkMeshEntities & elemNodes) const; bool have_crossing(const stk::mesh::Entity & elem) const; - void initialize_subelement(const SubElement & subelem, const int side, const double speed); - void initialize_element(const stk::mesh::Entity & elem, const double speed); - double update_triangle(std::vector & elem_nodes, int node_to_update, const double speed); - double update_tetrahedron(std::vector & elem_nodes, int node_to_update, const double speed); + void initialize_element(const stk::mesh::Entity & elem, ParallelErrorMessage& err); + double update_triangle(const std::array & elemNodes, const int node_to_update, const double speed); + double update_tetrahedron(const std::array & elemNodes, const int node_to_update, const double speed); void add_trial_node(Fast_Marching_Node & add_trial_node); void update_trial_node(Fast_Marching_Node & add_trial_node, const double dist); @@ -101,6 +103,24 @@ class Fast_Marching { stk::mesh::Selector selected_with_field_not_ghost_selector() const; stk::mesh::Selector selected_with_field_selector() const; + void update_neighbors_from_triangle_element(const Fast_Marching_Node & acceptedNode, const stk::mesh::Entity elem, const double elemSpeed); + void update_neighbors_from_quadrilateral_element(const Fast_Marching_Node & acceptedNode, const stk::mesh::Entity elem, const double elemSpeed); + void update_neighbors_from_tetrahedron_element(const Fast_Marching_Node & acceptedNode, const stk::mesh::Entity 
elem, const double elemSpeed); + + template void update_neighbors_from_simplices(const Fast_Marching_Node & acceptedNode, + const stk::mesh::Entity* elemNodes, + const std::array,NSIMPLICES> & simplices, + const double elemSpeed); + template void update_neighbors_from_simplex(const Fast_Marching_Node & acceptedNode, const std::array & simplexNodes, const double elemSpeed); + template void update_trial_node_from_simplex(const std::array & simplexNodes, const int nodeToUpdate, const double speed); + + template void initialize_from_simplex(const std::array & simplexNodes, + const double elemSpeed, + const std::function & get_coordinates); + template void initialize_from_simplices(const stk::mesh::Entity* elemNodes, const std::array,NSIMPLICES> & simplices, + const double elemSpeed, + const std::function & get_coordinates); + const stk::mesh::BulkData & myMesh; stk::mesh::Selector mySelector; const FieldRef myCoordinates; diff --git a/packages/krino/krino/krino_lib/Akri_IC_Alg.hpp b/packages/krino/krino/krino_lib/Akri_IC_Alg.hpp index c0e5d46de506..5fc14f8df71f 100644 --- a/packages/krino/krino/krino_lib/Akri_IC_Alg.hpp +++ b/packages/krino/krino/krino_lib/Akri_IC_Alg.hpp @@ -31,6 +31,8 @@ class IC_Alg { // query number of surfaces unsigned numberSurfaces() { return surface_list.size();} + unsigned numberCalculators() { return my_calculators.size();} + // push a surface onto our container void addSurface(Surface * surf) { surface_list.add(surf); } @@ -42,6 +44,8 @@ class IC_Alg { BoundingBox get_surface_bounding_box(); void execute(const double time); + + Composite_Surface & get_surfaces() { return surface_list; } private: void compute_IC_error_indicator(); private: diff --git a/packages/krino/krino/krino_lib/Akri_LevelSet.cpp b/packages/krino/krino/krino_lib/Akri_LevelSet.cpp index 608097d57b18..732d63fdff3f 100644 --- a/packages/krino/krino/krino_lib/Akri_LevelSet.cpp +++ b/packages/krino/krino/krino_lib/Akri_LevelSet.cpp @@ -8,6 +8,7 @@ #include +#include #include 
#include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #include #include @@ -346,57 +349,39 @@ LevelSet::set_surface_distance(std::vector surfaces, const do } } -static stk::math::Vector3d compute_interface_velocity_at_point(const int dim, const double time, const stk::math::Vector3d & coords, const std::vector & interfaceVelocityExpr) -{ - if (2 == dim) - return stk::math::Vector3d(interfaceVelocityExpr[0].evaluate(time, coords), interfaceVelocityExpr[1].evaluate(time, coords), 0.0); - return stk::math::Vector3d(interfaceVelocityExpr[0].evaluate(time, coords), interfaceVelocityExpr[1].evaluate(time, coords), interfaceVelocityExpr[2].evaluate(time, coords)); -} - -template -double compute_max_facet_velocity_magnitude(const double time, - const std::vector & facets, - const std::vector & interfaceVelocity) -{ - double maxSqrMag = 0.; - for (auto & facet : facets) - { - for (int n=0; n maxSqrMag) - maxSqrMag = velSqrMag; - } - } - return std::sqrt(maxSqrMag); -} - //----------------------------------------------------------------------------------- + void LevelSet::advance_semilagrangian(const double timeN, const double timeNp1) { STK_ThrowRequireMsg(myInterfaceVelocity.size() == spatial_dimension, "Did not find interface velocity expression. Was it provided?"); - const stk::mesh::Selector activeFieldSelector = aux_meta().active_not_ghost_selector() & stk::mesh::selectField(get_distance_field()); - BoundingBox nodeBBox = krino::compute_nodal_bbox(mesh(), activeFieldSelector, get_coordinates_field()); + stk::mesh::field_copy(my_old_distance_field, my_distance_field); // 0th order predictor needed for preserving sign with narrow_band + facets->swap( *facets_old ); // store existing facets in facets_old - const double timeMid = 0.5*(timeN+timeNp1); - const double velMag = (2==spatial_dimension) ? 
- compute_max_facet_velocity_magnitude(timeMid, facets->get_facets_2d(), myInterfaceVelocity) : - compute_max_facet_velocity_magnitude(timeMid, facets->get_facets_3d(), myInterfaceVelocity); - const double paddingFactorOfSafety = 1.5; - nodeBBox.pad(paddingFactorOfSafety*velMag*(timeNp1-timeN)); // Need something better? + const stk::mesh::Selector activeFieldSelector = aux_meta().active_not_ghost_selector() & stk::mesh::selectField(get_distance_field()); + const BoundingBox paddedNodeBBox = compute_padded_node_bounding_box_for_semilagrangian(mesh(), activeFieldSelector, timeN, timeNp1, get_coordinates_field(), myInterfaceVelocity, *facets_old); + const double avgEdgeLength = compute_average_edge_length(); - facets->prepare_to_compute(nodeBBox, my_narrow_band_size); + facets_old->prepare_to_compute(paddedNodeBBox, my_narrow_band_size); - stk::mesh::field_copy(my_old_distance_field, my_distance_field); // 0th order predictor needed for preserving sign with narrow_band - compute_distance_semilagrangian( timeN, timeNp1, activeFieldSelector ); - - facets->swap( *facets_old ); // store existing facets in facets_old + if (mySemiLagrangianAlg == NON_ADAPTIVE_SINGLE_STEP) + { + calc_single_step_nonadaptive_semilagrangian_nodal_distance_and_build_facets(mesh(), activeFieldSelector, timeN, timeNp1, get_coordinates_field(), get_distance_field(), myInterfaceVelocity, my_narrow_band_size, avgEdgeLength, *facets_old, *facets); + } + else if (mySemiLagrangianAlg == ADAPTIVE_PREDICTOR_CORRECTOR) + { + std::unique_ptr facetsPred = FacetedSurfaceBase::build(my_meta.spatial_dimension()); + predict_semilagrangian_nodal_distance_and_build_facets(mesh(), activeFieldSelector, timeN, timeNp1, get_coordinates_field(), get_distance_field(), myInterfaceVelocity, my_narrow_band_size, avgEdgeLength, *facets_old, *facetsPred); - build_facets_locally(my_meta.universal_part()); + facetsPred->prepare_to_compute(paddedNodeBBox, my_narrow_band_size); + 
correct_semilagrangian_nodal_distance_and_build_facets(mesh(), activeFieldSelector, timeN, timeNp1, get_coordinates_field(), get_distance_field(), myInterfaceVelocity, my_narrow_band_size, avgEdgeLength, *facets_old, *facetsPred, *facets); + } + else + { + STK_ThrowRequireMsg(false, "Unrecognized Semi-Lagrangian algorithm " << mySemiLagrangianAlg); + } - // debugging if (krinolog.shouldPrint(LOG_FACETS)) { write_facets(); @@ -463,6 +448,36 @@ LevelSet::initialize(const double time) stk::mesh::field_copy(get_distance_field(), get_old_distance_field()); } + +bool LevelSet::can_create_adaptive_initial_facets_from_initial_surfaces_because_initial_distance_is_solely_from_initial_surfaces() const +{ + if (!my_compute_surface_distance_parts.empty() || + compute_time_of_arrival() || + my_perform_initial_redistance || + my_ic_offset != 0.0 || + my_ic_scale != 1.0 || + my_IC_alg->numberCalculators() > 0) + return false; + return true; +} + +void LevelSet::build_initial_facets(const double time) +{ + const bool buildAdaptiveFacets = (mySemiLagrangianAlg == ADAPTIVE_PREDICTOR_CORRECTOR) && + can_create_adaptive_initial_facets_from_initial_surfaces_because_initial_distance_is_solely_from_initial_surfaces(); + + Composite_Surface & initSurfaces = my_IC_alg->get_surfaces(); // Note that it is assumed that this fn is called right after initialize and therefore initSurfaces.prepare_to_compute has already been called. 
+ + const stk::mesh::Selector activeFieldSelector = aux_meta().active_not_ghost_selector() & stk::mesh::selectField(get_distance_field()); + + const double avgEdgeLength = compute_average_edge_length(); + + if (buildAdaptiveFacets) + build_initial_adaptive_facets_after_nodal_distance_is_initialized_from_initial_surfaces(mesh(), activeFieldSelector, time, get_coordinates_field(), get_distance_field(), avgEdgeLength, initSurfaces, *facets); + else + build_nonadaptive_facets(mesh(), activeFieldSelector, get_coordinates_field(), get_distance_field(), avgEdgeLength, *facets); +} + //----------------------------------------------------------------------------------- void LevelSet::clear_initialization_data(stk::mesh::MetaData & meta) @@ -869,11 +884,6 @@ static bool determine_polarity_for_negative_side_of_interface(const stk::mesh::B return false; } -static std::array get_triangle_side_vector(const stk::mesh::BulkData & mesh, const FieldRef vecField, const std::array triangleNodes) -{ - return {{ get_vector_field(mesh, vecField, triangleNodes[0]), get_vector_field(mesh, vecField, triangleNodes[1]), get_vector_field(mesh, vecField, triangleNodes[2]) }}; -} - static std::array get_line_side_vector(const stk::mesh::BulkData & mesh, const FieldRef vecField, const std::array lineNodes) { return {{ get_vector_field(mesh, vecField, lineNodes[0], 2), get_vector_field(mesh, vecField, lineNodes[1], 2) }}; @@ -892,7 +902,7 @@ static std::array get_oriented_triangle_side_nodes(const st static void append_facet_from_triangle_side(const stk::mesh::BulkData & mesh, const FieldRef coords, const stk::mesh::Selector & interfaceSelector, const stk::mesh::Selector & negativeSideElementSelector, const stk::mesh::Entity side, std::vector & facets) { const std::array orientedSideNodes = get_oriented_triangle_side_nodes(mesh, negativeSideElementSelector, side); - const std::array sideNodeCoords = get_triangle_side_vector(mesh, coords, orientedSideNodes); + const std::array sideNodeCoords = 
get_triangle_vector(mesh, coords, orientedSideNodes); facets.emplace_back( sideNodeCoords[0], sideNodeCoords[1], sideNodeCoords[2] ); } @@ -916,8 +926,8 @@ static void append_facet_from_line_side(const stk::mesh::BulkData & mesh, const static void append_facet_with_velocity_from_triangle_side(const stk::mesh::BulkData & mesh, const FieldRef coords, const FieldRef interfaceVelocity, const stk::mesh::Selector & interfaceSelector, const stk::mesh::Selector & negativeSideElementSelector, const stk::mesh::Entity side, std::vector & facets) { const std::array orientedSideNodes = get_oriented_triangle_side_nodes(mesh, negativeSideElementSelector, side); - const std::array sideNodeCoords = get_triangle_side_vector(mesh, coords, orientedSideNodes); - const std::array sideNodeVelocity = get_triangle_side_vector(mesh, interfaceVelocity, orientedSideNodes); + const std::array sideNodeCoords = get_triangle_vector(mesh, coords, orientedSideNodes); + const std::array sideNodeVelocity = get_triangle_vector(mesh, interfaceVelocity, orientedSideNodes); facets.emplace_back( sideNodeCoords[0], sideNodeCoords[1], sideNodeCoords[2], sideNodeVelocity[0], sideNodeVelocity[1], sideNodeVelocity[2] ); } @@ -1119,10 +1129,7 @@ void LevelSet::extend_interface_velocity_using_closest_point_projection(const st void LevelSet::set_interface_velocity( const std::vector & interfaceVelocity ) { - myInterfaceVelocity.clear(); - myInterfaceVelocity.reserve(interfaceVelocity.size()); - for (auto & component : interfaceVelocity) - myInterfaceVelocity.emplace_back(component); + initialize_expression_vector(interfaceVelocity, myInterfaceVelocity); } double @@ -1428,39 +1435,6 @@ LevelSet::compute_signed_distance_at_selected_nodes( const stk::mesh::Selector & } } -void -LevelSet::compute_distance_semilagrangian( const double & timeN, const double & timeNp1, const stk::mesh::Selector & selector ) -{ - const FieldRef coordsField = get_coordinates_field(); - const FieldRef distField = get_distance_field(); - 
const double dt = timeNp1 - timeN; - - const stk::mesh::Selector active_field_selector = aux_meta().active_not_ghost_selector() & selector & stk::mesh::selectField(distField); - stk::mesh::BucketVector const& buckets = mesh().get_buckets(stk::topology::NODE_RANK, active_field_selector); - - for ( auto && bucketPtr : buckets ) - { - const double * coordsData = field_data( coordsField , *bucketPtr); - double * distData = field_data( distField , *bucketPtr); - - for (size_t i = 0; i < bucketPtr->size(); ++i) - { - const stk::math::Vector3d nodeCoords(coordsData+i*spatial_dimension, spatial_dimension); - const stk::math::Vector3d closestPtN = facets->closest_point(nodeCoords); - const auto velN = compute_interface_velocity_at_point(spatial_dimension, timeN, closestPtN, myInterfaceVelocity); - - const stk::math::Vector3d coordsHalf = nodeCoords - 0.5*dt*velN; - const stk::math::Vector3d closestPtHalf = facets->closest_point(coordsHalf); - const auto velHalf = compute_interface_velocity_at_point(spatial_dimension, 0.5*(timeN+timeNp1), closestPtHalf, myInterfaceVelocity); - - const stk::math::Vector3d coordsNp1 = nodeCoords - dt*velHalf; - - const int previousSign = LevelSet::sign(distData[i]); - distData[i] = facets->truncated_point_signed_distance(coordsNp1, my_narrow_band_size, previousSign*my_narrow_band_size); - } - } -} - //----------------------------------------------------------------------------------- double diff --git a/packages/krino/krino/krino_lib/Akri_LevelSet.hpp b/packages/krino/krino/krino_lib/Akri_LevelSet.hpp index 7427747219c2..16542ba9506d 100644 --- a/packages/krino/krino/krino_lib/Akri_LevelSet.hpp +++ b/packages/krino/krino/krino_lib/Akri_LevelSet.hpp @@ -49,6 +49,13 @@ enum Redistance_Method MAX_REDISTANCE_METHOD_TYPE }; +enum SemiLagrangianAlgorithm +{ + NON_ADAPTIVE_SINGLE_STEP=0, + ADAPTIVE_PREDICTOR_CORRECTOR, + MAX_SEMILAGRANGIAN_ALGORITM_TYPE +}; + /// Return true if field-data exists for the specified meshobj and field. 
bool all_nodes_have_field_data(const stk::mesh::BulkData& stk_bulk, stk::mesh::Entity entity, const stk::mesh::FieldBase& field); @@ -148,7 +155,10 @@ friend class LevelSet_Size; Redistance_Method get_redistance_method() const { return my_redistance_method; } void set_redistance_method( const Redistance_Method type ) { my_redistance_method = type; } + SemiLagrangianAlgorithm get_semilagrangian_algorithm() const { return mySemiLagrangianAlg; } + void set_semilagrangian_algorithm( const SemiLagrangianAlgorithm type ) { mySemiLagrangianAlg = type; } void set_time_of_arrival_element_speed_field_name( const std::string & time_of_arrival_speed_field_name) { my_time_of_arrival_element_speed_field_name = time_of_arrival_speed_field_name; } + FieldRef get_time_of_arrival_element_speed_field() const {return myTimeOfArrivalElementSpeedField;} void set_time_of_arrival_block_speed(const std::string & blockName, const double blockSpeed); FacetedSurfaceBase & get_facets() { return *facets; } const FacetedSurfaceBase & get_facets() const { return *facets; } @@ -194,6 +204,8 @@ friend class LevelSet_Size; void compute_surface_distance(const double narrowBandSize=0.0, const double farFieldValue=0.0); static void initialize(stk::mesh::MetaData & meta); void initialize(const double time = 0.0); + bool can_create_adaptive_initial_facets_from_initial_surfaces_because_initial_distance_is_solely_from_initial_surfaces() const; + void build_initial_facets(const double time); static void clear_initialization_data(stk::mesh::MetaData & meta); void clear_initialization_data(); void redistance(); @@ -272,6 +284,7 @@ friend class LevelSet_Size; double my_threshold; Redistance_Method my_redistance_method; + SemiLagrangianAlgorithm mySemiLagrangianAlg{NON_ADAPTIVE_SINGLE_STEP}; std::string my_time_of_arrival_element_speed_field_name; std::map myTimeOfArrivalBlockSpeedsByName; std::vector myTimeOfArrivalBlockSpeeds; @@ -310,7 +323,6 @@ friend class LevelSet_Size; void 
prepare_to_compute_distance_to_stationary_facets( const stk::mesh::Selector & selector ); void compute_signed_distance_at_selected_nodes( const stk::mesh::Selector & selector ); - void compute_distance_semilagrangian( const double & timeN, const double & timeNp1, const stk::mesh::Selector & selector ); double distance( const stk::math::Vector3d & x, const int previous_sign, diff --git a/packages/krino/krino/krino_lib/Akri_LevelSetInterfaceGeometry.cpp b/packages/krino/krino/krino_lib/Akri_LevelSetInterfaceGeometry.cpp index 676d6b97f21c..4c36f197e89f 100644 --- a/packages/krino/krino/krino_lib/Akri_LevelSetInterfaceGeometry.cpp +++ b/packages/krino/krino/krino_lib/Akri_LevelSetInterfaceGeometry.cpp @@ -400,7 +400,7 @@ static ParentEdgeFilter keep_owned_edges_filter(const stk::mesh::BulkData & mesh { const std::pair edgeNodes = edge.get_parent_nodes(); std::vector edgeElems; - stk::mesh::get_entities_through_relations(mesh, {edgeNodes.first, edgeNodes.second}, stk::topology::ELEMENT_RANK, edgeElems); + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{edgeNodes.first, edgeNodes.second}, stk::topology::ELEMENT_RANK, edgeElems); { bool foundOwnedElement = false; for (auto && edgeElem : edgeElems) diff --git a/packages/krino/krino/krino_lib/Akri_LevelSetSurfaceInterfaceGeometry.cpp b/packages/krino/krino/krino_lib/Akri_LevelSetSurfaceInterfaceGeometry.cpp index c693c52f8fb6..b925e45c3d14 100644 --- a/packages/krino/krino/krino_lib/Akri_LevelSetSurfaceInterfaceGeometry.cpp +++ b/packages/krino/krino/krino_lib/Akri_LevelSetSurfaceInterfaceGeometry.cpp @@ -23,7 +23,7 @@ LevelSetSurfaceInterfaceGeometry::LevelSetSurfaceInterfaceGeometry(const int dim for (auto && lsField : myLSFields) { mySurfaceIdentifiers.push_back(lsField.identifier); - myLSSurfaces.emplace_back(std::move(FacetedSurfaceBase::build(dim))); + myLSSurfaces.emplace_back(FacetedSurfaceBase::build(dim)); } for (size_t i=0; i & LSFields); + using 
AnalyticSurfaceInterfaceGeometry::prepare_to_intersect_elements; + virtual bool might_have_interior_or_face_intersections() const override { return mySurfaceIdentifiers.size() > 1; } virtual void prepare_to_decompose_elements(const stk::mesh::BulkData & mesh, const NodeToCapturedDomainsMap & nodesToCapturedDomains) const override; diff --git a/packages/krino/krino/krino_lib/Akri_MasterElementDeterminer.hpp b/packages/krino/krino/krino_lib/Akri_MasterElementDeterminer.hpp index 76ab2b770213..0f39c378a054 100644 --- a/packages/krino/krino/krino_lib/Akri_MasterElementDeterminer.hpp +++ b/packages/krino/krino/krino_lib/Akri_MasterElementDeterminer.hpp @@ -13,7 +13,7 @@ namespace krino { class FieldRef; } namespace stk { namespace mesh { class Bucket; } } -namespace stk { class topology; } +namespace stk { struct topology; } namespace krino { diff --git a/packages/krino/krino/krino_lib/Akri_MeshFromFile.cpp b/packages/krino/krino/krino_lib/Akri_MeshFromFile.cpp index 0013e36a13e5..33df2529986f 100644 --- a/packages/krino/krino/krino_lib/Akri_MeshFromFile.cpp +++ b/packages/krino/krino/krino_lib/Akri_MeshFromFile.cpp @@ -22,7 +22,6 @@ MeshFromFile::MeshFromFile(const std::string & fileName, stk::ParallelMachine co myIOBroker->add_mesh_database(fileName, stk::io::READ_MESH); myIOBroker->create_input_mesh(); myMeta = &myIOBroker->meta_data(); - myMeta->use_simple_fields(); AuxMetaData::create(*myMeta); } diff --git a/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.cpp b/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.cpp index 2ab863a15b24..14436910902e 100644 --- a/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.cpp +++ b/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -38,4 +39,29 @@ void compute_nodal_surface_distance(const stk::mesh::BulkData & mesh, const Fiel stk::mesh::communicate_field_data(mesh, {&distanceField.field()}); } +void compute_nodal_distance_from_spheres(const 
stk::mesh::BulkData & mesh, + const FieldRef coordsField, + const FieldRef distanceField, + const std::vector> & spheres, + const int sign) +{ + Composite_Surface initializationSurfaces("initialization surfaces"); + if (sign < 0) + initializationSurfaces.set_composition_method(Composite_Surface::MAXIMUM_SIGNED_DISTANCE); + for (auto & sphere : spheres) + initializationSurfaces.add(new Sphere(sphere.first, sphere.second, sign)); + compute_nodal_surface_distance(mesh, coordsField, distanceField, initializationSurfaces); +} + +void compute_nodal_distance_from_plane(const stk::mesh::BulkData & mesh, + const FieldRef coordsField, + const FieldRef distanceField, + const stk::math::Vector3d & normal, + const double offset) +{ + Composite_Surface initializationSurfaces("initialization surfaces"); + initializationSurfaces.add(new Plane(normal, offset, 1.0)); + compute_nodal_surface_distance(mesh, coordsField, distanceField, initializationSurfaces); +} + } diff --git a/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.hpp b/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.hpp index 8de9712538a9..5abcc644594f 100644 --- a/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.hpp +++ b/packages/krino/krino/krino_lib/Akri_NodalSurfaceDistance.hpp @@ -7,7 +7,24 @@ namespace stk { namespace mesh { class BulkData; } } namespace krino { -void compute_nodal_surface_distance(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distanceField, Composite_Surface & surfaces, const double time=0, const double narrowBandSize=0); +void compute_nodal_surface_distance(const stk::mesh::BulkData & mesh, + const FieldRef coordsField, + const FieldRef distanceField, + Composite_Surface & surfaces, + const double time=0, + const double narrowBandSize=0); + +void compute_nodal_distance_from_spheres(const stk::mesh::BulkData & mesh, + const FieldRef coordsField, + const FieldRef distanceField, + const std::vector> & spheres, + const int sign = 1); + +void 
compute_nodal_distance_from_plane(const stk::mesh::BulkData & mesh, + const FieldRef coordsField, + const FieldRef distanceField, + const stk::math::Vector3d & normal, + const double offset); } diff --git a/packages/krino/krino/krino_lib/Akri_OutputUtils.cpp b/packages/krino/krino/krino_lib/Akri_OutputUtils.cpp index 71c681ab43df..a5e9b271e6b3 100644 --- a/packages/krino/krino/krino_lib/Akri_OutputUtils.cpp +++ b/packages/krino/krino/krino_lib/Akri_OutputUtils.cpp @@ -35,7 +35,16 @@ void output_mesh_with_fields_and_properties(const stk::mesh::BulkData & mesh, co stk::mesh::BulkData & workAroundNonConstMesh = const_cast(mesh); stkIo.set_bulk_data(workAroundNonConstMesh); - size_t outputFileIndex = stkIo.create_output_mesh(fileName, purpose, properties); + const size_t outputFileIndex = stkIo.create_output_mesh(fileName, purpose, properties); + + const int filterDisconnectedNodes = true; + if (filterDisconnectedNodes) + { + // Will filter out nodes that are themselves selected, but without any attached elements that are selected. + // For example, if selector is BLOCK_1 | NODESET_1, a node will not be output if it is in NODESET_1 and not BLOCK_1. 
+ std::shared_ptr ioRegion = stkIo.get_output_ioss_region(outputFileIndex); + ioRegion->property_add(Ioss::Property(stk::io::s_ignoreDisconnectedNodes, filterDisconnectedNodes)); + } if (step > 0) { @@ -64,14 +73,12 @@ void output_composed_mesh_with_fields(const stk::mesh::BulkData & mesh, const st { Ioss::PropertyManager properties; properties.add(Ioss::Property("COMPOSE_RESULTS", 1)); - properties.add(Ioss::Property(stk::io::s_ignoreDisconnectedNodes, true)); output_mesh_with_fields_and_properties(mesh, outputSelector, fileName, step, time, properties, purpose); } void output_mesh_with_fields(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & outputSelector, const std::string & fileName, int step, double time, stk::io::DatabasePurpose purpose) { Ioss::PropertyManager properties; - properties.add(Ioss::Property(stk::io::s_ignoreDisconnectedNodes, true)); output_mesh_with_fields_and_properties(mesh, outputSelector, fileName, step, time, properties, purpose); } @@ -109,6 +116,9 @@ void write_facets( const std::vector & facets, const std::string & fileBa Ioss::PropertyManager properties; properties.add(Ioss::Property("COMPOSE_RESULTS", 1)); const std::string fileName = create_file_name(fileBaseName, fileIndex); + properties.add(Ioss::Property("base_filename", create_file_name(fileBaseName, 0))); + if (fileIndex > 0) + properties.add(Ioss::Property("state_offset", fileIndex)); Ioss::DatabaseIO *db = Ioss::IOFactory::create("exodusII", create_file_name(fileBaseName, fileIndex), Ioss::WRITE_RESULTS, comm, properties); STK_ThrowRequireMsg(db != nullptr && db->ok(), "ERROR: Could not open output database '" << fileName << "' of type 'exodus'\n"); Ioss::Region io(db, "FacetRegion"); @@ -129,13 +139,6 @@ void write_facets( const std::vector & facets, const std::string & fileBa io.property_add(Ioss::Property("title", description)); io.begin_mode(Ioss::STATE_DEFINE_MODEL); -// // if we have no elements bail now -// if ( numFacets > 0) -// { -// 
io.end_mode(Ioss::STATE_DEFINE_MODEL); -// return; -// } - Ioss::NodeBlock *nb = new Ioss::NodeBlock(db, "nodeblock_1", numNodes, nodesPerFacet); io.add(nb); nb->property_add(Ioss::Property("locally_owned_count", numNodes)); @@ -202,6 +205,16 @@ void write_facets( const std::vector & facets, const std::string & fileBa } io.end_mode(Ioss::STATE_MODEL); + + // write fake transient + io.begin_mode(Ioss::STATE_DEFINE_TRANSIENT); + io.end_mode(Ioss::STATE_DEFINE_TRANSIENT); + + io.begin_mode(Ioss::STATE_TRANSIENT); + const int currentOutputStep = io.add_state(1.0*fileIndex); + io.begin_state(currentOutputStep); + io.end_state(currentOutputStep); + io.end_mode(Ioss::STATE_TRANSIENT); } stk::mesh::PartVector turn_off_output_for_empty_io_parts(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & outputSelector) diff --git a/packages/krino/krino/krino_lib/Akri_PhaseTag.hpp b/packages/krino/krino/krino_lib/Akri_PhaseTag.hpp index 983fd5605583..eb2d1d61ba31 100644 --- a/packages/krino/krino/krino_lib/Akri_PhaseTag.hpp +++ b/packages/krino/krino/krino_lib/Akri_PhaseTag.hpp @@ -46,8 +46,8 @@ class LS_SideTag { bool operator != ( const LS_SideTag & RHS ) const { return (my_ls_identifier != RHS.my_ls_identifier || my_ls_sign != RHS.my_ls_sign); } friend std::ostream& operator<<(std::ostream & os, const LS_SideTag & phase); protected: - const Surface_Identifier my_ls_identifier; - const int my_ls_sign; + Surface_Identifier my_ls_identifier; + int my_ls_sign; static std::map the_composite_ls_map; }; diff --git a/packages/krino/krino/krino_lib/Akri_PostProcess.cpp b/packages/krino/krino/krino_lib/Akri_PostProcess.cpp new file mode 100644 index 000000000000..0bacde585fe2 --- /dev/null +++ b/packages/krino/krino/krino_lib/Akri_PostProcess.cpp @@ -0,0 +1,80 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace krino { + +double compute_relative_nodal_RMS_error(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const 
FieldRef distField, const std::function & analytic_fn) +{ + + const stk::mesh::Selector activeOwnedFieldSelector = AuxMetaData::get(mesh.mesh_meta_data()).active_locally_owned_selector() & stk::mesh::selectField(distField); + const int dim = mesh.mesh_meta_data().spatial_dimension(); + + double errorSum = 0.; + double solutionSum = 0.; + + for ( auto && bucketPtr : mesh.get_buckets( stk::topology::NODE_RANK, activeOwnedFieldSelector) ) + { + const size_t length = bucketPtr->size(); + const double *coordsData = field_data(coordsField, *bucketPtr); + double *dist = field_data(distField, *bucketPtr); + + for (size_t i = 0; i < length; ++i) + { + const stk::math::Vector3d nodeCoords(coordsData+i*dim, dim); + const double analytic = analytic_fn(nodeCoords); + const double error = dist[i]-analytic; + errorSum += error*error; + solutionSum += analytic*analytic; + } + } + all_reduce_sum(mesh.parallel(), errorSum); + all_reduce_sum(mesh.parallel(), solutionSum); + return std::sqrt(errorSum/solutionSum); +} + +double compute_relative_nodal_RMS_error(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distField, const String_Function_Expression & analyticDist, const double time) +{ + auto analytic_fn = [&analyticDist, time](const stk::math::Vector3d &x) { return analyticDist.evaluate(time, x); }; + return compute_relative_nodal_RMS_error(mesh, coordsField, distField, analytic_fn); +} + +double compute_relative_nodal_RMS_error(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distField, const String_Function_Expression & analyticDist) +{ + auto analytic_fn = [&analyticDist](const stk::math::Vector3d &x) { return analyticDist.evaluate(x); }; + return compute_relative_nodal_RMS_error(mesh, coordsField, distField, analytic_fn); +} + +static void compute_and_print_distance_error(const stk::mesh::BulkData & mesh, const double time, const FieldRef coordsField, const FieldRef distField, const String_Function_Expression & 
analyticDist) +{ + krinolog << "Relative Nodal L2 error of " << distField.name() << " at time " << time << " = " << compute_relative_nodal_RMS_error(mesh, coordsField, distField, analyticDist, time) << stk::diag::dendl; +} + +void PostProcessors::add_scalar_postprocesor(const std::string fieldName, const std::string & analyticalExpr) +{ + myScalarPostProcessorStrings.emplace_back(fieldName, analyticalExpr); +} + +void PostProcessors::commit(const stk::mesh::MetaData & meta) +{ + for (auto & [fieldName, analyticalExpr] : myScalarPostProcessorStrings) + { + FieldRef field = AuxMetaData::get(meta).get_field(stk::topology::NODE_RANK, fieldName); + STK_ThrowRequireMsg(field.valid(), "Cannot field " << fieldName << " for postprocessor."); + myScalarPostProcessors.emplace_back(field, analyticalExpr); + } +} + +void PostProcessors::postprocess(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const double time) const +{ + for (auto & scalarPostProcessor : myScalarPostProcessors) + compute_and_print_distance_error(mesh, time, coordsField, scalarPostProcessor.first, scalarPostProcessor.second); +} + +} diff --git a/packages/krino/krino/krino_lib/Akri_PostProcess.hpp b/packages/krino/krino/krino_lib/Akri_PostProcess.hpp new file mode 100644 index 000000000000..5d926d73ce43 --- /dev/null +++ b/packages/krino/krino/krino_lib/Akri_PostProcess.hpp @@ -0,0 +1,32 @@ +#ifndef KRINO_KRINO_KRINO_LIB_AKRI_POSTPROCESS_HPP_ +#define KRINO_KRINO_KRINO_LIB_AKRI_POSTPROCESS_HPP_ + +#include +#include +#include +#include + +namespace krino { + +class PostProcessors +{ +public: + void add_scalar_postprocesor(const std::string fieldName, const std::string & analyticalExpr); + void postprocess(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const double time) const; + void commit(const stk::mesh::MetaData & meta); +private: + std::list> myScalarPostProcessorStrings; + std::list> myScalarPostProcessors; +}; + +double compute_relative_nodal_RMS_error(const 
stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distField, const std::function & analytic_fn); + +double compute_relative_nodal_RMS_error(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distField, const String_Function_Expression & analyticDist, const double time); + +double compute_relative_nodal_RMS_error(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distField, const String_Function_Expression & analyticDist); + +} + + + +#endif /* KRINO_KRINO_KRINO_LIB_AKRI_POSTPROCESS_HPP_ */ diff --git a/packages/krino/krino/krino_lib/Akri_RefinementInterface.cpp b/packages/krino/krino/krino_lib/Akri_RefinementInterface.cpp index 9b3553f93840..e7f57107afeb 100644 --- a/packages/krino/krino/krino_lib/Akri_RefinementInterface.cpp +++ b/packages/krino/krino/krino_lib/Akri_RefinementInterface.cpp @@ -5,7 +5,6 @@ * Author: drnoble */ #include "Akri_RefinementInterface.hpp" - #include #include #include @@ -18,18 +17,19 @@ #include "Akri_MeshHelpers.hpp" #include "Akri_ReportHandler.hpp" #include "Akri_TransitionElementEdgeMarker.hpp" +#include "stk_util/environment/Env.hpp" namespace krino { void clear_refinement_marker(const RefinementInterface & refinement) { - FieldRef markerField = refinement.get_marker_field(); + FieldRef markerField = refinement.get_marker_field_and_sync_to_host(); stk::mesh::field_fill(static_cast(Refinement::RefinementMarker::NOTHING), markerField, stk::mesh::selectField(markerField)); } void mark_selected_elements_for_refinement(const RefinementInterface & refinement, const stk::mesh::Selector & selector) { - FieldRef markerField = refinement.get_marker_field(); + FieldRef markerField = refinement.get_marker_field_and_sync_to_host(); clear_refinement_marker(refinement); stk::mesh::field_fill(static_cast(Refinement::RefinementMarker::REFINE), markerField, selector); } @@ -46,7 +46,7 @@ void mark_selected_elements_for_refinement(const RefinementInterface & refinemen void 
mark_elements_for_refinement(const RefinementInterface & refinement, const std::vector & elemsToRefine) { clear_refinement_marker(refinement); - FieldRef markerField = refinement.get_marker_field(); + FieldRef markerField = refinement.get_marker_field_and_sync_to_host(); for (auto && elem : elemsToRefine) { int * elemMarker = field_data(markerField, elem); @@ -85,7 +85,7 @@ void mark_based_on_indicator_field(const stk::mesh::BulkData & mesh, } const FieldRef indicatorField = auxMeta.get_field(stk::topology::ELEMENT_RANK, indicatorFieldName); - const FieldRef markerField = refinement.get_marker_field(); + const FieldRef markerField = refinement.get_marker_field_and_sync_to_host(); const auto & parentPart = refinement.parent_part(); const auto & activeBuckets = @@ -197,7 +197,7 @@ KrinoRefinement::create(stk::mesh::MetaData & meta, stk::diag::Timer & timer) STK_ThrowRequireMsg(nullptr == refinement, "KrinoRefinement::create should be called only once per MetaData."); if (nullptr == refinement) { - AuxMetaData & auxMeta = AuxMetaData::get(meta); + AuxMetaData & auxMeta = AuxMetaData::get_or_create(meta); refinement = new KrinoRefinement(meta, &auxMeta.active_part(), false /*auxMeta.get_force_64bit_flag()*/, @@ -230,6 +230,17 @@ KrinoRefinement::get_or_create(stk::mesh::MetaData & meta) return create(meta); } +KrinoRefinement & +KrinoRefinement::get_or_create(stk::mesh::MetaData & meta, stk::diag::Timer & timer) +{ + KrinoRefinement * refinement = const_cast(meta.get_attribute()); + if (refinement) + return *refinement; + + return create(meta, timer); +} + + void KrinoRefinement::register_parts_and_fields_via_aux_meta_for_fmwk(stk::mesh::MetaData & meta) { @@ -356,9 +367,11 @@ void KrinoRefinement::set_marker_field(const std::string & markerFieldName) myElementMarkerField = myMeta.get_field(stk::topology::ELEMENT_RANK, markerFieldName); } -FieldRef KrinoRefinement::get_marker_field() const +FieldRef KrinoRefinement::get_marker_field_and_sync_to_host() const { 
setup_marker(); + myElementMarkerField.sync_to_host(); + return myElementMarkerField; } @@ -388,10 +401,10 @@ TransitionElementEdgeMarker & KrinoRefinement::get_marker() const return *myMarker; } -std::pair KrinoRefinement::get_marked_element_counts() const +std::array KrinoRefinement::get_marked_element_counts() const { const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); - const FieldRef markerField = get_marker_field(); + const FieldRef markerField = get_marker_field_and_sync_to_host(); const stk::mesh::Selector selector = stk::mesh::selectField(markerField) & AuxMetaData::get(myMeta).active_part() & myMeta.locally_owned_part() & !myRefinement.parent_part(); unsigned numRefine = 0; @@ -415,23 +428,58 @@ std::pair KrinoRefinement::get_marked_element_counts() const std::array globalNum{0,0}; stk::all_reduce_sum(mesh.parallel(), localNum.data(), globalNum.data(), 2); - return std::make_pair(globalNum[0], globalNum[1]); + return globalNum; } -bool KrinoRefinement::do_refinement(const int debugLevel) +bool KrinoRefinement::is_supported_uniform_refinement_element() const { const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); + const FieldRef markerField = get_marker_field_and_sync_to_host(); + const stk::mesh::Selector selector = stk::mesh::selectField(markerField) & + AuxMetaData::get(myMeta).active_part() & myMeta.locally_owned_part() & + !myRefinement.parent_part(); + + bool is_supported = true; + + for (auto && bucket : mesh.get_buckets(stk::topology::ELEMENT_RANK, selector)) + { + const auto topology = bucket->topology(); + + is_supported &= (topology == stk::topology::TRI_3 || topology == stk::topology::TRI_3_2D || + topology == stk::topology::TETRAHEDRON_4 || topology == stk::topology::HEX_8 || + topology == stk::topology::QUAD_4 || topology == stk::topology::QUAD_4_2D || + topology == stk::topology::BEAM_2 || topology == stk::topology::BEAM_3); + } + + const int is_supported_int = static_cast(is_supported); + int reduced_result = 0; + + 
MPI_Allreduce(&is_supported_int, &reduced_result, 1, MPI_INT, MPI_LAND, mesh.parallel()); + + return static_cast(reduced_result); +} + +bool KrinoRefinement::do_refinement(const int debugLevel) +{ + const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); const auto markerCounts = get_marked_element_counts(); - const unsigned numRefine = markerCounts.first; - const unsigned numUnrefine = markerCounts.second; + const unsigned numRefine = markerCounts[0]; + const unsigned numUnrefine = markerCounts[1]; + krinolog << "Number of elements marked for refinement = " << numRefine << "\n"; krinolog << "Number of elements marked for unrefinement = " << numUnrefine << stk::diag::dendl; std::vector counts; stk::mesh::comm_mesh_counts(mesh, counts); - krinolog << "Adapt: before refine, mesh has " << counts[0] << " nodes, " << counts[1] + //if it can be uniformly refined, and all elements are marked for refinement just do uniform refinement + if (is_supported_uniform_refinement_element() && counts[3] == numRefine && (counts[3]+numRefine) > 0) + { + return do_uniform_refinement(1); + } + + krinolog << "Adaptive refinement: before refine, mesh has " << counts[0] << " nodes, " << counts[1] << " edges, " << counts[2] << " faces, " << counts[3] << " elements" << stk::diag::dendl; bool didMakeAnyChanges = false; @@ -439,6 +487,7 @@ bool KrinoRefinement::do_refinement(const int debugLevel) stk::diag::TimeBlock timer_(myRefinementTimer); didMakeAnyChanges = myRefinement.do_refinement(get_marker()); } + stk::mesh::comm_mesh_counts(mesh, counts); krinolog << "Adapt: after refine, mesh has " << counts[0] << " nodes, " << counts[1] @@ -450,7 +499,7 @@ bool KrinoRefinement::do_refinement(const int debugLevel) return didMakeAnyChanges; } -void KrinoRefinement::do_uniform_refinement(const int numUniformRefinementLevels) +bool KrinoRefinement::do_uniform_refinement(const int numUniformRefinementLevels) { const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); @@ -460,7 +509,7 @@ void 
KrinoRefinement::do_uniform_refinement(const int numUniformRefinementLevels krinolog << "Uniform refinement: before refine, mesh has " << counts[0] << " nodes, " << counts[1] << " edges, " << counts[2] << " faces, " << counts[3] << " elements" << stk::diag::dendl; - myRefinement.do_uniform_refinement(numUniformRefinementLevels); + const bool didMakeAnyChanges = myRefinement.do_uniform_refinement(numUniformRefinementLevels); stk::mesh::comm_mesh_counts(mesh, counts); @@ -469,6 +518,8 @@ void KrinoRefinement::do_uniform_refinement(const int numUniformRefinementLevels ParallelThrowAssert(mesh.parallel(), check_face_and_edge_ownership(mesh)); ParallelThrowAssert(mesh.parallel(), check_face_and_edge_relations(mesh)); + + return didMakeAnyChanges; } void KrinoRefinement::restore_after_restart() diff --git a/packages/krino/krino/krino_lib/Akri_RefinementInterface.hpp b/packages/krino/krino/krino_lib/Akri_RefinementInterface.hpp index e848a2d4f798..844a57de344a 100644 --- a/packages/krino/krino/krino_lib/Akri_RefinementInterface.hpp +++ b/packages/krino/krino/krino_lib/Akri_RefinementInterface.hpp @@ -63,12 +63,13 @@ class RefinementInterface virtual void update_element_rebalance_weights_incorporating_parallel_owner_constraints(stk::mesh::Field & elemWtField) const = 0; virtual unsigned get_num_children(const stk::mesh::Entity elem) const = 0; virtual int fully_refined_level(const stk::mesh::Entity elem) const = 0; - virtual FieldRef get_marker_field() const = 0; + virtual FieldRef get_marker_field_and_sync_to_host() const = 0; virtual bool require_post_refinement_fixups() const = 0; virtual std::string locally_check_leaf_children_have_parents_on_same_proc() const = 0; virtual bool do_refinement(const int debugLevel = 0) = 0; - virtual void do_uniform_refinement(const int numUniformRefinementLevels) = 0; + virtual bool do_uniform_refinement(const int numUniformRefinementLevels) = 0; + virtual void delete_parent_elements() = 0; }; class KrinoRefinement : public 
RefinementInterface @@ -78,6 +79,7 @@ class KrinoRefinement : public RefinementInterface static KrinoRefinement & create(stk::mesh::MetaData & meta); static KrinoRefinement & create(stk::mesh::MetaData & meta, stk::diag::Timer & timer); static KrinoRefinement & get_or_create(stk::mesh::MetaData & meta); + static KrinoRefinement & get_or_create(stk::mesh::MetaData & meta, stk::diag::Timer & timer); static bool is_created(const stk::mesh::MetaData & meta); static void register_parts_and_fields_via_aux_meta_for_fmwk(stk::mesh::MetaData & meta); @@ -101,19 +103,21 @@ class KrinoRefinement : public RefinementInterface int partially_refined_level(const stk::mesh::Entity elem) const; virtual std::string locally_check_leaf_children_have_parents_on_same_proc() const override; - virtual FieldRef get_marker_field() const override; + virtual FieldRef get_marker_field_and_sync_to_host() const override; virtual bool require_post_refinement_fixups() const override { return false; }; virtual bool do_refinement(const int debugLevel = 0) override; - virtual void do_uniform_refinement(const int numUniformRefinementLevels) override; + virtual bool do_uniform_refinement(const int numUniformRefinementLevels) override; void restore_after_restart(); void set_marker_field(const std::string & markerFieldName); + virtual void delete_parent_elements() override {myRefinement.delete_parent_elements();}; private: KrinoRefinement(stk::mesh::MetaData & meta, stk::mesh::Part * activePart, const bool force64Bit, const bool assert32Bit, stk::diag::Timer & parent_timer); - std::pair get_marked_element_counts() const; + std::array get_marked_element_counts() const; + bool is_supported_uniform_refinement_element() const; TransitionElementEdgeMarker & setup_marker() const; TransitionElementEdgeMarker & get_marker() const; stk::mesh::MetaData & myMeta; diff --git a/packages/krino/krino/krino_lib/Akri_SemiLagrangian.cpp b/packages/krino/krino/krino_lib/Akri_SemiLagrangian.cpp new file mode 100644 index 
000000000000..1d4fafb2977d --- /dev/null +++ b/packages/krino/krino/krino_lib/Akri_SemiLagrangian.cpp @@ -0,0 +1,477 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace krino { + +static stk::math::Vector3d compute_interface_velocity_at_point(const int dim, const double time, const stk::math::Vector3d & coords, const std::vector & interfaceVelocityExpr) +{ + if (2 == dim) + return stk::math::Vector3d(interfaceVelocityExpr[0].evaluate(time, coords), interfaceVelocityExpr[1].evaluate(time, coords), 0.0); + return stk::math::Vector3d(interfaceVelocityExpr[0].evaluate(time, coords), interfaceVelocityExpr[1].evaluate(time, coords), interfaceVelocityExpr[2].evaluate(time, coords)); +} + +static stk::math::Vector3d compute_semilagrangian_evaluation_point(const int dim, + const double timeN, + const double timeNp1, + const FacetedSurfaceBase & facets, + const stk::math::Vector3d & pt, + const std::vector & interfaceVelocityExpr) +{ + const double dt = timeNp1 - timeN; + const double tMid = 0.5*(timeN+timeNp1); + +#if 0 + const stk::math::Vector3d closestPtN = facets.closest_point(pt); + const auto velN = compute_interface_velocity_at_point(dim, timeN, closestPtN, interfaceVelocityExpr); + + const stk::math::Vector3d coords2 = pt - 0.5*dt*velN; + const stk::math::Vector3d closestPt2 = facets.closest_point(coords2); + const auto vel2 = compute_interface_velocity_at_point(dim, tMid, closestPt2, interfaceVelocityExpr); + + const stk::math::Vector3d coords3 = pt - 0.5*dt*vel2; + const stk::math::Vector3d closestPt3 = facets.closest_point(coords3); + const auto vel3 = compute_interface_velocity_at_point(dim, tMid, closestPt3, interfaceVelocityExpr); + + const stk::math::Vector3d coords4 = pt - dt*vel3; + const stk::math::Vector3d closestPt4 = facets.closest_point(coords4); + const auto vel4 = compute_interface_velocity_at_point(dim, timeNp1, closestPt4, 
interfaceVelocityExpr); + + const stk::math::Vector3d coordsNp1 = pt - dt/6.*(velN + 2.*vel2 + 2.*vel3 + vel4); +#endif +#if 1 + const stk::math::Vector3d closestPtN = facets.closest_point(pt); + const auto velN = compute_interface_velocity_at_point(dim, timeN, closestPtN, interfaceVelocityExpr); + + const stk::math::Vector3d coordsHalf = pt - 0.5*dt*velN; + const stk::math::Vector3d closestPtHalf = facets.closest_point(coordsHalf); + const auto velHalf = compute_interface_velocity_at_point(dim, tMid, closestPtHalf, interfaceVelocityExpr); + + const stk::math::Vector3d coordsNp1 = pt - dt*velHalf; +#endif +#if 0 +// use local velocity instead of extension velocity + const auto velN = compute_interface_velocity_at_point(dim, timeN, pt, interfaceVelocityExpr); + const stk::math::Vector3d coordsHalf = pt - 0.5*dt*velN; + const auto velHalf = compute_interface_velocity_at_point(dim, tMid, coordsHalf, interfaceVelocityExpr); + + const stk::math::Vector3d coordsNp1 = pt - dt*velHalf; +#endif +#if 0 + const stk::math::Vector3d closestPtN = facets.closest_point(pt); + const auto velN = compute_interface_velocity_at_point(dim, timeN, closestPtN, interfaceVelocityExpr); + + const stk::math::Vector3d coordsPred = pt - dt*velN; + const stk::math::Vector3d closestPtPred = facets.closest_point(coordsPred); + const auto velPred = compute_interface_velocity_at_point(dim, tMid, closestPtPred, interfaceVelocityExpr); + + const stk::math::Vector3d coordsNp1 = pt - 0.5*dt*(velN+velPred); +#endif +#if 0 + const stk::math::Vector3d closestPtN = facets.closest_point(pt); + const auto velN = compute_interface_velocity_at_point(dim, timeN, closestPtN, interfaceVelocityExpr); + + const stk::math::Vector3d coordsHalf = pt - 0.5*dt*velN; + const stk::math::Vector3d closestPtHalf = facets.closest_point(coordsHalf); + const auto velHalf = compute_interface_velocity_at_point(dim, tMid, closestPtHalf, interfaceVelocityExpr); + + const stk::math::Vector3d coordsPred = pt - dt*velN; + const 
stk::math::Vector3d closestPtPred = facets.closest_point(coordsPred); + const auto velPred = compute_interface_velocity_at_point(dim, tMid, closestPtPred, interfaceVelocityExpr); + + const stk::math::Vector3d coordsNp1 = pt - 0.25*dt*(velN+2*velHalf+velPred); +#endif + return coordsNp1; +} + +static double compute_semilagrangian_distance_at_point(const int dim, + const double timeN, + const double timeNp1, + const FacetedSurfaceBase & facets, + const stk::math::Vector3d & pt, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double farFieldValue) +{ + const auto prevCoords = compute_semilagrangian_evaluation_point(dim, timeN, timeNp1, facets, pt, interfaceVelocityExpr); + return facets.truncated_point_signed_distance(prevCoords, narrowBandSize, farFieldValue); +} + +static stk::math::Vector3d compute_semilagrangian_predicted_evaluation_point(const int dim, + const double timeN, + const double timeNp1, + const FacetedSurfaceBase & facetsN, + const stk::math::Vector3d & pt, + const std::vector & interfaceVelocityExpr) +{ + const double dt = timeNp1 - timeN; + const stk::math::Vector3d closestPtN = facetsN.closest_point(pt); + const auto velN = compute_interface_velocity_at_point(dim, timeN, closestPtN, interfaceVelocityExpr); + const stk::math::Vector3d coordsTilde = pt - dt*velN; + + return coordsTilde; +} + +static stk::math::Vector3d compute_semilagrangian_corrected_evaluation_point(const int dim, + const double timeN, + const double timeNp1, + const FacetedSurfaceBase & facetsN, + const FacetedSurfaceBase & facetsPred, + const stk::math::Vector3d & pt, + const std::vector & interfaceVelocityExpr) +{ + const double dt = timeNp1 - timeN; + const stk::math::Vector3d facetsNClosestPt = facetsN.closest_point(pt); + const auto velN = compute_interface_velocity_at_point(dim, timeN, facetsNClosestPt, interfaceVelocityExpr); + const stk::math::Vector3d coordsTilde = pt - dt*velN; + const stk::math::Vector3d facetsNClosestPtTilde = 
facetsN.closest_point(coordsTilde); + const auto vel1 = compute_interface_velocity_at_point(dim, timeN, facetsNClosestPtTilde, interfaceVelocityExpr); + const stk::math::Vector3d facetsPredClosestPt = facetsPred.closest_point(pt); + const auto vel2 = compute_interface_velocity_at_point(dim, timeNp1, facetsPredClosestPt, interfaceVelocityExpr); + const auto velCorr = 0.5*(vel1+vel2); + const stk::math::Vector3d coordsCorr = pt - dt*velCorr; + + return coordsCorr; +} + +static double compute_semilagrangian_distance_prediction_at_point(const int dim, + const double timeN, + const double timeNp1, + const FacetedSurfaceBase & facetsN, + const stk::math::Vector3d & pt, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double farFieldValue) +{ + const stk::math::Vector3d coordsTilde = compute_semilagrangian_predicted_evaluation_point(dim, timeN, timeNp1, facetsN, pt, interfaceVelocityExpr); + return facetsN.truncated_point_signed_distance(coordsTilde, narrowBandSize, farFieldValue); +} + +static double compute_semilagrangian_distance_correction_at_point(const int dim, + const double timeN, + const double timeNp1, + const FacetedSurfaceBase & facetsN, + const FacetedSurfaceBase & facetsPred, + const stk::math::Vector3d & pt, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double farFieldValue) +{ + const stk::math::Vector3d coordsCorr = compute_semilagrangian_corrected_evaluation_point(dim, timeN, timeNp1, facetsN, facetsPred, pt, interfaceVelocityExpr); + return facetsN.truncated_point_signed_distance(coordsCorr, narrowBandSize, farFieldValue); +} + +static std::function build_initial_distance_at_point(const Composite_Surface & initSurfaces, const double narrowBandSize) +{ + auto fn = [&initSurfaces, narrowBandSize](const stk::math::Vector3d & pt) + { + return initSurfaces.point_signed_distance_with_narrow_band(pt, narrowBandSize); + }; + return fn; +} + +static std::function 
build_semilagrangian_distance_at_point(const int dim, + const double timeN, + const double timeNp1, + const std::vector & interfaceVelocityExpr, + const FacetedSurfaceBase & facets) +{ + auto fn = [dim, timeN, timeNp1, &interfaceVelocityExpr, &facets](const stk::math::Vector3d & pt) + { + constexpr double zeroNarrowBandSize = 0.; + return compute_semilagrangian_distance_at_point(dim, timeN, timeNp1, facets, pt, interfaceVelocityExpr, zeroNarrowBandSize, zeroNarrowBandSize); + }; + return fn; +} + +static std::function build_semilagrangian_distance_predictor_at_point(const int dim, + const double timeN, + const double timeNp1, + const std::vector & interfaceVelocityExpr, + const FacetedSurfaceBase & facets) +{ + auto fn = [dim, timeN, timeNp1, &interfaceVelocityExpr, &facets](const stk::math::Vector3d & pt) + { + constexpr double zeroNarrowBandSize = 0.; + return compute_semilagrangian_distance_prediction_at_point(dim, timeN, timeNp1, facets, pt, interfaceVelocityExpr, zeroNarrowBandSize, zeroNarrowBandSize); + }; + return fn; +} + +static std::function build_semilagrangian_distance_corrector_at_point(const int dim, + const double timeN, + const double timeNp1, + const std::vector & interfaceVelocityExpr, + const FacetedSurfaceBase & facetsN, + const FacetedSurfaceBase & facetsPred) +{ + auto fn = [dim, timeN, timeNp1, &interfaceVelocityExpr, &facetsN, &facetsPred](const stk::math::Vector3d & pt) + { + constexpr double zeroNarrowBandSize = 0.; + return compute_semilagrangian_distance_correction_at_point(dim, timeN, timeNp1, facetsN, facetsPred, pt, interfaceVelocityExpr, zeroNarrowBandSize, zeroNarrowBandSize); + }; + return fn; +} + +template +double compute_max_facet_velocity_magnitude(const double time, + const std::vector & facets, + const std::vector & interfaceVelocity) +{ + double maxSqrMag = 0.; + for (auto & facet : facets) + { + for (int n=0; n maxSqrMag) + maxSqrMag = velSqrMag; + } + } + return std::sqrt(maxSqrMag); +} + +BoundingBox 
compute_padded_node_bounding_box_for_semilagrangian(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const std::vector & interfaceVelocityExpr, + const FacetedSurfaceBase & facets) +{ + BoundingBox nodeBBox = krino::compute_nodal_bbox(mesh, activeFieldSelector, coordsField); + + const double timeMid = 0.5*(timeN+timeNp1); + const double velMag = (2==mesh.mesh_meta_data().spatial_dimension()) ? + compute_max_facet_velocity_magnitude(timeMid, facets.get_facets_2d(), interfaceVelocityExpr) : + compute_max_facet_velocity_magnitude(timeMid, facets.get_facets_3d(), interfaceVelocityExpr); + const double paddingFactorOfSafety = 1.5; + nodeBBox.pad(paddingFactorOfSafety*velMag*(timeNp1-timeN)); // Need something better? + return nodeBBox; +} + +void build_nonadaptive_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const FieldRef coordsField, + const FieldRef distField, + const double lengthScale, + FacetedSurfaceBase & facets) +{ + facets.clear(); + for ( auto * bucketPtr : mesh.get_buckets(stk::topology::ELEMENT_RANK, activeFieldSelector & mesh.mesh_meta_data().locally_owned_part()) ) + { + for (auto elem : *bucketPtr) + { + ContourElement lsElem( mesh, elem, coordsField, distField ); + lsElem.compute_subelement_decomposition(lengthScale); + + lsElem.build_subelement_facets( facets ); + } + } +} + +static void calc_single_step_semilagrangian_nodal_distance(const int dim, + const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const FacetedSurfaceBase & facetsN) +{ + for ( auto && bucketPtr : mesh.get_buckets(stk::topology::NODE_RANK, activeFieldSelector) ) + { + const double * coordsData = 
field_data(coordsField , *bucketPtr); + double * distData = field_data(distField, *bucketPtr); + + for (size_t i = 0; i < bucketPtr->size(); ++i) + { + const stk::math::Vector3d nodeCoords(coordsData+i*dim, dim); + const int previousSign = sign(distData[i]); + distData[i] = compute_semilagrangian_distance_at_point(dim, timeN, timeNp1, facetsN, nodeCoords, interfaceVelocityExpr, narrowBandSize, previousSign*narrowBandSize); + } + } +} + +void calc_single_step_nonadaptive_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + FacetedSurfaceBase & facetsNp1) +{ + const int dim = mesh.mesh_meta_data().spatial_dimension(); + calc_single_step_semilagrangian_nodal_distance(dim, mesh, activeFieldSelector, timeN, timeNp1, coordsField, distField, interfaceVelocityExpr, narrowBandSize, facetsN); + + build_nonadaptive_facets(mesh, activeFieldSelector, coordsField, distField, avgEdgeLength, facetsNp1); +} + +static void adaptively_append_facets_for_mesh_element_using_semilagrangian_distance(const int dim, + const stk::mesh::BulkData & mesh, + const FieldRef coordsField, + const FieldRef isoField, + const stk::mesh::Entity elem, + const std::function & distance_at_point, + const double lengthScale, + const int minDepth, + const int maxDepth, + FacetedSurfaceBase & facets) +{ + STK_ThrowRequire(dim == 2); + const StkMeshEntities elemNodes{mesh.begin_nodes(elem), mesh.end_nodes(elem)}; + const std::array nodeCoords = get_triangle_vector(mesh, coordsField, elemNodes, 2); + const std::array nodeDist = get_triangle_scalar(mesh, isoField, elemNodes); + adaptively_append_facets_for_tri_using_semilagrangian_distance(nodeCoords, nodeDist, distance_at_point, 
lengthScale, facets, 0, minDepth, maxDepth); +} + +static void build_adaptive_facets_using_semilagrangian_distance(const int dim, + const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const FieldRef coordsField, + const FieldRef distField, + const std::function & distance_at_point, + const double avgEdgeLength, + const int minDepth, + const int maxDepth, + FacetedSurfaceBase & facets) +{ + facets.clear(); + for ( auto * bucketPtr : mesh.get_buckets(stk::topology::ELEMENT_RANK, activeFieldSelector) ) + { + STK_ThrowRequireMsg(bucketPtr->topology() == stk::topology::TRIANGLE_3_2D, "Only Tri3d elements currently supported."); + for (auto elem : *bucketPtr) + adaptively_append_facets_for_mesh_element_using_semilagrangian_distance(dim, mesh, coordsField, distField, elem, distance_at_point, avgEdgeLength, minDepth, maxDepth, facets); + } +} + +void build_initial_adaptive_facets_after_nodal_distance_is_initialized_from_initial_surfaces(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double time, + const FieldRef coordsField, + const FieldRef distField, + const double avgEdgeLength, + const Composite_Surface & initSurfaces, + FacetedSurfaceBase & facets) +{ + const int minDepth = 5; + const int maxDepth = 5; + + const auto initial_distance_at_point = build_initial_distance_at_point(initSurfaces, time); + + build_adaptive_facets_using_semilagrangian_distance(mesh.mesh_meta_data().spatial_dimension(), mesh, activeFieldSelector, coordsField, distField, initial_distance_at_point, avgEdgeLength, minDepth, maxDepth, facets); +} + +void calc_single_step_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const 
FacetedSurfaceBase & facetsN, + FacetedSurfaceBase & facetsNp1) +{ + const int minDepth = 2; + const int maxDepth = 5; + + const int dim = mesh.mesh_meta_data().spatial_dimension(); + calc_single_step_semilagrangian_nodal_distance(dim, mesh, activeFieldSelector, timeN, timeNp1, coordsField, distField, interfaceVelocityExpr, narrowBandSize, facetsN); + + const auto distance_at_point = build_semilagrangian_distance_at_point(dim, timeN, timeNp1, interfaceVelocityExpr, facetsN); + + build_adaptive_facets_using_semilagrangian_distance(dim, mesh, activeFieldSelector, coordsField, distField, distance_at_point, avgEdgeLength, minDepth, maxDepth, facetsNp1); +} + +void predict_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + FacetedSurfaceBase & facetsPred) +{ + const int minDepth = 1; + const int maxDepth = 2; + + const int dim = mesh.mesh_meta_data().spatial_dimension(); + for ( auto && bucketPtr : mesh.get_buckets(stk::topology::NODE_RANK, activeFieldSelector) ) + { + const double * coordsData = field_data(coordsField , *bucketPtr); + double * distData = field_data(distField , *bucketPtr); + + for (size_t i = 0; i < bucketPtr->size(); ++i) + { + const stk::math::Vector3d nodeCoords(coordsData+i*dim, dim); + const int previousSign = sign(distData[i]); + distData[i] = compute_semilagrangian_distance_prediction_at_point(dim, timeN, timeNp1, facetsN, nodeCoords, interfaceVelocityExpr, narrowBandSize, previousSign*narrowBandSize); + } + } + + const auto predict_distance_at_point = build_semilagrangian_distance_predictor_at_point(dim, timeN, timeNp1, interfaceVelocityExpr, facetsN); + + build_adaptive_facets_using_semilagrangian_distance(dim, mesh, 
activeFieldSelector, coordsField, distField, predict_distance_at_point, avgEdgeLength, minDepth, maxDepth, facetsPred); +} + +void correct_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + const FacetedSurfaceBase & facetsPred, + FacetedSurfaceBase & facetsNp1) +{ + const int minDepth = 2; + const int maxDepth = 5; + + const int dim = mesh.mesh_meta_data().spatial_dimension(); + for ( auto && bucketPtr : mesh.get_buckets(stk::topology::NODE_RANK, activeFieldSelector) ) + { + const double * coordsData = field_data(coordsField , *bucketPtr); + double * distData = field_data(distField , *bucketPtr); + + for (size_t i = 0; i < bucketPtr->size(); ++i) + { + const stk::math::Vector3d nodeCoords(coordsData+i*dim, dim); + const int previousSign = sign(distData[i]); + distData[i] = compute_semilagrangian_distance_correction_at_point(dim, timeN, timeNp1, facetsN, facetsPred, nodeCoords, interfaceVelocityExpr, narrowBandSize, previousSign*narrowBandSize); + } + } + + const auto correct_distance_at_point = build_semilagrangian_distance_corrector_at_point(dim, timeN, timeNp1, interfaceVelocityExpr, facetsN, facetsPred); + + build_adaptive_facets_using_semilagrangian_distance(dim, mesh, activeFieldSelector, coordsField, distField, correct_distance_at_point, avgEdgeLength, minDepth, maxDepth, facetsNp1); +} + +} + + diff --git a/packages/krino/krino/krino_lib/Akri_SemiLagrangian.hpp b/packages/krino/krino/krino_lib/Akri_SemiLagrangian.hpp new file mode 100644 index 000000000000..1ff7293c09b8 --- /dev/null +++ b/packages/krino/krino/krino_lib/Akri_SemiLagrangian.hpp @@ -0,0 +1,89 @@ +#ifndef KRINO_KRINO_KRINO_LIB_AKRI_SEMILAGRANGIAN_HPP_ +#define 
KRINO_KRINO_KRINO_LIB_AKRI_SEMILAGRANGIAN_HPP_ + +#include +#include +#include +#include +#include + +namespace krino { + +class Composite_Surface; + +BoundingBox compute_padded_node_bounding_box_for_semilagrangian(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const std::vector & interfaceVelocityExpr, + const FacetedSurfaceBase & facets); + +void build_nonadaptive_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const FieldRef coordsField, + const FieldRef distField, + const double lengthScale, + FacetedSurfaceBase & facets); + +void build_initial_adaptive_facets_after_nodal_distance_is_initialized_from_initial_surfaces(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double time, + const FieldRef coordsField, + const FieldRef distField, + const double avgEdgeLength, + const Composite_Surface & initSurfaces, + FacetedSurfaceBase & facets); + +void calc_single_step_nonadaptive_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + FacetedSurfaceBase & facetsNp1); + +void calc_single_step_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + FacetedSurfaceBase & facetsNp1); + +void 
predict_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + FacetedSurfaceBase & facetsPred); + +void correct_semilagrangian_nodal_distance_and_build_facets(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & activeFieldSelector, + const double timeN, + const double timeNp1, + const FieldRef coordsField, + const FieldRef distField, + const std::vector & interfaceVelocityExpr, + const double narrowBandSize, + const double avgEdgeLength, + const FacetedSurfaceBase & facetsN, + const FacetedSurfaceBase & facetsPred, + FacetedSurfaceBase & facetsNp1); + +} + +#endif /* KRINO_KRINO_KRINO_LIB_AKRI_SEMILAGRANGIAN_HPP_ */ diff --git a/packages/krino/krino/krino_lib/Akri_SharpFeature.cpp b/packages/krino/krino/krino_lib/Akri_SharpFeature.cpp index 129a856bded3..65df5f722dd4 100644 --- a/packages/krino/krino/krino_lib/Akri_SharpFeature.cpp +++ b/packages/krino/krino/krino_lib/Akri_SharpFeature.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "Akri_AuxMetaData.hpp" @@ -283,7 +284,7 @@ bool SharpFeatureInfo::edge_has_sharp_feature_3D(const stk::mesh::BulkData & mes { const std::array & edgeNodes = get_edge_nodes(edge); std::vector sidesOfEdge; - stk::mesh::get_entities_through_relations(mesh, {edgeNodes[0], edgeNodes[1]}, stk::topology::FACE_RANK, sidesOfEdge); + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{edgeNodes[0], edgeNodes[1]}, stk::topology::FACE_RANK, sidesOfEdge); if (sidesOfEdge.size() > 1) filter_sides_based_on_attached_element_and_side_parts(mesh, elementSelector, sideSelector, sidesOfEdge); return angle_is_sharp_between_any_two_sides_3D(mesh, coordsField, 
cosFeatureAngle, edgeNodes, sidesOfEdge); diff --git a/packages/krino/krino/krino_lib/Akri_Snap.cpp b/packages/krino/krino/krino_lib/Akri_Snap.cpp index addc5104df26..e3e23b06e023 100644 --- a/packages/krino/krino/krino_lib/Akri_Snap.cpp +++ b/packages/krino/krino/krino_lib/Akri_Snap.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include namespace krino diff --git a/packages/krino/krino/krino_lib/Akri_SubElementChildNodeAncestry.cpp b/packages/krino/krino/krino_lib/Akri_SubElementChildNodeAncestry.cpp index bd8631e74e78..29097020d597 100644 --- a/packages/krino/krino/krino_lib/Akri_SubElementChildNodeAncestry.cpp +++ b/packages/krino/krino/krino_lib/Akri_SubElementChildNodeAncestry.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace krino { diff --git a/packages/krino/krino/math_utils/Akri_MathUtil.cpp b/packages/krino/krino/math_utils/Akri_MathUtil.cpp index 07cdfc9aa2af..4583c7985adf 100644 --- a/packages/krino/krino/math_utils/Akri_MathUtil.cpp +++ b/packages/krino/krino/math_utils/Akri_MathUtil.cpp @@ -261,6 +261,35 @@ std::pair find_root_newton_raphson( const std::function::epsilon()*std::sqrt(d0*d0 + d1*d1 + d2*d2); + if ( std::fabs(d0) < epsilon ) return 0.0; + if ( std::fabs(d1) < epsilon ) return 1.0; + if ( std::fabs(d2) < epsilon ) return 0.5; + + STK_ThrowAssert(d0*d1 < 0.0 && (d0*d2 < 0.0 || d1*d2 < 0.0)); // Insist on one and only one crossing + + const double a = 2.0*(d0 - 2.0*d2 + d1); + const double b = -3.0*d0 - d1 + 4.0*d2; + const double c = d0; + const int sign_b = ( b < 0.0 ) ? -1 : 1; + const double q = -0.5*(b + sign_b*std::sqrt(b*b-4.0*a*c)); + + const int sign_a = ( a < 0.0 ) ? -1 : 1; + + if (q*sign_a > 0.0 && q*sign_a < a*sign_a) + { + STK_ThrowAssert(!(c*(( q < 0.0 ) ? -1 : 1) > 0.0 && c*(( q < 0.0 ) ? -1 : 1) < q*(( q < 0.0 ) ? -1 : 1))); // Insist on only one crossing + return (q/a); + } + else + { + STK_ThrowAssert(c*(( q < 0.0 ) ? -1 : 1) > 0.0 && c*(( q < 0.0 ) ? -1 : 1) < q*(( q < 0.0 ) ? 
-1 : 1)); + return (c/q); + } +} + } diff --git a/packages/krino/krino/math_utils/Akri_MathUtil.hpp b/packages/krino/krino/math_utils/Akri_MathUtil.hpp index 2754588ce367..ada2b00d50de 100644 --- a/packages/krino/krino/math_utils/Akri_MathUtil.hpp +++ b/packages/krino/krino/math_utils/Akri_MathUtil.hpp @@ -31,6 +31,8 @@ std::pair find_root_newton_raphson( const std::function & ids, bool assert_32bit_ids, bool make_64bit_ids); void reserve(stk::mesh::EntityRank rank, size_t count, bool assert_32bit_ids, bool make_64bit_ids); diff --git a/packages/krino/krino/mesh_utils/Akri_FieldRef.hpp b/packages/krino/krino/mesh_utils/Akri_FieldRef.hpp index 50bb5641304c..0b9518fa70e8 100644 --- a/packages/krino/krino/mesh_utils/Akri_FieldRef.hpp +++ b/packages/krino/krino/mesh_utils/Akri_FieldRef.hpp @@ -119,6 +119,11 @@ class FieldRef return my_field->max_size(); } + void sync_to_host() const + { + my_field->sync_to_host(); + } + // testing bool operator ==(const FieldRef & rhs) const { return my_field == rhs.my_field; } bool operator !=(const FieldRef & rhs) const { return my_field != rhs.my_field; } diff --git a/packages/krino/krino/mesh_utils/Akri_MeshHelpers.cpp b/packages/krino/krino/mesh_utils/Akri_MeshHelpers.cpp index a62f2c134d65..f330ff0cf4d9 100644 --- a/packages/krino/krino/mesh_utils/Akri_MeshHelpers.cpp +++ b/packages/krino/krino/mesh_utils/Akri_MeshHelpers.cpp @@ -28,6 +28,24 @@ namespace krino{ +void populate_stk_local_ids(stk::mesh::BulkData & mesh) +{ + stk::mesh::Selector selector = mesh.mesh_meta_data().universal_part(); + for (auto rank = stk::topology::NODE_RANK; rank <= stk::topology::ELEMENT_RANK; ++rank) + { + unsigned localId = 0; + auto buckets = mesh.get_buckets(rank, selector); + for (auto && b : buckets) + { + for (unsigned i = 0; i < b->size(); ++i) + { + mesh.set_local_id((*b)[i], localId); + localId++; + } + } + } +} + void fill_node_ids_for_nodes(const stk::mesh::BulkData & mesh, const std::vector & parentNodes, std::vector & parentNodeIds) { 
parentNodeIds.clear(); @@ -47,6 +65,42 @@ stk::mesh::PartVector get_all_block_parts(const stk::mesh::MetaData & meta) return blockParts; } +size_t get_size_of_vector_indexable_by_entity_offset(const stk::mesh::BulkData & mesh, const stk::mesh::EntityRank entityRank) +{ + stk::mesh::Entity::entity_value_type maxEntity(0); + for ( auto && bucket : mesh.buckets(entityRank) ) + for ( auto && entity : *bucket ) + maxEntity = std::max(maxEntity, entity.local_offset()); + return maxEntity + 1; +} + +std::vector get_selected_side_attached_elements(const stk::mesh::BulkData &mesh, + const stk::mesh::Selector & elementSelector, + const stk::mesh::Entity elem) +{ + std::vector nbrs; + + std::vector elemNbrs; + std::vector elemSideNodes; + + const stk::mesh::Entity* elemNodes = mesh.begin_nodes(elem); + const stk::topology elemTopology = mesh.bucket(elem).topology(); + const unsigned numSides = elemTopology.num_sides(); + nbrs.reserve(numSides); + for (unsigned iside=0; iside & entities, stk::CommSparse &commSparse) { @@ -2639,7 +2693,33 @@ void communicate_owned_entities_to_ghosting_procs(const stk::mesh::BulkData & me { stk::CommSparse commSparse(mesh.parallel()); pack_owned_entities_for_ghosting_procs(mesh, entities, commSparse); - unpack_ghosted_entities_from_owners(mesh, entities, commSparse); + unpack_entities(mesh, entities, commSparse); +} + +static +void pack_for_owning_procs(const stk::mesh::BulkData & mesh, + const std::vector & entities, + stk::CommSparse &commSparse) +{ + std::vector elemCommProcs; + stk::pack_and_communicate(commSparse,[&]() + { + for (auto entity : entities) + { + if (!mesh.bucket(entity).owned()) + { + commSparse.send_buffer(mesh.parallel_owner_rank(entity)).pack(mesh.entity_key(entity)); + } + } + }); +} + +void communicate_entities_to_owning_proc(const stk::mesh::BulkData & mesh, const std::vector & entitiesToSend, std::vector & entitiesReceived) +{ + stk::CommSparse commSparse(mesh.parallel()); + pack_for_owning_procs(mesh, entitiesToSend, 
commSparse); + entitiesReceived.clear(); + unpack_entities(mesh, entitiesReceived, commSparse); } template @@ -2718,4 +2798,63 @@ void communicate_shared_nodes_to_sharing_procs_and_sort_and_unique(const stk::me stk::util::sort_and_unique(nodes, stk::mesh::EntityLess(mesh)); } +static void fill_part_changes_to_convert_entity(const stk::mesh::BulkData & mesh, + const std::map & partOrdinalMapping, + const stk::mesh::Entity entity, + stk::mesh::PartVector & addParts, + stk::mesh::PartVector & removeParts) +{ + addParts.clear(); + removeParts.clear(); + for (auto * part : mesh.bucket(entity).supersets()) + { + const auto iter = partOrdinalMapping.find(part->mesh_meta_data_ordinal()); + if (iter != partOrdinalMapping.end()) + { + removeParts.push_back(part); + if (iter->second >= 0) // Negative part ordinal used to indicate invalid mapping + addParts.push_back(&mesh.mesh_meta_data().get_part(iter->second)); + } + } +} + +static void append_part_changes_to_convert_entity(const stk::mesh::BulkData & mesh, + const std::map & partOrdinalMapping, + const stk::mesh::Entity entity, + std::vector & entitiesToChange, + std::vector & addParts, + std::vector & removeParts) +{ + stk::mesh::PartVector entityAddParts; + stk::mesh::PartVector entityRemoveParts; + fill_part_changes_to_convert_entity(mesh, partOrdinalMapping, entity, entityAddParts, entityRemoveParts); + entitiesToChange.push_back(entity); + addParts.push_back(entityAddParts); + removeParts.push_back(entityRemoveParts); +} + +static void append_part_changes_to_convert_element_and_sides(const stk::mesh::BulkData & mesh, + const std::map & partOrdinalMapping, + const stk::mesh::Entity elem, + std::vector & entitiesToChange, + std::vector & addParts, + std::vector & removeParts) +{ + const stk::mesh::EntityRank sideRank = mesh.mesh_meta_data().side_rank(); + append_part_changes_to_convert_entity(mesh, partOrdinalMapping, elem, entitiesToChange, addParts, removeParts); + for (auto side : StkMeshEntities{mesh.begin(elem, 
sideRank), mesh.end(elem, sideRank)}) + append_part_changes_to_convert_entity(mesh, partOrdinalMapping, side, entitiesToChange, addParts, removeParts); +} + +void batch_convert_elements_and_their_sides(stk::mesh::BulkData & mesh, const std::map & partOrdinalMapping, const std::vector & elements) +{ + std::vector entitiesToChange; + std::vector addParts; + std::vector removeParts; + for (auto elem : elements) + append_part_changes_to_convert_element_and_sides(mesh, partOrdinalMapping, elem, entitiesToChange, addParts, removeParts); + + mesh.batch_change_entity_parts(entitiesToChange, addParts, removeParts); +} + } // namespace krino diff --git a/packages/krino/krino/mesh_utils/Akri_MeshHelpers.hpp b/packages/krino/krino/mesh_utils/Akri_MeshHelpers.hpp index 3311f1d80a15..320e0814aadc 100644 --- a/packages/krino/krino/mesh_utils/Akri_MeshHelpers.hpp +++ b/packages/krino/krino/mesh_utils/Akri_MeshHelpers.hpp @@ -44,6 +44,25 @@ struct StkMeshEntities value_type operator[](int i) const { return *(mBegin + i); } }; +template +std::array get_triangle_vector(const stk::mesh::BulkData & mesh, const FieldRef vecField, const NodeContainer & triangleNodes) +{ + return {{ get_vector_field(mesh, vecField, triangleNodes[0]), get_vector_field(mesh, vecField, triangleNodes[1]), get_vector_field(mesh, vecField, triangleNodes[2]) }}; +} + +template +std::array get_triangle_vector(const stk::mesh::BulkData & mesh, const FieldRef vecField, const NodeContainer & triangleNodes, const int dim) +{ + return {{ get_vector_field(mesh, vecField, triangleNodes[0], dim), get_vector_field(mesh, vecField, triangleNodes[1], dim), get_vector_field(mesh, vecField, triangleNodes[2], dim) }}; +} + +template +std::array get_triangle_scalar(const stk::mesh::BulkData & mesh, const FieldRef field, const NodeContainer & triangleNodes) +{ + return {{ get_scalar_field(mesh, field, triangleNodes[0]), get_scalar_field(mesh, field, triangleNodes[1]), get_scalar_field(mesh, field, triangleNodes[2]) }}; +} + +void 
populate_stk_local_ids(stk::mesh::BulkData & mesh); void fill_node_ids_for_nodes(const stk::mesh::BulkData & mesh, const std::vector & parentNodes, std::vector & parentNodeIds); stk::mesh::PartVector get_all_block_parts(const stk::mesh::MetaData & meta); double * get_field_data(const stk::mesh::BulkData& mesh, const FieldRef field, const stk::mesh::Entity entity); @@ -52,7 +71,7 @@ stk::math::Vector3d get_vector_field(const stk::mesh::BulkData& mesh, const Fiel stk::math::Vector3d get_vector_field(const stk::mesh::BulkData& mesh, const FieldRef vecField, const stk::mesh::Entity entity, const unsigned vecLen); bool is_less_than_in_x_then_y_then_z(const stk::math::Vector3d& A, const stk::math::Vector3d &B); size_t get_global_num_entities(const stk::mesh::BulkData& mesh, stk::mesh::EntityRank entityRank); -size_t get_global_num_entities(const stk::mesh::BulkData& mesh, stk::mesh::Part & part); +size_t get_global_num_entities(const stk::mesh::BulkData& mesh, const stk::mesh::Part & part); double compute_tri_volume(const std::array & elementNodeCoords); double compute_tri_volume(const std::array & elementNodeCoords); double compute_tri_volume(const stk::math::Vector3d * elementNodeCoords); @@ -74,6 +93,7 @@ bool check_induced_parts(const stk::mesh::BulkData & mesh); void attach_sides_to_elements(stk::mesh::BulkData & mesh); void attach_entity_to_element(stk::mesh::BulkData & mesh, const stk::mesh::EntityRank entityRank, const stk::mesh::Entity entity, const stk::mesh::Entity element); void attach_entity_to_elements(stk::mesh::BulkData & mesh, stk::mesh::Entity entity); +std::vector get_selected_side_attached_elements(const stk::mesh::BulkData &mesh, const stk::mesh::Selector & elementSelector, const stk::mesh::Entity elem); void unpack_entities_from_other_procs(const stk::mesh::BulkData & mesh, std::set & entities, stk::CommSparse &commSparse); void pack_entities_for_sharing_procs(const stk::mesh::BulkData & mesh, const std::vector & entities, stk::CommSparse 
&commSparse); std::vector unpack_entities_from_other_procs(const stk::mesh::BulkData & mesh, stk::CommSparse &commSparse); @@ -165,6 +185,7 @@ void batch_create_sides(stk::mesh::BulkData & mesh, const std::vector< SideDescr void make_side_ids_consistent_with_stk_convention(stk::mesh::BulkData & mesh); void communicate_owned_entities_to_ghosting_procs(const stk::mesh::BulkData & mesh, std::vector & entities); +void communicate_entities_to_owning_proc(const stk::mesh::BulkData & mesh, const std::vector & entitiesToSend, std::vector & entitiesReceived); void communicate_shared_nodes_to_sharing_procs(const stk::mesh::BulkData & mesh, std::set & nodes); void communicate_shared_nodes_to_sharing_procs_and_sort_and_unique(const stk::mesh::BulkData & mesh, std::vector & nodes); @@ -212,6 +233,17 @@ void pack_entities_for_owning_proc(const stk::mesh::BulkData & mesh, }); } +size_t get_size_of_vector_indexable_by_entity_offset(const stk::mesh::BulkData & mesh, const stk::mesh::EntityRank entityRank); + +template +std::vector create_vector_indexable_by_entity_offset(const stk::mesh::BulkData & mesh, const stk::mesh::EntityRank entityRank, const T & initialVal) +{ + std::vector vec(get_size_of_vector_indexable_by_entity_offset(mesh, entityRank), initialVal); + return vec; +} + +void batch_convert_elements_and_their_sides(stk::mesh::BulkData & mesh, const std::map & partOrdinalMapping, const std::vector & elements); + } // namespace krino #endif // Akri_MeshHelpers_h diff --git a/packages/krino/krino/mesh_utils/Akri_QuadFace.cpp b/packages/krino/krino/mesh_utils/Akri_QuadFace.cpp new file mode 100644 index 000000000000..5ba83014e47e --- /dev/null +++ b/packages/krino/krino/mesh_utils/Akri_QuadFace.cpp @@ -0,0 +1,74 @@ +#include "Akri_QuadFace.hpp" + +#include + +#include +#include +#include +#include "Akri_MeshHelpers.hpp" + +namespace krino { + +static QuadFace quad_face_from_quad_node_offsets(stk::mesh::Entity::entity_value_type quadNodeOffset0, + 
stk::mesh::Entity::entity_value_type quadNodeOffset1, + stk::mesh::Entity::entity_value_type quadNodeOffset2, + stk::mesh::Entity::entity_value_type quadNodeOffset3) +{ + static_assert(std::is_same::value, "stk::mesh::Entity must be 32 bit."); + const std::bitset<128> quadFaceValue = (std::bitset<128>(quadNodeOffset3) << 96) | (std::bitset<128>(quadNodeOffset2) << 64) | (std::bitset<128>(quadNodeOffset1) << 32) | std::bitset<128>(quadNodeOffset0); + return QuadFace(quadFaceValue); +} + +QuadFace quad_face_from_unordered_nodes(const stk::mesh::BulkData & mesh, const std::array & faceNodes) +{ + return quad_face_from_unordered_nodes(mesh, faceNodes[0], faceNodes[1], faceNodes[2], faceNodes[3]); +} + +QuadFace quad_face_from_unordered_nodes(const stk::mesh::BulkData & mesh, stk::mesh::Entity faceNode0, stk::mesh::Entity faceNode1, stk::mesh::Entity faceNode2, stk::mesh::Entity faceNode3) +{ + std::array faceNodes{faceNode0, faceNode1, faceNode2, faceNode3}; + std::sort(faceNodes.begin(), faceNodes.end(), stk::mesh::EntityLess(mesh)); + return quad_face_from_quad_node_offsets(faceNodes[0].local_offset(), faceNodes[1].local_offset(), faceNodes[2].local_offset(), faceNodes[3].local_offset()); +} + +std::array get_quad_face_nodes_sorted_by_id(const QuadFace quadFace) +{ + static_assert(std::is_same::value, "stk::mesh::Entity must be 32 bit."); + static constexpr std::bitset<128> all32bit(0xFFFFFFFF); + return std::array{stk::mesh::Entity((quadFace.value() & all32bit).to_ulong()), + stk::mesh::Entity(((quadFace.value() >> 32) & all32bit).to_ulong()), + stk::mesh::Entity(((quadFace.value() >> 64) & all32bit).to_ulong()), + stk::mesh::Entity((quadFace.value() >> 96).to_ulong())}; +} + +void append_entity_quad_faces(const stk::mesh::BulkData & mesh, const stk::topology entityTopology, const stk::mesh::Entity entity, std::vector & entityQuadFaces) +{ + const unsigned numFaces = entityTopology.num_faces(); + + const stk::mesh::Entity * entityNodes = mesh.begin_nodes(entity); + 
std::array faceNodes; + + for (unsigned iFace = 0; iFace < numFaces; ++iFace) + { + entityTopology.side_nodes(entityNodes, iFace, faceNodes.data()); + entityQuadFaces.push_back(quad_face_from_unordered_nodes(mesh, faceNodes)); + } +} + +void append_entity_quad_faces(const stk::mesh::BulkData & mesh, const stk::mesh::Entity entity, std::vector & entityQuadFaces) +{ + append_entity_quad_faces(mesh, mesh.bucket(entity).topology(), entity, entityQuadFaces); +} + +void fill_entity_quad_faces(const stk::mesh::BulkData & mesh, const stk::mesh::Entity entity, std::vector & entityQuadFaces) +{ + const stk::topology entityTopology = mesh.bucket(entity).topology(); + const unsigned numFaces = entityTopology.num_edges(); + + entityQuadFaces.clear(); + entityQuadFaces.reserve(numFaces); + + append_entity_quad_faces(mesh, entityTopology, entity, entityQuadFaces); +} + +} diff --git a/packages/krino/krino/mesh_utils/Akri_QuadFace.hpp b/packages/krino/krino/mesh_utils/Akri_QuadFace.hpp new file mode 100644 index 000000000000..4d9d8cc25974 --- /dev/null +++ b/packages/krino/krino/mesh_utils/Akri_QuadFace.hpp @@ -0,0 +1,76 @@ +/* + * Akri_QuadFace.hpp + * + * Created on: Apr 25, 2024 + * Author: drnoble + */ + +#ifndef KRINO_KRINO_MESH_UTILS_AKRI_QUADFACE_HPP_ +#define KRINO_KRINO_MESH_UTILS_AKRI_QUADFACE_HPP_ + +#include +#include +#include +#include + +#include + +namespace stk { namespace mesh { class BulkData; } } +namespace stk { namespace mesh { class Selector; } } +namespace stk { struct topology; } + +namespace krino { +struct QuadFace +{ + typedef std::bitset<128> quad_face_value_type; + + static constexpr quad_face_value_type InvalidQuadFace = quad_face_value_type(); + + quad_face_value_type mValue; + + QuadFace() : mValue(InvalidQuadFace) {} + + explicit QuadFace(quad_face_value_type value) : mValue(value) {} + + QuadFace operator=(quad_face_value_type val) { mValue = val; return *this;} + + quad_face_value_type value() const { return mValue; } + + bool is_valid() const { 
return mValue.any(); } + + bool operator==(QuadFace entity) const { return mValue == entity.mValue; } + bool operator==(quad_face_value_type val) const { return mValue == val; } + bool operator!=(QuadFace entity) const { return mValue != entity.mValue; } + bool operator!=(quad_face_value_type val) const { return mValue != val; } + bool operator<(QuadFace entity) const + { + for (int i = 128-1; i >= 0; i--) + if (mValue[i] ^ entity.mValue[i]) return entity.mValue[i]; + return false; + } +}; + +QuadFace quad_face_from_unordered_nodes(const stk::mesh::BulkData & mesh, const std::array & faceNodes); +QuadFace quad_face_from_unordered_nodes(const stk::mesh::BulkData & mesh, stk::mesh::Entity faceNode0, stk::mesh::Entity faceNode1, stk::mesh::Entity faceNode2, stk::mesh::Entity faceNode3); + +std::array get_quad_face_nodes_sorted_by_id(const QuadFace quadFace); + +void append_entity_quad_faces(const stk::mesh::BulkData & mesh, const stk::topology entityTopology, const stk::mesh::Entity entity, std::vector & entityQuadFaces); + +void append_entity_quad_faces(const stk::mesh::BulkData & mesh, const stk::mesh::Entity entity, std::vector & entityQuadFaces); +void fill_entity_quad_faces(const stk::mesh::BulkData & mesh, const stk::mesh::Entity entity, std::vector & entityQuadFaces); + +} + +template<> +struct std::hash +{ + std::size_t operator()(const krino::QuadFace & quadFace) const noexcept + { + return std::hash{}(quadFace.value()); + } +}; + + + +#endif /* KRINO_KRINO_MESH_UTILS_AKRI_QUADFACE_HPP_ */ diff --git a/packages/krino/krino/mesh_utils/Akri_SideAttachedElements.cpp b/packages/krino/krino/mesh_utils/Akri_SideAttachedElements.cpp new file mode 100644 index 000000000000..4ef35ec0804c --- /dev/null +++ b/packages/krino/krino/mesh_utils/Akri_SideAttachedElements.cpp @@ -0,0 +1,339 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace krino { + +static bool is_element_attached(const std::vector & 
isElemAttached, const stk::mesh::Entity elem) +{ + return isElemAttached[elem.local_offset()]; +} + +static void mark_element_as_attached_and_if_new_add_neighbors_on_stack(const stk::mesh::BulkData &mesh, + const stk::mesh::Selector & elementSelector, + const stk::mesh::Entity elem, + std::vector & isElemAttached, + std::vector & attachedElemStack) +{ + const auto elemOffset = elem.local_offset(); + if (!isElemAttached[elemOffset]) + { + isElemAttached[elemOffset] = true; + const std::vector nbrs = get_selected_side_attached_elements(mesh, elementSelector, elem); + for (auto & nbr : nbrs) + attachedElemStack.push_back(nbr); + } +} + +static stk::mesh::Entity stack_pop(std::vector &elemStack) +{ + const stk::mesh::Entity elem = elemStack.back(); + elemStack.pop_back(); + return elem; +} + +static void mark_elements_as_attached_and_recursively_check_neighbors(const stk::mesh::BulkData &mesh, + const stk::mesh::Selector & elementSelector, + std::vector & isElemAttached, + std::vector & attachedElemStack) +{ + while (!attachedElemStack.empty()) + { + const stk::mesh::Entity elem = stack_pop(attachedElemStack); + mark_element_as_attached_and_if_new_add_neighbors_on_stack(mesh, elementSelector, elem, isElemAttached, attachedElemStack); + } +} + +static void parallel_communicate_and_add_any_new_attached_elements_on_stack(const stk::mesh::BulkData &mesh, + std::vector & isElemAttached, + std::vector & attachedElemStack) +{ + std::vector unownedElemsAttachedOnThisProc; + for ( auto & bucket : mesh.get_buckets(stk::topology::ELEMENT_RANK, !mesh.mesh_meta_data().locally_owned_part())) + for ( auto elem : *bucket ) + if (is_element_attached(isElemAttached, elem)) + unownedElemsAttachedOnThisProc.push_back(elem); + std::vector ownedElemsAttachedOnOtherProcs; + communicate_entities_to_owning_proc(mesh, unownedElemsAttachedOnThisProc, ownedElemsAttachedOnOtherProcs); + + for (auto & elem : ownedElemsAttachedOnOtherProcs) + if (!is_element_attached(isElemAttached, elem)) + 
attachedElemStack.push_back(elem); +} + +static void parallel_sync_is_element_attached(const stk::mesh::BulkData &mesh, + const stk::mesh::Selector & elementSelector, + std::vector & isElemAttached, + std::vector & attachedElemStack) +{ + parallel_communicate_and_add_any_new_attached_elements_on_stack(mesh, isElemAttached, attachedElemStack); + while (stk::is_true_on_any_proc(mesh.parallel(), !attachedElemStack.empty())) + { + mark_elements_as_attached_and_recursively_check_neighbors(mesh, elementSelector, isElemAttached, attachedElemStack); + parallel_communicate_and_add_any_new_attached_elements_on_stack(mesh, isElemAttached, attachedElemStack); + } +} + +std::vector are_elements_side_attached_to_selected_sides(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & elementSelector, const stk::mesh::Selector & sideSelector) +{ + std::vector isElemAttached = create_vector_indexable_by_entity_offset(mesh, stk::topology::ELEMENT_RANK, false); + + std::vector attachedElemStack; + for ( auto & bucket : mesh.get_buckets(mesh.mesh_meta_data().side_rank(), sideSelector & mesh.mesh_meta_data().locally_owned_part()) ) + for ( auto side : *bucket ) + for (auto && elem : StkMeshEntities{mesh.begin_elements(side), mesh.end_elements(side)}) + if (elementSelector(mesh.bucket(elem))) + attachedElemStack.push_back(elem); + + mark_elements_as_attached_and_recursively_check_neighbors(mesh, elementSelector, isElemAttached, attachedElemStack); + + parallel_sync_is_element_attached(mesh, elementSelector, isElemAttached, attachedElemStack); + + return isElemAttached; +} + +std::vector get_selected_owned_side_unattached_elements(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & elementSelector, const stk::mesh::Selector & sideSelector) +{ + std::vector isElemAttached = are_elements_side_attached_to_selected_sides(mesh, elementSelector, sideSelector); + + std::vector unattachedElems; + for ( auto & bucket : mesh.get_buckets(stk::topology::ELEMENT_RANK, elementSelector & 
mesh.mesh_meta_data().locally_owned_part()) ) + for ( auto elem : *bucket ) + if (!is_element_attached(isElemAttached, elem)) + unattachedElems.push_back(elem); + return unattachedElems; +} + +static void assign_group_id_and_if_new_add_neighbors_on_stack(const stk::mesh::BulkData &mesh, + const stk::mesh::Selector & elementSelector, + const stk::mesh::Entity elem, + const size_t elementGroupId, + std::vector & elementGroupIds, + std::vector & attachedElemStack) +{ + const auto elemOffset = elem.local_offset(); + if (elementGroupIds[elemOffset] == 0) + { + elementGroupIds[elemOffset] = elementGroupId; + const std::vector nbrs = get_selected_side_attached_elements(mesh, elementSelector, elem); + for (auto & nbr : nbrs) + attachedElemStack.push_back(nbr); + } +} + +static void assign_element_group_id_and_recursively_check_neighbors(const stk::mesh::BulkData &mesh, + const stk::mesh::Selector & elementSelector, + const size_t elementGroupId, + std::vector & elementGroupIds, + std::vector & attachedElemStack) +{ + while (!attachedElemStack.empty()) + { + const stk::mesh::Entity elem = stack_pop(attachedElemStack); + assign_group_id_and_if_new_add_neighbors_on_stack(mesh, elementSelector, elem, elementGroupId, elementGroupIds, attachedElemStack); + } +} + +void assign_local_group_id_for_each_element(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & elementSelector, + const std::vector & ownedSelectedElements, + std::vector & elementGroupIds) +{ + std::vector attachedElemStack; + attachedElemStack.reserve(ownedSelectedElements.size()); + for ( auto elem : ownedSelectedElements ) + { + const auto elemOffset = elem.local_offset(); + if(elementGroupIds[elemOffset] == 0) + { + attachedElemStack.assign(1, elem); + const size_t elementGroupId = mesh.identifier(elem); + assign_element_group_id_and_recursively_check_neighbors(mesh, elementSelector, elementGroupId, elementGroupIds, attachedElemStack); + } + } +} + +static std::vector 
get_elements_not_in_given_group(const stk::mesh::BulkData & mesh, + const std::vector & ownedSelectedElements, + const size_t elementGroupId, + const std::vector & elementGroupIds) +{ + std::vector ownedElementsNotInGroup; + for ( auto elem : ownedSelectedElements ) + { + const auto elemOffset = elem.local_offset(); + if(elementGroupIds[elemOffset] != elementGroupId) + ownedElementsNotInGroup.push_back(elem); + } + return ownedElementsNotInGroup; +} + +void pack_element_group_ids_for_ghosting_procs(const stk::mesh::BulkData &mesh, + const std::vector &elements, + const std::vector &localElementGroupIds, + const std::map &localToGlobalGroupIds, + stk::CommSparse &commSparse) +{ + std::vector elemCommProcs; + stk::pack_and_communicate(commSparse,[&]() + { + for (auto elem : elements) + { + if (mesh.bucket(elem).owned()) + { + mesh.comm_procs(elem, elemCommProcs); + const auto elemOffset = elem.local_offset(); + for (int procId : elemCommProcs) + { + if (procId != commSparse.parallel_rank()) + { + commSparse.send_buffer(procId).pack(mesh.identifier(elem)); + commSparse.send_buffer(procId).pack(localToGlobalGroupIds.at(localElementGroupIds[elemOffset])); + } + } + } + } + }); +} + +void receive_element_group_ids_from_owners_and_adjust_global_group_ids(const stk::mesh::BulkData &mesh, + const std::vector & localElementGroupIds, + std::map & localToGlobalGroupIds, + bool & didSomethingChange, + stk::CommSparse &commSparse) +{ + stk::unpack_communications(commSparse, [&](int procId) + { + stk::mesh::EntityId elemId = 0; + size_t newGroupId = 0; + commSparse.recv_buffer(procId).unpack(elemId); + commSparse.recv_buffer(procId).unpack(newGroupId); + + const stk::mesh::Entity elem = mesh.get_entity(stk::topology::ELEMENT_RANK, elemId); + STK_ThrowRequire(mesh.is_valid(elem)); + const auto elemOffset = elem.local_offset(); + const size_t localGroupId = localElementGroupIds[elemOffset]; + + if (localGroupId != 0) + { + auto iter = localToGlobalGroupIds.find(localGroupId); + 
STK_ThrowRequire(iter != localToGlobalGroupIds.end()); + if(newGroupId < iter->second) + { + didSomethingChange = true; + iter->second = newGroupId; + } + } + }); +} + +void make_local_to_global_group_id_mapping_parallel_consistent(const stk::mesh::BulkData &mesh, + const std::vector & ownedSelectedElements, + const std::vector & localElementGroupIds, + std::map & localToGlobalGroupIds) +{ + bool didSomethingChange = true; + while(didSomethingChange) + { + didSomethingChange = false; + stk::CommSparse commSparse(mesh.parallel()); + pack_element_group_ids_for_ghosting_procs(mesh, ownedSelectedElements, localElementGroupIds, localToGlobalGroupIds, commSparse); + receive_element_group_ids_from_owners_and_adjust_global_group_ids(mesh, localElementGroupIds, localToGlobalGroupIds, didSomethingChange, commSparse); + didSomethingChange = stk::is_true_on_any_proc(mesh.parallel(), didSomethingChange); + } +} + +std::map generate_parallel_consistent_local_to_global_group_id_mapping(const stk::mesh::BulkData &mesh, + const std::vector & ownedSelectedElements, + const std::vector & localElementGroupIds) +{ + std::map localToGlobalGroupIds; + for (size_t localElementGroupId : localElementGroupIds) + if (localElementGroupId != 0) + localToGlobalGroupIds[localElementGroupId] = localElementGroupId; + make_local_to_global_group_id_mapping_parallel_consistent(mesh, ownedSelectedElements, localElementGroupIds, localToGlobalGroupIds); + return localToGlobalGroupIds; +} + +static std::map get_local_group_ids_sizes(const std::vector & elementGroupIds) +{ + std::map localGroupIdSizes; + for (auto elementGroupId : elementGroupIds) + if (elementGroupId != 0) + ++(localGroupIdSizes[elementGroupId]); + return localGroupIdSizes; +} + +static void parallel_sum_group_id_sizes(std::map & groupIdSizes, stk::ParallelMachine comm) +{ + if (stk::parallel_machine_size(comm) == 1) + return; + stk::CommSparse commSparse(comm); + std::vector localGroupSizes; + 
localGroupSizes.reserve(2*groupIdSizes.size()); + for (auto & entry : groupIdSizes) + { + localGroupSizes.push_back(entry.first); + localGroupSizes.push_back(entry.second); + } + + std::vector globalGroupSizes; + stk::parallel_vector_concat(comm, localGroupSizes, globalGroupSizes); + + groupIdSizes.clear(); + for (size_t i=0; i & ownedSelectedElements, + const std::vector & elementGroupIds) +{ + std::map groupIdSizes = get_local_group_ids_sizes(elementGroupIds); + parallel_sum_group_id_sizes(groupIdSizes, mesh.parallel()); + const auto iter = std::max_element(groupIdSizes.begin(), groupIdSizes.end(), [](const std::pair& a, const std::pair& b){ return a.second < b.second; }); + STK_ThrowRequire(iter != groupIdSizes.end()); + return iter->first; +} + +void make_group_ids_parallel_consistent(const stk::mesh::BulkData & mesh, + const std::vector & ownedSelectedElements, + std::vector & elementGroupIds) +{ + const std::map localToGlobalGroupIds = generate_parallel_consistent_local_to_global_group_id_mapping(mesh, ownedSelectedElements, elementGroupIds); + for (auto & elementGroupId : elementGroupIds) + if (elementGroupId != 0) + elementGroupId = localToGlobalGroupIds.at(elementGroupId); +} + +static std::vector determine_element_group_ids(const stk::mesh::BulkData & mesh, + const stk::mesh::Selector & elementSelector, + const std::vector & ownedSelectedElements) +{ + const size_t initialValue = 0; + std::vector elementGroupIds = create_vector_indexable_by_entity_offset(mesh, stk::topology::ELEMENT_RANK, initialValue); + assign_local_group_id_for_each_element(mesh, elementSelector, ownedSelectedElements, elementGroupIds); + make_group_ids_parallel_consistent(mesh, ownedSelectedElements, elementGroupIds); + return elementGroupIds; +} + +std::vector find_owned_elements_that_are_not_in_the_largest_group_of_selected_side_attached_elements(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & elementSelector) +{ + std::vector ownedSelectedElements; + 
stk::mesh::get_selected_entities( elementSelector & mesh.mesh_meta_data().locally_owned_part(), mesh.buckets( stk::topology::ELEMENT_RANK ), ownedSelectedElements, false ); + + const std::vector elementGroupIds = determine_element_group_ids(mesh, elementSelector, ownedSelectedElements); + + const size_t groupIdOfLargestGroup = find_id_of_largest_group(mesh, ownedSelectedElements, elementGroupIds); + return get_elements_not_in_given_group(mesh, ownedSelectedElements, groupIdOfLargestGroup, elementGroupIds); +} + +} diff --git a/packages/krino/krino/mesh_utils/Akri_SideAttachedElements.hpp b/packages/krino/krino/mesh_utils/Akri_SideAttachedElements.hpp new file mode 100644 index 000000000000..e168357dd2e1 --- /dev/null +++ b/packages/krino/krino/mesh_utils/Akri_SideAttachedElements.hpp @@ -0,0 +1,18 @@ +#ifndef KRINO_KRINO_MESH_UTILS_AKRI_SIDEATTACHEDELEMENTS_HPP_ +#define KRINO_KRINO_MESH_UTILS_AKRI_SIDEATTACHEDELEMENTS_HPP_ +#include +#include + +namespace krino { + +std::vector are_elements_side_attached_to_selected_sides(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & elementSelector, const stk::mesh::Selector & sideSelector); + +std::vector get_selected_owned_side_unattached_elements(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & elementSelector, const stk::mesh::Selector & sideSelector); + +std::vector find_owned_elements_that_are_not_in_the_largest_group_of_selected_side_attached_elements(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & elementSelector); + +} + + + +#endif /* KRINO_KRINO_MESH_UTILS_AKRI_SIDEATTACHEDELEMENTS_HPP_ */ diff --git a/packages/krino/krino/parser/Akri_LevelSet_Parser.cpp b/packages/krino/krino/parser/Akri_LevelSet_Parser.cpp index 0df859d33478..c93d93fd088d 100644 --- a/packages/krino/krino/parser/Akri_LevelSet_Parser.cpp +++ b/packages/krino/krino/parser/Akri_LevelSet_Parser.cpp @@ -120,6 +120,21 @@ LevelSet_Parser::parse(const Parser::Node & region_node, stk::mesh::MetaData & m 
ls.set_redistance_method(redistance_method); } + std::string semilagrangianMethodName; + if (ls_node.get_if_present("semilagrangian_algorithm", semilagrangianMethodName)) + { + std::transform(semilagrangianMethodName.begin(), semilagrangianMethodName.end(), semilagrangianMethodName.begin(), ::toupper); + SemiLagrangianAlgorithm algType = NON_ADAPTIVE_SINGLE_STEP; + if (semilagrangianMethodName == "NON_ADAPTIVE_SINGLE_STEP") + algType = NON_ADAPTIVE_SINGLE_STEP; + else if (semilagrangianMethodName == "ADAPTIVE_PREDICTOR_CORRECTOR") + algType = ADAPTIVE_PREDICTOR_CORRECTOR; + else + stk::RuntimeWarningAdHoc() << "Unrecognized redistance method: " << redistance_method_name << std::endl; + + ls.set_semilagrangian_algorithm(algType); + } + bool perform_initial_redistance; if (ls_node.get_if_present("perform_initial_redistance", perform_initial_redistance)) { diff --git a/packages/krino/krino/parser/Akri_Region_Parser.cpp b/packages/krino/krino/parser/Akri_Region_Parser.cpp index 7cdbab3847cb..6ce26f99744c 100644 --- a/packages/krino/krino/parser/Akri_Region_Parser.cpp +++ b/packages/krino/krino/parser/Akri_Region_Parser.cpp @@ -23,6 +23,30 @@ namespace krino { +static void parse_postprocessors(const Parser::Node & regionNode, PostProcessors & postprocessors) +{ + const Parser::Node ppNodes = regionNode.get_sequence_if_present("postprocessors"); + if ( ppNodes ) + { + for ( auto && ppNode : ppNodes ) + { + std::string fieldName; + ppNode.get_if_present("field", fieldName); + if (fieldName.empty()) + { + stk::RuntimeDoomedAdHoc() << "Blank or missing field name in postprocessor.\n"; + } + std::string analyticalExpr; + ppNode.get_if_present("expression", analyticalExpr); + if (analyticalExpr.empty()) + { + stk::RuntimeDoomedAdHoc() << "Blank or missing expression in postprocessor.\n"; + } + postprocessors.add_scalar_postprocesor(fieldName, analyticalExpr); + } + } +} + void Region_Parser::parse(const Parser::Node & simulation_node, Simulation & simulation) { @@ -90,6 
+114,8 @@ Region_Parser::parse(const Parser::Node & simulation_node, Simulation & simulati refinementSupport.set_refinement_interval(refinementInterval); } + parse_postprocessors(region_node, region->get_postprocessors()); + const stk::diag::Timer & regionTimer = region->getRegionTimer(); Phase_Support::associate_FEModel_and_metadata(fem_model_name, meta); Surface_Manager::associate_FEModel_and_metadata(fem_model_name, meta); diff --git a/packages/krino/krino/refinement/Akri_HexRefiner.cpp b/packages/krino/krino/refinement/Akri_HexRefiner.cpp new file mode 100644 index 000000000000..0bc199846546 --- /dev/null +++ b/packages/krino/krino/refinement/Akri_HexRefiner.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +namespace krino { +namespace HexRefiner { + +unsigned determine_permutation_hex8(const unsigned caseId) +{ + STK_ThrowRequireMsg(caseId == 0 || caseId == 4095, "Unfinished capability"); + return 0; +} + +unsigned determine_permuted_case_id_hex8(const unsigned caseId) +{ + STK_ThrowRequireMsg(caseId == 0 || caseId == 4095, "Unfinished capability"); + return caseId; +} + +unsigned num_new_child_elements_hex8(const int caseId) +{ + switch(caseId) + { + case 0: + return 0; + case 4095: + return 8; + default: + { + std::ostringstream errorMsg; + errorMsg << "Case " << caseId << " non-uniform refinement not yet support for quads in num_new_child_elements_hex8."; + throw std::runtime_error(errorMsg.str()); + } + } +} + +static std::array permutation_node_ordinals_hex8(const unsigned permutation) +{ + stk::topology topo = stk::topology::HEXAHEDRON_27; + std::array permutedNodes; + topo.permutation_node_ordinals(permutation, permutedNodes.begin()); + return permutedNodes; +} + +static std::array permutation_side_ordinals_hex8(const unsigned permutation) +{ + STK_ThrowRequireMsg(permutation == 0, "Unfinished capability"); + std::array permutedSides{0,1,2,3,4,5}; + return permutedSides; +} + +std::vector 
refinement_child_nodes_and_sides_hex8(const unsigned caseId) +{ + std::vector childElems; + + const unsigned numChild = num_new_child_elements_hex8(caseId); + childElems.reserve(numChild); + + const unsigned permutedCaseId = determine_permuted_case_id_hex8(caseId); + const unsigned permutation = determine_permutation_hex8(caseId); + const auto permutedParentNodeOrdinals = permutation_node_ordinals_hex8(permutation); + const auto permutedParentSideOrdinals = permutation_side_ordinals_hex8(permutation); + + if (caseId == permutedCaseId) + { + static constexpr std::array,8> childElemNodesFullyRefined{{ + {{0,8,21,11,12,25,20,23}}, {{8,1,9,21,25,13,24,20}}, {{21,9,2,10,20,24,14,26}}, {{11,21,10,3,23,20,26,15}}, + {{12,25,20,23,4,16,22,19}}, {{25,13,24,20,16,5,17,22}}, {{20,24,14,26,22,17,6,18}}, {{23,20,26,15,19,22,18,7}} + }}; + static constexpr std::array,8> childElemSidesFullyRefined{{ + {{0,-1,-1,3,4,-1}}, {{0,1,-1,-1,4,-1}}, {{-1,1,2,-1,4,-1}}, {{-1,-1,2,3,4,-1}}, + {{0,-1,-1,3,-1,5}}, {{0,1,-1,-1,-1,5}}, {{-1,1,2,-1,-1,5}}, {{-1,-1,2,3,-1,5}} + }}; + append_child_elements(permutedParentNodeOrdinals, permutedParentSideOrdinals, childElemNodesFullyRefined, childElemSidesFullyRefined, childElems); + } + else + { + std::ostringstream errorMsg; + errorMsg << "Case " << caseId << " not supported in refine_hex_8."; + throw std::runtime_error(errorMsg.str()); + } + + STK_ThrowRequireMsg(numChild == childElems.size(), "Mismatch of size " << numChild << " " << childElems.size() << " for case " << caseId << " " << permutedCaseId); + + return childElems; +} + +}} diff --git a/packages/krino/krino/refinement/Akri_HexRefiner.hpp b/packages/krino/krino/refinement/Akri_HexRefiner.hpp new file mode 100644 index 000000000000..a011c8e9ef14 --- /dev/null +++ b/packages/krino/krino/refinement/Akri_HexRefiner.hpp @@ -0,0 +1,26 @@ +#ifndef KRINO_KRINO_REFINEMENT_AKRI_HEXREFINER_HPP_ +#define KRINO_KRINO_REFINEMENT_AKRI_HEXREFINER_HPP_ + +#include +#include + +namespace krino { 
+namespace HexRefiner { + +struct HexDescription +{ + std::array nodeIds; + std::array sideIds; +}; + +unsigned determine_permutation_hex8(const unsigned caseId); +unsigned determine_permuted_case_id_hex8(const unsigned caseId); +unsigned num_new_child_elements_hex8(const int caseId); +std::vector refinement_child_nodes_and_sides_hex8(const unsigned caseId); + +} +} + + + +#endif /* KRINO_KRINO_REFINEMENT_AKRI_HEXREFINER_HPP_ */ diff --git a/packages/krino/krino/refinement/Akri_NodeRefiner.cpp b/packages/krino/krino/refinement/Akri_NodeRefiner.cpp index bb45e7f1ecd9..59348acbde70 100644 --- a/packages/krino/krino/refinement/Akri_NodeRefiner.cpp +++ b/packages/krino/krino/refinement/Akri_NodeRefiner.cpp @@ -1,7 +1,10 @@ +#include #include "Akri_NodeRefiner.hpp" #include #include +#include +#include #include "Akri_ChildNodeCreator.hpp" #include "Akri_Edge.hpp" #include "Akri_EntityIdPool.hpp" @@ -51,6 +54,65 @@ void NodeRefiner::create_refined_edge_nodes(stk::mesh::BulkData & mesh, const st assign_refined_edge_node_parent_ids(mesh, refinedEdgeNodeParentIdsField); } +void NodeRefiner::create_refined_element_centroid_nodes(stk::mesh::BulkData & mesh, const stk::mesh::PartVector & refinedElemCentroidNodeParts) +{ + if(stk::is_true_on_all_procs(mesh.parallel(), myRefinedElementsToChildNodes.empty())) + return; + + std::sort(myRefinedElementsToChildNodes.begin(), myRefinedElementsToChildNodes.end()); + + const size_t numElementsToRefine = myRefinedElementsToChildNodes.size(); + + std::vector newNodeIds; + EntityIdPool::generate_new_ids(mesh, stk::topology::NODE_RANK, numElementsToRefine, newNodeIds, myAssert32Bit, myForce64Bit); + + for (size_t i=0; i(refinedQuadFaceNodeParentIdsField, refinedQuadFaceNode); + refinedQuadFaceNodeParentIds[0] = mesh.identifier(quadFaceNodes[0]); + refinedQuadFaceNodeParentIds[1] = mesh.identifier(quadFaceNodes[1]); + refinedQuadFaceNodeParentIds[2] = mesh.identifier(quadFaceNodes[2]); + refinedQuadFaceNodeParentIds[3] = 
mesh.identifier(quadFaceNodes[3]); + } +} + +void NodeRefiner::create_refined_quad_face_nodes(stk::mesh::BulkData & mesh, const stk::mesh::PartVector & refinedQuadFaceNodeParts, FieldRef refinedQuadFaceNodeParentIdsField) +{ + if(stk::is_true_on_all_procs(mesh.parallel(), myRefinedQuadFacesToChildNodes.empty())) + return; + + const size_t numQuadFacesToRefine = myRefinedQuadFacesToChildNodes.size(); + std::vector> quadFacesParentNodes; + quadFacesParentNodes.reserve(numQuadFacesToRefine); + std::vector childNodeRequests; + childNodeRequests.reserve(numQuadFacesToRefine); + for (auto && refinedQuadFaceToChildNodes : myRefinedQuadFacesToChildNodes) + { + quadFacesParentNodes.emplace_back(get_quad_face_nodes_sorted_by_id(refinedQuadFaceToChildNodes.first)); + auto & quadFaceParentNodes = quadFacesParentNodes.back(); + childNodeRequests.emplace_back(std::vector{&(quadFaceParentNodes[0]), &(quadFaceParentNodes[1]), &(quadFaceParentNodes[2]), &(quadFaceParentNodes[3])}, &(refinedQuadFaceToChildNodes.second)); + } + + auto generate_new_ids = [&](stk::topology::rank_t entityRank, size_t numIdsNeeded, std::vector& requestedIds) + { + EntityIdPool::generate_new_ids(mesh, entityRank, numIdsNeeded, requestedIds, myAssert32Bit, myForce64Bit); + }; + + batch_create_child_nodes(mesh, childNodeRequests, refinedQuadFaceNodeParts, generate_new_ids); + + assign_refined_quad_face_node_parent_ids(mesh, refinedQuadFaceNodeParentIdsField); +} + stk::mesh::Entity NodeRefiner::get_edge_child_node(const Edge edge) const { const auto iter = myRefinedEdgesToChildNodes.find(edge); @@ -59,7 +121,47 @@ stk::mesh::Entity NodeRefiner::get_edge_child_node(const Edge edge) const return stk::mesh::Entity(); } -static void prolong_edge_node(const stk::mesh::BulkData & mesh, const std::array & parentNodes, const stk::mesh::Entity childNode) +stk::mesh::Entity NodeRefiner::get_element_centroid_child_node(const stk::mesh::Entity elem) const +{ + auto lb_cmp = [](const std::pair & elemAndNode, 
stk::mesh::Entity searchElem) { return elemAndNode.first < searchElem; }; + const auto iter = std::lower_bound(myRefinedElementsToChildNodes.begin(), myRefinedElementsToChildNodes.end(), elem, lb_cmp); + if (iter != myRefinedElementsToChildNodes.end()) + return iter->second; + return stk::mesh::Entity(); +} + +stk::mesh::Entity NodeRefiner::get_element_child_face_node(const QuadFace quadFace) const +{ + const auto iter = myRefinedQuadFacesToChildNodes.find(quadFace); + if (iter != myRefinedQuadFacesToChildNodes.end()) + return iter->second; + return stk::mesh::Entity(); +} + +std::vector NodeRefiner::get_element_child_face_nodes(const stk::mesh::BulkData & mesh, const stk::mesh::Entity elem) const +{ + const stk::topology elemTopology = mesh.bucket(elem).topology(); + STK_ThrowAssert(elemTopology == stk::topology::HEXAHEDRON_8); + const stk::mesh::Entity* elemNodes = mesh.begin_nodes(elem); + + constexpr unsigned numSides = 6; + constexpr unsigned nodesPerSide = 4; + std::array elemSideNodes; + + std::vector childFaceNodes; + childFaceNodes.reserve(numSides); + for (unsigned iSide=0; iSide +static void prolong_child_node(const stk::mesh::BulkData & mesh, const NODECONTAINER & parentNodes, const stk::mesh::Entity childNode) { const stk::mesh::FieldVector & allFields = mesh.mesh_meta_data().get_fields(); for ( auto && stkField : allFields ) @@ -68,29 +170,44 @@ static void prolong_edge_node(const stk::mesh::BulkData & mesh, const std::array if( field.entity_rank() == stk::topology::NODE_RANK && field.type_is() ) { - const auto * parentNodeData0 = field_data(field, parentNodes[0]); - const auto * parentNodeData1 = field_data(field, parentNodes[1]); - if (nullptr != parentNodeData0 && nullptr != parentNodeData1) + auto * childNodeData = field_data(field, childNode); + + if (nullptr != childNodeData) { - auto * childNodeData = field_data(field, childNode); - if (nullptr != childNodeData) + const unsigned fieldLength = field.length(); + for (unsigned i=0; i(field, 
parentNode); + STK_ThrowAssertMsg(parentNodeData, "Child centroid node has field " << field.name() << " but the parent node " << mesh.identifier(parentNode) << " does not."); for (unsigned i=0; i elemNodes; + for (auto & refinedElementToChildNodes : myRefinedElementsToChildNodes) + { + const stk::mesh::Entity elem = refinedElementToChildNodes.first; + elemNodes.assign(mesh.begin_nodes(elem), mesh.end_nodes(elem)); + prolong_child_node(mesh, elemNodes, refinedElementToChildNodes.second); + } + + for (auto & refinedQuadFacesToChildNodes : myRefinedQuadFacesToChildNodes) + { + prolong_child_node(mesh, get_quad_face_nodes_sorted_by_id(refinedQuadFacesToChildNodes.first), refinedQuadFacesToChildNodes.second); } const stk::mesh::FieldVector & allFields = mesh.mesh_meta_data().get_fields(); @@ -104,12 +221,8 @@ void pack_shared_edges_to_refine(const std::vector(entry); - const auto & sharingProcs = std::get<1>(entry); - for (auto&& procId : sharingProcs) { if (procId != commSparse.parallel_rank()) @@ -122,11 +235,21 @@ void pack_shared_edges_to_refine(const std::vector & workspaceForEdgeElems) +{ + stk::mesh::get_entities_through_relations(mesh, stk::mesh::EntityVector{node0, node1}, stk::topology::ELEMENT_RANK, workspaceForEdgeElems); + for (auto & elem : workspaceForEdgeElems) + if (mesh.bucket(elem).owned()) + return true; + return false; +} + static -void unpack_shared_edges_to_refine(const stk::mesh::BulkData & mesh, +void unpack_shared_edges_to_refine_if_there_is_a_locally_owned_element_using_edge(const stk::mesh::BulkData & mesh, typename NodeRefiner::RefinedEdgeMap & edgesToRefine, stk::CommSparse &commSparse) { + std::vector workspaceForEdgeElems; stk::unpack_communications(commSparse, [&](int procId) { stk::CommBuffer & buffer = commSparse.recv_buffer(procId); @@ -140,36 +263,31 @@ void unpack_shared_edges_to_refine(const stk::mesh::BulkData & mesh, if (mesh.is_valid(node0)) { stk::mesh::Entity node1 = mesh.get_entity(edgeNodeKeys[1]); - if 
(mesh.is_valid(node1)) + if (mesh.is_valid(node1) && have_locally_owned_element_using_edge_nodes(mesh, node0, node1, workspaceForEdgeElems)) edgesToRefine.emplace(edge_from_edge_nodes(mesh, node0, node1), stk::mesh::Entity::InvalidEntity); } } }); } -static void fill_procs_that_own_elements_using_edge(const stk::mesh::BulkData & mesh, const std::array & edgeNodes, std::vector & workspaceForEdgeElems, std::vector & procsThatOwnElementsUsingEdge) +static void fill_procs_that_might_own_elements_using_edge(const stk::mesh::BulkData & mesh, const std::array & edgeNodes, std::vector & procsThatMightOwnElementsUsingEdge) { - stk::mesh::get_entities_through_relations(mesh, {edgeNodes[0], edgeNodes[1]}, stk::topology::ELEMENT_RANK, workspaceForEdgeElems); - procsThatOwnElementsUsingEdge.clear(); - for (auto edgeElem : workspaceForEdgeElems) - procsThatOwnElementsUsingEdge.push_back(mesh.parallel_owner_rank(edgeElem)); - stk::util::sort_and_unique(procsThatOwnElementsUsingEdge); + mesh.shared_procs_intersection({edgeNodes[0], edgeNodes[1]}, procsThatMightOwnElementsUsingEdge); } static std::vector,std::vector>> get_shared_edges_and_sharing_procs(const stk::mesh::BulkData & mesh, const typename NodeRefiner::RefinedEdgeMap & edgesToRefine) { - STK_ThrowAssert(mesh.is_automatic_aura_on()); // NOTE: Uses AURA to determine which procs have elements that use this edge and therefore should create the child node std::vector sharingProcs; std::vector,std::vector>> edgesNodeKeysAndSharingProcs; - std::vector edgeElems; + for (auto && edgeToRefine : edgesToRefine) { const Edge edge = edgeToRefine.first; const std::array edgeNodes = get_edge_nodes(edge); if (mesh.bucket(edgeNodes[0]).shared() && mesh.bucket(edgeNodes[1]).shared()) { - fill_procs_that_own_elements_using_edge(mesh, edgeNodes, edgeElems, sharingProcs); + fill_procs_that_might_own_elements_using_edge(mesh, edgeNodes, sharingProcs); edgesNodeKeysAndSharingProcs.emplace_back(std::array{mesh.entity_key(edgeNodes[0]), 
mesh.entity_key(edgeNodes[1])}, sharingProcs); } } @@ -184,12 +302,14 @@ void NodeRefiner::sync_shared_edges_from_other_procs_to_refine(const stk::mesh:: stk::CommSparse commSparse(mesh.parallel()); pack_shared_edges_to_refine(edgesNodeKeysAndSharingProcs, commSparse); - unpack_shared_edges_to_refine(mesh, myRefinedEdgesToChildNodes, commSparse); + unpack_shared_edges_to_refine_if_there_is_a_locally_owned_element_using_edge(mesh, myRefinedEdgesToChildNodes, commSparse); } -void NodeRefiner::clear_edges_to_refine() +void NodeRefiner::clear_entities_to_refine() { myRefinedEdgesToChildNodes.clear(); + myRefinedElementsToChildNodes.clear(); + myRefinedQuadFacesToChildNodes.clear(); } bool NodeRefiner::mark_edge_for_refinement(const Edge & edge) @@ -198,6 +318,11 @@ bool NodeRefiner::mark_edge_for_refinement(const Edge & edge) return result.second; } +bool NodeRefiner::mark_quad_face_for_refinement(const QuadFace & quadFace) +{ + auto result = myRefinedQuadFacesToChildNodes.emplace(quadFace, stk::mesh::Entity::InvalidEntity); + return result.second; +} bool NodeRefiner::mark_already_refined_edge(const Edge & edge, const stk::mesh::Entity refinedEdgeNode) { @@ -210,4 +335,10 @@ bool NodeRefiner::is_edge_marked_for_refinement(const Edge & edge) const return myRefinedEdgesToChildNodes.end() != myRefinedEdgesToChildNodes.find(edge); } +void NodeRefiner::mark_element_with_child_centroid_node_for_refinement(const stk::mesh::Entity & elem) +{ + myRefinedElementsToChildNodes.emplace_back(elem, stk::mesh::Entity::InvalidEntity); +} + + } diff --git a/packages/krino/krino/refinement/Akri_NodeRefiner.hpp b/packages/krino/krino/refinement/Akri_NodeRefiner.hpp index 2a78d2d7a50c..670b5c2d76da 100644 --- a/packages/krino/krino/refinement/Akri_NodeRefiner.hpp +++ b/packages/krino/krino/refinement/Akri_NodeRefiner.hpp @@ -6,6 +6,7 @@ #include #include #include "Akri_Edge.hpp" +#include "Akri_QuadFace.hpp" namespace krino { @@ -19,20 +20,36 @@ class NodeRefiner NodeRefiner(const bool 
force64Bit, const bool assert32Bit) : myForce64Bit(force64Bit), myAssert32Bit(assert32Bit) {} NodeRefiner ( const NodeRefiner & ) = delete; NodeRefiner & operator= ( const NodeRefiner & ) = delete; - typedef std::unordered_map RefinedEdgeMap; + typedef std::map RefinedEdgeMap; + typedef std::map RefinedQuadFaceMap; - void clear_edges_to_refine(); + void clear_entities_to_refine(); void sync_shared_edges_from_other_procs_to_refine(const stk::mesh::BulkData & mesh); bool mark_edge_for_refinement(const Edge & edge); bool is_edge_marked_for_refinement(const Edge & edge) const; + bool mark_quad_face_for_refinement(const QuadFace & quadFace); void create_refined_edge_nodes(stk::mesh::BulkData & mesh, const stk::mesh::PartVector & refinedEdgeNodeParts, FieldRef refinedEdgeNodeParentIdsField); + void create_refined_element_centroid_nodes(stk::mesh::BulkData & mesh, const stk::mesh::PartVector & refinedElemCentroidNodeParts); + void assign_refined_quad_face_node_parent_ids(const stk::mesh::BulkData & mesh, FieldRef refinedQuadFaceNodeParentIdsField) const; + void create_refined_quad_face_nodes(stk::mesh::BulkData & mesh, const stk::mesh::PartVector & refinedQuadFaceNodeParts, FieldRef refinedQuadFaceNodeParentIdsField); size_t get_num_edges_to_refine() const; + bool locally_have_edges_to_refine() const { return !myRefinedEdgesToChildNodes.empty(); } stk::mesh::Entity get_edge_child_node(const Edge edge) const; - void prolong_refined_edge_nodes(const stk::mesh::BulkData & mesh) const; + stk::mesh::Entity get_element_centroid_child_node(const stk::mesh::Entity elem) const; + stk::mesh::Entity get_element_child_face_node(const QuadFace quadFace) const; + std::vector get_element_child_face_nodes(const stk::mesh::BulkData & mesh, const stk::mesh::Entity elem) const; + void prolong_refined_nodes(const stk::mesh::BulkData & mesh) const; bool mark_already_refined_edge(const Edge & edge, const stk::mesh::Entity refinedEdgeNode); + void 
mark_element_with_child_centroid_node_for_refinement(const stk::mesh::Entity & elem); + void set_sorted_edge_nodes_that_will_be_removed_by_unrefinement(const std::vector & sortedOwnedOrSharedEdgeNodesToBeRemovedByUnrefinement) + { mySortedOwnedOrSharedEdgeNodesToBeRemovedByUnrefinement = sortedOwnedOrSharedEdgeNodesToBeRemovedByUnrefinement; } + const std::vector & get_sorted_edge_nodes_that_will_be_removed_by_unrefinement() const { return mySortedOwnedOrSharedEdgeNodesToBeRemovedByUnrefinement; } private: void assign_refined_edge_node_parent_ids(const stk::mesh::BulkData & mesh, FieldRef refinedEdgeNodeParentIdsField) const; RefinedEdgeMap myRefinedEdgesToChildNodes; + RefinedQuadFaceMap myRefinedQuadFacesToChildNodes; + std::vector> myRefinedElementsToChildNodes; + std::vector mySortedOwnedOrSharedEdgeNodesToBeRemovedByUnrefinement; bool myForce64Bit; bool myAssert32Bit; }; diff --git a/packages/krino/krino/refinement/Akri_QuadRefiner.cpp b/packages/krino/krino/refinement/Akri_QuadRefiner.cpp new file mode 100644 index 000000000000..c2ddafccbe18 --- /dev/null +++ b/packages/krino/krino/refinement/Akri_QuadRefiner.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +namespace krino { +namespace QuadRefiner { + +unsigned determine_permutation_quad4(const unsigned caseId) +{ + STK_ThrowRequireMsg(caseId == 0 || caseId == 15, "Unfinished capability"); + static constexpr std::array permutations{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + return permutations[caseId]; +} + +unsigned determine_permuted_case_id_quad4(const unsigned caseId) +{ + STK_ThrowRequireMsg(caseId == 0 || caseId == 15, "Unfinished capability"); + static constexpr std::array permutedCaseIds{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15}; + return permutedCaseIds[caseId]; +} + +unsigned num_new_child_elements_quad4(const int caseId) +{ + switch(caseId) + { + case 0: + return 0; + case 15: + return 4; + default: + { + std::ostringstream errorMsg; + errorMsg << "Case " << caseId << " 
non-uniform refinement not yet support for quads in num_new_child_elements_quad4."; + throw std::runtime_error(errorMsg.str()); + } + } +} + +static std::array permutation_node_ordinals_quad4(const unsigned permutation) +{ + stk::topology topo = stk::topology::QUADRILATERAL_9_2D; + std::array permutedNodes; + topo.permutation_node_ordinals(permutation, permutedNodes.begin()); + return permutedNodes; +} + +static std::array permutation_side_ordinals_quad4(const unsigned permutation) +{ + // nodes and sides permute the same way + stk::topology topo = stk::topology::QUADRILATERAL_4_2D; + std::array permutedSides; + topo.permutation_node_ordinals(permutation, permutedSides.begin()); + return permutedSides; +} + +std::vector refinement_child_nodes_and_sides_quad4(const unsigned caseId) +{ + std::vector childElems; + + const unsigned numChild = num_new_child_elements_quad4(caseId); + childElems.reserve(numChild); + + const unsigned permutedCaseId = determine_permuted_case_id_quad4(caseId); + const unsigned permutation = determine_permutation_quad4(caseId); + const auto permutedParentNodeOrdinals = permutation_node_ordinals_quad4(permutation); + const auto permutedParentSideOrdinals = permutation_side_ordinals_quad4(permutation); + + switch(permutedCaseId) + { + case 15: + { + static constexpr std::array,4> childElemNodesFullyRefined{{ {{0,4,8,7}}, {{1,5,8,4}}, {{2,6,8,5}}, {{3,7,8,6}} }}; + static constexpr std::array,4> childElemSidesFullyRefined{{ {{0,-1,-1,3}}, {{1,-1,-1,0}}, {{2,-1,-1,1}}, {{3,-1,-1,2}} }}; + append_child_elements(permutedParentNodeOrdinals, permutedParentSideOrdinals, childElemNodesFullyRefined, childElemSidesFullyRefined, childElems); + break; + } + default: + { + std::ostringstream errorMsg; + errorMsg << "Case " << permutedCaseId << " not supported in refinement_child_nodes_and_sides_quad4."; + throw std::runtime_error(errorMsg.str()); + } + } + + STK_ThrowRequireMsg(numChild == childElems.size(), "Mismatch of size " << numChild << " " << 
childElems.size() << " for case " << caseId << " " << permutedCaseId); + + return childElems; +} + +}} + diff --git a/packages/krino/krino/refinement/Akri_QuadRefiner.hpp b/packages/krino/krino/refinement/Akri_QuadRefiner.hpp new file mode 100644 index 000000000000..14de309c7769 --- /dev/null +++ b/packages/krino/krino/refinement/Akri_QuadRefiner.hpp @@ -0,0 +1,26 @@ +#ifndef KRINO_KRINO_REFINEMENT_AKRI_QUADREFINER_HPP_ +#define KRINO_KRINO_REFINEMENT_AKRI_QUADREFINER_HPP_ + +#include +#include + +namespace krino { +namespace QuadRefiner { + +struct QuadDescription +{ + std::array nodeIds; + std::array sideIds; +}; + +unsigned determine_permutation_quad4(const unsigned caseId); +unsigned determine_permuted_case_id_quad4(const unsigned caseId); +unsigned num_new_child_elements_quad4(const int caseId); +std::vector refinement_child_nodes_and_sides_quad4(const unsigned caseId); + +} +} + + + +#endif /* KRINO_KRINO_REFINEMENT_AKRI_QUADREFINER_HPP_ */ diff --git a/packages/krino/krino/refinement/Akri_Refinement.cpp b/packages/krino/krino/refinement/Akri_Refinement.cpp index daa873dd8a48..39cdc3c37c66 100644 --- a/packages/krino/krino/refinement/Akri_Refinement.cpp +++ b/packages/krino/krino/refinement/Akri_Refinement.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include #include "Akri_ParallelErrorMessage.hpp" #include "Akri_ChildNodeCreator.hpp" @@ -16,6 +18,7 @@ #include "Akri_MOAB_TetRefiner.hpp" #include "Akri_NodeRefiner.hpp" #include "Akri_TransitionElementEdgeMarker.hpp" +#include #include "Akri_TriRefiner.hpp" #include "Akri_NodeRefiner.hpp" @@ -26,6 +29,7 @@ void Refinement::declare_refinement_parts() myParentPart = &myMeta.declare_part("Refinement_Parent", stk::topology::ELEMENT_RANK, true); myChildPart = &myMeta.declare_part("Refinement_Child", stk::topology::ELEMENT_RANK, true); myRefinedEdgeNodePart = &myMeta.declare_part_with_topology("Refinement_Edge_Node", stk::topology::NODE); + 
myRefinedQuadFaceNodePart = &myMeta.declare_part_with_topology("Refinement_QuadFace_Node", stk::topology::NODE); } void Refinement::declare_refinement_fields() @@ -33,16 +37,36 @@ void Refinement::declare_refinement_fields() if (3 == myMeta.spatial_dimension()) { myChildElementIds8Field = &myMeta.declare_field(stk::topology::ELEMENT_RANK, "REFINEMENT_CHILD_ELEMENT_IDS_8"); - const stk::topology elemTopology = stk::topology::TETRAHEDRON_4; - const stk::mesh::Part & tet4TopologyPart = myMeta.get_topology_root_part(elemTopology); - stk::mesh::put_field_on_mesh(*myChildElementIds8Field, parent_part() & tet4TopologyPart, get_num_children_when_fully_refined(elemTopology), nullptr); + + const stk::topology tet4Topology = stk::topology::TETRAHEDRON_4; + const stk::mesh::Part & tet4TopologyPart = myMeta.get_topology_root_part(tet4Topology); + + const stk::topology hex8Topology = stk::topology::HEXAHEDRON_8; + const stk::mesh::Part & hex8TopologyPart = myMeta.get_topology_root_part(hex8Topology); + + stk::mesh::put_field_on_mesh(*myChildElementIds8Field, parent_part() & tet4TopologyPart, get_num_children_when_fully_refined(tet4Topology), nullptr); + stk::mesh::put_field_on_mesh(*myChildElementIds8Field, parent_part() & hex8TopologyPart, get_num_children_when_fully_refined(hex8Topology), nullptr); } else if (2 == myMeta.spatial_dimension()) { myChildElementIds4Field = &myMeta.declare_field(stk::topology::ELEMENT_RANK, "REFINEMENT_CHILD_ELEMENT_IDS_4"); - const stk::topology elemTopology = stk::topology::TRIANGLE_3_2D; - const stk::mesh::Part & tri3TopologyPart = myMeta.get_topology_root_part(elemTopology); - stk::mesh::put_field_on_mesh(*myChildElementIds4Field, parent_part() & tri3TopologyPart, get_num_children_when_fully_refined(elemTopology), nullptr); + const stk::topology tri3Topology = stk::topology::TRIANGLE_3_2D; + const stk::mesh::Part & tri3TopologyPart = myMeta.get_topology_root_part(tri3Topology); + + const stk::topology quad4Topology = 
stk::topology::QUADRILATERAL_4_2D; + const stk::mesh::Part & quad4TopologyPart = myMeta.get_topology_root_part(quad4Topology); + + stk::mesh::put_field_on_mesh(*myChildElementIds4Field, parent_part() & tri3TopologyPart, get_num_children_when_fully_refined(tri3Topology), nullptr); + stk::mesh::put_field_on_mesh(*myChildElementIds4Field, parent_part() & quad4TopologyPart, get_num_children_when_fully_refined(quad4Topology), nullptr); + } + + { + myChildElementIds2Field = &myMeta.declare_field(stk::topology::ELEMENT_RANK, "REFINEMENT_CHILD_ELEMENT_IDS_2"); + + const stk::topology beam2Topology = stk::topology::BEAM_2; + const stk::mesh::Part & beam2TopologyPart = myMeta.get_topology_root_part(beam2Topology); + + stk::mesh::put_field_on_mesh(*myChildElementIds2Field, parent_part() & beam2TopologyPart, get_num_children_when_fully_refined(beam2Topology), nullptr); } myRefinementLevelField = &myMeta.declare_field(stk::topology::ELEMENT_RANK, "REFINEMENT_LEVEL"); @@ -54,6 +78,9 @@ void Refinement::declare_refinement_fields() myRefinedEdgeNodeParentIdsField = &myMeta.declare_field(stk::topology::NODE_RANK, "REFINEMENT_REFINED_EDGE_NODE_PARENTS_IDS"); stk::mesh::put_field_on_mesh(*myRefinedEdgeNodeParentIdsField, refined_edge_node_part(), 2, nullptr); + myRefinedQuadFaceNodeParentIdsField = &myMeta.declare_field(stk::topology::NODE_RANK, "REFINEMENT_REFINED_QUAD_FACE_NODE_PARENTS_IDS"); + stk::mesh::put_field_on_mesh(*myRefinedQuadFaceNodeParentIdsField, refined_quad_face_node_part(), 4, nullptr); + myOriginatingProcForParentElementField = &myMeta.declare_field(stk::topology::ELEMENT_RANK, "ORIGINATING_PROC_FOR_PARENT_ELEMENT"); stk::mesh::put_field_on_mesh(*myOriginatingProcForParentElementField, myMeta.universal_part(), 1, nullptr); // needed everywhere for restart, otherwise could be parent_part } @@ -148,9 +175,13 @@ unsigned Refinement::get_num_children_when_fully_refined(const stk::topology ele { switch(elementTopology) { + case stk::topology::BEAM_2: + return 2; case 
stk::topology::TRIANGLE_3_2D: + case stk::topology::QUADRILATERAL_4_2D: return 4; case stk::topology::TETRAHEDRON_4: + case stk::topology::HEXAHEDRON_8: return 8; default: ThrowRuntimeError("Element topology not found in get_num_children_when_fully_refined: " << elementTopology.name()); @@ -163,6 +194,14 @@ unsigned Refinement::get_num_children_when_fully_refined(const stk::mesh::Entity return get_num_children_when_fully_refined(myMeta.mesh_bulk_data().bucket(elem).topology()); } +std::array Refinement::get_edge_parent_node_ids(const stk::mesh::Entity edgeNode) const +{ + STK_ThrowAssertMsg(myRefinedEdgeNodeParentIdsField, "Edge Node Ids field is not defined."); + auto * edgeNodeIds = stk::mesh::field_data(*myRefinedEdgeNodeParentIdsField, edgeNode); + STK_ThrowAssertMsg(edgeNodeIds != nullptr, "Edge Node Ids field missing on node " << myMeta.mesh_bulk_data().identifier(edgeNode)); + return {{edgeNodeIds[0], edgeNodeIds[1]}}; +} + std::array Refinement::get_edge_parent_nodes(const stk::mesh::Entity edgeNode) const { const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); @@ -293,6 +332,12 @@ stk::mesh::Part & Refinement::refined_edge_node_part() const return *myRefinedEdgeNodePart; } +stk::mesh::Part & Refinement::refined_quad_face_node_part() const +{ + STK_ThrowAssert(myRefinedQuadFaceNodePart); + return *myRefinedQuadFaceNodePart; +} + stk::math::Vector3d Refinement::get_coordinates(const stk::mesh::Entity node, const int dim) const { STK_ThrowAssertMsg(myCoordsField, "Coordinates field is not defined."); @@ -301,15 +346,6 @@ stk::math::Vector3d Refinement::get_coordinates(const stk::mesh::Entity node, co return stk::math::Vector3d(coordsData, dim); } -static int get_edge_refinement_case_id(const stk::mesh::BulkData & mesh, const std::vector & elemChildEdgeNodes) -{ - int caseId = 0; - for (size_t i=0; i static stk::mesh::Entity declare_child_element(stk::mesh::BulkData & mesh, EntityIdPool & entityIdPool, @@ -421,6 +457,9 @@ stk::mesh::Field & 
Refinement::get_child_element_ids_field(const unsig { switch(numChildWhenFullyRefined) { + case 2: + STK_ThrowAssert(myChildElementIds2Field); + return *myChildElementIds2Field; case 4: STK_ThrowAssert(myChildElementIds4Field); return *myChildElementIds4Field; @@ -569,6 +608,33 @@ static void restrict_element_fields(const stk::mesh::BulkData & mesh, } } +void Refinement::refine_beam_2_and_append_sides_to_create(const stk::mesh::PartVector & childParts, + const stk::mesh::Entity parentElem, + const std::vector & elemChildEdgeNodes, + const int caseId, + std::vector & sideRequests) +{ + stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); + const stk::mesh::Entity * parentNodes = mesh.begin_nodes(parentElem); + const std::array parentElemNodes{ parentNodes[0], parentNodes[1], elemChildEdgeNodes[0] }; + + struct BeamDescription + { + std::array nodeIds; + std::array sideIds; + }; + + // Do we need to handle sides of a beam? + const std::vector newElems = { BeamDescription{ {{0,2}}, {{}} }, BeamDescription{ {{2,1}}, {{}} } }; + + std::vector childElements; + declare_child_elements_and_append_sides_to_create(mesh, myEntityIdPool, childParts, parentElem, parentElemNodes, newElems, childElements, sideRequests); + + set_parent_parts_and_parent_child_relation_fields(parentElem, childElements, 2); + + prolong_element_fields(mesh, parentElem, childElements); +} + void Refinement::refine_tri_3_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, const std::vector & elemChildEdgeNodes, @@ -589,10 +655,30 @@ void Refinement::refine_tri_3_and_append_sides_to_create(const stk::mesh::PartVe declare_child_elements_and_append_sides_to_create(mesh, myEntityIdPool, childParts, parentElem, parentElemNodes, newTris, childElements, sideRequests); set_parent_parts_and_parent_child_relation_fields(parentElem, childElements, 4); - //prolong element fields + prolong_element_fields(mesh, parentElem, childElements); } +void 
Refinement::refine_quad_4_and_append_sides_to_create(const stk::mesh::PartVector & childParts, + const stk::mesh::Entity parentElem, + const std::vector & elemChildEdgeNodes, + const int caseId, + std::vector & sideRequests) +{ + stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); + const stk::mesh::Entity * parentNodes = mesh.begin_nodes(parentElem); + const stk::mesh::Entity elemCentroidNode = myNodeRefiner.get_element_centroid_child_node(parentElem); + const std::array parentElemNodes{ parentNodes[0], parentNodes[1], parentNodes[2], parentNodes[3], elemChildEdgeNodes[0], elemChildEdgeNodes[1], elemChildEdgeNodes[2], elemChildEdgeNodes[3], elemCentroidNode }; + + const std::vector newElems = QuadRefiner::refinement_child_nodes_and_sides_quad4(caseId); + + std::vector childElements; + declare_child_elements_and_append_sides_to_create(mesh, myEntityIdPool, childParts, parentElem, parentElemNodes, newElems, childElements, sideRequests); + + set_parent_parts_and_parent_child_relation_fields(parentElem, childElements, 4); + + prolong_element_fields(mesh, parentElem, childElements); +} void Refinement::refine_tet_4_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, @@ -623,58 +709,118 @@ void Refinement::refine_tet_4_and_append_sides_to_create(const stk::mesh::PartVe prolong_element_fields(mesh, parentElem, childElements); } +void Refinement::refine_hex_8_and_append_sides_to_create(const stk::mesh::PartVector & childParts, + const stk::mesh::Entity parentElem, + const std::vector & elemChildEdgeNodes, + const int caseId, + std::vector & sideRequests) +{ + stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); + const stk::mesh::Entity * parentNodes = mesh.begin_nodes(parentElem); + const stk::mesh::Entity elemCentroidNode = myNodeRefiner.get_element_centroid_child_node(parentElem); + const std::vector elemChildFaceNodes = myNodeRefiner.get_element_child_face_nodes(mesh, parentElem); + // This is not exactly 
obvious, but comes out of topology_data.hpp + const std::array parentElemNodes{ + parentNodes[0], parentNodes[1], parentNodes[2], parentNodes[3], parentNodes[4], parentNodes[5], parentNodes[6], parentNodes[7], + elemChildEdgeNodes[0], elemChildEdgeNodes[1], elemChildEdgeNodes[2], elemChildEdgeNodes[3], + elemChildEdgeNodes[8], elemChildEdgeNodes[9], elemChildEdgeNodes[10], elemChildEdgeNodes[11], + elemChildEdgeNodes[4], elemChildEdgeNodes[5], elemChildEdgeNodes[6], elemChildEdgeNodes[7], + elemCentroidNode, + elemChildFaceNodes[4], elemChildFaceNodes[5], elemChildFaceNodes[3], elemChildFaceNodes[1], elemChildFaceNodes[0], elemChildFaceNodes[2]}; + + const std::vector newElems = HexRefiner::refinement_child_nodes_and_sides_hex8(caseId); + + std::vector childElements; + declare_child_elements_and_append_sides_to_create(mesh, myEntityIdPool, childParts, parentElem, parentElemNodes, newElems, childElements, sideRequests); + + set_parent_parts_and_parent_child_relation_fields(parentElem, childElements, 8); + + prolong_element_fields(mesh, parentElem, childElements); +} + static unsigned num_new_child_elements_for_case_id(const stk::topology & elemTopology, const int caseId) { switch(elemTopology()) { + case stk::topology::BEAM_2: + { + STK_ThrowRequire(caseId == 1); + return 2; + } case stk::topology::TRI_3: case stk::topology::TRI_3_2D: return TriRefiner::num_new_child_elements_tri3(caseId); + case stk::topology::QUAD_4: + case stk::topology::QUAD_4_2D: + return QuadRefiner::num_new_child_elements_quad4(caseId); case stk::topology::TETRAHEDRON_4: return moab::SimplexTemplateRefiner::num_new_child_elements_tet4(caseId); + case stk::topology::HEXAHEDRON_8: + return HexRefiner::num_new_child_elements_hex8(caseId); default: ThrowRuntimeError("Element topology not found in refine_element: " << elemTopology.name()); } } -void Refinement::refine_element_if_it_has_refined_edges_and_append_sides_to_create(const stk::topology & elemTopology, +void 
Refinement::adapt_element_and_append_sides_to_create(const stk::topology & elemTopology, const stk::mesh::PartVector & childParts, const stk::mesh::Entity elem, const std::vector & elemChildEdgeNodes, + const int preCaseId, + const int postCaseId, std::vector & sideRequests, - std::vector & elementsToDelete) + std::vector & elementsToDelete, + std::vector & elementsThatAreNoLongerParents) { stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); - const int caseId = get_edge_refinement_case_id(mesh, elemChildEdgeNodes); - if (0 == caseId) - return; - const std::vector existingChildrenToDelete = get_children(elem); - restrict_element_fields(mesh, elem, existingChildrenToDelete); + if (0 != preCaseId) + { + const std::vector existingChildrenToDelete = get_children(elem); + restrict_element_fields(mesh, elem, existingChildrenToDelete); + elementsToDelete.insert(elementsToDelete.end(), existingChildrenToDelete.begin(), existingChildrenToDelete.end()); + } + + if (0 == postCaseId) + { + elementsThatAreNoLongerParents.push_back(elem); + return; + } switch(elemTopology()) { + case stk::topology::BEAM_2: + refine_beam_2_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, postCaseId, sideRequests); + break; case stk::topology::TRI_3: case stk::topology::TRI_3_2D: - refine_tri_3_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, caseId, sideRequests); + refine_tri_3_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, postCaseId, sideRequests); + break; + case stk::topology::QUAD_4: + case stk::topology::QUAD_4_2D: + refine_quad_4_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, postCaseId, sideRequests); break; case stk::topology::TETRAHEDRON_4: - refine_tet_4_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, caseId, sideRequests); + refine_tet_4_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, postCaseId, sideRequests); + break; + case stk::topology::HEXAHEDRON_8: + 
refine_hex_8_and_append_sides_to_create(childParts, elem, elemChildEdgeNodes, postCaseId, sideRequests); break; default: ThrowRuntimeError("Element topology not found in refine_element: " << elemTopology.name()); break; } - elementsToDelete.insert(elementsToDelete.end(), existingChildrenToDelete.begin(), existingChildrenToDelete.end()); + } -size_t Refinement::count_new_child_elements(const EdgeMarkerInterface & edgeMarker, const std::vector & bucketsData) const +size_t Refinement::count_new_child_elements(const EdgeMarkerInterface & edgeMarker, const std::vector & bucketsData, const bool doingRefinement) const { stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); const stk::mesh::Selector selector = mesh.mesh_meta_data().locally_owned_part(); std::vector elemChildEdgeNodes; + ElementEdgeCaseIds elementEdgeCaseIds; size_t numNewChildElems = 0; for(const auto & bucketData : bucketsData) @@ -682,10 +828,9 @@ size_t Refinement::count_new_child_elements(const EdgeMarkerInterface & edgeMark for(const auto & elem : std::get<2>(bucketData)) { const stk::topology bucketTopology = std::get<0>(bucketData); - edgeMarker.fill_element_refined_edge_nodes(myNodeRefiner, elem, bucketTopology, elemChildEdgeNodes); - const int caseId = get_edge_refinement_case_id(mesh, elemChildEdgeNodes); - if (0 != caseId) - numNewChildElems += num_new_child_elements_for_case_id(bucketTopology, caseId); + edgeMarker.fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(myNodeRefiner, elem, bucketTopology, doingRefinement, elementEdgeCaseIds, elemChildEdgeNodes); + if (elementEdgeCaseIds.has_changed()) + numNewChildElems += num_new_child_elements_for_case_id(bucketTopology, elementEdgeCaseIds.post_adapt_case_id()); } } @@ -717,43 +862,73 @@ stk::mesh::PartVector Refinement::get_parts_for_child_elements(const stk::mesh:: return childParts; } -std::vector Refinement::get_buckets_data_for_candidate_elements_to_refine(const EdgeMarkerInterface & edgeMarker) const +std::vector 
Refinement::get_buckets_data_for_candidate_elements_to_adapt(const EdgeMarkerInterface & edgeMarker, const bool doingRefinement) const { // Cache off bucket data to avoid looping over buckets while modifying elements const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); const stk::mesh::Selector selector = mesh.mesh_meta_data().locally_owned_part(); - const stk::mesh::EntityVector emptyVector; + stk::mesh::EntityVector bucketCandidateElements; std::vector> bucketsData; for(const auto & bucketPtr : mesh.get_buckets(stk::topology::ELEMENT_RANK, selector)) { - const stk::mesh::PartVector childParts = get_parts_for_child_elements(*bucketPtr); - bucketsData.emplace_back(bucketPtr->topology(), childParts, emptyVector); + bucketCandidateElements.clear(); - stk::mesh::EntityVector & bucketElements = std::get<2>(bucketsData.back()); - bucketElements.reserve(bucketPtr->size()); for (auto && elem : *bucketPtr) - if (edgeMarker.is_element_a_candidate_for_refinement(elem)) - bucketElements.push_back(elem); + if (edgeMarker.is_element_a_candidate_for_adaptation(elem, doingRefinement)) + bucketCandidateElements.push_back(elem); + + if (!bucketCandidateElements.empty()) + bucketsData.emplace_back(bucketPtr->topology(), get_parts_for_child_elements(*bucketPtr), bucketCandidateElements); } return bucketsData; } -void Refinement::refine_elements_with_refined_edges_and_store_sides_to_create(const EdgeMarkerInterface & edgeMarker, const std::vector & bucketsData, std::vector & sideRequests, std::vector & elementsToDelete) +static int case_id_for_fully_refined(const stk::topology elementTopology) +{ + return (1< 0 && elementEdgeCaseIds.post_adapt_case_id() == case_id_for_fully_refined(elementTopology); +} + +void Refinement::adapt_elements_and_store_sides_to_create(const EdgeMarkerInterface & edgeMarker, + const std::vector & bucketsData, + const bool doingRefinement, + std::vector & sideRequests, + std::vector & elementsToDelete, + std::vector & elementsThatAreNoLongerParents, + 
std::vector & bucketDataForNewChildElementsThatMightNeedToBeRefined) +{ + const size_t numNewElements = count_new_child_elements(edgeMarker, bucketsData, doingRefinement); + myEntityIdPool.reserve(stk::topology::ELEMENT_RANK, numNewElements, myAssert32Bit, myForce64Bit); + + bucketDataForNewChildElementsThatMightNeedToBeRefined.clear(); + std::vector bucketChildElementsThatMightNeedToBeRefined; std::vector elemChildEdgeNodes; - //for(const auto & [bucketTopology, bucketChildParts, bucketElements] : bucketsData) - for(const auto & bucketData : bucketsData) + ElementEdgeCaseIds elementEdgeCaseIds; + for(const auto & [bucketTopology, bucketChildParts, bucketElements] : bucketsData) { - const auto & bucketTopology = std::get<0>(bucketData); - const auto & bucketChildParts = std::get<1>(bucketData); - const auto & bucketElements = std::get<2>(bucketData); + bucketChildElementsThatMightNeedToBeRefined.clear(); for(const auto & elem : bucketElements) { - edgeMarker.fill_element_refined_edge_nodes(myNodeRefiner, elem, bucketTopology, elemChildEdgeNodes); - refine_element_if_it_has_refined_edges_and_append_sides_to_create(bucketTopology, bucketChildParts, elem, elemChildEdgeNodes, sideRequests, elementsToDelete); + edgeMarker.fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(myNodeRefiner, elem, bucketTopology, doingRefinement, elementEdgeCaseIds, elemChildEdgeNodes); + if (elementEdgeCaseIds.has_changed()) + { + adapt_element_and_append_sides_to_create(bucketTopology, bucketChildParts, elem, elemChildEdgeNodes, elementEdgeCaseIds.pre_adapt_case_id(), elementEdgeCaseIds.post_adapt_case_id(), sideRequests, elementsToDelete, elementsThatAreNoLongerParents); + + if (element_going_from_partially_refined_to_fully_refined(bucketTopology, doingRefinement, elementEdgeCaseIds)) // A second level of refinement possibly needed to remove hanging nodes + { + auto childElements = get_children(elem); + 
bucketChildElementsThatMightNeedToBeRefined.insert(bucketChildElementsThatMightNeedToBeRefined.end(), childElements.begin(), childElements.end()); + } + } } + if (!bucketChildElementsThatMightNeedToBeRefined.empty()) + bucketDataForNewChildElementsThatMightNeedToBeRefined.emplace_back(bucketTopology, bucketChildParts, bucketChildElementsThatMightNeedToBeRefined); } } @@ -765,6 +940,22 @@ stk::mesh::PartVector Refinement::get_parts_for_new_refined_edge_nodes() const return refinedEdgeNodeParts; } +stk::mesh::PartVector Refinement::get_parts_for_new_refined_element_centroid_nodes() const +{ + stk::mesh::PartVector refinedElemCentroidNodeParts = { &myMeta.get_topology_root_part(stk::topology::NODE) }; + if (myActivePart) + refinedElemCentroidNodeParts.push_back(myActivePart); + return refinedElemCentroidNodeParts; +} + +stk::mesh::PartVector Refinement::get_parts_for_new_refined_quad_face_nodes() const +{ + stk::mesh::PartVector refinedQuadFaceNodeParts = { &myMeta.get_topology_root_part(stk::topology::NODE), &refined_quad_face_node_part() }; + if (myActivePart) + refinedQuadFaceNodeParts.push_back(myActivePart); + return refinedQuadFaceNodeParts; +} + bool Refinement::locally_have_any_hanging_refined_nodes() const { const stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); @@ -911,69 +1102,66 @@ void Refinement::update_element_rebalance_weights_incorporating_parallel_owner_c } } -void Refinement::create_refined_nodes_elements_and_sides(const EdgeMarkerInterface & edgeMarker) +void Refinement::adapt_elements_and_sides(const EdgeMarkerInterface & edgeMarker, const bool doingRefinement) { stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); - std::vector sideRequests; + const std::vector bucketsData = get_buckets_data_for_candidate_elements_to_adapt(edgeMarker, doingRefinement); + std::vector bucketDataForNewChildElementsThatMightNeedToBeRefined; + + if(stk::is_true_on_any_proc(mesh.parallel(), !bucketsData.empty())) { + std::vector sideRequests; + 
mesh.modification_begin(); - // Mesh modification cycle destroy_custom_ghostings(); - myNodeRefiner.create_refined_edge_nodes( - mesh, get_parts_for_new_refined_edge_nodes(), myRefinedEdgeNodeParentIdsField); + if (doingRefinement) + { + myNodeRefiner.create_refined_element_centroid_nodes(mesh, get_parts_for_new_refined_element_centroid_nodes()); - const std::vector bucketsData = - get_buckets_data_for_candidate_elements_to_refine(edgeMarker); - const size_t numNewElements = count_new_child_elements(edgeMarker, bucketsData); - myEntityIdPool.reserve( - stk::topology::ELEMENT_RANK, numNewElements, myAssert32Bit, myForce64Bit); + myNodeRefiner.create_refined_quad_face_nodes(mesh, get_parts_for_new_refined_quad_face_nodes(), myRefinedQuadFaceNodeParentIdsField); + + myNodeRefiner.create_refined_edge_nodes(mesh, get_parts_for_new_refined_edge_nodes(), myRefinedEdgeNodeParentIdsField); + } std::vector elementsToDelete; - refine_elements_with_refined_edges_and_store_sides_to_create( - edgeMarker, bucketsData, sideRequests, elementsToDelete); - stk::mesh::destroy_elements_no_mod_cycle( - mesh, elementsToDelete, mesh.mesh_meta_data().universal_part()); + std::vector elementsThatAreNoLongerParents; + + adapt_elements_and_store_sides_to_create(edgeMarker, bucketsData, doingRefinement, sideRequests, elementsToDelete, elementsThatAreNoLongerParents, bucketDataForNewChildElementsThatMightNeedToBeRefined); + stk::mesh::destroy_elements_no_mod_cycle(mesh, elementsToDelete, mesh.mesh_meta_data().universal_part()); + remove_parent_parts(elementsThatAreNoLongerParents); mesh.modification_end(); - } - //timer batch create sides - if(stk::is_true_on_any_proc(mesh.parallel(), !sideRequests.empty())) - { - batch_create_sides(mesh, sideRequests); + if(stk::is_true_on_any_proc(mesh.parallel(), !sideRequests.empty())) + { + batch_create_sides(mesh, sideRequests); + } } + if(stk::is_true_on_any_proc(mesh.parallel(), !bucketDataForNewChildElementsThatMightNeedToBeRefined.empty())) { - 
myNodeRefiner.prolong_refined_edge_nodes(mesh); - } -} + std::vector sideRequests; -void Refinement::create_another_layer_of_refined_elements_and_sides_to_eliminate_hanging_nodes(const EdgeMarkerInterface & edgeMarker) -{ - stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); - const std::vector bucketsData = get_buckets_data_for_candidate_elements_to_refine(edgeMarker); - const size_t numNewElements = count_new_child_elements(edgeMarker, bucketsData); + mesh.modification_begin(); - myEntityIdPool.reserve(stk::topology::ELEMENT_RANK, numNewElements, myAssert32Bit, myForce64Bit); + std::vector shouldBeEmpty_ElementsToDelete; + std::vector shouldBeEmpty_ElementsThatAreNoLongerParents; + std::vector shouldBeEmpty_bucketDataForNextRound; + adapt_elements_and_store_sides_to_create(edgeMarker, bucketDataForNewChildElementsThatMightNeedToBeRefined, doingRefinement, sideRequests, shouldBeEmpty_ElementsToDelete, shouldBeEmpty_ElementsThatAreNoLongerParents, shouldBeEmpty_bucketDataForNextRound); - std::vector elementsToDelete; - std::vector sideRequests; + const bool logicError = !shouldBeEmpty_ElementsToDelete.empty() || !shouldBeEmpty_ElementsThatAreNoLongerParents.empty() || !shouldBeEmpty_bucketDataForNextRound.empty(); - if(stk::is_true_on_any_proc(mesh.parallel(), numNewElements > 0)) - { - mesh.modification_begin(); - refine_elements_with_refined_edges_and_store_sides_to_create(edgeMarker, bucketsData, sideRequests, elementsToDelete); - stk::mesh::destroy_elements_no_mod_cycle(mesh, elementsToDelete, mesh.mesh_meta_data().universal_part()); mesh.modification_end(); - fix_face_and_edge_ownership(mesh); - attach_sides_to_elements(mesh); + STK_ThrowRequireMsg(stk::is_true_on_all_procs(mesh.parallel(), !logicError), "Unexpected error in adapt_elements_and_sides"); if(stk::is_true_on_any_proc(mesh.parallel(), !sideRequests.empty())) + { batch_create_sides(mesh, sideRequests); + } } } @@ -990,31 +1178,12 @@ void Refinement::remove_parent_parts(const std::vector & elem 
mesh.change_entity_parts(element, addParts, removeParts); } -void Refinement::mark_already_refined_edges() -{ - stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); - - myNodeRefiner.clear_edges_to_refine(); - - const stk::mesh::Selector selector = refined_edge_node_part(); - - for(const auto & bucketPtr : mesh.get_buckets(stk::topology::NODE_RANK, selector)) - { - for (auto && existingRefinedNode : *bucketPtr) - { - const auto edgeNodeParents = get_edge_parent_nodes(existingRefinedNode); - if (mesh.is_valid(edgeNodeParents[0]) && mesh.is_valid(edgeNodeParents[1])) - { - myNodeRefiner.mark_already_refined_edge(edge_from_edge_nodes(mesh, edgeNodeParents[0], edgeNodeParents[1]), existingRefinedNode); - } - } - } -} - void Refinement::respect_originating_proc_for_parents_modified_by_unrefinement(const std::vector & parentsModifiedByUnrefinement, const std::vector & originatingProcForParentsModifiedByUnrefinement) { STK_ThrowAssert(parentsModifiedByUnrefinement.size() == originatingProcForParentsModifiedByUnrefinement.size()); stk::mesh::BulkData & mesh = myMeta.mesh_bulk_data(); + if (1 == mesh.parallel_size()) + return; std::vector entitiesToMove; for (size_t i=0; i Refinement::get_originating_procs_for_elements(const std::vector & elements) const { std::vector originatingProcsForElems; - originatingProcsForElems.reserve(elements.size()); - for (auto elem : elements) - originatingProcsForElems.push_back(get_originating_processor_for_parent_element(elem)); + if (1 == myMeta.mesh_bulk_data().parallel_size()) + { + originatingProcsForElems.assign(elements.size(), 0); + } + else + { + originatingProcsForElems.reserve(elements.size()); + for (auto elem : elements) + originatingProcsForElems.push_back(get_originating_processor_for_parent_element(elem)); + } return originatingProcsForElems; } @@ -1076,23 +1252,18 @@ bool Refinement::refine_elements(const EdgeMarkerInterface & edgeMarker) } bool didMakeAnyChanges = false; - const bool haveEdgesToRefineLocally = 
get_num_edges_to_refine() > 0; - if (stk::is_true_on_any_proc(mesh.parallel(), haveEdgesToRefineLocally)) + if (stk::is_true_on_any_proc(mesh.parallel(), locally_have_edges_to_refine())) { didMakeAnyChanges = true; - { - stk::diag::TimeBlock timer_2(refineTimer.createRefinedEdges); - create_refined_nodes_elements_and_sides(edgeMarker); - } { - stk::diag::TimeBlock timer_2(refineTimer.elminateHangingNodes); - create_another_layer_of_refined_elements_and_sides_to_eliminate_hanging_nodes(edgeMarker); + stk::diag::TimeBlock timer_2(refineTimer.doRefinement); + adapt_elements_and_sides(edgeMarker, true); } { stk::diag::TimeBlock timer_2(refineTimer.prolongNodes); - myNodeRefiner.prolong_refined_edge_nodes(mesh); + myNodeRefiner.prolong_refined_nodes(mesh); } } @@ -1113,53 +1284,22 @@ bool Refinement::unrefine_elements(const EdgeMarkerInterface & edgeMarker) bool didMakeAnyChanges = false; if(stk::is_true_on_any_proc(mesh.parallel(), edgeMarker.locally_have_elements_to_unrefine())) { - std::vector childElementsToDeleteForUnrefinement; std::vector ownedParentElementsModifiedByUnrefinement; + std::vector originatingProcForParentsBeingModified; { - stk::diag::TimeBlock timer_2(unrefineTimer.fillElements); - edgeMarker.fill_elements_modified_by_unrefinement( - ownedParentElementsModifiedByUnrefinement, childElementsToDeleteForUnrefinement); + stk::diag::TimeBlock timer_2(unrefineTimer.findEdgesToUnrefine); + edgeMarker.mark_edges_to_be_unrefined(myNodeRefiner); + ownedParentElementsModifiedByUnrefinement = edgeMarker.get_parent_elements_that_will_be_modified_by_unrefinement(myNodeRefiner); + originatingProcForParentsBeingModified = get_originating_procs_for_elements(ownedParentElementsModifiedByUnrefinement); } - if(stk::is_true_on_any_proc(mesh.parallel(), !childElementsToDeleteForUnrefinement.empty())) + if(stk::is_true_on_any_proc(mesh.parallel(), !ownedParentElementsModifiedByUnrefinement.empty())) { - for (auto parentElement : ownedParentElementsModifiedByUnrefinement) 
- { - stk::diag::TimeBlock timer_2(unrefineTimer.restrictElementFields); - - auto childElements = get_children(parentElement); - restrict_element_fields(mesh, parentElement, childElements); - } - - std::vector originatingProcForParentsBeingModified; didMakeAnyChanges = true; -{ - stk::diag::TimeBlock timer_2(unrefineTimer.fillElements); - originatingProcForParentsBeingModified = - get_originating_procs_for_elements(ownedParentElementsModifiedByUnrefinement); -} { - stk::diag::TimeBlock timer_2(unrefineTimer.meshMod); - - mesh.modification_begin(); - destroy_custom_ghostings(); - stk::mesh::destroy_elements_no_mod_cycle( - mesh, childElementsToDeleteForUnrefinement, mesh.mesh_meta_data().universal_part()); - remove_parent_parts(ownedParentElementsModifiedByUnrefinement); - mesh.modification_end(); - } - - { - stk::diag::TimeBlock timer_2(unrefineTimer.fixFaceEdgeOwnership); - fix_face_and_edge_ownership(mesh); - mark_already_refined_edges(); - } - - - { - stk::diag::TimeBlock timer_2(unrefineTimer.elminateHangingNodes); - create_another_layer_of_refined_elements_and_sides_to_eliminate_hanging_nodes(edgeMarker); + stk::diag::TimeBlock timer_2(unrefineTimer.doUnrefinement); + adapt_elements_and_sides(edgeMarker, false); } { @@ -1198,11 +1338,13 @@ bool Refinement::do_refinement(const EdgeMarkerInterface & edgeMarker) return didMakeAnyChanges; } -void Refinement::do_uniform_refinement(const int numUniformRefinementLevels) +bool Refinement::do_uniform_refinement(const int numUniformRefinementLevels) { UniformEdgeMarker uniformMarker(myMeta.mesh_bulk_data(), *this); + bool didMakeAnyChanges = false; for (int i=0; i get_parent_id_and_parallel_owner_rank(const stk::mesh::Entity child) const; bool is_refined_edge_node(const stk::mesh::Entity node) const; + std::array get_edge_parent_node_ids(const stk::mesh::Entity edgeNode) const; std::array get_edge_parent_nodes(const stk::mesh::Entity edgeNode) const; std::tuple get_child_ids_and_num_children_when_fully_refined(const 
stk::mesh::Entity elem) const; unsigned get_num_children(const stk::mesh::Entity elem) const; @@ -102,6 +101,7 @@ class Refinement std::vector get_children(const stk::mesh::Entity elem) const; stk::mesh::Entity get_edge_child_node(const Edge edge) const { return myNodeRefiner.get_edge_child_node(edge); } size_t get_num_edges_to_refine() const { return myNodeRefiner.get_num_edges_to_refine(); } + bool locally_have_edges_to_refine() const { return myNodeRefiner.locally_have_edges_to_refine(); } // Leaf children must remain on same proc as parents. This means that there are constraints on rebalancing, and impact on how element weights are determined. std::string locally_check_leaf_children_have_parents_on_same_proc() const; @@ -111,7 +111,7 @@ class Refinement unsigned rebalance_element_count_incorporating_parallel_owner_constraints(const stk::mesh::Entity elem) const; bool do_refinement(const EdgeMarkerInterface & edgeMarker); - void do_uniform_refinement(const int numUniformRefinementLevels); + bool do_uniform_refinement(const int numUniformRefinementLevels); void delete_parent_elements(); // Only leafs will remain void restore_after_restart(); @@ -128,13 +128,19 @@ class Refinement typedef std::tuple BucketData; typedef std::pair ParentAndChildId; - size_t count_new_child_elements(const EdgeMarkerInterface & edgeMarker, const std::vector & bucketsData) const; - void refine_elements_with_refined_edges_and_store_sides_to_create(const EdgeMarkerInterface & edgeMarker, const std::vector & bucketsData, std::vector & sideRequests, std::vector & elementsToDelete); + size_t count_new_child_elements(const EdgeMarkerInterface & edgeMarker, const std::vector & bucketsData, const bool doingRefinement) const; + void adapt_elements_and_store_sides_to_create(const EdgeMarkerInterface & edgeMarker, + const std::vector & bucketsData, + const bool doingRefinement, + std::vector & sideRequests, + std::vector & elementsToDelete, + std::vector & elementsThatAreNoLongerParents, + 
std::vector & bucketDataForNewChildElementsThatMightNeedToBeRefined); void declare_refinement_parts(); void declare_refinement_fields(); stk::mesh::PartVector get_parts_for_child_elements(const stk::mesh::Bucket & parentBucket) const; - std::vector get_buckets_data_for_candidate_elements_to_refine(const EdgeMarkerInterface & edgeMarker) const; + std::vector get_buckets_data_for_candidate_elements_to_adapt(const EdgeMarkerInterface & edgeMarker, const bool doingRefinement) const; stk::mesh::Field & get_child_element_ids_field(const unsigned numChildWhenFullyRefined) const; void set_parent_id(const stk::mesh::Entity elem, const stk::mesh::EntityId parentElemId) const; @@ -144,12 +150,12 @@ class Refinement void check_leaf_children_have_parents_on_same_proc() const; bool locally_have_any_hanging_refined_nodes() const; - void create_refined_nodes_elements_and_sides(const EdgeMarkerInterface & edgeMarker); - void create_another_layer_of_refined_elements_and_sides_to_eliminate_hanging_nodes(const EdgeMarkerInterface & edgeMarker); + void adapt_elements_and_sides(const EdgeMarkerInterface & edgeMarker, const bool doingRefinement); bool unrefine_elements(const EdgeMarkerInterface & edgeMarker); bool refine_elements(const EdgeMarkerInterface & edgeMarker); void finalize(); void mark_already_refined_edges(); + void mark_already_refined_edges_that_will_be_retained(const std::vector & sortedEdgeNodesThatWillBeUnrefined); void destroy_custom_ghostings(); stk::mesh::EntityId get_parent_id(const stk::mesh::Entity elem) const; @@ -157,15 +163,23 @@ class Refinement void set_originating_processor_for_parent_element(const stk::mesh::Entity elem, const int originatingProc) const; void set_refinement_level(const stk::mesh::Entity elem, const int refinementLevel) const; void set_parent_parts_and_parent_child_relation_fields(const stk::mesh::Entity parentElement, const std::vector & childElements, const unsigned numChildWhenFullyRefined); - void 
refine_element_if_it_has_refined_edges_and_append_sides_to_create(const stk::topology & elemTopology, + void adapt_element_and_append_sides_to_create(const stk::topology & elemTopology, const stk::mesh::PartVector & childParts, const stk::mesh::Entity elem, const std::vector & elemChildEdgeNodes, + const int preCaseId, + const int postCaseId, std::vector & sideRequests, - std::vector & elementsToDelete); + std::vector & elementsToDelete, + std::vector & elementsThatAreNoLongerParents); + void refine_beam_2_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, const std::vector & elemChildEdgeNodes, const int caseId, std::vector & sideRequests); void refine_tri_3_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, const std::vector & elemChildEdgeNodes, const int caseId, std::vector & sideRequests); void refine_tet_4_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, const std::vector & elemChildEdgeNodes, const int caseId, std::vector & sideRequests); + void refine_quad_4_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, const std::vector & elemChildEdgeNodes, const int caseId, std::vector & sideRequests); + void refine_hex_8_and_append_sides_to_create(const stk::mesh::PartVector & childParts, const stk::mesh::Entity parentElem, const std::vector & elemChildEdgeNodes, const int caseId, std::vector & sideRequests); stk::mesh::PartVector get_parts_for_new_refined_edge_nodes() const; + stk::mesh::PartVector get_parts_for_new_refined_element_centroid_nodes() const; + stk::mesh::PartVector get_parts_for_new_refined_quad_face_nodes() const; void remove_parent_parts(const std::vector & elements); void fill_parents_and_children_and_parents_with_off_proc_child(std::vector & parents, std::vector & children, std::vector & parentsAndOffProcChildId) const; void 
restore_parent_and_child_element_parts(const std::vector & parents, const std::vector & children); @@ -184,18 +198,21 @@ class Refinement stk::mesh::Part * myParentPart {nullptr}; stk::mesh::Part * myChildPart {nullptr}; stk::mesh::Part * myRefinedEdgeNodePart {nullptr}; + stk::mesh::Part * myRefinedQuadFaceNodePart {nullptr}; const stk::mesh::Field * myCoordsField{nullptr}; stk::mesh::Field * myRefinementLevelField{nullptr}; stk::mesh::Field * myParentElementIdField{nullptr}; + stk::mesh::Field * myChildElementIds2Field{nullptr}; stk::mesh::Field * myChildElementIds4Field{nullptr}; stk::mesh::Field * myChildElementIds8Field{nullptr}; stk::mesh::Field * myRefinedEdgeNodeParentIdsField{nullptr}; + stk::mesh::Field * myRefinedQuadFaceNodeParentIdsField{nullptr}; stk::mesh::Field * myOriginatingProcForParentElementField{nullptr}; mutable RefineElementsTimers refineTimer; mutable UnrefineElementsTimers unrefineTimer; - mutable stk::diag::Timer myFixPartsandOwnersTimer; + mutable stk::diag::Timer myFixPartsandOwnersTimer; }; } // namespace krino diff --git a/packages/krino/krino/refinement/Akri_RefinerUtils.hpp b/packages/krino/krino/refinement/Akri_RefinerUtils.hpp new file mode 100644 index 000000000000..0071b0138984 --- /dev/null +++ b/packages/krino/krino/refinement/Akri_RefinerUtils.hpp @@ -0,0 +1,31 @@ +#ifndef KRINO_KRINO_REFINEMENT_AKRI_REFINERUTILS_HPP_ +#define KRINO_KRINO_REFINEMENT_AKRI_REFINERUTILS_HPP_ +#include +#include + +namespace krino { + +template +void append_child_elements(const std::array & permutedParentNodeOrdinals, + const std::array & permutedParentSideOrdinals, + const std::array,NUMCHILDELEMENTS> & childElementNodeIndices, + const std::array,NUMCHILDELEMENTS> & childElementSideIndices, + std::vector & childElemDescs) +{ + const std::size_t oldSize = childElemDescs.size(); + childElemDescs.resize(oldSize + NUMCHILDELEMENTS); + for (std::size_t i=0; i #include +#include #include #include #include "Akri_Edge.hpp" @@ -16,6 +17,17 @@ namespace 
krino { +static bool element_topology_has_refined_centroid_node(const stk::topology elemTopology) +{ + return (elemTopology == stk::topology::QUAD_4_2D || + elemTopology == stk::topology::HEXAHEDRON_8); +} + +static bool element_topology_has_refined_face_nodes(const stk::topology elemTopology) +{ + return (elemTopology == stk::topology::HEXAHEDRON_8); +} + UniformEdgeMarker::UniformEdgeMarker(const stk::mesh::BulkData & mesh, Refinement & refinement) : myMesh(mesh), myRefinement(refinement) { @@ -25,53 +37,71 @@ void UniformEdgeMarker::locally_mark_edges_of_non_parent_elements(NodeRefiner & { const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & !myRefinement.parent_part(); std::vector edgesToRefineForElement; + std::vector facesToRefineForElement; - for(const auto & bucketPtr : myMesh.get_buckets(stk::topology::ELEMENT_RANK, selector)) + for(const auto * bucketPtr : myMesh.get_buckets(stk::topology::ELEMENT_RANK, selector)) { + const bool elemHasRefinedCentroidNode = element_topology_has_refined_centroid_node(bucketPtr->topology()); + const bool elemHasRefinedFaceNodes = element_topology_has_refined_face_nodes(bucketPtr->topology()); for(const auto & elem : *bucketPtr) { fill_entity_edges(myMesh, elem, edgesToRefineForElement); for (auto && edgeToRefineForElement : edgesToRefineForElement) nodeRefiner.mark_edge_for_refinement(edgeToRefineForElement); + if (elemHasRefinedCentroidNode) + nodeRefiner.mark_element_with_child_centroid_node_for_refinement(elem); + if (elemHasRefinedFaceNodes) + { + fill_entity_quad_faces(myMesh, elem, facesToRefineForElement); + for (auto && faceToRefineForElement : facesToRefineForElement) + nodeRefiner.mark_quad_face_for_refinement(faceToRefineForElement); + } } } } void UniformEdgeMarker::mark_edges_to_be_refined(NodeRefiner & nodeRefiner) const { - nodeRefiner.clear_edges_to_refine(); + nodeRefiner.clear_entities_to_refine(); locally_mark_edges_of_non_parent_elements(nodeRefiner); 
nodeRefiner.sync_shared_edges_from_other_procs_to_refine(myMesh); } -bool UniformEdgeMarker::is_element_a_candidate_for_refinement(const stk::mesh::Entity elem) const +bool UniformEdgeMarker::is_element_a_candidate_for_adaptation(const stk::mesh::Entity elem, const bool doingRefinement) const { - return !myRefinement.is_parent(elem); + return doingRefinement && !myRefinement.is_parent(elem); } -void UniformEdgeMarker::fill_element_refined_edge_nodes(const NodeRefiner & nodeRefiner, const stk::mesh::Entity elem, const stk::topology & elemTopology, std::vector & elemEdgeChildNodes) const +void UniformEdgeMarker::fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const bool doingRefinement, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const { - const unsigned elemNumEdges = elemTopology.num_edges(); - const stk::mesh::Entity * elemNodes = myMesh.begin_nodes(elem); - std::array edgeNodes; - elemEdgeChildNodes.resize(elemNumEdges); + elementEdgeCaseIds.clear(); - for (unsigned iEdge = 0; iEdge < elemNumEdges; ++iEdge) + if (doingRefinement) { - elemTopology.edge_nodes(elemNodes, iEdge, edgeNodes.data()); - const Edge edge = edge_from_edge_nodes(myMesh, edgeNodes[0], edgeNodes[1]); - const stk::mesh::Entity refinedEdgeNode = nodeRefiner.get_edge_child_node(edge); - elemEdgeChildNodes[iEdge] = refinedEdgeNode; - } -} + const unsigned elemNumEdges = elemTopology.num_edges(); + const stk::mesh::Entity * elemNodes = myMesh.begin_nodes(elem); + std::array edgeNodes; + elemEdgeChildNodes.resize(elemNumEdges); -void UniformEdgeMarker::fill_elements_modified_by_unrefinement( - std::vector & parentElementsModifiedByUnrefinement, - std::vector & childElementsToDeleteForUnrefinement) const -{ - parentElementsModifiedByUnrefinement.clear(); - childElementsToDeleteForUnrefinement.clear(); - // FIXME + int postRefineCaseId = 0; + for 
(unsigned iEdge = 0; iEdge < elemNumEdges; ++iEdge) + { + elemTopology.edge_nodes(elemNodes, iEdge, edgeNodes.data()); + const Edge edge = edge_from_edge_nodes(myMesh, edgeNodes[0], edgeNodes[1]); + const stk::mesh::Entity refinedEdgeNode = nodeRefiner.get_edge_child_node(edge); + elemEdgeChildNodes[iEdge] = refinedEdgeNode; + if (elemEdgeChildNodes[iEdge] != stk::mesh::Entity::InvalidEntity) + postRefineCaseId += 1<name(); } -const stk::mesh::Field & ElementBasedEdgeMarker::get_marker_field() const +const stk::mesh::Field & ElementBasedEdgeMarker::get_marker_field_and_sync_to_host() const { STK_ThrowAssert(myElementMarkerField); + myElementMarkerField->sync_to_host(); return *myElementMarkerField; } @@ -105,27 +136,47 @@ TransitionElementEdgeMarker::TransitionElementEdgeMarker(const stk::mesh::BulkDa } -static bool node_is_element_node(const StkMeshEntities & elementNodes, const stk::mesh::Entity node) +static bool node_is_parent_node(const StkMeshEntities & parentNodes, const stk::mesh::Entity node) { - return (std::find(elementNodes.begin(), elementNodes.end(), node) != elementNodes.end()); + return (std::find(parentNodes.begin(), parentNodes.end(), node) != parentNodes.end()); } -std::vector TransitionElementEdgeMarker::get_edge_nodes_of_transition_elements( +static void keep_child_nodes_that_are_not_parent_nodes(const StkMeshEntities & parentNodes, std::vector & childNodesThatAreNotParentNodes) +{ + size_t numKeep = 0; + for (size_t iChild=0; iChild TransitionElementEdgeMarker::get_child_nodes_that_are_not_parent_nodes( const stk::mesh::Entity parentElem, - const std::vector & transitionElements) const + const std::vector & childElems) const { - std::vector transitionElementEdgeNodes; - if (!transitionElements.empty()) + std::vector childNodesThatAreNotParentNodes; + if (!childElems.empty()) { const StkMeshEntities parentNodes {myMesh.begin_nodes(parentElem), myMesh.end_nodes(parentElem)}; - transitionElementEdgeNodes.reserve(2*transitionElements.size()); - 
for (auto && transitionElement : transitionElements) - for (auto && elementNode : StkMeshEntities{myMesh.begin_nodes(transitionElement), myMesh.end_nodes(transitionElement)}) - if (!node_is_element_node(parentNodes, elementNode)) - transitionElementEdgeNodes.push_back(elementNode); - stk::util::sort_and_unique(transitionElementEdgeNodes); + childNodesThatAreNotParentNodes.reserve(parentNodes.size()*childElems.size()); + for (auto && childElem : childElems) + { + STK_ThrowAssertMsg(myMesh.is_valid(childElem), "All child elements must by valid. Should this element really be considered for adaptation?"); + StkMeshEntities childNodes{myMesh.begin_nodes(childElem), myMesh.end_nodes(childElem)}; + childNodesThatAreNotParentNodes.insert(childNodesThatAreNotParentNodes.end(), childNodes.begin(), childNodes.end()); + } + stk::util::sort_and_unique(childNodesThatAreNotParentNodes); + + keep_child_nodes_that_are_not_parent_nodes(parentNodes, childNodesThatAreNotParentNodes); } - return transitionElementEdgeNodes; + return childNodesThatAreNotParentNodes; +} + +std::vector TransitionElementEdgeMarker::get_child_nodes_that_are_not_parent_nodes( + const stk::mesh::Entity parentElem) const +{ + const std::vector childElems = myRefinement.get_children(parentElem); + return get_child_nodes_that_are_not_parent_nodes(parentElem, childElems); } bool TransitionElementEdgeMarker::is_transition(const stk::mesh::Entity elem) const @@ -158,71 +209,208 @@ TransitionElementEdgeMarker::get_parent_edges_for_given_refined_edge_nodes(const return edges; } -void TransitionElementEdgeMarker::fill_existing_element_refined_edge_nodes_for_partially_refined_parent_element( - const stk::mesh::Entity parentElem, - const stk::topology & parentElemTopology, - const unsigned parentElemNumEdges, - const std::vector & childTransitionElements, - std::vector & elemEdgeChildNodes) const +std::vector> +TransitionElementEdgeMarker::get_parent_edge_node_ids_for_given_refined_edge_nodes(const std::vector & 
refinedEdgeNodes) const { - elemEdgeChildNodes.clear(); - elemEdgeChildNodes.reserve(parentElemNumEdges); - - const std::vector refinedEdgeNodes = get_edge_nodes_of_transition_elements(parentElem, childTransitionElements); - const std::vector refinedEdges = get_parent_edges_for_given_refined_edge_nodes(refinedEdgeNodes); + std::vector> refinedEdgeParentNodeIds; + refinedEdgeParentNodeIds.reserve(refinedEdgeNodes.size()); + for (auto && refinedEdgeNode : refinedEdgeNodes) + { + const std::array edgeParentNodeIds = myRefinement.get_edge_parent_node_ids(refinedEdgeNode); + refinedEdgeParentNodeIds.push_back(edgeParentNodeIds); + } + return refinedEdgeParentNodeIds; +} - const stk::mesh::Entity * entityNodes = myMesh.begin_nodes(parentElem); - for (unsigned iEdge = 0; iEdge < parentElemNumEdges; ++iEdge) +void TransitionElementEdgeMarker::fill_are_edge_nodes_being_unrefined(const std::vector & sortedOwnedOrSharedNodesToBeRemovedByUnrefinement, + const std::vector & refinedEdgeNodes, + std::vector & areEdgeNodesBeingUnrefined, + bool & areAnyEdgeNodesBeingUnrefined) const +{ + areAnyEdgeNodesBeingUnrefined = false; + areEdgeNodesBeingUnrefined.assign(refinedEdgeNodes.size(), false); + for (size_t iNode=0; iNode & elemEdgeChildNodes) const +static std::array get_sorted_edge_node_ids(const stk::mesh::BulkData & mesh, const stk::mesh::Entity edgeNode0, const stk::mesh::Entity edgeNode1) { - if (myRefinement.is_parent(elem)) + const stk::mesh::EntityId edgeNode0Id = mesh.identifier(edgeNode0); + const stk::mesh::EntityId edgeNode1Id = mesh.identifier(edgeNode1); + return (edgeNode0Id{{edgeNode0Id, edgeNode1Id}} : std::array{{edgeNode1Id, edgeNode0Id}}; +} + +void TransitionElementEdgeMarker::fill_post_unrefinement_edge_nodes_and_caseIds(const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const std::vector & refinedEdgeNodes, + const std::vector & areEdgeNodesBeingUnrefined, + std::vector & elemEdgeChildNodes, + ElementEdgeCaseIds & elementEdgeCaseIds) 
const +{ + const std::vector> refinedEdgeParentNodeIds = get_parent_edge_node_ids_for_given_refined_edge_nodes(refinedEdgeNodes); + + int preAdaptCaseId = 0; + int postAdaptCaseId = 0; + const unsigned elemNumEdges = elemTopology.num_edges(); + elemEdgeChildNodes.assign(elemNumEdges, stk::mesh::Entity()); + + const stk::mesh::Entity * entityNodes = myMesh.begin_nodes(elem); + for (unsigned iEdge = 0; iEdge < elemNumEdges; ++iEdge) { - STK_ThrowAssert(myRefinement.is_this_parent_element_partially_refined(elem)); - const std::vector transitionElements = myRefinement.get_children(elem); - fill_existing_element_refined_edge_nodes_for_partially_refined_parent_element(elem, elemTopology, elemNumEdges, transitionElements, elemEdgeChildNodes); + const unsigned * edgeNodeOrdinals = get_edge_node_ordinals(elemTopology, iEdge); + const std::array edgeNodeIds = get_sorted_edge_node_ids(myMesh, entityNodes[edgeNodeOrdinals[0]], entityNodes[edgeNodeOrdinals[1]]); + const auto iter = std::find(refinedEdgeParentNodeIds.begin(), refinedEdgeParentNodeIds.end(), edgeNodeIds); + if (iter != refinedEdgeParentNodeIds.end()) + { + preAdaptCaseId += 1< & elemEdgeChildNodes) const +{ + elementEdgeCaseIds.clear(); + elemEdgeChildNodes.clear(); + if (myRefinement.is_parent(elem)) { - elemEdgeChildNodes.assign(elemNumEdges, stk::mesh::Entity()); + const std::vector refinedEdgeNodes = get_child_nodes_that_are_not_parent_nodes(elem); + const std::vector & sortedOwnedOrSharedNodesToBeRemovedByUnrefinement = nodeRefiner.get_sorted_edge_nodes_that_will_be_removed_by_unrefinement(); + std::vector areEdgeNodesBeingUnrefined; + bool areAnyEdgeNodesBeingUnrefined = false; + fill_are_edge_nodes_being_unrefined(sortedOwnedOrSharedNodesToBeRemovedByUnrefinement, refinedEdgeNodes, areEdgeNodesBeingUnrefined, areAnyEdgeNodesBeingUnrefined); + + if (areAnyEdgeNodesBeingUnrefined) + fill_post_unrefinement_edge_nodes_and_caseIds(elem, elemTopology, refinedEdgeNodes, areEdgeNodesBeingUnrefined, 
elemEdgeChildNodes, elementEdgeCaseIds); } } -void TransitionElementEdgeMarker::fill_element_refined_edge_nodes(const NodeRefiner & nodeRefiner, const stk::mesh::Entity elem, const stk::topology & elemTopology, std::vector & elemEdgeChildNodes) const +void TransitionElementEdgeMarker::fill_refined_edge_nodes_for_marked_edges(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + std::vector & elemEdgeChildNodes, + bool & areAnyEdgesMarked) const { + areAnyEdgesMarked = false; const unsigned elemNumEdges = elemTopology.num_edges(); + elemEdgeChildNodes.assign(elemNumEdges, stk::mesh::Entity()); - fill_existing_element_refined_edge_nodes(elem, elemTopology, elemNumEdges, elemEdgeChildNodes); const stk::mesh::Entity * entityNodes = myMesh.begin_nodes(elem); - - bool haveNewNode = false; - for (unsigned iEdge = 0; iEdge < elemNumEdges; ++iEdge) { - if (elemEdgeChildNodes[iEdge] == stk::mesh::Entity::InvalidEntity) + const unsigned * edgeNodeOrdinals = get_edge_node_ordinals(elemTopology, iEdge); + const Edge edge = edge_from_edge_nodes(myMesh, entityNodes[edgeNodeOrdinals[0]], entityNodes[edgeNodeOrdinals[1]]); + const stk::mesh::Entity refinedEdgeNode = nodeRefiner.get_edge_child_node(edge); + if (myMesh.is_valid(refinedEdgeNode)) { - const unsigned * edgeNodeOrdinals = get_edge_node_ordinals(elemTopology, iEdge); - const Edge edge = edge_from_edge_nodes(myMesh, entityNodes[edgeNodeOrdinals[0]], entityNodes[edgeNodeOrdinals[1]]); - const stk::mesh::Entity refinedEdgeNode = nodeRefiner.get_edge_child_node(edge); - if (refinedEdgeNode != stk::mesh::Entity::InvalidEntity) - haveNewNode = true; + areAnyEdgesMarked = true; elemEdgeChildNodes[iEdge] = refinedEdgeNode; } } +} + +void TransitionElementEdgeMarker::fill_in_existing_refined_edge_nodes_and_caseIds(const stk::mesh::Entity elem, + const stk::topology & elemTopology, + std::vector & elemEdgeChildNodes, + ElementEdgeCaseIds & elementEdgeCaseIds) const +{ + const 
unsigned elemNumEdges = elemTopology.num_edges(); + + int preAdaptCaseId = 0; + int postAdaptCaseId = 0; + + if (myRefinement.is_parent(elem)) + { + const std::vector refinedEdgeNodes = get_child_nodes_that_are_not_parent_nodes(elem); + const std::vector> refinedEdgeParentNodeIds = get_parent_edge_node_ids_for_given_refined_edge_nodes(refinedEdgeNodes); - if (!haveNewNode) // all refined nodes already existed + const stk::mesh::Entity * entityNodes = myMesh.begin_nodes(elem); for (unsigned iEdge = 0; iEdge < elemNumEdges; ++iEdge) - elemEdgeChildNodes[iEdge] = stk::mesh::Entity::InvalidEntity; + { + const unsigned * edgeNodeOrdinals = get_edge_node_ordinals(elemTopology, iEdge); + const std::array edgeNodeIds = get_sorted_edge_node_ids(myMesh, entityNodes[edgeNodeOrdinals[0]], entityNodes[edgeNodeOrdinals[1]]); + const auto iter = std::find(refinedEdgeParentNodeIds.begin(), refinedEdgeParentNodeIds.end(), edgeNodeIds); + if (iter != refinedEdgeParentNodeIds.end()) + { + preAdaptCaseId += 1< & elemEdgeChildNodes) const +{ + elementEdgeCaseIds.clear(); + + if (!myRefinement.is_parent(elem) || myRefinement.is_this_parent_element_partially_refined(elem)) + { + bool areAnyEdgesMarked = false; + fill_refined_edge_nodes_for_marked_edges(nodeRefiner, elem, elemTopology, elemEdgeChildNodes, areAnyEdgesMarked); + if (areAnyEdgesMarked) + fill_in_existing_refined_edge_nodes_and_caseIds(elem, elemTopology, elemEdgeChildNodes, elementEdgeCaseIds); + else + elemEdgeChildNodes.clear(); + } +} + +void TransitionElementEdgeMarker::fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const bool doingRefinement, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const +{ + if (doingRefinement) + fill_refinement_caseIds_and_refined_edge_nodes_if_changed(nodeRefiner, elem, elemTopology, elementEdgeCaseIds, elemEdgeChildNodes); + else + 
fill_unrefinement_caseIds_and_refined_edge_nodes_if_changed(nodeRefiner, elem, elemTopology, elementEdgeCaseIds, elemEdgeChildNodes); +} + +static bool is_any_child_invalid_or_a_parent(const stk::mesh::BulkData & mesh, const Refinement & refinement, const std::vector & childElements) +{ + for (auto && childElem : childElements) + if (!mesh.is_valid(childElem) || refinement.is_parent(childElem)) + return true; + return false; +} + +bool TransitionElementEdgeMarker::is_element_a_candidate_for_unrefinement(const stk::mesh::Entity elem) const +{ + if (myRefinement.is_parent(elem)) + { + const std::vector childElems = myRefinement.get_children(elem); + return !is_any_child_invalid_or_a_parent(myMesh, myRefinement, childElems); + } + return false; } bool TransitionElementEdgeMarker::is_element_a_candidate_for_refinement(const stk::mesh::Entity elem) const @@ -232,6 +420,13 @@ bool TransitionElementEdgeMarker::is_element_a_candidate_for_refinement(const st return !is_transition(elem); } +bool TransitionElementEdgeMarker::is_element_a_candidate_for_adaptation(const stk::mesh::Entity elem, const bool doingRefinement) const +{ + if (doingRefinement) + return is_element_a_candidate_for_refinement(elem); + return is_element_a_candidate_for_unrefinement(elem); +} + static bool is_any_element_marked(FieldRef elementMarkerField, const std::vector & elems) { for(const auto & elem : elems) @@ -257,7 +452,7 @@ void TransitionElementEdgeMarker::mark_unrefined_edges_of_partially_refined_pare NodeRefiner & nodeRefiner, bool & wasAnyEdgeMarked) const { - const std::vector refinedEdgeNodes = get_edge_nodes_of_transition_elements(parentElem, childTransitionElements); + const std::vector refinedEdgeNodes = get_child_nodes_that_are_not_parent_nodes(parentElem, childTransitionElements); const std::vector refinedEdges = get_parent_edges_for_given_refined_edge_nodes(refinedEdgeNodes); fill_entity_edges(myMesh, parentElem, elemEdgesWorkspace); for (auto && elemEdge : elemEdgesWorkspace) @@ 
-267,7 +462,7 @@ void TransitionElementEdgeMarker::mark_unrefined_edges_of_partially_refined_pare void TransitionElementEdgeMarker::locally_mark_edges_of_partially_refined_parent_elements_with_marked_children(NodeRefiner & nodeRefiner) const { - const stk::mesh::Field & markerField = get_marker_field(); + const stk::mesh::Field & markerField = get_marker_field_and_sync_to_host(); const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & stk::mesh::selectField(markerField) & myRefinement.parent_part(); std::vector elemEdges; std::vector childTransitionElements; @@ -291,8 +486,8 @@ void TransitionElementEdgeMarker::locally_mark_edges_of_partially_refined_parent static bool edge_is_parent_edge(const StkMeshEntities & parentNodes, const stk::mesh::Entity * edgeNodes) { - return node_is_element_node(parentNodes, edgeNodes[0]) && - node_is_element_node(parentNodes, edgeNodes[1]); + return node_is_parent_node(parentNodes, edgeNodes[0]) && + node_is_parent_node(parentNodes, edgeNodes[1]); } bool does_child_element_have_marked_edge_that_is_not_parent_edge(const stk::mesh::BulkData & mesh, const StkMeshEntities & parentNodes, const stk::mesh::Entity childElem, const NodeRefiner & nodeRefiner) @@ -326,7 +521,7 @@ bool does_any_child_element_have_marked_edge_that_is_not_parent_edge(const stk:: void TransitionElementEdgeMarker::locally_mark_edges_of_partially_refined_parent_elements_to_satisfy_template(NodeRefiner & nodeRefiner, bool & wasAnyEdgeMarked) const { - const stk::mesh::Field & markerField = get_marker_field(); + const stk::mesh::Field & markerField = get_marker_field_and_sync_to_host(); const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & stk::mesh::selectField(markerField) & myRefinement.parent_part(); std::vector elemEdges; std::vector childTransitionElements; @@ -347,7 +542,7 @@ void TransitionElementEdgeMarker::locally_mark_edges_of_partially_refined_parent void 
TransitionElementEdgeMarker::locally_mark_edges_of_marked_non_transition_elements(NodeRefiner & nodeRefiner) const { - const stk::mesh::Field & markerField = get_marker_field(); + const stk::mesh::Field & markerField = get_marker_field_and_sync_to_host(); const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & stk::mesh::selectField(markerField) & !myRefinement.parent_part(); std::vector edgesToRefineForElement; @@ -378,7 +573,7 @@ void TransitionElementEdgeMarker::mark_edges_to_be_refined(NodeRefiner & nodeRef // 2. Mark all edges of partially refined parent element that are not already refined if any child element is marked - one iter only // 3. Mark all edges of partially refined parent element that are not already refined if any edge of child element is marked that is not an edge of the parent - multiple iters - nodeRefiner.clear_edges_to_refine(); + nodeRefiner.clear_entities_to_refine(); locally_mark_edges_of_marked_non_transition_elements(nodeRefiner); // stage 1 locally_mark_edges_of_partially_refined_parent_elements_with_marked_children(nodeRefiner); // stage 2 @@ -395,9 +590,14 @@ void TransitionElementEdgeMarker::mark_edges_to_be_refined(NodeRefiner & nodeRef } } +void TransitionElementEdgeMarker::mark_edges_to_be_unrefined(NodeRefiner & nodeRefiner) const +{ + nodeRefiner.set_sorted_edge_nodes_that_will_be_removed_by_unrefinement(find_sorted_edge_nodes_that_will_be_removed_by_unrefinement()); +} + bool TransitionElementEdgeMarker::are_all_children_leaves_and_marked_for_unrefinement(const std::vector & childElements) const { - const FieldRef markerField = get_marker_field(); + const FieldRef markerField = get_marker_field_and_sync_to_host(); for (auto && childElem : childElements) { @@ -430,7 +630,7 @@ bool TransitionElementEdgeMarker::can_edge_node_be_unrefined_based_on_locally_ow return false; const auto edgeNodeParents = myRefinement.get_edge_parent_nodes(refinedNode); - stk::mesh::get_entities_through_relations(myMesh, 
{edgeNodeParents[0], edgeNodeParents[1]}, stk::topology::ELEMENT_RANK, workParentEdgeElements); + stk::mesh::get_entities_through_relations(myMesh, stk::mesh::EntityVector{edgeNodeParents[0], edgeNodeParents[1]}, stk::topology::ELEMENT_RANK, workParentEdgeElements); for (auto && parentEdgeElem : workParentEdgeElements) { @@ -458,7 +658,7 @@ static void communicate_to_get_sorted_edges_nodes_that_will_be_removed_by_unrefi stk::util::remove_intersection_from_first(ownedOrSharedEdgeNodesThatMayBeUnrefined, sharedEdgeNodesThatMustBeKeptFromOtherProcs); } -std::vector TransitionElementEdgeMarker::get_sorted_edge_nodes_that_will_be_removed_by_unrefinement() const +std::vector TransitionElementEdgeMarker::find_sorted_edge_nodes_that_will_be_removed_by_unrefinement() const { std::vector ownedOrSharedEdgeNodesThatMayBeUnrefined; std::vector sharedEdgeNodesThatMustBeKept; @@ -497,63 +697,48 @@ static bool is_any_in_second_vec_in_first_sorted_vec(const std::vector & childElements) -{ - for (auto && childElem : childElements) - if (!mesh.is_valid(childElem) || refinement.is_parent(childElem)) - return true; - return false; -} - bool TransitionElementEdgeMarker::is_parent_element_modified_by_unrefinement(const stk::mesh::Entity parentElem, const std::vector & childElements, - const std::vector & sortedEdgeNodesThatWillBeUnrefined, - std::vector & workElemEdgeChildNodes) const + const std::vector & sortedOwnedOrSharedNodesToBeRemovedByUnrefinement) const { if (is_any_child_invalid_or_a_parent(myMesh, myRefinement, childElements)) return false; - // If all edge nodes are going to unrefined, then this parent element will have no children after unrefinement - // If only some of the edge nodes are going to unrefined, then the parent will still have children, but the current elements must still be deleted - const stk::topology elemTopology = myMesh.bucket(parentElem).topology(); - const unsigned elemNumEdges = elemTopology.num_edges(); - 
fill_existing_element_refined_edge_nodes_for_partially_refined_parent_element(parentElem, elemTopology, elemNumEdges, childElements, workElemEdgeChildNodes); + const std::vector refinedEdgeNodes = get_child_nodes_that_are_not_parent_nodes(parentElem); - const bool isParentModifiedByUnrefinement = is_any_in_second_vec_in_first_sorted_vec(sortedEdgeNodesThatWillBeUnrefined, workElemEdgeChildNodes); + const bool isParentModifiedByUnrefinement = is_any_in_second_vec_in_first_sorted_vec(sortedOwnedOrSharedNodesToBeRemovedByUnrefinement, refinedEdgeNodes); return isParentModifiedByUnrefinement; } -void TransitionElementEdgeMarker::fill_elements_modified_by_unrefinement(std::vector & parentElementsModifiedByUnrefinement, - std::vector & childElementsToDeleteForUnrefinement) const +std::vector TransitionElementEdgeMarker::get_parent_elements_that_will_be_modified_by_unrefinement(const NodeRefiner & nodeRefiner) const { - const stk::mesh::Field & markerField = get_marker_field(); - const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & stk::mesh::selectField(markerField) & myRefinement.parent_part(); - std::vector childElements; - std::vector workElemEdgeChildNodes; - - parentElementsModifiedByUnrefinement.clear(); - childElementsToDeleteForUnrefinement.clear(); + std::vector parentElementsModifiedByUnrefinement; + const std::vector & sortedOwnedOrSharedNodesToBeRemovedByUnrefinement = nodeRefiner.get_sorted_edge_nodes_that_will_be_removed_by_unrefinement(); - const std::vector sortedEdgeNodesThatWillBeUnrefined = get_sorted_edge_nodes_that_will_be_removed_by_unrefinement(); - - for(const auto & bucketPtr : myMesh.get_buckets(stk::topology::ELEMENT_RANK, selector)) + if (!sortedOwnedOrSharedNodesToBeRemovedByUnrefinement.empty()) { - for(const auto & parentElem : *bucketPtr) + const stk::mesh::Field & markerField = get_marker_field_and_sync_to_host(); + const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & 
stk::mesh::selectField(markerField) & myRefinement.parent_part(); + std::vector childElements; + + for(const auto & bucketPtr : myMesh.get_buckets(stk::topology::ELEMENT_RANK, selector)) { - myRefinement.fill_children(parentElem, childElements); - if (is_parent_element_modified_by_unrefinement(parentElem, childElements, sortedEdgeNodesThatWillBeUnrefined, workElemEdgeChildNodes)) + for(const auto & parentElem : *bucketPtr) { - childElementsToDeleteForUnrefinement.insert(childElementsToDeleteForUnrefinement.end(), childElements.begin(), childElements.end()); - parentElementsModifiedByUnrefinement.push_back(parentElem); + myRefinement.fill_children(parentElem, childElements); + if (is_parent_element_modified_by_unrefinement(parentElem, childElements, sortedOwnedOrSharedNodesToBeRemovedByUnrefinement)) + parentElementsModifiedByUnrefinement.push_back(parentElem); } } } + + return parentElementsModifiedByUnrefinement; } bool TransitionElementEdgeMarker::locally_have_elements_to_unrefine() const { - const stk::mesh::Field & markerField = get_marker_field(); + const stk::mesh::Field & markerField = get_marker_field_and_sync_to_host(); const stk::mesh::Selector selector = myMesh.mesh_meta_data().locally_owned_part() & stk::mesh::selectField(markerField) & myRefinement.parent_part(); std::vector childElements; diff --git a/packages/krino/krino/refinement/Akri_TransitionElementEdgeMarker.hpp b/packages/krino/krino/refinement/Akri_TransitionElementEdgeMarker.hpp index edf58ce6ba38..9e20fe8e6ebb 100644 --- a/packages/krino/krino/refinement/Akri_TransitionElementEdgeMarker.hpp +++ b/packages/krino/krino/refinement/Akri_TransitionElementEdgeMarker.hpp @@ -20,15 +20,33 @@ class Refinement; struct Edge; class NodeRefiner; +class ElementEdgeCaseIds +{ +public: + void set(const int preAdaptCaseId, const int postAdaptCaseId) { myPreAdaptCaseId=preAdaptCaseId; myPostAdaptCaseId=postAdaptCaseId; } + bool has_changed() const { return myPreAdaptCaseId != myPostAdaptCaseId; } + void 
clear() { myPreAdaptCaseId=0; myPostAdaptCaseId=0; } + int pre_adapt_case_id() const { STK_ThrowAssert(has_changed()); return myPreAdaptCaseId; } + int post_adapt_case_id() const { STK_ThrowAssert(has_changed()); return myPostAdaptCaseId; } +private: + int myPreAdaptCaseId{0}; + int myPostAdaptCaseId{0}; +}; + class EdgeMarkerInterface { public: virtual ~EdgeMarkerInterface() {} virtual void mark_edges_to_be_refined(NodeRefiner & nodeRefiner) const = 0; - virtual bool is_element_a_candidate_for_refinement(const stk::mesh::Entity elem) const = 0; - virtual void fill_element_refined_edge_nodes(const NodeRefiner & nodeRefiner, const stk::mesh::Entity elem, const stk::topology & elemTopology, std::vector & elemEdgeChildNodes) const = 0; - virtual void fill_elements_modified_by_unrefinement(std::vector & parentElementsModifiedByUnrefinement, - std::vector & childElementsToDeleteForUnrefinement) const = 0; + virtual void mark_edges_to_be_unrefined(NodeRefiner & nodeRefiner) const = 0; + virtual bool is_element_a_candidate_for_adaptation(const stk::mesh::Entity elem, const bool doingRefinement) const = 0; + virtual void fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const bool doingRefinement, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const = 0; + virtual std::vector get_parent_elements_that_will_be_modified_by_unrefinement(const NodeRefiner & nodeRefiner) const = 0; virtual bool locally_have_elements_to_unrefine() const = 0; }; @@ -39,10 +57,15 @@ class UniformEdgeMarker : public EdgeMarkerInterface virtual ~UniformEdgeMarker() {} protected: virtual void mark_edges_to_be_refined(NodeRefiner & nodeRefiner) const override; - virtual bool is_element_a_candidate_for_refinement(const stk::mesh::Entity elem) const override; - virtual void fill_element_refined_edge_nodes(const NodeRefiner & nodeRefiner, const stk::mesh::Entity 
elem, const stk::topology & elemTopology, std::vector & elemEdgeChildNodes) const override; - virtual void fill_elements_modified_by_unrefinement(std::vector & parentElementsModifiedByUnrefinement, - std::vector & childElementsToDeleteForUnrefinement) const override; + virtual void mark_edges_to_be_unrefined(NodeRefiner & nodeRefiner) const override {} + virtual bool is_element_a_candidate_for_adaptation(const stk::mesh::Entity elem, const bool doingRefinement) const override; + virtual void fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const bool doingRefinement, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const override; + virtual std::vector get_parent_elements_that_will_be_modified_by_unrefinement(const NodeRefiner & nodeRefiner) const override {std::vector tmp; return tmp;}; virtual bool locally_have_elements_to_unrefine() const override { return false; } private: void locally_mark_edges_of_non_parent_elements(NodeRefiner & nodeRefiner) const; @@ -57,7 +80,7 @@ class ElementBasedEdgeMarker : public EdgeMarkerInterface virtual ~ElementBasedEdgeMarker() {} const std::string & get_marker_field_name() const; protected: - const stk::mesh::Field & get_marker_field() const; + const stk::mesh::Field & get_marker_field_and_sync_to_host() const; private: const stk::mesh::BulkData & myMesh; Refinement & myRefinement; @@ -76,23 +99,63 @@ class TransitionElementEdgeMarker : public ElementBasedEdgeMarker virtual ~TransitionElementEdgeMarker() {} virtual void mark_edges_to_be_refined(NodeRefiner & nodeRefiner) const override; - virtual bool is_element_a_candidate_for_refinement(const stk::mesh::Entity elem) const override; - virtual void fill_element_refined_edge_nodes(const NodeRefiner & nodeRefiner, const stk::mesh::Entity elem, const stk::topology & elemTopology, std::vector & elemEdgeChildNodes) const override; - virtual 
void fill_elements_modified_by_unrefinement(std::vector & parentElementsModifiedByUnrefinement, - std::vector & childElementsToDeleteForUnrefinement) const override; + virtual void mark_edges_to_be_unrefined(NodeRefiner & nodeRefiner) const override; + virtual bool is_element_a_candidate_for_adaptation(const stk::mesh::Entity elem, const bool doingRefinement) const override; + virtual void fill_adaptation_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const bool doingRefinement, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const override; + virtual std::vector get_parent_elements_that_will_be_modified_by_unrefinement(const NodeRefiner & nodeRefiner) const override; virtual bool locally_have_elements_to_unrefine() const override; bool is_transition(const stk::mesh::Entity elem) const; private: + std::vector find_sorted_edge_nodes_that_will_be_removed_by_unrefinement() const; std::vector get_edge_nodes_of_transition_elements(const stk::mesh::Entity parentElem, const std::vector & transitionElements) const; std::vector get_parent_edges_for_given_refined_edge_nodes(const std::vector & refinedEdgeNodes) const; + std::vector> get_parent_edge_node_ids_for_given_refined_edge_nodes(const std::vector & refinedEdgeNodes) const; + void fill_are_edge_nodes_being_unrefined(const std::vector & sortedOwnedOrSharedNodesToBeRemovedByUnrefinement, + const std::vector & refinedEdgeNodes, + std::vector & areEdgeNodesBeingUnrefined, + bool & areAnyEdgeNodesBeingUnrefined) const; + void fill_post_unrefinement_edge_nodes_and_caseIds(const stk::mesh::Entity elem, + const stk::topology & elemTopology, + const std::vector & refinedEdgeNodes, + const std::vector & areEdgeNodesBeingUnrefined, + std::vector & elemEdgeChildNodes, + ElementEdgeCaseIds & elementEdgeCaseIds) const; + std::vector get_child_nodes_that_are_not_parent_nodes(const stk::mesh::Entity 
parentElem, const std::vector & childElems) const; + std::vector get_child_nodes_that_are_not_parent_nodes(const stk::mesh::Entity parentElem) const; void fill_existing_element_refined_edge_nodes_for_partially_refined_parent_element( - const stk::mesh::Entity parentElem, - const stk::topology & parentElemTopology, - const unsigned parentElemNumEdges, - const std::vector & childTransitionElements, - std::vector & elemEdgeChildNodes) const; + const stk::mesh::Entity parentElem, + const stk::topology & parentElemTopology, + const unsigned parentElemNumEdges, + const std::vector & childTransitionElements, + std::vector & elemEdgeChildNodes) const; + void fill_unrefinement_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const; + void fill_refined_edge_nodes_for_marked_edges(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + std::vector & elemEdgeChildNodes, + bool & areAnyEdgesMarked) const; + void fill_in_existing_refined_edge_nodes_and_caseIds(const stk::mesh::Entity elem, + const stk::topology & elemTopology, + std::vector & elemEdgeChildNodes, + ElementEdgeCaseIds & elementEdgeCaseIds) const; + void fill_refinement_caseIds_and_refined_edge_nodes_if_changed(const NodeRefiner & nodeRefiner, + const stk::mesh::Entity elem, + const stk::topology & elemTopology, + ElementEdgeCaseIds & elementEdgeCaseIds, + std::vector & elemEdgeChildNodes) const; + bool is_element_a_candidate_for_unrefinement(const stk::mesh::Entity elem) const; + bool is_element_a_candidate_for_refinement(const stk::mesh::Entity elem) const; void fill_existing_element_refined_edge_nodes(const stk::mesh::Entity elem, const stk::topology & elemTopology, const unsigned elemNumEdges, std::vector & elemEdgeChildNodes) const; void 
mark_unrefined_edges_of_partially_refined_parent_element(const stk::mesh::Entity parentElem, const std::vector & childTransitionElements, std::vector & elemEdgesWorkspace, NodeRefiner & nodeRefiner, bool & wasEdgeMarked) const; void locally_mark_edges_of_partially_refined_parent_elements_with_marked_children(NodeRefiner & nodeRefiner) const; @@ -103,11 +166,10 @@ class TransitionElementEdgeMarker : public ElementBasedEdgeMarker bool can_edge_node_be_unrefined_based_on_locally_owned_elements(const stk::mesh::Entity refinedNode, std::vector & workParentEdgeElements, std::vector & workChildElements) const; - std::vector get_sorted_edge_nodes_that_will_be_removed_by_unrefinement() const; + bool is_parent_element_modified_by_unrefinement(const stk::mesh::Entity parentElem, const std::vector & childElements, - const std::vector & sortedEdgeNodesThatWillBeUnrefined, - std::vector & workElemEdgeChildNodes) const; + const std::vector & sortedOwnedOrSharedNodesToBeRemovedByUnrefinement) const; const stk::mesh::BulkData & myMesh; Refinement & myRefinement; diff --git a/packages/krino/krino/refinement/Akri_TriRefiner.cpp b/packages/krino/krino/refinement/Akri_TriRefiner.cpp index 519d655b7889..0539ba2b297b 100644 --- a/packages/krino/krino/refinement/Akri_TriRefiner.cpp +++ b/packages/krino/krino/refinement/Akri_TriRefiner.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace krino { namespace TriRefiner { @@ -144,27 +145,6 @@ static std::array permutation_side_ordinals_tri3(const unsigned case return permutation; } -template -static void append_child_elements(const std::array & permutedParentNodeOrdinals, - const std::array & permutedParentSideOrdinals, - const std::array,NUMCHILDELEMENTS> & childElementNodeIndices, - const std::array,NUMCHILDELEMENTS> & childElementSideIndices, - std::vector & childElemDescs) -{ - const size_t oldSize = childElemDescs.size(); - childElemDescs.resize(oldSize + NUMCHILDELEMENTS); - for (size_t i=0; i 
refinement_child_nodes_and_sides_tri3(const unsigned caseId, const std::array & elementNodeCoords, const std::array & elementNodeScore) { std::vector childElemNodes; diff --git a/packages/krino/krino/region/Akri_Region.cpp b/packages/krino/krino/region/Akri_Region.cpp index e3c284db8c5b..55ddc0889cd0 100644 --- a/packages/krino/krino/region/Akri_Region.cpp +++ b/packages/krino/krino/region/Akri_Region.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -99,6 +100,8 @@ void Region::commit() CDFEM_Support & cdfem_support = CDFEM_Support::get(meta); RefinementSupport & refinementSupport = RefinementSupport::get(meta); + myPostProcessors.commit(meta); + if (krino::CDFEM_Support::is_active(meta)) { if (cdfem_mesh_displacements_requested_in_results_fields(CDFEM_Support::cdfem_mesh_displacements_field_name(), my_results_options->get_nodal_fields())) @@ -113,18 +116,17 @@ void Region::commit() auto & active_part = AuxMetaData::get(meta).active_part(); stk::mesh::BulkData::AutomaticAuraOption auto_aura_option = stk::mesh::BulkData::NO_AUTO_AURA; - if (refinementSupport.get_initial_refinement_levels() > 0 || refinementSupport.get_interface_maximum_refinement_level() > 0 || - (krino::CDFEM_Support::is_active(meta) && cdfem_support.get_post_cdfem_refinement_levels() > 0)) + if (cdfem_support.get_cdfem_edge_degeneracy_handling() == SNAP_TO_INTERFACE_WHEN_QUALITY_ALLOWS_THEN_SNAP_TO_NODE) { auto_aura_option = stk::mesh::BulkData::AUTO_AURA; - RefinementInterface & refinement = KrinoRefinement::create(meta); - refinementSupport.set_non_interface_conforming_refinement(refinement); + cdfem_support.register_cdfem_snap_displacements_field(); } - if (cdfem_support.get_cdfem_edge_degeneracy_handling() == SNAP_TO_INTERFACE_WHEN_QUALITY_ALLOWS_THEN_SNAP_TO_NODE) + if (refinementSupport.get_initial_refinement_levels() > 0 || refinementSupport.get_interface_maximum_refinement_level() > 0 || + (krino::CDFEM_Support::is_active(meta) && 
cdfem_support.get_post_cdfem_refinement_levels() > 0)) { - auto_aura_option = stk::mesh::BulkData::AUTO_AURA; - cdfem_support.register_cdfem_snap_displacements_field(); + RefinementInterface & refinement = KrinoRefinement::create(meta); + refinementSupport.set_non_interface_conforming_refinement(refinement); } if (krino::CDFEM_Support::is_active(meta)) @@ -329,7 +331,7 @@ void do_post_adapt_uniform_refinement(const Simulation & simulation, const Refin // the transition elements are handled. auto & refinement = refinementSupport.get_non_interface_conforming_refinement(); const int num_levels = refinementSupport.get_post_adapt_refinement_levels(); - FieldRef marker_field = refinement.get_marker_field(); + FieldRef marker_field = refinement.get_marker_field_and_sync_to_host(); std::function marker_function = [&mesh, marker_field, num_levels] @@ -432,17 +434,10 @@ void Region::initialize() for(auto&& ls : surfaceManager.get_levelsets()) { ls->initialize(0.); - if (my_simulation.is_transient()) { // initialize does not end with the facets constructed so manually construct them now - ls->build_facets_locally(mesh_meta_data().universal_part()); - - // debugging - if (krinolog.shouldPrint(LOG_FACETS)) - { - ls->write_facets(); - } + ls->build_initial_facets(0.); } } } @@ -478,15 +473,16 @@ void Region::execute() mesh_topology_has_changed(); } - const double deltaTime = time_step(); - const double timeN = get_current_time(); - const double timeNP1 = timeN + deltaTime; + const double timeN = get_old_time(); + const double timeNp1 = get_current_time(); const Surface_Manager & surfaceManager = Surface_Manager::get(mesh_meta_data()); for(auto&& ls : surfaceManager.get_levelsets()) { - ls->advance_semilagrangian(timeN, timeNP1); + ls->advance_semilagrangian(timeN, timeNp1); } + + myPostProcessors.postprocess(mesh_bulk_data(), AuxMetaData::get(mesh_meta_data()).get_current_coordinates(), timeNp1); } unsigned Region::spatial_dimension() const { return 
mesh_meta_data().spatial_dimension(); } @@ -496,6 +492,7 @@ const stk::mesh::MetaData& Region::mesh_meta_data() const { return myMesh->meta_ stk::mesh::MetaData& Region::mesh_meta_data() { return myMesh->meta_data(); } double Region::time_step() const { return my_simulation.get_time_step(); } double Region::get_current_time() const { return my_simulation.get_current_time(); } +double Region::get_old_time() const { return my_simulation.get_old_time(); } stk::io::StkMeshIoBroker & Region::stkOutput() { @@ -528,7 +525,6 @@ void Region::create_output_mesh() fileProperties.add(Ioss::Property("state_offset", stateCount)); } - stkOutput().use_simple_fields(); const auto oldOutputFileIndex = myOutputFileIndex; myOutputFileIndex = stkOutput().create_output_mesh(filename, stk::io::WRITE_RESULTS, fileProperties); diff --git a/packages/krino/krino/region/Akri_Region.hpp b/packages/krino/krino/region/Akri_Region.hpp index 77fe62d8dee5..f8bff638a888 100644 --- a/packages/krino/krino/region/Akri_Region.hpp +++ b/packages/krino/krino/region/Akri_Region.hpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace stk { namespace mesh { class MetaData; } } namespace stk { namespace mesh { class BulkData; } } @@ -35,6 +36,7 @@ class Region { virtual void execute(); double time_step() const; + double get_old_time() const; double get_current_time() const; const std::string & name() const { return my_name; } unsigned spatial_dimension() const; @@ -54,9 +56,11 @@ class Region { void process_output(bool forceOutput); ResultsOutputOptions * get_results_options() { return my_results_options.get(); } void mesh_topology_has_changed() { myIsOutputFileCreatedAndCurrent = false; } + PostProcessors & get_postprocessors() { return myPostProcessors; } private: Simulation & my_simulation; + PostProcessors myPostProcessors; std::unique_ptr myMesh; std::unique_ptr myOutputBroker; std::unique_ptr my_results_options; diff --git a/packages/krino/krino/region/Akri_Simulation.cpp 
b/packages/krino/krino/region/Akri_Simulation.cpp index 96c34459005d..8fc989cc8d5c 100644 --- a/packages/krino/krino/region/Akri_Simulation.cpp +++ b/packages/krino/krino/region/Akri_Simulation.cpp @@ -77,6 +77,7 @@ void Simulation::execute() while ( my_current_time < my_stop_time ) { static const double sqrt_epsilon = std::sqrt(std::numeric_limits::epsilon()); + my_old_time = my_current_time; if (my_current_time+my_time_step_size > my_stop_time*(1.-sqrt_epsilon)) { my_time_step_size = my_stop_time - my_current_time; diff --git a/packages/krino/krino/region/Akri_Simulation.hpp b/packages/krino/krino/region/Akri_Simulation.hpp index 8a9658de224f..ddb39c7b271f 100644 --- a/packages/krino/krino/region/Akri_Simulation.hpp +++ b/packages/krino/krino/region/Akri_Simulation.hpp @@ -33,6 +33,7 @@ class Simulation { const std::string & get_name() const { return my_name; } bool is_transient() const { return my_is_transient; } double get_time_step() const { return my_time_step_size; } + double get_old_time() const { return my_old_time; } double get_current_time() const { return my_current_time; } double get_stop_time() const { return my_stop_time; } int get_time_step_count() const { return my_step_count; } @@ -50,6 +51,7 @@ class Simulation { bool my_is_transient; double my_stop_time; unsigned my_step_count; + double my_old_time; double my_current_time; double my_time_step_size; std::vector> my_regions; diff --git a/packages/krino/krino/surface/Akri_Facet.cpp b/packages/krino/krino/surface/Akri_Facet.cpp index d8145e714b72..5efd822c0b7f 100644 --- a/packages/krino/krino/surface/Akri_Facet.cpp +++ b/packages/krino/krino/surface/Akri_Facet.cpp @@ -115,7 +115,6 @@ stk::math::Vector3d FacetWithVelocity3d::velocity_at_closest_point( const stk::m stk::math::Vector3d closestPt; stk::math::Vector2d paramAtClosestPt; closest_point(queryPt, closestPt, paramAtClosestPt); - return myVelocity[0] * (1.-paramAtClosestPt[0]) + myVelocity[1] * paramAtClosestPt[0]; return 
(1.0-paramAtClosestPt[0]-paramAtClosestPt[1]) * myVelocity[0] + paramAtClosestPt[0] * myVelocity[1] + paramAtClosestPt[1] * myVelocity[2]; } diff --git a/packages/krino/krino/surface/Akri_Faceted_Surface.cpp b/packages/krino/krino/surface/Akri_Faceted_Surface.cpp index ddb5cf19cf3d..b5fbfafb5d4f 100644 --- a/packages/krino/krino/surface/Akri_Faceted_Surface.cpp +++ b/packages/krino/krino/surface/Akri_Faceted_Surface.cpp @@ -38,8 +38,6 @@ static int find_destination_proc_for_facet(const std::vector & proc template static void unpack_and_append_facets_from_proc(stk::CommSparse & commSparse, const int recvProc, std::vector & facetVec) { - std::array facetCoords; - stk::CommBuffer & b = commSparse.recv_buffer(recvProc); if (b.remaining()) { diff --git a/packages/krino/krino/surface/Akri_Faceted_Surface.hpp b/packages/krino/krino/surface/Akri_Faceted_Surface.hpp index 87cc0af53f87..87d634eff179 100644 --- a/packages/krino/krino/surface/Akri_Faceted_Surface.hpp +++ b/packages/krino/krino/surface/Akri_Faceted_Surface.hpp @@ -37,7 +37,9 @@ class FacetedSurfaceBase : public SurfaceThatTakesAdvantageOfNarrowBandAndTheref virtual size_t size() const = 0; virtual size_t nonlocal_size() const = 0; virtual double point_distance(const stk::math::Vector3d &x, const double narrow_band_size, const double far_field_value, const bool compute_signed_distance) const = 0; - virtual void prepare_to_compute(const double time, const BoundingBox & point_bbox, const double truncation_length) = 0; + void prepare_to_compute(const double time, + const BoundingBox & point_bbox, + const double truncation_length) override = 0; virtual std::string print_sizes() const = 0; virtual stk::math::Vector3d closest_point(const stk::math::Vector3d &x) const = 0; @@ -100,7 +102,7 @@ class Faceted_Surface : public FacetedSurfaceBase virtual void swap(FacetedSurfaceBase & other) override; virtual size_t nonlocal_size() const override { return myNonLocalFacets.size(); } - virtual std::string print_sizes() const; 
+ std::string print_sizes() const override; void parallel_distribute_facets(const size_t batch_size, const std::vector & proc_bboxes); double point_distance(const stk::math::Vector3d &x, const double narrow_band_size, const double far_field_value, const bool compute_signed_distance) const override; diff --git a/packages/krino/krino/surface/Akri_String_Function_Expression.cpp b/packages/krino/krino/surface/Akri_String_Function_Expression.cpp index 4a1e89e3cb46..957d0f1798c2 100644 --- a/packages/krino/krino/surface/Akri_String_Function_Expression.cpp +++ b/packages/krino/krino/surface/Akri_String_Function_Expression.cpp @@ -74,4 +74,12 @@ String_Function_Expression::evaluate(const stk::math::Vector3d &coord) const return myEvaluator.evaluate(); } +void initialize_expression_vector(const std::vector & stringVec, std::vector & exprVec) +{ + exprVec.clear(); + exprVec.reserve(stringVec.size()); + for (auto & component : stringVec) + exprVec.emplace_back(component); +} + } diff --git a/packages/krino/krino/surface/Akri_String_Function_Expression.hpp b/packages/krino/krino/surface/Akri_String_Function_Expression.hpp index 33c4236f73ba..7ba89c1f1b5d 100644 --- a/packages/krino/krino/surface/Akri_String_Function_Expression.hpp +++ b/packages/krino/krino/surface/Akri_String_Function_Expression.hpp @@ -18,6 +18,8 @@ class String_Function_Expression : public stk::expreval::VariableMap::Resolver { public: String_Function_Expression(const std::string & expression); + String_Function_Expression (const String_Function_Expression&) { throw std::runtime_error("copying String_Function_Expression not allowed because it will not be resolved correctly."); } + String_Function_Expression& operator= (const String_Function_Expression&) { throw std::runtime_error("copying String_Function_Expression not allowed because it will not be resolved correctly."); } void resolve(stk::expreval::VariableMap::iterator & varIt) override; double evaluate(const stk::math::Vector3d &coords) const; double 
evaluate(const double time, const stk::math::Vector3d &coord) const; @@ -29,6 +31,8 @@ class String_Function_Expression : public stk::expreval::VariableMap::Resolver mutable stk::math::Vector3d myQueryCoords{stk::math::Vector3d::ZERO}; }; +void initialize_expression_vector(const std::vector & stringVec, std::vector & exprVec); + } #endif /* KRINO_KRINO_KRINO_LIB_AKRI_STRING_FUNCTION_EXPRESSION_HPP_ */ diff --git a/packages/krino/krino/unit_tests/Akri_MeshSpecs.hpp b/packages/krino/krino/unit_tests/Akri_MeshSpecs.hpp index a05adca8d999..1be986223d11 100644 --- a/packages/krino/krino/unit_tests/Akri_MeshSpecs.hpp +++ b/packages/krino/krino/unit_tests/Akri_MeshSpecs.hpp @@ -8,6 +8,36 @@ namespace krino { +struct RegularBeam +{ + RegularBeam() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::BEAM_2; + std::vector nodeLocs + {{ + {-0.500, 0.000, 0.000 }, + { 0.500, 0.000, 0.000 }, + }}; + + std::array BeamConn{{0, 1}}; + std::vector> allElementConn{BeamConn}; +}; + +struct UMRRegularBeam +{ + UMRRegularBeam() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::BEAM_2; + std::vector nodeLocs + {{ + {-0.500, 0.000, 0.000 }, + { 0.000, 0.000, 0.000 }, + { 0.500, 0.000, 0.000 }, + }}; + + std::array Beam1Conn{{0, 1}}; + std::array Beam2Conn{{1, 2}}; + std::vector> allElementConn{Beam1Conn, Beam2Conn}; +}; + struct RegularTri { RegularTri() = default; @@ -156,6 +186,32 @@ struct QuadSplit4Tri std::vector> allElementConn{Tri1Conn, Tri2Conn, Tri3Conn, Tri4Conn }; }; +struct QuadSplit4TriAndQuadSplit2Tri +{ + QuadSplit4TriAndQuadSplit2Tri() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::TRIANGLE_3_2D; + std::vector nodeLocs + {{ + { -0.500, -0.500 }, + { 0.500, -0.500 }, + { 0.500, 0.500 }, + { -0.500, 0.500 }, + { 0, 0 }, + { 1., 0.5 }, + { 1., 1. }, + { 0.5, 1. 
} + }}; + + std::array Tri1Conn{{0, 1, 4}}; + std::array Tri2Conn{{1, 2, 4}}; + std::array Tri3Conn{{2, 3, 4}}; + std::array Tri4Conn{{3, 0, 4}}; + std::array Tri5Conn{{2, 5, 6}}; + std::array Tri6Conn{{2, 6, 7}}; + std::vector> allElementConn{Tri1Conn, Tri2Conn, Tri3Conn, Tri4Conn, Tri5Conn, Tri6Conn }; +}; + + struct FourDisconnectedTris { FourDisconnectedTris() = default; @@ -428,6 +484,112 @@ struct PatchOfRegularTrisAroundNode std::vector> allElementConn{Tri1Conn, Tri2Conn, Tri3Conn, Tri4Conn, Tri5Conn, Tri6Conn}; }; +struct RegularQuad +{ + RegularQuad() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::QUADRILATERAL_4_2D; + std::vector nodeLocs + {{ + { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 1.0, 1.0 }, + { 0.0, 1.0 }, + }}; + + std::array QuadConn{{0, 1, 2, 3}}; + std::vector> allElementConn{QuadConn}; +}; + +struct UMRRegularQuad +{ + UMRRegularQuad() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::QUADRILATERAL_4_2D; + std::vector nodeLocs + {{ + { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 1.0, 1.0 }, + { 0.0, 1.0 }, + { 0.5, 0.0 }, + { 1.0, 0.5 }, + { 0.5, 1.0 }, + { 0.0, 0.5 }, + { 0.5, 0.5 } + }}; + + std::array Quad1Conn{{0, 4, 8, 7}}; + std::array Quad2Conn{{1, 5, 8, 4}}; + std::array Quad3Conn{{2, 6, 8, 5}}; + std::array Quad4Conn{{3, 7, 8, 6}}; + std::vector> allElementConn{Quad1Conn, Quad2Conn, Quad3Conn, Quad4Conn}; +}; + +struct RegularHex +{ + RegularHex() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::HEXAHEDRON_8; + std::vector nodeLocs + {{ + { 0.0, 0.0, 0.0 }, + { 1.0, 0.0, 0.0 }, + { 1.0, 1.0, 0.0 }, + { 0.0, 1.0, 0.0 }, + { 0.0, 0.0, 1.0 }, + { 1.0, 0.0, 1.0 }, + { 1.0, 1.0, 1.0 }, + { 0.0, 1.0, 1.0 }, + }}; + + std::array HexConn{{0, 1, 2, 3, 4, 5, 6, 7}}; + std::vector> allElementConn{HexConn}; +}; + +struct UMRRegularHex +{ + UMRRegularHex() = default; + static constexpr stk::topology::topology_t TOPOLOGY = stk::topology::HEXAHEDRON_8; + std::vector 
nodeLocs + {{ + { 0.0, 0.0, 0.0 }, + { 1.0, 0.0, 0.0 }, + { 1.0, 1.0, 0.0 }, + { 0.0, 1.0, 0.0 }, + { 0.0, 0.0, 1.0 }, + { 1.0, 0.0, 1.0 }, + { 1.0, 1.0, 1.0 }, + { 0.0, 1.0, 1.0 }, + { 0.5, 0.0, 0.0 }, + { 1.0, 0.5, 0.0 }, + { 0.5, 1.0, 0.0 }, + { 0.0, 0.5, 0.0 }, + { 0.0, 0.0, 0.5 }, + { 1.0, 0.0, 0.5 }, + { 1.0, 1.0, 0.5 }, + { 0.0, 1.0, 0.5 }, + { 0.5, 0.0, 1.0 }, + { 1.0, 0.5, 1.0 }, + { 0.5, 1.0, 1.0 }, + { 0.0, 0.5, 1.0 }, + { 0.5, 0.5, 0.5 }, + { 0.5, 0.5, 0.0 }, + { 0.5, 0.5, 1.0 }, + { 0.0, 0.5, 0.5 }, + { 1.0, 0.5, 0.5 }, + { 0.5, 0.0, 0.5 }, + { 0.5, 1.0, 0.5 }, + }}; + + std::array Hex1Conn{{0,8,21,11,12,25,20,23}}; + std::array Hex2Conn{{1,9,21,8,13,24,20,25}}; + std::array Hex3Conn{{2,10,21,9,14,26,20,24}}; + std::array Hex4Conn{{3,11,21,10,15,23,20,26}}; + std::array Hex5Conn{{12,25,20,23,4,16,22,19}}; + std::array Hex6Conn{{13,24,20,25,5,17,22,16}}; + std::array Hex7Conn{{14,26,20,24,6,18,22,17}}; + std::array Hex8Conn{{15,23,20,26,7,19,22,18}}; + std::vector> allElementConn{Hex1Conn, Hex2Conn, Hex3Conn, Hex4Conn, Hex5Conn, Hex6Conn, Hex7Conn, Hex8Conn}; +}; + } diff --git a/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.cpp b/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.cpp index 7f2058d268b6..85a6ba7478aa 100644 --- a/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.cpp +++ b/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.cpp @@ -18,7 +18,20 @@ StkMeshBuilder::StkMeshBuilder(stk::mesh::BulkData & mesh, const stk::Para : mMesh(mesh), mAuxMeta(AuxMetaData::create(mesh.mesh_meta_data())), mPhaseSupport(Phase_Support::get(mesh.mesh_meta_data())), mComm(comm), time(0.0) { declare_coordinates(); - mMesh.mesh_meta_data().use_simple_fields(); +} + +template +std::vector StkMeshBuilder::get_processor_distribution_for_num_elements(const unsigned numElements) const +{ + std::vector elemOwners(numElements); + int elemOwner = 0; + for (unsigned iElem=0; iElem @@ -56,15 +69,27 @@ std::string get_surface_name(const unsigned sidesetId) return 
surfaceName; } +template +const stk::mesh::Part & StkMeshBuilder::get_sideset_part(const unsigned sidesetId) +{ + stk::mesh::Part * sidesetPart = mMesh.mesh_meta_data().get_part(get_surface_name(sidesetId)); + STK_ThrowRequireMsg(sidesetPart, "No sideset with id " << sidesetId); + return *sidesetPart; +} + +template +void StkMeshBuilder::create_sideset_part(const unsigned sidesetId) +{ + stk::mesh::Part &sidesetPart = mMesh.mesh_meta_data().declare_part(get_surface_name(sidesetId), mMesh.mesh_meta_data().side_rank()); + mMesh.mesh_meta_data().set_part_id(sidesetPart, sidesetId); + stk::io::put_io_part_attribute(sidesetPart); +} + template void StkMeshBuilder::create_sideset_parts(const std::vector &sidesetIds) { for (unsigned sidesetId : sidesetIds) - { - stk::mesh::Part &sidesetPart = mMesh.mesh_meta_data().declare_part(get_surface_name(sidesetId), mMesh.mesh_meta_data().side_rank()); - mMesh.mesh_meta_data().set_part_id(sidesetPart, sidesetId); - stk::io::put_io_part_attribute(sidesetPart); - } + create_sideset_part(sidesetId); } template @@ -465,9 +490,10 @@ void StkMeshBuilder::write_mesh(const std::string & fileName) } // Explicit template instantiation +template class StkMeshBuilder; template class StkMeshBuilder; template class StkMeshBuilder; template class StkMeshBuilder; - +template class StkMeshBuilder; } diff --git a/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.hpp b/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.hpp index 19fd433e40e9..34a68999f6ab 100644 --- a/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.hpp +++ b/packages/krino/krino/unit_tests/Akri_StkMeshBuilder.hpp @@ -43,6 +43,7 @@ class StkMeshBuilder const std::vector &elementBlockIDs, const std::vector &specifiedElementProcOwners); + std::vector get_processor_distribution_for_num_elements(const unsigned numElements) const; const std::vector & get_owned_elements() const { return mOwnedElems; } stk::mesh::Entity get_assigned_node_for_index(const size_t nodeIndex) const { 
return mMesh.get_entity(stk::topology::NODE_RANK, mAssignedGlobalNodeIdsforAllNodes[nodeIndex]); } const std::vector & get_assigned_node_global_ids() const { return mAssignedGlobalNodeIdsforAllNodes; } @@ -59,6 +60,7 @@ class StkMeshBuilder Phase_Support & get_phase_support() { return mPhaseSupport; } const Phase_Support & get_phase_support() const { return mPhaseSupport; } + void create_sideset_part(const unsigned sidesetId); void create_sideset_parts(const std::vector &sidesetIds); void create_sideset_parts(const std::vector &sideIdsAndNodeOfSides); void add_sides_to_sidesets(const std::vector &sideIdsAndNodeOfSides); @@ -68,6 +70,7 @@ class StkMeshBuilder stk::math::Vector3d get_node_coordinates(const stk::mesh::Entity node) const; const stk::mesh::FieldBase & get_coordinates_field() const; const stk::mesh::PartVector & get_block_parts() const { return mBlockParts; } + const stk::mesh::Part & get_sideset_part(const unsigned sidesetId); void write_mesh(const std::string & fileName); private: diff --git a/packages/krino/krino/unit_tests/Akri_StkMeshFixture.hpp b/packages/krino/krino/unit_tests/Akri_StkMeshFixture.hpp index c8b21a1eae33..bfa8ebf7ed3e 100644 --- a/packages/krino/krino/unit_tests/Akri_StkMeshFixture.hpp +++ b/packages/krino/krino/unit_tests/Akri_StkMeshFixture.hpp @@ -25,6 +25,8 @@ class StkMeshAndBuilder stk::mesh::BulkData & get_mesh() { return mMesh; } const stk::mesh::BulkData & get_mesh() const { return mMesh; } stk::ParallelMachine get_comm() const { return mComm; } + int parallel_size() const { return stk::parallel_machine_size(mComm); } + int parallel_rank() const { return stk::parallel_machine_rank(mComm); } AuxMetaData & get_aux_meta() { return mBuilder.get_aux_meta(); } const AuxMetaData & get_aux_meta() const { return mBuilder.get_aux_meta(); } const std::vector & get_assigned_node_global_ids() const { return mBuilder.get_assigned_node_global_ids(); } @@ -45,7 +47,6 @@ class StkMeshAndBuilder void build_mesh(const std::vector> &nodeLocs, 
const std::vector>> &elemConnPerProc) { - mMesh.mesh_meta_data().use_simple_fields(); mBuilder.build_mesh(nodeLocs, elemConnPerProc, theBlockId); } @@ -54,7 +55,6 @@ class StkMeshAndBuilder const std::vector &elementBlockIDs, const std::vector &specifiedElementProcOwners = {}) { - mMesh.mesh_meta_data().use_simple_fields(); mBuilder.build_mesh(nodeLocs, elementConn, elementBlockIDs, specifiedElementProcOwners); } @@ -74,10 +74,18 @@ class StkMeshAndBuilder template class StkMeshFixture : public ::testing::Test, public StkMeshAndBuilder { +public: + void set_valid_proc_sizes_for_test(const std::vector & procSizes) { mTestProcSizes = procSizes; } + bool is_valid_proc_size_for_test() const { STK_ThrowRequireMsg(!mTestProcSizes.empty(), "Valid proc sizes not set for test."); return std::find(mTestProcSizes.begin(), mTestProcSizes.end(), this->parallel_size()) != mTestProcSizes.end(); } +protected: + std::vector mTestProcSizes; }; +typedef StkMeshFixture StkMeshBeamFixture; typedef StkMeshFixture StkMeshTetFixture; typedef StkMeshFixture StkMeshTriFixture; +typedef StkMeshFixture StkMeshQuadFixture; +typedef StkMeshFixture StkMeshHexFixture; typedef StkMeshAndBuilder StkTetMeshAndBuilder; typedef StkMeshAndBuilder StkTriMeshAndBuilder; diff --git a/packages/krino/krino/unit_tests/Akri_UnitMathUtils.cpp b/packages/krino/krino/unit_tests/Akri_UnitMathUtils.cpp index 8df8c1a1f209..5cff4a51c929 100644 --- a/packages/krino/krino/unit_tests/Akri_UnitMathUtils.cpp +++ b/packages/krino/krino/unit_tests/Akri_UnitMathUtils.cpp @@ -60,5 +60,24 @@ TEST(find_root_newton_raphson, givenPolynomialFunctionWithWRONGJacobian_findRoot expect_root_newton_raphson(0.25, guess, tol, [error](const double x){ std::cout << "Eval at " << x << std::endl; return std::make_pair(x*x*x-0.25*0.25*0.25, 3.*x*x*error); }); } +void expect_quadratic_crossing(const double gold, const std::array & edgeVals) +{ + EXPECT_NEAR(gold, find_quadratic_crossing(edgeVals[0],edgeVals[1],edgeVals[2]), 1.e-6); +} + 
+std::array compute_edge_values(const double crossing1, const double crossing2) +{ + std::array edgeVals = {{(0.-crossing1)*(0.-crossing2), (1.-crossing1)*(1.-crossing2), (0.5-crossing1)*(0.5-crossing2)}}; + return edgeVals; +} + +TEST(compute_edge_values, singleCrossings) +{ + expect_quadratic_crossing(0.25, compute_edge_values(0.25, -0.25)); + expect_quadratic_crossing(0.25, compute_edge_values(0.25, 1.25)); + expect_quadratic_crossing(0.33, compute_edge_values(0.33, -0.25)); + expect_quadratic_crossing(0.77, compute_edge_values(0.77, 1.25)); +} + } diff --git a/packages/krino/krino/unit_tests/Akri_Unit_DecomposeWithSensitivities.cpp b/packages/krino/krino/unit_tests/Akri_Unit_DecomposeWithSensitivities.cpp index 081e9a0087f2..8fc2ffe12023 100644 --- a/packages/krino/krino/unit_tests/Akri_Unit_DecomposeWithSensitivities.cpp +++ b/packages/krino/krino/unit_tests/Akri_Unit_DecomposeWithSensitivities.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -78,23 +79,6 @@ void expect_part_counts(const stk::mesh::BulkData & mesh, const std::map> & spheres) -{ - const krino::FieldRef coordsField = mesh.mesh_meta_data().coordinate_field(); - krino::Composite_Surface initializationSurfaces("initialization surfaces"); - for (auto & sphere : spheres) - initializationSurfaces.add(new krino::Sphere(sphere.first, sphere.second)); - compute_nodal_surface_distance(mesh, coordsField, levelSetField, initializationSurfaces); -} - -void initialize_levelset_field_for_plane(const stk::mesh::BulkData & mesh, krino::FieldRef levelSetField, const stk::math::Vector3d & normal, const double offset) -{ - const krino::FieldRef coordsField = mesh.mesh_meta_data().coordinate_field(); - krino::Composite_Surface initializationSurfaces("initialization surfaces"); - initializationSurfaces.add(new krino::Plane(normal.data(), offset, 1.0)); - compute_nodal_surface_distance(mesh, coordsField, levelSetField, initializationSurfaces); -} - void 
test_sensitivity_for_plane(const krino::LevelSetShapeSensitivity & sens, const int iPlaneCoord) { double sum = 0; @@ -128,7 +112,7 @@ TEST(DecomposeMeshAndComputeSensitivities, createDecomposedMeshForPlaneNotThroug generate_bounding_box_mesh(bboxMesh, {0.,0.,0.}, {1.,1.,1.}, 0.3333); - initialize_levelset_field_for_plane(bboxMesh.bulk_data(), lsFields[0].isovar, {1.,0.,0.}, -0.2); + compute_nodal_distance_from_plane(bboxMesh.bulk_data(), bboxMesh.meta_data().coordinate_field(), lsFields[0].isovar, {1.,0.,0.}, -0.2); decompose_mesh_to_conform_to_levelsets(bboxMesh.bulk_data(), lsFields); @@ -150,7 +134,7 @@ TEST(DecomposeMeshAndComputeSensitivities, createDecomposedMeshForPlaneThrowSome generate_bounding_box_mesh(bboxMesh, {0.,0.,0.}, {1.,1.,1.}, 1.0); - initialize_levelset_field_for_plane(bboxMesh.bulk_data(), lsFields[0].isovar, {1.,0.,0.}, -0.5); + compute_nodal_distance_from_plane(bboxMesh.bulk_data(), bboxMesh.meta_data().coordinate_field(), lsFields[0].isovar, {1.,0.,0.}, -0.5); decompose_mesh_to_conform_to_levelsets(bboxMesh.bulk_data(), lsFields); @@ -163,6 +147,64 @@ TEST(DecomposeMeshAndComputeSensitivities, createDecomposedMeshForPlaneThrowSome output_mesh(bboxMesh.bulk_data(), "output.e", 1, 0.0); } +stk::mesh::PartVector get_nonvoid_parts_of_rank(const stk::mesh::MetaData & meta, const stk::mesh::EntityRank entityRank) +{ + stk::mesh::PartVector parts; + for (auto * part : meta.get_parts()) + if (stk::io::is_part_io_part(*part) && part->primary_entity_rank() == entityRank && part->name().find("_void") == std::string::npos) + parts.push_back(part); + return parts; +} + +stk::mesh::Selector get_nonvoid_selector(const stk::mesh::MetaData & meta, const stk::mesh::EntityRank entityRank) +{ + return stk::mesh::selectUnion(get_nonvoid_parts_of_rank(meta, entityRank)); +} + +std::map get_nonvoid_to_void_part_ordinal_map(const stk::mesh::MetaData & meta) +{ + const krino::Phase_Support & phaseSupport = krino::Phase_Support::get(meta); + + std::map 
partOrdinalMapping; + for (auto * part : meta.get_parts()) + { + if ((part->primary_entity_rank() == stk::topology::ELEMENT_RANK || part->primary_entity_rank() == meta.side_rank()) && + stk::io::is_part_io_part(*part)) + { + if (phaseSupport.is_interface(part)) + { + partOrdinalMapping[part->mesh_meta_data_ordinal()] = -1; // negative value used to indicate that we will remove interface parts + } + else if (part->name().find("_void") == std::string::npos) + { + stk::mesh::Part * voidPart = meta.get_part(part->name() + "_void"); + if (voidPart) + partOrdinalMapping[part->mesh_meta_data_ordinal()] = voidPart->mesh_meta_data_ordinal(); + } + } + } + return partOrdinalMapping; +} + +void move_disconnected_elements_to_void(stk::mesh::BulkData & mesh) +{ + const stk::mesh::Selector sideSelector = get_nonvoid_selector(mesh.mesh_meta_data(), mesh.mesh_meta_data().side_rank()); + const stk::mesh::Selector elementSelector = get_nonvoid_selector(mesh.mesh_meta_data(), stk::topology::ELEMENT_RANK); + const std::vector unattachedElems = krino::get_selected_owned_side_unattached_elements(mesh, elementSelector, sideSelector); + + const std::map partOrdinalMapping = get_nonvoid_to_void_part_ordinal_map(mesh.mesh_meta_data()); + krino::batch_convert_elements_and_their_sides(mesh, partOrdinalMapping, unattachedElems); +} + +void move_elements_not_in_largest_group_to_void(stk::mesh::BulkData & mesh) +{ + const stk::mesh::Selector elementSelector = get_nonvoid_selector(mesh.mesh_meta_data(), stk::topology::ELEMENT_RANK); + const std::vector elemsNotInLargestGroup = krino::find_owned_elements_that_are_not_in_the_largest_group_of_selected_side_attached_elements(mesh, elementSelector); + + const std::map partOrdinalMapping = get_nonvoid_to_void_part_ordinal_map(mesh.mesh_meta_data()); + krino::batch_convert_elements_and_their_sides(mesh, partOrdinalMapping, elemsNotInLargestGroup); +} + TEST(DecomposeMeshAndComputeSensitivities, readMeshInitializeDecomposeResetInitializeDecompose) { 
const std::string initialMeshName = "mesh.g"; @@ -190,7 +232,7 @@ TEST(DecomposeMeshAndComputeSensitivities, readMeshInitializeDecomposeResetIniti { {0.7,0.7,0.7}, 0.25 }, }; - initialize_levelset_field_for_spheres(meshFromFile.bulk_data(), lsFields[0].isovar, spheres); + compute_nodal_distance_from_spheres(meshFromFile.bulk_data(), meshFromFile.meta_data().coordinate_field(), lsFields[0].isovar, spheres); decompose_mesh_to_conform_to_levelsets(meshFromFile.bulk_data(), lsFields); @@ -207,7 +249,7 @@ TEST(DecomposeMeshAndComputeSensitivities, readMeshInitializeDecomposeResetIniti if (doWriteMesh) output_mesh(meshFromFile.bulk_data(), "reset.e", 1, 1.0); - initialize_levelset_field_for_spheres(meshFromFile.bulk_data(), lsFields[0].isovar, spheres ); + compute_nodal_distance_from_spheres(meshFromFile.bulk_data(), meshFromFile.meta_data().coordinate_field(), lsFields[0].isovar, spheres ); decompose_mesh_to_conform_to_levelsets(meshFromFile.bulk_data(), lsFields); @@ -216,3 +258,66 @@ TEST(DecomposeMeshAndComputeSensitivities, readMeshInitializeDecomposeResetIniti expect_part_counts(meshFromFile.bulk_data(), decompPartCounts); } + +void test_moving_islands_to_void(const std::function & island_removal_method) +{ + const std::string initialMeshName = "mesh.g"; + generate_and_write_bounding_box_mesh(stk::topology::TETRAHEDRON_4, {0.,0.,0.}, {1.,1.,1.}, 0.3333, initialMeshName); + + krino::MeshFromFile meshFromFile(initialMeshName, MPI_COMM_WORLD); + + const std::vector lsFields = krino::LSPerInterfacePolicy::setup_levelsets_on_all_blocks_with_void_phase_for_any_negative_levelset(meshFromFile.meta_data(), 1); + setup_fields_for_conforming_decomposition(meshFromFile.meta_data()); + + meshFromFile.populate_mesh(); + krino::activate_all_entities(meshFromFile.bulk_data(), krino::AuxMetaData::get(meshFromFile.meta_data()).active_part()); + + const std::vector> oneSphere + { + { {0.0,0.0,0.0}, 0.7 }, + }; + + compute_nodal_distance_from_spheres(meshFromFile.bulk_data(), 
meshFromFile.meta_data().coordinate_field(), lsFields[0].isovar, oneSphere, -1); + + decompose_mesh_to_conform_to_levelsets(meshFromFile.bulk_data(), lsFields); + + const bool doWriteMesh = false; + if (doWriteMesh) + output_mesh(meshFromFile.bulk_data(), "output1.e", 1, 0.0); + + const stk::mesh::Part & block_1 = *meshFromFile.meta_data().get_part("block_1"); + const size_t numElementsInBlockFromOneSphere = krino::get_global_num_entities(meshFromFile.bulk_data(), block_1); + + krino::CDMesh::reset_mesh_to_original_undecomposed_state(meshFromFile.bulk_data()); + + const std::vector> oneConnectedAndOneDisconnectedSphere + { + { {0.0,0.0,0.0}, 0.7 }, + { {0.7,0.7,0.7}, 0.25 } + }; + + compute_nodal_distance_from_spheres(meshFromFile.bulk_data(), meshFromFile.meta_data().coordinate_field(), lsFields[0].isovar, oneConnectedAndOneDisconnectedSphere, -1); + + decompose_mesh_to_conform_to_levelsets(meshFromFile.bulk_data(), lsFields); + + island_removal_method(meshFromFile.bulk_data()); + + if (doWriteMesh) + output_mesh(meshFromFile.bulk_data(), "output2.e", 1, 1.0); + + const size_t numElementsInBlockAfterDeletingSecondDisconnectedSphere = krino::get_global_num_entities(meshFromFile.bulk_data(), block_1); + + EXPECT_EQ(numElementsInBlockFromOneSphere, numElementsInBlockAfterDeletingSecondDisconnectedSphere); +} + +TEST(DecomposeMeshAndComputeSensitivities, readMeshInitializeDecomposeAndRemoveVolNotConnectedToSide) +{ + auto island_removal_method = [](stk::mesh::BulkData& mesh){ move_disconnected_elements_to_void(mesh); }; + test_moving_islands_to_void(island_removal_method); +} + +TEST(DecomposeMeshAndComputeSensitivities, readMeshInitializeDecomposeAndRemoveVolNotConnectedToLargestVol) +{ + auto island_removal_method = [](stk::mesh::BulkData& mesh){ move_elements_not_in_largest_group_to_void(mesh); }; + test_moving_islands_to_void(island_removal_method); +} diff --git a/packages/krino/krino/unit_tests/Akri_Unit_FastMarching.cpp 
b/packages/krino/krino/unit_tests/Akri_Unit_FastMarching.cpp new file mode 100644 index 000000000000..5c5b4803dde8 --- /dev/null +++ b/packages/krino/krino/unit_tests/Akri_Unit_FastMarching.cpp @@ -0,0 +1,140 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace krino { + +static FieldRef register_distance_field(stk::mesh::MetaData & meta) +{ + FieldRef distanceField = AuxMetaData::get(meta).register_field("distance", FieldType::REAL, stk::topology::NODE_RANK, 2, 1, meta.universal_part()); + return distanceField; +} + +static void generate_bounding_box_mesh(krino::BoundingBoxMesh & bboxMesh, const stk::math::Vector3d & minCorner, const stk::math::Vector3d & maxCorner, const double meshSize) +{ + bboxMesh.set_domain(krino::BoundingBoxMesh::BoundingBoxType(minCorner, maxCorner), meshSize); + bboxMesh.populate_mesh(); + stk::mesh::BulkData & mesh = bboxMesh.bulk_data(); + krino::activate_all_entities(mesh, krino::AuxMetaData::get(mesh.mesh_meta_data()).active_part()); + populate_stk_local_ids(bboxMesh.bulk_data()); +} + +static double redistance_and_compute_error(const stk::mesh::BulkData & mesh, const FieldRef coordsField, const FieldRef distanceField, const std::function & analytic_fn) +{ + std::function get_interface_speed; + + Fast_Marching fm(mesh, + mesh.mesh_meta_data().universal_part(), + coordsField, + distanceField, + get_interface_speed, + sierra::Diag::sierraTimer()); + fm.redistance(); + + const bool doOutput = false; + if (doOutput) + output_composed_mesh_with_fields(mesh, mesh.mesh_meta_data().universal_part(), "fastMarching.e", 1, 0.); + + const double err = compute_relative_nodal_RMS_error(mesh, coordsField, distanceField, analytic_fn); + return err; +} + +static double build_mesh_initialize_sphere_redistance_and_compute_error(const stk::topology elemTopology, const double meshSize, stk::diag::Timer & testTimer) +{ + stk::diag::TimeBlock timer__(testTimer); + 
krino::BoundingBoxMesh bboxMesh(elemTopology, MPI_COMM_WORLD); + FieldRef distanceField = register_distance_field(bboxMesh.meta_data()); + generate_bounding_box_mesh(bboxMesh, stk::math::Vector3d{0,0,0}, stk::math::Vector3d{1,1,1}, meshSize); + + const stk::math::Vector3d center{-0.05,-0.05,0}; + const double radius = 0.7; + const std::vector> sphere { { center, radius } }; + auto analytic_fn = [¢er, radius](const stk::math::Vector3d &x) { return (x-center).length() - radius; }; + + const FieldRef coordsField = bboxMesh.meta_data().coordinate_field(); + compute_nodal_distance_from_spheres(bboxMesh.bulk_data(), coordsField, distanceField, sphere); + + return redistance_and_compute_error(bboxMesh.bulk_data(), coordsField, distanceField, analytic_fn); +} + +static void test_fast_marching_error_for_circle_or_sphere(const stk::topology elemTopology, const double coarseMeshSize, const std::vector & goldErrors, stk::diag::Timer & testTimer) +{ + double meshSize = coarseMeshSize; + for(size_t i=0; i bulk = stk::mesh::MeshBuilder(pm).set_spatial_dimension(2).create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } diff --git a/packages/krino/krino/unit_tests/Akri_Unit_OutputUtils.cpp b/packages/krino/krino/unit_tests/Akri_Unit_OutputUtils.cpp new file mode 100644 index 000000000000..a33d98412b7e --- /dev/null +++ b/packages/krino/krino/unit_tests/Akri_Unit_OutputUtils.cpp @@ -0,0 +1,80 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void put_node_with_id_into_nodeset(stk::mesh::BulkData & mesh, stk::mesh::Part & nodeset, const stk::mesh::EntityId id) +{ + const stk::mesh::Entity node = mesh.get_entity(stk::topology::NODE_RANK, 1); + mesh.modification_begin(); + if (mesh.is_valid(node) && mesh.bucket(node).owned()) + mesh.change_entity_parts(node, stk::mesh::PartVector{&nodeset}); + mesh.modification_end(); +} + +int get_num_nodes_in_exodus_file(const std::string & filename) +{ + int numNodes 
= 0; + if ( 0 == stk::EnvData::parallel_rank() ) + { + /* open file */ + Ioss::DatabaseIO *db = Ioss::IOFactory::create("exodusII", filename.c_str(), Ioss::READ_MODEL, MPI_COMM_SELF); + if ( !db ) { + ThrowRuntimeError("error reading file " << filename); + } + std::unique_ptr io = std::make_unique(db, "EXOSurface IC Region"); + + numNodes = io->get_property("node_count").get_int(); + } + krino::all_reduce_sum(stk::EnvData::parallel_comm(), numNodes); + return numNodes; +} + +static void generate_bounding_box_mesh(krino::BoundingBoxMesh & bboxMesh, const stk::math::Vector3d & minCorner, const stk::math::Vector3d & maxCorner, const double meshSize) +{ + bboxMesh.set_domain(krino::BoundingBoxMesh::BoundingBoxType(minCorner, maxCorner), meshSize); + bboxMesh.set_mesh_structure_type(krino::FLAT_WALLED_BCC_BOUNDING_BOX_MESH); + bboxMesh.populate_mesh(); +} + +void output_mesh_and_test_num_nodes(const stk::mesh::BulkData & mesh, const stk::mesh::Selector & outputSelector, const int goldNumOutputNodes) +{ + const std::string filename = "outputTest.e"; + krino::output_composed_mesh_with_fields(mesh, outputSelector, filename, 1, 0.); + EXPECT_EQ(goldNumOutputNodes, get_num_nodes_in_exodus_file(filename)); +} + +TEST(OutputUtils, createMesh_outputMeshWithSelectingBlock_allNodesOutput) +{ + krino::BoundingBoxMesh bboxMesh(stk::topology::TETRAHEDRON_4, MPI_COMM_WORLD); + generate_bounding_box_mesh(bboxMesh, {0.,0.,0.}, {1.,1.,1.}, 0.3333); + + stk::mesh::Selector outputSelector = *bboxMesh.meta_data().get_part("block_1"); + int numNodes = stk::mesh::count_selected_entities(outputSelector & bboxMesh.meta_data().locally_owned_part(), bboxMesh.bulk_data().buckets(stk::topology::NODE_RANK)); + krino::all_reduce_sum(stk::EnvData::parallel_comm(), numNodes); + + output_mesh_and_test_num_nodes(bboxMesh.bulk_data(), outputSelector, numNodes); +} + +TEST(OutputUtils, createMeshWithNodeset_outputMeshWithSelectingOnlyNodest_noNodesOutputSinceNotConnectedToAnySelectedElems) +{ + 
krino::BoundingBoxMesh bboxMesh(stk::topology::TETRAHEDRON_4, MPI_COMM_WORLD); + + stk::mesh::Part & nodeset = bboxMesh.meta_data().declare_part_with_topology("MYNODESET", stk::topology::NODE); + stk::io::put_io_part_attribute(nodeset); + + generate_bounding_box_mesh(bboxMesh, {0.,0.,0.}, {1.,1.,1.}, 0.3333); + + put_node_with_id_into_nodeset(bboxMesh.bulk_data(), nodeset, 1); + + stk::mesh::Selector outputSelector = nodeset; + output_mesh_and_test_num_nodes(bboxMesh.bulk_data(), outputSelector, 0); +} diff --git a/packages/krino/krino/unit_tests/Akri_Unit_RebalanceUtils.cpp b/packages/krino/krino/unit_tests/Akri_Unit_RebalanceUtils.cpp index 2d4eb56c4888..79351e35593b 100644 --- a/packages/krino/krino/unit_tests/Akri_Unit_RebalanceUtils.cpp +++ b/packages/krino/krino/unit_tests/Akri_Unit_RebalanceUtils.cpp @@ -228,7 +228,6 @@ class ParallelRebalanceForAdaptivityFixture3D : public ::testing::Test load_field(meta->declare_field(stk::topology::ELEMENT_RANK, "element_weights")), change_list(*bulk, {}) { - meta->use_simple_fields(); AuxMetaData::create(*meta); double zero_val = 0.; stk::mesh::put_field_on_mesh(load_field, meta->universal_part(), &zero_val); @@ -256,7 +255,7 @@ class ParallelRebalanceForAdaptivityFixture3D : public ::testing::Test auto * coords = field_data(*meta->coordinate_field(), node); if(std::fabs(coords[meta->spatial_dimension()-1]-1.) 
<= 1e-6) { - int * elemMarker = field_data(refinement.get_marker_field(), elem); + int * elemMarker = field_data(refinement.get_marker_field_and_sync_to_host(), elem); *elemMarker = static_cast(Refinement::RefinementMarker::REFINE); } } @@ -507,7 +506,7 @@ TEST_F(ParallelRebalanceForAdaptivityFixture3D, ParentChildRebalanceRules) //Verify leaf elements have their parent element on the same processor and flag for coarsening { clear_refinement_marker(refinement); - FieldRef markerField = refinement.get_marker_field(); + FieldRef markerField = refinement.get_marker_field_and_sync_to_host(); auto elem_buckets = bulk->get_buckets(stk::topology::ELEMENT_RANK, meta->locally_owned_part()); for(auto && bucket : elem_buckets) diff --git a/packages/krino/krino/unit_tests/Akri_Unit_Refine_Beam.cpp b/packages/krino/krino/unit_tests/Akri_Unit_Refine_Beam.cpp new file mode 100644 index 000000000000..dfbcaa5237a5 --- /dev/null +++ b/packages/krino/krino/unit_tests/Akri_Unit_Refine_Beam.cpp @@ -0,0 +1,65 @@ +#include +#include + +namespace krino { + +class RegularBeamRefinement : public RefinementFixture +{ +public: + RegularBeamRefinement() + { + StkMeshBeamFixture::build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {1}); + } +protected: +}; + +TEST_F(RegularBeamRefinement, meshAfter3LevelsOfUMRViaUniformMarker_have15Elements) +{ + if(stk::parallel_machine_size(mComm) == 1) + { + perform_iterations_of_uniform_refinement_with_uniform_marker(3); + + EXPECT_EQ(9u, get_global_num_entities(mMesh, stk::topology::NODE_RANK)); + EXPECT_EQ(15u, get_global_num_entities(mMesh, stk::topology::ELEMENT_RANK)); + } +} + +class UMRRegularBeamRefinement : public RefinementFixture +{ +public: + UMRRegularBeamRefinement() + {; + if(stk::parallel_machine_size(mComm) == 1) + this->build_mesh(meshSpec.nodeLocs, {meshSpec.allElementConn}); + else if(stk::parallel_machine_size(mComm) == 2) + this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {1,1}, {0,1}); + } +protected: +}; + 
+TEST_F(UMRRegularBeamRefinement, meshAfterEachLevelOfRefineHasCorrectNumElemsAndNodesInParallel) +{ + const std::vector testActiveProcs{1,2}; + const int parallelSize = stk::parallel_machine_size(mComm); + if (std::find(testActiveProcs.begin(), testActiveProcs.end(), parallelSize) == testActiveProcs.end()) + return; + + const std::vector goldNumElementsByRefinementLevel = {6, 14, 30, 62}; + const std::vector goldNumNodesByRefinementLevel = {5, 9, 17, 33}; + for (size_t i=0; i elemsToMarkForUnrefinement{1005,1006,1007,1008,1017,1018,1019,1020}; + std::vector elemsToUnrefine; + for (auto elemId : elemsToMarkForUnrefinement) + { + stk::mesh::Entity elem = mMesh.get_entity(stk::topology::ELEMENT_RANK, elemId); + if (mMesh.is_valid(elem)) + elemsToUnrefine.push_back(elem); + } + mark_elements_for_unrefinement(elemsToUnrefine); + + refine_marked_elements(); + } + + void test_field_is_preserved_on_child_edge_during_unrefinement(const stk::mesh::Field & field, const stk::mesh::Entity parentNode0, const stk::mesh::Entity parentNode1, const double goldFieldVal) { - if(stk::parallel_machine_size(mComm) <= 4) + if (mMesh.is_valid(parentNode0) && mMesh.is_valid(parentNode1)) { - if(stk::parallel_machine_size(mComm) == 1) - this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,0,0,0}); - else if(stk::parallel_machine_size(mComm) == 2) - this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,0,1,1}); - else if(stk::parallel_machine_size(mComm) == 3) - this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,1,2,2}); - else if(stk::parallel_machine_size(mComm) == 4) - this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,1,2,3}); + const stk::mesh::Entity childEdgeNode = myRefinement.get_edge_child_node(edge_from_edge_nodes(mMesh, parentNode0, parentNode1)); + if (mMesh.is_valid(childEdgeNode)) + { + EXPECT_EQ(goldFieldVal, *stk::mesh::field_data(field, childEdgeNode)) << "Field is not preserved 
during unrefinement"; + } } } +}; + +class RightTriSurroundedByEdgeTrisRefinement : public RefinementFixture +{ +public: + RightTriSurroundedByEdgeTrisRefinement() + { + set_valid_proc_sizes_for_test({1,2,3,4}); + if(stk::parallel_machine_size(mComm) == 1) + this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,0,0,0}); + else if(stk::parallel_machine_size(mComm) == 2) + this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,0,1,1}); + else if(stk::parallel_machine_size(mComm) == 3) + this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,1,2,2}); + else if(stk::parallel_machine_size(mComm) == 4) + this->build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {2,2,2,1}, {0,1,2,3}); + } protected: }; TEST_F(RegularTriRefinement, givenMeshWithoutAnyRefinement_whenQueryingParentsAndChildren_noParentOrChildren) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { const stk::mesh::Entity elem = get_element(); EXPECT_FALSE(myRefinement.is_parent(elem)); @@ -76,7 +132,7 @@ TEST_F(RegularTriRefinement, givenMeshWithoutAnyRefinement_whenQueryingParentsAn TEST_F(RegularTriRefinement, givenMeshWithNoElementMarked_whenFindingEdgesToRefine_noEdgesToRefine) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { clear_refinement_marker(); @@ -89,7 +145,7 @@ TEST_F(RegularTriRefinement, givenMeshWithNoElementMarked_whenFindingEdgesToRefi TEST_F(RegularTriRefinement, givenMeshWithSingleTriMarked_whenFindingEdgesToRefine_3EdgesToRefine) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { clear_refinement_marker(); mark_elements_for_refinement({get_element()}); @@ -103,7 +159,7 @@ TEST_F(RegularTriRefinement, givenMeshWithSingleTriMarked_whenFindingEdgesToRefi TEST_F(RegularTriRefinement, givenMeshWithSingleTriMarked_afterRefinement_all3EdgesHaveRefineNodes) { - if(stk::parallel_machine_size(mComm) == 1) + 
if(is_valid_proc_size_for_test()) { mark_elements_for_refinement({get_element()}); do_refinement(); @@ -127,7 +183,7 @@ TEST_F(RegularTriRefinement, givenMeshWithSingleTriMarked_afterRefinement_all3Ed TEST_F(RegularTriRefinement, givenMeshWithSingleTriMarked_afterRefinement_have4ChildElements) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { mark_elements_for_refinement({get_element()}); do_refinement(); @@ -148,7 +204,7 @@ TEST_F(RegularTriRefinement, givenMeshWithSingleTriMarked_afterRefinement_have4C TEST_F(RegularTriRefinement, twoRoundsOfMarkingSameElement_secondRoundDoesNothing) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { refine_elements_with_given_indices({0}); @@ -162,7 +218,7 @@ TEST_F(RegularTriRefinement, twoRoundsOfMarkingSameElement_secondRoundDoesNothin TEST_F(RegularTriRefinement, meshAfter3LevelsOfUMRViaGeneralMarker_have85Elements) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { perform_iterations_of_uniform_refinement_with_general_element_marker(3); @@ -173,7 +229,7 @@ TEST_F(RegularTriRefinement, meshAfter3LevelsOfUMRViaGeneralMarker_have85Element TEST_F(RegularTriRefinement, meshAfter3LevelsOfUMRViaUniformMarker_have85Elements) { - if(stk::parallel_machine_size(mComm) == 1) + if(is_valid_proc_size_for_test()) { perform_iterations_of_uniform_refinement_with_uniform_marker(3); @@ -184,7 +240,7 @@ TEST_F(RegularTriRefinement, meshAfter3LevelsOfUMRViaUniformMarker_have85Element TEST_F(RightTriSurroundedByEdgeTrisRefinement, refinementOfOneTriInParallel_expectEdgeIsRefinedAndCoordinatesAreCorrect) { - if(stk::parallel_machine_size(mComm) <= 4) + if(is_valid_proc_size_for_test()) { const unsigned indexOfElemToRefine = 0; const stk::mesh::Entity edgeNode0 = mMesh.get_entity(stk::topology::NODE_RANK, mBuilder.get_assigned_node_global_ids()[3]); @@ -216,6 +272,9 @@ TEST_F(RightTriSurroundedByEdgeTrisRefinement, 
refinementOfOneTriInParallel_expe TEST_F(RightTriSurroundedByEdgeTrisRefinement, checkAllPossibleRefinementsInParallel_expectBoundarySidesAreCorrect) { + if(!is_valid_proc_size_for_test()) + return; + for (int i=0; i<8; ++i) { std::vector edgeElementsToRefine; @@ -235,6 +294,9 @@ TEST_F(RightTriSurroundedByEdgeTrisRefinement, checkAllPossibleRefinementsInPara TEST_F(RightTriSurroundedByEdgeTrisRefinement, checkAllPossibleRefinementsInParallel_getSameQualityAsProducedByPercept) { + if(!is_valid_proc_size_for_test()) + return; + // These gold values were generated by running Percept in 10/2022 const std::array goldNumElementsByCase{4,10,10,15,10,15,15,20}; const std::array goldNumNodesByCase{6,9,9,12,9,12,12,15}; @@ -255,7 +317,7 @@ TEST_F(RightTriSurroundedByEdgeTrisRefinement, checkAllPossibleRefinementsInPara TEST_F(RightTriSurroundedByEdgeTrisRefinement, afterEachOfThreeRoundsOfRefinementOfEdgeElements_centerElementHasCorrectNumberOfChildren) { - if(stk::parallel_machine_size(mComm) <= 4) + if(is_valid_proc_size_for_test()) { stk::mesh::Entity centerElem = mMesh.get_entity(stk::topology::ELEMENT_RANK, mBuilder.get_assigned_element_global_ids()[3]); @@ -276,7 +338,7 @@ TEST_F(RightTriSurroundedByEdgeTrisRefinement, afterEachOfThreeRoundsOfRefinemen TEST_F(RightTriSurroundedByEdgeTrisRefinement, refineCenterElemAndThenChildOfCenterElem_noHangingNodes) { - if(stk::parallel_machine_size(mComm) <= 4) + if(is_valid_proc_size_for_test()) { stk::mesh::Entity centerElem = mMesh.get_entity(stk::topology::ELEMENT_RANK, mBuilder.get_assigned_element_global_ids()[3]); @@ -307,7 +369,7 @@ TEST_F(RightTriSurroundedByEdgeTrisRefinement, refineCenterElemAndThenChildOfCen TEST_F(RightTriSurroundedByEdgeTrisRefinement, refineCenterElemAndThenMarkTransitionElement_parentOfTransitionElementGetsRefined) { - if(stk::parallel_machine_size(mComm) <= 4) + if(is_valid_proc_size_for_test()) { stk::mesh::Entity centerElem = mMesh.get_entity(stk::topology::ELEMENT_RANK, 
mBuilder.get_assigned_element_global_ids()[3]); stk::mesh::Entity edgeElem = mMesh.get_entity(stk::topology::ELEMENT_RANK, mBuilder.get_assigned_element_global_ids()[0]); @@ -346,13 +408,16 @@ TEST_F(RightTriSurroundedByEdgeTrisRefinement, refineCenterElemAndThenMarkTransi TEST_F(RightTriSurroundedByEdgeTrisRefinement, markedAnyTransitionElementForEveryEdgeConfiguration_parentElementGetsFullyRefined) { + if(!is_valid_proc_size_for_test()) + return; + const int indexOfCentralElement = 3; test_refinement_of_transition_element_leads_to_refinement_of_parent(indexOfCentralElement); } TEST_F(UMRRegularTriRefinement, refinementThenUnrefinementTest) { - if(stk::parallel_machine_size(mComm) > 4) + if(!is_valid_proc_size_for_test()) return; const bool doWriteMesh = false; @@ -399,37 +464,51 @@ TEST_F(UMRRegularTriRefinement, refinementThenUnrefinementTest) TEST_F(RegularTriRefinement, meshRefinedTwiceWithParentAndChildrenMovedToNewProc_unrefinementCausesParentToReturnToOriginatingProc) { - if(stk::parallel_machine_size(mComm) > 1) - { - perform_iterations_of_uniform_refinement_with_general_element_marker(2); + if(!is_valid_proc_size_for_test() || this->parallel_size() == 1) + return; - move_owned_elements_with_given_ids_and_owned_attached_entities_to_processor({1004, 1010, 1011, 1012, 1013}, 1); + perform_iterations_of_uniform_refinement_with_general_element_marker(2); - EXPECT_TRUE(mBuilder.check_boundary_sides()); - EXPECT_TRUE(check_face_and_edge_ownership(mMesh)); + move_owned_elements_with_given_ids_and_owned_attached_entities_to_processor({1004, 1010, 1011, 1012, 1013}, 1); - mark_all_elements_for_unrefinement(); - refine_marked_elements(); + EXPECT_TRUE(mBuilder.check_boundary_sides()); + EXPECT_TRUE(check_face_and_edge_ownership(mMesh)); - EXPECT_TRUE(mBuilder.check_boundary_sides()); - EXPECT_TRUE(check_face_and_edge_ownership(mMesh)); + mark_all_elements_for_unrefinement(); + refine_marked_elements(); - stk::mesh::Entity movedParentElement = 
mMesh.get_entity(stk::topology::ELEMENT_RANK, 1004); - if (mMesh.is_valid(movedParentElement)) - { - EXPECT_EQ(0, mMesh.parallel_owner_rank(movedParentElement)); - } + EXPECT_TRUE(mBuilder.check_boundary_sides()); + EXPECT_TRUE(check_face_and_edge_ownership(mMesh)); - mark_all_elements_for_unrefinement(); - ASSERT_NO_THROW(refine_marked_elements()); + stk::mesh::Entity movedParentElement = mMesh.get_entity(stk::topology::ELEMENT_RANK, 1004); + if (mMesh.is_valid(movedParentElement)) + { + EXPECT_EQ(0, mMesh.parallel_owner_rank(movedParentElement)); + } - EXPECT_EQ(1u, get_global_num_entities(mMesh, stk::topology::ELEMENT_RANK)); + mark_all_elements_for_unrefinement(); + ASSERT_NO_THROW(refine_marked_elements()); - EXPECT_TRUE(mBuilder.check_boundary_sides()); - EXPECT_TRUE(check_face_and_edge_ownership(mMesh)); - } + EXPECT_EQ(1u, get_global_num_entities(mMesh, stk::topology::ELEMENT_RANK)); + + EXPECT_TRUE(mBuilder.check_boundary_sides()); + EXPECT_TRUE(check_face_and_edge_ownership(mMesh)); } +TEST_F(UMRRegularTriRefinementWithCornerElementsInBlock2, centerElementInDifferentBlockWithField_unrefinementThatModifiesRefinementOfCenterElement_nodeThatRemainsCorrectlyPreservesField) +{ + if(!is_valid_proc_size_for_test()) + return; + + perform_iterations_of_uniform_refinement_with_general_element_marker(1); + + const double goldFieldVal = 1.0; + const stk::mesh::Field & block1Field = create_and_initialize_field_on_elements_of_block_1(goldFieldVal); + + mark_all_children_of_center_element_and_one_corner_element_for_unrefinement_and_unrefine(); + + test_field_is_preserved_on_child_edge_during_unrefinement(block1Field, get_assigned_node_for_index(3), get_assigned_node_for_index(5), goldFieldVal); +} } diff --git a/packages/krino/krino/unit_tests/Akri_Unit_RefinementFixture.hpp b/packages/krino/krino/unit_tests/Akri_Unit_RefinementFixture.hpp index a48c64ff42ba..5abe24531726 100644 --- a/packages/krino/krino/unit_tests/Akri_Unit_RefinementFixture.hpp +++ 
b/packages/krino/krino/unit_tests/Akri_Unit_RefinementFixture.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ class RefinementFixture : public StkMeshFixture myElemField = FieldRef(elemField); stk::mesh::put_field_on_mesh(elemMarkerField, meta.universal_part(), 1, 1, nullptr); stk::mesh::put_field_on_mesh(elemField, meta.universal_part(), 1, 1, nullptr); + mMesh.set_automatic_aura_option(stk::mesh::BulkData::NO_AUTO_AURA); } using StkMeshFixture::mMesh; @@ -101,7 +103,9 @@ class RefinementFixture : public StkMeshFixture myTimer.start(); myRefinement.do_uniform_refinement(numIterationsOfUMR); myTimer.stop(); - std::cout << "After " << numIterationsOfUMR << " levels of uniform refinement, there are " << get_global_num_entities(mMesh, stk::topology::ELEMENT_RANK) << " elements, time = " << myTimer.getMetric().getLap() << std::endl; + const size_t numElems = get_global_num_entities(mMesh, stk::topology::ELEMENT_RANK); + if (0 == stk::parallel_machine_rank(mComm)) + std::cout << "After " << numIterationsOfUMR << " levels of uniform refinement, there are " << numElems << " elements, time = " << myTimer.getMetric().getLap() << std::endl; } void refine_marked_elements(const std::string fileName = "") @@ -344,6 +348,7 @@ class RefinementFixture : public StkMeshFixture void test_refinement_of_transition_element_leads_to_refinement_of_parent(const int indexOfCenterElement) { const stk::mesh::Entity centerElem = mMesh.get_entity(stk::topology::ELEMENT_RANK, mBuilder.get_assigned_element_global_ids()[indexOfCenterElement]); + std::vector transitionElements; const unsigned numEdges = get_global_num_entities(mMesh, stk::topology::ELEMENT_RANK) - 1; for (int iCaseId=0; iCaseId<(1< refine_elements_with_given_indices(edgeElementsToRefine); - std::vector transitionElements = get_children(centerElem); - const unsigned numTransitionElements = transitionElements.size(); + unsigned numTransitionElements = 0; + if 
(mMesh.is_valid(centerElem)) + numTransitionElements = get_num_children(centerElem); + all_reduce_max(mMesh.parallel(), numTransitionElements); unrefine_mesh(); for (unsigned iTransitionElement=0; iTransitionElement elementsToRefine; + if (mMesh.is_valid(centerElem)) + { + transitionElements = get_children(centerElem); + + ASSERT_EQ(numTransitionElements, transitionElements.size()) << "Number of transition elements changed from " << numTransitionElements << " to " << transitionElements.size() << std::endl; - ASSERT_EQ(numTransitionElements, transitionElements.size()) << "Number of transition elements changed from " << numTransitionElements << " to " << transitionElements.size() << std::endl; + elementsToRefine.push_back(transitionElements[iTransitionElement]); + } - refine_elements_with_given_ids({mMesh.identifier(transitionElements[iTransitionElement])}); + refine_elements(elementsToRefine); if (mMesh.is_valid(centerElem) && mMesh.bucket(centerElem).owned()) { const unsigned numChildrenAfterRefinementOfTransition = (get_children(centerElem)).size(); diff --git a/packages/krino/krino/unit_tests/Akri_Unit_SemiLagrangian.cpp b/packages/krino/krino/unit_tests/Akri_Unit_SemiLagrangian.cpp new file mode 100644 index 000000000000..c851c003194d --- /dev/null +++ b/packages/krino/krino/unit_tests/Akri_Unit_SemiLagrangian.cpp @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace krino { + +class SemiLagrangianTriElements : public StkMeshTriFixture +{ +public: + SemiLagrangianTriElements() {} + + void build_quad_split_4tri() + { + myDistanceField = mMesh.mesh_meta_data().declare_field(stk::topology::NODE_RANK, "distance"); + stk::mesh::put_field_on_mesh(myDistanceField.field(), mMesh.mesh_meta_data().universal_part(), 1, 1, nullptr); + QuadSplit4Tri meshSpec; + StkMeshTriFixture::build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, {1,1,1,1}, 
mBuilder.get_processor_distribution_for_num_elements(meshSpec.allElementConn.size())); + } + + bool point_has_matching_Y_value(const stk::math::Vector3d & pt, const std::vector & goldYValues) + { + const double absTol = 1.e-6; + for(double goldYValue : goldYValues) + if (std::abs(goldYValue - pt[1]) < absTol) + return true; + return false; + } + + bool facet_points_all_have_matching_Y_value(const Facet2d & facet, const std::vector & goldFacetYValues) + { + for (int pt=0; pt<2; ++pt) + if(!point_has_matching_Y_value(facet.facet_vertex(pt), goldFacetYValues)) + return false; + return true; + } + + void test_facets(const FacetedSurfaceBase & facets, const std::vector & goldFacetYValues) + { + ASSERT_FALSE(facets.get_facets_2d().empty()); + for (auto facet : facets.get_facets_2d()) + EXPECT_TRUE(facet_points_all_have_matching_Y_value(facet, goldFacetYValues)) << "Non matching facet " << facet; + } + + void initialize_planar_surfaces(Composite_Surface & initSurfaces) + { + initSurfaces.add(new Plane(stk::math::Vector3d(0,-1,0), 0.2)); + initSurfaces.add(new Plane(stk::math::Vector3d(0,1,0), 0.2)); + } + + void set_interface_velocity(const std::vector & interfaceVelocity) + { + initialize_expression_vector(interfaceVelocity, myInterfaceVelocity); + } + +protected: + FieldRef myDistanceField; + double avgEdgeLength{1.0}; + std::vector myInterfaceVelocity; +}; + +TEST_F(SemiLagrangianTriElements, initialTwoParallelPlanesIntersectingElementsMultipleTimes_adaptivelyContourElementsAndThenAdvect_exactlyRecoverPlanes) +{ + build_quad_split_4tri(); + + Composite_Surface initSurfaces("init"); + initialize_planar_surfaces(initSurfaces); + + const stk::mesh::Selector activeFieldSelector = mMesh.mesh_meta_data().universal_part(); + BoundingBox nodeBBox = krino::compute_nodal_bbox(mMesh, activeFieldSelector, get_coordinates_field()); + constexpr double zeroNarrowBandSize = 0.; + initSurfaces.prepare_to_compute(0.0, nodeBBox, zeroNarrowBandSize); + + // initialize + const double time0 
= 0.; + std::unique_ptr facetsOrig = FacetedSurfaceBase::build(2); + compute_nodal_surface_distance(mMesh, get_coordinates_field(), myDistanceField, initSurfaces, time0, zeroNarrowBandSize); + build_initial_adaptive_facets_after_nodal_distance_is_initialized_from_initial_surfaces(mMesh, activeFieldSelector, time0, get_coordinates_field(), myDistanceField, avgEdgeLength, initSurfaces, *facetsOrig); + test_facets(*facetsOrig, {-0.2, 0.2}); + + set_interface_velocity({"0.", "1."}); + + const BoundingBox paddedNodeBBox = compute_padded_node_bounding_box_for_semilagrangian(mMesh, activeFieldSelector, 0., 0.5, get_coordinates_field(), myInterfaceVelocity, *facetsOrig); + facetsOrig->prepare_to_compute(paddedNodeBBox, 0.); + + // single step advection (still surprised that this is unstable in the circle advection, molenkamp, test (not here)) + std::unique_ptr facetsEnd = FacetedSurfaceBase::build(2); + calc_single_step_semilagrangian_nodal_distance_and_build_facets(mMesh, activeFieldSelector, 0.0, 0.5, get_coordinates_field(), myDistanceField, myInterfaceVelocity, zeroNarrowBandSize, avgEdgeLength, *facetsOrig, *facetsEnd); + test_facets(*facetsEnd, {0.3}); + + // predistor-corrector (seems super stable and accurate, but expensive) + std::unique_ptr facetsPred = FacetedSurfaceBase::build(2); + predict_semilagrangian_nodal_distance_and_build_facets(mMesh, activeFieldSelector, 0.0, 0.5, get_coordinates_field(), myDistanceField, myInterfaceVelocity, zeroNarrowBandSize, avgEdgeLength, *facetsOrig, *facetsPred); + + facetsPred->prepare_to_compute(paddedNodeBBox, 0.); + correct_semilagrangian_nodal_distance_and_build_facets(mMesh, activeFieldSelector, 0.0, 0.5, get_coordinates_field(), myDistanceField, myInterfaceVelocity, zeroNarrowBandSize, avgEdgeLength, *facetsOrig, *facetsPred, *facetsEnd); + test_facets(*facetsEnd, {0.3}); +} +} + diff --git a/packages/krino/krino/unit_tests/Akri_Unit_SideAttachedElements.cpp 
b/packages/krino/krino/unit_tests/Akri_Unit_SideAttachedElements.cpp new file mode 100644 index 000000000000..a25ccc15551d --- /dev/null +++ b/packages/krino/krino/unit_tests/Akri_Unit_SideAttachedElements.cpp @@ -0,0 +1,109 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace krino { + +class SideAttachedTriElements : public StkMeshTriFixture +{ +public: + SideAttachedTriElements() {} + + void build_quad_split_4tri_with_sideset(const std::vector &elementBlockIDs = {1,1,1,1}) + { + mBuilder.create_sideset_part(1); + QuadSplit4Tri meshSpec; + StkMeshTriFixture::build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, elementBlockIDs, mBuilder.get_processor_distribution_for_num_elements(meshSpec.allElementConn.size())); + } + + void build_quad_split_4tri_and_quad_split_2tri(const std::vector &elementBlockIDs = {1,1,1,1,1,1}) + { + QuadSplit4TriAndQuadSplit2Tri meshSpec; + StkMeshTriFixture::build_mesh(meshSpec.nodeLocs, meshSpec.allElementConn, elementBlockIDs, mBuilder.get_processor_distribution_for_num_elements(meshSpec.allElementConn.size())); + } + + stk::mesh::Entity get_tri_side_with_node_indices(const unsigned node0, const unsigned node1) const { return mBuilder.get_side_with_nodes({get_assigned_node_for_index(node0), get_assigned_node_for_index(node1)}); } + void add_side_with_node_indices_to_sideset(const unsigned node0, const unsigned node1, const unsigned sidesetId) + { + stk::mesh::Entity side; + if (mMesh.is_valid(get_assigned_node_for_index(node0)) && mMesh.is_valid(get_assigned_node_for_index(node1))) + side = get_tri_side_with_node_indices(node0, node1); + if (mMesh.is_valid(side) && mMesh.bucket(side).owned()) + mBuilder.add_sides_to_sidesets({side}, {{sidesetId}}); + else + mBuilder.add_sides_to_sidesets({}, {}); + } + + void test_num_unattached(const stk::mesh::Selector & elementSelector, const stk::mesh::Selector & sideSelector, const size_t goldNumUnattached) + { + const std::vector unattachedElems = 
get_selected_owned_side_unattached_elements(get_mesh(), elementSelector, sideSelector); + size_t numUnattached = unattachedElems.size(); + all_reduce_sum(get_mesh().parallel(), numUnattached); + EXPECT_EQ(goldNumUnattached, numUnattached); + } + + void test_num_elements_not_in_largest_group_of_selected_side_attached_elements(const stk::mesh::Selector & elementSelector, const size_t goldNumNotInLargestGroup) + { + const std::vector elemsNotInLargestGroup = find_owned_elements_that_are_not_in_the_largest_group_of_selected_side_attached_elements(get_mesh(), elementSelector); + size_t numNotInLargestGroup = elemsNotInLargestGroup.size(); + all_reduce_sum(get_mesh().parallel(), numNotInLargestGroup); + EXPECT_EQ(goldNumNotInLargestGroup, numNotInLargestGroup); + } + + + +protected: + + +}; + +TEST_F(SideAttachedTriElements, emptySideSelector_allElementsAreUnAttached) +{ + build_quad_split_4tri_with_sideset(); + + stk::mesh::Selector elemSelector = *mBuilder.get_block_parts()[0]; + stk::mesh::Selector emptySideSelector; + + test_num_unattached(elemSelector, emptySideSelector, 4); +} + +TEST_F(SideAttachedTriElements, sideSelectorWithSideAttachedToAllElements_noElementsAreUnAttached) +{ + build_quad_split_4tri_with_sideset(); + const unsigned sidesetId = 1; + add_side_with_node_indices_to_sideset(0,1, sidesetId); + + stk::mesh::Selector elemSelector = *mBuilder.get_block_parts()[0]; + stk::mesh::Selector sideSelector = mBuilder.get_sideset_part(sidesetId); + test_num_unattached(elemSelector, sideSelector, 0); +} + +TEST_F(SideAttachedTriElements, sideSelectorWithSideAttachedToOneOfTwoElementsInBlockThatAreEdgeConnected_oneElementsIsUnAttached) +{ + build_quad_split_4tri_with_sideset({1,2,1,2}); + const unsigned sidesetId = 1; + add_side_with_node_indices_to_sideset(0,1, sidesetId); + + stk::mesh::Selector elemSelector = *mBuilder.get_block_parts()[0]; + stk::mesh::Selector sideSelector = mBuilder.get_sideset_part(sidesetId); + + test_num_unattached(elemSelector, 
sideSelector, 1); +} + +TEST_F(SideAttachedTriElements, meshWithClusterOf2AndClusterOf4tris_numNotInLargesGroup_equal2) +{ + build_quad_split_4tri_and_quad_split_2tri(); + + stk::mesh::Selector elemSelector = *mBuilder.get_block_parts()[0]; + + test_num_elements_not_in_largest_group_of_selected_side_attached_elements(elemSelector, 2); +} + + +} diff --git a/packages/krino/krino/unit_tests/Akri_Unit_Single_Element_Fixtures.hpp b/packages/krino/krino/unit_tests/Akri_Unit_Single_Element_Fixtures.hpp index e587c82e4cb2..262e5ebe9858 100644 --- a/packages/krino/krino/unit_tests/Akri_Unit_Single_Element_Fixtures.hpp +++ b/packages/krino/krino/unit_tests/Akri_Unit_Single_Element_Fixtures.hpp @@ -27,7 +27,6 @@ class SimpleStkFixture .create(); meta = bulk->mesh_meta_data_ptr(); - meta->use_simple_fields(); AuxMetaData::create(*meta); } void commit() { meta->commit(); } diff --git a/packages/krino/krino/unit_tests/Akri_Unit_WindingNumber.cpp b/packages/krino/krino/unit_tests/Akri_Unit_WindingNumber.cpp new file mode 100644 index 000000000000..fcbaaa5f73ed --- /dev/null +++ b/packages/krino/krino/unit_tests/Akri_Unit_WindingNumber.cpp @@ -0,0 +1,186 @@ +#include +#include +#include +#include + +namespace krino { + +void expect_winding_number_for_facet(const stk::math::Vector3d & x0, const stk::math::Vector3d & x1, const stk::math::Vector3d & x2, const stk::math::Vector3d & queryLoc, const double goldWindingNumber) +{ + EXPECT_NEAR(goldWindingNumber, compute_facet_winding_number(x0, x1, x2, queryLoc), 1.e-6); +} + +void expect_winding_number_in_range_for_point_in_plane_of_facet(const stk::math::Vector3d & x0, const stk::math::Vector3d & x1, const stk::math::Vector3d & x2, const stk::math::Vector3d & queryLoc) +{ + const double windingNumber = compute_facet_winding_number(x0, x1, x2, queryLoc); + EXPECT_LE(-0.5, windingNumber); + EXPECT_GE(0.5, windingNumber); +} + +TEST(FacetWindingNumber, uniformTetWindingNumberBasedOnAnalyticSolidAngle) +{ + const stk::math::Vector3d 
x0(1,-1,-1); + const stk::math::Vector3d x1(-1,-1,1); + const stk::math::Vector3d x2(-1,1,-1); + const stk::math::Vector3d queryLoc(1,1,1); + + const double goldWindingNumber = std::acos(23./27.)/(4.*M_PI); // From https://mathworld.wolfram.com/RegularTetrahedron.html + expect_winding_number_for_facet(x0, x1, x2, queryLoc, goldWindingNumber); +} + +TEST(FacetWindingNumber, pointInPlaneOfFacet_getValidWindingNumberInRange) +{ + const stk::math::Vector3d x0(0,0,0); + const stk::math::Vector3d x1(1,0,0); + const stk::math::Vector3d x2(0,1,0); + const stk::math::Vector3d queryLoc(1.,1.,1.e-12); + + expect_winding_number_in_range_for_point_in_plane_of_facet(x0, x1, x2, stk::math::Vector3d(0.2,0.2,0)); // On facet + expect_winding_number_in_range_for_point_in_plane_of_facet(x0, x1, x2, stk::math::Vector3d(0,0,0)); // On vertex + expect_winding_number_in_range_for_point_in_plane_of_facet(x0, x1, x2, stk::math::Vector3d(0,0.5,0)); // On edge + expect_winding_number_in_range_for_point_in_plane_of_facet(x0, x1, x2, stk::math::Vector3d(0.6,0.6,0)); // Outside facet +} + +stk::math::Vector3d compute_surface_centroid(const std::vector> & surfFacets) +{ + stk::math::Vector3d centroid = stk::math::Vector3d::ZERO; + for (const auto & facetCoords : surfFacets) + centroid += 1./3. 
* (facetCoords[0] + facetCoords[1] + facetCoords[2]); + centroid /= surfFacets.size(); + return centroid; +} + +double time_incremental_approximate_winding_number(const std::vector> & surfFacets, const std::vector & queryLocs, const stk::math::Vector3d & centroid) +{ + const double startTime = stk::wall_time(); + + ClusterApproximation approx; + compute_cluster_approximation(surfFacets, centroid, approx); + for (const auto & queryLoc: queryLocs) + compute_approximate_winding_number(approx, queryLoc); + + return stk::wall_time() - startTime; +} + +double time_approximate_winding_number(const std::vector> & surfFacets, const std::vector & queryLocs, const stk::math::Vector3d & centroid) +{ + const double startTime = stk::wall_time(); + + FacetClusterApproximation approx; + compute_cluster_approximation(surfFacets, centroid, approx); + for (const auto & queryLoc: queryLocs) + compute_approximate_winding_number(approx, queryLoc); + + return stk::wall_time() - startTime; +} + +double time_exact_winding_number(const std::vector> & surfFacets, const std::vector & queryLocs) +{ + const double startTime = stk::wall_time(); + + for (const auto & queryLoc: queryLocs) + compute_faceted_surface_winding_number(surfFacets, queryLoc); + + return stk::wall_time() - startTime; +} + +void test_performance_for_winding_number(const std::vector> & surfFacets, const std::vector & queryLocs) +{ + const stk::math::Vector3d centroid = compute_surface_centroid(surfFacets); + + std::cout << "Incremental approx time = " << time_incremental_approximate_winding_number(surfFacets, queryLocs, centroid) << std::endl; + std::cout << "Approx time = " << time_approximate_winding_number(surfFacets, queryLocs, centroid) << std::endl; + std::cout << "Exact time = " << time_exact_winding_number(surfFacets, queryLocs) << std::endl; +} + +double compute_approximate_winding_number(const std::vector> & surfFacets, const stk::math::Vector3d & queryLoc, const stk::math::Vector3d & centroid) +{ + 
FacetClusterApproximation approx; + compute_cluster_approximation(surfFacets, centroid, approx); + return compute_approximate_winding_number(approx, queryLoc); +} + +double compute_incremental_approximate_winding_number(const std::vector> & surfFacets, const stk::math::Vector3d & queryLoc, const stk::math::Vector3d & centroid) +{ + ClusterApproximation approx; + compute_cluster_approximation(surfFacets, centroid, approx); + return compute_approximate_winding_number(approx, queryLoc); +} + +void expect_approximate_winding_number_to_match_exact(const std::vector> & surfFacets, const stk::math::Vector3d & queryLoc, const double relativeTol) +{ + const double exactWinding = compute_faceted_surface_winding_number(surfFacets, queryLoc); + + const stk::math::Vector3d centroid = compute_surface_centroid(surfFacets); + + const double approxWinding = compute_approximate_winding_number(surfFacets, queryLoc, centroid); + EXPECT_NEAR(exactWinding, approxWinding, exactWinding*relativeTol); + + const double incrementalApproxWinding = compute_incremental_approximate_winding_number(surfFacets, queryLoc, centroid); + EXPECT_NEAR(exactWinding, incrementalApproxWinding, exactWinding*relativeTol); + + EXPECT_NEAR(approxWinding, incrementalApproxWinding, exactWinding*1.e-10); + + std::cout << "For queryLoc " << queryLoc << ", exact = " << exactWinding << ", approx = " << approxWinding << ", incremental approx = " << incrementalApproxWinding << std::endl; +} + +void append_refined_facet(const std::array & facetCoords, const unsigned numRefine, std::vector> & refinedFacets) +{ + if (numRefine == 0) + { + refinedFacets.push_back(facetCoords); + } + else + { + const stk::math::Vector3d edge0 = 0.5*(facetCoords[0]+facetCoords[1]); + const stk::math::Vector3d edge1 = 0.5*(facetCoords[1]+facetCoords[2]); + const stk::math::Vector3d edge2 = 0.5*(facetCoords[2]+facetCoords[0]); + append_refined_facet({{facetCoords[0], edge0, edge2}}, numRefine-1, refinedFacets); + 
append_refined_facet({{facetCoords[1], edge1, edge0}}, numRefine-1, refinedFacets); + append_refined_facet({{facetCoords[2], edge2, edge1}}, numRefine-1, refinedFacets); + append_refined_facet({{edge0, edge1, edge2}}, numRefine-1, refinedFacets); + } +} + +std::vector> initialize_nonplanar_facets(const unsigned numRefine) +{ + const std::array facet0Coords{{ stk::math::Vector3d(0,1,0), stk::math::Vector3d(1,0,0), stk::math::Vector3d(0.4,0.5,0.6) }}; + const std::array facet1Coords{{ stk::math::Vector3d(0,0,1), stk::math::Vector3d(0,1,0), stk::math::Vector3d(0.4,0.5,0.6) }}; + const std::array facet2Coords{{ stk::math::Vector3d(1,0,0), stk::math::Vector3d(0,0,1), stk::math::Vector3d(0.4,0.5,0.6) }}; + + std::vector> surfFacets; + append_refined_facet(facet0Coords, numRefine, surfFacets); + append_refined_facet(facet1Coords, numRefine, surfFacets); + append_refined_facet(facet2Coords, numRefine, surfFacets); + return surfFacets; +} + +TEST(approximateWindingNumber, showConvergenceAndAgreementBetweenApproximateMethods) +{ + const std::vector> surfFacets = initialize_nonplanar_facets(3); + + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(1,1,1), 2.e-1); + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(2,2,2), 1.e-2); + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(4,4,4), 1.e-3); + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(8,8,8), 1.e-4); + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(1,2,4), 1.e-2); + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(4,1,2), 1.e-2); + expect_approximate_winding_number_to_match_exact(surfFacets, stk::math::Vector3d(2,4,1), 1.e-2); +} + +TEST(approximateWindingNumber, compareCPUTimesForApproximateAndExactMethods) +{ + const std::vector> surfFacets = initialize_nonplanar_facets(6); + + const unsigned dim=20; + std::vector queryLocs; 
+ for (unsigned i=0; i bulk = stk::mesh::MeshBuilder(comm).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); meta.enable_late_fields(); stk::io::fill_mesh_with_auto_decomp(inputData.meshIn, *bulk); diff --git a/packages/krino/krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdapt.cpp b/packages/krino/krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdapt.cpp index 5a4d1b632b12..5604d8273de8 100644 --- a/packages/krino/krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdapt.cpp +++ b/packages/krino/krino_mesh_adapt/mesh_adapt_lib/KrinoMeshAdapt.cpp @@ -15,6 +15,7 @@ #include #include #include +#include "Akri_TransitionElementEdgeMarker.hpp" namespace krino { @@ -31,6 +32,7 @@ void refine_mesh_with_params(stk::mesh::BulkData & mesh, const MeshAdaptAlgorith << " edges, " << counts[2] << " faces, " << counts[3] << " elements" << std::endl; refinement.do_uniform_refinement(algParams.numUniformRefinementLevels); + refinement.delete_parent_elements(); stk::mesh::comm_mesh_counts(mesh, counts); diff --git a/packages/krino/tools/trilinos_snapshot.sh b/packages/krino/tools/trilinos_snapshot.sh index fdc794484ce4..df0dccd62876 100755 --- a/packages/krino/tools/trilinos_snapshot.sh +++ b/packages/krino/tools/trilinos_snapshot.sh @@ -53,15 +53,19 @@ verify_clean_repo $TRILINOS verify_no_local_commits $TRILINOS_BRANCH #Pull request workflow -exe git fetch upstream -exe git pull upstream develop -exe git push +exe git fetch --all +exe git checkout master +exe git merge upstream/master +exe git push origin master +exe git checkout develop +exe git merge upstream/develop +exe git push origin develop exe git checkout $TRILINOS_BRANCH exe git pull exe git checkout $SNAPSHOT_BRANCH -exe git reset --hard $TRILINOS_BRANCH +exe git reset --hard upstream/$TRILINOS_BRANCH update_package krino exe git rm -rf packages/krino/krino_sierra packages/krino/Jamfile packages/krino/.clang-format diff --git a/packages/percept/src/adapt/Colorer.cpp 
b/packages/percept/src/adapt/Colorer.cpp index 4edb88d1d16a..fd6e33d908cd 100644 --- a/packages/percept/src/adapt/Colorer.cpp +++ b/packages/percept/src/adapt/Colorer.cpp @@ -14,8 +14,6 @@ namespace percept { - using namespace std; - template bool contains(STD_Set& set, Key key) { return set.find(key) != set.end(); } Colorer::Colorer(std::vector< ColorerSetType >& element_colors, std::vector ranks ) : m_element_colors(element_colors), m_entityRanks(), @@ -53,7 +51,7 @@ color(percept::PerceptMesh& eMesh, unsigned * elementType, stk::mesh::PartVector* fromParts, stk::mesh::FieldBase *element_color_field) { const unsigned MAX_COLORS=1000; - vector< ColorerNodeSetType > node_colors(MAX_COLORS+1); + std::vector< ColorerNodeSetType > node_colors(MAX_COLORS+1); ColorerElementSetType all_elements; stk::mesh::Selector selector(eMesh.get_fem_meta_data()->universal_part()); @@ -79,7 +77,7 @@ if (m_noColoring) num_max_colors = 1; - m_element_colors = vector< ColorerSetType > (num_max_colors+1); + m_element_colors = std::vector< ColorerSetType > (num_max_colors+1); for (unsigned icolor = 0; icolor < num_max_colors; icolor++) { diff --git a/packages/percept/src/adapt/DiscretizeWedge.hpp b/packages/percept/src/adapt/DiscretizeWedge.hpp index 644564fb46aa..fdd3b963e013 100644 --- a/packages/percept/src/adapt/DiscretizeWedge.hpp +++ b/packages/percept/src/adapt/DiscretizeWedge.hpp @@ -457,7 +457,7 @@ namespace percept { for (unsigned iface=3; iface < 5; ++iface) { unsigned tri_edge_marks[3] = {0}; - unsigned num_tri_edge_marks = 0; + //unsigned num_tri_edge_marks = 0; //stk::mesh::Entity tri_local_entities[3] = {stk::mesh::Entity()}; stk::mesh::Entity tri_local_entities_non_outward_normal[3] = {stk::mesh::Entity()}; for (unsigned j = 0; j < 6; j++) @@ -475,8 +475,8 @@ namespace percept { int l_edge = edge_map_rev[q_edge - DiscretizeWedge::edge_offset]; // extract Shards index of edge VERIFY_OP_ON(((0 <= l_edge) && (l_edge < nedges)), == , true, "l_edge"); tri_edge_marks[j] = 
edge_marks[l_edge]; - if (edge_marks[l_edge]) - ++num_tri_edge_marks; + //if (edge_marks[l_edge]) + // ++num_tri_edge_marks; } TriangulateTri tt; diff --git a/packages/percept/src/adapt/FindValidCentroid.cpp b/packages/percept/src/adapt/FindValidCentroid.cpp index aa725bfe506c..3bbea8e26780 100644 --- a/packages/percept/src/adapt/FindValidCentroid.cpp +++ b/packages/percept/src/adapt/FindValidCentroid.cpp @@ -9,7 +9,7 @@ #include -#if defined(STK_BUILT_IN_SIERRA) && !STK_PERCEPT_LITE +#if defined(STK_BUILT_FOR_SIERRA) && !STK_PERCEPT_LITE #include #endif @@ -29,7 +29,7 @@ namespace percept { { const CellTopologyData *cell_topo_data = m_eMesh.get_cell_topology(children[ii]); volumes[ii] = m_eMesh.volume(children[ii], m_eMesh.get_coordinates_field(), cell_topo_data); -#if defined(STK_BUILT_IN_SIERRA) && !STK_PERCEPT_LITE +#if defined(STK_BUILT_FOR_SIERRA) && !STK_PERCEPT_LITE if (m_use_finite_volume) { volumes[ii] = std::numeric_limits::max(); diff --git a/packages/percept/src/adapt/FixSideSets.cpp b/packages/percept/src/adapt/FixSideSets.cpp index d3d8a81b9656..7ae205f0ee32 100644 --- a/packages/percept/src/adapt/FixSideSets.cpp +++ b/packages/percept/src/adapt/FixSideSets.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #if defined( STK_HAS_MPI ) #include @@ -28,6 +29,10 @@ #include #include +#include + +#include +#include #define DEBUG_GSPR 0 #define LTRACE 0 @@ -267,9 +272,12 @@ namespace percept { if (found) { disconnect_entity(side); - for (unsigned irel=0; irel < elems_to_connect_to.size(); ++irel) - { - m_eMesh.get_bulk_data()->declare_relation(elems_to_connect_to[irel], side, ordinals_to_connect_to[irel]); + + stk::mesh::BulkData& bulk = *m_eMesh.get_bulk_data(); + + for (unsigned irel=0; irel < elems_to_connect_to.size(); ++irel) { + stk::mesh::Entity element = elems_to_connect_to[irel]; + bulk.declare_relation(element, side, ordinals_to_connect_to[irel]); } } @@ -650,6 +658,7 @@ namespace percept { ++count; } } + (void)count; } void 
FixSideSets::check_connect(SetOfEntities& side_set, SetOfEntities *avoid_elems) @@ -745,8 +754,7 @@ namespace percept { const std::string& append_conv_string = UniformRefinerPatternBase::getAppendConvertString(); (void)append_conv_string; - const bool debug = false; - if (debug) + if (m_debug) { print_surface_blocks_map(m_eMesh); } @@ -754,7 +762,7 @@ namespace percept { stk::mesh::PartVector pv = m_eMesh.get_fem_meta_data()->get_mesh_parts(); for (auto partp : pv) { - if (debug) std::cout << "move_sides_to_correct_surfaces: processing surface= " << partp->name() + if (m_debug) std::cout << "move_sides_to_correct_surfaces: processing surface= " << partp->name() << " topo= " << partp->topology() << " primary_entity_rank= " << partp->primary_entity_rank() << std::endl; if (partp->topology() == stk::topology::INVALID_TOPOLOGY) @@ -777,65 +785,129 @@ namespace percept { } } - void FixSideSets::move_side_to_correct_surface(stk::mesh::Part& surface, stk::mesh::Entity side, stk::mesh::Entity volume) + std::pair + FixSideSets::get_new_sideset_part_name(const std::string& surfaceName, + stk::mesh::Entity side, stk::mesh::Entity volume) { - const bool debug = false; - const std::string& append_conv_string = UniformRefinerPatternBase::getAppendConvertString(); - (void)append_conv_string; + // If the sideset has a "canonical" name as in "surface_{id}", + // Then the sideblock name will be of the form: + // * "surface_eltopo_sidetopo_id" or + // * "surface_block_id_sidetopo_id" + // If the sideset does *not* have a canonical name, then + // the sideblock name will be of the form: + // * "{sideset_name}_eltopo_sidetopo" or + // * "{sideset_name}_block_id_sidetopo" + // Generated mesh will create sidesets of the form + // * "surface_id_sidetopo - stk::topology side_topo = m_eMesh.get_bulk_data()->bucket(side).topology(); - stk::topology elem_topo = m_eMesh.get_bulk_data()->bucket(volume).topology(); + const std::string& append_conv_string = 
UniformRefinerPatternBase::getAppendConvertString(); + bool isConvertedPart = ( surfaceName.find(append_conv_string) != std::string::npos); - if (debug) std::cout << "side,elem topo = " << side_topo << "," << elem_topo << std::endl; + std::vector tokens; + stk::util::tokenize(surfaceName, "_", tokens); - //std::string part_name = add_parts[0]->name(); // surface_hex8_quad4_1 + if(isConvertedPart) { + return std::make_pair("", false); + } - std::vector add_parts(1, static_cast(0)); - std::vector remove_parts(1, &surface); + size_t tokenSize = tokens.size(); std::string new_part_name; - std::vector tokens; - stk::util::tokenize(surface.name(), "_", tokens); + const stk::mesh::BulkData& bulk = *m_eMesh.get_bulk_data(); - // this can happen, eg. for generated meshes - with a surface name like surface_1_quad4 - if (tokens.size() <= 3) - return; + stk::topology side_topo = bulk.bucket(side).topology(); + stk::topology elem_topo = bulk.bucket(volume).topology(); + + if (m_debug) std::cout << "side,elem topo = " << side_topo << "," << elem_topo << " surface= " << surfaceName << std::endl; + + std::string ioss_side_topo; + std::string ioss_elem_topo; + + convert_stk_topology_to_ioss_name(elem_topo, ioss_elem_topo); + convert_stk_topology_to_ioss_name(side_topo, ioss_side_topo); + + bool matching_volume_topologies = false; + if(tokenSize >= 4) { + matching_volume_topologies = stk::equal_case(tokens[1], ioss_elem_topo); + + tokens[1] = ioss_elem_topo; + tokens[2] = ioss_side_topo; + + // FIXME: substr usage - only works for single digit + const unsigned nl = (tokens[3].find(".") != std::string::npos) ? 
tokens[3].find(".") : tokens[3].length(); + tokens[3] = tokens[3].substr(0, nl); + + new_part_name = tokens[0]; + for (unsigned i = 1; i < 4; i++) + new_part_name += "_" + tokens[i]; + } else if(tokenSize == 3) { + const bool allDigits = tokens[1].find_first_not_of("0123456789") == std::string::npos; + + std::string parentSurfaceName; + + if (allDigits) { + // Generated mesh format + parentSurfaceName = tokens[0] + "_" + tokens[1]; + } else { + // non-canonical format + parentSurfaceName = tokens[0]; + matching_volume_topologies = stk::equal_case(tokens[1], ioss_elem_topo); + } + + stk::mesh::Part* parentSurface = m_eMesh.get_fem_meta_data()->get_part(parentSurfaceName); - if (debug) { - std::cout << "tokens = "; - for (unsigned i=0; iid()); + } } - const unsigned nl = (tokens[3].find(".") != std::string::npos) ? tokens[3].find(".") : tokens[3].length(); - // FIXME: substr usage - only works for single digit + return std::make_pair(new_part_name, matching_volume_topologies); + } - convert_stk_topology_to_ioss_name(elem_topo, tokens[1]); - convert_stk_topology_to_ioss_name(side_topo, tokens[2]); + void FixSideSets::fill_change_parts(stk::mesh::Part& surface, + stk::mesh::Entity side, stk::mesh::Entity volume, + std::vector& add_parts, std::vector& remove_parts) + { + add_parts.clear(); + remove_parts.clear(); - tokens[3] = tokens[3].substr(0, nl); - new_part_name = tokens[0]; - for (unsigned i = 1; i < 4; i++) - new_part_name += "_" + tokens[i]; + stk::topology side_topo = m_eMesh.get_bulk_data()->bucket(side).topology(); + stk::topology elem_topo = m_eMesh.get_bulk_data()->bucket(volume).topology(); - if (debug) - std::cout << "new_part_name=" << new_part_name << std::endl; + std::string new_part_name; + bool matching_volume_topologies; + std::tie(new_part_name, matching_volume_topologies) = get_new_sideset_part_name(surface.name(), side, volume); + stk::mesh::Part* new_part = m_eMesh.get_fem_meta_data()->get_part(new_part_name); - add_parts[0] = 
m_eMesh.get_fem_meta_data()->get_part(new_part_name); - if (!add_parts[0]) + if (nullptr == new_part) { - if (debug) std::cout << " new part name not found, skipping" << std::endl; + if (m_debug) std::cout << " new part name not found, skipping" << std::endl; //std::cout << "add_parts[0] = null, new_part_name= " << new_part_name << " elem_topo= " << elem_topo << " side_topo= " << side_topo << std::endl; //VERIFY_MSG("bad add_parts"); return; } - if (debug) + bool equivalent_parts = (new_part->id() == surface.id()) && (new_part->topology() == surface.topology()) && matching_volume_topologies; + if(equivalent_parts) { + if (m_debug) std::cout << " new surface part: " << new_part_name << " is equivalent to original: " << surface.name() << " ... not moving" << std::endl; + return; + } + + const std::string& append_conv_string = UniformRefinerPatternBase::getAppendConvertString(); + (void)append_conv_string; + + add_parts.push_back(new_part); + remove_parts.push_back(&surface); + + if (m_debug) + std::cout << "new_part_name=" << new_part_name << std::endl; + + if (m_debug) { std::cout << "moving side= " << m_eMesh.id(side) << " side_topo= " << side_topo << " attached to vol= " << m_eMesh.id(volume) - << " of topo= " << elem_topo << " from surface= " << surface.name() << " to: " << add_parts[0]->name() << " remove_parts.size= " << remove_parts.size() + << " of topo= " << elem_topo << " from surface= " << surface.name() << " (id " << surface.id() << ")" + << " to: " << add_parts[0]->name() << " (id " << add_parts[0]->id() << ")" << " remove_parts.size= " << remove_parts.size() << std::endl; std::cout << "before parts= " << m_eMesh.print_entity_parts_string(side, "\n") << std::endl; } @@ -851,15 +923,26 @@ namespace percept { } } - if (add_parts[0] == remove_parts[0]) + if (add_parts[0] == remove_parts[0]) { + if (m_debug) std::cout << "resizing: add = " << add_parts[0]->name() << " remove = " << remove_parts[0]->name() << std::endl; remove_parts.resize(0); + } + + if 
(m_debug && remove_parts.size()) std::cout << "found remove_parts = " << remove_parts[0]->name() << std::endl; + } + + void FixSideSets::move_side_to_correct_surface(stk::mesh::Part& surface, stk::mesh::Entity side, stk::mesh::Entity volume) + { + std::vector add_parts; + std::vector remove_parts; - if (debug && remove_parts.size()) std::cout << "found remove_parts = " << remove_parts[0]->name() << std::endl; + fill_change_parts(surface, side, volume, add_parts, remove_parts); - m_eMesh.get_bulk_data()->change_entity_parts( side, add_parts, remove_parts ); + if(add_parts.empty() && remove_parts.empty()) return; - if (debug) std::cout << "after parts= " << m_eMesh.print_entity_parts_string(side, "\n") << std::endl; + m_eMesh.get_bulk_data()->change_entity_parts( side, add_parts, remove_parts ); + if (m_debug) std::cout << "after parts= " << m_eMesh.print_entity_parts_string(side, "\n") << std::endl; } diff --git a/packages/percept/src/adapt/FixSideSets.hpp b/packages/percept/src/adapt/FixSideSets.hpp index 249a9a40d699..908b114cb8a2 100644 --- a/packages/percept/src/adapt/FixSideSets.hpp +++ b/packages/percept/src/adapt/FixSideSets.hpp @@ -42,6 +42,7 @@ namespace percept { bool m_avoidFixSideSetChecks; RefinerSelector *m_buildSideSetSelector; bool m_doProgress; + bool m_debug{false}; FixSideSets(Refiner *ref, PerceptMesh& eMesh, stk::mesh::PartVector& excludeParts, SidePartMap& side_part_map, const std::string& geomFile, bool avoidFixSideSetChecks, RefinerSelector *sel = 0, bool doProgress=false); @@ -64,6 +65,11 @@ namespace percept { void move_sides_to_correct_surfaces(); void move_side_to_correct_surface(stk::mesh::Part& surface, stk::mesh::Entity side, stk::mesh::Entity volume); + std::pair get_new_sideset_part_name(const std::string& surfaceName, stk::mesh::Entity side, stk::mesh::Entity volume); + void fill_change_parts(stk::mesh::Part& surface, + stk::mesh::Entity side, stk::mesh::Entity volume, + std::vector& add_parts, std::vector& remove_parts); + // fast 
reconnector void fix_side_sets_2(bool allow_not_found, SetOfEntities *avoid_elems, SetOfEntities *avoid_sides, const std::string& msg); }; diff --git a/packages/percept/src/adapt/IEdgeAdapter.cpp b/packages/percept/src/adapt/IEdgeAdapter.cpp index 8babedf50aa2..1d1d9f09f905 100644 --- a/packages/percept/src/adapt/IEdgeAdapter.cpp +++ b/packages/percept/src/adapt/IEdgeAdapter.cpp @@ -44,7 +44,6 @@ } // see how many edges are already marked - int num_marked=0; std::vector edge_marks(numSubDimNeededEntities,0); if (needed_entity_rank == m_eMesh.edge_rank()) { @@ -54,7 +53,6 @@ if (!is_empty) { edge_marks[iSubDimOrd] = 1; - ++num_marked; } } } diff --git a/packages/percept/src/adapt/IElementBasedAdapterPredicate.hpp b/packages/percept/src/adapt/IElementBasedAdapterPredicate.hpp index 112338d4e3c0..b03645bfa54b 100644 --- a/packages/percept/src/adapt/IElementBasedAdapterPredicate.hpp +++ b/packages/percept/src/adapt/IElementBasedAdapterPredicate.hpp @@ -32,7 +32,7 @@ */ // Example - struct IElementBasedAdapterPredicate { + struct IElementBasedAdapterPredicate : public std::function { PerceptMesh& m_eMesh; stk::mesh::Selector * m_eb_selector; stk::mesh::FieldBase *m_field; diff --git a/packages/percept/src/adapt/PredicateBasedElementAdapter.hpp b/packages/percept/src/adapt/PredicateBasedElementAdapter.hpp index 6b692a34965f..c8d16dcbbbec 100644 --- a/packages/percept/src/adapt/PredicateBasedElementAdapter.hpp +++ b/packages/percept/src/adapt/PredicateBasedElementAdapter.hpp @@ -25,6 +25,7 @@ * The functor @class RefinePredicate should supply an operator() that returns an entry from AdaptInstruction, * either to do nothing, refine, unrefine, or both refine & unrefine (useful for unit testing, etc.) 
*/ + typedef std::function AdapterPredicateFunctor; /// This class (and derived classes) supports basic element-based marking, /// a flavor of quality-improved element-based marking, diff --git a/packages/percept/src/adapt/PredicateTemplateAdapter.hpp b/packages/percept/src/adapt/PredicateTemplateAdapter.hpp index d0e60e7ccdfa..4f3538f83bf1 100644 --- a/packages/percept/src/adapt/PredicateTemplateAdapter.hpp +++ b/packages/percept/src/adapt/PredicateTemplateAdapter.hpp @@ -38,6 +38,7 @@ * * Note: the steps above are now embedded in Stage_2_Mark_TE_Parents, so we only have 2 stages now. */ + typedef std::function AdapterPredicateFunctor; enum PTA_Stage { Stage_None, Stage_1_Mark_NTE, diff --git a/packages/percept/src/adapt/Refiner.cpp b/packages/percept/src/adapt/Refiner.cpp index 51d25e0823ec..92658303e61a 100644 --- a/packages/percept/src/adapt/Refiner.cpp +++ b/packages/percept/src/adapt/Refiner.cpp @@ -243,7 +243,7 @@ std::vector elems; const stk::mesh::BucketVector & buckets = m_eMesh.get_bulk_data()->buckets( rank ); - unsigned nele=0; + //unsigned nele=0; for ( stk::mesh::BucketVector::const_iterator k = buckets.begin() ; k != buckets.end() ; ++k ) { if (on_locally_owned_part(**k) && fromPartsSelector(**k) ) @@ -270,7 +270,7 @@ else { elems.push_back(element); - ++nele; + //++nele; } } } @@ -963,7 +963,7 @@ m_nodeRegistry->prolongate(m_eMesh.get_coordinates_field()); } -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) if (m_rbar_names.size()) m_nodeRegistry->add_rbars(m_rbar_names); #endif @@ -1026,7 +1026,7 @@ #endif removeOldElements(irank, ranks[irank], m_breakPattern[irank]); renameNewParts(ranks[irank], m_breakPattern[irank]); - fixSurfaceAndEdgeSetNames(ranks[irank], m_breakPattern[irank]); +// fixSurfaceAndEdgeSetNames(ranks[irank], m_breakPattern[irank]); } } @@ -1215,7 +1215,6 @@ { const stk::mesh::BucketVector & buckets = m_eMesh.get_bulk_data()->buckets( ranks_to_be_deleted[irank] ); - int npar=0; int nchild=0; for ( 
stk::mesh::BucketVector::const_iterator k = buckets.begin() ; k != buckets.end() ; ++k ) { @@ -1238,13 +1237,11 @@ } else { - ++npar; parents.insert(element); } } } - //std::cout << "tmp removeElements(parents) irank, size= " << ranks_to_be_deleted[irank] << " " << npar << " nchild= " << nchild << std::endl; - + (void)nchild; } mod_begin_timer(*m_eMesh.get_bulk_data(), timerDoRefine_); @@ -2413,6 +2410,8 @@ EXCEPTWATCH; stk::mesh::PartVector toParts = breakPattern->getToParts(); + bool debug = true; + //std::cout << "toParts.size()= " << toParts.size() << " typeid= " << typeid(*breakPattern).name() << std::endl; for (unsigned i_part = 0; i_part < toParts.size(); i_part++) @@ -2423,7 +2422,7 @@ std::string toPartName = toParts[i_part]->name(); if ( toPartName.find("surface_", 0) == std::string::npos) { - if (0) std::cout << "tmp fixSurfaceAndEdgeSetNames:: skipping toPartName= " << toPartName << " typeid= " << typeid(*breakPattern).name() << std::endl; + if (debug) std::cout << "tmp fixSurfaceAndEdgeSetNames:: skipping toPartName= " << toPartName << " typeid= " << typeid(*breakPattern).name() << std::endl; continue; } @@ -2431,7 +2430,7 @@ StringStringMap::iterator map_it; StringStringMap str_map = breakPattern->fixSurfaceAndEdgeSetNamesMap(); - if (0) std::cout << "tmp fixSurfaceAndEdgeSetNamesMap:: str_map.size()= " << str_map.size() + if (debug) std::cout << "tmp fixSurfaceAndEdgeSetNamesMap:: str_map.size()= " << str_map.size() //<< " " << breakPattern->getFromTopoPartName() << "__" << breakPattern->getToTopoPartName() << " typeid= " << typeid(*breakPattern).name() << std::endl; @@ -2441,13 +2440,13 @@ std::string from_str = map_it->first; std::string to_str = map_it->second; Util::replace(newToPartName, from_str, to_str); - if (0) + if (debug) std::cout << "tmp fixSurfaceAndEdgeSetNamesMap: old= " << toPartName << " new= " << newToPartName << std::endl; } *toPartName_p = newToPartName; - if (0) + if (debug) std::cout << "tmp fixSurfaceAndEdgeSetNamesMap:: P[" 
<< m_eMesh.get_rank() << "] new part name= " << toParts[i_part]->name() << " old part name = " << toPartName << std::endl; @@ -2498,6 +2497,55 @@ return out; } + std::string Refiner::get_parent_element_topology(const std::string& surfaceName) + { + // If the sideset has a "canonical" name as in "surface_{id}", + // Then the sideblock name will be of the form: + // * "surface_eltopo_sidetopo_id" or + // * "surface_block_id_sidetopo_id" + // If the sideset does *not* have a canonical name, then + // the sideblock name will be of the form: + // * "{sideset_name}_eltopo_sidetopo" or + // * "{sideset_name}_block_id_sidetopo" + // Generated mesh will create sidesets of the form + // * "surface_id_sidetopo + + const stk::mesh::BulkData& bulk = *m_eMesh.get_bulk_data(); + const stk::mesh::MetaData& meta = bulk.mesh_meta_data(); + std::vector tokens; + stk::util::tokenize(surfaceName, "_", tokens); + + size_t tokenSize = tokens.size(); + + std::string parent_element_topology; + + if(tokenSize >= 4) { + parent_element_topology = tokens[1]; + } else if(tokenSize == 3) { + const bool allDigits = tokens[1].find_first_not_of("0123456789") == std::string::npos; + + std::string parentSurfaceName; + + if (allDigits) { + // Generated mesh format + parentSurfaceName = tokens[0] + "_" + tokens[1]; + stk::mesh::Part* parentSurface = meta.get_part(parentSurfaceName); + + if(nullptr != parentSurface) { + std::vector touchingBlocks = meta.get_blocks_touching_surface(parentSurface); + if(touchingBlocks.size() == 1) { + convert_stk_topology_to_ioss_name(touchingBlocks[0]->topology(), parent_element_topology); + } + } + } else { + // non-canonical format + parent_element_topology = tokens[1]; + } + } + + return parent_element_topology; + } + void Refiner:: renameNewParts(stk::mesh::EntityRank rank, UniformRefinerPatternBase* breakPattern) { @@ -2508,7 +2556,8 @@ bool do_strip_hashes = breakPattern->m_do_strip_hashes; bool do_strip_hashes_from = false; - stk::mesh::PartVector all_parts = 
m_eMesh.get_fem_meta_data()->get_parts(); + stk::mesh::MetaData* meta = m_eMesh.get_fem_meta_data(); + stk::mesh::PartVector all_parts = meta->get_parts(); if (DEBUG_RENAME_NEW_PARTS) { @@ -2553,21 +2602,53 @@ if (do_strip_hashes) newToPartName = strip_hashes(newToPartName, all_parts, breakPattern->getConvertSeparatorString(), false); if (do_strip_hashes_from) newFromPartName = strip_hashes(newFromPartName, all_parts, breakPattern->getConvertSeparatorString(), false); - m_eMesh.get_fem_meta_data()->delete_part_alias_case_insensitive(*fromParts[i_part], newToPartName); + meta->delete_part_alias_case_insensitive(*fromParts[i_part], newToPartName); - m_eMesh.get_fem_meta_data()->add_part_alias(*toParts[i_part], newToPartName); - m_eMesh.get_fem_meta_data()->add_part_alias(*fromParts[i_part], newFromPartName); + meta->add_part_alias(*toParts[i_part], newToPartName); + meta->add_part_alias(*fromParts[i_part], newFromPartName); + + if (DEBUG_RENAME_NEW_PARTS) { + std::cout << "tmp renameNewParts: to alias for " << toParts[i_part]->name() << " = " << newToPartName + << " parent topo = " << get_parent_element_topology(toParts[i_part]->name()) << std::endl; + std::cout << "tmp renameNewParts: from alias for " << fromParts[i_part]->name() << " = " << newFromPartName + << " parent topo = " << get_parent_element_topology(fromParts[i_part]->name()) << std::endl; + } + + StringStringMap::iterator map_it; + StringStringMap str_map = breakPattern->fixSurfaceAndEdgeSetNamesMap(); + if(rank == meta->side_rank()) { + if (DEBUG_RENAME_NEW_PARTS) std::cout << "tmp renameNewParts:: str_map.size()= " << str_map.size() + << " typeid= " << typeid(*breakPattern).name() + << std::endl; + + for (map_it = str_map.begin(); map_it != str_map.end(); map_it++) + { + std::string from_str = map_it->first; + std::string to_str = map_it->second; + Util::replace(newToPartName, from_str, to_str); + Util::replace(newFromPartName, from_str, to_str); + if (DEBUG_RENAME_NEW_PARTS) { + std::cout << "tmp 
renameNewParts: old toPartNane= " << toPartName << " new toPartName= " << newToPartName << std::endl; + std::cout << "tmp renameNewParts: old fromPartNane= " << fromPartName << " new fromPartName= " << newFromPartName << std::endl; + } + } + + // This is to prevent the collapse of refined subset parts onto the same name + newToPartName += ("." + get_parent_element_topology(toParts[i_part]->name())); + newFromPartName += ("." + get_parent_element_topology(fromParts[i_part]->name())); + } stk::io::set_alternate_part_name(*toParts[i_part], newToPartName); stk::io::set_alternate_part_name(*fromParts[i_part], newFromPartName); - } - if (DEBUG_RENAME_NEW_PARTS) { - std::cout << "tmp after: fromPartName= " << fromParts[i_part]->name() << " toPartName= " << toParts[i_part]->name() << std::endl; - std::cout << "tmp P[" << m_eMesh.get_rank() << "] fromPartName: " << fromPartName << " part= " << toParts[i_part]->name() - << " old part name = " << fromPart->name() - << std::endl; - } + if (DEBUG_RENAME_NEW_PARTS) { + std::cout << "tmp after: fromPartName= " << fromParts[i_part]->name() << " (" << newFromPartName << ") " + << " toPartName= " << toParts[i_part]->name() << " (" << newToPartName << ") " << std::endl; + std::cout << "tmp P[" << m_eMesh.get_rank() << "] fromPartName: " << fromPartName << " part= " << toParts[i_part]->name() + << " old part name = " << fromPart->name() + << std::endl; + } + } } } } diff --git a/packages/percept/src/adapt/Refiner.hpp b/packages/percept/src/adapt/Refiner.hpp index d148739cc646..3d1fdc64cb38 100644 --- a/packages/percept/src/adapt/Refiner.hpp +++ b/packages/percept/src/adapt/Refiner.hpp @@ -168,7 +168,7 @@ void setRefinerSelector(RefinerSelector *sel) { m_refinerSelector = sel; } RefinerSelector *getRefinerSelector() { return m_refinerSelector; } -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) void set_rbar_special_treatment(BlockNamesType& rbar_names) { m_rbar_names = rbar_names; } #endif @@ -466,6 +466,8 @@ void 
add_children_to_parts(); + std::string get_parent_element_topology(const std::string& surfaceName); + protected: percept::PerceptMesh& m_eMesh; @@ -496,7 +498,7 @@ SidePartMap m_side_part_map; bool m_fixAllBlockBoundaries; -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) BlockNamesType m_rbar_names; #endif bool m_needsRemesh; diff --git a/packages/percept/src/adapt/RefinerPattern_Tet4_Tet4_HangingNode.hpp b/packages/percept/src/adapt/RefinerPattern_Tet4_Tet4_HangingNode.hpp index 247d6c41814f..c743954e2121 100644 --- a/packages/percept/src/adapt/RefinerPattern_Tet4_Tet4_HangingNode.hpp +++ b/packages/percept/src/adapt/RefinerPattern_Tet4_Tet4_HangingNode.hpp @@ -106,15 +106,6 @@ vector::iterator& ft_element_pool, stk::mesh::FieldBase *proc_rank_field=0) { - unsigned num_edges_marked=0; - for (int iedge = 0; iedge < 6; iedge++) - { - unsigned num_nodes_on_edge = new_sub_entity_nodes[m_eMesh.edge_rank()][iedge].size(); - if (num_nodes_on_edge) - { - ++num_edges_marked; - } - } m_transition_breaker->createNewElements(eMesh, nodeRegistry, element, new_sub_entity_nodes, element_pool, ft_element_pool, proc_rank_field); } diff --git a/packages/percept/src/adapt/RefinerPattern_Wedge6_Het_N.hpp b/packages/percept/src/adapt/RefinerPattern_Wedge6_Het_N.hpp index 2f36cf1f0912..65804e12fc4b 100644 --- a/packages/percept/src/adapt/RefinerPattern_Wedge6_Het_N.hpp +++ b/packages/percept/src/adapt/RefinerPattern_Wedge6_Het_N.hpp @@ -147,7 +147,7 @@ namespace percept { } } - unsigned num_faces_marked = 0; + //unsigned num_faces_marked = 0; unsigned face_marks[DiscretizeWedge::nfaces] = {0,0,0,0,0}; stk::mesh::EntityRank rank = m_eMesh.face_rank(); @@ -158,7 +158,7 @@ namespace percept { if (new_sub_entity_nodes[rank][iface].size()) { face_marks[iface] = 1; - ++num_faces_marked; + //++num_faces_marked; } } } @@ -1001,17 +1001,17 @@ namespace percept { ++num_edges_marked; } } - unsigned num_faces_marked = 0; - stk::mesh::EntityRank rank = m_eMesh.face_rank(); - - 
for (int iface = 0; iface < 6; iface++) - { - if ( new_sub_entity_nodes[rank].size() ) - { - if (new_sub_entity_nodes[rank][iface].size()) - ++num_faces_marked; - } - } + //unsigned num_faces_marked = 0; + //stk::mesh::EntityRank rank = m_eMesh.face_rank(); + + //for (int iface = 0; iface < 6; iface++) + // { + // if ( new_sub_entity_nodes[rank].size() ) + // { + // if (new_sub_entity_nodes[rank][iface].size()) + // ++num_faces_marked; + // } + // } if ( num_edges_marked == 9 ) //if ( num_edges_marked == 9 && num_faces_marked == 5) diff --git a/packages/percept/src/adapt/RefinerUnrefine.cpp b/packages/percept/src/adapt/RefinerUnrefine.cpp index 24370909a324..c3d53c24082d 100644 --- a/packages/percept/src/adapt/RefinerUnrefine.cpp +++ b/packages/percept/src/adapt/RefinerUnrefine.cpp @@ -46,7 +46,6 @@ #include namespace percept { - using namespace std; using namespace percept; diff --git a/packages/percept/src/adapt/RefinerUtil.cpp b/packages/percept/src/adapt/RefinerUtil.cpp index 47c6e2a951a9..15e3c80d0825 100644 --- a/packages/percept/src/adapt/RefinerUtil.cpp +++ b/packages/percept/src/adapt/RefinerUtil.cpp @@ -25,7 +25,6 @@ namespace percept { - using namespace std; using namespace percept; #define EXTRA_PRINT_UR_GETBLOCKS 0 diff --git a/packages/percept/src/adapt/SDCEntityType.hpp b/packages/percept/src/adapt/SDCEntityType.hpp index ec561b5abaed..2b6c7ea0ac68 100644 --- a/packages/percept/src/adapt/SDCEntityType.hpp +++ b/packages/percept/src/adapt/SDCEntityType.hpp @@ -160,7 +160,7 @@ template<> - struct my_fast_hash + struct my_fast_hash : public std::function< std::size_t(MySubDimCell)> { typedef MySubDimCell _Tp ; @@ -174,7 +174,7 @@ template<> - struct my_fast_hash + struct my_fast_hash : public std::function< std::size_t(MySubDimCell)> { typedef MySubDimCell _Tp ; @@ -188,7 +188,8 @@ template<> - struct my_fast_equal_to + struct my_fast_equal_to : public std::function, + MySubDimCell)> { typedef MySubDimCell _Tp ; inline bool @@ -208,7 +209,8 @@ }; 
template<> - struct my_fast_equal_to + struct my_fast_equal_to : public std::function< bool(MySubDimCell, + MySubDimCell)> { typedef MySubDimCell _Tp ; inline bool diff --git a/packages/percept/src/adapt/SubDimCell.hpp b/packages/percept/src/adapt/SubDimCell.hpp index 670a9d69d41b..6b3026e394ed 100644 --- a/packages/percept/src/adapt/SubDimCell.hpp +++ b/packages/percept/src/adapt/SubDimCell.hpp @@ -157,7 +157,7 @@ #define GET(x,i) x[i] template - struct my_hash + struct my_hash : public std::function)> { typedef SubDimCell _Tp ; @@ -184,7 +184,7 @@ }; template - struct my_fast_hash + struct my_fast_hash : public std::function)> { typedef SubDimCell _Tp ; @@ -197,7 +197,8 @@ }; template - struct my_equal_to + struct my_equal_to : public std::function, + SubDimCell)> { typedef SubDimCell _Tp ; bool @@ -218,7 +219,8 @@ }; template - struct my_fast_equal_to + struct my_fast_equal_to : public std::function, + SubDimCell)> { typedef SubDimCell _Tp ; inline bool diff --git a/packages/percept/src/adapt/TransitionElementAdapter.hpp b/packages/percept/src/adapt/TransitionElementAdapter.hpp index 30be17df1937..f20243787285 100644 --- a/packages/percept/src/adapt/TransitionElementAdapter.hpp +++ b/packages/percept/src/adapt/TransitionElementAdapter.hpp @@ -66,6 +66,12 @@ public: typedef HangingNodeAdapter Base; + ~TransitionElementAdapter() override { + if( m_adaptedMeshVerifier ) { + delete m_adaptedMeshVerifier; + } + } + TransitionElementAdapter(RefinePredicate& predicate_refine, percept::PerceptMesh& eMesh, UniformRefinerPatternBase & bp, diff --git a/packages/percept/src/adapt/UniformRefinerPattern.cpp b/packages/percept/src/adapt/UniformRefinerPattern.cpp index c6242d30d551..238092a2132d 100644 --- a/packages/percept/src/adapt/UniformRefinerPattern.cpp +++ b/packages/percept/src/adapt/UniformRefinerPattern.cpp @@ -715,7 +715,7 @@ if (DEBUG_SET_NEEDED_PARTS) std::cout << "tmp setNeededParts:: declare_part name= " << newPartName << " with topo= " << getToTopoPartName() << 
std::endl; stk::mesh::set_topology(*block_to, stk::mesh::get_topology(shards::CellTopology(getToTopology()), eMesh.get_fem_meta_data()->spatial_dimension())); - + eMesh.get_fem_meta_data()->set_part_id(*block_to, part->id()); if (!stk::io::is_part_io_part(block_to)) { stk::io::put_io_part_attribute(*block_to); } diff --git a/packages/percept/src/adapt/UniformRefinerPattern_Quad4_Tri3_2.hpp b/packages/percept/src/adapt/UniformRefinerPattern_Quad4_Tri3_2.hpp index 2e0d213516ea..80c14ca073d7 100644 --- a/packages/percept/src/adapt/UniformRefinerPattern_Quad4_Tri3_2.hpp +++ b/packages/percept/src/adapt/UniformRefinerPattern_Quad4_Tri3_2.hpp @@ -54,6 +54,8 @@ virtual StringStringMap fixSurfaceAndEdgeSetNamesMap() { StringStringMap str_map; + str_map["wedge6"] = "tet4"; + str_map["pyramid5"] = "tet4"; str_map["hex8"] = "tet4"; str_map["quad4"] = "tri3"; return str_map; diff --git a/packages/percept/src/adapt/UniformRefinerPattern_Tet4_Tet4_8_sierra.hpp b/packages/percept/src/adapt/UniformRefinerPattern_Tet4_Tet4_8_sierra.hpp index 476fa53e5b38..be22fefa49b2 100644 --- a/packages/percept/src/adapt/UniformRefinerPattern_Tet4_Tet4_8_sierra.hpp +++ b/packages/percept/src/adapt/UniformRefinerPattern_Tet4_Tet4_8_sierra.hpp @@ -178,11 +178,9 @@ add_parts = m_toParts; unsigned edge_marks[6] = {0,0,0,0,0,0}; - unsigned num_edges_marked=0; for (int iedge = 0; iedge < 6; iedge++) { edge_marks[iedge] = 1; - ++num_edges_marked; } stk::mesh::Entity elem_nodes_local[4] = {stk::mesh::Entity()}; diff --git a/packages/percept/src/adapt/main/MeshAdapt.cpp b/packages/percept/src/adapt/main/MeshAdapt.cpp index 8d259b658730..059c4063fcd7 100644 --- a/packages/percept/src/adapt/main/MeshAdapt.cpp +++ b/packages/percept/src/adapt/main/MeshAdapt.cpp @@ -22,7 +22,7 @@ #include -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) #include #endif @@ -848,7 +848,7 @@ namespace percept { "\n\tEnsures output mesh has the same block ids and names as the input mesh." 
"\n\tThis only makes sense for refine (not enrich or convert).", true); -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) // Salinas set_option(ps, "rbar_blocks" , &rbar_blocks , PARAMETER, "blocks to treat as special Salinas RBARs.", "", "\n\tRBARs will connect new nodes between two surfaces." @@ -1554,7 +1554,7 @@ namespace percept { BlockNamesType block_names(percept::EntityRankEnd+1u); BlockNamesType block_names_rbar(percept::EntityRankEnd+1u); -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) if (rbar_blocks.length()) { BlockNamesType rbar_names(percept::EntityRankEnd+1u); @@ -1866,7 +1866,7 @@ namespace percept { refiner->setFixAllBlockBoundaries(fix_all_block_boundaries); refiner->setDoProgressMeter(progress_meter == 1 && 0 == p_rank); -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) if (rbar_blocks.length()) { BlockNamesType rbar_names(percept::EntityRankEnd+1u); @@ -2014,7 +2014,7 @@ namespace percept { bool MeshAdapt::get_version(std::string* v) { if (v) *v = version_prefix+version; -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) return true; #else return false; @@ -2023,7 +2023,7 @@ namespace percept { void MeshAdapt::log_usage( bool status ) { -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) const bool disable_audit = !sierra::Env::get_param("noaudit").empty() || std::getenv("SIERRA_USAGE_METRICS_OFF") != NULL; size_t hwm_max = 0, hwm_min = 0, hwm_avg = 0; @@ -2098,7 +2098,6 @@ namespace percept { // std::string output_mesh_save = output_mesh; eMeshP.reset(new percept::PerceptMesh); - eMeshP->use_simple_fields(); if (output_active_elements_only) eMeshP->output_active_children_only(true); diff --git a/packages/percept/src/adapt/main/MeshAdaptMemberVarInit.hpp b/packages/percept/src/adapt/main/MeshAdaptMemberVarInit.hpp index 983336853ad8..48053bfda037 100644 --- a/packages/percept/src/adapt/main/MeshAdaptMemberVarInit.hpp +++ 
b/packages/percept/src/adapt/main/MeshAdaptMemberVarInit.hpp @@ -31,14 +31,14 @@ std::string next_adapted_mesh = ""; std::string memory_logfile_name = ""; // for Salinas -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) std::string rbar_blocks = ""; #endif // for Salinas and other codes //std::string ignore_blocks = ""; // just use block_name_inc to exclude.... -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) std::string version_prefix = "Sierra_"; #else std::string version_prefix = "NonSierra_"; diff --git a/packages/percept/src/adapt/markers/Marker.cpp b/packages/percept/src/adapt/markers/Marker.cpp index 5b0285f066aa..f06e3232c2ec 100644 --- a/packages/percept/src/adapt/markers/Marker.cpp +++ b/packages/percept/src/adapt/markers/Marker.cpp @@ -94,7 +94,7 @@ void Marker::setSelector(stk::mesh::Selector *sel) stk::mesh::Selector *Marker::getSelector() { return m_globalSelector; } -struct CompareErrIndRefFieldVec { + struct CompareErrIndRefFieldVec : public std::function { bool operator()( ErrIndInfoTuple a, ErrIndInfoTuple b) { return std::get<0>(a) < std::get<0>(b); diff --git a/packages/percept/src/percept/GeometryVerifier.cpp b/packages/percept/src/percept/GeometryVerifier.cpp index c5100f8bec1c..768a50cb4587 100644 --- a/packages/percept/src/percept/GeometryVerifier.cpp +++ b/packages/percept/src/percept/GeometryVerifier.cpp @@ -37,7 +37,7 @@ #include "GeometryVerifier.hpp" #include -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) #include #endif @@ -437,7 +437,7 @@ using namespace Intrepid; jacobian_det(iCell, 0) = Jac; //std::cout << "Jac= " << Jac << " vol= " << volume(iCell) << std::endl; } -#if defined(STK_BUILT_IN_SIERRA) +#if defined(STK_BUILT_FOR_SIERRA) if (m_use_finite_volume) { FiniteVolumeMesh3D fvm(*eMesh.get_bulk_data()); diff --git a/packages/percept/src/percept/Percept.hpp b/packages/percept/src/percept/Percept.hpp index 74799994dc07..d5de985a7c58 100644 --- 
a/packages/percept/src/percept/Percept.hpp +++ b/packages/percept/src/percept/Percept.hpp @@ -64,12 +64,12 @@ //------------------------------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------------------------------ -#if defined(STK_BUILT_IN_SIERRA) && !defined(STK_PERCEPT_HAS_GEOMETRY) +#if defined(STK_BUILT_FOR_SIERRA) && !defined(STK_PERCEPT_HAS_GEOMETRY) #define STK_PERCEPT_HAS_GEOMETRY #define STK_PERCEPT_USE_INTREPID #endif -#if !defined(STK_BUILT_IN_SIERRA) && defined(STK_PERCEPT_LITE) && STK_PERCEPT_LITE == 0 +#if !defined(STK_BUILT_FOR_SIERRA) && defined(STK_PERCEPT_LITE) && STK_PERCEPT_LITE == 0 # if !defined(STK_PERCEPT_HAS_GEOMETRY) # define STK_PERCEPT_HAS_GEOMETRY # endif diff --git a/packages/percept/src/percept/PerceptMesh.cpp b/packages/percept/src/percept/PerceptMesh.cpp index e59f0e782c60..5c04dfe8328d 100644 --- a/packages/percept/src/percept/PerceptMesh.cpp +++ b/packages/percept/src/percept/PerceptMesh.cpp @@ -78,6 +78,7 @@ #include #include +#include #include #include @@ -155,7 +156,6 @@ ,m_unprojected_coordinates(0) ,m_avoid_add_all_mesh_fields_as_input_fields(false) ,m_markNone(false) - ,m_useSimpleFields(false) { init( m_comm); s_static_singleton_instance = this; @@ -196,9 +196,6 @@ stk::mesh::MeshBuilder builder(m_comm); m_bulkData = builder.create(); m_metaData = std::shared_ptr(&m_bulkData->mesh_meta_data(),[](auto ptrWeWontDelete){}); - if (m_useSimpleFields) { - m_metaData->use_simple_fields(); - } m_metaData->initialize(m_spatialDim, entity_rank_names); const unsigned p_rank = stk::parallel_machine_rank( m_comm ); @@ -948,7 +945,7 @@ { stk::mesh::Entity const *elem_nodes = get_bulk_data()->begin_nodes(element); stk::mesh::Entity const *side_nodes = get_bulk_data()->begin_nodes(side); - return get_bulk_data()->find_permutation(topology(element), elem_nodes, topology(side), side_nodes, side_ord); + 
return stk::mesh::find_permutation(*get_bulk_data(), topology(element), elem_nodes, topology(side), side_nodes, side_ord); } bool PerceptMesh::is_perm_bad(stk::mesh::Entity element, stk::mesh::Entity side, unsigned side_ord, stk::mesh::Permutation& perm) @@ -1016,7 +1013,7 @@ stk::topology side_topo = topology(element).side_topology(side_ord); if (side_topo != side_topo_in) continue; - stk::mesh::Permutation perm = get_bulk_data()->find_permutation(topology(element), elem_nodes, side_topo, side_nodes, side_ord); + stk::mesh::Permutation perm = stk::mesh::find_permutation(*get_bulk_data(), topology(element), elem_nodes, side_topo, side_nodes, side_ord); bool sameOwner = this->owner_rank(element) == this->get_rank(); bool isPos = perm < side_topo.num_positive_permutations(); @@ -1814,7 +1811,6 @@ ,m_unprojected_coordinates(0) ,m_avoid_add_all_mesh_fields_as_input_fields(false) ,m_markNone(false) - ,m_useSimpleFields((metaData) ? metaData->is_using_simple_fields() : false) { //if (!bulkData) // throw std::runtime_error("PerceptMesh::PerceptMesh: must pass in non-null bulkData"); @@ -1828,13 +1824,6 @@ void PerceptMesh::use_simple_fields() { - m_useSimpleFields = true; - if (m_iossMeshData) { - m_iossMeshData->use_simple_fields(); - } - if (m_metaData) { - m_metaData->use_simple_fields(); - } } void PerceptMesh::set_bulk_data(stk::mesh::BulkData *bulkData) @@ -1882,9 +1871,6 @@ entity_rank_names.push_back("FAMILY_TREE"); #endif m_iossMeshData->set_rank_name_vector(entity_rank_names); - if (m_useSimpleFields) { - m_iossMeshData->use_simple_fields(); - } } void PerceptMesh::destroy() @@ -1898,6 +1884,13 @@ m_iossMeshDataOut = Teuchos::null; m_bulkData.reset(); m_metaData.reset(); + +#if !STK_PERCEPT_LITE + if(m_searcher) { + delete m_searcher; + m_searcher = NULL; + } +#endif } PerceptMesh::~PerceptMesh() @@ -3928,75 +3921,143 @@ stk::mesh::Entity entity_1 = bucket_1[iEntity]; stk::mesh::Entity entity_2 = bucket_2[iEntity]; - unsigned loc_stride_1 = 0; - unsigned 
loc_stride_2 = 0; - double * fdata_1 = eMesh1.field_data( field_1 , entity_1, &loc_stride_1); - double * fdata_2 = eMesh2.field_data( field_2 , entity_2, &loc_stride_2); - - if ((fdata_1 == 0) != (fdata_2 == 0) || (loc_stride_1 != loc_stride_2)) + if (field_1->type_is() && field_2->type_is()) { - msg += "| (fdata_1 == 0) != (fdata_2 == 0)) |\n"; - diff = true; - } + unsigned loc_stride_1 = 0; + unsigned loc_stride_2 = 0; + double * fdata_1 = eMesh1.field_data( field_1 , entity_1, &loc_stride_1); + double * fdata_2 = eMesh2.field_data( field_2 , entity_2, &loc_stride_2); - if (fdata_1) - { - bool is_same=true; - double tol = 1.e-5; - for (unsigned istride = 0; istride < loc_stride_1; istride++) + if ((fdata_1 == 0) != (fdata_2 == 0) || (loc_stride_1 != loc_stride_2)) { - double fd1 = fdata_1[istride]; - double fd2 = fdata_2[istride]; - if (!Util::approx_equal_relative(fd1, fd2, tol)) - { - is_same=false; - break; - } + msg += "| (fdata_1 == 0) != (fdata_2 == 0)) |\n"; + diff = true; } - if (!is_same) + if (fdata_1) { - if (!printed_header) + bool is_same=true; + double tol = 1.e-5; + for (unsigned istride = 0; istride < loc_stride_1; istride++) { - for (unsigned jfld = 0; jfld < fields_1.size(); jfld++) + double fd1 = fdata_1[istride]; + double fd2 = fdata_2[istride]; + if (!Util::approx_equal_relative(fd1, fd2, tol)) { - stk::mesh::FieldBase *field_0_1 = fields_1[jfld]; - if (1) std::cout << "P[" << p_rank << "] info> Field1[" << jfld << "]= " << field_0_1->name() << std::endl; + is_same=false; + break; } - for (unsigned jfld = 0; jfld < fields_2.size(); jfld++) + } + + if (!is_same) + { + if (!printed_header) { - stk::mesh::FieldBase *field_0_2 = fields_2[jfld]; - if (1) std::cout << "P[" << p_rank << "] info> Field2[" << jfld << "]= " << field_0_2->name() << std::endl; + for (unsigned jfld = 0; jfld < fields_1.size(); jfld++) + { + stk::mesh::FieldBase *field_0_1 = fields_1[jfld]; + if (1) std::cout << "P[" << p_rank << "] info> Field1[" << jfld << "]= " << 
field_0_1->name() << std::endl; + } + for (unsigned jfld = 0; jfld < fields_2.size(); jfld++) + { + stk::mesh::FieldBase *field_0_2 = fields_2[jfld]; + if (1) std::cout << "P[" << p_rank << "] info> Field2[" << jfld << "]= " << field_0_2->name() << std::endl; + } + + msg += std::string("\n| field data not equal field_1= ") +field_1->name()+" field_2= "+field_2->name()+" |"; + printed_header = true; } - - msg += std::string("\n| field data not equal field_1= ") +field_1->name()+" field_2= "+field_2->name()+" |"; - printed_header = true; + msg += "\n|{"; + for (unsigned istride = 0; istride < loc_stride_1; istride++) + { + double fd1 = fdata_1[istride]; + double fd2 = fdata_2[istride]; + // msg += "\n| "+toString(fd1).substr(0,print_field_width)+" - "+toString(fd2).substr(0,print_field_width)+" = " + // +toString(fd1-fd2).substr(0,print_field_width)+ + // " [ "+toString(100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20)).substr(0,print_percent_width)+" % ] |"; + //std::ostringstream ostr; + // ostr << "\n| " << std::setw(print_field_width) << fd1 << " - " << fd2 << " = " + // << (fd1-fd2) + // << std::setw(print_percent_width) << " [ " << (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20)) << " % ] |"; + //msg += ostr.str(); + char buf[1024]; + sprintf(buf, ", | %12.3g - %12.3g = %12.3g [ %10.3g %% ] |", fd1, fd2, (fd1-fd2), (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20))); + // << (fd1-fd2) + // << std::setw(print_percent_width) << " [ " << (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20)) << " % ] |"; + msg += buf; + diff = true; + local_local_diff = true; + max_diff = std::max(max_diff, std::abs(fd1-fd2)); + min_diff = std::min(min_diff, std::abs(fd1-fd2)); + } + msg += "}|"; } - msg += "\n|{"; + } + } + else if (field_1->type_is() && field_2->type_is()) + { + const unsigned loc_stride_1 = (field_1 != nullptr) ? stk::mesh::field_scalars_per_entity(*field_1, entity_1) + : 0; + const unsigned loc_stride_2 = (field_2 != nullptr) ? 
stk::mesh::field_scalars_per_entity(*field_2, entity_2) + : 0; + int * fdata_1 = static_cast(stk::mesh::field_data(*field_1, entity_1)); + int * fdata_2 = static_cast(stk::mesh::field_data(*field_2, entity_2)); + + if ((fdata_1 == 0) != (fdata_2 == 0) || (loc_stride_1 != loc_stride_2)) + { + msg += "| (fdata_1 == 0) != (fdata_2 == 0)) |\n"; + diff = true; + } + + if (fdata_1) + { + bool is_same=true; for (unsigned istride = 0; istride < loc_stride_1; istride++) { - double fd1 = fdata_1[istride]; - double fd2 = fdata_2[istride]; - // msg += "\n| "+toString(fd1).substr(0,print_field_width)+" - "+toString(fd2).substr(0,print_field_width)+" = " - // +toString(fd1-fd2).substr(0,print_field_width)+ - // " [ "+toString(100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20)).substr(0,print_percent_width)+" % ] |"; - //std::ostringstream ostr; - // ostr << "\n| " << std::setw(print_field_width) << fd1 << " - " << fd2 << " = " - // << (fd1-fd2) - // << std::setw(print_percent_width) << " [ " << (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20)) << " % ] |"; - //msg += ostr.str(); - char buf[1024]; - sprintf(buf, ", | %12.3g - %12.3g = %12.3g [ %10.3g %% ] |", fd1, fd2, (fd1-fd2), (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20))); - // << (fd1-fd2) - // << std::setw(print_percent_width) << " [ " << (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20)) << " % ] |"; - msg += buf; - diff = true; - local_local_diff = true; - max_diff = std::max(max_diff, std::abs(fd1-fd2)); - min_diff = std::min(min_diff, std::abs(fd1-fd2)); + int fd1 = fdata_1[istride]; + int fd2 = fdata_2[istride]; + if (fd1 != fd2) + { + is_same=false; + break; + } + } + + if (!is_same) + { + if (!printed_header) + { + for (unsigned jfld = 0; jfld < fields_1.size(); jfld++) + { + stk::mesh::FieldBase *field_0_1 = fields_1[jfld]; + std::cout << "P[" << p_rank << "] info> Field1[" << jfld << "]= " << field_0_1->name() << std::endl; + } + for (unsigned jfld = 0; jfld < fields_2.size(); jfld++) + { + 
stk::mesh::FieldBase *field_0_2 = fields_2[jfld]; + std::cout << "P[" << p_rank << "] info> Field2[" << jfld << "]= " << field_0_2->name() << std::endl; + } + + msg += std::string("\n| field data not equal field_1= ") +field_1->name()+" field_2= "+field_2->name()+" |"; + printed_header = true; + } + msg += "\n|{"; + for (unsigned istride = 0; istride < loc_stride_1; istride++) + { + int fd1 = fdata_1[istride]; + int fd2 = fdata_2[istride]; + char buf[1024]; + sprintf(buf, ", | %i - %i = %i [ %10.3g %% ] |", fd1, fd2, (fd1-fd2), (100.0*(fd1-fd2)/(std::abs(fd1)+std::abs(fd2)+1.e-20))); + msg += buf; + diff = true; + local_local_diff = true; + max_diff = std::max(max_diff, static_cast(std::abs(fd1-fd2))); + min_diff = std::min(min_diff, static_cast(std::abs(fd1-fd2))); + } + msg += "}|"; } - msg += "}|"; } + } if (!print_all_field_diffs && local_local_diff) break; @@ -5873,15 +5934,7 @@ FieldType * find_field_possible_array_tag(const stk::mesh::MetaData & meta, const stk::mesh::EntityRank rank, const std::string & field_name) { - stk::mesh::FieldBase * result = meta.get_field(rank, field_name); - - // Note: This is dangerous, but apparently Percept has been getting away with - // it for years without problems. Once all apps have been migrated to the - // new simple_fields workflow, this code can be replaced with a - // generic get_field() call without a reinterpret cast because there - // won't be any ambiguity about the absence/presence/type of extra Field - // template parameters. 
- return reinterpret_cast(result); + return meta.get_field(rank, field_name); } void PerceptMesh::register_and_set_refine_fields() diff --git a/packages/percept/src/percept/PerceptMesh.hpp b/packages/percept/src/percept/PerceptMesh.hpp index 72ee9657cb55..10293be7267d 100644 --- a/packages/percept/src/percept/PerceptMesh.hpp +++ b/packages/percept/src/percept/PerceptMesh.hpp @@ -1169,9 +1169,8 @@ bool m_avoid_add_all_mesh_fields_as_input_fields; public: bool m_markNone; - private: - bool m_useSimpleFields; + private: void checkStateSpec(const std::string& function, bool cond1=true, bool cond2=true, bool cond3=true); void checkState(const std::string& function) { diff --git a/packages/percept/src/percept/PerceptMeshReadWrite.hpp b/packages/percept/src/percept/PerceptMeshReadWrite.hpp index 741ce73af9c6..68f8c746ed78 100644 --- a/packages/percept/src/percept/PerceptMeshReadWrite.hpp +++ b/packages/percept/src/percept/PerceptMeshReadWrite.hpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include diff --git a/packages/percept/src/percept/eigen_verify/EigenVerify.cpp b/packages/percept/src/percept/eigen_verify/EigenVerify.cpp index e8f4a89bad38..875c060608ca 100644 --- a/packages/percept/src/percept/eigen_verify/EigenVerify.cpp +++ b/packages/percept/src/percept/eigen_verify/EigenVerify.cpp @@ -39,7 +39,6 @@ void EigenVerify::process_options() void EigenVerify::create_mesh_data(stk::io::StkMeshIoBroker * mesh_data, const std::string &filename) { - mesh_data->use_simple_fields(); mesh_data->property_add(Ioss::Property("FIELD_SUFFIX_SEPARATOR", "")); mesh_data->add_mesh_database(filename, "exodus", stk::io::READ_MESH); mesh_data->create_input_mesh(); diff --git a/packages/percept/src/percept/fixtures/BeamFixture.cpp b/packages/percept/src/percept/fixtures/BeamFixture.cpp index 834c4fbbf5c9..b484a41b5d25 100644 --- a/packages/percept/src/percept/fixtures/BeamFixture.cpp +++ b/packages/percept/src/percept/fixtures/BeamFixture.cpp @@ -40,7 +40,6 @@ , 
m_metaData(m_bulkData.mesh_meta_data()) , m_block_beam( m_metaData.declare_part_with_topology( "block_2", stk::topology::BEAM_2 ) ) { - m_metaData.use_simple_fields(); m_coordinates_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "coordinates" ); m_centroid_field = &m_metaData.declare_field( stk::topology::ELEMENT_RANK, "centroid" ); m_temperature_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "temperature" ); diff --git a/packages/percept/src/percept/fixtures/HeterogeneousFixture.cpp b/packages/percept/src/percept/fixtures/HeterogeneousFixture.cpp index 6d4339ce1691..aaee99617e6d 100644 --- a/packages/percept/src/percept/fixtures/HeterogeneousFixture.cpp +++ b/packages/percept/src/percept/fixtures/HeterogeneousFixture.cpp @@ -67,7 +67,6 @@ , m_sideset_quad(0), m_sideset_quad_subset(0) , m_sideset_tri(0), m_sideset_tri_subset(0) { - m_metaData.use_simple_fields(); m_coordinates_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "coordinates" ); m_centroid_field = &m_metaData.declare_field( stk::topology::ELEMENT_RANK, "centroid" ); m_temperature_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "temperature" ); diff --git a/packages/percept/src/percept/fixtures/PyramidFixture.cpp b/packages/percept/src/percept/fixtures/PyramidFixture.cpp index 2b827222ad7b..ad916c64ea05 100644 --- a/packages/percept/src/percept/fixtures/PyramidFixture.cpp +++ b/packages/percept/src/percept/fixtures/PyramidFixture.cpp @@ -60,7 +60,6 @@ , m_sideset_quad(0), m_sideset_quad_subset(0) , m_sideset_tri(0), m_sideset_tri_subset(0) { - m_metaData.use_simple_fields(); m_coordinates_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "coordinates" ); m_centroid_field = &m_metaData.declare_field( stk::topology::ELEMENT_RANK, "centroid" ); m_temperature_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "temperature" ); diff --git a/packages/percept/src/percept/fixtures/QuadFixture.hpp 
b/packages/percept/src/percept/fixtures/QuadFixture.hpp index d1f1bef205d5..5898d6df2549 100644 --- a/packages/percept/src/percept/fixtures/QuadFixture.hpp +++ b/packages/percept/src/percept/fixtures/QuadFixture.hpp @@ -93,7 +93,6 @@ { enum { SpatialDim = 2 }; - meta_data.use_simple_fields(); coord_field = &meta_data.declare_field(stk::topology::NODE_RANK, "coordinates"); set_bounding_box(0,(double)NX,0,(double)NY); diff --git a/packages/percept/src/percept/fixtures/SingleTetFixture.cpp b/packages/percept/src/percept/fixtures/SingleTetFixture.cpp index c8fc3abd9d22..dcc213a8d779 100644 --- a/packages/percept/src/percept/fixtures/SingleTetFixture.cpp +++ b/packages/percept/src/percept/fixtures/SingleTetFixture.cpp @@ -48,7 +48,6 @@ , m_ntets(ntets), m_tetIds(tetIds) , m_elem_id_start(elem_id_start) { - m_metaData.use_simple_fields(); m_coordinates_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "coordinates" ); // Define where fields exist on the mesh: diff --git a/packages/percept/src/percept/fixtures/TetWedgeFixture.cpp b/packages/percept/src/percept/fixtures/TetWedgeFixture.cpp index 01f1ec273bbb..4677ed47b12a 100644 --- a/packages/percept/src/percept/fixtures/TetWedgeFixture.cpp +++ b/packages/percept/src/percept/fixtures/TetWedgeFixture.cpp @@ -48,7 +48,6 @@ , m_sideset_tri(0), m_sideset_tri_subset(0) { - m_metaData.use_simple_fields(); m_coordinates_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "coordinates" ); m_centroid_field = &m_metaData.declare_field( stk::topology::ELEMENT_RANK, "centroid" ); m_temperature_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "temperature" ); diff --git a/packages/percept/src/percept/fixtures/TriQuadSurfaceMesh3D.cpp b/packages/percept/src/percept/fixtures/TriQuadSurfaceMesh3D.cpp index 4db6500dc230..e138ad76221e 100644 --- a/packages/percept/src/percept/fixtures/TriQuadSurfaceMesh3D.cpp +++ b/packages/percept/src/percept/fixtures/TriQuadSurfaceMesh3D.cpp @@ -58,7 +58,6 @@ { - 
m_metaData.use_simple_fields(); m_coordinates_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "coordinates" ); // m_centroid_field = &m_metaData.declare_field( stk::topology::ELEMENT_RANK, "centroid" ); // m_temperature_field = &m_metaData.declare_field( stk::topology::NODE_RANK, "temperature" ); diff --git a/packages/percept/src/percept/mesh/gen/SweepMesher.cpp b/packages/percept/src/percept/mesh/gen/SweepMesher.cpp index 2efc0af6be32..f304c8a49c29 100644 --- a/packages/percept/src/percept/mesh/gen/SweepMesher.cpp +++ b/packages/percept/src/percept/mesh/gen/SweepMesher.cpp @@ -90,7 +90,6 @@ namespace percept builder.set_entity_rank_names(get_entity_rank_names(3u)); m_bulkData = builder.create(); m_metaData = std::shared_ptr(&m_bulkData->mesh_meta_data(), [](auto ptrWeWontDelete){}); - m_metaData->use_simple_fields(); m_parts.resize(NUM_ELEM_TYPES); diff --git a/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelGregoryPatch.cpp b/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelGregoryPatch.cpp index 101d50b8dfea..84ee211e1670 100644 --- a/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelGregoryPatch.cpp +++ b/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelGregoryPatch.cpp @@ -41,7 +41,6 @@ bool GeometryKernelGregoryPatch::read_file ) { m_geometryMesh = new percept::PerceptMesh(); - m_geometryMesh->use_simple_fields(); std::string options = ""; bool auto_decomp = get_property("auto_decomp") == "true"; bool exo_large = get_property("exo_large") == "true"; diff --git a/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelPGEOM.cpp b/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelPGEOM.cpp index 47f218988c76..4f481f5c62ed 100644 --- a/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelPGEOM.cpp +++ b/packages/percept/src/percept/mesh/geometry/kernel/GeometryKernelPGEOM.cpp @@ -19,14 +19,14 @@ #include #include -#include -#include +#include +#include 
#ifdef HAVE_ACIS -#include +#include #endif -#include +#include #include #include diff --git a/packages/percept/src/percept/mesh/geometry/stk_geom/3D/FitGregoryPatches.cpp b/packages/percept/src/percept/mesh/geometry/stk_geom/3D/FitGregoryPatches.cpp index 9649677f259e..aa2cc30b95cd 100644 --- a/packages/percept/src/percept/mesh/geometry/stk_geom/3D/FitGregoryPatches.cpp +++ b/packages/percept/src/percept/mesh/geometry/stk_geom/3D/FitGregoryPatches.cpp @@ -746,7 +746,6 @@ namespace percept { typedef std::vector VecEdge; VecEdge vecEdge; - stk::mesh::EntityId nedges_topo=0, nedges_geom=0; if (1) { for (unsigned ii=0; ii < vecFaces.size(); ++ii) @@ -764,7 +763,6 @@ namespace percept { if (m_edgeSet.find(edge) == m_edgeSet.end()) { m_edgeSet.insert(edge); - ++nedges_topo; } } } @@ -835,7 +833,6 @@ namespace percept { if (m_edgeSet.find(edge) == m_edgeSet.end()) { m_edgeSet.insert(edge); - ++nedges_geom; } } } diff --git a/packages/percept/src/percept/mesh/mod/smoother/GenericAlgorithm_total_element_metric.hpp b/packages/percept/src/percept/mesh/mod/smoother/GenericAlgorithm_total_element_metric.hpp index 075db3ef149e..34cc4f9310c9 100644 --- a/packages/percept/src/percept/mesh/mod/smoother/GenericAlgorithm_total_element_metric.hpp +++ b/packages/percept/src/percept/mesh/mod/smoother/GenericAlgorithm_total_element_metric.hpp @@ -57,13 +57,13 @@ class PerceptMesh; void updateState(SmootherMetricImpl *metric, PerceptMesh *eMesh, bool valid_in, size_t *num_invalid_in, Double mtot_in, size_t n_invalid_in) {m_metric = metric; m_eMesh = eMesh; valid = valid_in; num_invalid = num_invalid_in; mtot = mtot_in; n_invalid = n_invalid_in;} - KOKKOS_INLINE_FUNCTION + inline void operator()(const unsigned& index, Double& mtot_loc) const { const_cast(this)->operator()(index, mtot_loc); } - KOKKOS_INLINE_FUNCTION + inline void operator()(const unsigned& index, Double& mtot_loc); }; diff --git a/packages/percept/src/percept/mesh/mod/smoother/MeshSmoother.cpp 
b/packages/percept/src/percept/mesh/mod/smoother/MeshSmoother.cpp index f6af63272b3c..ec0a018fda7e 100644 --- a/packages/percept/src/percept/mesh/mod/smoother/MeshSmoother.cpp +++ b/packages/percept/src/percept/mesh/mod/smoother/MeshSmoother.cpp @@ -15,9 +15,6 @@ #include #define DEBUG_PRINT 0 -namespace std { - -} namespace percept { diff --git a/packages/percept/src/percept/mesh_transfer/RotationTranslation.hpp b/packages/percept/src/percept/mesh_transfer/RotationTranslation.hpp index 7f47b137a5e4..c4132167fd54 100644 --- a/packages/percept/src/percept/mesh_transfer/RotationTranslation.hpp +++ b/packages/percept/src/percept/mesh_transfer/RotationTranslation.hpp @@ -17,7 +17,6 @@ void applyRotation(stk::mesh::FieldBase *vectorField, const stk::mesh::MetaData & meta = bulkdata.mesh_meta_data(); stk::mesh::EntityRank rank = vectorField->entity_rank(); - // TODO support 2D? const unsigned nDim = meta.spatial_dimension(); const double xtheta = M_PI*xrot/180.0; @@ -66,19 +65,36 @@ void applyRotation(stk::mesh::FieldBase *vectorField, stk::mesh::Bucket & b = **ib ; double * v = static_cast(stk::mesh::field_data(*vectorField, b)); - double tmp[3]; - const stk::mesh::Bucket::size_type length = b.size(); - for (unsigned ie=0; iecontiguous_data()); + std::copy(&mda[0], &mda[0] + mda.size(), this->contiguous_data()); } //------------------------------------------------------------------------------------------------------------------------ @@ -331,7 +331,7 @@ IntrepidManager::IntegrandValues:: copyFrom(MDArray& mda) { - copy(&mda[0], &mda[0] + mda.size(), this->contiguous_data()); + std::copy(&mda[0], &mda[0] + mda.size(), this->contiguous_data()); } void IntrepidManager::IntegrandValues:: diff --git a/packages/percept/src/percept/norm/IntrepidManager.hpp b/packages/percept/src/percept/norm/IntrepidManager.hpp index c817945fdd58..ce8fcbab7b3f 100644 --- a/packages/percept/src/percept/norm/IntrepidManager.hpp +++ b/packages/percept/src/percept/norm/IntrepidManager.hpp @@ 
-51,7 +51,6 @@ xxx error #endif -using namespace std; using namespace Intrepid; #define IM_TAG( ADT ) ADT ## _TAG diff --git a/packages/percept/src/percept/stk_rebalance/ZoltanPartition.cpp b/packages/percept/src/percept/stk_rebalance/ZoltanPartition.cpp index efbb1416d888..9eafe251345e 100644 --- a/packages/percept/src/percept/stk_rebalance/ZoltanPartition.cpp +++ b/packages/percept/src/percept/stk_rebalance/ZoltanPartition.cpp @@ -26,7 +26,6 @@ #include -using namespace std; using namespace stk; using namespace stk::rebalance; @@ -47,7 +46,7 @@ inline unsigned num_lid_entries() { } inline void convert_param_to_string(const Parameters &from, - vector < pair > &to) + std::vector < std::pair > &to) { Parameters::ConstIterator from_iter = from.begin(), @@ -590,7 +589,7 @@ Zoltan::set_mesh_info( const std::vector &mesh_entities, m_mesh_information_ = mesh_info; } -void Zoltan::init( const vector< pair > +void Zoltan::init( const std::vector< std::pair > &dynamicLoadRebalancingParameters ) { if (0==static_zoltan_version()) { const double v = init_zoltan_library(); @@ -603,14 +602,14 @@ void Zoltan::init( const vector< pair > m_zoltan_id_ = Zoltan_Create( comm_ ); if ( m_zoltan_id_ == NULL ) { - throw runtime_error ("(FATAL ERROR) Zoltan_Create() returned NULL"); + throw std::runtime_error ("(FATAL ERROR) Zoltan_Create() returned NULL"); } /** * Set up dynamic load rebalancing */ - vector >::const_iterator + std::vector >::const_iterator P = dynamicLoadRebalancingParameters.begin(), PE = dynamicLoadRebalancingParameters.end(); @@ -621,7 +620,7 @@ void Zoltan::init( const vector< pair > if (ZOLTAN_OK != (Zoltan_Set_Param(m_zoltan_id_,label,value))) { - throw runtime_error(": FATAL ERROR returned from Zoltan_Set_Param "); + throw std::runtime_error(": FATAL ERROR returned from Zoltan_Set_Param "); } } @@ -629,7 +628,7 @@ void Zoltan::init( const vector< pair > * Register the Zoltan/SIERRA "call-back" (querry) functions. 
*/ if ( ZOLTAN_OK != register_callbacks() ) - throw runtime_error ("zoltan->Register_Callbacks error. "); + throw std::runtime_error ("zoltan->Register_Callbacks error. "); #if STK_GEOMDECOMP_DEBUG>=2 { @@ -932,7 +931,7 @@ void Zoltan::determine_new_partition (bool &RebalancingNeeded) &num_exported, &export_gids, &export_lids, &export_procs ); if (status != ZOLTAN_OK) { - stringstream sstatus; + std::stringstream sstatus; sstatus << status; throw std::runtime_error("Zoltan_Balance() returned error code " + sstatus.str()); } @@ -972,7 +971,7 @@ void Zoltan::determine_new_partition (bool &RebalancingNeeded) if ( ZOLTAN_OK != Zoltan_LB_Free_Data( &import_gids, &import_lids, &import_procs, &export_gids, &export_lids, &export_procs )) { - throw runtime_error (" FATAL ERROR in Zoltan_LB_Free_Data."); + throw std::runtime_error (" FATAL ERROR in Zoltan_LB_Free_Data."); } } @@ -1025,8 +1024,7 @@ void Zoltan::convert_names_and_values(const Parameters &from, Parameters &to) Only a couple of parameters have parameter conversion. The ones converted are nested in Value_Conversion. 
*/ - std::string to_value = Teuchos::getValue(from.entry(from_iter)); - //if (Value_Conversion->isParameter(from_name)) to_value = Value_Conversion->get(to_value); + std::string to_value = Teuchos::getValue(from.entry(from_iter)); if (Value_Conversion->isParameter(from_name)) to_value = Value_Conversion->sublist(from_name).get(to_value); if (!to_name.empty()) to.set(to_name, to_value); } diff --git a/packages/percept/src/percept/uq/main/RFRealizeMain.cpp b/packages/percept/src/percept/uq/main/RFRealizeMain.cpp index d5815a4c99f6..22d27b809525 100644 --- a/packages/percept/src/percept/uq/main/RFRealizeMain.cpp +++ b/packages/percept/src/percept/uq/main/RFRealizeMain.cpp @@ -92,7 +92,6 @@ int main(int argc, char **argv) const unsigned numCoeffs = xi.size(); stk::io::StkMeshIoBroker mesh_data(comm); - mesh_data.use_simple_fields(); mesh_data.add_mesh_database(input_file, "exodus", stk::io::READ_MESH); mesh_data.create_input_mesh(); diff --git a/packages/percept/src/percept/uq/main/RFSuiteMain.cpp b/packages/percept/src/percept/uq/main/RFSuiteMain.cpp index 6eb4cf67b993..b9d3119f5933 100644 --- a/packages/percept/src/percept/uq/main/RFSuiteMain.cpp +++ b/packages/percept/src/percept/uq/main/RFSuiteMain.cpp @@ -144,7 +144,6 @@ int main(int argc, char **argv) } stk::io::StkMeshIoBroker mesh_data(comm); - mesh_data.use_simple_fields(); mesh_data.add_mesh_database(input_mesh, "exodus", stk::io::READ_MESH); mesh_data.create_input_mesh(); diff --git a/packages/percept/src/percept/util/GeneralFunction.hpp b/packages/percept/src/percept/util/GeneralFunction.hpp index 4943d125d657..ca333c454d41 100644 --- a/packages/percept/src/percept/util/GeneralFunction.hpp +++ b/packages/percept/src/percept/util/GeneralFunction.hpp @@ -12,8 +12,6 @@ #include -using namespace std; - #if 0 template void push_back( vector& dst, const vector& src) { @@ -64,18 +62,18 @@ template void push_back( vector& dst, const vector& src) virtual void operator()(const domain& x, codomain& y) { y = x; } // 
multiple values - virtual vector operator()(const vector& x) + virtual std::vector operator()(const std::vector& x) { // inefficient default impl int n = x.size(); - vector y(n); + std::vector y(n); for(int i = 0; i < n; i++) { y[i] = (*this)(x[i]); } return y; }; // return value or reference? - virtual void operator()(const vector& x, vector& y) + virtual void operator()(const std::vector& x, std::vector& y) { // inefficient default impl int n = x.size(); @@ -98,7 +96,7 @@ template void push_back( vector& dst, const vector& src) class GeneralFunctionWithGrad : public GeneralFunction { // return a function that computes the gradient of this - virtual GeneralFunction > grad()=0; // return GeneralFunction >(); } + virtual GeneralFunction > grad()=0; // return GeneralFunction >(); } }; template diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index 6e418aed6c67..aa40a496fb6d 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,25 @@ # CHANGELOG +5.21.3 (STK_VERSION 5210300) 8/12/2024 + general: compile-warnings/errors fixed for gcc 12 and arm + stk_mesh: BulkData::change_entity_owner now returns a bool + - indicates whether any entities actually changed owner. + stk_mesh: continue "simple field" transition + - remove some previously-deprecated functions/behaviors + - 'use_simple_field()' calls now unnecessary, will be deprecated in future + stk_mesh: deleted previously-deprecated types/methods + - DeviceMeshIndex and usage in DeviceMesh/HostMesh + - NgpFieldBase::rotate_multistate_data() + - DeviceMesh::host_get_entity(..) 
+ stk_search: changing template parameters for input Views + - now less restrictive on how users' views were declared + +5.21.1 (STK_VERSION 5210100) 6/17/2024 + stk_mesh: Deprecated BulkData::find_permutation and BulkData::check_permutation + (replaced by free-functions in FindPermutation.hpp) + stk_mesh: Added destroy_relations free-function in DestroyRelations.hpp + stk_mesh: Added overloads of field_fill, field_copy, field_axpbyz in NgpFieldBLAS.hpp + 5.19.4 (STK_VERSION 5190401) 5/29/2024 stk_search: fixed bug in morton: (accessing device view on host) stk_search: fixed implementations to respect execution-space diff --git a/packages/stk/CMakeLists.txt b/packages/stk/CMakeLists.txt index cc20b0052921..519af66ec029 100644 --- a/packages/stk/CMakeLists.txt +++ b/packages/stk/CMakeLists.txt @@ -36,19 +36,22 @@ cmake_minimum_required(VERSION 3.16 FATAL_ERROR) message("starting STK cmake configuration, CMAKE_SOURCE_DIR=${CMAKE_SOURCE_DIR}") +option(STK_BUILT_FOR_SIERRA "Enable SIERRA capability" OFF) +set(SIERRA_MIGRATION ${STK_BUILT_FOR_SIERRA} CACHE BOOL "Enable SIERRA capability") + IF(COMMAND TRIBITS_PACKAGE_DECL) SET(HAVE_STK_Trilinos ON) TRIBITS_PACKAGE_DECL(STK) MESSAGE("*** Building STK as a Trilinos package. ***") ELSE() SET(HAVE_STK_Trilinos OFF) - project(STK CXX) + project(STK CXX Fortran) SET(PACKAGE_NAME "STK") MESSAGE("*** Building STK as a stand-alone cmake package. 
***") ENDIF() - + SET(STK_TOPLEVEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(STK_TOPLEVEL_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -201,3 +204,41 @@ STK_SUBPACKAGES() STK_PACKAGE_POSTPROCESS() +IF (NOT HAVE_STK_Trilinos) + include(CMakePackageConfigHelpers) + + write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/stkConfigVersion.cmake" + VERSION VOTD + COMPATIBILITY ExactVersion + ) + + install(EXPORT stkTargets + FILE stkTargets.cmake + NAMESPACE stk:: + DESTINATION share/cmake/stk + ) + + install( + FILES + cmake/stkConfig.cmake + "${CMAKE_CURRENT_BINARY_DIR}/stkConfigVersion.cmake" + DESTINATION share/cmake/stk + COMPONENT Devel + ) + if(STK_BUILT_FOR_SIERRA) + install( + FILES cmake/stkLapackSierra.cmake + DESTINATION share/cmake/stk + RENAME stkLapack.cmake + COMPONENT Devel + ) + else() + install( + FILES cmake/stkLapackGeneric.cmake + DESTINATION share/cmake/stk + RENAME stkLapack.cmake + COMPONENT Devel + ) + endif() +ENDIF() diff --git a/packages/stk/cmake/STK_Trilinos_config.h.in b/packages/stk/cmake/STK_Trilinos_config.h.in index 0f5ea4683d84..b8a7557f6875 100644 --- a/packages/stk/cmake/STK_Trilinos_config.h.in +++ b/packages/stk/cmake/STK_Trilinos_config.h.in @@ -38,8 +38,6 @@ #cmakedefine STK_DISABLE_MPI_NEIGHBOR_COMM -#cmakedefine STK_USE_SIMPLE_FIELDS - #cmakedefine STK_HAVE_BOOST #cmakedefine STK_HAVE_KOKKOS @@ -80,4 +78,6 @@ #cmakedefine FORTRAN_TWO_UNDERSCORES #endif -#define SIERRA_MIGRATION +#cmakedefine SIERRA_MIGRATION + +#cmakedefine STK_BUILT_FOR_SIERRA diff --git a/packages/stk/cmake/stkConfig.cmake b/packages/stk/cmake/stkConfig.cmake new file mode 100644 index 000000000000..118e96a97c51 --- /dev/null +++ b/packages/stk/cmake/stkConfig.cmake @@ -0,0 +1,10 @@ +include(CMakeFindDependencyMacro) +find_dependency(Kokkos REQUIRED) +find_dependency(Shards REQUIRED) +find_dependency(MPI REQUIRED) +find_dependency(ArborX QUIET) +include("${CMAKE_CURRENT_LIST_DIR}/stkLapack.cmake") +find_dependency(SEACAS REQUIRED) 
+find_dependency(GTest REQUIRED) +find_dependency(Zoltan2Core REQUIRED) +include("${CMAKE_CURRENT_LIST_DIR}/stkTargets.cmake") diff --git a/packages/stk/cmake/stkLapackGeneric.cmake b/packages/stk/cmake/stkLapackGeneric.cmake new file mode 100644 index 000000000000..b46b42d2d60a --- /dev/null +++ b/packages/stk/cmake/stkLapackGeneric.cmake @@ -0,0 +1,2 @@ +find_dependency(BLAS) +find_dependency(LAPACK) \ No newline at end of file diff --git a/packages/stk/cmake/stkLapackSierra.cmake b/packages/stk/cmake/stkLapackSierra.cmake new file mode 100644 index 000000000000..79446eb5c279 --- /dev/null +++ b/packages/stk/cmake/stkLapackSierra.cmake @@ -0,0 +1 @@ +find_dependency(SierraLapack) \ No newline at end of file diff --git a/packages/stk/stk_balance/stk_balance/CMakeLists.txt b/packages/stk/stk_balance/stk_balance/CMakeLists.txt index ce6d1321094c..822195892ba1 100644 --- a/packages/stk/stk_balance/stk_balance/CMakeLists.txt +++ b/packages/stk/stk_balance/stk_balance/CMakeLists.txt @@ -70,9 +70,14 @@ FILE(GLOB SOURCES_MAIN ${CMAKE_CURRENT_SOURCE_DIR}/balance_main/*.cpp) FILE(GLOB SOURCES_M2N_MAIN ${CMAKE_CURRENT_SOURCE_DIR}/m2n_main/*.cpp) if(HAVE_STK_Trilinos) + TRIBITS_ADD_LIBRARY(search_tolerance_algs + NOINSTALLHEADERS ${HEADERS_SEARCH_TOLERANCE_ALGS} + SOURCES ${SOURCES_SEARCH_TOLERANCE_ALGS} + ) + TRIBITS_ADD_LIBRARY(stk_balance_lib - NOINSTALLHEADERS ${HEADERS} ${HEADERS_INTERNAL} ${HEADERS_M2N} ${HEADERS_SETUP} ${HEADERS_SEARCH_TOLERANCE} ${HEADERS_SEARCH_TOLERANCE_ALGS} - SOURCES ${SOURCES} ${SOURCES_INTERNAL} ${SOURCES_M2N} ${SOURCES_SETUP} ${SOURCES_SEARCH_TOLERANCE_ALGS} + NOINSTALLHEADERS ${HEADERS} ${HEADERS_INTERNAL} ${HEADERS_M2N} ${HEADERS_SETUP} + SOURCES ${SOURCES} ${SOURCES_INTERNAL} ${SOURCES_M2N} ${SOURCES_SETUP} ) TRIBITS_ADD_EXECUTABLE(stk_balance @@ -87,7 +92,12 @@ if(HAVE_STK_Trilinos) else() find_package(Zoltan2Core REQUIRED) - add_library(stk_balance_lib ${SOURCES} ${SOURCES_INTERNAL} ${SOURCES_M2N} ${SOURCES_SETUP} 
${SOURCES_SEARCH_TOLERANCE_ALGS}) + add_library(search_tolerance_algs ${SOURCES_SEARCH_TOLERANCE_ALGS}) + target_link_libraries(search_tolerance_algs PUBLIC stk_mesh_base) + + add_library(stk_balance_lib ${SOURCES} ${SOURCES_INTERNAL} ${SOURCES_M2N} ${SOURCES_SETUP}) + + target_link_libraries(stk_balance_lib PUBLIC search_tolerance_algs) target_link_libraries(stk_balance_lib PUBLIC stk_io) target_link_libraries(stk_balance_lib PUBLIC stk_tools_lib) target_link_libraries(stk_balance_lib PUBLIC stk_util_registry) @@ -101,6 +111,10 @@ else() target_link_libraries(stk_balance_m2n PUBLIC stk_balance_lib) endif() +target_include_directories(search_tolerance_algs PUBLIC + $ + $ +) target_include_directories(stk_balance_lib PUBLIC $ $ @@ -120,7 +134,8 @@ INSTALL(FILES ${HEADERS_SEARCH_TOLERANCE_ALGS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_balance/search_tolerance_algs) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_balance_lib DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS search_tolerance_algs EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_balance_lib EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) INSTALL(TARGETS stk_balance DESTINATION ${STK_INSTALL_BINDIR}) endif() diff --git a/packages/stk/stk_balance/stk_balance/internal/Diagnostics.hpp b/packages/stk/stk_balance/stk_balance/internal/Diagnostics.hpp index 853d13536ea9..164095c039d3 100644 --- a/packages/stk/stk_balance/stk_balance/internal/Diagnostics.hpp +++ b/packages/stk/stk_balance/stk_balance/internal/Diagnostics.hpp @@ -127,7 +127,7 @@ class MultiUnsignedDiagnostic : public Diagnostic void store_value(unsigned column, int rank, unsigned value) { m_localValues[column][rank] = value; } double get_rank_value(unsigned column, int rank) { return m_values[column][rank]; } - virtual unsigned num_columns() { return m_numColumns; } + virtual unsigned num_columns() override { return m_numColumns; } virtual void collect_data(stk::ParallelMachine comm, int numRanks) 
override; virtual void process_data(stk::ParallelMachine comm) override; diff --git a/packages/stk/stk_balance/stk_balance/internal/OutputMesh.cpp b/packages/stk/stk_balance/stk_balance/internal/OutputMesh.cpp index 345b341e3c0f..6d44f54ea7b7 100644 --- a/packages/stk/stk_balance/stk_balance/internal/OutputMesh.cpp +++ b/packages/stk/stk_balance/stk_balance/internal/OutputMesh.cpp @@ -49,7 +49,6 @@ OutputMesh::OutputMesh(const InputMesh& inputMesh, m_bulk(m_inputMesh.get_bulk().parallel()), m_meta(m_bulk.mesh_meta_data()) { - m_meta.use_simple_fields(); clone_input_mesh(); move_subdomain_to_owning_processor(); compute_rebalance_diagnostics(); diff --git a/packages/stk/stk_balance/stk_balance/internal/OutputSerializerBulkData.cpp b/packages/stk/stk_balance/stk_balance/internal/OutputSerializerBulkData.cpp index f54c1bf6761a..df3341e26744 100644 --- a/packages/stk/stk_balance/stk_balance/internal/OutputSerializerBulkData.cpp +++ b/packages/stk/stk_balance/stk_balance/internal/OutputSerializerBulkData.cpp @@ -39,15 +39,21 @@ namespace stk { namespace balance { OutputSerializerBulkData::OutputSerializerBulkData(ParallelMachine parallel) - : BulkData(std::make_shared(), parallel, stk::mesh::BulkData::NO_AUTO_AURA, true) + : BulkData(std::make_shared(), parallel, stk::mesh::BulkData::NO_AUTO_AURA +#ifdef SIERRA_MIGRATION + , true +#endif + ) { - mesh_meta_data().use_simple_fields(); } OutputSerializerBulkData::OutputSerializerBulkData(unsigned spatialDim, ParallelMachine parallel) - : BulkData(std::make_shared(spatialDim), parallel, stk::mesh::BulkData::NO_AUTO_AURA, true) + : BulkData(std::make_shared(), parallel, stk::mesh::BulkData::NO_AUTO_AURA +#ifdef SIERRA_MIGRATION + , true +#endif + ) { - mesh_meta_data().use_simple_fields(); } void diff --git a/packages/stk/stk_balance/stk_balance/io/BalanceIO.cpp b/packages/stk/stk_balance/stk_balance/io/BalanceIO.cpp index f458f26e60a8..2cf51b9f1b6b 100644 --- a/packages/stk/stk_balance/stk_balance/io/BalanceIO.cpp +++ 
b/packages/stk/stk_balance/stk_balance/io/BalanceIO.cpp @@ -82,10 +82,8 @@ BalanceIO::BalanceIO(MPI_Comm comm, const BalanceSettings& settings) m_copyMeta(m_copyBulk->mesh_meta_data()), m_mesh(nullptr) { - m_inputMeta.use_simple_fields(); m_inputMeta.set_coordinate_field_name(m_settings.getCoordinateFieldName()); - m_copyMeta.use_simple_fields(); } BalanceMesh& BalanceIO::initial_decomp() @@ -110,7 +108,6 @@ BalanceMesh& BalanceIO::initial_decomp() void BalanceIO::write(BalanceMesh& mesh) { stk::io::StkMeshIoBroker outputBroker; - outputBroker.use_simple_fields(); outputBroker.set_bulk_data(mesh.get_bulk()); outputBroker.set_attribute_field_ordering_stored_by_part_ordinal(m_inputBroker.get_attribute_field_ordering_stored_by_part_ordinal()); m_inputBroker.cache_entity_list_for_transient_steps(true); diff --git a/packages/stk/stk_balance/stk_balance/m2n/M2NOutputMesh.cpp b/packages/stk/stk_balance/stk_balance/m2n/M2NOutputMesh.cpp index 7ea17d3b7004..a3064d416455 100644 --- a/packages/stk/stk_balance/stk_balance/m2n/M2NOutputMesh.cpp +++ b/packages/stk/stk_balance/stk_balance/m2n/M2NOutputMesh.cpp @@ -48,7 +48,6 @@ OutputMesh::OutputMesh(const InputMesh& inputMesh, m_bulk(m_inputMesh.get_bulk().parallel()), m_meta(m_bulk.mesh_meta_data()) { - m_meta.use_simple_fields(); clone_input_mesh(); move_subdomain_to_owning_processor(); } diff --git a/packages/stk/stk_balance/stk_balance/m2n/M2NOutputSerializerBulkData.cpp b/packages/stk/stk_balance/stk_balance/m2n/M2NOutputSerializerBulkData.cpp index 5ce52af9d924..b184d803ef4e 100644 --- a/packages/stk/stk_balance/stk_balance/m2n/M2NOutputSerializerBulkData.cpp +++ b/packages/stk/stk_balance/stk_balance/m2n/M2NOutputSerializerBulkData.cpp @@ -40,15 +40,21 @@ namespace balance { namespace m2n { OutputSerializerBulkData::OutputSerializerBulkData(ParallelMachine parallel) - : BulkData(std::make_shared(), parallel, stk::mesh::BulkData::NO_AUTO_AURA, true) + : BulkData(std::make_shared(), parallel, 
stk::mesh::BulkData::NO_AUTO_AURA +#ifdef SIERRA_MIGRATION + , true +#endif + ) { - mesh_meta_data().use_simple_fields(); } OutputSerializerBulkData::OutputSerializerBulkData(unsigned spatialDim, ParallelMachine parallel) - : BulkData(std::make_shared(spatialDim), parallel, stk::mesh::BulkData::NO_AUTO_AURA, true) + : BulkData(std::make_shared(spatialDim), parallel, stk::mesh::BulkData::NO_AUTO_AURA +#ifdef SIERRA_MIGRATION + , true +#endif + ) { - mesh_meta_data().use_simple_fields(); } void diff --git a/packages/stk/stk_balance/stk_balance/m2n/m2nRebalance.cpp b/packages/stk/stk_balance/stk_balance/m2n/m2nRebalance.cpp index 9374e13baea2..f15edf71376c 100644 --- a/packages/stk/stk_balance/stk_balance/m2n/m2nRebalance.cpp +++ b/packages/stk/stk_balance/stk_balance/m2n/m2nRebalance.cpp @@ -123,7 +123,6 @@ void rebalance_m2n(stk::balance::M2NBalanceSettings &balanceSettings, MPI_Comm c print_banner(sierra::Env::outputP0()); std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); - bulk->mesh_meta_data().use_simple_fields(); stk::io::StkMeshIoBroker ioBroker; stk::io::fill_mesh_preexisting(ioBroker, balanceSettings.get_input_filename(), *bulk); diff --git a/packages/stk/stk_balance/stk_balance/setup/LifeCycle.cpp b/packages/stk/stk_balance/stk_balance/setup/LifeCycle.cpp index 69ec4a6df1b7..afeec92fce53 100644 --- a/packages/stk/stk_balance/stk_balance/setup/LifeCycle.cpp +++ b/packages/stk/stk_balance/stk_balance/setup/LifeCycle.cpp @@ -169,7 +169,6 @@ void LifeCycle::rebalance() { std::shared_ptr bulk = stk::mesh::MeshBuilder(m_comm).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::StkMeshIoBroker ioBroker; meta.set_coordinate_field_name(m_settings.getCoordinateFieldName()); diff --git a/packages/stk/stk_coupling/Jamfile b/packages/stk/stk_coupling/Jamfile index 2fa157eb7681..cba21de00cdf 100644 --- a/packages/stk/stk_coupling/Jamfile +++ b/packages/stk/stk_coupling/Jamfile @@ -48,7 +48,8 @@ if $(RTenv-arg) = 
"user" { project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_coupling-root-inc) : usage-requirements $(stk_coupling-root-inc) diff --git a/packages/stk/stk_coupling/stk_coupling/CMakeLists.txt b/packages/stk/stk_coupling/stk_coupling/CMakeLists.txt index a63080b7aedf..851e84cdfe39 100644 --- a/packages/stk/stk_coupling/stk_coupling/CMakeLists.txt +++ b/packages/stk/stk_coupling/stk_coupling/CMakeLists.txt @@ -55,5 +55,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_coupling) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_coupling DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_coupling EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_doc_tests/stk_balance/howToFixPMR1Violation.cpp b/packages/stk/stk_doc_tests/stk_balance/howToFixPMR1Violation.cpp index c0c8bc75686d..aeef2a252edc 100644 --- a/packages/stk/stk_doc_tests/stk_balance/howToFixPMR1Violation.cpp +++ b/packages/stk/stk_doc_tests/stk_balance/howToFixPMR1Violation.cpp @@ -45,7 +45,6 @@ namespace TEST(StkMeshHowTo, FixPMR1Violation) { stk::mesh::MetaData meta; - meta.use_simple_fields(); std::shared_ptr bulkData = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); stk::io::fill_mesh("generated:4x4x4|sideset:xX", *bulkData); diff --git a/packages/stk/stk_doc_tests/stk_balance/howToUseStkBalance.cpp b/packages/stk/stk_doc_tests/stk_balance/howToUseStkBalance.cpp index 007bccd6335d..9f4366d4b98e 100644 --- a/packages/stk/stk_doc_tests/stk_balance/howToUseStkBalance.cpp +++ b/packages/stk/stk_doc_tests/stk_balance/howToUseStkBalance.cpp @@ -24,7 +24,7 @@ class RcbSettings : public stk::balance::BalanceSettings }; //ENDRcbSettings -class StkBalanceHowTo : public stk::unit_test_util::simple_fields::MeshFixture +class StkBalanceHowTo : public stk::unit_test_util::MeshFixture {}; bool is_mesh_balanced(const stk::mesh::BulkData& bulk) @@ -377,7 +377,7 @@ class 
MultipleCriteriaFieldSettings : public ParmetisSettings } virtual ~MultipleCriteriaFieldSettings() override = default; - virtual bool isMultiCriteriaRebalance() const { return true;} + virtual bool isMultiCriteriaRebalance() const override { return true;} protected: MultipleCriteriaFieldSettings() = delete; diff --git a/packages/stk/stk_doc_tests/stk_expreval/BasicHostEvaluation.cpp b/packages/stk/stk_doc_tests/stk_expreval/BasicHostEvaluation.cpp index 32da98c8e11d..2724e545ec86 100644 --- a/packages/stk/stk_doc_tests/stk_expreval/BasicHostEvaluation.cpp +++ b/packages/stk/stk_doc_tests/stk_expreval/BasicHostEvaluation.cpp @@ -134,6 +134,7 @@ TEST(HostEvaluation, testFunctions) EXPECT_DOUBLE_EQ(evaluate("cosine_ramp(1/3, 0, 1)"), 0.25); EXPECT_DOUBLE_EQ(evaluate("cosine_ramp(1/3, 1)"), 0.25); EXPECT_DOUBLE_EQ(evaluate("cosine_ramp(1/3)"), 0.25); + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(1/4, 0, 1)"), 0.25); EXPECT_DOUBLE_EQ(evaluate("haversine_pulse(1/6, 0, 1)"), 0.25); EXPECT_DOUBLE_EQ(evaluate("point2d(1, 0, 1, 1)"), 0.5); EXPECT_DOUBLE_EQ(evaluate("point3d(0, -1, 0, 1, 1)"), 0.5); @@ -162,3 +163,4 @@ TEST(HostEvaluation, testPDFFunctions) //-END } // namespace + diff --git a/packages/stk/stk_doc_tests/stk_io/QueryExoVars.cpp b/packages/stk/stk_doc_tests/stk_io/QueryExoVars.cpp index e7222425a0cb..06163cf8e73f 100644 --- a/packages/stk/stk_doc_tests/stk_io/QueryExoVars.cpp +++ b/packages/stk/stk_doc_tests/stk_io/QueryExoVars.cpp @@ -12,7 +12,7 @@ namespace { -class QueryExoVars : public stk::unit_test_util::simple_fields::MeshFixture +class QueryExoVars : public stk::unit_test_util::MeshFixture { protected: void read_meta(stk::io::StkMeshIoBroker &stkIo, const std::string &filename) @@ -40,7 +40,6 @@ TEST_F(QueryExoVars, nodeVars_getNames) if(stk::parallel_machine_size(get_comm()) == 1) { stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); read_meta(stkIo, "allTypesOfData.exo"); expect_names({{"dispx","{UNIVERSAL}"}, {"dispy","{UNIVERSAL}"}, @@ -56,7 
+55,6 @@ TEST_F(QueryExoVars, elemVars_getNames) if(stk::parallel_machine_size(get_comm()) == 1) { stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); read_meta(stkIo, "elemData.exo"); expect_names({{"vonmises","block_1"}, {"vonmises","block_11"}}, stkIo.get_elem_var_names()); @@ -68,7 +66,6 @@ TEST_F(QueryExoVars, nodesetVars_getNames) if(stk::parallel_machine_size(get_comm()) == 1) { stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); read_meta(stkIo, "nodesetData.exo"); expect_names({{"apressure","nodelist_2"}, {"dispx","nodelist_1"}, @@ -82,7 +79,6 @@ TEST_F(QueryExoVars, sidesetVars_getNames) if(stk::parallel_machine_size(get_comm()) == 1) { stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); read_meta(stkIo, "allTypesOfData.exo"); expect_names({{"appliedpressure_sideset_30","surface_hex8_quad4_30"}, {"appliedpressure_sideset_31","surface_hex8_quad4_31"}, diff --git a/packages/stk/stk_doc_tests/stk_io/RenamedInputFields.cpp b/packages/stk/stk_doc_tests/stk_io/RenamedInputFields.cpp index 163baea79612..18779caa5367 100644 --- a/packages/stk/stk_doc_tests/stk_io/RenamedInputFields.cpp +++ b/packages/stk/stk_doc_tests/stk_io/RenamedInputFields.cpp @@ -12,7 +12,7 @@ namespace { -class InputNodesetDataCalledDispX : public stk::unit_test_util::simple_fields::MeshFixture +class InputNodesetDataCalledDispX : public stk::unit_test_util::MeshFixture { protected: ~InputNodesetDataCalledDispX() @@ -24,7 +24,6 @@ class InputNodesetDataCalledDispX : public stk::unit_test_util::simple_fields::M { delete stkIo; stkIo = new stk::io::StkMeshIoBroker; - stkIo->use_simple_fields(); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); stkIo->set_bulk_data(get_bulk()); stkIo->add_mesh_database(filename, stk::io::READ_MESH); diff --git a/packages/stk/stk_doc_tests/stk_io/addFileContentsToOutputDatabase.cpp b/packages/stk/stk_doc_tests/stk_io/addFileContentsToOutputDatabase.cpp index 9d53bedf3dae..ecc816d22978 100644 --- 
a/packages/stk/stk_doc_tests/stk_io/addFileContentsToOutputDatabase.cpp +++ b/packages/stk/stk_doc_tests/stk_io/addFileContentsToOutputDatabase.cpp @@ -78,7 +78,6 @@ TEST(StkMeshIoBrokerHowTo, addFileContentsToOutputDatabase) // ============================================================ //+ EXAMPLE stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t ifh = stkIo.add_mesh_database("9x9x9|shell:xyzXYZ", "generated", stk::io::READ_MESH); stkIo.set_active_mesh(ifh); stkIo.create_input_mesh(); @@ -107,7 +106,6 @@ TEST(StkMeshIoBrokerHowTo, addFileContentsToOutputDatabase) { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); // Verify output mesh contains the data in // 'input_file' as information records... Note that // the output mesh will contain all element blocks; however, the diff --git a/packages/stk/stk_doc_tests/stk_io/appendResults.cpp b/packages/stk/stk_doc_tests/stk_io/appendResults.cpp index f59520658d70..669708550fb5 100644 --- a/packages/stk/stk_doc_tests/stk_io/appendResults.cpp +++ b/packages/stk/stk_doc_tests/stk_io/appendResults.cpp @@ -46,7 +46,7 @@ namespace { -class StkIoHowToAppend : public stk::unit_test_util::simple_fields::MeshFixture +class StkIoHowToAppend : public stk::unit_test_util::MeshFixture { protected: void initialize_mesh_and_field() @@ -61,7 +61,6 @@ class StkIoHowToAppend : public stk::unit_test_util::simple_fields::MeshFixture stk::io::DatabasePurpose purpose) { stk::io::StkMeshIoBroker stkIo(get_comm()); - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); size_t outputFileIndex = stkIo.create_output_mesh(ouputName, purpose); stkIo.add_field(outputFileIndex, *nodeField); @@ -81,7 +80,6 @@ class StkIoHowToAppend : public stk::unit_test_util::simple_fields::MeshFixture void expect_ten_steps_in_file(const std::string& ouputName) { stk::io::StkMeshIoBroker stkIo(get_comm()); - stkIo.use_simple_fields(); stkIo.add_mesh_database(ouputName, stk::io::READ_MESH); 
stkIo.create_input_mesh(); EXPECT_EQ(10, stkIo.get_num_time_steps()); diff --git a/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnRead.cpp b/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnRead.cpp index c3ecc5b0bc9b..6a77c3d20b87 100644 --- a/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnRead.cpp +++ b/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnRead.cpp @@ -67,7 +67,6 @@ namespace { //+ The value of the field at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -107,7 +106,6 @@ namespace { //+ requested for input from the database field "disp" which //+ does not exist. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnReadThrow.cpp b/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnReadThrow.cpp index 4215cd978b86..62110250281a 100644 --- a/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnReadThrow.cpp +++ b/packages/stk/stk_doc_tests/stk_io/handleMissingFieldOnReadThrow.cpp @@ -66,7 +66,6 @@ TEST(StkMeshIoBrokerHowTo, handleMissingFieldOnReadThrow) //+ The value of the field at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -112,7 +111,6 @@ TEST(StkMeshIoBrokerHowTo, handleMissingFieldOnReadThrow) //+ requested for input from the database field "disp" which //+ does not exist. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/howToCreateAndWriteNodesetOrSideset.cpp b/packages/stk/stk_doc_tests/stk_io/howToCreateAndWriteNodesetOrSideset.cpp index eeaf5d8064fa..5fff5c4d7efa 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToCreateAndWriteNodesetOrSideset.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToCreateAndWriteNodesetOrSideset.cpp @@ -163,7 +163,7 @@ void verify_nodesetField_in_file(stk::mesh::BulkData& input_bulk, stk::mesh::Ent verify_field_in_file(input_bulk, node, nodesetName, fieldName, filename); } -class MeshWithNodeset : public stk::unit_test_util::simple_fields::MeshFixture +class MeshWithNodeset : public stk::unit_test_util::MeshFixture { }; @@ -211,7 +211,7 @@ void verify_sidesetField_in_file(stk::mesh::BulkData& input_bulk, stk::mesh::Ent verify_field_in_file(input_bulk, side, sidesetName, fieldName, filename); } -class MeshWithSideset : public stk::unit_test_util::simple_fields::MeshFixture +class MeshWithSideset : public stk::unit_test_util::MeshFixture { }; diff --git a/packages/stk/stk_doc_tests/stk_io/howToCreateAssemblies.cpp b/packages/stk/stk_doc_tests/stk_io/howToCreateAssemblies.cpp index 41a3d96e7bb3..8ba2aecd0c94 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToCreateAssemblies.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToCreateAssemblies.cpp @@ -49,7 +49,6 @@ TEST(Assemblies, createAssemblyWithElementBlocks) { const unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& block1Part = meta.declare_part_with_topology("block_1", stk::topology::QUAD_4_2D); stk::mesh::Part& block2Part = meta.declare_part_with_topology("block_2", stk::topology::TRI_3_2D); @@ -79,7 +78,6 @@ TEST(Assemblies, createAssemblyWithElementBlocksAndSurfaces) { const unsigned spatialDim = 
3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& block1Part = meta.declare_part_with_topology("block_1", stk::topology::HEX_8); stk::mesh::Part& block2Part = meta.declare_part_with_topology("block_2", stk::topology::WEDGE_6); @@ -142,7 +140,6 @@ TEST(Assemblies, cannotCreateAssemblyWithMixedRanks) builder.set_spatial_dimension(3); std::shared_ptr bulk = builder.create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part& block1Part = meta.declare_part_with_topology("block_1", stk::topology::HEX_8); stk::mesh::Part& surface1Part = meta.declare_part_with_topology("surface_1", stk::topology::QUAD_4); diff --git a/packages/stk/stk_doc_tests/stk_io/howToReadWriteQa.cpp b/packages/stk/stk_doc_tests/stk_io/howToReadWriteQa.cpp index 335e53e917b6..92b15800189b 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToReadWriteQa.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToReadWriteQa.cpp @@ -11,13 +11,12 @@ namespace { -class StkIoHowToQaRecords : public stk::unit_test_util::simple_fields::MeshFixture +class StkIoHowToQaRecords : public stk::unit_test_util::MeshFixture { protected: void write_qa_information() { stk::io::StkMeshIoBroker stkIo(get_comm()); - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); size_t fileId = stkIo.create_output_mesh(filename, stk::io::WRITE_RESULTS); stkIo.set_name_and_version_for_qa_record(fileId, codeName, codeVersion); @@ -29,7 +28,6 @@ class StkIoHowToQaRecords : public stk::unit_test_util::simple_fields::MeshFixtu void read_qa_information_and_verify() { stk::io::StkMeshIoBroker stkIo(get_comm()); - stkIo.use_simple_fields(); size_t fileId = stkIo.add_mesh_database(filename, stk::io::READ_MESH); stkIo.set_active_mesh(fileId); std::vector qas = stkIo.get_qa_records(); diff --git a/packages/stk/stk_doc_tests/stk_io/howToUseTextMeshWithStkIO.cpp b/packages/stk/stk_doc_tests/stk_io/howToUseTextMeshWithStkIO.cpp index 534973cd0809..fa51c116edb9 100644 
--- a/packages/stk/stk_doc_tests/stk_io/howToUseTextMeshWithStkIO.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToUseTextMeshWithStkIO.cpp @@ -49,7 +49,6 @@ namespace TEST(StkIoHowTo, useTextMesh) { stk::io::StkMeshIoBroker stkIo(MPI_COMM_WORLD); - stkIo.use_simple_fields(); std::string textMeshDesc = "textmesh:0,1,HEX_8,1,2,3,4,5,6,7,8"; @@ -67,7 +66,6 @@ TEST(StkIoHowTo, useTextMesh) TEST(StkIoHowTo, useTextMesh_withAllOptions) { stk::io::StkMeshIoBroker stkIo(MPI_COMM_WORLD); - stkIo.use_simple_fields(); std::string textMeshDesc = "textmesh:" diff --git a/packages/stk/stk_doc_tests/stk_io/howToWriteMesh.cpp b/packages/stk/stk_doc_tests/stk_io/howToWriteMesh.cpp index b19a991840af..96290fb05312 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToWriteMesh.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToWriteMesh.cpp @@ -19,8 +19,6 @@ TEST(StkIoHowTo, WriteMesh) std::string filename = "output.exo"; { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh("generated:1x1x4", *bulk); stk::io::StkMeshIoBroker stkIo; @@ -32,8 +30,6 @@ TEST(StkIoHowTo, WriteMesh) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh(filename, *bulk); std::vector entityCounts; @@ -46,19 +42,16 @@ TEST(StkIoHowTo, WriteMesh) TEST(StkIoHowTo, generateMeshWith64BitIds) { - std::string meshSpec = stk::unit_test_util::simple_fields::get_option("-i", "1x1x4"); + std::string meshSpec = stk::unit_test_util::get_option("-i", "1x1x4"); std::string fullMeshSpec = "generated:"+meshSpec; std::string filename = "output.exo"; stk::io::StkMeshIoBroker inputBroker; - inputBroker.use_simple_fields(); //+ Set properties to ensure that 64-bit integers will be used inputBroker.property_add(Ioss::Property("INTEGER_SIZE_API" , 8)); 
inputBroker.property_add(Ioss::Property("INTEGER_SIZE_DB" , 8)); std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh_preexisting(inputBroker, fullMeshSpec, *bulk); stk::io::write_mesh_with_large_ids_and_fields(filename, *bulk); diff --git a/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithEdges.cpp b/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithEdges.cpp index c95037d85d46..4984ec4a897a 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithEdges.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithEdges.cpp @@ -24,7 +24,6 @@ TEST(StkIoHowTo, WriteMeshWithEdges) builder.set_spatial_dimension(3); std::shared_ptr bulk = builder.create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part* edgeBlockPart = &meta.declare_part_with_topology("edgeBlock", stk::topology::LINE_2); stk::io::put_edge_block_io_part_attribute(*edgeBlockPart); @@ -42,7 +41,6 @@ TEST(StkIoHowTo, WriteMeshWithEdges) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh(filename, *bulk); const stk::mesh::Part* edgeBlockPart = meta.get_part("edgeBlock"); @@ -77,7 +75,7 @@ TEST(StkIoHowTo, Write2DMeshWithEdges) std::string filename = "output2D.exo"; { unsigned nx = 2, ny = 2; - stk::mesh::fixtures::simple_fields::QuadFixture quadFixture(MPI_COMM_WORLD, nx, ny); + stk::mesh::fixtures::QuadFixture quadFixture(MPI_COMM_WORLD, nx, ny); stk::mesh::Part& sidesetPart = quadFixture.m_meta.declare_part("surface_1", stk::topology::EDGE_RANK); stk::mesh::Part& edgeBlockPart = quadFixture.m_meta.declare_part_with_topology("edgeBlock", stk::topology::LINE_2); @@ -108,7 +106,6 @@ TEST(StkIoHowTo, Write2DMeshWithEdges) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); 
stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh(filename, *bulk); EXPECT_EQ(2u, meta.spatial_dimension()); diff --git a/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithFaces.cpp b/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithFaces.cpp index 5e4bdaf519a9..fa6011995862 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithFaces.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithFaces.cpp @@ -22,7 +22,6 @@ TEST(StkIoHowTo, WriteMeshWithFaceBlock) builder.set_spatial_dimension(3); std::shared_ptr bulk = builder.create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part* part = &meta.declare_part_with_topology("faceBlock", stk::topology::QUAD_4); stk::io::put_face_block_io_part_attribute(*part); @@ -40,8 +39,6 @@ TEST(StkIoHowTo, WriteMeshWithFaceBlock) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh(filename, *bulk); std::vector entityCounts; diff --git a/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithInternalSidesets.cpp b/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithInternalSidesets.cpp index 91ff72415420..f9d7438b2451 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithInternalSidesets.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToWriteMeshWithInternalSidesets.cpp @@ -25,7 +25,7 @@ void verify_element_side_pairs(stk::mesh::BulkData& bulkData, const ExodusSideSe for(;iter!=goldSideset.end();++iter) { int id = iter->first; - stk::mesh::Part *part = stk::unit_test_util::simple_fields::get_surface_part_with_id(bulkData.mesh_meta_data(), id); + stk::mesh::Part *part = stk::unit_test_util::get_surface_part_with_id(bulkData.mesh_meta_data(), id); stk::mesh::SideSet &sset = bulkData.get_sideset(*part); ElementSidePairs goldSet = iter->second; ASSERT_EQ(goldSet.size(), sset.size()); @@ -83,7 
+83,6 @@ void testSidesetCreation(TestData &testData) { std::shared_ptr bulkData = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); stk::mesh::MetaData& meta = bulkData->mesh_meta_data(); - meta.use_simple_fields(); stk::io::StkMeshIoBroker stkIo; stkIo.property_add(Ioss::Property("DECOMPOSITION_METHOD", "RCB")); stkIo.set_bulk_data(bulkData); diff --git a/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithEdges.cpp b/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithEdges.cpp index 53afba8a0e08..bcb2acbb7094 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithEdges.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithEdges.cpp @@ -25,7 +25,6 @@ TEST(StkIoHowTo, WriteRestartWithEdges) builder.set_spatial_dimension(3); std::shared_ptr bulk = builder.create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part* part = &meta.declare_part_with_topology("edgeBlock", stk::topology::LINE_2); stk::mesh::Field& edgeField = meta.declare_field(stk::topology::EDGE_RANK, "edgeField", numStates); @@ -51,7 +50,6 @@ TEST(StkIoHowTo, WriteRestartWithEdges) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field& edgeField = meta.declare_field(stk::topology::EDGE_RANK, "edgeField", numStates); stk::mesh::put_field_on_mesh(edgeField, meta.universal_part(), nullptr); diff --git a/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithFaces.cpp b/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithFaces.cpp index 9ad000659e19..2f016846a66d 100644 --- a/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithFaces.cpp +++ b/packages/stk/stk_doc_tests/stk_io/howToWriteRestartWithFaces.cpp @@ -25,7 +25,6 @@ TEST(StkIoHowTo, WriteRestartWithFaceBlock) builder.set_spatial_dimension(3); std::shared_ptr bulk = builder.create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - 
meta.use_simple_fields(); stk::mesh::Part* part = &meta.declare_part_with_topology("faceBlock", stk::topology::QUAD_4); stk::mesh::Field& faceField = meta.declare_field(stk::topology::FACE_RANK, "faceField", numStates); @@ -52,7 +51,6 @@ TEST(StkIoHowTo, WriteRestartWithFaceBlock) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field& faceField = meta.declare_field(stk::topology::FACE_RANK, "faceField", numStates); stk::mesh::put_field_on_mesh(faceField, meta.universal_part(), nullptr); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateFieldCyclic.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateFieldCyclic.cpp index 928a130b249f..415403de9b45 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateFieldCyclic.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateFieldCyclic.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateFieldCyclic) //+ The value of the field at each node is 0.0 at time 0.0, //+ 10.0 at time 10.0, and 20.0 at time 20.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:1x1x1|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -121,7 +120,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateFieldCyclic) //+ stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t idx = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateFieldNegativeTime.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateFieldNegativeTime.cpp index f002b9458944..31f133c1f6cf 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateFieldNegativeTime.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateFieldNegativeTime.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateFieldNegativeTime) //+ The value of the field at each node is 
0.0 at time 0.0, //+ -1.0 at time -1.0, and -2.0 at time -2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -108,7 +107,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateFieldNegativeTime) //+ of 0.1 (-2.0, -1.9, -1.8, ..., 0.0) and verify that //+ the field contains the correct interpolated value. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateFieldNonMonotonicTime.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateFieldNonMonotonicTime.cpp index 908cd7ced4c7..4b7e6f43dcdc 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateFieldNonMonotonicTime.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateFieldNonMonotonicTime.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateFieldNonMonotonicTime) //+ The value of the field at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -110,7 +109,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateFieldNonMonotonicTime) //+ of 0.1 (0.0, 0.1, 0.2, ..., 2.0) and verify that //+ the field contains the correct interpolated value. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateIntegerFieldInvalid.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateIntegerFieldInvalid.cpp index 6072c40d89e1..fb542a184cf7 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateIntegerFieldInvalid.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateIntegerFieldInvalid.cpp @@ -64,7 +64,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateIntegerFieldInvalid) //+ integer interpolated field. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateNodalField.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateNodalField.cpp index c43e6ddd1a1a..9581f962cada 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateNodalField.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateNodalField.cpp @@ -68,7 +68,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateNodalField) //+ The value of the field at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -107,7 +106,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateNodalField) //+ of 0.1 (0.0, 0.1, 0.2, 0.3, ..., 2.0) and verify that //+ the field contains the correct interpolated value. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateOutsideRange.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateOutsideRange.cpp index 76c1f1f143e1..7228d8f74545 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateOutsideRange.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateOutsideRange.cpp @@ -67,7 +67,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateOutsideRange) //+ with times 1.0 and 2.0. //+ The value of the field at each node is equal to the 'time' stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -115,7 +114,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateOutsideRange) //+ The field values from 1.0 to 2.0 will be interpolated //+ stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/interpolateSingleStep.cpp b/packages/stk/stk_doc_tests/stk_io/interpolateSingleStep.cpp index 3d41ab49d179..483ff50032f3 100644 --- a/packages/stk/stk_doc_tests/stk_io/interpolateSingleStep.cpp +++ b/packages/stk/stk_doc_tests/stk_io/interpolateSingleStep.cpp @@ -67,7 +67,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateSingleStep) //+ Create a mesh with the nodal field "temp" for 1 timestep. //+ The value of the field at each node is 1.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -105,7 +104,6 @@ TEST(StkMeshIoBrokerHowTo, interpolateSingleStep) //+ enough steps to do any interpolation. 
//+ stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readAttributes.cpp b/packages/stk/stk_doc_tests/stk_io/readAttributes.cpp index 0691b02c3e9e..ce65c81243a9 100644 --- a/packages/stk/stk_doc_tests/stk_io/readAttributes.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readAttributes.cpp @@ -52,7 +52,7 @@ namespace { -class ExodusFileWithAttributes : public stk::unit_test_util::simple_fields::MeshFixture { }; +class ExodusFileWithAttributes : public stk::unit_test_util::MeshFixture { }; stk::mesh::FieldVector get_attribute_fields_for_part(const stk::mesh::MetaData &meta, const stk::mesh::Part *ioPart) { @@ -115,7 +115,6 @@ TEST_F(ExodusFileWithAttributes, addAttribute_haveFieldsWithAttribute) allocate_bulk(stk::mesh::BulkData::AUTO_AURA); stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); stkIo.add_mesh_database("hex_spider.exo", stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialCondition.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialCondition.cpp index 917c94ea4073..5bd8de1b420c 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialCondition.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialCondition.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialCondition) //+ The value of the field at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -106,7 +105,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialCondition) //+ Read the value of the "temp" field at step 2 and populate //+ the nodal field "temperature" for use as an initial condition stk::io::StkMeshIoBroker 
stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialConditionMultiSubset.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialConditionMultiSubset.cpp index 72a458e9f533..829bbeab0848 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialConditionMultiSubset.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialConditionMultiSubset.cpp @@ -79,7 +79,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionMultiSubset) std::string input_filename = "9x9x9|shell:xyzXYZ|variables:element,1|times:1"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(input_filename, "generated", stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialConditionNodalSubset.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialConditionNodalSubset.cpp index 28a15c48e3de..bf47fbd190c2 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialConditionNodalSubset.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialConditionNodalSubset.cpp @@ -84,7 +84,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionNodalSubset) input_filename += "|shell:xyzXYZ|variables:nodal,1|times:1"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(input_filename, "generated", stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialConditionOnce.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialConditionOnce.cpp index 43923f14489a..6f985b07db94 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialConditionOnce.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialConditionOnce.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionOnce) //+ The value of the field at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at 
time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -109,7 +108,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionOnce) //+ call, so verify this by calling the function again at step 3 and //+ then verify that the field values are still those read from step 2. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialConditionSpecifiedTime.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialConditionSpecifiedTime.cpp index d0b4bf47db1f..224430635f80 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialConditionSpecifiedTime.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialConditionSpecifiedTime.cpp @@ -70,7 +70,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionSpecifiedTime) //+ The value of the fields at each node is 0.0 at time 0.0, //+ 1.0 at time 1.0, and 2.0 at time 2.0 stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -118,7 +117,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionSpecifiedTime) //+ to the analysis time passed in to read_defined_input_fields. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialConditionSubset.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialConditionSubset.cpp index a34ef824a2b8..f218ea0120f1 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialConditionSubset.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialConditionSubset.cpp @@ -77,7 +77,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionSubset) std::string input_filename = "9x9x9|shell:xyzXYZ|variables:element,1|times:1"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(input_filename, "generated", stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readInitialConditionTwoFieldSubset.cpp b/packages/stk/stk_doc_tests/stk_io/readInitialConditionTwoFieldSubset.cpp index 10e558a38dcc..83acaec5eca0 100644 --- a/packages/stk/stk_doc_tests/stk_io/readInitialConditionTwoFieldSubset.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readInitialConditionTwoFieldSubset.cpp @@ -78,7 +78,6 @@ TEST(StkMeshIoBrokerHowTo, readInitialConditionTwoFieldSubset) std::string input_filename = "9x9x9|shell:xyzXYZ|variables:element,2|times:1"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(input_filename, "generated", stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/readMesh.cpp b/packages/stk/stk_doc_tests/stk_io/readMesh.cpp index 12ab2e5a8bae..08ef56b9d0ac 100644 --- a/packages/stk/stk_doc_tests/stk_io/readMesh.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readMesh.cpp @@ -61,7 +61,6 @@ TEST(StkMeshIoBrokerHowTo, readMesh) // ============================================================ //BeginBasicReadWrite std::shared_ptr stkMesh = 
stk::mesh::MeshBuilder(communicator).create(); - stkMesh->mesh_meta_data().use_simple_fields(); //+ Create a basic mesh with a hex block, 3 shell blocks, 3 nodesets, and 3 sidesets. const std::string generatedFileName = "generated:8x8x8|shell:xyz|nodeset:xyz|sideset:XYZ"; @@ -76,7 +75,6 @@ TEST(StkMeshIoBrokerHowTo, readMesh) //+ EXAMPLE: //+ Read mesh data from the specified file. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(mesh_file_name, stk::io::READ_MESH); //+ Creates meta data; creates parts diff --git a/packages/stk/stk_doc_tests/stk_io/readMeshDelayFieldAllocation.cpp b/packages/stk/stk_doc_tests/stk_io/readMeshDelayFieldAllocation.cpp index 44aae31d76b0..4538a8a63656 100644 --- a/packages/stk/stk_doc_tests/stk_io/readMeshDelayFieldAllocation.cpp +++ b/packages/stk/stk_doc_tests/stk_io/readMeshDelayFieldAllocation.cpp @@ -52,7 +52,6 @@ TEST(StkMeshIoBrokerHowTo, readMeshDelayFieldAllocation) //+ INITIALIZATION: //+ Create a basic mesh with a hex block, 3 shell blocks, 3 nodesets, and 3 sidesets. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|shell:xyz|nodeset:xyz|sideset:XYZ"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -71,7 +70,6 @@ TEST(StkMeshIoBrokerHowTo, readMeshDelayFieldAllocation) //+ EXAMPLE: //+ Read mesh data from the specified file. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(mesh_name, stk::io::READ_MESH); //+ Creates meta data; creates parts diff --git a/packages/stk/stk_doc_tests/stk_io/replaceBulkData.cpp b/packages/stk/stk_doc_tests/stk_io/replaceBulkData.cpp index a99483dfeb23..d8026edc1d40 100644 --- a/packages/stk/stk_doc_tests/stk_io/replaceBulkData.cpp +++ b/packages/stk/stk_doc_tests/stk_io/replaceBulkData.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, replaceBulkData) //+ INITIALIZATION: //+ Create a basic mesh with a hex block, 3 shell blocks, 3 nodesets, and 3 sidesets. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|shell:xyz|nodeset:xyz|sideset:XYZ"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -87,7 +86,6 @@ TEST(StkMeshIoBrokerHowTo, replaceBulkData) //+ EXAMPLE: //+ Read mesh data from the specified file. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(mesh_name, stk::io::READ_MESH); //+ Creates meta data; creates parts diff --git a/packages/stk/stk_doc_tests/stk_io/requestedResultsFieldName.cpp b/packages/stk/stk_doc_tests/stk_io/requestedResultsFieldName.cpp index acb7ceec3745..49d5f954ccd3 100644 --- a/packages/stk/stk_doc_tests/stk_io/requestedResultsFieldName.cpp +++ b/packages/stk/stk_doc_tests/stk_io/requestedResultsFieldName.cpp @@ -61,7 +61,6 @@ TEST(StkMeshIoBrokerHowTo, writeResults) //+ INITIALIZATION: //+ Create a basic mesh with a hex block, 3 shell blocks, 3 nodesets, and 3 sidesets. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|shell:xyz|nodeset:xyz|sideset:XYZ"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -78,7 +77,6 @@ TEST(StkMeshIoBrokerHowTo, writeResults) //+ EXAMPLE: //+ Read mesh data from the specified file. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(mesh_name, stk::io::READ_MESH); //+ Creates meta data; creates parts diff --git a/packages/stk/stk_doc_tests/stk_io/restartInterpolatedField.cpp b/packages/stk/stk_doc_tests/stk_io/restartInterpolatedField.cpp index be1672938399..3dfb5105214d 100644 --- a/packages/stk/stk_doc_tests/stk_io/restartInterpolatedField.cpp +++ b/packages/stk/stk_doc_tests/stk_io/restartInterpolatedField.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, restartInterpolatedField) //+ Create a "restart database" with several nodal and element fields, //+ and some timesteps... stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|shell:XYZ|" "nodeset:xyz|times:3|variables:nodal,4,element,3,nodeset,2"; @@ -103,7 +102,6 @@ TEST(StkMeshIoBrokerHowTo, restartInterpolatedField) //+ "temp" for 10 timesteps - 0.0, 1.0, ..., 9.0. //+ The value of the field at each node is the 'time' value. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|nodeset:xyz"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -156,7 +154,6 @@ TEST(StkMeshIoBrokerHowTo, restartInterpolatedField) //+ reading the initial condition data from the other database //+ interpolating this data. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t ic = stkIo.add_mesh_database(ic_name, stk::io::READ_MESH); size_t rs = stkIo.add_mesh_database(rs_name, stk::io::READ_RESTART); diff --git a/packages/stk/stk_doc_tests/stk_io/restartTestUtils.hpp b/packages/stk/stk_doc_tests/stk_io/restartTestUtils.hpp index bbda3dc62c51..7cc7c1506278 100644 --- a/packages/stk/stk_doc_tests/stk_io/restartTestUtils.hpp +++ b/packages/stk/stk_doc_tests/stk_io/restartTestUtils.hpp @@ -186,7 +186,6 @@ inline void testMultistateFieldWroteCorrectly(const std::string &resultsFilename { MPI_Comm communicator = MPI_COMM_WORLD; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(resultsFilename, stk::io::READ_RESTART); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/setOptionToNotCollapseSequencedFields.cpp b/packages/stk/stk_doc_tests/stk_io/setOptionToNotCollapseSequencedFields.cpp index 48a8d11c77d4..0fdc177d46a6 100644 --- a/packages/stk/stk_doc_tests/stk_io/setOptionToNotCollapseSequencedFields.cpp +++ b/packages/stk/stk_doc_tests/stk_io/setOptionToNotCollapseSequencedFields.cpp @@ -11,19 +11,19 @@ namespace { -class MultipleNumberedFieldsWithSameBaseName : public stk::unit_test_util::simple_fields::MeshFileFixture { }; +class MultipleNumberedFieldsWithSameBaseName : public stk::unit_test_util::MeshFileFixture { }; //-BEGIN TEST_F(MultipleNumberedFieldsWithSameBaseName, whenReading_collapseToSingleStkField) { - stk::unit_test_util::simple_fields::create_mesh_with__field_1__field_2__field_3(filename, get_comm()); + stk::unit_test_util::create_mesh_with__field_1__field_2__field_3(filename, get_comm()); read_mesh(filename); EXPECT_EQ(1u, get_meta().get_fields(stk::topology::ELEM_RANK).size()); } TEST_F(MultipleNumberedFieldsWithSameBaseName, whenReadingWithoutCollapseOption_threeStkFieldsAreRead) { - 
stk::unit_test_util::simple_fields::create_mesh_with__field_1__field_2__field_3(filename, get_comm()); + stk::unit_test_util::create_mesh_with__field_1__field_2__field_3(filename, get_comm()); stkIo.set_option_to_not_collapse_sequenced_fields(); read_mesh(filename); EXPECT_EQ(3u, get_meta().get_fields(stk::topology::ELEM_RANK).size()); diff --git a/packages/stk/stk_doc_tests/stk_io/singleStepOnRestart.cpp b/packages/stk/stk_doc_tests/stk_io/singleStepOnRestart.cpp index 1fb86b5ad940..a484d41998bd 100644 --- a/packages/stk/stk_doc_tests/stk_io/singleStepOnRestart.cpp +++ b/packages/stk/stk_doc_tests/stk_io/singleStepOnRestart.cpp @@ -58,7 +58,6 @@ TEST(StkMeshIoBrokerHowTo, singleStepOnRestart) MPI_Comm comm = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(comm); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/subsettingOutputDB.cpp b/packages/stk/stk_doc_tests/stk_io/subsettingOutputDB.cpp index cfd76bf3ff40..317be3aa8ee8 100644 --- a/packages/stk/stk_doc_tests/stk_io/subsettingOutputDB.cpp +++ b/packages/stk/stk_doc_tests/stk_io/subsettingOutputDB.cpp @@ -73,7 +73,6 @@ TEST(StkMeshIoBrokerHowTo, subsetOutputDatabase) s_elems_per_edge + "|shell:xyzXYZ"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(input_filename, "generated", stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/useNodesetDbVarForNodalField.cpp b/packages/stk/stk_doc_tests/stk_io/useNodesetDbVarForNodalField.cpp index 9a54df86ee5b..e68a52562cc4 100644 --- a/packages/stk/stk_doc_tests/stk_io/useNodesetDbVarForNodalField.cpp +++ b/packages/stk/stk_doc_tests/stk_io/useNodesetDbVarForNodalField.cpp @@ -82,7 +82,6 @@ TEST(StkMeshIoBrokerHowTo, useNodesetDbVarForNodalFields) s_elems_per_edge + 
"|shell:xyzXYZ"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(input_filename, "generated", stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/usingHeartbeat.cpp b/packages/stk/stk_doc_tests/stk_io/usingHeartbeat.cpp index 71ca8406d679..301407fa91e6 100644 --- a/packages/stk/stk_doc_tests/stk_io/usingHeartbeat.cpp +++ b/packages/stk/stk_doc_tests/stk_io/usingHeartbeat.cpp @@ -86,7 +86,6 @@ TEST(StkMeshIoBrokerHowTo, writeHeartbeat) //+ EXAMPLE USAGE... //+ Begin use of stk io heartbeat file... stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); //+ Define the heartbeat output to be in TEXT format. size_t hb = stkIo.add_heartbeat_output(file_name, stk::io::TEXT); /*@\label{io:hb:add_heartbeat_output}*/ diff --git a/packages/stk/stk_doc_tests/stk_io/usingHeartbeatCSVChangePrecision.cpp b/packages/stk/stk_doc_tests/stk_io/usingHeartbeatCSVChangePrecision.cpp index 886d34170710..3c68ae719671 100644 --- a/packages/stk/stk_doc_tests/stk_io/usingHeartbeatCSVChangePrecision.cpp +++ b/packages/stk/stk_doc_tests/stk_io/usingHeartbeatCSVChangePrecision.cpp @@ -88,7 +88,6 @@ TEST(StkMeshIoBrokerHowTo, writeHeartbeatCSVChangePrecision) // EXAMPLE USAGE... // Define the heartbeat output... stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); //-BEGIN //+ Output should have 10 digits of precision (1.0123456789e+00) diff --git a/packages/stk/stk_doc_tests/stk_io/usingHeartbeatOverrideSeparator.cpp b/packages/stk/stk_doc_tests/stk_io/usingHeartbeatOverrideSeparator.cpp index 662dd0cc758a..f7c981dc7fc9 100644 --- a/packages/stk/stk_doc_tests/stk_io/usingHeartbeatOverrideSeparator.cpp +++ b/packages/stk/stk_doc_tests/stk_io/usingHeartbeatOverrideSeparator.cpp @@ -88,7 +88,6 @@ TEST(StkMeshIoBrokerHowTo, writeHeartbeatOverrideSeparator) // EXAMPLE USAGE... // Begin use of stk io heartbeat file... 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); //-BEGIN //+ Use vertical bar as field separator diff --git a/packages/stk/stk_doc_tests/stk_io/usingHeartbeatSpyhisFormat.cpp b/packages/stk/stk_doc_tests/stk_io/usingHeartbeatSpyhisFormat.cpp index ababb033e7a2..4cb9a02c6ec6 100644 --- a/packages/stk/stk_doc_tests/stk_io/usingHeartbeatSpyhisFormat.cpp +++ b/packages/stk/stk_doc_tests/stk_io/usingHeartbeatSpyhisFormat.cpp @@ -85,7 +85,6 @@ TEST(StkMeshIoBrokerHowTo, writeHeartbeatSpyhisFormat) // EXAMPLE USAGE... // Begin use of stk io heartbeat file... stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); // Define the heartbeat output. size_t heartbeat_index = stkIo.add_heartbeat_output(file_name, stk::io::SPYHIS); diff --git a/packages/stk/stk_doc_tests/stk_io/usingHistory.cpp b/packages/stk/stk_doc_tests/stk_io/usingHistory.cpp index 0acf356d03d7..db1ec007d7bb 100644 --- a/packages/stk/stk_doc_tests/stk_io/usingHistory.cpp +++ b/packages/stk/stk_doc_tests/stk_io/usingHistory.cpp @@ -103,7 +103,6 @@ TEST(StkMeshIoBrokerHowTo, writeHistory) // EXAMPLE USAGE... // Begin use of stk io history file... 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); //-BEGIN //+ Define the heartbeat output and the format (BINARY) @@ -180,7 +179,6 @@ double initialValue() void setUpMeshWithFieldOnBlock1(stk::mesh::BulkData& bulk, stk::mesh::Field& field1, stk::mesh::Field& field2) { stk::io::StkMeshIoBroker stkIo(bulk.parallel()); - stkIo.use_simple_fields(); stkIo.set_bulk_data(bulk); stkIo.add_mesh_database("generated:1x1x2", stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -216,7 +214,6 @@ void writeHistoryFile(const std::string& historyFilename, stk::mesh::BulkData& b { stk::mesh::Selector subset = elementHistoryPart | nodeHistoryPart; stk::io::StkMeshIoBroker outStkIo; - outStkIo.use_simple_fields(); outStkIo.set_bulk_data(bulk); size_t outputFileIndex = outStkIo.create_output_mesh(historyFilename, stk::io::WRITE_RESULTS); @@ -272,7 +269,6 @@ void verifyHistoryFileOutput(const std::string& filename) { std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); int numSteps = 0; double maxTime = 0; @@ -300,7 +296,6 @@ TEST(StkMeshIoBrokerHowTo, writeHistoryOfElementAndNode) builder.set_spatial_dimension(3); std::shared_ptr bulk = builder.create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field& elemField = meta.declare_field(stk::topology::ELEM_RANK, getElementFieldName()); stk::mesh::Field& nodalField = meta.declare_field(stk::topology::NODE_RANK, getNodalFieldName()); @@ -338,7 +333,6 @@ TEST(StkMeshIoBrokerHowTo, writeEmptyHistory) // EXAMPLE USAGE... // Begin use of stk io history file... 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); //-BEGIN //+ Define the heartbeat output and the format (BINARY) diff --git a/packages/stk/stk_doc_tests/stk_io/usingResults.cpp b/packages/stk/stk_doc_tests/stk_io/usingResults.cpp index 7a2043bf786f..17a58eee7ebb 100644 --- a/packages/stk/stk_doc_tests/stk_io/usingResults.cpp +++ b/packages/stk/stk_doc_tests/stk_io/usingResults.cpp @@ -77,7 +77,6 @@ TEST(StkMeshIoBrokerHowTo, writeResultsWithMultistateField) //+ INITIALIZATION const std::string exodusFileName = "generated:1x1x8"; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(index); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_io/writeResults.cpp b/packages/stk/stk_doc_tests/stk_io/writeResults.cpp index 5101e985e00f..bdcc95106ffc 100644 --- a/packages/stk/stk_doc_tests/stk_io/writeResults.cpp +++ b/packages/stk/stk_doc_tests/stk_io/writeResults.cpp @@ -71,7 +71,6 @@ TEST(StkMeshIoBrokerHowTo, writeResults) //+ INITIALIZATION: //+ Create a basic mesh with a hex block, 3 shell blocks, 3 nodesets, and 3 sidesets. std::unique_ptr mesh = stk::mesh::MeshBuilder(communicator).create(); - mesh->mesh_meta_data().use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|shell:xyz|nodeset:xyz|sideset:XYZ"; stk::io::fill_mesh(generatedFileName, *mesh); @@ -84,7 +83,6 @@ TEST(StkMeshIoBrokerHowTo, writeResults) //+ EXAMPLE: //+ Read mesh data from the specified file. 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(mesh_name, stk::io::READ_MESH); //+ Creates meta data; creates parts diff --git a/packages/stk/stk_doc_tests/stk_io/writeResultsAndRestart.cpp b/packages/stk/stk_doc_tests/stk_io/writeResultsAndRestart.cpp index 0b1d04b251e7..aeb9998cd897 100644 --- a/packages/stk/stk_doc_tests/stk_io/writeResultsAndRestart.cpp +++ b/packages/stk/stk_doc_tests/stk_io/writeResultsAndRestart.cpp @@ -69,7 +69,6 @@ TEST(StkMeshIoBrokerHowTo, writeResultsAndRestart) //+ INITIALIZATION: //+ Create a basic mesh with a hex block, 3 shell blocks, 3 nodesets, and 3 sidesets. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8|shell:xyz|nodeset:xyz|sideset:XYZ"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -87,7 +86,6 @@ TEST(StkMeshIoBrokerHowTo, writeResultsAndRestart) //+ EXAMPLE: //+ Read mesh data from the specified file. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(mesh_name, stk::io::READ_MESH); //+ Creates meta data; creates parts @@ -169,7 +167,6 @@ TEST(StkMeshIoBrokerHowTo, writeResultsAndRestart) //passing the 'missingFields' argument to stkIo.read_defined_input_fields allows //the code to continue without throwing an exception due to not finding the field. stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t rs = stkIo.add_mesh_database(restart_name, stk::io::READ_RESTART); //+ "Restart" the calculation... 
diff --git a/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParameters.cpp b/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParameters.cpp index 23bbed510e2e..5a369b8f5752 100644 --- a/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParameters.cpp +++ b/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParameters.cpp @@ -67,7 +67,6 @@ TEST(StkMeshIoBrokerHowTo, writeAndReadGlobalParameters) { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; size_t index = stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(index); @@ -104,7 +103,6 @@ TEST(StkMeshIoBrokerHowTo, writeAndReadGlobalParameters) //+ EXAMPLE //+ Read parameters from file... stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(file_name, stk::io::READ_MESH); stkIo.create_input_mesh(); stkIo.populate_bulk_data(); diff --git a/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParametersAuto.cpp b/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParametersAuto.cpp index 487bf2a0c91b..4de5170b3b2d 100644 --- a/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParametersAuto.cpp +++ b/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalParametersAuto.cpp @@ -76,7 +76,6 @@ TEST(StkMeshIoBrokerHowTo, writeAndReadGlobalParametersAuto) // Write output file with all parameters in params list... { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; size_t input_index = stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(input_index); @@ -111,7 +110,6 @@ TEST(StkMeshIoBrokerHowTo, writeAndReadGlobalParametersAuto) gold_params.set_param("Ages", ages); // Vector of integers... 
stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(file_name, stk::io::READ_MESH); stkIo.create_input_mesh(); stkIo.populate_bulk_data(); diff --git a/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalVariables.cpp b/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalVariables.cpp index 5ccb6a5bf708..543fe9a2c8f8 100644 --- a/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalVariables.cpp +++ b/packages/stk/stk_doc_tests/stk_io/writingAndReadingGlobalVariables.cpp @@ -61,7 +61,6 @@ TEST(StkMeshIoBrokerHowTo, writeAndReadGlobalVariables) //+ Write restart file with time step size as a global variable { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -78,7 +77,6 @@ TEST(StkMeshIoBrokerHowTo, writeAndReadGlobalVariables) //+ Read restart file with time step size as a global variable { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(restartFileName, stk::io::READ_RESTART); stkIo.create_input_mesh(); stkIo.populate_bulk_data(); diff --git a/packages/stk/stk_doc_tests/stk_io/writingMultipleOutputFiles.cpp b/packages/stk/stk_doc_tests/stk_io/writingMultipleOutputFiles.cpp index 8ba2445d2af5..5ca97c0b21c2 100644 --- a/packages/stk/stk_doc_tests/stk_io/writingMultipleOutputFiles.cpp +++ b/packages/stk/stk_doc_tests/stk_io/writingMultipleOutputFiles.cpp @@ -59,7 +59,6 @@ TEST(StkMeshIoBrokerHowTo, writingMultipleOutputFiles) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); setupMeshAndFieldsForTest(stkIo, displacementFieldName, velocityFieldName); stk::mesh::MetaData &meta_data = stkIo.meta_data(); diff --git a/packages/stk/stk_doc_tests/stk_mesh/CreateFacesHexesShells.cpp 
b/packages/stk/stk_doc_tests/stk_mesh/CreateFacesHexesShells.cpp index e0f0b274d5ef..90bbec943ccf 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/CreateFacesHexesShells.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/CreateFacesHexesShells.cpp @@ -59,7 +59,6 @@ TEST(StkMeshHowTo, CreateFacesTwoHexes) // | | | // ----------- stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("AA.e", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -102,7 +101,6 @@ TEST(StkMeshHowTo, CreateFacesSingleShell) // L // L stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("e.e", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -148,7 +146,6 @@ TEST(StkMeshHowTo, CreateFacesTwoHexesInternalShell) // | |L| | // ------L------ stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("AeA.e", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); diff --git a/packages/stk/stk_doc_tests/stk_mesh/CreateFacesLayeredShellsHex.cpp b/packages/stk/stk_doc_tests/stk_mesh/CreateFacesLayeredShellsHex.cpp index 5d1d324e49fc..2e2fea447be2 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/CreateFacesLayeredShellsHex.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/CreateFacesLayeredShellsHex.cpp @@ -54,7 +54,6 @@ TEST(StkMeshHowTo, CreateFacesLayeredShellsHex) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { return; } stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); // Generate a mesh containing 1 hex part and 12 shell parts // Shells are layered 2 deep. 
diff --git a/packages/stk/stk_doc_tests/stk_mesh/IOSidesetFaceCreation.cpp b/packages/stk/stk_doc_tests/stk_mesh/IOSidesetFaceCreation.cpp index c0f75236a6a8..ec2a08a7228d 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/IOSidesetFaceCreation.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/IOSidesetFaceCreation.cpp @@ -67,7 +67,6 @@ TEST(StkMeshHowTo, StkIO2Hex1SidesetFaceCreation) // from Hex1 face5 stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("ALA.e", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -143,7 +142,6 @@ TEST(StkMeshHowTo, StkIO2Hex2Shell3SidesetFaceCreation) stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("ALefLRA.e", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -245,7 +243,7 @@ TEST(StkMeshHowTo, StkIO2Hex2Shell3SidesetFaceCreation) } //END2hex2shell3sideset -class SideCreationExplanation : public stk::unit_test_util::simple_fields::MeshFixture +class SideCreationExplanation : public stk::unit_test_util::MeshFixture { protected: void test_face_created_on_elem_side_gets_id_16(stk::mesh::EntityId elemId, int sideOrdinal) diff --git a/packages/stk/stk_doc_tests/stk_mesh/UnitTestCommMeshCounts.cpp b/packages/stk/stk_doc_tests/stk_mesh/UnitTestCommMeshCounts.cpp index 517fb32cffd0..5c39615f5a4d 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/UnitTestCommMeshCounts.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/UnitTestCommMeshCounts.cpp @@ -62,7 +62,7 @@ TEST( CommMeshCounts, Serial ) if ( numprocs == 1 ) { const std::string generatedMeshSpec = getGeneratedMeshString(10,20,2); - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); std::vector comm_mesh_counts; 
stk::mesh::comm_mesh_counts(*stkMesh.getBulkData(), comm_mesh_counts); @@ -78,7 +78,7 @@ TEST( CommMeshCounts, Parallel ) int numprocs = stk::parallel_machine_size(communicator); const std::string generatedMeshSpec = getGeneratedMeshString(10,20,2*numprocs); - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); std::vector comm_mesh_counts; stk::mesh::comm_mesh_counts(*stkMesh.getBulkData(), comm_mesh_counts); @@ -93,7 +93,7 @@ TEST( CommMeshCountsWithStats, Parallel ) int numprocs = stk::parallel_machine_size(communicator); const std::string generatedMeshSpec = getGeneratedMeshString(10,20,2*numprocs); - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); std::vector comm_mesh_counts; std::vector min_counts; diff --git a/packages/stk/stk_doc_tests/stk_mesh/UnitTestGhostParts.cpp b/packages/stk/stk_doc_tests/stk_mesh/UnitTestGhostParts.cpp index b26eafc5c92d..241d7128b874 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/UnitTestGhostParts.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/UnitTestGhostParts.cpp @@ -73,7 +73,6 @@ TEST(UnitTestGhostParts, Aura) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x3"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -122,7 +121,6 @@ TEST(UnitTestGhostParts, Custom1) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x4"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -176,7 +174,6 @@ TEST(UnitTestAura, 
test_num_communicated_entities) int numProcs = stk::parallel_machine_size(communicator); if (numProcs == 2) { stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x4"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_mesh/changeEntityOwner.cpp b/packages/stk/stk_doc_tests/stk_mesh/changeEntityOwner.cpp index 864c8d548ce1..451dc2575fce 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/changeEntityOwner.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/changeEntityOwner.cpp @@ -71,7 +71,6 @@ TEST(StkMeshHowTo, changeEntityOwner) if (stk::parallel_machine_size(communicator) == 2) { std::shared_ptr bulkDataPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkDataPtr->mesh_meta_data().use_simple_fields(); stk::io::fill_mesh("generated:1x1x4", *bulkDataPtr); stk::mesh::EntityId elem2Id = 2; diff --git a/packages/stk/stk_doc_tests/stk_mesh/changeEntityParts.cpp b/packages/stk/stk_doc_tests/stk_mesh/changeEntityParts.cpp index 45a616c10ed0..b3926ef51719 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/changeEntityParts.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/changeEntityParts.cpp @@ -70,7 +70,6 @@ TEST(StkMeshHowTo, changeEntityPartsUsingSelector) std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); unsigned elementCount = 10; stk::io::fill_mesh("generated:1x1x" + std::to_string(elementCount), *bulkPtr); diff --git a/packages/stk/stk_doc_tests/stk_mesh/communicateFieldData.cpp b/packages/stk/stk_doc_tests/stk_mesh/communicateFieldData.cpp index d84fdeb2d58c..b2683eed8a1d 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/communicateFieldData.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/communicateFieldData.cpp @@ -51,7 +51,7 @@ #include 
"stk_mesh/base/Types.hpp" // for BucketVector #include "stk_topology/topology.hpp" // for topology, etc -class ParallelHowTo : public stk::unit_test_util::simple_fields::MeshFixture {}; +class ParallelHowTo : public stk::unit_test_util::MeshFixture {}; //BEGINCommuniateFieldData TEST_F(ParallelHowTo, communicateFieldDataForSharedAndAura) diff --git a/packages/stk/stk_doc_tests/stk_mesh/createFacesEdgesHex.cpp b/packages/stk/stk_doc_tests/stk_mesh/createFacesEdgesHex.cpp index 4c8b84cc0512..a06e790fdf24 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/createFacesEdgesHex.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/createFacesEdgesHex.cpp @@ -65,7 +65,6 @@ TEST(StkMeshHowTo, CreateFacesEdgesHex) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { return; } stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -111,7 +110,6 @@ TEST(StkMeshHowTo, CreateEdgesFacesHex) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { return; } stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -156,7 +154,6 @@ TEST(StkMeshHowTo, CreateEdgesFacesHexNoConnect) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { return; } stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -208,7 +205,6 @@ TEST(StkMeshHowTo, UnderstandEdgeAndFaceOrdering) // INITIALIZATION MPI_Comm communicator = MPI_COMM_WORLD; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = exodusFileName; 
stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -364,7 +360,6 @@ void writeExodusFile(Iogn::GeneratedMesh *generatedMesh, const std::string &exod Ioss::Region* io_region = new Ioss::Region(database); stk::io::StkMeshIoBroker meshData; - meshData.use_simple_fields(); std::shared_ptr junk(io_region, [](auto pointerWeWontDelete){}); meshData.add_mesh_database(junk); meshData.create_input_mesh(); diff --git a/packages/stk/stk_doc_tests/stk_mesh/createFacesHex.cpp b/packages/stk/stk_doc_tests/stk_mesh/createFacesHex.cpp index c910f9f9ed7b..0d1453a45358 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/createFacesHex.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/createFacesHex.cpp @@ -53,7 +53,6 @@ TEST(StkMeshHowTo, CreateFacesHex) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { GTEST_SKIP(); } std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; stk::io::fill_mesh(generatedFileName, *bulkPtr); diff --git a/packages/stk/stk_doc_tests/stk_mesh/createSelectedFaces.cpp b/packages/stk/stk_doc_tests/stk_mesh/createSelectedFaces.cpp index 4883552ff529..97af00289ed9 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/createSelectedFaces.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/createSelectedFaces.cpp @@ -56,7 +56,6 @@ TEST(StkMeshHowTo, CreateSelectedFacesHex) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { GTEST_SKIP(); } std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); // Generate a mesh containing 1 hex part and 6 shell parts const std::string generatedFileName = "generated:8x8x8|shell:xyzXYZ"; diff --git a/packages/stk/stk_doc_tests/stk_mesh/createSharedNodes.cpp b/packages/stk/stk_doc_tests/stk_mesh/createSharedNodes.cpp index 
ef1e66e53699..fbe2719449bb 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/createSharedNodes.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/createSharedNodes.cpp @@ -78,7 +78,6 @@ TEST(stkMeshHowTo, createSharedNodes) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(spatialDimension); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulkData = *bulkPtr; stk::mesh::Part &triPart = metaData.declare_part_with_topology("tri_part", stk::topology::TRIANGLE_3_2D); @@ -122,7 +121,6 @@ TEST(stkMeshHowTo, createIndependentSharedNodes) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(spatialDimension); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::BulkData& bulkData = *bulkPtr; stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); metaData.commit(); @@ -158,7 +156,6 @@ TEST(stkMeshHowTo, createIndependentSharedNodesThenAddDependence) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(spatialDimension); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::BulkData& bulkData = *bulkPtr; stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); stk::mesh::Part &triPart = metaData.declare_part_with_topology("triPart", stk::topology::TRIANGLE_3_2D); diff --git a/packages/stk/stk_doc_tests/stk_mesh/createStkMesh.cpp b/packages/stk/stk_doc_tests/stk_mesh/createStkMesh.cpp index 3d7907fef40e..da096ea7fa7a 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/createStkMesh.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/createStkMesh.cpp @@ -54,7 +54,6 @@ TEST(StkMeshHowTo, UseStkIO) if(stk::parallel_machine_size(communicator) == 1) { std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); 
stk::io::StkMeshIoBroker meshReader; meshReader.set_bulk_data(*bulkPtr); diff --git a/packages/stk/stk_doc_tests/stk_mesh/createStkMeshAlt1.cpp b/packages/stk/stk_doc_tests/stk_mesh/createStkMeshAlt1.cpp index d46607b29eab..d427f4117a7a 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/createStkMeshAlt1.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/createStkMeshAlt1.cpp @@ -63,13 +63,11 @@ TEST(StkMeshHowTo, CreateStkMesh) // MetaData creates the universal_part, locally-owned part, and globally shared part. std::shared_ptr stkMeshBulkDataPtr = stk::mesh::MeshBuilder(communicator).create(); stk::mesh::MetaData& stkMeshMetaData = stkMeshBulkDataPtr->mesh_meta_data(); - stkMeshMetaData.use_simple_fields(); // Read the mesh data from the Exodus file and populate an STK Mesh. // The order of the following lines in {} are important { stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); // Provide STK Mesh object to be populated exodusFileReader.set_bulk_data(*stkMeshBulkDataPtr); @@ -98,7 +96,6 @@ void create_example_exodus_file(MPI_Comm communicator, const std::string & exodu //+ INITIALIZATION: //+ Create a mesh stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:8x8x8"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); diff --git a/packages/stk/stk_doc_tests/stk_mesh/customGhosting.cpp b/packages/stk/stk_doc_tests/stk_mesh/customGhosting.cpp index 4e2e1ad711de..e8b5a4e4a91e 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/customGhosting.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/customGhosting.cpp @@ -100,7 +100,6 @@ TEST(StkMeshHowTo, customGhostElem) if (stk::parallel_machine_size(communicator) == 2) { std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::BulkData& bulkData = *bulkPtr; stk::io::fill_mesh("generated:1x1x4", bulkData); @@ -127,7 
+126,6 @@ TEST(StkMeshHowTo, addElementToGhostingUsingSpecializedModificationForPerformanc if(stk::parallel_machine_size(communicator) == 2) { std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::BulkData& bulk = *bulkPtr; stk::io::fill_mesh("generated:1x1x4", bulk); diff --git a/packages/stk/stk_doc_tests/stk_mesh/entityState.cpp b/packages/stk/stk_doc_tests/stk_mesh/entityState.cpp index 520ba44b7027..29fd542d686c 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/entityState.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/entityState.cpp @@ -45,7 +45,6 @@ TEST(stkMeshHowTo, checkCreatedStateAfterMeshCreation) if (parallel_size != 1) { return; } const std::string fileName = "generated:1x1x1"; stk::io::StkMeshIoBroker meshReader(communicator); - meshReader.use_simple_fields(); meshReader.add_mesh_database(fileName, stk::io::READ_MESH); meshReader.create_input_mesh(); meshReader.populate_bulk_data(); @@ -70,7 +69,6 @@ TEST(stkMeshHowTo, checkDeletedState) if (parallel_size != 1) { return; } const std::string fileName = "generated:1x1x1"; stk::io::StkMeshIoBroker meshReader(communicator); - meshReader.use_simple_fields(); meshReader.add_mesh_database(fileName, stk::io::READ_MESH); meshReader.create_input_mesh(); meshReader.populate_bulk_data(); @@ -101,7 +99,6 @@ TEST(stkMeshHowTo, checkModifiedState) if (parallel_size != 1) { return; } const std::string fileName = "generated:1x1x1"; stk::io::StkMeshIoBroker meshReader(communicator); - meshReader.use_simple_fields(); meshReader.add_mesh_database(fileName, stk::io::READ_MESH); meshReader.create_input_mesh(); meshReader.populate_bulk_data(); @@ -128,7 +125,6 @@ TEST(stkMeshHowTo, checkAuraCreatedState) if (parallel_size != 2) { GTEST_SKIP(); } const std::string fileName = "generated:1x1x2"; stk::io::StkMeshIoBroker meshReader(communicator); - meshReader.use_simple_fields(); meshReader.add_mesh_database(fileName, stk::io::READ_MESH); 
meshReader.create_input_mesh(); meshReader.populate_bulk_data(); @@ -155,7 +151,6 @@ TEST(stkMeshHowTo, checkCEOModifiedState) if (parallel_size != 2) { GTEST_SKIP(); } const std::string fileName = "generated:1x1x2"; stk::io::StkMeshIoBroker meshReader(communicator); - meshReader.use_simple_fields(); meshReader.add_mesh_database(fileName, stk::io::READ_MESH); meshReader.create_input_mesh(); meshReader.populate_bulk_data(); @@ -187,7 +182,6 @@ TEST(stkMeshHowTo, checkParallelConsistencyModifiedState) if (parallel_size != 2) { GTEST_SKIP(); } const std::string fileName = "generated:1x1x2"; stk::io::StkMeshIoBroker meshReader(communicator); - meshReader.use_simple_fields(); meshReader.add_mesh_database(fileName, stk::io::READ_MESH); meshReader.create_input_mesh(); meshReader.populate_bulk_data(); diff --git a/packages/stk/stk_doc_tests/stk_mesh/generateNewEntities.cpp b/packages/stk/stk_doc_tests/stk_mesh/generateNewEntities.cpp index e2b60b5f4918..6d5267120019 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/generateNewEntities.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/generateNewEntities.cpp @@ -75,7 +75,6 @@ TEST(stkMeshHowTo, generateNewEntities) builder.set_spatial_dimension(spatialDimension); builder.set_entity_rank_names(stk::mesh::entity_rank_names()); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); stk::mesh::Part &tetPart = metaData.declare_part_with_topology("tetElementPart", stk::topology::TET_4); stk::mesh::Part &hexPart = metaData.declare_part_with_topology("hexElementPart", stk::topology::HEX_8); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsInList.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsInList.cpp index 0822bb0b9f53..f1ded57ff382 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsInList.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsInList.cpp @@ -12,7 +12,6 @@ 
namespace TEST(StkMeshHowTo, DestroyElementsInList) { std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::BulkData& bulkData = *bulkPtr; stk::io::fill_mesh("generated:1x1x4", bulkData); EXPECT_GT(stk::mesh::count_entities(*bulkPtr, stk::topology::ELEM_RANK, bulkPtr->mesh_meta_data().universal_part()), 0u); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsOfTopology.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsOfTopology.cpp index ed57103269b8..8ccd8a9326e0 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsOfTopology.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToDestroyElementsOfTopology.cpp @@ -12,7 +12,6 @@ TEST(StkMeshHowTo, DestroyElementsOfTopology) if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); stk::io::fill_mesh("generated:1x1x4", *bulkPtr); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToEnableMeshDiagnostics.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToEnableMeshDiagnostics.cpp index 1822a40695d9..9331149f8db0 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToEnableMeshDiagnostics.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToEnableMeshDiagnostics.cpp @@ -45,7 +45,6 @@ namespace TEST(StkMeshHowTo, EnableMeshDiagnostics) { std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::io::fill_mesh("generated:4x4x4|sideset:xX", *bulkPtr); bulkPtr->enable_mesh_diagnostic_rule(stk::mesh::RULE_3); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToGetFields.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToGetFields.cpp index cb66d0b446a9..73ae3806f8ab 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToGetFields.cpp +++ 
b/packages/stk/stk_doc_tests/stk_mesh/howToGetFields.cpp @@ -47,7 +47,6 @@ namespace SpatialDimension { const unsigned three = 3; } TEST(stkMeshHowTo, getFields) { stk::mesh::MetaData metaData(SpatialDimension::three); - metaData.use_simple_fields(); typedef stk::mesh::Field DoubleFieldType; diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToIterateConnectivity.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToIterateConnectivity.cpp index eedaaeab28b0..e35bbc0235ef 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToIterateConnectivity.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToIterateConnectivity.cpp @@ -57,7 +57,6 @@ TEST(StkMeshHowTo, iterateElemNodeConnectivity_ForEachEntityWithNodes) MPI_Comm comm = MPI_COMM_WORLD; if (stk::parallel_machine_size(comm) != 1) { GTEST_SKIP(); } std::unique_ptr stkMesh = stk::mesh::MeshBuilder(comm).create(); - stkMesh->mesh_meta_data().use_simple_fields(); // Generate a mesh of unit-cube hexes with a sideset const std::string generatedMeshSpecification = "generated:2x2x2|sideset:X"; stk::io::fill_mesh(generatedMeshSpecification, *stkMesh); @@ -98,7 +97,6 @@ TEST(StkMeshHowTo, iterateConnectivity_General_BulkData) MPI_Comm comm = MPI_COMM_WORLD; if (stk::parallel_machine_size(comm) != 1) { GTEST_SKIP(); } std::unique_ptr stkMesh = stk::mesh::MeshBuilder(comm).create(); - stkMesh->mesh_meta_data().use_simple_fields(); // Generate a mesh of unit-cube hexes with a sideset const std::string generatedMeshSpecification = "generated:2x2x2|sideset:X"; stk::io::fill_mesh(generatedMeshSpecification, *stkMesh); @@ -139,7 +137,6 @@ TEST(StkMeshHowTo, iterateConnectivity_Buckets) MPI_Comm comm = MPI_COMM_WORLD; if (stk::parallel_machine_size(comm) != 1) { return; } std::unique_ptr stkMesh = stk::mesh::MeshBuilder(comm).create(); - stkMesh->mesh_meta_data().use_simple_fields(); // Generate a mesh of unit-cube hexes with a sideset const std::string generatedMeshSpecification = "generated:2x2x2|sideset:X"; 
stk::io::fill_mesh(generatedMeshSpecification, *stkMesh); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToIterateEntities.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToIterateEntities.cpp index 943737502a19..16382d1294f6 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToIterateEntities.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToIterateEntities.cpp @@ -64,7 +64,6 @@ TEST(StkMeshHowTo, iterateSidesetNodes_BucketLoop_ContiguousFieldDataWithinBucke .set_spatial_dimension(3) .create(); stk::mesh::MetaData &stkMeshMeta = stkMesh->mesh_meta_data(); - stkMeshMeta.use_simple_fields(); stk::mesh::Field &temperatureField = stkMeshMeta.declare_field(stk::topology::NODE_RANK, "temperature"); stk::mesh::put_field_on_entire_mesh(temperatureField); @@ -100,7 +99,6 @@ TEST(StkMeshHowTo, iterateSidesetNodes_ForEachEntity_FieldDataAccess) .set_spatial_dimension(3) .create(); stk::mesh::MetaData &stkMeshMeta = stkMesh->mesh_meta_data(); - stkMeshMeta.use_simple_fields(); stk::mesh::Field &temperatureField = stkMeshMeta.declare_field(stk::topology::NODE_RANK, "temperature"); stk::mesh::put_field_on_entire_mesh(temperatureField); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp index e98c31647f84..7bcbd60764f4 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp @@ -71,7 +71,7 @@ void check_field_on_host(const stk::mesh::BulkData & bulk, //ENDNgpReadFieldOnHost } -class NgpHowTo : public stk::unit_test_util::simple_fields::MeshFixture +class NgpHowTo : public stk::unit_test_util::MeshFixture { public: void setup_test_mesh() @@ -84,7 +84,7 @@ class NgpHowTo : public stk::unit_test_util::simple_fields::MeshFixture std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n\ 0,2,SHELL_QUAD_4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } const 
stk::mesh::Part* extraPart = nullptr; }; @@ -98,7 +98,7 @@ TEST_F(NgpHowTo, loopOverSubsetOfMesh) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n\ 0,2,SHELL_QUAD_4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); double fieldVal = 13.0; set_field_on_device(get_bulk(), stk::topology::ELEM_RANK, shellQuadPart, shellQuadField, fieldVal); @@ -187,7 +187,7 @@ TEST_F(NgpHowTo, fieldOnSubsetOfMesh) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n\ 0,2,SHELL_QUAD_4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); double fieldVal = 13.0; set_field_on_device(get_bulk(), stk::topology::ELEM_RANK, shellQuadPart, shellQuadField, fieldVal); @@ -206,7 +206,7 @@ TEST_F(NgpHowTo, loopOverAllMeshNodes) auto &field = get_meta().declare_field(stk::topology::NODE_RANK, "myField"); stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), nullptr); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); double fieldVal = 13.0; set_field_on_device(get_bulk(), stk::topology::NODE_RANK, get_meta().universal_part(), field, fieldVal); @@ -225,7 +225,7 @@ TEST_F(NgpHowTo, loopOverMeshFaces) auto &field = get_meta().declare_field(stk::topology::FACE_RANK, "myField"); stk::mesh::put_field_on_mesh(field, facePart, nullptr); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::create_exposed_block_boundary_sides(get_bulk(), get_meta().universal_part(), {&facePart}); @@ -285,7 +285,7 @@ TEST_F(NgpHowTo, loopOverElemNodes) auto &field = get_meta().declare_field(stk::topology::NODE_RANK, "myField"); 
stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), nullptr); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); run_connected_node_test(get_bulk()); } @@ -300,7 +300,7 @@ TEST_F(NgpHowTo, loopOverElemNodes_bucketCapacity) auto &field = get_meta().declare_field(stk::topology::NODE_RANK, "myField"); stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), nullptr); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); run_connected_node_test(get_bulk()); } @@ -348,7 +348,7 @@ NGP_TEST_F(NgpHowTo, checkElemNodeIds) } setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); run_id_test(get_bulk()); } @@ -471,7 +471,7 @@ void run_constraint_node_test(const stk::mesh::BulkData& bulk, ); } -class NgpHowToConstraint : public stk::unit_test_util::simple_fields::MeshFixture +class NgpHowToConstraint : public stk::unit_test_util::MeshFixture { public: NgpHowToConstraint() : MeshFixture(3, {"node", "edge", "face", "elem", "constraint"}) @@ -733,7 +733,7 @@ void test_ngp_mesh_construction(const stk::mesh::BulkData& bulk) TEST_F(NgpHowTo, ngpMeshConstruction) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-mesh", "generated:20x20x20|sideset:xXyYzZ"); + std::string exodusFileName = stk::unit_test_util::get_option("-mesh", "generated:20x20x20|sideset:xXyYzZ"); if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1 && exodusFileName == "generated:20x20x20|sideset:xXyYzZ") { std::cout<<"NgpHowTo.ngpMeshConstruction Only runs in parallel if user specified a mesh." 
<< std::endl; @@ -803,7 +803,7 @@ TEST_F(NgpHowTo, exerciseAura) meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "1,2,HEX_8,5,6,7,8,9,10,11,12"; } - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); set_num_elems_in_field_on_device_and_copy_back(get_bulk(), get_meta().universal_part(), field); @@ -967,7 +967,7 @@ NGP_TEST_F(NgpHowTo, accessVectorFieldValues) } -class NgpReduceHowTo : public stk::unit_test_util::simple_fields::MeshFixture +class NgpReduceHowTo : public stk::unit_test_util::MeshFixture { protected: NgpReduceHowTo() @@ -1248,7 +1248,7 @@ TEST_F(NgpHowTo, checkPartMembership) stk::mesh::Part& testPart = get_meta().declare_part("testPart", stk::topology::NODE_RANK); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity node1 = get_bulk().get_entity(stk::topology::NODE_RANK, 1u); stk::mesh::Entity node2 = get_bulk().get_entity(stk::topology::NODE_RANK, 2u); @@ -1278,7 +1278,6 @@ TEST(NgpMesh, meshIndices) } std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - bulk->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); stk::mesh::EntityRank rank = stk::topology::ELEMENT_RANK; diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToNgpMultistateFields.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToNgpMultistateFields.cpp index 356974ac58ea..d7fa1c715271 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToNgpMultistateFields.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToNgpMultistateFields.cpp @@ -78,7 +78,6 @@ NGP_TEST(NgpMultistateField, setOnHost_swap_checkOnDevice) std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) .set_spatial_dimension(3).create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); constexpr 
unsigned numStates = 2; stk::mesh::Field& stkFieldNew = meta.declare_field(stk::topology::ELEM_RANK, "myElemField", numStates); @@ -112,7 +111,6 @@ NGP_TEST(NgpMultistateField, setOnHost_swap_preExistingNgpFieldsNeedSync) std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) .set_spatial_dimension(3).create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); constexpr unsigned numStates = 2; stk::mesh::Field& stkFieldNew = meta.declare_field(stk::topology::ELEM_RANK, "myElemField", numStates); @@ -163,7 +161,6 @@ NGP_TEST(NgpMultistateField, setOnDevice_swap_checkOnDevice) std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) .set_spatial_dimension(3).create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); constexpr unsigned numStates = 2; stk::mesh::Field& stkFieldNew = meta.declare_field(stk::topology::ELEM_RANK, "myElemField", numStates); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToSkinMesh.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToSkinMesh.cpp index 53021db54387..ac5cdfb91f3e 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToSkinMesh.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToSkinMesh.cpp @@ -58,7 +58,6 @@ TEST(StkMeshHowTo, SkinExposedHex) std::shared_ptr bulk = stk::mesh::MeshBuilder(communicator).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); const std::string generatedFileName = "generated:1x1x1"; stk::io::fill_mesh(generatedFileName, *bulk); @@ -91,7 +90,6 @@ TEST(StkMeshHowTo, SkinInteriorHex) std::shared_ptr bulk = stk::mesh::MeshBuilder(communicator).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); const std::string generatedFileName = "generated:1x1x2"; stk::io::fill_mesh(generatedFileName, *bulk); @@ -133,7 +131,6 @@ TEST(StkMeshHowTo, SkinAllHexBlocks) std::shared_ptr bulk = stk::mesh::MeshBuilder(communicator).create(); stk::mesh::MetaData& meta = 
bulk->mesh_meta_data(); - meta.use_simple_fields(); const std::string generatedFileName = "generated:1x1x2"; stk::io::fill_mesh(generatedFileName, *bulk); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToSortEntities.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToSortEntities.cpp index 5bbf1bc3cdb0..c0d31fd5a1cc 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToSortEntities.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToSortEntities.cpp @@ -15,7 +15,7 @@ class EntityReverseSorter : public stk::mesh::EntitySorterBase } }; -class HowToSortEntities : public stk::unit_test_util::simple_fields::MeshFixture +class HowToSortEntities : public stk::unit_test_util::MeshFixture { protected: void sort_and_check() diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToUseAura.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToUseAura.cpp index be8bd3f947ea..db8f7d0b8782 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToUseAura.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToUseAura.cpp @@ -56,7 +56,6 @@ void expectNumElementsInAura(stk::mesh::BulkData::AutomaticAuraOption autoAuraOp stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_aura_option(autoAuraOption); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk = *bulkPtr; stk::io::fill_mesh("generated:1x1x2", bulk); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToUseEquivalent.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToUseEquivalent.cpp index 0672fd9dd8fd..a9c5fb97ab1e 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToUseEquivalent.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToUseEquivalent.cpp @@ -13,7 +13,7 @@ namespace { -class MeshWithSide : public stk::unit_test_util::simple_fields::MeshFixture { }; +class MeshWithSide : public stk::unit_test_util::MeshFixture { }; //-BEGIN TEST_F(MeshWithSide, whenCheckingSideEquivalency_returnsCorrectPermutation) diff --git 
a/packages/stk/stk_doc_tests/stk_mesh/howToUseGenerateNewIds.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToUseGenerateNewIds.cpp index 8970f57727eb..432e00bce9a9 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToUseGenerateNewIds.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToUseGenerateNewIds.cpp @@ -36,7 +36,7 @@ void test_that_ids_are_unique(const stk::mesh::BulkData &bulkData, stk::mesh::En for(size_t j = 0; j < requestedIds.size(); ++j) { bool is_id_unique = std::binary_search(ids_in_use.begin(), ids_in_use.end(), requestedIds[j]); - STK_ThrowRequireMsg(is_id_unique == false, "Oh no! " << __FILE__<< __LINE__); + STK_ThrowRequireMsg(is_id_unique == false, "ID="<(requestedIds[j]); } } @@ -70,10 +70,8 @@ void test_that_ids_are_unique(const stk::mesh::BulkData &bulkData, stk::mesh::En TEST(StkMeshHowTo, use_generate_new_ids) { MPI_Comm communicator = MPI_COMM_WORLD; - int num_procs = stk::parallel_machine_size(communicator); std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x" + std::to_string(num_procs); stk::io::fill_mesh(generatedMeshSpecification, *bulkPtr); diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToUseNgpFieldAsyncCopy.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToUseNgpFieldAsyncCopy.cpp index cd990e4a8cd3..868b1168f76b 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToUseNgpFieldAsyncCopy.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToUseNgpFieldAsyncCopy.cpp @@ -76,7 +76,6 @@ TEST(stkMeshHowTo, ngpFieldAsyncCopy) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(spatialDimension); std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk = *bulkPtr; diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToUseSelectors.cpp 
b/packages/stk/stk_doc_tests/stk_mesh/howToUseSelectors.cpp index 35be1026d23d..edfd28a31cee 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToUseSelectors.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToUseSelectors.cpp @@ -57,7 +57,6 @@ TEST(StkMeshHowTo, basicSelectorUsage) std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(communicator) .set_spatial_dimension(3).create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); //create a simple shell-quad-4 mesh: // 6 @@ -75,7 +74,7 @@ TEST(StkMeshHowTo, basicSelectorUsage) "0,2,SHELL_QUAD_4, 2,5,6,3, block_2\n" "0,3,SHELL_QUAD_4, 4,7,8,5, block_3\n" "0,4,SHELL_QUAD_4, 5,8,9,6, block_4\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulkPtr, meshDesc); + stk::unit_test_util::setup_text_mesh(*bulkPtr, meshDesc); stk::mesh::Part& block_1 = *meta.get_part("block_1"); stk::mesh::Part& block_2 = *meta.get_part("block_2"); @@ -99,7 +98,6 @@ TEST(StkMeshHowTo, betterUnderstandSelectorConstruction) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { GTEST_SKIP(); } std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); const std::string generatedCubeMeshSpecification = "generated:1x1x1"; stk::io::fill_mesh(generatedCubeMeshSpecification, *bulkPtr); @@ -121,7 +119,6 @@ TEST(StkMeshHowTo, makeSureYouAreNotIntersectingNothingSelector) MPI_Comm communicator = MPI_COMM_WORLD; if (stk::parallel_machine_size(communicator) != 1) { return; } std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(communicator).create(); - bulkPtr->mesh_meta_data().use_simple_fields(); // syntax creates faces for surface on the positive: 'x-side', 'y-side', and 'z-side' // of a 1x1x1 cube, these parts are given the names: 'surface_1', 'surface_2', and 'surface_3' const std::string generatedCubeMeshSpecification = "generated:1x1x1|sideset:XYZ"; diff --git 
a/packages/stk/stk_doc_tests/stk_mesh/howToVisitEdgeNodes.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToVisitEdgeNodes.cpp new file mode 100644 index 000000000000..2c979688b285 --- /dev/null +++ b/packages/stk/stk_doc_tests/stk_mesh/howToVisitEdgeNodes.cpp @@ -0,0 +1,84 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include "stk_io/FillMesh.hpp" + +namespace { + +//BEGIN_VISIT_EDGE_NODES +TEST(StkMeshHowTo, VisitEdgeNodes) +{ + // ============================================================ + // INITIALIZATION + MPI_Comm comm = MPI_COMM_WORLD; + if (stk::parallel_machine_size(comm) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); + stk::mesh::MetaData& meta = bulk->mesh_meta_data(); + + const std::string generatedFileName = "generated:1x1x1"; + stk::io::fill_mesh(generatedFileName, *bulk); + + stk::mesh::EntityVector edgeNodes(2); + unsigned edgeCount = 0; + + stk::mesh::for_each_entity_run(*bulk, stk::topology::ELEM_RANK, meta.locally_owned_part(), + [&](const stk::mesh::BulkData& mesh, stk::mesh::Entity elem) { + stk::topology elemTopo = mesh.bucket(elem).topology(); + const unsigned numEdgesPerElem = elemTopo.num_edges(); + const stk::mesh::Entity* elemNodes = mesh.begin(elem, stk::topology::NODE_RANK); + for(unsigned i=0; i bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); stk::mesh::Part &tetPart = metaData.declare_part_with_topology("tet part", stk::topology::TET_4); diff --git a/packages/stk/stk_doc_tests/stk_mesh/useAdvancedFields.cpp b/packages/stk/stk_doc_tests/stk_mesh/useAdvancedFields.cpp index 6cdbe5f4900a..c0b288c01ddf 
100644 --- a/packages/stk/stk_doc_tests/stk_mesh/useAdvancedFields.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/useAdvancedFields.cpp @@ -55,7 +55,6 @@ TEST(stkMeshHowTo, useAdvancedFields) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(spatialDimension); std::unique_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); typedef stk::mesh::Field DoubleField; diff --git a/packages/stk/stk_doc_tests/stk_mesh/useFieldBLAS.cpp b/packages/stk/stk_doc_tests/stk_mesh/useFieldBLAS.cpp index 415fa7146c7b..bd3a52360eb9 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/useFieldBLAS.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/useFieldBLAS.cpp @@ -55,7 +55,7 @@ void create_two_tet_element_mesh(stk::mesh::BulkData &bulk) { std::string meshSpec = "0, 1,TET_4, 1,2,3,4\n" "0, 2,TET_4, 2,3,4,5"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshSpec); + stk::unit_test_util::setup_text_mesh(bulk, meshSpec); } //BEGINUseFieldBLAS @@ -65,7 +65,6 @@ TEST(stkMeshHowTo, useFieldBLAS) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(SpatialDimension); std::unique_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); typedef stk::mesh::Field DoubleField; diff --git a/packages/stk/stk_doc_tests/stk_mesh/useMultistateFields.cpp b/packages/stk/stk_doc_tests/stk_mesh/useMultistateFields.cpp index fe0d234695af..ac9bbf6f656f 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/useMultistateFields.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/useMultistateFields.cpp @@ -55,7 +55,6 @@ TEST(stkMeshHowTo, useMultistateField) builder.set_spatial_dimension(spatialDimension); builder.set_entity_rank_names(stk::mesh::entity_rank_names()); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = 
bulkPtr->mesh_meta_data(); typedef stk::mesh::Field ScalarField; diff --git a/packages/stk/stk_doc_tests/stk_mesh/useSimpleFields.cpp b/packages/stk/stk_doc_tests/stk_mesh/useSimpleFields.cpp index 3057011b52de..91aa38fe33c2 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/useSimpleFields.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/useSimpleFields.cpp @@ -55,7 +55,7 @@ void create_two_tet_element_mesh(stk::mesh::BulkData &bulk) { std::string meshSpec = "0, 1,TET_4, 1,2,3,4\n" "0, 2,TET_4, 2,3,4,5"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshSpec); + stk::unit_test_util::setup_text_mesh(bulk, meshSpec); } //BEGINUseSimpleFields @@ -66,7 +66,6 @@ TEST(stkMeshHowTo, useSimpleFields) stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); builder.set_spatial_dimension(SpatialDimension); std::unique_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& metaData = bulkPtr->mesh_meta_data(); typedef stk::mesh::Field DoubleField; diff --git a/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp b/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp index c3039761e78f..60cbaf661e31 100644 --- a/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp +++ b/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp @@ -51,6 +51,7 @@ namespace doc_test { using ExecSpace = Kokkos::DefaultExecutionSpace; using HostSpace = Kokkos::DefaultHostExecutionSpace; +//BEGINngp_coarse_search_types using ElemIdentProc = stk::search::IdentProc; using NodeIdentProc = stk::search::IdentProc; using SphereIdentProc = stk::search::BoxIdentProc,ElemIdentProc>; @@ -60,6 +61,7 @@ using Intersection = stk::search::IdentProcIntersection; using RangeViewType = Kokkos::View; using ResultViewType = Kokkos::View; +//ENDngp_coarse_search_types using FastMeshIndicesViewType = Kokkos::View; @@ -133,6 +135,7 @@ RangeViewType create_node_points(const stk::mesh::BulkData& 
mesh) FastMeshIndicesViewType nodeIndices = get_local_indices(mesh, stk::topology::NODE_RANK); const int myRank = mesh.parallel_rank(); +//BEGINngp_construct_search_points Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, numLocalNodes), KOKKOS_LAMBDA(const unsigned& i) { stk::mesh::EntityFieldData coords = ngpCoords(nodeIndices(i)); @@ -140,6 +143,7 @@ RangeViewType create_node_points(const stk::mesh::BulkData& mesh) nodePoints(i) = PointIdentProc{stk::search::Point(coords[0], coords[1], coords[2]), NodeIdentProc(ngpMesh.identifier(node), myRank)}; } ); +//ENDngp_construct_search_points return nodePoints; } @@ -208,12 +212,11 @@ TEST(HowToNgpSearch, elemNodeNeighbors) { if (stk::parallel_machine_size(MPI_COMM_WORLD) > 4) { GTEST_SKIP(); } - std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) + std::unique_ptr meshPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) .set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA) .set_spatial_dimension(3) .create(); - stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); + stk::mesh::MetaData& meta = meshPtr->mesh_meta_data(); std::string meshSpec("generated:4x4x4|bbox:-1,-1,-1,1,1,1"); const double radius = 0.5; @@ -222,16 +225,17 @@ TEST(HowToNgpSearch, elemNodeNeighbors) stk::mesh::Field& neighborField = meta.declare_field(stk::topology::ELEM_RANK, "nodeNeighbors"); stk::mesh::put_field_on_mesh(neighborField, meta.universal_part(), maxNumNeighbors+1, nullptr); - stk::io::fill_mesh(meshSpec, *bulkPtr); + stk::io::fill_mesh(meshSpec, *meshPtr); - DomainViewType elemSpheres = create_elem_spheres(*bulkPtr, radius); - RangeViewType nodePoints = create_node_points(*bulkPtr); - - const unsigned numLocalElems = stk::mesh::count_entities(*bulkPtr, stk::topology::ELEM_RANK, meta.locally_owned_part()); - const unsigned numLocalOwnedNodes = stk::mesh::count_entities(*bulkPtr, stk::topology::NODE_RANK, meta.locally_owned_part()); + const unsigned numLocalElems = stk::mesh::count_entities(*meshPtr, 
stk::topology::ELEM_RANK, meta.locally_owned_part()); + const unsigned numLocalOwnedNodes = stk::mesh::count_entities(*meshPtr, stk::topology::NODE_RANK, meta.locally_owned_part()); stk::mesh::Selector sharedAndOwned = meta.globally_shared_part() & meta.locally_owned_part(); - const unsigned numSharedAndOwnedNodes = stk::mesh::count_entities(*bulkPtr, stk::topology::NODE_RANK, sharedAndOwned); + const unsigned numSharedAndOwnedNodes = stk::mesh::count_entities(*meshPtr, stk::topology::NODE_RANK, sharedAndOwned); +//BEGINngp_call_coarse_search + DomainViewType elemSpheres = create_elem_spheres(*meshPtr, radius); + RangeViewType nodePoints = create_node_points(*meshPtr); + EXPECT_EQ(elemSpheres.size(), numLocalElems); EXPECT_EQ(nodePoints.size(), numLocalOwnedNodes); @@ -239,24 +243,26 @@ TEST(HowToNgpSearch, elemNodeNeighbors) stk::search::SearchMethod searchMethod = stk::search::MORTON_LBVH; stk::ngp::ExecSpace execSpace = Kokkos::DefaultExecutionSpace{}; - const bool resultsParallelSymmetry = true; + const bool enforceSearchResultSymmetry = true; + MPI_Comm comm = meshPtr->parallel(); - stk::search::coarse_search(elemSpheres, nodePoints, searchMethod, bulkPtr->parallel(), searchResults, execSpace, resultsParallelSymmetry); + stk::search::coarse_search(elemSpheres, nodePoints, searchMethod, comm, searchResults, execSpace, enforceSearchResultSymmetry); +//ENDngp_call_coarse_search constexpr unsigned numNodesPerElement = 8; unsigned expectedNumResults = numLocalElems * numNodesPerElement; - if (resultsParallelSymmetry) { + if (enforceSearchResultSymmetry) { EXPECT_GE(searchResults.size(), expectedNumResults+numSharedAndOwnedNodes); } else { EXPECT_EQ(searchResults.size(), expectedNumResults); } - ghost_node_neighbors_to_elements(*bulkPtr, searchResults, execSpace); + ghost_node_neighbors_to_elements(*meshPtr, searchResults, execSpace); - unpack_search_results_into_field(*bulkPtr, neighborField, searchResults, execSpace); + unpack_search_results_into_field(*meshPtr, 
neighborField, searchResults, execSpace); - verify_8_neighbors_per_element(*bulkPtr, neighborField); + verify_8_neighbors_per_element(*meshPtr, neighborField); } } // namespace doc_test diff --git a/packages/stk/stk_doc_tests/stk_search/howToUseCoarseSearch.cpp b/packages/stk/stk_doc_tests/stk_search/howToUseCoarseSearch.cpp index e76d487f6b8b..95bf4e17518a 100644 --- a/packages/stk/stk_doc_tests/stk_search/howToUseCoarseSearch.cpp +++ b/packages/stk/stk_doc_tests/stk_search/howToUseCoarseSearch.cpp @@ -152,7 +152,6 @@ TEST(StkSearchHowTo, useCoarseSearch) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh(meshSpec, *mesh); // Point in element 1 @@ -186,7 +185,7 @@ TEST(StkSearchHowTo, useCoarseSearch) stk::mesh::EntityIdVector expectedSendIds{1, 3, 5, 7}; - for (auto& result : coarseSearchResult) { + for (Relation& result : coarseSearchResult) { const Hex8SourceMesh::EntityKey sendEntityKey = result.second.id(); EXPECT_EQ(stk::topology::ELEM_RANK, sendEntityKey.rank()); diff --git a/packages/stk/stk_doc_tests/stk_search/howToUseFilterCoarseSearch.cpp b/packages/stk/stk_doc_tests/stk_search/howToUseFilterCoarseSearch.cpp index 1e14798e00cd..2a093ad753fe 100644 --- a/packages/stk/stk_doc_tests/stk_search/howToUseFilterCoarseSearch.cpp +++ b/packages/stk/stk_doc_tests/stk_search/howToUseFilterCoarseSearch.cpp @@ -53,7 +53,6 @@ TEST(StkSearchHowTo, useFilterCoarseSearch) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh(meshSpec, *mesh); // Point in element 1 diff --git a/packages/stk/stk_doc_tests/stk_transfer/howToUseCopyTransfer.cpp b/packages/stk/stk_doc_tests/stk_transfer/howToUseCopyTransfer.cpp index 5c426b089d42..242a42e95ac5 100644 --- 
a/packages/stk/stk_doc_tests/stk_transfer/howToUseCopyTransfer.cpp +++ b/packages/stk/stk_doc_tests/stk_transfer/howToUseCopyTransfer.cpp @@ -103,14 +103,12 @@ TEST(StkTransferHowTo, useCopyTransfer) builder.set_spatial_dimension(spatialDim); std::shared_ptr meshA = builder.create(); stk::mesh::MetaData& metaA = meshA->mesh_meta_data(); - metaA.use_simple_fields(); DoubleField & scalarFieldNodeA = metaA.declare_field(stk::topology::NODE_RANK, "Node Scalar Field"); stk::mesh::put_field_on_mesh(scalarFieldNodeA, metaA.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *meshA); std::shared_ptr meshB = builder.create(); stk::mesh::MetaData& metaB = meshB->mesh_meta_data(); - metaB.use_simple_fields(); DoubleField & scalarFieldNodeB = metaB.declare_field(stk::topology::NODE_RANK, "Node Scalar Field"); stk::mesh::put_field_on_mesh(scalarFieldNodeB, metaB.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *meshB); diff --git a/packages/stk/stk_doc_tests/stk_transfer/howToUseLeastSquaresInterpolation.cpp b/packages/stk/stk_doc_tests/stk_transfer/howToUseLeastSquaresInterpolation.cpp index 063dcf70c189..e136c3ad7877 100644 --- a/packages/stk/stk_doc_tests/stk_transfer/howToUseLeastSquaresInterpolation.cpp +++ b/packages/stk/stk_doc_tests/stk_transfer/howToUseLeastSquaresInterpolation.cpp @@ -183,7 +183,6 @@ TEST(StkTransferHowTo, useNodeLinearLeastSquaresInterpolation) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field &transferField = meta.declare_field(stk::topology::NODE_RANK, "transfer_field", 1); stk::mesh::put_field_on_mesh(transferField, meta.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *mesh); @@ -238,7 +237,6 @@ TEST(StkTransferHowTo, useElementCentroidLinearLeastSquaresInterpolation) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = 
mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field &transferField = meta.declare_field(stk::topology::ELEM_RANK, "transfer_field", 1); stk::mesh::put_field_on_mesh(transferField, meta.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *mesh); @@ -292,7 +290,6 @@ TEST(StkTransferHowTo, useElementCentroidLinearMovingLeastSquaresInterpolation) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field &transferField = meta.declare_field(stk::topology::ELEM_RANK, "transfer_field", 1); stk::mesh::put_field_on_mesh(transferField, meta.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *mesh); @@ -348,7 +345,6 @@ TEST(StkTransferHowTo, useElementCentroidQuadraticLeastSquaresInterpolation) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field &transferField = meta.declare_field(stk::topology::ELEM_RANK, "transfer_field", 1); stk::mesh::put_field_on_mesh(transferField, meta.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *mesh); @@ -401,7 +397,6 @@ TEST(StkTransferHowTo, useElementCentroidCubicLeastSquaresInterpolation) builder.set_spatial_dimension(spatialDim); std::shared_ptr mesh = builder.create(); stk::mesh::MetaData& meta = mesh->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Field &transferField = meta.declare_field(stk::topology::ELEM_RANK, "transfer_field", 1); stk::mesh::put_field_on_mesh(transferField, meta.universal_part(), &initVals); stk::io::fill_mesh(meshSpec, *mesh); diff --git a/packages/stk/stk_doc_tests/stk_util/TimerHowTo.cpp b/packages/stk/stk_doc_tests/stk_util/TimerHowTo.cpp index 5d9677d3f8fa..708a0aa82e40 100644 --- a/packages/stk/stk_doc_tests/stk_util/TimerHowTo.cpp +++ b/packages/stk/stk_doc_tests/stk_util/TimerHowTo.cpp @@ -76,7 +76,7 @@ 
totalTestRuntime 1 SKIP SKIP 00:00 \ Took 0.0001 seconds to generate the table above. \ "; - using stk::unit_test_util::simple_fields::areStringsEqualWithToleranceForNumbers; + using stk::unit_test_util::areStringsEqualWithToleranceForNumbers; EXPECT_TRUE(areStringsEqualWithToleranceForNumbers(expectedOutput, outputStream.str(), tolerance)); } @@ -126,7 +126,7 @@ totalTestRuntime 1 SKIP SKIP 00:00:0 \ Took 0.0001 seconds to generate the table above. \ "; - using stk::unit_test_util::simple_fields::areStringsEqualWithToleranceForNumbers; + using stk::unit_test_util::areStringsEqualWithToleranceForNumbers; EXPECT_TRUE(areStringsEqualWithToleranceForNumbers(expectedOutput, outputStream.str(), tolerance)); stk::diag::deleteRootTimer(rootTimer); @@ -173,7 +173,7 @@ totalTestRuntime 1 SKIP SKIP 00:00:0 \ Took 0.0001 seconds to generate the table above. \ "; - using stk::unit_test_util::simple_fields::areStringsEqualWithToleranceForNumbers; + using stk::unit_test_util::areStringsEqualWithToleranceForNumbers; EXPECT_TRUE(areStringsEqualWithToleranceForNumbers(expectedOutput, outputStream.str(), tolerance)); stk::diag::deleteRootTimer(rootTimer); diff --git a/packages/stk/stk_doc_tests/stk_util/TimerHowToParallel.cpp b/packages/stk/stk_doc_tests/stk_util/TimerHowToParallel.cpp index ad8a9bf40d13..433c50ab67e3 100644 --- a/packages/stk/stk_doc_tests/stk_util/TimerHowToParallel.cpp +++ b/packages/stk/stk_doc_tests/stk_util/TimerHowToParallel.cpp @@ -92,7 +92,7 @@ totalTestRuntime 2 SKIP SKIP SKIP SKIP SKIP SKI Took SKIP seconds to generate the table above. 
\ "; std::cerr<STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_expreval-root-inc) : usage-requirements $(stk_expreval-root-inc) diff --git a/packages/stk/stk_expreval/stk_expreval/CMakeLists.txt b/packages/stk/stk_expreval/stk_expreval/CMakeLists.txt index bbd65810dce9..c18485819eb6 100644 --- a/packages/stk/stk_expreval/stk_expreval/CMakeLists.txt +++ b/packages/stk/stk_expreval/stk_expreval/CMakeLists.txt @@ -61,5 +61,6 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_expreval) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_expreval DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_expreval EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) + endif() diff --git a/packages/stk/stk_expreval/stk_expreval/Eval.cpp b/packages/stk/stk_expreval/stk_expreval/Eval.cpp index bec8d1296fd6..f46ff7b01d14 100644 --- a/packages/stk/stk_expreval/stk_expreval/Eval.cpp +++ b/packages/stk/stk_expreval/stk_expreval/Eval.cpp @@ -369,6 +369,7 @@ Eval::initialize_function_map() m_functionMap["cycloidal_ramp"] = FunctionType::CYCLOIDAL_RAMP; m_functionMap["cos_ramp"] = FunctionType::COS_RAMP; m_functionMap["cosine_ramp"] = FunctionType::COS_RAMP; + m_functionMap["linear_ramp"] = FunctionType::LINEAR_RAMP; m_functionMap["haversine_pulse"] = FunctionType::HAVERSINE_PULSE; m_functionMap["point2d"] = FunctionType::POINT2D; m_functionMap["point3d"] = FunctionType::POINT3D; diff --git a/packages/stk/stk_expreval/stk_expreval/Function.cpp b/packages/stk/stk_expreval/stk_expreval/Function.cpp index 84d40d783604..57bc443c1dab 100644 --- a/packages/stk/stk_expreval/stk_expreval/Function.cpp +++ b/packages/stk/stk_expreval/stk_expreval/Function.cpp @@ -306,6 +306,7 @@ CFunctionMap::CFunctionMap() (*this).emplace("cosine_ramp", new CFunction1(cosine_ramp1)); (*this).emplace("cosine_ramp", new CFunction2(cosine_ramp2)); (*this).emplace("cosine_ramp", new CFunction3(cosine_ramp3)); + (*this).emplace("linear_ramp", new 
CFunction3(linear_ramp3)); (*this).emplace("haversine_pulse", new CFunction3(haversine_pulse)); (*this).emplace("cycloidal_ramp", new CFunction3(cycloidal_ramp)); diff --git a/packages/stk/stk_expreval/stk_expreval/Function.hpp b/packages/stk/stk_expreval/stk_expreval/Function.hpp index 18f65ac24b5e..d8b91f959bdb 100644 --- a/packages/stk/stk_expreval/stk_expreval/Function.hpp +++ b/packages/stk/stk_expreval/stk_expreval/Function.hpp @@ -100,6 +100,7 @@ enum class FunctionType { UNIT_STEP, CYCLOIDAL_RAMP, COS_RAMP, + LINEAR_RAMP, HAVERSINE_PULSE, POINT2D, POINT3D, @@ -372,6 +373,20 @@ double cosine_ramp3(double t, double t1, double t2) } } +KOKKOS_INLINE_FUNCTION +double linear_ramp3(double t, double t1, double t2) +{ + if (t < t1) { + return 0.0; + } + else if (t < t2) { + return (t - t1)/(t2 - t1); + } + else { + return 1.0; + } +} + KOKKOS_INLINE_FUNCTION double haversine_pulse(double t, double t1, double t2) { diff --git a/packages/stk/stk_expreval/stk_expreval/NgpNode.hpp b/packages/stk/stk_expreval/stk_expreval/NgpNode.hpp index 970afd4de548..e55671dc92da 100644 --- a/packages/stk/stk_expreval/stk_expreval/NgpNode.hpp +++ b/packages/stk/stk_expreval/stk_expreval/NgpNode.hpp @@ -567,6 +567,13 @@ class NgpNode STK_NGP_ThrowErrorMsg("Incorrect number of arguments for cos_ramp or cosine_ramp function"); break; } + case FunctionType::LINEAR_RAMP : { + if (argumentCount == 3) { + return linear_ramp3(arguments[0], arguments[1], arguments[2]); + } + STK_NGP_ThrowErrorMsg("Incorrect number of arguments for linear_ramp function"); + break; + } case FunctionType::HAVERSINE_PULSE : { if (argumentCount == 3) { return haversine_pulse(arguments[0], arguments[1], arguments[2]); diff --git a/packages/stk/stk_integration_tests/Jamfile b/packages/stk/stk_integration_tests/Jamfile index 2965ffe2181a..76253354a3d3 100644 --- a/packages/stk/stk_integration_tests/Jamfile +++ b/packages/stk/stk_integration_tests/Jamfile @@ -41,7 +41,8 @@ project votd : requirements 
$(sierra-warnings) $(stk_integration_tests-root) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM : usage-requirements $(stk_integration_tests-root) : build-dir $(stk_integration_tests-builddir) diff --git a/packages/stk/stk_integration_tests/cmake_install_test/build_stk_standalone_serial_using_cmake b/packages/stk/stk_integration_tests/cmake_install_test/build_stk_standalone_serial_using_cmake index 5efb5e5b3a25..d2a8d5faa48e 100755 --- a/packages/stk/stk_integration_tests/cmake_install_test/build_stk_standalone_serial_using_cmake +++ b/packages/stk/stk_integration_tests/cmake_install_test/build_stk_standalone_serial_using_cmake @@ -44,7 +44,7 @@ stk_ctest_log=${output_dir}/stk-standalone-serial-ctest.out.$date_suffix exe cp ${stk_cmake_testing_source_dir}/run_cmake_stk_standalone_serial ${stk_build_dir} exe cd ${stk_build_dir} -exe source ${stk_cmake_testing_source_dir}/load_gcc_modules_no_boost +exe source ${stk_cmake_testing_source_dir}/load_aue_serial_modules_no_boost printf "Configuring stk (running cmake)...\n"; exe "STK_SRC_DIR=${stk_src_dir} \ diff --git a/packages/stk/stk_integration_tests/cmake_install_test/build_stk_using_cmake b/packages/stk/stk_integration_tests/cmake_install_test/build_stk_using_cmake index 06c4c46b2baf..503fa56863b3 100755 --- a/packages/stk/stk_integration_tests/cmake_install_test/build_stk_using_cmake +++ b/packages/stk/stk_integration_tests/cmake_install_test/build_stk_using_cmake @@ -14,6 +14,7 @@ output_dir=${OUTPUT_DIR:-${PWD}/../stk-cmake-testing} trilinos_dir=${output_dir}/Trilinos cuda_on_or_off=${CUDA:-OFF} clear_cache=${CLEAR_CACHE:-ON} +incremental_build=${INCREMENTAL_BUILD:-OFF} build_type=${CMAKE_BUILD_TYPE:-release} date_suffix=`date +%F_%H-%M-%S` @@ -23,16 +24,20 @@ if [ ! 
-d ${output_dir} ] ; then fi stk_cmake_testing_source_dir=${sierra_proj}/stk/stk_integration_tests/cmake_install_test +if [ "${incremental_build}" == "OFF" ] ; then ${stk_cmake_testing_source_dir}/create_workspace.sh ${trilinos_dir} ${sierra_proj} +fi trilinos_install_dir=${output_dir}/trilinos_install_${build_type}_gcc exe rm -rf $trilinos_install_dir stk_build_dir=${output_dir}/stk_build_${build_type}_gcc +if [ "${incremental_build}" == "OFF" ] ; then if [ "${clear_cache}" == "ON" ] ; then exe rm -rf $stk_build_dir exe mkdir -p $stk_build_dir fi +fi printf "\nUsing sierra project: ${sierra_proj}\n"; printf "Using build-type: ${build_type}\n"; @@ -52,6 +57,7 @@ exe cd ${stk_build_dir} exe source ${stk_cmake_testing_source_dir}/load_gcc_modules +if [ "${incremental_build}" == "OFF" ] ; then printf "Configuring trilinos/stk (running cmake)...\n"; exe "TRILINOS_DIR=${trilinos_dir} \ TRILINOS_INSTALL_DIR=${trilinos_install_dir} \ @@ -63,6 +69,7 @@ if [ $? -ne 0 ] ; then echo "!! error in stk/trilinos config, check output in ${stk_config_log} !!"; exit 1; fi +fi printf "Now building trilinos/stk using make...\n"; exe "make VERBOSE=1 -j8 >& ${stk_make_log}"; diff --git a/packages/stk/stk_integration_tests/cmake_install_test/load_aue_serial_modules_no_boost b/packages/stk/stk_integration_tests/cmake_install_test/load_aue_serial_modules_no_boost new file mode 100644 index 000000000000..1e104d993024 --- /dev/null +++ b/packages/stk/stk_integration_tests/cmake_install_test/load_aue_serial_modules_no_boost @@ -0,0 +1,6 @@ +#!/bin/bash + +module load aue/cmake/3.27.7 +module load aue/gcc/10.3.0 +module load aue/netlib-lapack/3.11.0-gcc-10.3.0 + diff --git a/packages/stk/stk_integration_tests/cmake_install_test/load_gcc_modules b/packages/stk/stk_integration_tests/cmake_install_test/load_gcc_modules index 956c08717861..7ee7b25160f8 100644 --- a/packages/stk/stk_integration_tests/cmake_install_test/load_gcc_modules +++ 
b/packages/stk/stk_integration_tests/cmake_install_test/load_gcc_modules @@ -5,10 +5,10 @@ module load aue/gcc/10.3.0 module load aue/metis/5.1.0-gcc-10.3.0 module load aue/netlib-lapack/3.11.0-gcc-10.3.0 -#export BLAS_LIBRARIES=${BLAS_LIBRARIES:-/usr/lib64/libblas.so} -#export LAPACK_LIBRARIES=${LAPACK_LIBRARIES:-/usr/lib64/liblapack.so} -#export TPL_BLAS_LIBRARIES=${BLAS_LIBRARIES:-/usr/lib64/libblas.so} -#export TPL_LAPACK_LIBRARIES=${LAPACK_LIBRARIES:-/usr/lib64/liblapack.so} +export BLAS_LIBRARIES=${BLAS_LIBRARIES:-$BLAS_ROOT/lib64/libblas.so} +export LAPACK_LIBRARIES=${LAPACK_LIBRARIES:-$LAPACK_ROOT/lib64/liblapack.so} +export TPL_BLAS_LIBRARIES=${BLAS_LIBRARIES:-$BLAS_ROOT/lib64/libblas.so} +export TPL_LAPACK_LIBRARIES=${LAPACK_LIBRARIES:-$LAPACK_ROOT/lib64/liblapack.so} module load aue/openmpi/4.1.6-gcc-10.3.0 module load aue/hdf5/1.14.2-gcc-10.3.0-openmpi-4.1.6 @@ -16,13 +16,3 @@ module load aue/netcdf-c/4.9.2-gcc-10.3.0-openmpi-4.1.6 module load aue/parmetis/4.0.3-gcc-10.3.0-openmpi-4.1.6 module load aue/parallel-netcdf/1.12.3-gcc-10.3.0-openmpi-4.1.6 - -### Corresponding CDE v3 modules -- as of 2/6/23 these gave link errors -#module load cde/v3/gcc/10.3.0 -#module load cde/v3/openmpi/4.1.2-gcc-10.3.0 -#module load cde/v3/hdf5/1.10.6-gcc-10.3.0-openmpi-4.1.2 -#module load cde/v3/netcdf-c/4.8.1-gcc-10.3.0-openmpi-4.1.2 -#module load cde/v3/parallel-netcdf/1.12.2-gcc-10.3.0-openmpi-4.1.2 -#module load cde/v3/metis/5.1.0-gcc-10.3.0 -#module load cde/v3/parmetis/4.0.3-gcc-10.3.0-openmpi-4.1.2 - diff --git a/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk b/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk index ddc0f06b7a46..4d4b62032503 100755 --- a/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk +++ b/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk @@ -64,8 +64,9 @@ cmake \ -DTrilinos_ENABLE_Zoltan2:BOOL=ON \ -DZoltan2_ENABLE_ParMETIS:BOOL=ON \ -DTrilinos_ENABLE_Pamgen:BOOL=ON \ 
--DTrilinos_ENABLE_Krino:BOOL=OFF \ +-DTrilinos_ENABLE_Krino:BOOL=ON \ -DTrilinos_ENABLE_Percept:BOOL=ON \ +-DTrilinos_ENABLE_TrilinosCouplings:BOOL=ON \ -DTrilinos_ENABLE_Panzer:BOOL=${not_cuda} \ -DTrilinos_ENABLE_PanzerAdaptersSTK:BOOL=${not_cuda} \ -DPanzer_ENABLE_TESTS:BOOL=${not_cuda} \ @@ -77,7 +78,6 @@ cmake \ -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE:BOOL=ON \ -DKokkos_ARCH_VOLTA70=${cuda_on_or_off} \ -DTrilinos_ENABLE_KokkosKernels:BOOL=ON \ --DTrilinos_ENABLE_Zoltan:BOOL=ON \ -DTrilinos_ENABLE_Fortran:BOOL=ON \ -DCMAKE_CXX_STANDARD:STRING=17 \ -DCMAKE_CXX_FLAGS:STRING="-D${fortran_macro} ${cmake_cxx_flags} -Werror=dangling-else" \ diff --git a/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk_standalone_serial b/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk_standalone_serial index f1c0b827b79a..30ac86d21f52 100755 --- a/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk_standalone_serial +++ b/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk_standalone_serial @@ -14,9 +14,8 @@ printf "STK_INSTALL_DIR=${stk_install_dir}\n"; printf "\nTo change these vars, set as env vars or pass to this script like 'VAR=value run_pure_cmake_stk'\n\n"; # before running this script, load these modules: -# module load cde/v3/cmake/3.23.1 -# module load cde/v3/gcc/10.3.0 -# module load cde/v3/openmpi/4.1.2-gcc-10.3.0 +# module load aue/cmake/3.27.7 +# module load aue/gcc/12.3.0 mkdir -p $build_dir diff --git a/packages/stk/stk_integration_tests/cmake_install_test/spack.cuda.yaml b/packages/stk/stk_integration_tests/cmake_install_test/spack.cuda.yaml new file mode 100644 index 000000000000..f0c30bcf6301 --- /dev/null +++ b/packages/stk/stk_integration_tests/cmake_install_test/spack.cuda.yaml @@ -0,0 +1,156 @@ +# This is a Spack Environment file. +# +# It describes a set of packages to be installed, along with +# configuration settings. 
+spack: + # add package specs to the `specs` list + specs: + - hdf5@1.14.3~shared + - zlib + - openmpi@4.1.6 + - kokkos+cuda+cuda_constexpr+cuda_lambda+cuda_relocatable_device_code~cuda_uvm~shared+wrapper cuda_arch=70 + - trilinos@develop~boost+cuda+cuda_rdc+exodus+kokkos~shared+stk+test~uvm+wrapper cuda_arch=70 cxxstd=17 + view: true + concretizer: + unify: true + config: + install_tree: + root: SED_REPLACE_INSTALL_PATH + compilers: + - compiler: + spec: gcc@=10.3.0 + paths: + cc: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/gcc + cxx: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/g++ + f77: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/gfortran + fc: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/gfortran + flags: {} + operating_system: rhel8 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] + - compiler: + spec: gcc@=8.5.0 + paths: + cc: /usr/bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + flags: {} + operating_system: rhel8 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] + develop: + trilinos: + path: SED_REPLACE_TRILINOS_PATH + spec: trilinos@=develop + packages: + binutils: + externals: + - spec: binutils@2.41 + prefix: /projects/aue/cee/builds/x86_64/rhel8/3aa2f152/tooling-24.08/install/linux-rhel8-x86_64/gcc-10.3.0/binutils-2.41-uxjiarj + - spec: binutils@2.30.123 + prefix: /usr + coreutils: + externals: + - spec: coreutils@8.30 + prefix: /usr + diffutils: + externals: + - spec: diffutils@3.6 + prefix: /usr + findutils: + externals: + - spec: findutils@4.6.0 + prefix: /usr + gawk: + externals: + - spec: gawk@4.2.1 + prefix: /usr + openssh: + externals: + - spec: openssh@8.0p1 + prefix: /usr + openssl: + 
externals: + - spec: openssl@1.1.1k + prefix: /usr + tar: + externals: + - spec: tar@1.30 + prefix: /usr + autoconf: + externals: + - spec: autoconf@2.69 + prefix: /usr + automake: + externals: + - spec: automake@1.16.1 + prefix: /usr + bison: + externals: + - spec: bison@3.0.4 + prefix: /usr + cmake: + externals: + - spec: cmake@3.26.5 + prefix: /usr + curl: + externals: + - spec: curl@7.61.1+gssapi+ldap+nghttp2 + prefix: /usr + cvs: + externals: + - spec: cvs@1.11.23 + prefix: /usr + doxygen: + externals: + - spec: doxygen@1.8.14+graphviz~mscgen + prefix: /usr + flex: + externals: + - spec: flex@2.6.1+lex + prefix: /usr + git: + externals: + - spec: git@2.42.0+tcltk + prefix: /projects/aue/cee/builds/x86_64/rhel8/33ebcb7e/tooling-sprint-24.02/install/linux-rhel8-x86_64/gcc-10.3.0/git-2.42.0-zueya4d + - spec: git@2.39.3~tcltk + prefix: /usr + gmake: + externals: + - spec: gmake@4.2.1 + prefix: /usr + groff: + externals: + - spec: groff@1.22.3 + prefix: /usr + m4: + externals: + - spec: m4@1.4.18 + prefix: /usr + pkgconf: + externals: + - spec: pkgconf@1.4.2 + prefix: /usr + subversion: + externals: + - spec: subversion@1.10.2 + prefix: /usr + swig: + externals: + - spec: swig@3.0.12 + prefix: /usr + texinfo: + externals: + - spec: texinfo@6.5 + prefix: /usr + openmpi: + buildable: false + externals: + - spec: openmpi@4.1.6 + modules: + - aue/openmpi/4.1.6-gcc-10.3.0 diff --git a/packages/stk/stk_integration_tests/cmake_install_test/spack.gcc.yaml b/packages/stk/stk_integration_tests/cmake_install_test/spack.gcc.yaml new file mode 100644 index 000000000000..3e42ee32771e --- /dev/null +++ b/packages/stk/stk_integration_tests/cmake_install_test/spack.gcc.yaml @@ -0,0 +1,156 @@ +# This is a Spack Environment file. +# +# It describes a set of packages to be installed, along with +# configuration settings. 
+spack: + # add package specs to the `specs` list + specs: + - hdf5@1.14.3~shared + - zlib + - openmpi@4.1.6 + - kokkos~shared + - trilinos@develop~boost+exodus+kokkos~shared+stk+test + view: true + concretizer: + unify: true + config: + install_tree: + root: SED_REPLACE_INSTALL_PATH + compilers: + - compiler: + spec: gcc@=10.3.0 + paths: + cc: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/gcc + cxx: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/g++ + f77: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/gfortran + fc: /projects/aue/cee/builds/x86_64/rhel8/818788d6/gcc-10.3.0/install/linux-rhel8-x86_64/gcc-8.5.0/gcc-10.3.0-6ljscis/bin/gfortran + flags: {} + operating_system: rhel8 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] + - compiler: + spec: gcc@=8.5.0 + paths: + cc: /usr/bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + flags: {} + operating_system: rhel8 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] + develop: + trilinos: + path: SED_REPLACE_TRILINOS_PATH + spec: trilinos@=develop + packages: + binutils: + externals: + - spec: binutils@2.41 + prefix: /projects/aue/cee/builds/x86_64/rhel8/3aa2f152/tooling-24.08/install/linux-rhel8-x86_64/gcc-10.3.0/binutils-2.41-uxjiarj + - spec: binutils@2.30.123 + prefix: /usr + coreutils: + externals: + - spec: coreutils@8.30 + prefix: /usr + diffutils: + externals: + - spec: diffutils@3.6 + prefix: /usr + findutils: + externals: + - spec: findutils@4.6.0 + prefix: /usr + gawk: + externals: + - spec: gawk@4.2.1 + prefix: /usr + openssh: + externals: + - spec: openssh@8.0p1 + prefix: /usr + openssl: + externals: + - spec: openssl@1.1.1k + prefix: /usr + tar: + externals: + - spec: tar@1.30 + prefix: /usr + autoconf: + externals: + - spec: 
autoconf@2.69 + prefix: /usr + automake: + externals: + - spec: automake@1.16.1 + prefix: /usr + bison: + externals: + - spec: bison@3.0.4 + prefix: /usr + cmake: + externals: + - spec: cmake@3.26.5 + prefix: /usr + curl: + externals: + - spec: curl@7.61.1+gssapi+ldap+nghttp2 + prefix: /usr + cvs: + externals: + - spec: cvs@1.11.23 + prefix: /usr + doxygen: + externals: + - spec: doxygen@1.8.14+graphviz~mscgen + prefix: /usr + flex: + externals: + - spec: flex@2.6.1+lex + prefix: /usr + git: + externals: + - spec: git@2.42.0+tcltk + prefix: /projects/aue/cee/builds/x86_64/rhel8/33ebcb7e/tooling-sprint-24.02/install/linux-rhel8-x86_64/gcc-10.3.0/git-2.42.0-zueya4d + - spec: git@2.39.3~tcltk + prefix: /usr + gmake: + externals: + - spec: gmake@4.2.1 + prefix: /usr + groff: + externals: + - spec: groff@1.22.3 + prefix: /usr + m4: + externals: + - spec: m4@1.4.18 + prefix: /usr + pkgconf: + externals: + - spec: pkgconf@1.4.2 + prefix: /usr + subversion: + externals: + - spec: subversion@1.10.2 + prefix: /usr + swig: + externals: + - spec: swig@3.0.12 + prefix: /usr + texinfo: + externals: + - spec: texinfo@6.5 + prefix: /usr + openmpi: + buildable: false + externals: + - spec: openmpi@4.1.6 + modules: + - aue/openmpi/4.1.6-gcc-10.3.0 diff --git a/packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_cuda.sh b/packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_cuda.sh new file mode 100755 index 000000000000..d1dc0f318ff8 --- /dev/null +++ b/packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_cuda.sh @@ -0,0 +1,132 @@ +#!/usr/bin/bash + +exe() { + stdbuf -o0 -e0 echo "% $@" ; + eval "$@" ; + if [ $? -ne 0 ] ; then + echo "'$@' failed."; + return 1; + fi +} + +# To specify custom paths for one or more of the following, set +# the variable on the command line when running this script. 
+# Example: +# $ TRILINOS=/my/path/trilinos source stk_spack_create_env_cuda.sh + +work_dir=${STK_SPACK_WORK_DIR:-/fgs/$USER/stk-spack-testing-cuda} +trilinos_source=${TRILINOS:-/fgs/$USER/Trilinos} +sierra_source=${SIERRA:-/fgs/$USER/code} + +stk_spack_env=CUDA +STK_SPACK_WORK_DIR=${work_dir} +TRILINOS=${trilinos_source} +SIERRA=${sierra_source} + +printf "using STK_SPACK_WORK_DIR=${STK_SPACK_WORK_DIR}\n"; +printf "using TRILINOS=${TRILINOS}\n"; +printf "using SIERRA=${SIERRA}\n"; + +if [ ! -d ${trilinos_source} ] ; then + printf "ERROR, TRILINOS location not specified or not a directory.\n"; + return 1; +fi + +if [ ! -d ${sierra_source} ] ; then + printf "ERROR, SIERRA location not specified or not a directory.\n"; + return 1; +fi + +printf "copying stk directory from SIERRA to TRILINOS...\n"; +exe rm -rf ${trilinos_source}/packages/stk +exe cp -r ${sierra_source}/stk ${trilinos_source}/packages + +printf "Setting up spack env 'stkSpackTesting' in STK_SPACK_WORK_DIR=${work_dir}\n" + +exe mkdir -p ${work_dir} +exe cd ${work_dir} +exe rm -rf spack spack.yaml stk_test_app + +exe module load aue/python/3.11.6 +exe module load aue/git/2.42.0 +exe module load aue/netlib-lapack/3.11.0-gcc-10.3.0 +exe module load aue/openmpi/4.1.6-gcc-10.3.0 + +exe git clone --depth=100 --branch=releases/latest git@github.com:spack/spack.git +exe source ./spack/share/spack/setup-env.sh + +exe spack env create stkSpackTesting + +exe module load aue/gcc/10.3.0 +exe spack compiler add +exe spack compilers + +#make a copy of spack.cuda.yaml before editing it +exe cp ${sierra_source}/stk/stk_integration_tests/cmake_install_test/spack.cuda.yaml ${work_dir}/spack.yaml +spack_yaml_file=${work_dir}/spack.yaml + +exe sed -i s@SED_REPLACE_INSTALL_PATH@"${work_dir}/install"@g ${spack_yaml_file} +exe sed -i s@SED_REPLACE_TRILINOS_PATH@"${trilinos_source}"@g ${spack_yaml_file} +exe spack config add -f ${spack_yaml_file} +exe spack env activate stkSpackTesting + +#why do we still need the following 
'spack add' commands? +#shouldn't they be loaded when we activate the environment? These specs +#are in the spack.yaml file that we just added before activating the env... + +exe spack add hdf5@1.14.3~shared +exe spack add zlib +exe spack add ncurses@6.3 +exe spack add openmpi@4.1.6 +exe spack add cuda@11.4.4 +exe spack add kokkos+cuda+wrapper+cuda_constexpr+cuda_lambda+cuda_relocatable_device_code~shared cuda_arch=70 +exe spack add trilinos@develop+cuda+cuda_rdc+exodus+stk+kokkos+wrapper~amesos~epetra~shared~boost cuda_arch=70 cxxstd=17 + +# don't need the following 'spack develop' command since we have specified it in +# our pre-packaged spack.cuda.yaml file. +# exe spack develop trilinos@develop -p ${trilinos_source} + +exe spack concretize -f +if [ $? -ne 0 ] ; then + printf "!! error running spack concretize\n"; + return 1; +fi + +exe spack install +if [ $? -ne 0 ] ; then + printf "!! error running spack install\n"; + return 1; +fi + +exe spack load cmake +exe spack load openmpi + +printf "setting OMPI_CXX for CUDA environment\n"; +export OMPI_CXX=$(find $(spack location -i kokkos) -name nvcc_wrapper) + +printf "copying stk test app from SIERRA...\n"; +exe cp -r ${sierra_source}/stk/stk_integration_tests/cmake_install_test/stk_test_app . + +exe cd stk_test_app + +exe source run_cmake_in_spack_env +if [ $? -ne 0 ] ; then + printf "!! error running cmake\n"; + return 1; +fi + +exe make +if [ $? -ne 0 ] ; then + printf "!! error building\n"; + return 1; +fi + +exe mpirun --np 4 ./test_stk_app +if [ $? -ne 0 ] ; then + printf "!! 
error running test_stk_app\n"; + return 1; +fi + +printf "all done, SUCCESS!\n"; +return 0 + diff --git a/packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_gcc.sh b/packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_gcc.sh new file mode 100755 index 000000000000..beedac3c81ce --- /dev/null +++ b/packages/stk/stk_integration_tests/cmake_install_test/stk_spack_build_test_gcc.sh @@ -0,0 +1,124 @@ +#!/usr/bin/bash + +exe() { + stdbuf -o0 -e0 echo "% $@" ; + eval "$@" ; + if [ $? -ne 0 ] ; then + echo "'$@' failed."; + return 1; + fi +} + +# To specify custom paths for one or more of the following, set +# the variable on the command line when running this script. +# Example: +# $ TRILINOS=/my/path/trilinos source stk_spack_create_env_gcc.sh + +work_dir=${STK_SPACK_WORK_DIR:-/fgs/$USER/stk-spack-testing-gcc} +trilinos_source=${TRILINOS:-/fgs/$USER/Trilinos} +sierra_source=${SIERRA:-/fgs/$USER/code} + +stk_spack_env=CUDA +STK_SPACK_WORK_DIR=${work_dir} +TRILINOS=${trilinos_source} +SIERRA=${sierra_source} + +printf "using STK_SPACK_WORK_DIR=${STK_SPACK_WORK_DIR}\n"; +printf "using TRILINOS=${TRILINOS}\n"; +printf "using SIERRA=${SIERRA}\n"; + +if [ ! -d ${trilinos_source} ] ; then + printf "ERROR, TRILINOS location not specified or not a directory.\n"; + return 1; +fi + +if [ ! 
-d ${sierra_source} ] ; then + printf "ERROR, SIERRA location not specified or not a directory.\n"; + return 1; +fi + +printf "copying stk directory from SIERRA to TRILINOS...\n"; +exe rm -rf ${trilinos_source}/packages/stk +exe cp -r ${sierra_source}/stk ${trilinos_source}/packages + +printf "Setting up spack env 'stkSpackTesting' in STK_SPACK_WORK_DIR=${work_dir}\n" + +exe mkdir -p ${work_dir} +exe cd ${work_dir} +exe rm -rf spack spack.yaml stk_test_app + +exe module load aue/python/3.11.6 +exe module load aue/git/2.42.0 +exe module load aue/netlib-lapack/3.11.0-gcc-10.3.0 +exe module load aue/openmpi/4.1.6-gcc-10.3.0 + +exe git clone --depth=100 --branch=releases/latest git@github.com:spack/spack.git +exe source ./spack/share/spack/setup-env.sh + +exe spack env create stkSpackTesting + +exe module load aue/gcc/10.3.0 +exe spack compiler add +exe spack compilers + +#make a copy of spack.gcc.yaml before editing it +exe cp ${sierra_source}/stk/stk_integration_tests/cmake_install_test/spack.gcc.yaml ${work_dir}/spack.yaml + +spack_yaml_file=${work_dir}/spack.yaml + +exe sed -i s@SED_REPLACE_INSTALL_PATH@"${work_dir}/install"@g ${spack_yaml_file} +exe sed -i s@SED_REPLACE_TRILINOS_PATH@"${trilinos_source}"@g ${spack_yaml_file} +exe spack config add -f ${spack_yaml_file} +exe spack env activate stkSpackTesting + +#why do we still need the following 'spack add' commands? +#shouldn't they be loaded when we activate the environment? These specs +#are in the spack.yaml file that we just added before activating the env... + +exe spack add hdf5@1.14.3~shared +exe spack add zlib +exe spack add openmpi@4.1.6 +exe spack add kokkos~shared +exe spack add trilinos@develop+exodus+stk+test+kokkos~amesos~epetra~shared~boost + +exe spack concretize -f +if [ $? -ne 0 ] ; then + printf "!! error running spack concretize\n"; + return 1; +fi + +exe spack install +if [ $? -ne 0 ] ; then + printf "!! 
error running spack install\n"; + return 1; +fi + +exe spack load cmake +exe spack load openmpi + +printf "copying stk test app from SIERRA...\n"; +exe cp -r ${sierra_source}/stk/stk_integration_tests/cmake_install_test/stk_test_app . + +exe cd stk_test_app + +exe source run_cmake_in_spack_env +if [ $? -ne 0 ] ; then + printf "!! error running cmake\n"; + return 1; +fi + +exe make +if [ $? -ne 0 ] ; then + printf "!! error building\n"; + return 1; +fi + +exe mpirun --np 4 ./test_stk_app +if [ $? -ne 0 ] ; then + printf "!! error running test_stk_app\n"; + return 1; +fi + +printf "all done, SUCCESS!\n"; +return 0 + diff --git a/packages/stk/stk_integration_tests/cmake_install_test/stk_test_app/src/test_stk_io.cpp b/packages/stk/stk_integration_tests/cmake_install_test/stk_test_app/src/test_stk_io.cpp index 34405c7281ef..98e86bbe8977 100644 --- a/packages/stk/stk_integration_tests/cmake_install_test/stk_test_app/src/test_stk_io.cpp +++ b/packages/stk/stk_integration_tests/cmake_install_test/stk_test_app/src/test_stk_io.cpp @@ -15,7 +15,6 @@ void test_stk_io(stk::ParallelMachine comm, const std::string& meshSource, bool { std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); if (bulk->parallel_rank() == 0) { std::cout << "test_stk_io: meshSource="< bulk = stk::mesh::MeshBuilder(comm).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::StkMeshIoBroker ioBroker(comm); ioBroker.set_bulk_data(bulk); diff --git a/packages/stk/stk_integration_tests/mock_apps/mock_aria.cpp b/packages/stk/stk_integration_tests/mock_apps/mock_aria.cpp index b90a75b3d633..17d42bc9b642 100644 --- a/packages/stk/stk_integration_tests/mock_apps/mock_aria.cpp +++ b/packages/stk/stk_integration_tests/mock_apps/mock_aria.cpp @@ -41,7 +41,6 @@ class MockAria m_finalTime(), m_iWasToldToStop(), m_iWantToStop(false), - m_step(), m_sendTransfer(), m_recvTransfer1(), 
m_recvTransfer2(), @@ -357,7 +356,6 @@ class MockAria double m_finalTime; bool m_iWasToldToStop; bool m_iWantToStop; - int m_step; std::shared_ptr m_sendTransfer; std::shared_ptr m_recvTransfer1; std::shared_ptr m_recvTransfer2; diff --git a/packages/stk/stk_integration_tests/mock_apps/mock_fuego.cpp b/packages/stk/stk_integration_tests/mock_apps/mock_fuego.cpp index b8cdbd23376e..c3ad65a95a0f 100644 --- a/packages/stk/stk_integration_tests/mock_apps/mock_fuego.cpp +++ b/packages/stk/stk_integration_tests/mock_apps/mock_fuego.cpp @@ -30,7 +30,6 @@ class MockFuego m_currentTime(), m_finalTime(), m_step(), - m_doingSendTransfer(false), m_sendFieldName() {} @@ -195,7 +194,6 @@ class MockFuego double m_currentTime; double m_finalTime; int m_step; - bool m_doingSendTransfer; std::string m_sendFieldName; }; diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestAttributeOrdering.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestAttributeOrdering.cpp index cf8518a232a0..a51e2bd7ac82 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestAttributeOrdering.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestAttributeOrdering.cpp @@ -38,7 +38,7 @@ #include #include -class AttributeOrdering : public stk::unit_test_util::simple_fields::MeshFixture +class AttributeOrdering : public stk::unit_test_util::MeshFixture { public: AttributeOrdering() diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBalanceNodes.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBalanceNodes.cpp index 0593d9bc4f20..fd3c7e0a946c 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBalanceNodes.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBalanceNodes.cpp @@ -7,7 +7,7 @@ #include #include -class BalanceNodes : public stk::unit_test_util::simple_fields::MeshFixture {}; +class BalanceNodes : public stk::unit_test_util::MeshFixture {}; 
TEST_F(BalanceNodes, twoHex_initiallyImbalanced) { @@ -17,7 +17,7 @@ TEST_F(BalanceNodes, twoHex_initiallyImbalanced) balanceSettings.setUseNodeBalancer(true); setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), + stk::unit_test_util::setup_text_mesh(get_bulk(), "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "0,2,HEX_8,5,6,7,8,9,10,11,12"); @@ -38,7 +38,7 @@ TEST_F(BalanceNodes, twoHex_initiallyBalanced) balanceSettings.setUseNodeBalancer(true); setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), + stk::unit_test_util::setup_text_mesh(get_bulk(), "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "1,2,HEX_8,5,6,7,8,9,10,11,12"); @@ -58,7 +58,7 @@ TEST_F(BalanceNodes, threeHex) balanceSettings.setUseNodeBalancer(true); setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), + stk::unit_test_util::setup_text_mesh(get_bulk(), "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "0,2,HEX_8,5,6,7,8,9,10,11,12\n" "0,3,HEX_8,9,10,11,12,13,14,15,16"); diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBasicLoadBalance.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBasicLoadBalance.cpp index 3752b52d617c..70f36b2bfac8 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBasicLoadBalance.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestBasicLoadBalance.cpp @@ -20,7 +20,7 @@ stk::mesh::EntityProcVec get_only_valid_entity_proc_vec(stk::mesh::BulkData& stk //////////////////////////////////////////////////////////////////////////////////////////// -class BasicLoadBalance : public stk::unit_test_util::simple_fields::MeshFixture {}; +class BasicLoadBalance : public stk::unit_test_util::MeshFixture {}; TEST_F(BasicLoadBalance, testWithAura) { diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestCoincidentElems.cpp 
b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestCoincidentElems.cpp index 060d04b81299..f1d72e63e91d 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestCoincidentElems.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestCoincidentElems.cpp @@ -45,7 +45,7 @@ namespace { -class CoincidentElems : public stk::unit_test_util::simple_fields::MeshFixture +class CoincidentElems : public stk::unit_test_util::MeshFixture { protected: CoincidentElems() @@ -96,7 +96,7 @@ TEST_F(CoincidentElems, balance_coincidentsNotSplit) "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "1,2,HEX_8,5,6,7,8,9,10,11,12\n" "1,3,HEX_8,5,6,7,8,9,10,11,12"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); expect_coincidents_on_same_proc(2, 3); stk::balance::balanceStkMesh(graphOptions, get_bulk()); diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestIncrementalRebalance.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestIncrementalRebalance.cpp index 4d8297dab656..e004240a1261 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestIncrementalRebalance.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestIncrementalRebalance.cpp @@ -69,7 +69,7 @@ class FieldVertexWeightSettingsWithSearchForParticles : public stk::balance::Gra bool m_incrementalRebalance; }; -class IncrementalRebalance : public stk::unit_test_util::simple_fields::MeshFixture +class IncrementalRebalance : public stk::unit_test_util::MeshFixture { protected: void check_migration() diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalance.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalance.cpp index ee3ee0c192d7..2439099b729e 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalance.cpp +++ 
b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalance.cpp @@ -115,7 +115,6 @@ TEST(LoadBalance, writeMesh) if(numProcs == 2) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &outputBulkData = ioBroker.bulk_data(); @@ -129,7 +128,6 @@ TEST(LoadBalance, writeMesh) MPI_Barrier(communicator); stk::io::StkMeshIoBroker inputBroker(communicator); - inputBroker.use_simple_fields(); fillIoBroker(communicator, output_file_name, inputBroker); stk::mesh::BulkData &inputBulkData = inputBroker.bulk_data(); @@ -159,7 +157,6 @@ TEST(LoadBalance, DISABLED_moveElementToAnotherProcessor) if(numProcs == 2) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -231,7 +228,6 @@ TEST(LoadBalance, DISABLED_specifyWhichProcessorYouWantEachElementToBeOnAndWrite if(numProcs == 2) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, generatedMeshSpec, ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -266,7 +262,6 @@ TEST(LoadBalance, DISABLED_Zoltan2Parmetis) stk::balance::internal::logMessage(communicator, "Creating mesh"); stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::balance::internal::logMessage(communicator, "Finished creating mesh"); @@ -332,7 +327,6 @@ TEST(LoadBalance, zoltan2Coloring) stk::balance::internal::logMessage(communicator, "Creating mesh"); stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::balance::internal::logMessage(communicator, "Finished creating mesh"); @@ -389,7 +383,6 @@ TEST(LoadBalance, 
zoltan2Adapter) { const std::string generatedMeshSpec = "generated:1x1x1"; stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, generatedMeshSpec, ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -479,7 +472,6 @@ TEST(LoadBalance, DISABLED_createGraphEdgesUsingNodeConnectivity) if(numProcs == 2) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -553,7 +545,6 @@ TEST(LoadBalance, zoltan2coloring) if(numProcs == 2 || options.overRideTest()) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -634,7 +625,6 @@ TEST(LoadBalance, ourColoring) if(numProcs == 2) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -733,7 +723,6 @@ TEST(LoadBalance, DISABLED_zoltan1decomposition) if(numProcs == 2 || options.overRideTest()) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -764,7 +753,6 @@ TEST(LoadBalance, findBoundaryNodesAndFaces) if(numProcs == 1 || options.overRideTest()) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -798,7 +786,6 @@ TEST(LoadBalance, checkBBOnFace) if(numProcs == 1) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, 
options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -839,7 +826,6 @@ TEST(LoadBalance, doOneElementSearch) if(numProcs == 1) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -957,7 +943,6 @@ TEST(LoadBalance, doSearch) if(numProcs == 2 || options.overRideTest()) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); @@ -1029,7 +1014,6 @@ TEST(LoadBalance, testGraphCreationUsingSearchForContact) if(options.overRideTest()) { stk::io::StkMeshIoBroker meshIO(MPI_COMM_WORLD); - meshIO.use_simple_fields(); meshIO.set_bulk_data(stkMeshBulkData); unsigned index = meshIO.create_output_mesh("twoDis.exo", stk::io::WRITE_RESULTS); meshIO.write_output_mesh(index); @@ -1108,11 +1092,10 @@ TEST(LoadBalance, createNewMeshFromPart_writeFilesTillWeDontNeedThisTestAsADrive if(numProcs == 1 && options.overRideTest()) { stk::io::StkMeshIoBroker ioBroker(communicator); - ioBroker.use_simple_fields(); fillIoBroker(communicator, options.getMeshFileName(), ioBroker); stk::mesh::BulkData &stkMeshBulkData = ioBroker.bulk_data(); stk::mesh::MetaData &meta = stkMeshBulkData.mesh_meta_data(); - const std::string blockName = stk::unit_test_util::simple_fields::get_option("-b", "block_1"); + const std::string blockName = stk::unit_test_util::get_option("-b", "block_1"); stk::mesh::Part &outputPart = *meta.get_part(blockName); @@ -1154,7 +1137,6 @@ TEST(LoadBalance, testGraphCreationUsingSearchWithParticles) if(options.overRideTest()) { stk::io::StkMeshIoBroker meshIO(MPI_COMM_WORLD); - meshIO.use_simple_fields(); meshIO.set_bulk_data(stkMeshBulkData); unsigned index = meshIO.create_output_mesh("twoDis.exo", stk::io::WRITE_RESULTS); 
meshIO.write_output_mesh(index); @@ -1211,7 +1193,6 @@ TEST(LoadBalance, testGraphCreationUsingSearchWithParticlesAndSkin) if(options.overRideTest()) { stk::io::StkMeshIoBroker meshIO(MPI_COMM_WORLD); - meshIO.use_simple_fields(); meshIO.set_bulk_data(stkMeshBulkData); unsigned index = meshIO.create_output_mesh("twoDis.exo", stk::io::WRITE_RESULTS); meshIO.write_output_mesh(index); @@ -1473,7 +1454,7 @@ void writeParFiles(stk::io::StkMeshIoBroker &ioBroker, const std::string &output void fillIoBroker(MPI_Comm communicator, const std::string &generatedMeshSpec, stk::io::StkMeshIoBroker &ioBroker) { - std::string doDecomp = stk::unit_test_util::simple_fields::get_option("-decomp", "no"); + std::string doDecomp = stk::unit_test_util::get_option("-decomp", "no"); if ( doDecomp != "no") { @@ -1482,7 +1463,7 @@ void fillIoBroker(MPI_Comm communicator, const std::string &generatedMeshSpec, s ioBroker.property_add(Ioss::Property("DECOMPOSITION_METHOD", "LINEAR")); } - std::string useLargeInt = stk::unit_test_util::simple_fields::get_option("-lint", "no"); + std::string useLargeInt = stk::unit_test_util::get_option("-lint", "no"); if(useLargeInt != "no") { ioBroker.property_add(Ioss::Property("INTEGER_SIZE_API", 8)); diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceActiveOnly.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceActiveOnly.cpp index 8c18dea12022..9aa004a45001 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceActiveOnly.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceActiveOnly.cpp @@ -10,7 +10,7 @@ namespace { -class TestBalanceBalanceActiveEntities : public stk::unit_test_util::simple_fields::MeshFixture +class TestBalanceBalanceActiveEntities : public stk::unit_test_util::MeshFixture { protected: TestBalanceBalanceActiveEntities() @@ -21,7 +21,7 @@ class TestBalanceBalanceActiveEntities : public 
stk::unit_test_util::simple_fiel setup_empty_mesh(auraOption); activePart = &(get_meta().declare_part("active")); stk::io::fill_mesh("generated:1x1x6", get_bulk()); - stk::unit_test_util::simple_fields::put_mesh_into_part(get_bulk(), *activePart); + stk::unit_test_util::put_mesh_into_part(get_bulk(), *activePart); make_elements_4_and_5_inactive(); test_balance_of_active_only(); } diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceEmptyMeshOnProc.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceEmptyMeshOnProc.cpp index 7a187e6f22ee..3af7adb96f68 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceEmptyMeshOnProc.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceEmptyMeshOnProc.cpp @@ -21,7 +21,7 @@ void test_load_balancing_when_one_proc_has_no_mesh(stk::mesh::BulkData& stkMeshB void move_elements_from_proc_2_to_proc_0(stk::mesh::BulkData& bulkData); void test_that_empty_mesh_exists_on_proc_2(const stk::mesh::BulkData& stkMeshBulkData); -class EmptyMeshOnProc : public stk::unit_test_util::simple_fields::MeshFixture {}; +class EmptyMeshOnProc : public stk::unit_test_util::MeshFixture {}; TEST_F(EmptyMeshOnProc, testEmptyMeshOnProcNoAura) { diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultiPhysics.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultiPhysics.cpp index 1912e4d5a1eb..71d3380597bf 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultiPhysics.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultiPhysics.cpp @@ -10,7 +10,7 @@ namespace { -class TestBalanceBalanceMultiPhysics : public stk::unit_test_util::simple_fields::MeshFixture +class TestBalanceBalanceMultiPhysics : public stk::unit_test_util::MeshFixture { protected: TestBalanceBalanceMultiPhysics() diff --git 
a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultipleCriteria.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultipleCriteria.cpp index 1a5edda671ec..49e583c42798 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultipleCriteria.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceMultipleCriteria.cpp @@ -37,7 +37,7 @@ void write_mesh_and_results(stk::mesh::BulkData& bulkData, const std::string& fi broker.end_output_step(fh); } -class MultipleCriteria : public stk::unit_test_util::simple_fields::MeshFixture +class MultipleCriteria : public stk::unit_test_util::MeshFixture { public: diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParentChild.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParentChild.cpp index e2a2d22f8759..ba5d9ef54dec 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParentChild.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParentChild.cpp @@ -421,7 +421,6 @@ class RebalanceParentChildMesh : public ::testing::Test builder.set_aura_option(auraOption); m_bulkData = builder.create(); m_metaData = &(m_bulkData->mesh_meta_data()); - m_metaData->use_simple_fields(); } void create_coarse_mesh() diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParticles.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParticles.cpp index 28ce3a5d1b74..f679bb945cbb 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParticles.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestLoadBalanceParticles.cpp @@ -31,7 +31,7 @@ class StkRebalance : public stk::balance::FieldVertexWeightSettings class StkParticleRebalance : public StkRebalance { public: - 
StkParticleRebalance(stk::unit_test_util::simple_fields::ParticleManager & particleManager, + StkParticleRebalance(stk::unit_test_util::ParticleManager & particleManager, stk::mesh::BulkData &stkMeshBulkData, const stk::balance::DoubleFieldType &weightField, const double defaultVertexWeight) @@ -57,7 +57,7 @@ class StkParticleRebalance : public StkRebalance void set_particle_destination_from_owning_element(stk::balance::DecompositionChangeList & decomp, stk::mesh::Entity owner_element, const int destination) const { - stk::unit_test_util::simple_fields::ParticleVector & vec = m_particleManager.get_particle_vector(owner_element); + stk::unit_test_util::ParticleVector & vec = m_particleManager.get_particle_vector(owner_element); for (auto && particlePtr : vec) { set_particle_destination(decomp, particlePtr->spherical_element(), destination); } @@ -94,10 +94,10 @@ class StkParticleRebalance : public StkRebalance StkParticleRebalance(const StkParticleRebalance&) = delete; StkParticleRebalance& operator=(const StkParticleRebalance&) = delete; - stk::unit_test_util::simple_fields::ParticleManager &m_particleManager; + stk::unit_test_util::ParticleManager &m_particleManager; }; -class RebalanceParticleMesh : public stk::unit_test_util::simple_fields::MeshFixture +class RebalanceParticleMesh : public stk::unit_test_util::MeshFixture { protected: RebalanceParticleMesh() @@ -294,7 +294,7 @@ class RebalanceParticleMesh : public stk::unit_test_util::simple_fields::MeshFix } protected: - stk::unit_test_util::simple_fields::ParticleManager m_particleManager; + stk::unit_test_util::ParticleManager m_particleManager; stk::balance::DoubleFieldType * m_particleCountField; stk::mesh::Part * m_particlePart; }; @@ -362,7 +362,7 @@ class RebalanceParticleMesh : public stk::unit_test_util::simple_fields::MeshFix int get_num_local_elements_from_cmdline() { - return stk::unit_test_util::simple_fields::get_command_line_option("-nLocal", 100); + return 
stk::unit_test_util::get_command_line_option("-nLocal", 100); } TEST_F(RebalanceParticleMesh, UnevenParticles2ProcWithAura) diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestSpiderElements.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestSpiderElements.cpp index 06b8857b1dce..a1b6bedf7133 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestSpiderElements.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestSpiderElements.cpp @@ -14,7 +14,7 @@ namespace { -class SpiderElementMesh : public stk::unit_test_util::simple_fields::MeshFixture +class SpiderElementMesh : public stk::unit_test_util::MeshFixture { protected: void setup_spider_mesh(const std::string & meshSpec, stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -98,7 +98,7 @@ TEST_F(SpiderElementMesh, move_spider_legs_to_volume_elem_proc) if (get_parallel_size() > 4) return; m_balanceSettings.setShouldFixSpiders(true); - std::string meshSpec = stk::unit_test_util::simple_fields::get_option("--mesh-spec", "generated:30x3x30"); + std::string meshSpec = stk::unit_test_util::get_option("--mesh-spec", "generated:30x3x30"); setup_spider_mesh(meshSpec, stk::mesh::BulkData::NO_AUTO_AURA); stk::balance::balanceStkMesh(m_balanceSettings, get_bulk()); diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestTransientFields.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestTransientFields.cpp index 7c06e0c365d5..f66bc8ac8b49 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestTransientFields.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestTransientFields.cpp @@ -82,12 +82,12 @@ class TransientWriter void write_static_mesh(const std::string& meshDesc) const { - stk::unit_test_util::simple_fields::generated_mesh_to_file_in_serial(meshDesc, m_fileBaseName); + stk::unit_test_util::generated_mesh_to_file_in_serial(meshDesc, m_fileBaseName); } void 
write_transient_mesh(const std::string& meshDesc) const { - stk::unit_test_util::simple_fields::generated_mesh_with_transient_data_to_file_in_serial( + stk::unit_test_util::generated_mesh_with_transient_data_to_file_in_serial( meshDesc, m_fileBaseName, m_fieldName, stk::topology::NODE_RANK, m_varName, m_timeSteps, m_fieldSetter); } @@ -97,7 +97,7 @@ class TransientWriter std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_SELF); stk::mesh::BulkData& bulk = *bulkPtr; - stk::unit_test_util::simple_fields::create_AB_mesh_with_sideset_and_field( + stk::unit_test_util::create_AB_mesh_with_sideset_and_field( bulk, stk::unit_test_util::LEFT, elemOrdering, "dummyField"); stk::io::write_mesh_with_fields(m_fileBaseName, bulk, 1, 1.0); } @@ -112,7 +112,7 @@ class TransientWriter stk::unit_test_util::IdAndTimeFieldValueSetter m_fieldSetter; }; -class TransientFieldBalance : public stk::unit_test_util::simple_fields::MeshFixture +class TransientFieldBalance : public stk::unit_test_util::MeshFixture { public: TransientFieldBalance() @@ -129,13 +129,13 @@ class TransientFieldBalance : public stk::unit_test_util::simple_fields::MeshFix balanceRunner.set_decomp_method("rcb"); } - stk::unit_test_util::simple_fields::MeshFromFile& get_initial_mesh() + stk::unit_test_util::MeshFromFile& get_initial_mesh() { if (m_initialMesh.is_empty()) read_initial_mesh(); return m_initialMesh; } - stk::unit_test_util::simple_fields::MeshFromFile& get_balanced_mesh() + stk::unit_test_util::MeshFromFile& get_balanced_mesh() { if (m_balancedMesh.is_empty()) read_balanced_mesh(); return m_balancedMesh; @@ -173,11 +173,11 @@ class TransientFieldBalance : public stk::unit_test_util::simple_fields::MeshFix const std::string fileBaseName; stk::integration_test_utils::StkBalanceRunner balanceRunner; TransientWriter writer; - const stk::unit_test_util::simple_fields::TransientVerifier verifier; + const stk::unit_test_util::TransientVerifier verifier; private: - stk::unit_test_util::simple_fields::MeshFromFile 
m_initialMesh; - stk::unit_test_util::simple_fields::MeshFromFile m_balancedMesh; + stk::unit_test_util::MeshFromFile m_initialMesh; + stk::unit_test_util::MeshFromFile m_balancedMesh; }; TEST_F(TransientFieldBalance, verifyStaticDataTransfer) @@ -189,13 +189,13 @@ TEST_F(TransientFieldBalance, verifyStaticDataTransfer) writer.write_static_mesh("1x4x4"); - stk::unit_test_util::simple_fields::MeshFromFile& initialMesh = get_initial_mesh(); + stk::unit_test_util::MeshFromFile& initialMesh = get_initial_mesh(); verifier.verify_time_steps(initialMesh, {}); verifier.verify_num_transient_fields(initialMesh, 0u); balanceRunner.run_end_to_end(); - stk::unit_test_util::simple_fields::MeshFromFile& balancedMesh = get_balanced_mesh(); + stk::unit_test_util::MeshFromFile& balancedMesh = get_balanced_mesh(); verifier.verify_time_steps(balancedMesh, {}); verifier.verify_num_transient_fields(balancedMesh, 0u); @@ -212,13 +212,13 @@ TEST_F(TransientFieldBalance, verifyNumberOfSteps) writer.set_time_steps(timeSteps); writer.write_transient_mesh("1x1x20"); - stk::unit_test_util::simple_fields::MeshFromFile& initialMesh = get_initial_mesh(); + stk::unit_test_util::MeshFromFile& initialMesh = get_initial_mesh(); verifier.verify_time_steps(initialMesh, timeSteps); verifier.verify_num_transient_fields(initialMesh, 2u); balanceRunner.run_end_to_end(); - stk::unit_test_util::simple_fields::MeshFromFile& balancedMesh = get_balanced_mesh(); + stk::unit_test_util::MeshFromFile& balancedMesh = get_balanced_mesh(); verifier.verify_time_steps(balancedMesh, timeSteps); cleanup_files(); @@ -235,13 +235,13 @@ TEST_F(TransientFieldBalance, verifyGlobalVariable) writer.set_global_variable_name(globalVariableName); writer.write_transient_mesh("1x1x20"); - stk::unit_test_util::simple_fields::MeshFromFile& initialMesh = get_initial_mesh(); + stk::unit_test_util::MeshFromFile& initialMesh = get_initial_mesh(); verifier.verify_time_steps(initialMesh, timeSteps); 
verifier.verify_num_transient_fields(initialMesh, 2u); balanceRunner.run_end_to_end(); - stk::unit_test_util::simple_fields::MeshFromFile& balancedMesh = get_balanced_mesh(); + stk::unit_test_util::MeshFromFile& balancedMesh = get_balanced_mesh(); verifier.verify_global_variables_at_each_time_step(balancedMesh, globalVariableName, timeSteps); cleanup_files(); @@ -258,14 +258,14 @@ TEST_F(TransientFieldBalance, verifyTransientDataTransferOnFourProcessors) writer.set_field_name(fieldName); writer.write_transient_mesh("1x4x4"); - stk::unit_test_util::simple_fields::MeshFromFile& initialMesh = get_initial_mesh(); + stk::unit_test_util::MeshFromFile& initialMesh = get_initial_mesh(); verifier.verify_time_steps(initialMesh, timeSteps); verifier.verify_num_transient_fields(initialMesh, 2u); verifier.verify_transient_field_names(initialMesh, fieldName); balanceRunner.run_end_to_end(); - stk::unit_test_util::simple_fields::MeshFromFile& balancedMesh = get_balanced_mesh(); + stk::unit_test_util::MeshFromFile& balancedMesh = get_balanced_mesh(); verifier.verify_time_steps(balancedMesh, timeSteps); verifier.verify_num_transient_fields(balancedMesh, 2u); verifier.verify_transient_field_names(balancedMesh, fieldName); @@ -292,14 +292,14 @@ TEST_F(TransientFieldBalance, verifyTransientDataTransferWithSidesets) writer.write_two_element_mesh_with_sideset(stk::unit_test_util::INCREASING); - stk::unit_test_util::simple_fields::MeshFromFile& initialMesh = get_initial_mesh(); + stk::unit_test_util::MeshFromFile& initialMesh = get_initial_mesh(); verifier.verify_sideset_orientation(initialMesh, initialSidesetProc, expectedId, expectedOrdinal); verifier.verify_decomp(initialMesh, expectedInitialDecomp); balanceRunner.set_decomp_method("rib"); balanceRunner.run_end_to_end(); - stk::unit_test_util::simple_fields::MeshFromFile& balancedMesh = get_balanced_mesh(); + stk::unit_test_util::MeshFromFile& balancedMesh = get_balanced_mesh(); verifier.verify_sideset_orientation(balancedMesh, 
balancedSidesetProc, expectedId, expectedOrdinal); verifier.verify_decomp(balancedMesh, expectedBalancedDecomp); @@ -321,13 +321,13 @@ TEST_F(TransientFieldBalance, verifyTransientDataTransferWithSidesetsOnMovedElem writer.write_two_element_mesh_with_sideset(stk::unit_test_util::DECREASING); - stk::unit_test_util::simple_fields::MeshFromFile& initialMesh = get_initial_mesh(); + stk::unit_test_util::MeshFromFile& initialMesh = get_initial_mesh(); verifier.verify_sideset_orientation(initialMesh, initialSidesetProc, expectedId, expectedOrdinal); verifier.verify_decomp(initialMesh, expectedInitialDecomp); balanceRunner.run_end_to_end(); - stk::unit_test_util::simple_fields::MeshFromFile& balancedMesh = get_balanced_mesh(); + stk::unit_test_util::MeshFromFile& balancedMesh = get_balanced_mesh(); verifier.verify_sideset_orientation(balancedMesh, balancedSidesetProc, expectedId, expectedOrdinal); verifier.verify_decomp(balancedMesh, expectedBalancedDecomp); diff --git a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestUserSupport.cpp b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestUserSupport.cpp index 74a55994f803..3368819884f9 100644 --- a/packages/stk/stk_integration_tests/stk_balance/IntegrationTestUserSupport.cpp +++ b/packages/stk/stk_integration_tests/stk_balance/IntegrationTestUserSupport.cpp @@ -32,7 +32,7 @@ using stk::unit_test_util::build_mesh; TEST(Stkbalance, DISABLED_Ticket15830) { - std::string filename = stk::unit_test_util::simple_fields::get_option("-i", "rs1.rsout"); + std::string filename = stk::unit_test_util::get_option("-i", "rs1.rsout"); std::shared_ptr bulkPtr = build_mesh(MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; @@ -132,7 +132,7 @@ TEST(Stkbalance, NumOverlappingBB) if (stk::parallel_machine_size(MPI_COMM_WORLD) > 3) return; const std::string dummyFileName("ARefLA.e"); - std::string filename = stk::unit_test_util::simple_fields::get_option("-i", dummyFileName); + std::string filename = 
stk::unit_test_util::get_option("-i", dummyFileName); std::vector coordMinOnProc(3, std::numeric_limits::max()); std::vector coordMaxOnProc(3, std::numeric_limits::lowest()); @@ -190,7 +190,7 @@ TEST(Stkbalance, modifyMeshIfNeeded) { if (stk::parallel_machine_size(MPI_COMM_WORLD) > 3) return; - std::string filename = stk::unit_test_util::simple_fields::get_option("-i", "ARefLA.e"); + std::string filename = stk::unit_test_util::get_option("-i", "ARefLA.e"); std::shared_ptr bulkPtr = build_mesh(MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; @@ -283,7 +283,7 @@ TEST(Stkbalance, modifyMeshIfNeeded) // actually check anything. TEST(Stkbalance, checkForDegenerateElements) { - std::string filename = stk::unit_test_util::simple_fields::get_option("-i", "ZDZ.e"); + std::string filename = stk::unit_test_util::get_option("-i", "ZDZ.e"); std::shared_ptr bulkPtr = build_mesh(MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; diff --git a/packages/stk/stk_integration_tests/stk_io/CheckSupportedInternalSidesetCases.cpp b/packages/stk/stk_integration_tests/stk_io/CheckSupportedInternalSidesetCases.cpp index 139a9f9489e4..00e7d9a0630b 100644 --- a/packages/stk/stk_integration_tests/stk_io/CheckSupportedInternalSidesetCases.cpp +++ b/packages/stk/stk_integration_tests/stk_io/CheckSupportedInternalSidesetCases.cpp @@ -59,7 +59,7 @@ bool is_sideset_case_supported(const std::string& input_file_name, stk::mesh::Bu void test_supported_sideset_cases_with_aura_option(stk::mesh::BulkData::AutomaticAuraOption auraOption) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "none"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "none"); if(exodusFileName=="none") { diff --git a/packages/stk/stk_integration_tests/stk_io/IntegrationTestStkIo.cpp b/packages/stk/stk_integration_tests/stk_io/IntegrationTestStkIo.cpp index 30ddc4305bf9..2323f6dc97ef 100644 --- a/packages/stk/stk_integration_tests/stk_io/IntegrationTestStkIo.cpp +++ 
b/packages/stk/stk_integration_tests/stk_io/IntegrationTestStkIo.cpp @@ -51,8 +51,8 @@ TEST(StkIo, checkCanonicalNameFromFile) stk::io::StkMeshIoBroker stkIo; std::shared_ptr bulk = stk::mesh::MeshBuilder(communicator).create(); - std::string meshSpec = stk::unit_test_util::simple_fields::get_option("-mesh", ""); - std::string partName = stk::unit_test_util::simple_fields::get_option("-part", "UNKNOWN"); + std::string meshSpec = stk::unit_test_util::get_option("-mesh", ""); + std::string partName = stk::unit_test_util::get_option("-part", "UNKNOWN"); if(file_exists(meshSpec)) { stk::io::fill_mesh(meshSpec, *bulk, stkIo); diff --git a/packages/stk/stk_integration_tests/stk_io/WriteSidesetsUsingMetaData.cpp b/packages/stk/stk_integration_tests/stk_io/WriteSidesetsUsingMetaData.cpp index ac23a95bb681..a0519c53b8fa 100644 --- a/packages/stk/stk_integration_tests/stk_io/WriteSidesetsUsingMetaData.cpp +++ b/packages/stk/stk_integration_tests/stk_io/WriteSidesetsUsingMetaData.cpp @@ -105,7 +105,7 @@ void verify_element_side_pairs(stk::mesh::BulkData& bulkData, const ExodusSideSe for(;iter!=goldSideset.end();++iter) { int id = iter->first; - stk::mesh::Part *part = stk::unit_test_util::simple_fields::get_surface_part_with_id(bulkData.mesh_meta_data(), id); + stk::mesh::Part *part = stk::unit_test_util::get_surface_part_with_id(bulkData.mesh_meta_data(), id); stk::mesh::SideSet &sset = bulkData.get_sideset(*part); ElementSidePairs goldSet = iter->second; ASSERT_EQ(goldSet.size(), sset.size()); diff --git a/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestDetectOrphanNodes.cpp b/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestDetectOrphanNodes.cpp index c3a47090edda..1dfef0e387f7 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestDetectOrphanNodes.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestDetectOrphanNodes.cpp @@ -6,7 +6,7 @@ namespace { -class OrphanedNodeMesh : public 
stk::unit_test_util::simple_fields::MeshFixture {}; +class OrphanedNodeMesh : public stk::unit_test_util::MeshFixture {}; bool doOrphanededNodesExist(const stk::mesh::BulkData& bulk) { @@ -32,7 +32,7 @@ bool doOrphanededNodesExist(const stk::mesh::BulkData& bulk) TEST_F(OrphanedNodeMesh, detectOrphanedNodes) { - std::string filename = stk::unit_test_util::simple_fields::get_option("-i", "generated:1x1x100"); + std::string filename = stk::unit_test_util::get_option("-i", "generated:1x1x100"); setup_mesh(filename, stk::mesh::BulkData::NO_AUTO_AURA); EXPECT_TRUE(!doOrphanededNodesExist(get_bulk())); diff --git a/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestElementBlockMembership.cpp b/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestElementBlockMembership.cpp index fad82aff880b..1f6dff4d9565 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestElementBlockMembership.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestElementBlockMembership.cpp @@ -29,7 +29,7 @@ namespace typedef std::map TestCaseData; typedef TestCaseData::value_type TestCaseDatum; -class LoadMesh: public stk::unit_test_util::simple_fields::MeshTestFixture +class LoadMesh: public stk::unit_test_util::MeshTestFixture { public: virtual ~LoadMesh() {} @@ -44,7 +44,7 @@ class LoadMesh: public stk::unit_test_util::simple_fields::MeshTestFixture if(get_bulk().parallel_rank() == 0) std::cout << "Reading " << meshSpec << std::endl; #endif - stk::unit_test_util::simple_fields::read_from_serial_file_and_decompose(meshSpec, get_bulk(), "cyclic"); + stk::unit_test_util::read_from_serial_file_and_decompose(meshSpec, get_bulk(), "cyclic"); } stk::mesh::EntityVector get_all_elements() diff --git a/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestTopology.cpp b/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestTopology.cpp index d2e75c60907d..b9f6d95a56f7 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestTopology.cpp 
+++ b/packages/stk/stk_integration_tests/stk_mesh/IntegrationTestTopology.cpp @@ -44,7 +44,7 @@ using stk::unit_test_util::build_mesh; -class StkTopologyTest : public stk::unit_test_util::simple_fields::MeshFixture +class StkTopologyTest : public stk::unit_test_util::MeshFixture { public: StkTopologyTest() @@ -55,7 +55,7 @@ class StkTopologyTest : public stk::unit_test_util::simple_fields::MeshFixture void init_mesh_with_wedge12_element(stk::mesh::BulkData& bulk) { std::string meshDesc = "0,1,WEDGE_12,1,2,3,4,5,6,7,8,9,10,11,12"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshDesc); + stk::unit_test_util::setup_text_mesh(bulk, meshDesc); stk::mesh::create_all_sides(bulk, bulk.mesh_meta_data().universal_part(), {}, true); } diff --git a/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestCheckExposedBoundary.cpp b/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestCheckExposedBoundary.cpp index c8570de9b845..c611a842691b 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestCheckExposedBoundary.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestCheckExposedBoundary.cpp @@ -24,13 +24,13 @@ namespace { -class SkinnedMeshWithModifiedSkinPart : public stk::unit_test_util::simple_fields::MeshTestFixture +class SkinnedMeshWithModifiedSkinPart : public stk::unit_test_util::MeshTestFixture { protected: virtual void run_test(stk::mesh::BulkData::AutomaticAuraOption auraOption) { setup_empty_mesh(auraOption); - stk::unit_test_util::simple_fields::read_from_serial_file_and_decompose("ARA.e", get_bulk(), "cyclic"); + stk::unit_test_util::read_from_serial_file_and_decompose("ARA.e", get_bulk(), "cyclic"); stk::mesh::Part& skinnedPart = SideTestUtil::run_skin_mesh(get_bulk(), get_things_to_skin(get_bulk())); run_modification(skinnedPart); diff --git 
a/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinAllBoundaries.cpp b/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinAllBoundaries.cpp index 48169fa84fec..b2cfd8e3d329 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinAllBoundaries.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinAllBoundaries.cpp @@ -8,7 +8,7 @@ /*--------------------------------------------------------------------*/ #include // for AssertHelper, EXPECT_EQ, etc -#include // for Initializer +#include // for Initializer #include // for size_t, nullptr #include // for StkMeshIoBroker #include // for string diff --git a/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinWithModifications.cpp b/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinWithModifications.cpp index 620792d38c17..11a63b9750de 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinWithModifications.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/face_creation/skin_mesh/IntegrationTestSkinWithModifications.cpp @@ -31,7 +31,7 @@ namespace { -class SkinWithModification : public stk::unit_test_util::simple_fields::MeshFixture +class SkinWithModification : public stk::unit_test_util::MeshFixture { protected: SkinWithModification() : boundaryPart(nullptr) @@ -289,7 +289,7 @@ class SkinFileWithModification : public SkinWithModification "1,2,QUAD_4_2D,4,3,5,6\n" "0,3,QUAD_4_2D,21,22,3,4"; } - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); get_bulk().modification_begin(); put_entity_into_part(get_bulk(), 1, block1); diff --git a/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13009.cpp 
b/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13009.cpp index 96276a792f2f..a3032e73664b 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13009.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13009.cpp @@ -4,7 +4,7 @@ #include #include -class Ticket13009 : public stk::unit_test_util::simple_fields::MeshFixture +class Ticket13009 : public stk::unit_test_util::MeshFixture { protected: Ticket13009() : MeshFixture(2) {}; diff --git a/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13227.cpp b/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13227.cpp index 09bbd92276ff..c411cbd9c294 100644 --- a/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13227.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh/face_creation/user_support/IntegrationTestTicket13227.cpp @@ -10,7 +10,7 @@ */ -class Ticket13227 : public stk::unit_test_util::simple_fields::MeshFixture +class Ticket13227 : public stk::unit_test_util::MeshFixture { }; diff --git a/packages/stk/stk_integration_tests/stk_mesh_doc/IntegrationTestBulkData.cpp b/packages/stk/stk_integration_tests/stk_mesh_doc/IntegrationTestBulkData.cpp index 74d48457d4c9..2351f2fa6764 100644 --- a/packages/stk/stk_integration_tests/stk_mesh_doc/IntegrationTestBulkData.cpp +++ b/packages/stk/stk_integration_tests/stk_mesh_doc/IntegrationTestBulkData.cpp @@ -89,7 +89,7 @@ TEST(BulkData_test, use_entity_ids_for_resolving_sharing) if(stkMeshBulkData.parallel_size() == 2) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "mesh.exo"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "mesh.exo"); stk::io::fill_mesh(exodusFileName, stkMeshBulkData); } @@ -110,7 +110,7 @@ TEST(BulkData_test, 
testTwoDimProblemForSharingOfDifferentEdgesWithSameNodesFour if ( stkMeshBulkData.parallel_size() == 4 ) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "mesh.exo"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "mesh.exo"); stk::io::fill_mesh(exodusFileName, stkMeshBulkData); @@ -131,7 +131,7 @@ TEST(BulkData_test, test3DProblemSharingOfDifferentFacesWithSameNodesTwoProc) if ( stkMeshBulkData.parallel_size() == 2 ) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "mesh.exo"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "mesh.exo"); { stk::io::StkMeshIoBroker exodusFileReader(communicator); @@ -154,7 +154,7 @@ TEST(BulkData_test, test3DProblemSharingOfDifferentFacesWithSameNodesOneProc) stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); if ( stkMeshBulkData.parallel_size() == 1 ) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "mesh.exo"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "mesh.exo"); { stk::io::StkMeshIoBroker exodusFileReader(communicator); diff --git a/packages/stk/stk_integration_tests/stk_middle_mesh/MeshQualityImprover.cpp b/packages/stk/stk_integration_tests/stk_middle_mesh/MeshQualityImprover.cpp index 2306383e9481..b3cfcc929b7c 100644 --- a/packages/stk/stk_integration_tests/stk_middle_mesh/MeshQualityImprover.cpp +++ b/packages/stk/stk_integration_tests/stk_middle_mesh/MeshQualityImprover.cpp @@ -11,7 +11,7 @@ #include "stk_middle_mesh/regularized_distortion_metric.hpp" #include "util/meshes.hpp" -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA #include "stk_middle_mesh_util/create_stk_mesh.hpp" #endif @@ -154,7 +154,7 @@ TEST(MeshQualityImprover, Ellipsoid) } } -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA TEST(MeshQualityImprover, EllipsoidFromCAD) { diff --git 
a/packages/stk/stk_integration_tests/stk_middle_mesh/NonConformalInterface.cpp b/packages/stk/stk_integration_tests/stk_middle_mesh/NonConformalInterface.cpp index a37ea4d9fa11..906e0a558016 100644 --- a/packages/stk/stk_integration_tests/stk_middle_mesh/NonConformalInterface.cpp +++ b/packages/stk/stk_integration_tests/stk_middle_mesh/NonConformalInterface.cpp @@ -10,7 +10,7 @@ #include -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA #include "stk_middle_mesh_util/create_stk_mesh.hpp" #include "stk_middle_mesh_util/exodus_writer.hpp" @@ -681,7 +681,7 @@ TEST(Interface, EllipsoidNewTri) } } -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA TEST(Interface, EllipsoidFromCADNewQuad) { diff --git a/packages/stk/stk_integration_tests/stk_search/AperiCMC_NeighborSearchTest.cpp b/packages/stk/stk_integration_tests/stk_search/AperiCMC_NeighborSearchTest.cpp new file mode 100644 index 000000000000..f7f942003a64 --- /dev/null +++ b/packages/stk/stk_integration_tests/stk_search/AperiCMC_NeighborSearchTest.cpp @@ -0,0 +1,476 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using DoubleField = stk::mesh::Field; +using NgpDoubleField = stk::mesh::NgpField; +static constexpr size_t MAX_NEIGHBORS = 40; + +class NeighborSearch { + using ExecSpace = stk::ngp::ExecSpace; + using NodeIdentProc = stk::search::IdentProc; + using SphereIdentProc = stk::search::BoxIdentProc, NodeIdentProc>; + using PointIdentProc = stk::search::BoxIdentProc, NodeIdentProc>; + using Intersection = stk::search::IdentProcIntersection; + + using RangeViewType = Kokkos::View; + using DomainViewType = Kokkos::View; + using ResultViewType = Kokkos::View; + + using FastMeshIndicesViewType = Kokkos::View; + + public: + NeighborSearch(stk::mesh::BulkData *bulk_data, const 
std::vector &sets = {}) : m_bulk_data(bulk_data), m_sets(sets) { + m_ngp_mesh = stk::mesh::get_updated_ngp_mesh(*m_bulk_data); + stk::mesh::MetaData *meta_data = &m_bulk_data->mesh_meta_data(); + + if (sets.size() == 0) { + m_selector = stk::mesh::Selector(meta_data->universal_part()); + } else { + stk::mesh::PartVector parts; + for (const auto &set : sets) { + stk::mesh::Part *part = meta_data->get_part(set); + if (part == nullptr) { + throw std::runtime_error("Set " + set + " not found."); + } + parts.push_back(part); + } + m_selector = stk::mesh::selectUnion(parts); + } + // Warn if the selector is empty. + if (m_selector.is_empty(stk::topology::ELEMENT_RANK)) { + std::cout << "Warning: NeighborSearch selector is empty." << std::endl; + } + + stk::mesh::Selector full_owned_selector = m_bulk_data->mesh_meta_data().locally_owned_part(); + m_owned_selector = m_selector & full_owned_selector; + + // Get the node number of neighbors field + m_node_num_neighbors_field = &meta_data->get_field(stk::topology::NODE_RANK, "num_neighbors")->field_of_state(stk::mesh::StateNone); + m_ngp_node_num_neighbors_field = &stk::mesh::get_updated_ngp_field(*m_node_num_neighbors_field); + + // Get the node neighbors field + m_node_neighbors_field = &meta_data->get_field(stk::topology::NODE_RANK, "neighbors")->field_of_state(stk::mesh::StateNone); + m_ngp_node_neighbors_field = &stk::mesh::get_updated_ngp_field(*m_node_neighbors_field); + + // Get the coordinates field + m_coordinates_field = &meta_data->get_field(stk::topology::NODE_RANK, m_bulk_data->mesh_meta_data().coordinate_field_name())->field_of_state(stk::mesh::StateNone); + m_ngp_coordinates_field = &stk::mesh::get_updated_ngp_field(*m_coordinates_field); + + // Get the kernel radius field + m_kernel_radius_field = &meta_data->get_field(stk::topology::NODE_RANK, "kernel_radius")->field_of_state(stk::mesh::StateNone); + m_ngp_kernel_radius_field = &stk::mesh::get_updated_ngp_field(*m_kernel_radius_field); + + // Get the 
function values field + m_function_values_field = &meta_data->get_field(stk::topology::NODE_RANK, "function_values")->field_of_state(stk::mesh::StateNone); + } + + void ComputeKernelRadius(double scale_factor) { + auto ngp_mesh = m_ngp_mesh; + // Get the ngp fields + auto ngp_coordinates_field = *m_ngp_coordinates_field; + auto ngp_kernel_radius_field = *m_ngp_kernel_radius_field; + const double tolerance = std::numeric_limits::epsilon(); + + stk::mesh::for_each_entity_run( + ngp_mesh, stk::topology::NODE_RANK, m_selector, + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex &node_index) { + // Get the node's coordinates + stk::mesh::EntityFieldData coordinates = ngp_coordinates_field(node_index); + + // Get the kernel radius + double kernel_radius_squared = 0.0; + stk::mesh::NgpMesh::ConnectedEntities connected_entities = ngp_mesh.get_connected_entities(stk::topology::NODE_RANK, node_index, stk::topology::ELEMENT_RANK); + for (size_t i = 0; i < connected_entities.size(); ++i) { + stk::mesh::FastMeshIndex elem_index = ngp_mesh.fast_mesh_index(connected_entities[i]); + stk::mesh::NgpMesh::ConnectedNodes connected_nodes = ngp_mesh.get_nodes(stk::topology::ELEM_RANK, elem_index); + for (size_t j = 0; j < connected_nodes.size(); ++j) { + stk::mesh::FastMeshIndex neighbor_index = ngp_mesh.fast_mesh_index(connected_nodes[j]); + stk::mesh::EntityFieldData neighbor_coordinates = ngp_coordinates_field(neighbor_index); + double length_squared = 0; + for (size_t k = 0; k < 3; ++k) { + const double value = coordinates[k] - neighbor_coordinates[k]; + length_squared += value * value; + } + kernel_radius_squared = Kokkos::max(kernel_radius_squared, length_squared); + } + } + const double kernel_radius = Kokkos::sqrt(kernel_radius_squared); + ngp_kernel_radius_field(node_index, 0) = kernel_radius * scale_factor + tolerance; + }); + ngp_kernel_radius_field.clear_sync_state(); + ngp_kernel_radius_field.modify_on_device(); + ngp_kernel_radius_field.sync_to_host(); + } + + // Create local 
entities on host and copy to device + FastMeshIndicesViewType GetLocalEntityIndices(stk::mesh::EntityRank rank, stk::mesh::Selector selector) { + std::vector local_entities; + stk::mesh::get_entities(*m_bulk_data, rank, selector, local_entities); + + FastMeshIndicesViewType mesh_indices("mesh_indices", local_entities.size()); + FastMeshIndicesViewType::HostMirror host_mesh_indices = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, mesh_indices); + + for (size_t i = 0; i < local_entities.size(); ++i) { + const stk::mesh::MeshIndex &mesh_index = m_bulk_data->mesh_index(local_entities[i]); + host_mesh_indices(i) = stk::mesh::FastMeshIndex{mesh_index.bucket->bucket_id(), mesh_index.bucket_ordinal}; + } + + Kokkos::deep_copy(mesh_indices, host_mesh_indices); + return mesh_indices; + } + + // The domain will be the nodes. Search will find the nodes within the spheres from above. + // The identifiers will be the global node ids. + DomainViewType CreateNodePoints() { + const stk::mesh::MetaData &meta = m_bulk_data->mesh_meta_data(); + const unsigned num_local_nodes = stk::mesh::count_entities(*m_bulk_data, stk::topology::NODE_RANK, m_owned_selector | meta.globally_shared_part()); + DomainViewType node_points("node_points", num_local_nodes); + + auto ngp_coordinates_field = *m_ngp_coordinates_field; + const stk::mesh::NgpMesh &ngp_mesh = m_ngp_mesh; + + // Slow host operation that is needed to get an index. There is plans to add this to the stk::mesh::NgpMesh. 
+ FastMeshIndicesViewType node_indices = GetLocalEntityIndices(stk::topology::NODE_RANK, m_owned_selector | meta.globally_shared_part()); + const int my_rank = m_bulk_data->parallel_rank(); + + Kokkos::parallel_for( + stk::ngp::DeviceRangePolicy(0, num_local_nodes), KOKKOS_LAMBDA(const unsigned &i) { + stk::mesh::EntityFieldData coords = ngp_coordinates_field(node_indices(i)); + stk::mesh::Entity node = ngp_mesh.get_entity(stk::topology::NODE_RANK, node_indices(i)); + node_points(i) = PointIdentProc{stk::search::Point(coords[0], coords[1], coords[2]), NodeIdentProc(ngp_mesh.identifier(node), my_rank)}; + }); + + return node_points; + } + + // Sphere range. Will be used to find the nodes within a ball defined by the sphere. + // The identifiers will be the global node ids. + RangeViewType CreateNodeSpheres() { + const unsigned num_local_nodes = stk::mesh::count_entities(*m_bulk_data, stk::topology::NODE_RANK, m_owned_selector); + RangeViewType node_spheres("node_spheres", num_local_nodes); + + auto ngp_coordinates_field = *m_ngp_coordinates_field; + auto ngp_kernel_radius_field = *m_ngp_kernel_radius_field; + const stk::mesh::NgpMesh &ngp_mesh = m_ngp_mesh; + + // Slow host operation that is needed to get an index. There is plans to add this to the stk::mesh::NgpMesh. 
+ FastMeshIndicesViewType node_indices = GetLocalEntityIndices(stk::topology::NODE_RANK, m_owned_selector); + const int my_rank = m_bulk_data->parallel_rank(); + + Kokkos::parallel_for( + stk::ngp::DeviceRangePolicy(0, num_local_nodes), KOKKOS_LAMBDA(const unsigned &i) { + stk::mesh::EntityFieldData coords = ngp_coordinates_field(node_indices(i)); + stk::search::Point center(coords[0], coords[1], coords[2]); + stk::mesh::Entity node = ngp_mesh.get_entity(stk::topology::NODE_RANK, node_indices(i)); + double radius = ngp_kernel_radius_field(node_indices(i), 0); + node_spheres(i) = SphereIdentProc{stk::search::Sphere(center, radius), NodeIdentProc(ngp_mesh.identifier(node), my_rank)}; + }); + + return node_spheres; + } + + // Ghost the neighbors to the nodes processor + void GhostNodeNeighbors(const ResultViewType::HostMirror &host_search_results) { + m_bulk_data->modification_begin(); + stk::mesh::Ghosting &neighbor_ghosting = m_bulk_data->create_ghosting("neighbors"); + std::vector nodes_to_ghost; + + const int my_rank = m_bulk_data->parallel_rank(); + + for (size_t i = 0; i < host_search_results.size(); ++i) { + auto result = host_search_results(i); + if (result.domainIdentProc.proc() != my_rank && result.rangeIdentProc.proc() == my_rank) { + stk::mesh::Entity node = m_bulk_data->get_entity(stk::topology::NODE_RANK, result.rangeIdentProc.id()); + nodes_to_ghost.emplace_back(node, result.domainIdentProc.proc()); + } + } + + m_bulk_data->change_ghosting(neighbor_ghosting, nodes_to_ghost); + m_bulk_data->modification_end(); + } + + // Put the search results into the neighbors field. The neighbors field is a field of global node ids. The neighbors are sorted by distance. Near to far. 
+ void UnpackSearchResultsIntoField(const ResultViewType::HostMirror &host_search_results) { + const int my_rank = m_bulk_data->parallel_rank(); + + for (size_t i = 0; i < host_search_results.size(); ++i) { + auto result = host_search_results(i); + if (result.domainIdentProc.proc() == my_rank) { + stk::mesh::Entity node = m_bulk_data->get_entity(stk::topology::NODE_RANK, result.domainIdentProc.id()); + stk::mesh::Entity neighbor = m_bulk_data->get_entity(stk::topology::NODE_RANK, result.rangeIdentProc.id()); + const double *p_neighbor_coordinates = stk::mesh::field_data(*m_coordinates_field, neighbor); + const double *p_node_coordinates = stk::mesh::field_data(*m_coordinates_field, node); + double *p_neighbor_data = stk::mesh::field_data(*m_node_neighbors_field, node); + double &num_neighbors = *stk::mesh::field_data(*m_node_num_neighbors_field, node); + double *p_function_values = stk::mesh::field_data(*m_function_values_field, node); // Using the function values field as a temporary storage for the squared distances + + // Calculate the squared distance between the node and the neighbor + double distance_squared = 0.0; + for (size_t j = 0; j < 3; ++j) { + const double value = p_neighbor_coordinates[j] - p_node_coordinates[j]; + distance_squared += value * value; + } + + // Find where to insert the neighbor, based on the distance + size_t insert_index = (size_t)num_neighbors; // Default to the end of the list + for (size_t j = 0; j < insert_index; ++j) { + if (distance_squared < p_function_values[j]) { + insert_index = j; + break; + } + } + + // Shift the function values and neighbors to make room for the new neighbor + size_t reverse_start_index = (size_t)num_neighbors; + if (reverse_start_index == MAX_NEIGHBORS) { + Kokkos::printf("Node %ld has too many neighbors. 
The furthest neighbor will be removed.\n", m_bulk_data->identifier(node)); + --reverse_start_index; + } else { + num_neighbors += 1; + } + for (size_t j = reverse_start_index; j > insert_index; --j) { + p_function_values[j] = p_function_values[j - 1]; + p_neighbor_data[j] = p_neighbor_data[j - 1]; + } + + // Insert the new neighbor + p_function_values[insert_index] = distance_squared; + p_neighbor_data[insert_index] = (double)neighbor.local_offset(); + } + } + // Never communicate the neighbors field. The shared nodes need to have a processor local value and not the value of the owning processor. + m_node_neighbors_field->modify_on_host(); + m_node_num_neighbors_field->modify_on_host(); + m_node_neighbors_field->sync_to_device(); + m_node_num_neighbors_field->sync_to_device(); + } + + void DoBallSearch() { + DomainViewType node_points = CreateNodePoints(); + RangeViewType node_spheres = CreateNodeSpheres(); + + ResultViewType search_results; + stk::search::SearchMethod search_method = stk::search::MORTON_LBVH; + + stk::ngp::ExecSpace exec_space = Kokkos::DefaultExecutionSpace{}; + const bool results_parallel_symmetry = true; + + stk::search::coarse_search(node_points, node_spheres, search_method, m_bulk_data->parallel(), search_results, exec_space, results_parallel_symmetry); + + ResultViewType::HostMirror host_search_results = Kokkos::create_mirror_view(search_results); + Kokkos::deep_copy(host_search_results, search_results); + + // Print sizes + std::cout << "Neighborhood Search Information:" << std::endl; + std::cout << "\n Search Point-Sphere Pair Results Size: " << host_search_results.size() + << "\n Evaluation Points Size: " << node_points.size() + << "\n Neighbor Spheres Size: " << node_spheres.size() << std::endl; + + GhostNodeNeighbors(host_search_results); + + UnpackSearchResultsIntoField(host_search_results); + } + + void add_nodes_neighbors_within_variable_ball(double scale_factor) { + ComputeKernelRadius(scale_factor); + DoBallSearch(); + } + + 
std::map GetNumNeighborStats() { + // Initialize the min and max values + double max_num_neighbors = 0; + double min_num_neighbors = std::numeric_limits::max(); + double total_num_neighbors = 0; + double num_entities = 0; + NgpDoubleField ngp_num_neighbors_field; + + num_entities = stk::mesh::count_entities(*m_bulk_data, stk::topology::NODE_RANK, m_owned_selector); + ngp_num_neighbors_field = *m_ngp_node_num_neighbors_field; + ngp_num_neighbors_field.sync_to_host(); + + const stk::mesh::FieldBase* hostField = ngp_num_neighbors_field.get_field_base(); + const stk::mesh::BucketVector& nodeBuckets = m_bulk_data->get_buckets(stk::topology::NODE_RANK, m_owned_selector); + for(const stk::mesh::Bucket* bptr : nodeBuckets) { + for(stk::mesh::Entity node : *bptr) { + const double* numNeighbors = reinterpret_cast(stk::mesh::field_data(*hostField, node)); + max_num_neighbors = std::max(max_num_neighbors, numNeighbors[0]); + min_num_neighbors = std::min(min_num_neighbors, numNeighbors[0]); + total_num_neighbors += numNeighbors[0]; + } + } + + // Use MPI_Allreduce to calculate the min, max, and sum across all MPI ranks + MPI_Allreduce(MPI_IN_PLACE, &max_num_neighbors, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, &min_num_neighbors, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, &total_num_neighbors, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, &num_entities, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + std::map stats; + stats["max_num_neighbors"] = max_num_neighbors; + stats["min_num_neighbors"] = min_num_neighbors; + stats["avg_num_neighbors"] = total_num_neighbors / num_entities; + stats["num_entities"] = num_entities; + return stats; + } + + void PrintNumNeighborsStats() { + // Node + std::map node_stats = GetNumNeighborStats(); + + std::cout << "Node Stats: " << std::endl; + std::cout << " Total Num Nodes: " << node_stats["num_entities"] << std::endl; + std::cout << " Max Num Neighbors: " << 
node_stats["max_num_neighbors"] << std::endl; + std::cout << " Min Num Neighbors: " << node_stats["min_num_neighbors"] << std::endl; + std::cout << " Avg Num Neighbors: " << node_stats["avg_num_neighbors"] << std::endl + << std::endl; // Add a new line for readability + } + + void SyncFieldsToHost() { + m_ngp_node_num_neighbors_field->sync_to_host(); + m_ngp_kernel_radius_field->clear_sync_state(); + m_ngp_kernel_radius_field->sync_to_host(); + } + + private: + stk::mesh::BulkData *m_bulk_data; // The bulk data object. + std::vector m_sets; // The sets to process. + stk::mesh::Selector m_selector; // The selector + stk::mesh::Selector m_owned_selector; // The local selector + stk::mesh::NgpMesh m_ngp_mesh; // The ngp mesh object. + DoubleField *m_coordinates_field; // The coordinates field + DoubleField *m_node_num_neighbors_field; // The number of neighbors field + DoubleField *m_node_neighbors_field; // The neighbors field + DoubleField *m_kernel_radius_field; // The kernel radius field + DoubleField *m_function_values_field; // The function values field + NgpDoubleField *m_ngp_coordinates_field; // The ngp coordinates field + NgpDoubleField *m_ngp_node_num_neighbors_field; // The ngp number of neighbors field + NgpDoubleField *m_ngp_node_neighbors_field; // The ngp neighbors field + NgpDoubleField *m_ngp_kernel_radius_field; // The ngp kernel radius field +}; + +class NeighborSearchTestFixture : public ::testing::Test { + protected: + void SetUp() override { + } + + void CreateMeshAndProcessors(const std::string &mesh_spec) { + MPI_Comm p_communicator = MPI_COMM_WORLD; + m_bulk_data = stk::mesh::MeshBuilder(p_communicator).create(); + stk::mesh::MetaData *p_meta_data = &m_bulk_data->mesh_meta_data(); + + stk::io::StkMeshIoBroker mesh_reader; + mesh_reader.set_bulk_data(*m_bulk_data); + mesh_reader.add_mesh_database(mesh_spec, stk::io::READ_MESH); + mesh_reader.create_input_mesh(); + mesh_reader.add_all_mesh_fields_as_input_fields(); + + // Create the fields, 
start with nodes + m_node_num_neighbors_field = &p_meta_data->declare_field(stk::topology::NODE_RANK, "num_neighbors", 1); + stk::mesh::put_field_on_entire_mesh(*m_node_num_neighbors_field, 1); + + m_node_neighbors_field = &p_meta_data->declare_field(stk::topology::NODE_RANK, "neighbors", 1); + stk::mesh::put_field_on_entire_mesh(*m_node_neighbors_field, MAX_NEIGHBORS); + + m_node_neighbors_function_values_field = &p_meta_data->declare_field(stk::topology::NODE_RANK, "function_values", 1); + stk::mesh::put_field_on_entire_mesh(*m_node_neighbors_function_values_field, MAX_NEIGHBORS); + + m_kernel_radius_field = &p_meta_data->declare_field(stk::topology::NODE_RANK, "kernel_radius", 1); + stk::mesh::put_field_on_entire_mesh(*m_kernel_radius_field, 1); + + mesh_reader.populate_bulk_data(); + + // Create the NeighborSearch + m_search_processor = std::make_shared(m_bulk_data.get(), std::vector{"block_1"}); + } + + std::shared_ptr m_bulk_data; + DoubleField *m_node_num_neighbors_field; + DoubleField *m_node_neighbors_field; + DoubleField *m_node_neighbors_function_values_field; + DoubleField *m_kernel_radius_field; + std::shared_ptr m_search_processor; +}; + +TEST_F(NeighborSearchTestFixture, VariableBallSearchUnitCubes) { + // Unit cube elements. Should give same answer on CPU and GPU. 
+ int num_procs; + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + if (num_procs > 5) { + GTEST_SKIP_("Test only runs with 5 or fewer processes."); + } + int num_elements_x = 1; + int num_elements_y = 1; + int num_elements_z = 5; + std::string mesh_spec = "generated:" + std::to_string(num_elements_x) + "x" + std::to_string(num_elements_y) + "x" + std::to_string(num_elements_z); + std::cout<<"mesh_spec: "<add_nodes_neighbors_within_variable_ball(ball_scale_factor); + m_search_processor->SyncFieldsToHost(); + + // Check the neighbor stats + std::map node_neighbor_stats = m_search_processor->GetNumNeighborStats(); + // Expected results are hard-coded to CPU results + EXPECT_EQ(node_neighbor_stats["min_num_neighbors"], 7); + EXPECT_EQ(node_neighbor_stats["max_num_neighbors"], 10); + EXPECT_NEAR(node_neighbor_stats["avg_num_neighbors"], 9.0, 0.001); + size_t expected_num_nodes = (num_elements_x + 1) * (num_elements_y + 1) * (num_elements_z + 1); + EXPECT_EQ(node_neighbor_stats["num_entities"], expected_num_nodes); +} + +TEST_F(NeighborSearchTestFixture, VariableBallSearchScaledCubes) { + // Scaled cube elements. Noticing different answers on CPU and GPU. + int num_procs; + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + if (num_procs > 5) { + GTEST_SKIP_("Test only runs with 5 or fewer processes."); + } + int num_elements_x = 1; + int num_elements_y = 1; + int num_elements_z = 5; + std::string mesh_spec = "generated:" + std::to_string(num_elements_x) + "x" + std::to_string(num_elements_y) + "x" + std::to_string(num_elements_z); + double bbox_scale_factor = 1.0 / 7.0; // Rational factor to exacerbate the CPU vs GPU differences. Adding a tolerance in the radius calculation fixes the differences. 
+ mesh_spec += "|bbox:"; + mesh_spec += "-" + std::to_string(bbox_scale_factor * num_elements_x) + ",-" + std::to_string(bbox_scale_factor * num_elements_y) + ",-" + std::to_string(bbox_scale_factor * num_elements_z); + mesh_spec += "," + std::to_string(bbox_scale_factor * num_elements_x) + "," + std::to_string(bbox_scale_factor * num_elements_y) + "," + std::to_string(bbox_scale_factor * num_elements_z); + std::cout<<"mesh_spec: "<add_nodes_neighbors_within_variable_ball(ball_scale_factor); + m_search_processor->SyncFieldsToHost(); + + // Check the neighbor stats + std::map node_neighbor_stats = m_search_processor->GetNumNeighborStats(); + // Expected results are hard-coded to CPU results + EXPECT_EQ(node_neighbor_stats["min_num_neighbors"], 7); + EXPECT_EQ(node_neighbor_stats["max_num_neighbors"], 11); + EXPECT_NEAR(node_neighbor_stats["avg_num_neighbors"], 9.166667, 0.001); + size_t expected_num_nodes = (num_elements_x + 1) * (num_elements_y + 1) * (num_elements_z + 1); + EXPECT_EQ(node_neighbor_stats["num_entities"], expected_num_nodes); +} diff --git a/packages/stk/stk_integration_tests/stk_search/CMakeLists.txt b/packages/stk/stk_integration_tests/stk_search/CMakeLists.txt index e032acba7758..db2e6b8972f4 100644 --- a/packages/stk/stk_integration_tests/stk_search/CMakeLists.txt +++ b/packages/stk/stk_integration_tests/stk_search/CMakeLists.txt @@ -32,11 +32,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# -#TRIBITS_INCLUDE_DIRECTORIES(${${PARENT_PACKAGE_NAME}_BINARY_DIR}/stk_util/stk_util) - FILE(GLOB SOURCES *.cpp) -#removing due to dependence on geometry toolkit which is not available to Trilinos -LIST(REMOVE_ITEM SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/UnitTestNaluPerformance.cpp) TRIBITS_ADD_EXECUTABLE( search_integration_tests diff --git a/packages/stk/stk_integration_tests/stk_search/UnitTestNaluPerformance.cpp b/packages/stk/stk_integration_tests/stk_search/UnitTestNaluPerformance.cpp index 6e8f81de6733..e13ef991bdf7 100644 --- a/packages/stk/stk_integration_tests/stk_search/UnitTestNaluPerformance.cpp +++ b/packages/stk/stk_integration_tests/stk_search/UnitTestNaluPerformance.cpp @@ -89,33 +89,42 @@ struct Options void setSphereFile() { std::string optionString = "-sphere"; - mSphereFile = stk::unit_test_util::simple_fields::get_option(optionString, "NO_FILE_SPECIFIED"); + mSphereFile = stk::unit_test_util::get_option(optionString, "NO_FILE_SPECIFIED"); checkForRequiredFile(optionString, mSphereFile); } void setVolumeFile() { std::string optionString = "-volume"; - mVolumeFile = stk::unit_test_util::simple_fields::get_option(optionString, "NO_FILE_SPECIFIED"); + mVolumeFile = stk::unit_test_util::get_option(optionString, "NO_FILE_SPECIFIED"); checkForRequiredFile(optionString, mVolumeFile); } void setSearchMethod() { std::string optionString = "-method"; - mSearchMethod = stk::search::KDTREE; - std::string searchString = stk::unit_test_util::simple_fields::get_option(optionString, "gtk"); - if ( searchString != "gtk" && searchString != "kdtree") - { - STK_ThrowRequireMsg(false, "unrecognized search method"); + std::string searchString = stk::unit_test_util::get_option(optionString, "gtk"); + if ( searchString == "kdtree" || searchString == "KDTREE" || searchString == "gtk") { + mSearchMethod = stk::search::KDTREE; + return; + } + if ( searchString == "MORTON_LBVH") { + mSearchMethod = stk::search::MORTON_LBVH; + return; + } + if ( searchString == "ARBORX") { + 
mSearchMethod = stk::search::ARBORX; + return; } + + STK_ThrowErrorMsg("unrecognized search method: "< >::iterator iter_end = std::unique(globalIdMapping.begin(), globalIdMapping.end()); globalIdMapping.erase(iter_end, globalIdMapping.end()); - size_t numInteractions = stk::unit_test_util::simple_fields::getGoldValueForTest(); + size_t numInteractions = stk::unit_test_util::getGoldValueForTest(); EXPECT_EQ(numInteractions, globalIdMapping.size()); } } @@ -270,7 +271,7 @@ TEST(NaluPerformance, BoxBoxIntersections) } double elapsedTime = stk::wall_time() - startTime; - stk::unit_test_util::simple_fields::printPeformanceStats(elapsedTime, comm); + stk::unit_test_util::printPeformanceStats(elapsedTime, comm); if ( options.mTestToGetGoldResults ) { @@ -287,7 +288,7 @@ TEST(NaluPerformance, BoxBoxIntersections) } else { - stk::unit_test_util::simple_fields::gatherResultstoProcZero(comm, searchResults); + stk::unit_test_util::gatherResultstoProcZero(comm, searchResults); if ( procId == 0 ) { @@ -300,7 +301,7 @@ TEST(NaluPerformance, BoxBoxIntersections) std::vector< std::pair >::iterator iter_end = std::unique(globalIdMapping.begin(), globalIdMapping.end()); globalIdMapping.erase(iter_end, globalIdMapping.end()); - size_t numInteractions = stk::unit_test_util::simple_fields::getGoldValueForTest(); + size_t numInteractions = stk::unit_test_util::getGoldValueForTest(); EXPECT_EQ(numInteractions, globalIdMapping.size()); } } diff --git a/packages/stk/stk_integration_tests/stk_transfer/IntgTestCopyTransfer.cpp b/packages/stk/stk_integration_tests/stk_transfer/IntgTestCopyTransfer.cpp index c9a0da58a6b5..a8d3b3157a5a 100644 --- a/packages/stk/stk_integration_tests/stk_transfer/IntgTestCopyTransfer.cpp +++ b/packages/stk/stk_integration_tests/stk_transfer/IntgTestCopyTransfer.cpp @@ -371,7 +371,6 @@ class CopyTransferFixture : public ::testing::Test if (commOwnsMesh[0]) { metaA = stk::mesh::MeshBuilder().set_spatial_dimension(spatial_dimension).create_meta_data(); - 
metaA->use_simple_fields(); meshA = stk::mesh::MeshBuilder(pmSub).create(metaA); build_mesh(*metaA, *meshA, info.num_elements, info.num_nodes, info.element_ids, element_ownerA, &info.elem_node_ids[0], info.node_sharingA, info.coordinates, create_faces); } @@ -381,7 +380,6 @@ class CopyTransferFixture : public ::testing::Test if (commOwnsMesh[1]) { metaB = stk::mesh::MeshBuilder().set_spatial_dimension(spatial_dimension).create_meta_data(); - metaB->use_simple_fields(); meshB = stk::mesh::MeshBuilder(pmSub).create(metaB); build_mesh(*metaB, *meshB, info.num_elements, info.num_nodes, info.element_ids, element_ownerB, &info.elem_node_ids[0], info.node_sharingB, info.coordinates, create_faces); } @@ -2418,7 +2416,7 @@ TEST(Transfer, mismatchedFieldDataTypeCopyTransfer) stk::mesh::put_field_on_mesh(*fieldBaseA, metaA.universal_part(), &intInitVals); std::string meshDescA = "0,1,QUAD_4_2D,1,2,4,3,block_1"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulkA, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDescA, coords)); + stk::unit_test_util::setup_text_mesh(bulkA, stk::unit_test_util::get_full_text_mesh_desc(meshDescA, coords)); std::shared_ptr bulkBPtr = build_mesh(2, MPI_COMM_WORLD); stk::mesh::MetaData& metaB = bulkBPtr->mesh_meta_data(); @@ -2433,7 +2431,7 @@ TEST(Transfer, mismatchedFieldDataTypeCopyTransfer) } else { meshDescB = "1,1,QUAD_4_2D,1,2,4,3,block_1"; } - stk::unit_test_util::simple_fields::setup_text_mesh(bulkB, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDescB, coords)); + stk::unit_test_util::setup_text_mesh(bulkB, stk::unit_test_util::get_full_text_mesh_desc(meshDescB, coords)); // Set up CopyTransfer stk::mesh::EntityVector entitiesA; diff --git a/packages/stk/stk_integration_tests/test_utils/OptionsForTesting.hpp b/packages/stk/stk_integration_tests/test_utils/OptionsForTesting.hpp index 8384dff42a2b..402032026936 100644 --- a/packages/stk/stk_integration_tests/test_utils/OptionsForTesting.hpp +++ 
b/packages/stk/stk_integration_tests/test_utils/OptionsForTesting.hpp @@ -239,33 +239,33 @@ struct Options inline Options getOptionsForTest(const std::string &defaultMesh) { Options local; - const std::string generatedMeshSpec = stk::unit_test_util::simple_fields::get_option("-i", defaultMesh); + const std::string generatedMeshSpec = stk::unit_test_util::get_option("-i", defaultMesh); local.setMeshFileName(generatedMeshSpec); - std::string manualRun = stk::unit_test_util::simple_fields::get_option("-manual", "no"); + std::string manualRun = stk::unit_test_util::get_option("-manual", "no"); if(manualRun != "no") { local.setOverRideTest(true); } - std::string debugZoltanLevel = stk::unit_test_util::simple_fields::get_option("-zdl", "0"); + std::string debugZoltanLevel = stk::unit_test_util::get_option("-zdl", "0"); local.setZoltanDebugLevel(debugZoltanLevel); - std::string debugZoltan = stk::unit_test_util::simple_fields::get_option("-z", "no"); + std::string debugZoltan = stk::unit_test_util::get_option("-z", "no"); if(debugZoltan != "no") { local.setDebugZoltan(true); } { - std::string targetProcs = stk::unit_test_util::simple_fields::get_option("-t", "0"); + std::string targetProcs = stk::unit_test_util::get_option("-t", "0"); int numTargetProcs = 0; std::istringstream is(targetProcs); is >> numTargetProcs; local.setNumTargetProcs(numTargetProcs); } - const std::string outputFilename = stk::unit_test_util::simple_fields::get_option("-o", "subdomain.exo"); + const std::string outputFilename = stk::unit_test_util::get_option("-o", "subdomain.exo"); local.setOutputFilename(outputFilename); { @@ -273,34 +273,34 @@ inline Options getOptionsForTest(const std::string &defaultMesh) local.setNumSubdomains(numsub); } - std::string parmetisMethod = stk::unit_test_util::simple_fields::get_option("-m", "PartKway"); + std::string parmetisMethod = stk::unit_test_util::get_option("-m", "PartKway"); local.setPartmetisMethod(parmetisMethod); - std::string parmetisOutputLevel = 
stk::unit_test_util::simple_fields::get_option("-o", "0"); + std::string parmetisOutputLevel = stk::unit_test_util::get_option("-o", "0"); local.setPartmetisOutputLevel(parmetisOutputLevel); - std::string parmetisIter = stk::unit_test_util::simple_fields::get_option("-iter", "100"); + std::string parmetisIter = stk::unit_test_util::get_option("-iter", "100"); local.setParmetisIter(parmetisIter); - std::string parmetisCheckGraph = stk::unit_test_util::simple_fields::get_option("-check", "1"); + std::string parmetisCheckGraph = stk::unit_test_util::get_option("-check", "1"); local.setParmetisCheckGraph(parmetisCheckGraph); - std::string largestDegreeFirst = stk::unit_test_util::simple_fields::get_option("-v", "L"); + std::string largestDegreeFirst = stk::unit_test_util::get_option("-v", "L"); local.setLargestDegreeFirst(largestDegreeFirst); - std::string deleteFile = stk::unit_test_util::simple_fields::get_option("-d", "yes"); + std::string deleteFile = stk::unit_test_util::get_option("-d", "yes"); if(deleteFile != "yes") { local.setDeleteFiles(false); } { - double faceSearchTolerance = stk::unit_test_util::simple_fields::get_command_line_option("-tolFace", 0.1); + double faceSearchTolerance = stk::unit_test_util::get_command_line_option("-tolFace", 0.1); local.setToleranceForFaceSearch(faceSearchTolerance); } { - double particleSearchTolerance = stk::unit_test_util::simple_fields::get_command_line_option("-tolPart", 1.0); + double particleSearchTolerance = stk::unit_test_util::get_command_line_option("-tolPart", 1.0); local.setToleranceForParticleSearch(particleSearchTolerance); } diff --git a/packages/stk/stk_io/Jamfile b/packages/stk/stk_io/Jamfile index 81a8e935fcfd..818401b3f16b 100644 --- a/packages/stk/stk_io/Jamfile +++ b/packages/stk/stk_io/Jamfile @@ -47,7 +47,8 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM STK_HIDE_DEPRECATED_CODE $(stk_io-root-inc) : 
usage-requirements @@ -157,8 +158,8 @@ lib stk_io /sierra/stk_util//stk_util_environment /sierra/stk_util//stk_util_util /sierra/stk_topology//stk_topology - /sierra/seacas//ioss - /sierra/seacas//ioinit + /sierra/seacas//Ioss + /sierra/seacas//Ionit /tpl/trilinos//shards : [ ifuserbuild @@ -183,7 +184,7 @@ lib stk_io_util stk_io /sierra/stk_mesh//stk_mesh_base /sierra/stk_util//stk_util_parallel - /sierra/seacas//ioss + /sierra/seacas//Ioss /tpl/trilinos//shards : [ ifuserbuild diff --git a/packages/stk/stk_io/example/io_lowlevel_example.cpp b/packages/stk/stk_io/example/io_lowlevel_example.cpp index 1433c029bfd6..29af4065a085 100644 --- a/packages/stk/stk_io/example/io_lowlevel_example.cpp +++ b/packages/stk/stk_io/example/io_lowlevel_example.cpp @@ -269,7 +269,6 @@ void io_example( const std::string& in_filename, std::shared_ptr bulk_data = builder.create(); stk::mesh::MetaData& fem_meta_data = bulk_data->mesh_meta_data(); - fem_meta_data.use_simple_fields(); process_elementblocks(in_region, fem_meta_data); process_nodeblocks(in_region, fem_meta_data); process_sidesets(in_region, fem_meta_data); diff --git a/packages/stk/stk_io/stk_io/CMakeLists.txt b/packages/stk/stk_io/stk_io/CMakeLists.txt index b8aaed48fe06..ae8c9f837ba1 100644 --- a/packages/stk/stk_io/stk_io/CMakeLists.txt +++ b/packages/stk/stk_io/stk_io/CMakeLists.txt @@ -68,5 +68,5 @@ ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_io/) ADD_SUBDIRECTORY(util) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_io DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_io EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_io/stk_io/IossBridge.cpp b/packages/stk/stk_io/stk_io/IossBridge.cpp index 5bbf9000b1e6..451354b23099 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.cpp +++ b/packages/stk/stk_io/stk_io/IossBridge.cpp @@ -37,7 +37,6 @@ #include #include // for NameList, IOFactory #include // for assert -#include // for ArrayDimension #include // for min, sort, max 
#include // for int64_t, uint64_t #include // for operator<<, basic... @@ -47,7 +46,6 @@ #include #include // for BulkData #include // for comm_mesh_counts -#include // for Cartesian, FullTe... #include // for get_side_entity_f... #include // for Field #include // for find_restriction @@ -203,15 +201,14 @@ namespace { const stk::mesh::FieldBase *declare_stk_field(stk::mesh::MetaData &meta, stk::mesh::EntityRank type, stk::mesh::Part &part, - const Ioss::Field &ioField, - bool useCartesianForScalar) + const Ioss::Field &ioField) { Ioss::Field::BasicType ioFieldType = ioField.get_type(); const bool ioFieldTypeIsRecognized = (ioFieldType == Ioss::Field::INTEGER) || (ioFieldType == Ioss::Field::INT64) || (ioFieldType == Ioss::Field::REAL) || (ioFieldType == Ioss::Field::COMPLEX); STK_ThrowRequireMsg(ioFieldTypeIsRecognized, "Unrecognized field type for IO field '"< @@ -552,109 +549,46 @@ const Ioss::VariableType * get_field_output_variable_type(const stk::mesh::Field return field.attribute(); } -template -stk::mesh::FieldBase* add_stk_field(stk::mesh::MetaData& meta, - const std::string& fieldName, - stk::mesh::EntityRank entityRank, - stk::mesh::Part& part, - size_t numComponents) -{ - using StkField = stk::mesh::Field; - StkField& field = stk::mesh::legacy::declare_field(meta, entityRank, fieldName); - stk::mesh::put_field_on_mesh(field, part, numComponents, nullptr); - return &field; -} - const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta, stk::mesh::EntityRank type, stk::mesh::Part &part, - const Ioss::Field &io_field, - bool use_cartesian_for_scalar) + const Ioss::Field &io_field) { - std::string name = io_field.get_name(); - stk::mesh::FieldBase *fieldPtr = meta.get_field(type, name); - // If the field has already been declared, don't redeclare it. 
- if (fieldPtr != nullptr && stk::io::is_field_on_part(fieldPtr, type, part)) { - return fieldPtr; - } - - stk::topology::rank_t entityRank = static_cast(type); - - if (meta.is_using_simple_fields()) { - const Ioss::VariableType* varType = io_field.transformed_storage(); - size_t numComponents = varType->component_count(); - size_t numCopies = 1; - - const Ioss::CompositeVariableType* compositeVarType = dynamic_cast(varType); - if (compositeVarType != nullptr) { - const Ioss::VariableType * baseVarType = compositeVarType->GetBaseType(); - numComponents = baseVarType->component_count(); - numCopies = compositeVarType->GetNumCopies(); - varType = baseVarType; - } - std::string field_type = varType->name(); + std::string name = io_field.get_name(); + stk::mesh::FieldBase *field = meta.get_field(type, name); + // If the field has already been declared, don't redeclare it. + if (field != nullptr && stk::io::is_field_on_part(field, type, part)) { + return field; + } - stk::mesh::Field & field = meta.declare_field(entityRank, name); - stk::mesh::put_field_on_mesh(field, part, numComponents, numCopies, nullptr); + stk::topology::rank_t entityRank = static_cast(type); - const int oldVarTypeSize = has_field_output_type(field) ? 
get_field_output_variable_type(field)->component_count() : 0; - const int newVarTypeSize = varType->component_count(); + const Ioss::VariableType* varType = io_field.transformed_storage(); + size_t numComponents = varType->component_count(); + size_t numCopies = 1; - if (newVarTypeSize > oldVarTypeSize) { - set_field_output_type(field, varType); - } + const Ioss::CompositeVariableType* compositeVarType = dynamic_cast(varType); + if (compositeVarType != nullptr) { + const Ioss::VariableType * baseVarType = compositeVarType->get_base_type(); + numComponents = baseVarType->component_count(); + numCopies = compositeVarType->get_num_copies(); + varType = baseVarType; + } + std::string field_type = varType->name(); - fieldPtr = &field; - } - else { + field = &meta.declare_field(entityRank, name); + stk::mesh::put_field_on_mesh(*field, part, numComponents, numCopies, nullptr); - const Ioss::VariableType* varType = io_field.transformed_storage(); - size_t numComponents = varType->component_count(); + const int oldVarTypeSize = has_field_output_type(*field) ? 
get_field_output_variable_type(*field)->component_count() : 0; + const int newVarTypeSize = varType->component_count(); - const Ioss::CompositeVariableType* compVarType = dynamic_cast(varType); - if (compVarType != nullptr) { - varType = compVarType->GetBaseType(); - } - std::string fieldType = varType->name(); + if (newVarTypeSize > oldVarTypeSize) { + set_field_output_type(*field, varType); + } - if (fieldType == "scalar" || numComponents == 1) { - if (!use_cartesian_for_scalar) { - stk::mesh::Field & field = meta.declare_field(entityRank, name); - stk::mesh::put_field_on_mesh(field, part, nullptr); - fieldPtr = &field; - } else { - stk::mesh::Field & field = - stk::mesh::legacy::declare_field>(meta, entityRank, name); - stk::mesh::put_field_on_mesh(field, part, 1, nullptr); - fieldPtr = &field; - } - } - else if (stk::string_starts_with(sierra::make_lower(fieldType), "real[")) { - stk::mesh::Field & field = meta.declare_field(entityRank, name); - stk::mesh::put_field_on_mesh(field, part, numComponents, nullptr); - fieldPtr = &field; - } - else if ((fieldType == "vector_2d") || (fieldType == "vector_3d")) { - fieldPtr = add_stk_field(meta, name, entityRank, part, numComponents); - } - else if (fieldType == "sym_tensor_33") { - fieldPtr = add_stk_field(meta, name, entityRank, part, numComponents); - } - else if (fieldType == "full_tensor_36") { - fieldPtr = add_stk_field(meta, name, entityRank, part, numComponents); - } - else if ((fieldType == "matrix_22") || (fieldType == "matrix_33")) { - fieldPtr = add_stk_field(meta, name, entityRank, part, numComponents); - } - else { - fieldPtr = add_stk_field(meta, name, entityRank, part, numComponents); - } - } + stk::io::set_field_role(*field, io_field.get_role()); - if (fieldPtr != nullptr) { - stk::io::set_field_role(*fieldPtr, io_field.get_role()); - } - return fieldPtr; + return field; } } //namespace impl @@ -988,8 +922,6 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta const 
stk::mesh::FieldRestriction &res, FieldType *result) { - const stk::mesh::MetaData & meta = field->mesh_meta_data(); - result->type = Ioss::Field::INVALID; if ( field->type_is() ) { @@ -1004,9 +936,6 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta const int scalarsPerEntity = res.num_scalars_per_entity(); const int firstDimension = res.dimension(); - const int legacyFieldArrayRank = meta.is_using_simple_fields() ? 0 : stk::mesh::legacy::field_array_rank(*field); - const shards::ArrayDimTag * const * const tags = meta.is_using_simple_fields() ? nullptr - : stk::mesh::legacy::dimension_tags(*field); result->copies = 1; @@ -1016,11 +945,11 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta if (variableTypeName == vector_3d || variableTypeName == vector_2d) { if (firstDimension == 3) { - result->name = vector_3d ; + result->name = vector_3d; result->copies = scalarsPerEntity / firstDimension; } else if (firstDimension == 2) { - result->name = vector_2d ; + result->name = vector_2d; result->copies = scalarsPerEntity / firstDimension; } else { @@ -1037,65 +966,14 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } } } - else if ( 0 == legacyFieldArrayRank ) { - assign_generic_field_type(res, result); - } - else if ( 1 == legacyFieldArrayRank ) { - if ( tags[0] == & stk::mesh::Cartesian2d::tag() || tags[0] == & stk::mesh::Cartesian3d::tag()) { - if (firstDimension == stk::mesh::Cartesian2d::Size) { - result->name = vector_2d ; - result->copies = scalarsPerEntity/firstDimension; - } - else if (firstDimension == stk::mesh::Cartesian3d::Size) { - result->name = vector_3d ; - result->copies = scalarsPerEntity/firstDimension; - } - } - else if ( tags[0] == & stk::mesh::FullTensor22::tag() || tags[0] == & stk::mesh::FullTensor36::tag()) { - if ( 9 == scalarsPerEntity ) { - result->name = full_tensor_36 ; - } - else if ( 5 == scalarsPerEntity ) { - result->name = full_tensor_32 
; - } - else if ( 4 == scalarsPerEntity ) { - result->name = full_tensor_22 ; - } - else if ( 3 == scalarsPerEntity ) { - result->name = full_tensor_12 ; - } - } - else if (tags[0] == & stk::mesh::SymmetricTensor21::tag() || - tags[0] == & stk::mesh::SymmetricTensor31::tag() || - tags[0] == & stk::mesh::SymmetricTensor33::tag()) { - if ( 6 == scalarsPerEntity ) { - result->name = sym_tensor_33 ; - } - else if ( 4 == scalarsPerEntity ) { - result->name = sym_tensor_31 ; - } - else if ( 3 == scalarsPerEntity ) { - result->name = sym_tensor_21 ; - } - } - else if ( tags[0] == & stk::mesh::Matrix22::tag() || tags[0] == & stk::mesh::Matrix33::tag()) { - if (4 == scalarsPerEntity ) { - result->name = matrix_22; - } - else if ( 9 == scalarsPerEntity ) { - result->name = matrix_33 ; - } - } - } - - if ( result->name.empty() ) { + else { assign_generic_field_type(res, result); } } void create_named_suffix_field_output_type(const std::string & typeName, const std::vector & suffices) { - Ioss::VariableType::create_named_suffix_field_type(typeName, suffices); + Ioss::VariableType::create_named_suffix_type(typeName, suffices); } void set_named_suffix_field_output_type(stk::mesh::FieldBase & field, const std::string & typeName) @@ -1787,10 +1665,6 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta { stk::mesh::MetaData &meta = mesh::MetaData::get(part); - bool useCartesianForScalar = false; - if (role == Ioss::Field::ATTRIBUTE) - useCartesianForScalar = true; - Ioss::NameList names; entity->field_describe(role, &names); @@ -1806,7 +1680,7 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta // \todo IMPLEMENT Need to determine whether these are // multi-state fields or constant, or interpolated, or ... 
Ioss::Field ioField = entity->get_field(*I); - declare_stk_field(meta, partType, part, ioField, useCartesianForScalar); + declare_stk_field(meta, partType, part, ioField); } } diff --git a/packages/stk/stk_io/stk_io/IossBridge.hpp b/packages/stk/stk_io/stk_io/IossBridge.hpp index fa43af3aa0f2..1bfe68d84553 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.hpp +++ b/packages/stk/stk_io/stk_io/IossBridge.hpp @@ -670,8 +670,7 @@ namespace impl { const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta, stk::mesh::EntityRank type, stk::mesh::Part &part, - const Ioss::Field &io_field, - bool use_cartesian_for_scalar); + const Ioss::Field &io_field); }//namespace impl }//namespace io diff --git a/packages/stk/stk_io/stk_io/OutputFile.hpp b/packages/stk/stk_io/stk_io/OutputFile.hpp index 1c97ae55b3b9..6530f4612ffd 100644 --- a/packages/stk/stk_io/stk_io/OutputFile.hpp +++ b/packages/stk/stk_io/stk_io/OutputFile.hpp @@ -47,6 +47,7 @@ #include // for string #include // for pair, swap #include // for vector +#include #include "mpi.h" // for MPI_Comm, ompi_communicat... #include "stk_io/FieldAndName.hpp" // for FieldAndName, UserDataAnd... #include "stk_io/OutputVariableParams.hpp" // for OutputVariableParams diff --git a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp index 597e32274587..b275bf697931 100644 --- a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp +++ b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp @@ -21,12 +21,10 @@ #include "StkIoUtils.hpp" // for part_primary_entit... 
#include "StkMeshIoBroker.hpp" // for StkMeshIoBroker #include "stk_mesh/base/Bucket.hpp" // for Bucket -#include "stk_mesh/base/CoordinateSystems.hpp" // for Cartesian #include "stk_mesh/base/EntityKey.hpp" // for EntityKey #include "stk_mesh/base/FEMHelpers.hpp" // for declare_element_edge #include "stk_mesh/base/Field.hpp" // for Field #include "stk_mesh/base/SideSetEntry.hpp" // for SideSet -#include "stk_mesh/base/TopologyDimensions.hpp" // for ElementNode #include "stk_mesh/base/Types.hpp" // for EntityId, PartVector #include "stk_mesh/baseImpl/ConnectEdgesImpl.hpp" // for connect_face_to_edges #include "stk_mesh/baseImpl/MeshImplUtils.hpp" // for connect_edge_to_el... @@ -55,29 +53,16 @@ void process_nodeblocks(Ioss::Region ®ion, stk::mesh::MetaData &meta) const Ioss::NodeBlockContainer& node_blocks = region.get_node_blocks(); assert(node_blocks.size() == 1); - if (meta.is_using_simple_fields()) { - auto & coord_field = meta.declare_field(stk::topology::NODE_RANK, meta.coordinate_field_name()); - stk::mesh::put_field_on_mesh(coord_field, meta.universal_part(), meta.spatial_dimension(), nullptr); - stk::io::set_field_output_type(coord_field, stk::io::FieldOutputType::VECTOR_3D); - stk::io::set_field_role(coord_field, Ioss::Field::MESH); - meta.set_coordinate_field(&coord_field); - } - else { - auto & coord_field = - stk::mesh::legacy::declare_field>(meta, - stk::topology::NODE_RANK, - meta.coordinate_field_name()); - stk::mesh::put_field_on_mesh(coord_field, meta.universal_part(), meta.spatial_dimension(), nullptr); - stk::io::set_field_role(coord_field, Ioss::Field::MESH); - meta.set_coordinate_field(&coord_field); - } + auto & coord_field = meta.declare_field(stk::topology::NODE_RANK, meta.coordinate_field_name()); + stk::mesh::put_field_on_mesh(coord_field, meta.universal_part(), meta.spatial_dimension(), nullptr); + stk::io::set_field_output_type(coord_field, stk::io::FieldOutputType::VECTOR_3D); + stk::io::set_field_role(coord_field, Ioss::Field::MESH); 
+ meta.set_coordinate_field(&coord_field); Ioss::NodeBlock *nb = node_blocks[0]; stk::io::define_io_fields(nb, Ioss::Field::ATTRIBUTE, meta.universal_part(), stk::topology::NODE_RANK); } - - void process_elementblocks(Ioss::Region ®ion, stk::mesh::MetaData &meta, TopologyErrorHandler handler) { const Ioss::ElementBlockContainer& elem_blocks = region.get_element_blocks(); @@ -143,24 +128,14 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) if (!surface_df_defined) { stk::topology::rank_t side_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); std::string field_name = sset->name() + "_df"; - if (meta.is_using_simple_fields()) { - distribution_factors_field = &meta.declare_field(side_rank, field_name); - } - else { - distribution_factors_field = &stk::mesh::legacy::declare_field>(meta, side_rank, field_name); - } + distribution_factors_field = &meta.declare_field(side_rank, field_name); stk::io::set_field_role(*distribution_factors_field, Ioss::Field::MESH); stk::io::set_distribution_factor_field(*ss_part, *distribution_factors_field); surface_df_defined = true; } stk::io::set_distribution_factor_field(*sb_part, *distribution_factors_field); int side_node_count = sb->topology()->number_nodes(); - if (meta.is_using_simple_fields()) { - stk::mesh::put_field_on_mesh(*distribution_factors_field, *sb_part, side_node_count, nullptr); - } - else { - stk::mesh::put_field_on_mesh(*distribution_factors_field, *sb_part, side_node_count, nullptr); - } + stk::mesh::put_field_on_mesh(*distribution_factors_field, *sb_part, side_node_count, nullptr); } } } diff --git a/packages/stk/stk_io/stk_io/SidesetTranslator.hpp b/packages/stk/stk_io/stk_io/SidesetTranslator.hpp index 7245ef9db7eb..c76a6a9ae86d 100644 --- a/packages/stk/stk_io/stk_io/SidesetTranslator.hpp +++ b/packages/stk/stk_io/stk_io/SidesetTranslator.hpp @@ -34,12 +34,14 @@ #ifndef SIDESETTRANSLATOR_HPP_ #define SIDESETTRANSLATOR_HPP_ +#include "stk_mesh/base/Types.hpp" #include 
"stk_mesh/baseImpl/EquivalentEntityBlocks.hpp" #include "stk_mesh/base/GetEntities.hpp" #include "stk_mesh/base/Selector.hpp" -#include "stk_mesh/base/Types.hpp" #include "stk_mesh/base/SideSetUtil.hpp" #include "stk_mesh/base/FEMHelpers.hpp" +#include "stk_mesh/base/EntityLess.hpp" +#include "stk_mesh/base/Relation.hpp" #include "stk_io/StkIoUtils.hpp" #include "stk_io/OutputParams.hpp" diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp index d5124a3c5a9e..ff689be6b6b4 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp @@ -95,6 +95,21 @@ namespace stk { namespace mesh { class FieldDataManager; } } namespace stk { namespace io { +namespace impl { +std::string basename( std::string const& pathname ) +{ + struct MatchPathSeparator + { + bool operator()( char ch ) const + { + return ch == '\\' || ch == '/'; + } + }; + + return std::string( std::find_if( pathname.rbegin(), pathname.rend(), MatchPathSeparator() ).base(), + pathname.end() ); +} +} template bool is_index_valid(const std::vector &file_vector, size_t input_file_index) @@ -150,7 +165,7 @@ StkMeshIoBroker::StkMeshIoBroker() m_autoLoadDistributionFactorPerNodeSet(true), m_enableEdgeIO(false), m_cacheEntityListForTransientSteps(false), - m_useSimpleFields(false) + m_throwOnMissingInputFields(false) { Ioss::Init::Initializer::initialize_ioss(); } @@ -164,7 +179,7 @@ StkMeshIoBroker::StkMeshIoBroker(stk::ParallelMachine comm) m_autoLoadDistributionFactorPerNodeSet(true), m_enableEdgeIO(false), m_cacheEntityListForTransientSteps(false), - m_useSimpleFields(false) + m_throwOnMissingInputFields(false) { Ioss::Init::Initializer::initialize_ioss(); } @@ -280,10 +295,6 @@ void StkMeshIoBroker::set_bulk_data(std::shared_ptr arg_bul m_metaData = std::shared_ptr(&(bulk_data().mesh_meta_data()), [](auto pointerWeWontDelete){}); } - if (m_useSimpleFields) { - m_metaData->use_simple_fields(); - } - 
m_communicator = m_bulkData->parallel(); create_sideset_observer(); } @@ -299,10 +310,6 @@ void StkMeshIoBroker::replace_bulk_data(std::shared_ptr arg m_bulkData = arg_bulk_data; - if (m_useSimpleFields) { - m_metaData->use_simple_fields(); - } - create_sideset_observer(); } @@ -494,10 +501,6 @@ void StkMeshIoBroker::create_input_mesh() m_metaData = m_meshBuilder->create_meta_data(); } - if (m_useSimpleFields) { - m_metaData->use_simple_fields(); - } - size_t spatial_dimension = region->get_property("spatial_dimension").get_int(); if (m_rankNames.empty()) { initialize_spatial_dimension(meta_data(), spatial_dimension, stk::mesh::entity_rank_names()); @@ -1128,35 +1131,59 @@ bool StkMeshIoBroker::read_input_field(stk::io::MeshField &mf) return read_input_field(mf, readStatus); } +void StkMeshIoBroker::check_for_missing_input_fields(std::vector *missingFields) +{ + if(nullptr != missingFields && missingFields->size() > 0 && m_throwOnMissingInputFields) { + std::ostringstream oss; + std::string fileName = m_inputFiles[m_activeMeshIndex]->get_ioss_input_database()->get_filename(); + + oss << "There are missing fields in input file: " << impl::basename(fileName) << std::endl; + + for(const stk::io::MeshField& missingField : *missingFields) { + oss << "\t" << missingField.db_name() << " stk field: " << missingField.field()->name() + << std::endl; + } + + oss << "ERROR: Input field processing could not find " << missingFields->size() << " fields.\n"; + + STK_ThrowRequireMsg(false,oss.str()); + } +} + double StkMeshIoBroker::read_defined_input_fields(double time, std::vector *missingFields) { validate_input_file_index(m_activeMeshIndex); - return m_inputFiles[m_activeMeshIndex]->read_defined_input_fields(time, missingFields, bulk_data()); + double readTime = m_inputFiles[m_activeMeshIndex]->read_defined_input_fields(time, missingFields, bulk_data()); + check_for_missing_input_fields(missingFields); + return readTime; } double StkMeshIoBroker::read_defined_input_fields(int 
step, - std::vector *missing) + std::vector *missingFields) { if (step <= 0) { return 0.0; } validate_input_file_index(m_activeMeshIndex); - return m_inputFiles[m_activeMeshIndex]->read_defined_input_fields(step, missing, bulk_data()); + double readTime = m_inputFiles[m_activeMeshIndex]->read_defined_input_fields(step, missingFields, bulk_data()); + check_for_missing_input_fields(missingFields); + return readTime; } double StkMeshIoBroker::read_defined_input_fields_at_step(int step, - std::vector *missing) + std::vector *missingFields) { if (step <= 0) { return 0.0; } validate_input_file_index(m_activeMeshIndex); - - return m_inputFiles[m_activeMeshIndex]->read_defined_input_fields_at_step(step, missing, bulk_data(), - m_cacheEntityListForTransientSteps); + double readTime = m_inputFiles[m_activeMeshIndex]->read_defined_input_fields_at_step(step, missingFields, bulk_data(), + m_cacheEntityListForTransientSteps); + check_for_missing_input_fields(missingFields); + return readTime; } bool StkMeshIoBroker::use_nodeset_for_block_nodes_fields(size_t output_file_index) const @@ -1223,6 +1250,16 @@ void StkMeshIoBroker::use_part_id_for_output(size_t output_file_index, bool true m_outputFiles[output_file_index]->use_part_id_for_output(true_false); } +void StkMeshIoBroker::set_throw_on_missing_input_fields(bool flag) +{ + m_throwOnMissingInputFields = flag; +} + +bool StkMeshIoBroker::get_throw_on_missing_input_fields() const +{ + return m_throwOnMissingInputFields; +} + void StkMeshIoBroker::set_option_to_not_collapse_sequenced_fields() { property_add(Ioss::Property("ENABLE_FIELD_RECOGNITION", "NO")); @@ -1476,7 +1513,7 @@ void StkMeshIoBroker::set_reference_input_region(size_t outputIndex, const StkMe bool StkMeshIoBroker::create_named_suffix_field_type(const std::string& type_name, const std::vector& suffices) const { - return Ioss::VariableType::create_named_suffix_field_type(type_name, suffices); + return Ioss::VariableType::create_named_suffix_type(type_name, 
suffices); } bool StkMeshIoBroker::add_field_type_mapping(const std::string& field, const std::string& type) const @@ -1519,7 +1556,9 @@ std::vector StkMeshIoBroker::get_output_entities(size_t outpu return entities; } - +void StkMeshIoBroker::use_simple_fields() +{ +} } // namespace io diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp index 92e6d7f25f73..27678a95c270 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp @@ -423,21 +423,9 @@ namespace stk { void add_input_field(size_t mesh_index, const stk::io::MeshField &mesh_field); // Create an exodus mesh database with the specified - // filename. This function creates the exodus metadata which - // is the number and type of element blocks, nodesets, and - // sidesets; and then outputs the mesh bulk data such as the - // node coordinates, id maps, element connectivity. When the - // function returns, the non-transient portion of the mesh will - // have been defined. - // - // A stk part will have a corresponding exodus entity (element - // block, nodeset, sideset) defined if the "is_io_part()" function - // returns true. By default, all parts read from the mesh - // database in the create_input_mesh() function will return true - // as will all stk parts on which the function - // stk::io::put_io_part_attribute() was called. The function - // stk::io::remove_io_part_attribute(part) can be called to omit a - // part from being output. + // filename. See STK IO documentation tests for demonstrations of + // the proper sequence of calls needed to write an exodus database + // with transient field-data, etc. // // \param[in] filename The full pathname to the file which will be // created and the mesh data written to. 
If the file already @@ -472,6 +460,10 @@ namespace stk { // Free up memory by removing resouces associated with output files that will no longer be used by the run void close_output_mesh(size_t output_file_index); + // write_output_mesh writes the non-transient portion + // of the mesh, including the number and type of element blocks, + // nodesets, and sidesets, and then outputs the mesh bulk data such as the + // node coordinates, id maps, element connectivity. void write_output_mesh(size_t output_file_index); void add_field(size_t output_file_index, stk::mesh::FieldBase &field); @@ -615,7 +607,7 @@ namespace stk { void process_heartbeat_output_write(size_t index, int step, double time); void process_heartbeat_output_post_write(size_t index, int step, double time); - void use_simple_fields() { m_useSimpleFields = true; } + void use_simple_fields(); bool is_meta_data_null() const; bool is_bulk_data_null() const; @@ -660,6 +652,9 @@ namespace stk { void use_part_id_for_output(size_t output_file_index, bool flag); bool use_part_id_for_output(size_t output_file_index) const; + void set_throw_on_missing_input_fields(bool flag); + bool get_throw_on_missing_input_fields() const; + void set_option_to_not_collapse_sequenced_fields(); int get_num_time_steps() const; double get_max_time() const; @@ -719,6 +714,8 @@ namespace stk { void validate_output_file_index(size_t output_file_index) const; void validate_heartbeat_file_index(size_t heartbeat_file_index) const; + void check_for_missing_input_fields(std::vector *missingFields); + void copy_property_manager(const Ioss::PropertyManager &properties); Ioss::Property property_get(const std::string &property_name) const; @@ -773,7 +770,7 @@ namespace stk { bool m_autoLoadDistributionFactorPerNodeSet; bool m_enableEdgeIO; bool m_cacheEntityListForTransientSteps; - bool m_useSimpleFields; + bool m_throwOnMissingInputFields{false}; }; inline std::shared_ptr StkMeshIoBroker::get_output_ioss_region(size_t output_file_index) const { 
diff --git a/packages/stk/stk_io/stk_io/WriteMesh.cpp b/packages/stk/stk_io/stk_io/WriteMesh.cpp index 053fe8cbf1e0..38a65203e0df 100644 --- a/packages/stk/stk_io/stk_io/WriteMesh.cpp +++ b/packages/stk/stk_io/stk_io/WriteMesh.cpp @@ -105,6 +105,37 @@ void write_mesh_with_fields(const std::string& filename, stk::io::StkMeshIoBroke } } +void write_mesh_with_specified_fields(const std::string& filename, stk::io::StkMeshIoBroker &outStkIo, + const std::vector& fieldNames, int step, double time, stk::io::DatabasePurpose databasePurpose) +{ + size_t outputFileIndex = outStkIo.create_output_mesh(filename, databasePurpose); + + if (step>0) + { + const stk::mesh::FieldVector fields = outStkIo.bulk_data().mesh_meta_data().get_fields(); + for(stk::mesh::FieldBase* field : fields) + { + std::string fieldName = field->name(); + if (std::find(fieldNames.begin(), fieldNames.end(), fieldName) == fieldNames.end()) { + continue; + } + + const Ioss::Field::RoleType* fieldRole = stk::io::get_field_role(*field); + if(fieldRole == nullptr || *fieldRole == Ioss::Field::TRANSIENT) + outStkIo.add_field(outputFileIndex, *field); + } + } + + outStkIo.write_output_mesh(outputFileIndex); + + if (step>0) + { + outStkIo.begin_output_step(outputFileIndex, time); + outStkIo.write_defined_output_fields(outputFileIndex); + outStkIo.end_output_step(outputFileIndex); + } +} + void write_mesh_with_fields(const std::string& filename, stk::mesh::BulkData &bulkData, int step, double time, stk::io::DatabasePurpose databasePurpose) { stk::io::StkMeshIoBroker outStkIo; @@ -112,6 +143,15 @@ void write_mesh_with_fields(const std::string& filename, stk::mesh::BulkData &bu write_mesh_with_fields(filename, outStkIo, step, time, databasePurpose); } +void write_mesh_with_specified_fields(const std::string& filename, stk::mesh::BulkData &bulkData, + const std::vector& fieldNames, int step, double time, stk::io::DatabasePurpose databasePurpose) +{ + stk::io::StkMeshIoBroker outStkIo; + 
outStkIo.set_bulk_data(bulkData); + write_mesh_with_specified_fields(filename, outStkIo, fieldNames, step, time, databasePurpose); +} + + void set_64bit_properties(stk::io::StkMeshIoBroker &outStkIo) { outStkIo.property_add(Ioss::Property("INTEGER_SIZE_API" , 8)); diff --git a/packages/stk/stk_io/stk_io/WriteMesh.hpp b/packages/stk/stk_io/stk_io/WriteMesh.hpp index 499d218cc1fe..b416e3d51be9 100644 --- a/packages/stk/stk_io/stk_io/WriteMesh.hpp +++ b/packages/stk/stk_io/stk_io/WriteMesh.hpp @@ -38,6 +38,7 @@ // clang-format off #include // for WRITE_RESULTS, DatabasePurpose #include // for string +#include namespace stk { namespace io { class StkMeshIoBroker; } } namespace stk { namespace mesh { class BulkData; } } namespace stk { namespace mesh { class Selector; } } @@ -75,11 +76,21 @@ void write_mesh_with_fields(const std::string& filename, int step=0, double time=0.0, stk::io::DatabasePurpose databasePurpose = stk::io::WRITE_RESULTS); +void write_mesh_with_specified_fields(const std::string& filename, stk::mesh::BulkData &bulkData, + const std::vector& fieldNames, + int step, double time, stk::io::DatabasePurpose databasePurpose); + void write_mesh_with_fields(const std::string& filename, StkMeshIoBroker &broker, int step=0, double time=0.0, stk::io::DatabasePurpose databasePurpose = stk::io::WRITE_RESULTS); +void write_mesh_with_specified_fields(const std::string& filename, + StkMeshIoBroker &broker, + const std::vector& fieldNames, + int step=0, double time=0.0, + stk::io::DatabasePurpose databasePurpose = stk::io::WRITE_RESULTS); + void set_64bit_properties(stk::io::StkMeshIoBroker &broker); void write_mesh_with_large_ids_and_fields(const std::string& filename, diff --git a/packages/stk/stk_io/stk_io/util/CMakeLists.txt b/packages/stk/stk_io/stk_io/util/CMakeLists.txt index 5aff4c21d611..9f3baea04b41 100644 --- a/packages/stk/stk_io/stk_io/util/CMakeLists.txt +++ b/packages/stk/stk_io/stk_io/util/CMakeLists.txt @@ -59,5 +59,5 @@ INSTALL(FILES ${HEADERS} 
DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_io/util/) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_io_util DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_io_util EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.cpp b/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.cpp index a37d92925565..984aefec0a31 100644 --- a/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.cpp +++ b/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.cpp @@ -53,10 +53,9 @@ namespace stk { namespace io { namespace util { - Gmesh_STKmesh_Fixture::Gmesh_STKmesh_Fixture( stk::ParallelMachine comm - , const std::string& gmesh_spec - , bool use_64bit_int_IO_api - ) +Gmesh_STKmesh_Fixture::Gmesh_STKmesh_Fixture(stk::ParallelMachine comm, + const std::string& gmesh_spec, + bool use_64bit_int_IO_api) : m_mesh_data(comm) { if (use_64bit_int_IO_api) { @@ -68,7 +67,7 @@ namespace util { auto iossRegion = m_mesh_data.get_input_ioss_region(); const Iogn::DatabaseIO* database = dynamic_cast(iossRegion->get_database()); -// database->set_int_byte_size_api(Ioss::USE_INT64_API); + // database->set_int_byte_size_api(Ioss::USE_INT64_API); // get face parts names; need to convert these to strings const std::vector sideset_names = database->get_sideset_names(); @@ -92,7 +91,6 @@ Gmesh_STKmesh_Fixture::Gmesh_STKmesh_Fixture(stk::ParallelMachine comm, bool use_64bit_int_IO_api) : m_mesh_data(comm) { - m_mesh_data.use_simple_fields(); if (use_64bit_int_IO_api) { m_mesh_data.property_add(Ioss::Property("INTEGER_SIZE_API", 8)); } diff --git a/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.hpp b/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.hpp index 0454d4b5ef91..4a3124c9f3a7 100644 --- a/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.hpp +++ b/packages/stk/stk_io/stk_io/util/Gmesh_STKmesh_Fixture.hpp @@ -68,10 +68,9 @@ class Gmesh_STKmesh_Fixture * @param gmesh_spec The specification 
for the mesh. See Iogn::GeneratedMesh * for documentation on how to specify meshes. */ - Gmesh_STKmesh_Fixture( stk::ParallelMachine comm - , const std::string& gmesh_spec - , bool use_64bit_int_IO_api=false - ); + Gmesh_STKmesh_Fixture(stk::ParallelMachine comm, + const std::string& gmesh_spec, + bool use_64bit_int_IO_api=false); /** * Commits the meta-data of the mesh and populates the bulk-data. Don't call @@ -127,7 +126,8 @@ namespace simple_fields { * This class implements a Stk-mesh based fixture that uses a generated * mesh as the basis of the fixture. */ -class Gmesh_STKmesh_Fixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +Gmesh_STKmesh_Fixture { public: diff --git a/packages/stk/stk_math/Jamfile b/packages/stk/stk_math/Jamfile index e402fb2fa1d4..03c9087f83c3 100644 --- a/packages/stk/stk_math/Jamfile +++ b/packages/stk/stk_math/Jamfile @@ -48,7 +48,8 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_math-root-inc) : usage-requirements $(stk_math-root-inc) diff --git a/packages/stk/stk_math/stk_math/CMakeLists.txt b/packages/stk/stk_math/stk_math/CMakeLists.txt index c32b31ce8f11..3bcf9764e6da 100644 --- a/packages/stk/stk_math/stk_math/CMakeLists.txt +++ b/packages/stk/stk_math/stk_math/CMakeLists.txt @@ -56,5 +56,6 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_math) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_math DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_math EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) + endif() diff --git a/packages/stk/stk_math/stk_math/SideGeometry.cpp b/packages/stk/stk_math/stk_math/SideGeometry.cpp index e9e40bd7ca1a..2a05aedba18d 100644 --- a/packages/stk/stk_math/stk_math/SideGeometry.cpp +++ b/packages/stk/stk_math/stk_math/SideGeometry.cpp @@ -34,13 +34,6 @@ PointGeometry::PointGeometry(const 
stk::math::Vector3d & n) m_nodeData(n) {} -const stk::math::Vector3d & -PointGeometry::node(int index) const -{ - STK_ThrowAssert(index==0); - return m_nodeData; -} - stk::math::Vector3d PointGeometry::centroid() const { @@ -59,25 +52,6 @@ LineGeometry::LineGeometry(const stk::math::Vector3d & n0, m_nodeData{n0, n1} {} -#ifdef __GNUC__ -# if (__GNUC__ == 11) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Warray-bounds" -# define SIERRA_GCC_POP_DIAG -# endif -#endif - -const stk::math::Vector3d & -LineGeometry::node(int index) const -{ - STK_ThrowAssert(index>=0 && index < 2); - return m_nodeData[index]; -} - -#ifdef SIERRA_GCC_POP_DIAG -#pragma GCC diagnostic pop -#endif - stk::math::Vector3d LineGeometry::centroid() const { @@ -125,13 +99,6 @@ TriGeometry::TriGeometry(const stk::math::Vector3d & n0, m_nodeData{n0, n1, n2} {} -const stk::math::Vector3d & -TriGeometry::node(int index) const -{ - STK_ThrowAssert(index>=0 && index < 3); - return m_nodeData[index]; -} - stk::math::Vector3d TriGeometry::centroid() const { @@ -209,13 +176,6 @@ QuadGeometry::QuadGeometry(const stk::math::Vector3d & n0, { } -const stk::math::Vector3d & -QuadGeometry::node(int index) const -{ - STK_ThrowAssert(index>=0 && index < 4); - return m_nodeData[index]; -} - stk::math::Vector3d QuadGeometry::centroid() const { diff --git a/packages/stk/stk_math/stk_math/SideGeometry.hpp b/packages/stk/stk_math/stk_math/SideGeometry.hpp index cbdf43459b9a..a627b3ed7048 100644 --- a/packages/stk/stk_math/stk_math/SideGeometry.hpp +++ b/packages/stk/stk_math/stk_math/SideGeometry.hpp @@ -2,6 +2,7 @@ #define SIDEGEOMETRY_HPP #include "stk_math/StkVector.hpp" +#include "stk_util/util/ReportHandler.hpp" namespace stk { namespace math { @@ -28,7 +29,12 @@ class PointGeometry : public SideGeometry PointGeometry(const stk::math::Vector3d & n); ~PointGeometry() override = default; - const stk::math::Vector3d & node(int index) const override; + const stk::math::Vector3d & node(int index) 
const override + { + STK_ThrowAssert(index==0); + return m_nodeData; + } + stk::math::Vector3d centroid() const override; stk::math::Vector3d closest_proj_on_face(const stk::math::Vector3d & point) const override; @@ -43,7 +49,12 @@ class LineGeometry : public SideGeometry const stk::math::Vector3d & n1); ~LineGeometry() override = default; - const stk::math::Vector3d & node(int index) const override; + const stk::math::Vector3d & node(int index) const override + { + STK_ThrowAssert(index>=0 && index < 2); + return m_nodeData[index]; + } + stk::math::Vector3d centroid() const override; stk::math::Vector3d closest_proj_on_face(const stk::math::Vector3d & point) const override; @@ -59,7 +70,12 @@ class TriGeometry : public SideGeometry const stk::math::Vector3d & n2); ~TriGeometry() override = default; - const stk::math::Vector3d & node(int index) const override; + const stk::math::Vector3d & node(int index) const override + { + STK_ThrowAssert(index>=0 && index < 3); + return m_nodeData[index]; + } + stk::math::Vector3d centroid() const override; stk::math::Vector3d closest_proj_on_face(const stk::math::Vector3d & point) const override; @@ -76,7 +92,12 @@ class QuadGeometry : public SideGeometry const stk::math::Vector3d & n3); ~QuadGeometry() override = default; - const stk::math::Vector3d & node(int index) const override; + const stk::math::Vector3d & node(int index) const override + { + STK_ThrowAssert(index>=0 && index < 4); + return m_nodeData[index]; + } + stk::math::Vector3d centroid() const override; stk::math::Vector3d closest_proj_on_face(const stk::math::Vector3d & point) const override; diff --git a/packages/stk/stk_mesh/Jamfile b/packages/stk/stk_mesh/Jamfile index 13f9fe9dac2b..91216f9cf3c0 100644 --- a/packages/stk/stk_mesh/Jamfile +++ b/packages/stk/stk_mesh/Jamfile @@ -47,14 +47,13 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM 
STK_SHOW_DEPRECATED_WARNINGS STK_HIDE_DEPRECATED_CODE - STK_USE_SIMPLE_FIELDS SIERRA_MIGRATION $(stk_mesh-root-inc) : usage-requirements - STK_USE_SIMPLE_FIELDS SIERRA_MIGRATION $(stk_mesh-root-inc) : build-dir $(stk_mesh-builddir) diff --git a/packages/stk/stk_mesh/stk_mesh/base/BoundaryAnalysis.cpp b/packages/stk/stk_mesh/stk_mesh/base/BoundaryAnalysis.cpp index 84ab62d09792..040e17482d1e 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BoundaryAnalysis.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BoundaryAnalysis.cpp @@ -35,8 +35,9 @@ #include #include // for NULL, size_t #include // for sort, binary_search, etc -#include // for BulkData, EntityLess +#include // for BulkData #include // for Entity +#include // for Entity #include // for get_entity_subcell_id, etc #include // for MetaData, get_cell_topology #include // for Part diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp index 8c5e12728edb..9343a4992358 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp @@ -44,7 +44,7 @@ #include "stk_mesh/base/Part.hpp" // for Part #include "stk_mesh/base/Types.hpp" // for PartVector, etc #include "stk_topology/topology.hpp" // for topology::num_nodes -#include "stk_util/util/ReportHandler.hpp" // for ThrowAssert, etc +#include "stk_util/util/ReportHandler.hpp" // for STK_ThrowAssert, etc #include "stk_util/util/SortAndUnique.hpp" namespace stk { namespace mesh { namespace impl { template class BucketConnectivity; } } } @@ -56,33 +56,80 @@ namespace mesh { namespace { -#ifndef NDEBUG -struct CheckSizeFunctor -{ - template - void operator()(const Bucket& bucket, const Connectivity& connectivity, const Bucket*) const - { STK_ThrowAssert(bucket.size() == static_cast(connectivity.size())); } - - bool is_modifying() const { return false; } -}; -#endif - struct AddEntityFunctor { - template - void operator()(Bucket&, Connectivity& connectivity, Bucket*) + 
template + void operator()(Bucket&, impl::BucketConnectivity& connectivity, Bucket*) { connectivity.add_entity(); } + template + void operator()(Bucket& thisBucket, impl::BucketConnDynamic& connectivity, Bucket*) + {connectivity.grow_if_necessary(thisBucket.size());} + bool is_modifying() const { return true; } }; struct RemoveEntityFunctor { - template - void operator()(Bucket&, Connectivity& connectivity, Bucket*) + RemoveEntityFunctor(unsigned bktOrdinal) + : m_bucketOrdinal(bktOrdinal) + {} + + template + void operator()(Bucket&, impl::BucketConnectivity& connectivity, Bucket*) { connectivity.remove_entity(); } + template + void operator()(Bucket&, impl::BucketConnDynamic& connectivity, Bucket*) + { connectivity.remove_connectivity(m_bucketOrdinal); } + + bool is_modifying() const { return true; } + + unsigned m_bucketOrdinal; +}; + +struct ClearEntityFunctor +{ + ClearEntityFunctor(unsigned bktOrdinal) + : m_bucketOrdinal(bktOrdinal) + {} + + template + void operator()(Bucket&, ConnectivityType& connectivity) + {} + + void operator()(Bucket&, impl::BucketConnDynamic& connectivity) + { connectivity.remove_connectivity(m_bucketOrdinal); } + + bool is_modifying() const { return true; } + + unsigned m_bucketOrdinal; +}; + +struct ReplaceEntityFunctor +{ + ReplaceEntityFunctor(unsigned srcOrdinal, unsigned destOrdinal) + : m_srcOrdinal(srcOrdinal), m_destOrdinal(destOrdinal) + {} + + template + void operator()(Bucket&, impl::BucketConnectivity& connectivity, Bucket*) + { + connectivity.replace_connectivity(m_destOrdinal, + connectivity.num_connectivity(m_srcOrdinal), + connectivity.begin(m_srcOrdinal), + connectivity.begin_ordinals(m_srcOrdinal), + connectivity.begin_permutations(m_srcOrdinal)); + } + + template + void operator()(Bucket&, impl::BucketConnDynamic& connectivity, Bucket*) + { connectivity.swap_connectivity(m_srcOrdinal, m_destOrdinal); } + bool is_modifying() const { return true; } + + unsigned m_srcOrdinal; + unsigned m_destOrdinal; }; struct 
DeclareRelationFunctor @@ -99,9 +146,6 @@ struct DeclareRelationFunctor template void operator()(Bucket& bucket, Connectivity& connectivity) { - STK_ThrowAssert( (Connectivity::target_rank == static_cast(stk::topology::INVALID_RANK) && - bucket.mesh().entity_rank(m_to) > static_cast(stk::topology::ELEMENT_RANK)) || - bucket.mesh().entity_rank(m_to) == Connectivity::target_rank ); STK_ThrowAssert(!m_modified); m_modified = connectivity.add_connectivity(m_bucket_ordinal, m_to, m_ordinal, m_permutation); } @@ -125,9 +169,6 @@ struct DestroyRelationFunctor template void operator()(Bucket& bucket, Connectivity& connectivity) { - STK_ThrowAssert( (Connectivity::target_rank == static_cast(stk::topology::INVALID_RANK) && - bucket.mesh().entity_rank(m_to) > static_cast(stk::topology::ELEMENT_RANK)) || - bucket.mesh().entity_rank(m_to) == Connectivity::target_rank); STK_ThrowAssert(!m_modified); m_modified = connectivity.remove_connectivity(m_bucket_ordinal, m_to, m_ordinal); } @@ -138,25 +179,35 @@ struct DestroyRelationFunctor bool m_modified; }; -struct DebugPrintFunctor +struct ReplaceRelationFunctor { - DebugPrintFunctor(std::ostream& out, unsigned ordinal = -1u) : m_out(out), m_ordinal(ordinal) {} + ReplaceRelationFunctor(unsigned bucket_ordinal, + unsigned numConnectivity, + const Entity* connectivity, + const ConnectivityOrdinal* ordinals, + const Permutation* permutations) + : m_bucket_ordinal(bucket_ordinal), + m_numConnectivity(numConnectivity), + m_connectivity(connectivity), + m_ordinals(ordinals), + m_permutations(permutations), + m_modified(false) + {} - template - void operator()(const Bucket&, const Connectivity& connectivity, const Bucket*) const + template + void operator()(Bucket& bucket, Connectivity& connectivity) { - if (m_ordinal == -1u) { - connectivity.debug_dump(m_out); - } - else { - connectivity.debug_dump(m_out, m_ordinal); - } + STK_ThrowAssert(!m_modified); + m_modified = connectivity.replace_connectivity(m_bucket_ordinal, m_numConnectivity, + 
m_connectivity, m_ordinals, m_permutations); } - bool is_modifying() const { return false; } - - std::ostream& m_out; - unsigned m_ordinal; + unsigned m_bucket_ordinal; + unsigned m_numConnectivity; + const Entity* m_connectivity; + const ConnectivityOrdinal* m_ordinals; + const Permutation* m_permutations; + bool m_modified; }; template @@ -181,36 +232,15 @@ void setup_connectivity(stk::topology bucket_topology, } //namespace anonymous +//---------------------------------------------------------------------- + namespace impl { static const unsigned default_initial_bucket_capacity = 16; static const unsigned default_maximum_bucket_capacity = 512; -struct OverwriteEntityFunctor -{ - OverwriteEntityFunctor(unsigned old_ordinal, unsigned new_ordinal) : m_old_ordinal(old_ordinal), m_new_ordinal(new_ordinal) {} - - template - void operator()(Bucket& bucket, Connectivity& connectivity, Bucket* otherBucket) - { - impl::BucketConnectivity & otherConnectivity = get_other_connectivity(otherBucket); - otherConnectivity.copy_entity(m_old_ordinal, connectivity, m_new_ordinal); - } - - bool is_modifying() const { return true; } - - template - static - impl::BucketConnectivity& get_other_connectivity(Bucket* other_bucket); - - unsigned m_old_ordinal; - unsigned m_new_ordinal; -}; - } -//---------------------------------------------------------------------- - unsigned get_default_bucket_capacity() { return impl::default_maximum_bucket_capacity; } unsigned get_default_initial_bucket_capacity() { return impl::default_initial_bucket_capacity; } unsigned get_default_maximum_bucket_capacity() { return impl::default_maximum_bucket_capacity; } @@ -274,11 +304,11 @@ Bucket::Bucket(BulkData & mesh, m_fixed_edge_connectivity(), m_fixed_face_connectivity(), m_fixed_element_connectivity(), - m_dynamic_node_connectivity(entityRank, &m_mesh), - m_dynamic_edge_connectivity(entityRank, &m_mesh), - m_dynamic_face_connectivity(entityRank, &m_mesh), - m_dynamic_element_connectivity(entityRank, 
&m_mesh), - m_dynamic_other_connectivity(entityRank, &m_mesh), + m_dynamic_node_connectivity(initialCapacity), + m_dynamic_edge_connectivity(initialCapacity, should_store_permutations(entityRank, stk::topology::EDGE_RANK)), + m_dynamic_face_connectivity(initialCapacity, should_store_permutations(entityRank, stk::topology::FACE_RANK)), + m_dynamic_element_connectivity(initialCapacity, should_store_permutations(entityRank, stk::topology::ELEM_RANK)), + m_dynamic_other_connectivity(initialCapacity), m_owned(has_superset(*this, m_mesh.mesh_meta_data().locally_owned_part())), m_shared(has_superset(*this, m_mesh.mesh_meta_data().globally_shared_part())), m_aura(has_superset(*this, m_mesh.mesh_meta_data().aura_part())) @@ -321,13 +351,6 @@ size_t Bucket::memory_size_in_bytes() const return bytes; } -void -Bucket::grow_capacity() -{ - STK_ThrowAssert(m_capacity < std::numeric_limits::max()/2); - m_capacity = std::min(2 * m_capacity, m_maxCapacity); -} - void Bucket::change_existing_connectivity(unsigned bucket_ordinal, stk::mesh::Entity* new_nodes) { unsigned num_nodes = this->num_nodes(bucket_ordinal); @@ -349,15 +372,10 @@ void Bucket::change_existing_connectivity(unsigned bucket_ordinal, stk::mesh::En void Bucket::change_existing_permutation_for_connected_element(unsigned bucket_ordinal_of_lower_ranked_entity, unsigned elem_connectivity_ordinal, stk::mesh::Permutation permut) { - stk::mesh::Permutation *perms=0; - if (m_element_kind == FIXED_CONNECTIVITY) - { - perms = m_fixed_element_connectivity.begin_permutations(bucket_ordinal_of_lower_ranked_entity); - } - else - { - perms = m_dynamic_element_connectivity.begin_permutations(bucket_ordinal_of_lower_ranked_entity); - } + stk::mesh::Permutation *perms = m_element_kind == FIXED_CONNECTIVITY ? 
+ m_fixed_element_connectivity.begin_permutations(bucket_ordinal_of_lower_ranked_entity) + : + m_dynamic_element_connectivity.begin_permutations(bucket_ordinal_of_lower_ranked_entity); if (perms) { @@ -367,15 +385,10 @@ void Bucket::change_existing_permutation_for_connected_element(unsigned bucket_o void Bucket::change_existing_permutation_for_connected_face(unsigned bucket_ordinal_of_higher_ranked_entity, unsigned face_connectivity_ordinal, stk::mesh::Permutation permut) { - stk::mesh::Permutation *perms=0; - if (m_face_kind == FIXED_CONNECTIVITY) - { - perms = m_fixed_face_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity); - } - else - { - perms = m_dynamic_face_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity); - } + stk::mesh::Permutation *perms = (m_face_kind == FIXED_CONNECTIVITY) ? + m_fixed_face_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity) + : + m_dynamic_face_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity); if (perms) { @@ -385,14 +398,14 @@ void Bucket::change_existing_permutation_for_connected_face(unsigned bucket_ordi void Bucket::change_existing_permutation_for_connected_edge(unsigned bucket_ordinal_of_higher_ranked_entity, unsigned edge_connectivity_ordinal, stk::mesh::Permutation permut) { - stk::mesh::Permutation *perms=0; + stk::mesh::Permutation *perms=nullptr; if (m_edge_kind == FIXED_CONNECTIVITY) { perms = m_fixed_edge_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity); } else { - perms = m_dynamic_edge_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity); + perms = const_cast(m_dynamic_edge_connectivity.begin_permutations(bucket_ordinal_of_higher_ranked_entity)); } if (perms) @@ -719,6 +732,18 @@ void Bucket::add_entity(Entity entity) process_all_connectivity(functor); } +void +Bucket::grow_capacity() +{ + STK_ThrowAssert(m_capacity < std::numeric_limits::max()/2); + m_capacity = std::min(2 * m_capacity, 
m_maxCapacity); + m_dynamic_node_connectivity.increase_bucket_capacity(m_capacity); + m_dynamic_edge_connectivity.increase_bucket_capacity(m_capacity); + m_dynamic_face_connectivity.increase_bucket_capacity(m_capacity); + m_dynamic_element_connectivity.increase_bucket_capacity(m_capacity); + m_dynamic_other_connectivity.increase_bucket_capacity(m_capacity); +} + bool Bucket::destroy_relation(Entity e_from, Entity e_to, const RelationIdentifier local_id ) { const unsigned from_bucket_ordinal = mesh().bucket_ordinal(e_from); @@ -736,17 +761,34 @@ bool Bucket::declare_relation(unsigned bucket_ordinal, Entity e_to, const Connec return functor.m_modified; } +bool Bucket::replace_relations(unsigned bucketOrdinal, + EntityRank rank, + unsigned numConnectivity, + const Entity* connectivity, + const ConnectivityOrdinal* ordinals, + const Permutation* permutations) +{ + if (numConnectivity > 0) { + ReplaceRelationFunctor functor(bucketOrdinal, numConnectivity, connectivity, + ordinals, permutations); + modify_connectivity(functor, rank); + return functor.m_modified; + } + return false; +} + void Bucket::remove_entity() { STK_ThrowAssert(m_size > 0); mark_for_modification(); mesh().remove_entity_field_data_callback(entity_rank(), bucket_id(), m_size-1); + const unsigned bktOrdinal = m_size-1; --m_size; initialize_slot(m_size, Entity()); - RemoveEntityFunctor functor; + RemoveEntityFunctor functor(bktOrdinal); process_all_connectivity(functor); } @@ -763,70 +805,26 @@ void Bucket::copy_entity(Entity entity) Bucket* old_bucket = mesh().bucket_ptr(entity); const unsigned old_ordinal = mesh().bucket_ordinal(entity); - mesh().add_entity_callback(entity_rank(), bucket_id(), capacity(), m_size); - reset_entity_location(entity, m_size); + this->mesh().add_entity_callback(entity_rank(), bucket_id(), capacity(), m_size); + const unsigned newOrdinal = m_size; + reset_entity_location(entity, newOrdinal); ++m_size; - // Unfortunately, we had to copy/paste modify_connectivity to allow 
dynamic->fixed moves. The - // modify_connectivity framework couldn't elegantly handle this case. - switch(m_node_kind) { - case FIXED_CONNECTIVITY: - if (old_bucket->m_node_kind == FIXED_CONNECTIVITY) { - old_bucket->m_fixed_node_connectivity.copy_entity(old_ordinal, m_fixed_node_connectivity); - } - else { - STK_ThrowAssert(old_bucket->m_node_kind != INVALID_CONNECTIVITY_TYPE); - old_bucket->m_dynamic_node_connectivity.copy_to_fixed(old_ordinal, m_fixed_node_connectivity); - } - break; - case DYNAMIC_CONNECTIVITY: old_bucket->m_dynamic_node_connectivity.copy_entity(old_ordinal, m_dynamic_node_connectivity); break; - default: break; - } - - switch(m_edge_kind) { - case FIXED_CONNECTIVITY: - if (old_bucket->m_edge_kind == FIXED_CONNECTIVITY) { - old_bucket->m_fixed_edge_connectivity.copy_entity(old_ordinal, m_fixed_edge_connectivity); - } - else { - STK_ThrowAssert(old_bucket->m_edge_kind != INVALID_CONNECTIVITY_TYPE); - old_bucket->m_dynamic_edge_connectivity.copy_to_fixed(old_ordinal, m_fixed_edge_connectivity); - } - break; - case DYNAMIC_CONNECTIVITY: old_bucket->m_dynamic_edge_connectivity.copy_entity(old_ordinal, m_dynamic_edge_connectivity); break; - default: break; - } + AddEntityFunctor functor; + process_all_connectivity(functor); - switch(m_face_kind) { - case FIXED_CONNECTIVITY: - if (old_bucket->m_face_kind == FIXED_CONNECTIVITY) { - old_bucket->m_fixed_face_connectivity.copy_entity(old_ordinal, m_fixed_face_connectivity); - } - else { - STK_ThrowAssert(old_bucket->m_face_kind != INVALID_CONNECTIVITY_TYPE); - old_bucket->m_dynamic_face_connectivity.copy_to_fixed(old_ordinal, m_fixed_face_connectivity); - } - break; - case DYNAMIC_CONNECTIVITY: old_bucket->m_dynamic_face_connectivity.copy_entity(old_ordinal, m_dynamic_face_connectivity); break; - default: break; - } + EntityRank endRank = static_cast(mesh().mesh_meta_data().entity_rank_count()); - switch(m_element_kind) { - case FIXED_CONNECTIVITY: - if (old_bucket->m_element_kind == FIXED_CONNECTIVITY) 
{ - old_bucket->m_fixed_element_connectivity.copy_entity(old_ordinal, m_fixed_element_connectivity); - } - else { - STK_ThrowAssert(old_bucket->m_element_kind != INVALID_CONNECTIVITY_TYPE); - old_bucket->m_dynamic_element_connectivity.copy_to_fixed(old_ordinal, m_fixed_element_connectivity); + for(EntityRank rank = stk::topology::NODE_RANK; ranknum_connectivity(old_ordinal, rank); + if (numConn > 0) { + const Entity* conn = old_bucket->begin(old_ordinal, rank); + const ConnectivityOrdinal* ordinals = old_bucket->begin_ordinals(old_ordinal, rank); + const Permutation* perms = old_bucket->begin_permutations(old_ordinal, rank); + replace_relations(newOrdinal, rank, numConn, conn, ordinals, perms); } - break; - case DYNAMIC_CONNECTIVITY: old_bucket->m_dynamic_element_connectivity.copy_entity(old_ordinal, m_dynamic_element_connectivity); break; - default: break; } - - old_bucket->m_dynamic_other_connectivity.copy_entity(old_ordinal, m_dynamic_other_connectivity); } void Bucket::overwrite_entity(unsigned to_ordinal, Entity entity, const FieldVector* fields) @@ -837,10 +835,31 @@ void Bucket::overwrite_entity(unsigned to_ordinal, Entity entity, const FieldVec STK_ThrowAssert(mesh().entity_rank(entity) == m_entity_rank); const MeshIndex from_index = m_mesh.mesh_index(entity); + const Bucket* old_bucket = from_index.bucket; + const unsigned old_ordinal = from_index.bucket_ordinal; reset_entity_location(entity, to_ordinal, fields); - impl::OverwriteEntityFunctor functor(from_index.bucket_ordinal, to_ordinal); - process_all_connectivity(functor, from_index.bucket); + if (bucket_id() == old_bucket->bucket_id()) { + ReplaceEntityFunctor functor(old_ordinal, to_ordinal); + process_all_connectivity(functor); + } + else { + ClearEntityFunctor functor(to_ordinal); + EntityRank endRank = static_cast(mesh().mesh_meta_data().entity_rank_count()); + + for(EntityRank rank = stk::topology::NODE_RANK; ranknum_connectivity(old_ordinal, rank); + if (numConn > 0) { + const Entity* conn = 
old_bucket->begin(old_ordinal, rank); + const ConnectivityOrdinal* ordinals = old_bucket->begin_ordinals(old_ordinal, rank); + const Permutation* perms = old_bucket->begin_permutations(old_ordinal, rank); + replace_relations(to_ordinal, rank, numConn, conn, ordinals, perms); + } + else { + modify_connectivity(functor, rank); + } + } + } } @@ -863,25 +882,10 @@ void Bucket::parent_topology( EntityRank parent_rank, std::vector void Bucket::check_size_invariant() const { -#ifndef NDEBUG -// for (size_t i = 0; i < m_entities.size(); ++i) { -// if (i < m_size) { -// STK_ThrowAssert(mesh().is_valid(m_entities[i])); -// } -// else { -// STK_ThrowAssert(!mesh().is_valid(m_entities[i])); -// } -// } - - CheckSizeFunctor functor; - const_cast(this)->process_all_connectivity(functor); -#endif } void Bucket::debug_dump(std::ostream& out, unsigned ordinal) const { - DebugPrintFunctor functor(out, ordinal); - const_cast(this)->process_all_connectivity(functor); } void Bucket::debug_check_for_invalid_connectivity_request(ConnectivityType const* type) const @@ -911,46 +915,5 @@ void Bucket::debug_check_for_invalid_connectivity_request(ConnectivityType const #endif } -namespace impl { - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_fixed_node_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_fixed_edge_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_fixed_face_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_fixed_element_connectivity; } - - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return 
other_bucket->m_dynamic_node_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_dynamic_edge_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_dynamic_face_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_dynamic_element_connectivity; } - -template <> -impl::BucketConnectivity& OverwriteEntityFunctor::get_other_connectivity(Bucket* other_bucket) -{ return other_bucket->m_dynamic_other_connectivity; } - -} - } // namespace mesh } // namespace stk diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp index 7cb82f226ef4..9d9af289ca09 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp @@ -39,11 +39,12 @@ #include // for lower_bound #include // for ostream #include -#include // for Entity +#include #include // for BucketConnectivity +#include // for Entity #include // for contains_ordinal, Part #include // for topology, etc -#include // for ThrowAssert, etc +#include // for STK_ThrowAssert, etc #include // for string #include // for pair #include // for vector, etc @@ -330,6 +331,7 @@ class Bucket ConnectedEntities get_connected_entities(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const { + STK_ThrowAssertMsg(offsetIntoBucket < size(),"Bucket::get_connected_entities offsetIntoBucket="< m_fixed_face_connectivity; impl::BucketConnectivity m_fixed_element_connectivity; - impl::BucketConnectivity m_dynamic_node_connectivity; - impl::BucketConnectivity m_dynamic_edge_connectivity; - impl::BucketConnectivity m_dynamic_face_connectivity; - impl::BucketConnectivity m_dynamic_element_connectivity; - - impl::BucketConnectivity 
m_dynamic_other_connectivity; + impl::BucketConnDynamic m_dynamic_node_connectivity; + impl::BucketConnDynamic m_dynamic_edge_connectivity; + impl::BucketConnDynamic m_dynamic_face_connectivity; + impl::BucketConnDynamic m_dynamic_element_connectivity; + impl::BucketConnDynamic m_dynamic_other_connectivity; bool m_owned; bool m_shared; @@ -732,7 +740,6 @@ void Bucket::modify_connectivity(T& callable, EntityRank rank) { switch(rank) { case stk::topology::NODE_RANK: - STK_ThrowAssert(m_node_kind != INVALID_CONNECTIVITY_TYPE); mark_for_modification(); switch(m_node_kind) { @@ -742,7 +749,6 @@ void Bucket::modify_connectivity(T& callable, EntityRank rank) } break; case stk::topology::EDGE_RANK: - STK_ThrowAssert(m_edge_kind != INVALID_CONNECTIVITY_TYPE); switch(m_edge_kind) { case FIXED_CONNECTIVITY: callable(*this, m_fixed_edge_connectivity); break; case DYNAMIC_CONNECTIVITY: callable(*this, m_dynamic_edge_connectivity); break; @@ -750,7 +756,6 @@ void Bucket::modify_connectivity(T& callable, EntityRank rank) } break; case stk::topology::FACE_RANK: - STK_ThrowAssert(m_face_kind != INVALID_CONNECTIVITY_TYPE); switch(m_face_kind) { case FIXED_CONNECTIVITY: callable(*this, m_fixed_face_connectivity); break; case DYNAMIC_CONNECTIVITY: callable(*this, m_dynamic_face_connectivity); break; @@ -758,7 +763,6 @@ void Bucket::modify_connectivity(T& callable, EntityRank rank) } break; case stk::topology::ELEMENT_RANK: - STK_ThrowAssert(m_element_kind != INVALID_CONNECTIVITY_TYPE); switch(m_element_kind) { case FIXED_CONNECTIVITY: callable(*this, m_fixed_element_connectivity); break; case DYNAMIC_CONNECTIVITY: callable(*this, m_dynamic_element_connectivity); break; diff --git a/packages/stk/stk_mesh/stk_mesh/base/BucketConnectivity.hpp b/packages/stk/stk_mesh/stk_mesh/base/BucketConnectivity.hpp index 0ada0e6b78ae..f4447b968d9d 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BucketConnectivity.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BucketConnectivity.hpp @@ -52,46 +52,6 @@ 
using Permutations = util::StridedArray; namespace impl { -struct LowerConnectivityCompare -{ - bool operator()(Entity first_entity, ConnectivityOrdinal first_ordinal, Entity second_entity, ConnectivityOrdinal second_ordinal) const - { - // only compare ordinals - return first_ordinal < second_ordinal; - } -}; - -template -struct LowerConnectivitityRankSensitiveCompare -{ - LowerConnectivitityRankSensitiveCompare(const BULKDATA &bulk_data) : m_mesh(bulk_data) { } - - const BULKDATA &m_mesh; - - bool operator()(Entity first_entity, ConnectivityOrdinal first_ordinal, - Entity second_entity, ConnectivityOrdinal second_ordinal) const; -}; - -struct HigherConnectivityCompare -{ - bool operator()(Entity first_entity, ConnectivityOrdinal first_ordinal, Entity second_entity, ConnectivityOrdinal second_ordinal) const - { - // Needs to match LessRelation in BulkData.hpp - return std::make_pair(first_ordinal, first_entity.is_local_offset_valid() ? first_entity.local_offset() : Entity::MaxEntity) < - std::make_pair(second_ordinal, second_entity.is_local_offset_valid() ? 
second_entity.local_offset() : Entity::MaxEntity); - } -}; - -template -struct HigherConnectivityRankSensitiveCompare -{ - HigherConnectivityRankSensitiveCompare(const BULKDATA &bulk_data) : m_mesh(bulk_data) { } - - const BULKDATA &m_mesh; - - bool operator()(Entity first_entity, ConnectivityOrdinal first_ordinal, Entity second_entity, - ConnectivityOrdinal second_ordinal) const; -}; template inline void check_bucket_ordinal(unsigned bucket_ordinal, Connectivity const* connectivity) @@ -114,7 +74,6 @@ class BucketConnectivity { public: typedef BucketConnectivity SelfType; - typedef BucketConnectivity OtherType; static const EntityRank target_rank = TargetRank; static const ConnectivityType connectivity_type = FIXED_CONNECTIVITY; @@ -283,6 +242,26 @@ class BucketConnectivity return true; } + bool replace_connectivity(unsigned bucket_ordinal, unsigned numConnectivity, + const Entity* connectivity, + const ConnectivityOrdinal* ordinals, + const Permutation* perms) + { + if (bucket_ordinal == size()) { + add_entity(); + } + + impl::check_bucket_ordinal(bucket_ordinal, this); + const unsigned index = m_num_connectivity*bucket_ordinal; + for(unsigned i=0; i copy_connectivity(from_ordinal, to, to_ordinal); } - void copy_to_fixed(unsigned from_ordinal, SelfType& to) - { STK_ThrowAssert(false); } - - void copy_to_fixed(unsigned from_ordinal, OtherType& to) - { STK_ThrowAssert(false); } - bool has_permutation() const { const static bool rv = TargetRank != stk::topology::NODE_RANK; @@ -391,716 +364,6 @@ class BucketConnectivity EntityVector m_targets; ConnectivityOrdinalVector m_ordinals; // shared for all entities PermutationVector m_permutations; - - // friend OtherType; // 1337! 
Will have to wait for c++11 - friend class BucketConnectivity; -}; - -// Want a way for all dynamic connectivity instantiations to share the same id space -struct Counter -{ - static int counter; -}; - -// Profiling data for an individual dynamic connectivity object -struct DynConnData -{ - // from-rank for the associated dynamic connectivity - EntityRank m_from_rank; - - // to-rank for the associated dynamic connectivity - EntityRank m_to_rank; - - // the maximum capacity ever achieved by connectivity vectors - size_t m_max_capacity; - - // at the point at which maximum capacity (member above) was achieved, how much memory - // was lost due to "abandoned space". - // "abandoned space" - When an entity overflows its current chunks, it gets additional - // chunks but must be copied to the end. The space left behind is abandoned and will - // not be reused until the next compress (resize_and_order_by_index). - size_t m_abandoned_space; - - // at the point at which maximum capacity (member above) was achieved, how much memory - // was lost due to unused chunk capacity. If chunk size is > 1, it's possible that an - // entity is not using all the space available in it's chunk. For example, if chunk size - // is 8 and an entity has 5 connectivities, then unused chunk capacity is 3 for that - // entity. This member stores the sum over all entities. - size_t m_unused_chunk_capacity; - - // The number of times this dynamic connectivity had to be grown - size_t m_num_growths; - - // The number of times any entity overflowed it's chunk allocation and had to be - // copied to the end - size_t m_num_entity_relocations; - - // at the point at which maximum capacity (member above) was achieved, what is the - // total amount of wasted memory - size_t m_total_unused_memory; - - // at the point at which maximum capacity (member above) was achieved, what is the - // amount of memory that is wasted due to vector capacity growth over-provisioning. 
- size_t m_unused_capacity; - - // at the point at which maximum capacity (member above) was achieved, what is the - // number of connectivity being stored. - size_t m_total_num_conn; - - DynConnData(EntityRank from_rank, EntityRank to_rank) : - m_from_rank(from_rank), - m_to_rank(to_rank), - m_max_capacity(0), - m_abandoned_space(0), - m_unused_chunk_capacity(0), - m_num_growths(0), - m_num_entity_relocations(0), - m_total_unused_memory(0), - m_unused_capacity(0), - m_total_num_conn(0) - {} -}; - -template -class BucketConnectivity -{ - enum connectivity_direction { Lower=0,Higher=1,Adjacent=2 }; - -public: - typedef BucketConnectivity SelfType; - typedef BucketConnectivity OtherType; - - static const EntityRank target_rank = TargetRank; - static const ConnectivityType connectivity_type = DYNAMIC_CONNECTIVITY; - - typedef std::vector EntityVector; - typedef std::vector ConnectivityOrdinalVector; - typedef std::vector PermutationVector; - typedef std::vector UInt32Vector; - typedef std::vector UInt16Vector; - - static const unsigned chunk_size = 1u; - - BucketConnectivity(EntityRank from_rank, BulkData *bulk_data) - : m_from_rank(from_rank) - , m_direction( (m_from_rank < TargetRank) ? Higher : ((m_from_rank == TargetRank) ? Adjacent : Lower)) - , m_active(false) - , m_needs_shrink_to_fit(false) - , m_num_inactive(0) - , m_indices() - , m_num_connectivities() - , m_total_connectivities(0) - , m_targets() - , m_ordinals() - , m_permutations() - , m_bulk_data(bulk_data) - , m_id(Counter::counter++) - , m_rank_sensitive_higher_connectivity_cmp(*m_bulk_data) - , m_rank_sensitive_lower_connectivity_cmp(*m_bulk_data) - , m_last_capacity(0) - { - } - - const ConnectedEntities get_connected_entities(unsigned bucket_ordinal) const - { - return ConnectedEntities(&m_targets[m_active ? m_indices[bucket_ordinal] : 0], - m_active ? 
m_num_connectivities[bucket_ordinal] : 0); - } - ConnectedEntities get_connected_entities(unsigned bucket_ordinal) - { - return ConnectedEntities(&m_targets[m_active ? m_indices[bucket_ordinal] : 0], - m_active ? m_num_connectivities[bucket_ordinal] : 0); - } - - Entity const* begin(unsigned bucket_ordinal) const - { impl::check_bucket_ordinal(bucket_ordinal, this); - return &m_targets[m_active ? m_indices[bucket_ordinal] : 0]; } - - Entity * begin(unsigned bucket_ordinal) - { impl::check_bucket_ordinal(bucket_ordinal, this); - return &m_targets[m_active ? m_indices[bucket_ordinal] : 0]; } - - Entity const* end(unsigned bucket_ordinal) const - { impl::check_bucket_ordinal(bucket_ordinal, this); - return begin(bucket_ordinal) + num_connectivity(bucket_ordinal); } - - Entity * end(unsigned bucket_ordinal) - { impl::check_bucket_ordinal(bucket_ordinal, this); - return begin(bucket_ordinal) + num_connectivity(bucket_ordinal); } - - // Ordinal iterator - - ConnectivityOrdinal const* begin_ordinals(unsigned bucket_ordinal) const - { impl::check_bucket_ordinal(bucket_ordinal, this); - return &m_ordinals[m_active ? m_indices[bucket_ordinal] : 0]; } - - ConnectivityOrdinal * begin_ordinals(unsigned bucket_ordinal) - { impl::check_bucket_ordinal(bucket_ordinal, this); - return &m_ordinals[m_active ? m_indices[bucket_ordinal] : 0]; } - - ConnectivityOrdinal const* end_ordinals(unsigned bucket_ordinal) const - { impl::check_bucket_ordinal(bucket_ordinal, this); - return begin_ordinals(bucket_ordinal) + num_connectivity(bucket_ordinal); } - - ConnectivityOrdinal * end_ordinals(unsigned bucket_ordinal) - { impl::check_bucket_ordinal(bucket_ordinal, this); - return begin_ordinals(bucket_ordinal) + num_connectivity(bucket_ordinal); } - - // Permutation iterator - - Permutation const* begin_permutations(unsigned bucket_ordinal) const - { - impl::check_bucket_ordinal(bucket_ordinal, this); - if (!has_permutation()) return NULL; - return &m_permutations[m_active ? 
m_indices[bucket_ordinal] : 0]; - } - - Permutation * begin_permutations(unsigned bucket_ordinal) - { - impl::check_bucket_ordinal(bucket_ordinal, this); - if (!has_permutation()) return NULL; - return &m_permutations[m_active ? m_indices[bucket_ordinal] : 0]; - } - - Permutation const* end_permutations(unsigned bucket_ordinal) const - { - impl::check_bucket_ordinal(bucket_ordinal, this); - if (!has_permutation()) return NULL; - return begin_permutations(bucket_ordinal) + num_connectivity(bucket_ordinal); - } - - Permutation * end_permutations(unsigned bucket_ordinal) - { - impl::check_bucket_ordinal(bucket_ordinal, this); - if (!has_permutation()) return NULL; - return begin_permutations(bucket_ordinal) + num_connectivity(bucket_ordinal); - } - - // Queries - - unsigned num_connectivity(unsigned bucket_ordinal) const - { impl::check_bucket_ordinal(bucket_ordinal, this); - return m_active ? m_num_connectivities[bucket_ordinal] : 0; } - - // return number of entities - unsigned size() const - { return m_active ? m_indices.size() : m_num_inactive; } - - // Modification API - - bool add_connectivity(unsigned bucket_ordinal, Entity to, ConnectivityOrdinal ordinal, Permutation permutation = INVALID_PERMUTATION) - { - impl::check_bucket_ordinal(bucket_ordinal, this); - - m_needs_shrink_to_fit = true; - - if (!m_active) { - activate(); - } - - if (target_rank <= stk::topology::ELEMENT_RANK) { - switch(m_direction) - { - case Lower: return add_helper(bucket_ordinal, to, ordinal, permutation, LowerConnectivityCompare()); - case Higher: return add_helper(bucket_ordinal, to, ordinal, permutation, HigherConnectivityCompare()); - case Adjacent: return add_helper(bucket_ordinal, to, ordinal, permutation, LowerConnectivityCompare()); // same comparing as lower - default: - STK_ThrowAssertMsg(false, "What type of connectivity are you trying to add? 
" << m_direction); - return false; - } - } - else { - switch(m_direction) - { - case Lower: return add_helper(bucket_ordinal, to, ordinal, permutation, m_rank_sensitive_lower_connectivity_cmp); - case Higher: return add_helper(bucket_ordinal, to, ordinal, permutation, m_rank_sensitive_higher_connectivity_cmp); - case Adjacent: return add_helper(bucket_ordinal, to, ordinal, permutation, m_rank_sensitive_lower_connectivity_cmp); - default: - STK_ThrowAssertMsg(false, "What type of connectivity are you trying to add? " << m_direction); - return false; - } - } - } - - bool remove_connectivity(unsigned bucket_ordinal, Entity to, ConnectivityOrdinal ordinal) - { - impl::check_bucket_ordinal(bucket_ordinal, this); - - if (!m_active) return false; - - uint32_t found_idx = ~0u; - const uint32_t end_i = m_indices[bucket_ordinal]+m_num_connectivities[bucket_ordinal]; - for (uint32_t i = m_indices[bucket_ordinal]; i < end_i; ++i) - { - //remove connectivity - if ( m_targets[i] == to && m_ordinals[i] == ordinal ) { - found_idx = i; - --m_num_connectivities[bucket_ordinal]; - --m_total_connectivities; - break; - } - } - - //slide memory down - if (found_idx != ~0u) { - m_needs_shrink_to_fit = true; - for (uint32_t i = found_idx; i < end_i - 1; ++i) { - m_targets[i] = m_targets[i+1]; - m_ordinals[i] = m_ordinals[i+1]; - if (has_permutation()) { - m_permutations[i] = m_permutations[i+1]; - } - } - } - - return found_idx != ~0u; - } - - void begin_modification() - {} - - template - void end_modification(BULKDATA* mesh = NULL); - - void add_entity() - { - if (m_active) { - m_indices.push_back(m_targets.size()); - m_num_connectivities.push_back(0); - m_needs_shrink_to_fit = true; - } - else { - ++m_num_inactive; - } - } - - void remove_entity() - { - STK_ThrowAssertMsg(size() > 0, "Cannot remove, connectivity is already empty"); - - if (m_active) { - m_indices.pop_back(); - m_total_connectivities -= m_num_connectivities.back(); - m_num_connectivities.pop_back(); - 
m_needs_shrink_to_fit = true; - } - else { - --m_num_inactive; - } - } - - void copy_entity(unsigned from_ordinal, SelfType& to, unsigned to_ordinal=-1u) - { - STK_ThrowAssert(m_from_rank == to.m_from_rank); - impl::check_bucket_ordinal(from_ordinal, this); - - if (to_ordinal == -1u) { - to_ordinal = to.size(); - to.add_entity(); - } - impl::check_bucket_ordinal(to_ordinal, &to); - - // Manage activation state - if (!m_active) { - if (to.m_active) { - to.m_total_connectivities -= to.m_num_connectivities[to_ordinal]; - to.m_num_connectivities[to_ordinal] = 0; - to.m_needs_shrink_to_fit = true; - } - return; - } - if (m_active && !to.m_active) { - to.activate(); - } - - // Copy data - if (&to == this) { - // easy - // note this implements swap semantics instead of copy, but this is necessary - // to avoid aliasing in certain situations when sorting Partitions - std::swap(m_indices[to_ordinal], m_indices[from_ordinal]); - std::swap(m_num_connectivities[to_ordinal], m_num_connectivities[from_ordinal]); - - m_needs_shrink_to_fit = true; - to.m_needs_shrink_to_fit = true; - } - else { - // much harder - const unsigned from_num = m_num_connectivities[from_ordinal]; - const unsigned to_num = to.m_num_connectivities[to_ordinal]; - const int delta_num = from_num - to_num; - if (delta_num > 0) { - // If adding additional connectivity, need to reserve space - to.add_connectivity_helper(to_ordinal, delta_num); - } - else { - to.m_num_connectivities[to_ordinal] = from_num; - to.m_total_connectivities += delta_num; - } - if (delta_num != 0) { - to.m_needs_shrink_to_fit = true; - } - copy_connectivity(from_ordinal, to, to_ordinal); - } - } - - void copy_to_fixed(unsigned from_ordinal, OtherType& to) - { - const unsigned num_conn_to_move = m_active ? 
m_num_connectivities[from_ordinal] : 0; - - STK_ThrowAssert(OtherType::connectivity_type == FIXED_CONNECTIVITY); - STK_ThrowAssertMsg(size() > 0, "Cannot move, connectivity is empty"); - STK_ThrowAssertMsg(num_conn_to_move <= to.num_connectivity(666 /*any unsigned, doesn't matter*/), "Incompatible"); - - const unsigned to_offset = to.m_targets.size(); - to.add_entity(); // make room for new entity - - const unsigned from_offset = m_active ? m_indices[from_ordinal] : 0; - -#ifndef NDEBUG - // Check the ordinals are compatible with fixed connectivity - ConnectivityOrdinal const* ordinals = m_ordinals.data() + from_offset; - for (unsigned i = 0; i < num_conn_to_move; ++i) { - STK_ThrowAssert(ordinals[i] == i); - } -#endif - - std::copy(m_targets.begin() + from_offset, - m_targets.begin() + from_offset + num_conn_to_move, - to.m_targets.begin() + to_offset); - - if (has_permutation()) { - std::copy(m_permutations.begin() + from_offset, - m_permutations.begin() + from_offset + num_conn_to_move, - to.m_permutations.begin() + to_offset); - } - } - - void copy_to_fixed(unsigned from_ordinal, SelfType& to) - { STK_ThrowAssert(false); } - - bool has_permutation() const - { return does_rank_have_valid_permutations(TargetRank) && does_rank_have_valid_permutations(m_from_rank); } - - size_t heap_memory_in_bytes() const - { - return capacity_in_bytes(m_targets) - + capacity_in_bytes(m_ordinals) - + capacity_in_bytes(m_permutations); - } - - void debug_dump(std::ostream& out) const - { - out << "For dynamic connectivity to rank: " << TargetRank << ", with id: " << m_id << "\n"; - if (m_active) { - out << " size is: " << m_indices.size() << "\n"; - for (int i = 0, ie = m_indices.size(); i < ie; ++i) { - out << " At ordinal " << i << "\n"; - debug_dump(out, i, false); - } - out << std::endl; - } - else { - out << " size is: " << m_num_inactive << ", but inactive" << std::endl; - } - } - - void debug_dump(std::ostream& out, unsigned ordinal, bool add_context=true) const - { - if 
(m_active) { - int idx = m_indices[ordinal]; - int num = m_num_connectivities[ordinal]; - if (add_context) { - out << "For dynamic connectivity to rank: " << TargetRank << ", with id: " << m_id << "\n"; - } - out << " Index is: " << idx << ", Num is: " << num << "\n"; - for (int j = idx, je = idx + num; j < je; ++j) { - out << " (target:" << m_targets[j].local_offset() << ", ordinal:" << (uint32_t)m_ordinals[j] << ")\n"; - } - } - else { - out << " Index is: 0, Num is: 0\n"; - } - } - -private: - - bool does_rank_have_valid_permutations(stk::mesh::EntityRank rank) const - { - return rank > stk::topology::NODE_RANK && rank < stk::topology::CONSTRAINT_RANK; - } - - void copy_connectivity(unsigned from_ordinal, SelfType& to, unsigned to_ordinal) - { - unsigned num_conn = m_num_connectivities[from_ordinal]; - unsigned to_offset = to.m_indices[to_ordinal]; - unsigned from_offset = m_indices[from_ordinal]; - STK_ThrowAssert(to.m_num_connectivities[to_ordinal] == num_conn); - - std::copy(m_targets.begin() + from_offset, - m_targets.begin() + from_offset + num_conn, - to.m_targets.begin() + to_offset); - - std::copy(m_ordinals.begin() + from_offset, - m_ordinals.begin() + from_offset + num_conn, - to.m_ordinals.begin() + to_offset); - - if (has_permutation()) { - std::copy(m_permutations.begin() + from_offset, - m_permutations.begin() + from_offset + num_conn, - to.m_permutations.begin() + to_offset); - } - } - - static unsigned num_chunks(unsigned num) - { return (num + chunk_size -1)/chunk_size; } - - void activate() - { - STK_ThrowAssert(!m_active); - - m_indices.resize(m_num_inactive, 0); - m_num_connectivities.resize(m_num_inactive, 0); - - m_active = true; - m_num_inactive = 0; - } - - template - void resize_and_order_by_index_helper(Vector & data, unsigned capacity, bool update_index = false) - { - Vector temp; - temp.reserve(capacity); - - uint32_t current_index=0; - for(size_t i=0, e=m_indices.size(); i 0 ? 
2 * old_capacity : 8*chunk_size; - while (new_capacity < minimum_size) { - new_capacity *= 2; - } - return new_capacity; - } - - // The old capacity is at or above the threshold for being careful about growing - // the connectivity representation(and memory footprint). Only grow the capacity - // if compressing the representation will not yield sufficient unused capacity. - if (capacity_ratio * m_total_connectivities > old_capacity) - { - return 2 * old_capacity; - } - else - { - return old_capacity; - } - } - - void add_connectivity_helper(unsigned bucket_ordinal, unsigned num_to_add=1) - { - const unsigned chunks_needed_by_entity = num_chunks(m_num_connectivities[bucket_ordinal]+num_to_add); - const unsigned chunks_used_by_entity = num_chunks(m_num_connectivities[bucket_ordinal]); - - if (chunks_needed_by_entity == chunks_used_by_entity) - { - m_total_connectivities += num_to_add; - m_num_connectivities[bucket_ordinal] += num_to_add; - return; - } - - const unsigned chunks_available = num_chunks(m_targets.capacity() - m_targets.size()); - - if (chunks_available < chunks_needed_by_entity) - { - const unsigned new_capacity = compute_new_connectivity_capacity(m_targets.size() + chunks_needed_by_entity * chunk_size); - resize_and_order_by_index(new_capacity); - } - - const bool last_entity_by_index = (chunks_used_by_entity > 0) && - (m_indices[bucket_ordinal] + chunks_used_by_entity*chunk_size == m_targets.size()); - Entity invalid; - - //copy to end - if (!last_entity_by_index) - { - uint32_t new_index = static_cast(m_targets.size()); - - m_targets.insert(m_targets.end(), chunks_needed_by_entity*chunk_size, invalid); - std::copy(begin(bucket_ordinal), end(bucket_ordinal), m_targets.begin() + new_index); - - m_ordinals.insert(m_ordinals.end(), chunks_needed_by_entity*chunk_size, INVALID_CONNECTIVITY_ORDINAL); - std::copy(begin_ordinals(bucket_ordinal), end_ordinals(bucket_ordinal), m_ordinals.begin() + new_index); - - if (has_permutation()) { - 
m_permutations.insert(m_permutations.end(), chunks_needed_by_entity*chunk_size, INVALID_PERMUTATION); - std::copy(begin_permutations(bucket_ordinal), end_permutations(bucket_ordinal), m_permutations.begin() + new_index); - } - - m_indices[bucket_ordinal] = new_index; - } - //add new chunk to end - else { - const unsigned extra_chunks_needed = chunks_needed_by_entity - chunks_used_by_entity; - m_targets.insert(m_targets.end(), extra_chunks_needed*chunk_size, invalid); - m_ordinals.insert(m_ordinals.end(), extra_chunks_needed*chunk_size, INVALID_CONNECTIVITY_ORDINAL); - if (has_permutation()) { - m_permutations.insert(m_permutations.end(), extra_chunks_needed*chunk_size, INVALID_PERMUTATION); - } - } - - m_total_connectivities += num_to_add; - m_num_connectivities[bucket_ordinal] += num_to_add; - } - - template - bool add_helper(unsigned bucket_ordinal, Entity to, ConnectivityOrdinal ordinal, Permutation permutation, - const ConnectivityComparator &compare) - { -#ifndef NDEBUG - // TODO - If downward conn, check to's rank and topology -#endif - bool rv = true; - - add_connectivity_helper(bucket_ordinal); - - const uint32_t begin_index = m_indices[bucket_ordinal] + m_num_connectivities[bucket_ordinal] - 1; - - if (m_num_connectivities[bucket_ordinal] == 1) { - m_targets[begin_index] = to; - m_ordinals[begin_index] = ordinal; - if (has_permutation()) { - m_permutations[begin_index] = permutation; - } - return true; - } - - for (uint32_t i = begin_index, e = m_indices[bucket_ordinal]; i > e; --i) - { - //slide up - if ( compare(to, ordinal, m_targets[i-1], m_ordinals[i-1u]) ) { - m_targets[i] = m_targets[i-1u]; - m_ordinals[i] = m_ordinals[i-1u]; - if (has_permutation()) { - m_permutations[i] = m_permutations[i-1u]; - } - //insert if on last iteration - if ((i-1)==e) { - m_targets[i-1u] = to; - m_ordinals[i-1u] = ordinal; - if (has_permutation()) { - m_permutations[i-1u] = permutation; - } - } - } - //insert - else if ( compare(m_targets[i-1], m_ordinals[i-1u], to, 
ordinal) ) { - m_targets[i] = to; - m_ordinals[i] = ordinal; - if (has_permutation()) { - m_permutations[i] = permutation; - } - break; - } - //duplicate -- insert new and remove the original - else - { - m_targets[i] = to; - m_ordinals[i] = ordinal; - if (has_permutation()) { - m_permutations[i] = permutation; - } - remove_connectivity(bucket_ordinal, to, ordinal); - rv = false; - break; - } - } - - return rv; - } - - // Illegal - BucketConnectivity(const SelfType&); - SelfType& operator=(const SelfType&); - - // MEMBERS - - EntityRank m_from_rank; - connectivity_direction m_direction; - - bool m_active; // In many cases, uses will not make use of dynamic connectivity, so don't even waste the memory unless it looks like they want it - bool m_needs_shrink_to_fit; // True if this object potentially has partially full vectors or out-of-order entities - unsigned m_num_inactive; - - // meta data - UInt32Vector m_indices; // Common index into vectors below that stores where connectivity starts for a partition_offset (entity). 
- UInt32Vector m_num_connectivities; - unsigned m_total_connectivities; - - // connectivity data - EntityVector m_targets; - ConnectivityOrdinalVector m_ordinals; - PermutationVector m_permutations; - - BulkData * m_bulk_data; - int m_id; - - impl::HigherConnectivityRankSensitiveCompare m_rank_sensitive_higher_connectivity_cmp; - impl::LowerConnectivitityRankSensitiveCompare m_rank_sensitive_lower_connectivity_cmp; - - size_t m_last_capacity; - - size_t m_data_idx; }; } @@ -1131,62 +394,6 @@ void impl::BucketConnectivity::end_modification( } -template -template -inline -void impl::BucketConnectivity::end_modification(BULKDATA* mesh) -{ - if (m_active && m_needs_shrink_to_fit) { - resize_and_order_by_index(); - - { - UInt32Vector temp(m_indices.begin(), m_indices.end()); - m_indices.swap(temp); - } - - { - UInt32Vector temp(m_num_connectivities.begin(), m_num_connectivities.end()); - m_num_connectivities.swap(temp); - } - - m_needs_shrink_to_fit = false; - } -} - -template -inline -bool impl::LowerConnectivitityRankSensitiveCompare::operator()(Entity first_entity, ConnectivityOrdinal first_ordinal, - Entity second_entity, ConnectivityOrdinal second_ordinal) const -{ - const EntityRank first_rank = m_mesh.entity_rank(first_entity); - const EntityRank second_rank = m_mesh.entity_rank(second_entity); - - return (first_rank < second_rank) - || ((first_rank == second_rank) && (first_ordinal < second_ordinal)); -} - -template -inline -bool impl::HigherConnectivityRankSensitiveCompare::operator()(Entity first_entity, ConnectivityOrdinal first_ordinal, Entity second_entity, ConnectivityOrdinal second_ordinal) const -{ - const EntityRank first_rank = m_mesh.entity_rank(first_entity); - const EntityRank second_rank = m_mesh.entity_rank(second_entity); - - if (first_rank < second_rank) { - return true; - } - if (first_rank > second_rank) { - return false; - } - // Needs to match LessRelation in BulkData.hpp - return std::make_pair(first_ordinal, 
first_entity.is_local_offset_valid() ? first_entity.local_offset() : Entity::MaxEntity) < - std::make_pair(second_ordinal, second_entity.is_local_offset_valid() ? second_entity.local_offset() : Entity::MaxEntity); -} - - - - - }} //namespace stk::mesh::impl #endif diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp index 0ae9eda96f75..f03f8948adee 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp @@ -32,11 +32,16 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // +#include #include "stk_mesh/base/Entity.hpp" // for Entity, operator<<, etc #include "stk_mesh/base/EntityCommDatabase.hpp" // for pack_entity_info, etc #include "stk_mesh/base/EntityKey.hpp" // for EntityKey, etc +#include "stk_mesh/base/EntityLess.hpp" #include "stk_mesh/base/FieldBase.hpp" // for FieldBase, FieldMetaData, etc #include "stk_mesh/base/FieldDataManager.hpp" // for FieldDataManager, etc +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after July 31 2024 +#include "stk_mesh/base/FindPermutation.hpp" +#endif #include "stk_mesh/base/Ghosting.hpp" // for Ghosting #include "stk_mesh/base/Part.hpp" // for Part, remove, etc #include "stk_mesh/base/Relation.hpp" // for Relation, etc @@ -47,16 +52,17 @@ #include "stk_mesh/baseImpl/BucketRepository.hpp" // for BucketRepository #include "stk_mesh/baseImpl/Visitors.hpp" #include "stk_mesh/baseImpl/MeshImplUtils.hpp" +#include "stk_mesh/baseImpl/ElemDeathImpl.hpp" #include "stk_mesh/baseImpl/MeshCommImplUtils.hpp" #include "stk_mesh/baseImpl/MeshCommVerify.hpp" #include "stk_mesh/baseImpl/PartVectorUtils.hpp" #include "stk_mesh/baseImpl/MeshModification.hpp" #include "stk_mesh/baseImpl/CommEntityMods.hpp" +#include #include "stk_mesh/baseImpl/ConnectEdgesImpl.hpp" #include "stk_mesh/baseImpl/Partition.hpp" #include "stk_topology/topology.hpp" // for topology, etc #include "stk_util/diag/StringUtil.hpp" 
-#include "stk_util/environment/RuntimeWarning.hpp" #include "stk_util/parallel/Parallel.hpp" // for ParallelMachine, etc #include "stk_util/util/NamedPair.hpp" #include "stk_util/util/PairIter.hpp" // for PairIter @@ -71,18 +77,13 @@ #include #include // for size_t #include // for Bucket, BucketIdComparator, etc -#include #include -#include // for get_selected_entities #include // for MetaData #include #include #include -#include -#include "stk_mesh/base/GetNgpMesh.hpp" #include #include -#include #include #include #include // for SideConnector @@ -103,10 +104,6 @@ namespace stk { namespace mesh { -namespace impl { -int Counter::counter = 0; -} - // Static constant on BulkData: const uint16_t BulkData::orphaned_node_marking = 25000; @@ -475,18 +472,12 @@ void BulkData::set_automatic_aura_option(AutomaticAuraOption auraOption, bool ap } } -void BulkData::update_deleted_entities_container() -{ - m_meshModification.get_deleted_entity_cache().update_deleted_entities_container(); -} - //---------------------------------------------------------------------- //---------------------------------------------------------------------- void BulkData::require_ok_to_modify() const { - STK_ThrowRequireMsg( !this->in_synchronized_state(), - "NOT in the ok-to-modify state" ); + STK_ThrowRequireMsg( !this->in_synchronized_state(), "NOT in the ok-to-modify state" ); } void BulkData::require_entity_owner( const Entity entity , @@ -507,8 +498,7 @@ void BulkData::require_good_rank_and_id(EntityRank ent_rank, EntityId ent_id) co const bool ok_id = EntityKey::is_valid_id(ent_id); const bool ok_rank = ent_rank < rank_count && !(ent_rank == stk::topology::FACE_RANK && mesh_meta_data().spatial_dimension() == 2); - STK_ThrowRequireMsg( ok_rank, - "Bad key rank: " << ent_rank << " for id " << ent_id ); + STK_ThrowRequireMsg( ok_rank, "Bad key rank: " << ent_rank << " for id " << ent_id ); STK_ThrowRequireMsg( ok_id, "Bad id : " << ent_id); } @@ -624,7 +614,7 @@ Entity 
BulkData::generate_new_entity(unsigned preferred_offset) m_local_ids.push_back(stk::mesh::GetInvalidLocalId()); #ifdef SIERRA_MIGRATION - if (m_add_fmwk_data) { + if (add_fmwk_data()) { m_fmwk_aux_relations.push_back(nullptr); m_fmwk_global_ids.push_back(0); } @@ -643,7 +633,7 @@ Entity BulkData::generate_new_entity(unsigned preferred_offset) m_local_ids[new_local_offset] = stk::mesh::GetInvalidLocalId(); #ifdef SIERRA_MIGRATION - if (m_add_fmwk_data) { + if (add_fmwk_data()) { //bulk-data allocated aux-relation vector, so delete it here. delete m_fmwk_aux_relations[new_local_offset]; m_fmwk_aux_relations[new_local_offset] = nullptr; @@ -673,7 +663,7 @@ void BulkData::initialize_arrays() m_local_ids.push_back(stk::mesh::GetInvalidLocalId()); #ifdef SIERRA_MIGRATION - if (m_add_fmwk_data) { + if (add_fmwk_data()) { m_fmwk_aux_relations.push_back(nullptr); m_fmwk_global_ids.push_back(0); } @@ -1001,13 +991,6 @@ void BulkData::internal_verify_and_change_entity_parts( Entity entity, { require_ok_to_modify(); -#ifdef SIERRA_MIGRATION - if(!m_add_fmwk_data) - { - require_entity_owner(entity, parallel_rank()); - } -#endif //SIERRA_MIGRATION - OrdinalVector addPartsAndSupersets; impl::fill_add_parts_and_supersets(add_parts, addPartsAndSupersets); @@ -1040,13 +1023,6 @@ void BulkData::internal_verify_and_change_entity_parts( const EntityVector& enti OrdinalVector scratchOrdinalVec, scratchSpace; for(Entity entity : entities) { -#ifdef SIERRA_MIGRATION - if(!m_add_fmwk_data) - { - require_entity_owner(entity, parallel_rank()); - } -#endif //SIERRA_MIGRATION - addPartsAndSupersets.clear(); impl::fill_add_parts_and_supersets(add_parts, addPartsAndSupersets); @@ -1153,8 +1129,8 @@ void require_fmwk_or_entity_purely_local(const BulkData& mesh, Entity entity, co void BulkData::change_entity_id( EntityId id, Entity entity) { -// THIS ThrowAssertMsg IS ONLY MACRO CONTROLLED TO ALLOW EXPERIMENTATION WITH -// Fmwk USING stk_parallel. 
WHEN stk parallel IS USED WITHN Fmwk, THIS ASSERTION +// THIS STK_ThrowAssertMsg IS ONLY MACRO CONTROLLED TO ALLOW EXPERIMENTATION WITH +// Fmwk USING stk_parallel. WHEN stk parallel IS USED WITHIN Fmwk, THIS ASSERTION // IS VIOLATED. #ifndef SIERRA_MIGRATION STK_ThrowAssertMsg(parallel_size() == 1, @@ -1319,17 +1295,78 @@ std::vector BulkData::internal_get_ids_in_use(stk::topology::rank_t ra } uint64_t BulkData::get_max_allowed_id() const { + if(add_fmwk_data()) { #ifdef SIERRA_MIGRATION - if(m_add_fmwk_data) { return std::numeric_limits::max(); - } else { +#else return stk::mesh::EntityKey::MAX_ID; +#endif } -#else return stk::mesh::EntityKey::MAX_ID; -#endif } +#ifdef SIERRA_MIGRATION + +const RelationVector& +BulkData::aux_relations(Entity entity) const +{ + STK_ThrowAssert(add_fmwk_data()); + STK_ThrowAssert(entity.local_offset() > 0); + + if (m_fmwk_aux_relations[entity.local_offset()] == NULL) { + m_fmwk_aux_relations[entity.local_offset()] = new RelationVector(); + } + return *m_fmwk_aux_relations[entity.local_offset()]; +} + +RelationVector& +BulkData::aux_relations(Entity entity) +{ + STK_ThrowAssert(add_fmwk_data()); + STK_ThrowAssert(entity.local_offset() > 0); + + if (m_fmwk_aux_relations[entity.local_offset()] == NULL) { + m_fmwk_aux_relations[entity.local_offset()] = new RelationVector(); + } + return *m_fmwk_aux_relations[entity.local_offset()]; +} + +RelationIterator +BulkData::internal_begin_relation(Entity entity, const RelationType relation_type) const +{ + STK_ThrowAssert(add_fmwk_data()); + if (impl::internal_is_handled_generically(relation_type)) { + STK_ThrowErrorMsg("stk::Mesh::BulkData::internal_begin_relation(..) 
requests native stk::mesh relation type"); + return RelationIterator(); + } + else { + return aux_relations(entity).begin(); + } +} + +RelationIterator +BulkData::internal_end_relation(Entity entity, const RelationType relation_type) const +{ + STK_ThrowAssert(add_fmwk_data()); + if (impl::internal_is_handled_generically(relation_type)) { + STK_ThrowErrorMsg("stk::Mesh::BulkData::internal_begin_relation(..) requests native stk::mesh relation type"); + return RelationIterator(); + } + else { + return aux_relations(entity).end(); + } +} + +void +BulkData::compress_relation_capacity(Entity entity) +{ + RelationVector &rels = aux_relations(entity); + RelationVector tmp(rels); + tmp.swap(rels); +} + +#endif + void BulkData::generate_new_ids_given_reserved_ids(stk::topology::rank_t rank, size_t numIdsNeeded, const std::vector& reserved_ids, std::vector& requestedIds) const { size_t maxNumNeeded = get_max_num_ids_needed_across_all_procs(*this, numIdsNeeded); @@ -2069,6 +2106,8 @@ void BulkData::reorder_buckets_callback(EntityRank rank, const std::vectorreorder_bucket_field_data(rank, fields, reorderedBucketIds); } +#ifdef SIERRA_MIGRATION + void BulkData::reserve_relation(Entity entity, const unsigned num) { if (num == 0 && aux_relations(entity).empty()) { @@ -2092,6 +2131,8 @@ void BulkData::erase_and_clear_if_empty(Entity entity, RelationIterator rel_itr) } } +#endif + BucketVector const& BulkData::get_buckets(EntityRank rank, Selector const& selector) const { if (rank == stk::topology::INVALID_RANK) { @@ -2169,61 +2210,21 @@ bool BulkData::internal_declare_relation(Entity e_from, Entity e_to, return modified; } -bool BulkData::check_permutation(Entity entity, Entity rel_entity, unsigned rel_ordinal, Permutation expected) const +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after July 31 2024 +STK_DEPRECATED bool BulkData::check_permutation(Entity entity, Entity rel_entity, unsigned rel_ordinal, Permutation expected) const { - const stk::topology &entity_topo = 
mesh_index(entity).bucket->topology(); - const stk::topology &rel_topo = mesh_index(rel_entity).bucket->topology(); - Entity const *entity_nodes = begin_nodes(entity); - Entity const *rel_entity_nodes = begin_nodes(rel_entity); - - Permutation computed_permutation = find_permutation(entity_topo, entity_nodes, - rel_topo, rel_entity_nodes, rel_ordinal); - - return computed_permutation == expected; + return stk::mesh::check_permutation(*this, entity, rel_entity, rel_ordinal, expected); } -Permutation BulkData::find_permutation( const stk::topology &hr_entity_topo, +STK_DEPRECATED Permutation BulkData::find_permutation( const stk::topology &hr_entity_topo, Entity const *hr_entity_nodes, const stk::topology &side_topo, Entity const *side_nodes, unsigned side_ordinal) const { - Entity expected_nodes[100]; - switch (side_topo.rank()) - { - case stk::topology::EDGE_RANK: - hr_entity_topo.edge_nodes(hr_entity_nodes, side_ordinal, expected_nodes); - break; - case stk::topology::FACE_RANK: - hr_entity_topo.face_nodes(hr_entity_nodes, side_ordinal, expected_nodes); - break; - default: - return INVALID_PERMUTATION; - } - - Permutation retval = INVALID_PERMUTATION; - - int permuted[100]; - const int nv = side_topo.num_nodes(); - const int np = side_topo.num_permutations() ; - int p = 0 ; - for ( ; p < np ; ++p ) { - side_topo.permutation_node_ordinals(p, permuted); - - // ALAN: can we replace this with equivalent? 
method on topology - int j = 0 ; - for ( ; j < nv && side_nodes[j] == expected_nodes[permuted[j]] ; ++j ); - - if ( nv == j ) - { - retval = static_cast(p); - break; - } - } - - return retval; + return stk::mesh::find_permutation(*this, hr_entity_topo, hr_entity_nodes, side_topo, side_nodes, side_ordinal); } - +#endif void BulkData::declare_relation( Entity e_from , Entity e_to , @@ -2648,280 +2649,6 @@ void BulkData::update_sharing_after_change_entity_owner() resolve_entity_ownership_and_part_membership_and_comm_list(modifiedEntities); } -void BulkData::internal_change_entity_owner( const std::vector & arg_change, - ModEndOptimizationFlag mod_optimization ) -{ - require_ok_to_modify(); - m_modSummary.track_change_entity_owner(arg_change); - - const MetaData & meta = mesh_meta_data() ; - const int p_rank = parallel_rank() ; - const int p_size = parallel_size() ; - ParallelMachine p_comm = parallel() ; - - //------------------------------ - // Verify the input changes, generate a clean local change list, and - // generate the remote change list so that all processes know about - // pending changes. - - std::vector local_change( arg_change ); - - // Parallel synchronous clean up and verify the requested changes: - impl::internal_clean_and_verify_parallel_change( *this , local_change ); - - //---------------------------------------- - // Parallel synchronous determination of changing shared and ghosted. - - // The two vectors below will contain changes to ghosted and shared - // entities on this process coming from change-entity-owner requests - // on other processes. - std::vector ghosted_change ; - std::vector shared_change ; - - impl::internal_generate_parallel_change_lists( *this , local_change , - shared_change , ghosted_change ); - - //------------------------------ - // Have enough information to delete all effected ghosts. - // If the closure of a ghost contains a changing entity - // then that ghost must be deleted. 
- // Request that all ghost entities in the closure of the ghost be deleted. - - std::set send_closure(*this); - impl::StoreInEntityProcSet store_entity_proc_in_set(*this, send_closure); - - // Compute the closure of all the locally changing entities - for (const EntityProc& entityProc : local_change) { - store_entity_proc_in_set.proc = entityProc.second; - impl::VisitClosureGeneral(*this,entityProc.first,entity_rank(entityProc.first),store_entity_proc_in_set,store_entity_proc_in_set); - } - - // Calculate all the ghosts that are impacted by the set of ownership - // changes. We look at ghosted, shared, and local changes looking for ghosts - // that are either in the closure of the changing entity, or have the - // changing entity in their closure. All modified ghosts will be removed. - { - impl::OnlyVisitGhostsOnce only_visit_ghosts_once(*this); - impl::StoreEntity store_entity(*this); - - std::vector& allChanges = ghosted_change; - allChanges.reserve(allChanges.size()+shared_change.size()+send_closure.size()); - allChanges.insert(allChanges.end(), shared_change.begin(), shared_change.end()); - allChanges.insert(allChanges.end(), local_change.begin(), local_change.end()); - impl::VisitAuraClosureGeneral(*this,allChanges.begin(),allChanges.end(),store_entity,only_visit_ghosts_once); - - std::vector remove_modified_ghosts; - store_entity.store_visited_entities_in_vec(remove_modified_ghosts); - - std::vector empty_add ; - std::vector removesForThisGhosting; - removesForThisGhosting.reserve(remove_modified_ghosts.size()); - const bool notAddingSendGhosts = true; - - // Skip 'm_ghosting[0]' which is the shared subset. - for (unsigned i=1; i::iterator - i = local_change.begin() ; i != local_change.end() ; ++i ) { - // Giving ownership, change the parts first and then - // the owner rank to pass the ownership test. 
- Entity entity = i->first; - - internal_verify_and_change_entity_parts( entity , ConstPartVector() , owned, - scratchOrdinalVec, scratchSpace ); - - internal_set_owner(entity, i->second); - } - - for ( std::vector::iterator - i = shared_change.begin() ; i != shared_change.end() ; ++i ) { - Entity entity = i->first; - internal_set_owner(entity, i->second); - if ( p_rank == i->second ) { // I received ownership - internal_verify_and_change_entity_parts( entity , owned , ConstPartVector(), - scratchOrdinalVec, scratchSpace ); - } - } - } - - - //------------------------------ - // Send entities, along with their closure, to the new owner processes - { - std::ostringstream error_msg ; - int error_count = 0 ; - - stk::CommSparse comm( p_comm ); - - EntityVector unique_list_of_send_closure; - unique_list_of_send_closure.reserve(send_closure.size()); - - const bool onlyPackDownwardRelations = true; - for ( std::set::iterator - i = send_closure.begin() ; i != send_closure.end() ; ++i ) { - CommBuffer & buffer = comm.send_buffer( i->second ); - Entity entity = i->first; - pack_entity_info(*this, buffer, entity, onlyPackDownwardRelations); - if (!is_communicated_with_proc(entity, i->second) || - std::binary_search(local_change.begin(), local_change.end(), *i, EntityLess(*this))) { - buffer.pack(1); - pack_field_values(*this, buffer , entity ); - } - else { - buffer.pack(0); - } - pack_sideset_info(*this, buffer , entity ); - - if (unique_list_of_send_closure.empty() || entity_key(unique_list_of_send_closure.back()) != entity_key(entity)) { - unique_list_of_send_closure.push_back(entity); - } - } - - comm.allocate_buffers(); - - for ( std::set::iterator - i = send_closure.begin() ; i != send_closure.end() ; ++i ) { - CommBuffer & buffer = comm.send_buffer( i->second ); - Entity entity = i->first; - pack_entity_info(*this, buffer, entity, onlyPackDownwardRelations); - if (!is_communicated_with_proc(entity, i->second) || - std::binary_search(local_change.begin(), 
local_change.end(), *i, EntityLess(*this))) { - buffer.pack(1); - pack_field_values(*this, buffer , entity ); - } - else { - buffer.pack(0); - } - pack_sideset_info(*this, buffer , entity ); - } - - const bool deallocateSendBuffers = true; - comm.communicate(deallocateSendBuffers); - - SideSetHelper helper(*this, mesh_meta_data().universal_part()); - for ( std::set::iterator - i = send_closure.begin() ; i != send_closure.end() ; ++i ) { - Entity entity = i->first; - helper.remove_element_entries_from_sidesets(entity); - } - - OrdinalVector partOrdinals; - OrdinalVector scratchOrdinalVec, scratchSpace; - PartVector parts ; - std::vector relations ; - - OrdinalVector removeCustomGhostParts; - const std::vector& ghostingObjs = ghostings(); - const unsigned firstCustomGhosting = 2; - for(unsigned i=firstCustomGhosting; i result = internal_create_entity( key ); - - Entity entity = result.first; - - // The entity was copied and not created. - partOrdinals.clear(); - for(const stk::mesh::Part* part : parts) { - partOrdinals.push_back(part->mesh_meta_data_ordinal()); - } - - internal_change_entity_parts( entity , partOrdinals , removeCustomGhostParts, scratchOrdinalVec, scratchSpace ); - for(unsigned i=firstCustomGhosting; iordinal(), p)); - } - - if (state(entity) == Created) { - set_state(entity, Modified); - } - - internal_set_owner(entity, owner); - - internal_declare_relation( entity , relations, scratchOrdinalVec ); - - int shouldUnpackFieldValues = 0; - buf.unpack(shouldUnpackFieldValues); - if ( shouldUnpackFieldValues==1 ) { - if ( ! unpack_field_values(*this, buf , entity , error_msg ) ) { - ++error_count ; - } - } - - unpack_sideset_info( buf, *this, entity); - } - } - -#ifndef NDEBUG - all_reduce( p_comm , ReduceSum<1>( & error_count ) ); -#endif - STK_ThrowAssertMsg(error_count==0, error_msg.str() ); - - // Any entity that I sent and is not in an owned closure is deleted. 
- // The owned closure will be effected by received entities, so can - // only clean up after the newly owned entities have been received. - // Destroy backwards so as not to invalidate closures in the process. - - { - for ( EntityVector::reverse_iterator i = unique_list_of_send_closure.rbegin() ; i != unique_list_of_send_closure.rend() ; ++i) { - stk::mesh::Entity entity = *i; - if ( ! this->owned_closure(entity) ) { - for(unsigned ig=firstCustomGhosting; ig().swap(sendGhosts); } - std::ostringstream error_msg ; - int error_count = 0 ; OrdinalVector ordinal_scratch, removeParts, partOrdinals, scratchSpace, scratch3; PartVector parts ; std::vector relations ; @@ -3254,6 +2979,9 @@ void BulkData::ghost_entities_and_fields(Ghosting & ghosting, // ranking entities may be owned by different processes, // as such unpacking must be performed in rank order. + std::ostringstream error_msg ; + int error_count = 0 ; + for ( unsigned rank = 0 ; rank < rank_count ; ++rank ) { for ( int p = 0 ; p < p_size ; ++p ) { CommBuffer & buf = commSparse.recv_buffer(p); @@ -3369,7 +3097,7 @@ void BulkData::ghost_entities_and_fields(Ghosting & ghosting, all_reduce( parallel() , ReduceSum<1>( & error_count ) ); } #endif - STK_ThrowAssertMsg(error_count==0, error_msg.str() ); + STK_ThrowRequireMsg(error_count==0, error_msg.str() ); internal_add_comm_list_entries(newCommListEntries); @@ -4239,7 +3967,7 @@ void BulkData::check_mesh_consistency() if(m_runConsistencyCheck) { STK_ThrowErrorMsgIf(!stk::mesh::impl::check_permutations_on_all(*this), "Permutation checks failed."); std::ostringstream msg ; - bool is_consistent = comm_mesh_verify_parallel_consistency( msg ); + bool is_consistent = impl::comm_mesh_verify_parallel_consistency(*this, internal_comm_db(), internal_comm_list(), [&](Entity entity){return internal_entity_comm_map(entity);}, msg ); std::string error_msg = msg.str(); STK_ThrowErrorMsgIf( !is_consistent, error_msg ); } @@ -4396,7 +4124,7 @@ void 
BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) m_meshModification.set_sync_state_synchronized(); m_add_node_sharing_called = false; - update_deleted_entities_container(); + m_meshModification.get_deleted_entity_cache().update_deleted_entities_container(); for (FieldBase * stkField : mesh_meta_data().get_fields()) { if (stkField->has_ngp_field()) { @@ -4903,11 +4631,13 @@ void BulkData::change_entity_parts( Entity entity, const PARTVECTOR & add_parts , const PARTVECTOR & remove_parts) { - bool stkMeshRunningUnderFramework = m_add_fmwk_data; + const bool stkMeshRunningUnderFramework = add_fmwk_data(); if(!stkMeshRunningUnderFramework) { internal_throw_error_if_manipulating_internal_part_memberships(add_parts); internal_throw_error_if_manipulating_internal_part_memberships(remove_parts); + + require_entity_owner(entity, parallel_rank()); } OrdinalVector scratchOrdinalVec, scratchSpace; internal_verify_and_change_entity_parts(entity, add_parts, remove_parts, @@ -4922,11 +4652,13 @@ void BulkData::change_entity_parts( const EntityVector& entities, const PARTVECTOR & add_parts , const PARTVECTOR & remove_parts) { - bool stkMeshRunningUnderFramework = m_add_fmwk_data; - if(!stkMeshRunningUnderFramework) - { - internal_throw_error_if_manipulating_internal_part_memberships(add_parts); - internal_throw_error_if_manipulating_internal_part_memberships(remove_parts); + const bool stkMeshRunningUnderFramework = add_fmwk_data(); + if(!stkMeshRunningUnderFramework) { + internal_throw_error_if_manipulating_internal_part_memberships(add_parts); + internal_throw_error_if_manipulating_internal_part_memberships(remove_parts); + for(Entity entity : entities) { + require_entity_owner(entity, parallel_rank()); + } } internal_verify_and_change_entity_parts(entities, add_parts, remove_parts); } @@ -4939,7 +4671,7 @@ void BulkData::batch_change_entity_parts( const stk::mesh::EntityVector& entitie const std::vector& remove_parts, ModEndOptimizationFlag opt) { - bool 
stkMeshRunningUnderFramework = m_add_fmwk_data; + const bool stkMeshRunningUnderFramework = add_fmwk_data(); if(!stkMeshRunningUnderFramework) { for(size_t i=0; i & shared_nodes) } } -bool BulkData::verify_parallel_attributes( std::ostream & error_log ) -{ - bool result = true ; - - const EntityRank entityRankEnd = static_cast(mesh_meta_data().entity_rank_count()); - - for ( EntityRank rank = stk::topology::NODE_RANK ; rank < entityRankEnd ; ++rank ) { - const BucketVector & all_buckets = buckets(rank); - - for(const Bucket* bucketptr : all_buckets) - { - result = result && impl::verify_parallel_attributes_for_bucket(*bucketptr, - [&](Entity entity){return internal_entity_comm_map(entity);}, error_log); - } - } - - bool isGloballyConsistentCommList = impl::is_comm_list_globally_consistent(*this, internal_comm_db(), m_entity_comm_list, error_log); - result = result && isGloballyConsistentCommList; - - return result ; -} - -bool BulkData::comm_mesh_verify_parallel_consistency(std::ostream & error_log ) -{ - int verified_ok = 1 ; - - // Verify consistency of parallel attributes - - verified_ok = verify_parallel_attributes( error_log ); - if (parallel_size() > 1) { - all_reduce( parallel() , ReduceMin<1>( & verified_ok ) ); - } - - // Verify entities against owner. 
- - if ( verified_ok ) { - CommSparse comm( parallel() ); - - impl::pack_owned_verify(*this, internal_comm_db(), internal_comm_list(), comm); - - comm.allocate_buffers(); - - impl::pack_owned_verify(*this, internal_comm_db(), internal_comm_list(), comm); - - comm.communicate(); - - verified_ok = impl::unpack_not_owned_verify(*this, this->internal_comm_list(), - [&](Entity entity){return internal_entity_comm_map(entity);}, - comm , error_log ); - - if (parallel_size() > 1) { - all_reduce( parallel() , ReduceMin<1>( & verified_ok ) ); - } - } - - return verified_ok == 1 ; -} - void BulkData::remove_entities_from_sharing(const EntityProcVec& entitiesToRemoveFromSharing, EntityVector & entitiesNoLongerShared) { entitiesNoLongerShared.clear(); @@ -5620,27 +5299,6 @@ void BulkData::remove_entities_from_sharing(const EntityProcVec& entitiesToRemov stk::util::sort_and_unique(entitiesNoLongerShared); } -namespace -{ -bool is_node_connected_to_active_element_locally(const stk::mesh::BulkData &mesh, stk::mesh::Entity node, const stk::mesh::Part &activePart) -{ - bool activeNode = false; - const int numElements = mesh.num_elements(node); - const stk::mesh::Entity * elements = mesh.begin_elements(node); - for (int elementI=0 ; elementIparallel()); @@ -5908,135 +5566,9 @@ std::ostream &operator<<(std::ostream &out, const stk::mesh::PartVector &partVec return out; } -stk::mesh::EntityVector BulkData::get_nodes_to_deactivate(const stk::mesh::EntityVector & deactivatedElements, const stk::mesh::Part & activePart) const -{ - stk::mesh::EntityVector nodesToDeactivate; - - stk::mesh::EntityVector potentiallyDeactivatedNodes; - for (stk::mesh::Entity element : deactivatedElements) - { - const int numNodes = this->num_nodes(element); - const stk::mesh::Entity * nodes = this->begin_nodes(element); - for (int nodeI=0 ; nodeIbucket(node).owned() || this->bucket(node).shared()) - { - bool activeNode = is_node_connected_to_active_element_locally(*this, node, activePart); - if (!activeNode) - 
{ - if (this->bucket(node).shared()) - { - nodesToCommunicate.push_back(node); - } - else - { - nodesToDeactivate.push_back(node); - } - } - } - } - - std::vector sharedProcs; - stk::CommSparse inquiryComm(this->parallel()); - pack_and_communicate(inquiryComm, - [this,&inquiryComm,&nodesToCommunicate,&sharedProcs]() - { - for (stk::mesh::Entity node : nodesToCommunicate) - { - const stk::mesh::EntityKey nodeKey = this->entity_key(node); - this->comm_shared_procs(nodeKey,sharedProcs); - for (int otherProc : sharedProcs) - { - inquiryComm.send_buffer(otherProc).pack(nodeKey.id()); - } - } - } - ); - stk::mesh::EntityVector incomingNodes; - unpack_communications(inquiryComm, - [this,&inquiryComm,&incomingNodes](int procId) - { - stk::mesh::EntityId nodeId; - inquiryComm.recv_buffer(procId).unpack(nodeId); - stk::mesh::Entity node = this->get_entity(stk::topology::NODE_RANK, nodeId); - STK_ThrowAssertMsg(this->is_valid(node),"Error in communication for de-imprinting the active part on nodes of killed elements in element death!"); - incomingNodes.push_back(node); - } - ); - - std::map nodeToActiveStatusMap; - stk::CommSparse answerComm(this->parallel()); - pack_and_communicate(answerComm, - [this,&answerComm,&incomingNodes,&nodeToActiveStatusMap,&activePart]() - { - for (stk::mesh::Entity incomingNode : incomingNodes) - { - std::vector sharingProcs; - this->comm_shared_procs(this->entity_key(incomingNode),sharingProcs); - bool activeStatus = is_node_connected_to_active_element_locally(*this, incomingNode, activePart); - for (int otherProc : sharingProcs) - { - answerComm.send_buffer(otherProc).pack(this->identifier(incomingNode)); - answerComm.send_buffer(otherProc).pack(activeStatus); - } - auto nodeLocationInMap = nodeToActiveStatusMap.find(incomingNode); - if (nodeLocationInMap == nodeToActiveStatusMap.end()) - { - nodeToActiveStatusMap.emplace(incomingNode, activeStatus); - } - else - { - nodeLocationInMap->second = nodeLocationInMap->second || activeStatus; - } - } 
- } - ); - - unpack_communications(answerComm, - [this,&answerComm,&nodeToActiveStatusMap](int procId) - { - stk::mesh::EntityId nodeId; - answerComm.recv_buffer(procId).unpack(nodeId); - bool activeStatus = false; - answerComm.recv_buffer(procId).unpack(activeStatus); - stk::mesh::Entity node = this->get_entity(stk::topology::NODE_RANK,nodeId); - STK_ThrowAssertMsg(this->is_valid(node),"Error in communication for de-imprinting the active part on nodes of killed elements in element death!"); - auto nodeLocationInMap = nodeToActiveStatusMap.find(node); - if (nodeLocationInMap == nodeToActiveStatusMap.end()) - { - nodeToActiveStatusMap.emplace(node, activeStatus); - } - else - { - nodeLocationInMap->second = nodeLocationInMap->second || activeStatus; - } - } - ); - - for (auto nodeActiveStatusPair : nodeToActiveStatusMap) - { - stk::mesh::Entity node = nodeActiveStatusPair.first; - bool nodeIsActiveOnAnyOtherProcessors = nodeActiveStatusPair.second; - if (!nodeIsActiveOnAnyOtherProcessors) - { - nodesToDeactivate.push_back(node); - } - } - - return nodesToDeactivate; -} - void BulkData::de_induce_parts_from_nodes(const stk::mesh::EntityVector & deactivatedElements, stk::mesh::Part & activePart) { - stk::mesh::EntityVector nodesToDeactivate = get_nodes_to_deactivate(deactivatedElements, activePart); + stk::mesh::EntityVector nodesToDeactivate = impl::get_nodes_to_deactivate(*this, deactivatedElements, activePart); OrdinalVector scratchOrdinalVec, scratchSpace; for (stk::mesh::Entity nodeToDeactivate : nodesToDeactivate) { @@ -6121,8 +5653,9 @@ void BulkData::destroy_elements_of_topology(stk::topology topologyToDelete) void BulkData::break_boundary_relations_and_delete_buckets(const std::vector & relationsToDestroy, const stk::mesh::BucketVector & bucketsToDelete) { modification_begin(); - for(const impl::RelationEntityToNode & relation : relationsToDestroy) + for(const impl::RelationEntityToNode & relation : relationsToDestroy) { destroy_relation(relation.entity, 
relation.node, relation.ordinal); + } delete_buckets(bucketsToDelete); modification_end(); } @@ -6158,45 +5691,10 @@ bool BulkData::does_sideset_exist(const stk::mesh::Part &part) const return m_sideSetData.does_sideset_exist(part); } -namespace { -bool part_is_connected_to_shell_block(const BulkData& bulk, const stk::mesh::Part &part) -{ - bool connected = false; - const MetaData& meta = bulk.mesh_meta_data(); - std::vector touchingBlocks = meta.get_blocks_touching_surface(&part); - - for(const stk::mesh::Part* touchingBlock : touchingBlocks) { - connected |= meta.get_topology(*touchingBlock).is_shell(); - } - return connected; -} - -void check_sideset_part_constraints(const BulkData& bulk, const stk::mesh::Part &part) -{ - const MetaData& meta = bulk.mesh_meta_data(); - if(part.primary_entity_rank() != meta.side_rank() && !part_is_connected_to_shell_block(bulk, part)) - stk::RuntimeWarning() << "create_sideset: part " << part.name() - << " has rank " << part.primary_entity_rank(); - if((part.id() == stk::mesh::Part::INVALID_ID) && (part.name() != "universal_sideset") && !part.subsets().empty()) - stk::RuntimeWarning() << "create_sideset: part " << part.name() - << " has invalid id "; - - for(const stk::mesh::Part* subsetPart : part.subsets()) { - if(subsetPart->primary_entity_rank() == meta.side_rank()) { - if(subsetPart->id() != part.id()) - stk::RuntimeWarning() << "create_sideset: part " << part.name() - << " with id " << part.id() - << "; subset sideblock part " << subsetPart->name() - << " has different id " << subsetPart->id(); - } - } -} -} - SideSet& BulkData::create_sideset(const stk::mesh::Part &part, bool fromInput) { if(!m_sideSetData.does_sideset_exist(part)) { - check_sideset_part_constraints(*this, part); + impl::check_sideset_part_constraints(*this, part); } return m_sideSetData.create_sideset(part, fromInput); diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp index 
d9d995769690..9dc476bed3ce 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp @@ -39,9 +39,6 @@ #include // for uint16_t #include // for max #include // for operator<<, basic_ostream, etc -#include // for list -#include // for map, map<>::value_compare -#include #include #include // for Entity, etc #include // for EntityCommDatabase @@ -49,18 +46,15 @@ #include // for Selector #include // for MeshIndex, EntityRank, etc #include -#include "stk_util/ngp/NgpSpaces.hpp" #include // for BucketRepository #include // for ParallelMachine #include // for char_traits, string #include // for pair #include // for vector #include -#include #include "stk_mesh/base/Bucket.hpp" // for Bucket #include "stk_mesh/base/EntityKey.hpp" // for EntityKey, hash_value #include "stk_mesh/base/FieldDataManager.hpp" -#include "stk_mesh/base/Relation.hpp" // for Relation, etc #include "stk_topology/topology.hpp" // for topology, etc #include "stk_util/util/ReportHandler.hpp" // for ThrowAssert, etc #include "stk_mesh/base/ModificationSummary.hpp" @@ -72,6 +66,7 @@ #include "stk_mesh/baseImpl/SoloSideIdGenerator.hpp" #include "stk_mesh/baseImpl/SideSetImpl.hpp" +namespace stk { namespace mesh { class EntityLess; } } namespace stk { namespace mesh { class FieldBase; } } namespace stk { namespace mesh { class MetaData; } } namespace stk { namespace mesh { class Part; } } @@ -99,7 +94,6 @@ void replace_bulk_data(const stk::mesh::BulkData & inMesh, T & outMesh, std::fun } } #include "EntityCommListInfo.hpp" -#include "EntityLess.hpp" #include "SharedEntityType.hpp" #include "CommListUpdater.hpp" @@ -293,11 +287,10 @@ class BulkData { * enough communication that it will be most efficient to batch up all * desired changes so that it can be called only once. 
*/ - virtual void change_entity_owner( const EntityProcVec & arg_change) + virtual bool change_entity_owner( const EntityProcVec & arg_change) { - notifier.notify_elements_about_to_move_procs(arg_change); - m_meshModification.change_entity_owner(arg_change); - notifier.notify_elements_moved_procs(arg_change); + const bool anyEntitiesMoved = m_meshModification.change_entity_owner(arg_change); + return anyEntitiesMoved; } /** \brief Rotate the field data of multistate fields. @@ -508,13 +501,15 @@ class BulkData { virtual void generate_new_entities(const std::vector& requests, // Mod Mark std::vector& requested_entities); - Permutation find_permutation( const stk::topology &hr_entity_topo, +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after July 31 2024 + STK_DEPRECATED_MSG("Use function in FindPermutation.hpp") Permutation find_permutation( const stk::topology &hr_entity_topo, Entity const *higher_rank_entity_nodes, const stk::topology &side_topo, Entity const *side_nodes, unsigned side_ordinal) const; - bool check_permutation(Entity entity, Entity rel_entity, unsigned rel_ordinal, Permutation expected) const; + STK_DEPRECATED_MSG("Use function in FindPermutation.hpp") bool check_permutation(Entity entity, Entity rel_entity, unsigned rel_ordinal, Permutation expected) const; +#endif //------------------------------------ /** \brief Declare a relation and its converse between @@ -689,16 +684,18 @@ class BulkData { inline void set_global_id(stk::mesh::Entity entity, FmwkId id); void initialize_global_ids(); - inline const RelationVector& aux_relations(Entity entity) const; - inline RelationVector& aux_relations(Entity entity); // Mod Mark + const RelationVector& aux_relations(Entity entity) const; + RelationVector& aux_relations(Entity entity); // Mod Mark void reserve_relation(stk::mesh::Entity entity, const unsigned num); // Mod Mark void erase_and_clear_if_empty(stk::mesh::Entity entity, RelationIterator rel_itr); // Mod Mark void initialize_aux_relations(); - inline 
RelationIterator internal_begin_relation(Entity entity, const Relation::RelationType relation_type) const; - inline RelationIterator internal_end_relation(Entity entity, const Relation::RelationType relation_type) const; - inline void compress_relation_capacity(Entity entity); + RelationIterator internal_begin_relation(Entity entity, const RelationType relation_type) const; + RelationIterator internal_end_relation(Entity entity, const RelationType relation_type) const; + void compress_relation_capacity(Entity entity); bool add_fmwk_data() const { return m_add_fmwk_data; } +#else + bool add_fmwk_data() const { return false; } #endif // @@ -995,7 +992,6 @@ class BulkData { inline void set_state(Entity entity, EntityState entity_state); inline void set_entity_key(Entity entity, EntityKey key); - void update_deleted_entities_container(); std::pair internal_create_entity(EntityKey key, size_t preferred_offset = 0); // Mod Mark std::pair internal_get_or_create_entity_with_notification(EntityKey key, size_t preferred_offset = 0); @@ -1003,7 +999,7 @@ class BulkData { /** \brief Declare a collection of relations by simply iterating * the input and calling declare_relation on each entry. 
*/ - void internal_declare_relation( Entity entity, const std::vector & rel, + void internal_declare_relation( Entity entity, const RelationVector & rel, OrdinalVector& scratch1); // Mod Mark bool internal_declare_relation(Entity e_from, Entity e_to, @@ -1013,9 +1009,6 @@ class BulkData { inline void log_created_parallel_copy(Entity entity); - void internal_change_entity_owner( const std::vector & arg_change, - ModEndOptimizationFlag mod_optimization = ModEndOptimizationFlag::MOD_END_SORT ); // Mod Mark - void internal_change_entity_parts_without_propagating_to_downward_connected_entities(Entity entity, const OrdinalVector& add_parts, const OrdinalVector& remove_parts, OrdinalVector& parts_removed, OrdinalVector& newBucketPartList, OrdinalVector& scratchSpace); void internal_change_bucket_parts_without_propagating_to_downward_connected_entities(Bucket* bucket, EntityRank rank, const OrdinalVector& add_parts, const OrdinalVector& remove_parts, OrdinalVector& ranked_parts_removed, OrdinalVector& newBucketPartList); void internal_change_entity_parts_without_propagating_to_downward_connected_entities_with_notification(Entity entity, const OrdinalVector& add_parts, const OrdinalVector& remove_parts, OrdinalVector& parts_removed, OrdinalVector& newBucketPartList, OrdinalVector& scratchSpace); @@ -1220,7 +1213,6 @@ class BulkData { const RelationIdentifier local_id ); // Mod Mark void check_mesh_consistency(); - bool comm_mesh_verify_parallel_consistency(std::ostream & error_log); virtual void remove_entities_from_sharing(const EntityProcVec& entitiesToRemoveFromSharing, EntityVector & entitiesNoLongerShared); virtual void check_if_entity_from_other_proc_exists_on_this_proc_and_update_info_if_shared(std::vector& shared_entity_map, int proc_id, const shared_entity_type &sentity); void update_owner_global_key_and_sharing_proc(stk::mesh::EntityKey global_key_other_proc, shared_entity_type& shared_entity_this_proc, int proc_id) const; @@ -1311,16 +1303,14 @@ class BulkData { 
// Only to be called from add_node_sharing void protect_orphaned_node(Entity entity) { - if (does_entity_need_orphan_protection(entity)) - { + if (does_entity_need_orphan_protection(entity)) { internal_force_protect_orphaned_node(entity); } } void unprotect_orphaned_node(Entity entity) { - if (does_entity_have_orphan_protection(entity)) - { + if (does_entity_have_orphan_protection(entity)) { internal_force_unprotect_orphaned_node(entity); } } @@ -1331,8 +1321,6 @@ class BulkData { void fill_entity_procs_for_owned_modified_or_created(std::vector & send_list) const; stk::mesh::EntityVector get_lower_ranked_shared_entities(const stk::mesh::EntityVector& created_sides) const; - stk::mesh::EntityVector get_nodes_to_deactivate(const stk::mesh::EntityVector & deactivatedElements, const stk::mesh::Part & activePart) const; - inline bool internal_add_node_sharing_called() const; // Forbidden @@ -1480,8 +1468,6 @@ class BulkData { template friend void stk::tools::replace_bulk_data(const stk::mesh::BulkData & in_mesh, T & out_mesh, std::function op); - bool verify_parallel_attributes( std::ostream & error_log ); - void determineEntitiesThatNeedGhosting(stk::mesh::Entity edge, std::vector& entitiesConnectedToNodes, const stk::mesh::Entity* nodes, @@ -2214,30 +2200,6 @@ BulkData::global_id(stk::mesh::Entity entity) const return m_fmwk_global_ids[entity.local_offset()]; } -inline const RelationVector& -BulkData::aux_relations(Entity entity) const -{ - STK_ThrowAssert(m_add_fmwk_data); - STK_ThrowAssert(entity.local_offset() > 0); - - if (m_fmwk_aux_relations[entity.local_offset()] == NULL) { - m_fmwk_aux_relations[entity.local_offset()] = new RelationVector(); - } - return *m_fmwk_aux_relations[entity.local_offset()]; -} - -inline RelationVector& -BulkData::aux_relations(Entity entity) -{ - STK_ThrowAssert(m_add_fmwk_data); - STK_ThrowAssert(entity.local_offset() > 0); - - if (m_fmwk_aux_relations[entity.local_offset()] == NULL) { - m_fmwk_aux_relations[entity.local_offset()] = 
new RelationVector(); - } - return *m_fmwk_aux_relations[entity.local_offset()]; -} - inline void BulkData::set_global_id(stk::mesh::Entity entity, BulkData::FmwkId id) { @@ -2248,39 +2210,6 @@ BulkData::set_global_id(stk::mesh::Entity entity, BulkData::FmwkId id) m_fmwk_global_ids[entity.local_offset()] = id; } -inline RelationIterator -BulkData::internal_begin_relation(Entity entity, const Relation::RelationType relation_type) const -{ - STK_ThrowAssert(m_add_fmwk_data); - if (impl::internal_is_handled_generically(relation_type)) { - STK_ThrowErrorMsg("stk::Mesh::BulkData::internal_begin_relation(..) requests native stk::mesh relation type"); - return RelationIterator(); - } - else { - return aux_relations(entity).begin(); - } -} - -inline RelationIterator -BulkData::internal_end_relation(Entity entity, const Relation::RelationType relation_type) const -{ - STK_ThrowAssert(m_add_fmwk_data); - if (impl::internal_is_handled_generically(relation_type)) { - STK_ThrowErrorMsg("stk::Mesh::BulkData::internal_begin_relation(..) 
requests native stk::mesh relation type"); - return RelationIterator(); - } - else { - return aux_relations(entity).end(); - } -} - -inline void -BulkData::compress_relation_capacity(Entity entity) -{ - RelationVector &rels = aux_relations(entity); - RelationVector tmp(rels); - tmp.swap(rels); -} #endif inline void @@ -2348,94 +2277,6 @@ inline void set_ngp_mesh(const BulkData & bulk, NgpMeshBase * ngpMesh) { } } - -#ifdef SIERRA_MIGRATION -inline bool -EntityLess::operator()(const Entity lhs, const Entity rhs) const -{ - bool result = false; - if (m_shouldSortFacesByNodeIds && - m_mesh->entity_rank(lhs) == m_sideRank && - m_mesh->entity_rank(rhs) == m_sideRank) - { - unsigned num_nodes_lhs = m_mesh->count_valid_connectivity(lhs, stk::topology::NODE_RANK); - unsigned num_nodes_rhs = m_mesh->count_valid_connectivity(rhs, stk::topology::NODE_RANK); - if (num_nodes_lhs != num_nodes_rhs) - { - result = num_nodes_lhs < num_nodes_rhs; - } - else if (num_nodes_lhs == 0) { - result = m_mesh->identifier(lhs) < m_mesh->identifier(rhs); - } - else - { - const stk::mesh::Entity* nodes_lhs_ptr = m_mesh->begin_nodes(lhs); - const stk::mesh::Entity* nodes_rhs_ptr = m_mesh->begin_nodes(rhs); - unsigned i=0; - while(iidentifier(nodes_lhs_ptr[i]) == m_mesh->identifier(nodes_rhs_ptr[i]))) - { - ++i; - } - result = (iidentifier(nodes_lhs_ptr[i]) < m_mesh->identifier(nodes_rhs_ptr[i])) - : false; - } - } - else - { - const EntityKey lhs_key = m_mesh->entity_key(lhs); - const EntityKey rhs_key = m_mesh->entity_key(rhs); - result = lhs_key < rhs_key; - } - return result; -} - -#else - -inline -EntityLess::EntityLess(const BulkData& mesh) : m_mesh(&mesh) {} - -inline bool -EntityLess::operator()(const Entity lhs, const Entity rhs) const -{ - const EntityKey lhs_key = m_mesh->entity_key(lhs); - const EntityKey rhs_key = m_mesh->entity_key(rhs); - return (lhs_key < rhs_key); -} -#endif - -inline bool -EntityLess::operator()(const Entity lhs, const EntityKey & rhs) const -{ - const EntityKey 
lhs_key = m_mesh->entity_key(lhs); - return lhs_key < rhs; -} - -inline bool -EntityLess::operator()( const EntityProc & lhs, const EntityProc & rhs) const -{ - const EntityKey lhs_key = m_mesh->entity_key(lhs.first); - const EntityKey rhs_key = m_mesh->entity_key(rhs.first); - return lhs_key != rhs_key ? lhs_key < rhs_key : lhs.second < rhs.second ; -} - -inline bool -EntityLess::operator()( const EntityProc & lhs, const Entity rhs) const -{ - const EntityKey lhs_key = m_mesh->entity_key(lhs.first); - const EntityKey rhs_key = m_mesh->entity_key(rhs); - return lhs_key < rhs_key; -} - -inline bool -EntityLess::operator()( const EntityProc & lhs, const EntityKey & rhs) const -{ - const EntityKey lhs_key = m_mesh->entity_key(lhs.first); - return lhs_key < rhs ; -} - - } // namespace mesh } // namespace stk diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkModification.cpp b/packages/stk/stk_mesh/stk_mesh/base/BulkModification.cpp index c402f3cae3e5..e2911ce41f00 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkModification.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkModification.cpp @@ -37,8 +37,9 @@ #include // for _Rb_tree_const_iterator, etc #include // for operator<<, basic_ostream, etc #include // for runtime_error -#include // for EntityLess, BulkData, etc +#include // for BulkData, etc #include // for Entity +#include #include // for CommBuffer #include #include // for pair diff --git a/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt b/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt index f97cc5eb019c..10a295f24937 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt +++ b/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt @@ -56,17 +56,17 @@ else() find_package(Shards REQUIRED) add_library(stk_mesh_base ${SOURCES} ${SOURCES_IMPL} ${SOURCES_ELEMGRAPH}) target_link_libraries(stk_mesh_base ${Shards_LIBRARIES}) - target_link_libraries(stk_mesh_base sierra_blas_lapack) target_link_libraries(stk_mesh_base stk_topology) 
target_link_libraries(stk_mesh_base stk_util_diag) target_link_libraries(stk_mesh_base stk_util_env) target_link_libraries(stk_mesh_base stk_util_parallel) - if(USE_SIERRA_BLAS_LAPACK) - target_link_libraries(stk_mesh_base sierra_blas_lapack) + if(STK_BUILT_FOR_SIERRA) + find_package(SierraLapack REQUIRED) + target_link_libraries(stk_util_util PUBLIC SierraLapack::sierra_blas_lapack) else() find_package(BLAS REQUIRED) - target_link_libraries(stk_mesh_base BLAS::BLAS) + target_link_libraries(stk_util_util PUBLIC BLAS::BLAS) endif() endif() @@ -86,5 +86,5 @@ INSTALL(FILES ${HEADERS_ELEMGRAPH} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_mesh/baseImpl/elementGraph) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_mesh_base DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_mesh_base EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_mesh/stk_mesh/base/CoordinateSystems.hpp b/packages/stk/stk_mesh/stk_mesh/base/CoordinateSystems.hpp index 220154e1070d..b90d6f011526 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/CoordinateSystems.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/CoordinateSystems.hpp @@ -35,403 +35,8 @@ #ifndef stk_mesh_fem_CoordinateSystems_hpp #define stk_mesh_fem_CoordinateSystems_hpp -//---------------------------------------------------------------------- - -#include -#include // for ArrayDimTag::size_type, etc -#include // for string - -#define DEPRECATED_SHARDS_ARRAY_DIM_TAG_SIMPLE_DECLARATION( ADT ) \ - class ADT : public shards::ArrayDimTag { \ - public: \ - const char * name() const { static const char n[] = # ADT; return n; } \ - const ADT & tag() { static const ADT self ; return self ; } \ - private: \ - ~ADT() {} \ - ADT() { \ - std::cerr << "Warning: The stk::mesh::" #ADT " type is deprecated and will soon be removed." 
<< std::endl; \ - } \ - ADT( const ADT & ); \ - ADT & operator = ( const ADT & ); \ - }; - -namespace stk { -namespace mesh { - -/** \addtogroup stk_mesh_field_dimension_tags - * \{ - * - * ArrayDimTags are required for multidimensional Field types; they specify - * the dimensions of the field and the intent of each dimension. Note that - * scalar Field types do not involve ArrayDimTags. This file defines a number - * of ArrayDimTags that we believe will be widely useful for STK users. Clients - * have the freedom to define their own ArrayDimTags as well. - * - * Example use of Cartesian ArrayDimTag to create a field type: - * stk::mesh::Field - */ - -DEPRECATED_SHARDS_ARRAY_DIM_TAG_SIMPLE_DECLARATION( SimpleArrayTag ) - -/** - * \brief Implement an shards::ArrayDimTag for Cartesian coordinate dimensions. - * - * A Cartesian coordinate has up to three dimensions in X, Y, Z order. - */ -struct Cartesian3d : public shards::ArrayDimTag { - - enum { Size = 3 }; ///< default size - - enum { X = 0 , Y = 1 , Z = 2 }; ///< Identifiers for each dimension - - const char * name() const { - static const char n[] = "Cartesian3d"; - return n; - } - - static const Cartesian3d & tag() { - static const Cartesian3d self; - return self; - } - -private: - Cartesian3d() { - std::cerr << "Warning: The stk::mesh::Cartesian3d type is deprecated and will soon be removed." << std::endl; - } - Cartesian3d( const Cartesian3d & ); - Cartesian3d & operator = ( const Cartesian3d & ); -}; - -/** - * \brief Implement an shards::ArrayDimTag for Cartesian 2d coordinate dimensions. - * - * A Cartesian coordinate has up to two dimensions in X, Y order. 
- */ -struct Cartesian2d: public shards::ArrayDimTag { - - enum { Size = 2 }; ///< default size - - enum { X = 0 , Y = 1 }; ///< Identifiers for each dimension - - const char * name() const { - static const char n[] = "Cartesian2d"; - return n; - } - - static const Cartesian2d & tag() { - static const Cartesian2d self; - return self; - } - -private: - Cartesian2d() { - std::cerr << "Warning: The stk::mesh::Cartesian2d type is deprecated and will soon be removed." << std::endl; - } - Cartesian2d( const Cartesian2d & ); - Cartesian2d & operator = ( const Cartesian2d & ); -}; - -typedef Cartesian3d Cartesian; -/** - * \brief Implement an shards::ArrayDimTag for Cylindrical coordinate dimensions. - * - * A Cylindral coordinate has up to three dimensions in - * radius, angle, and longitudinal-distance order. - */ -struct Cylindrical : public shards::ArrayDimTag { - - enum { Radius = 0 , R = 0 , ///< Identifiers for each dimension - Angle = 1 , A = 1 , - Z = 2 }; - - const char * name() const { - static const char n[] = "Cylindrical"; - return n; - } - - static const Cylindrical & tag() { - static const Cylindrical self; - return self; - } - -private: - Cylindrical() { - std::cerr << "Warning: The stk::mesh::Cylindrical type is deprecated and will soon be removed." << std::endl; - } - Cylindrical( const Cylindrical & ); - Cylindrical & operator = ( const Cylindrical & ); -}; - -/** - * \brief Implement an shards::ArrayDimTag for FullTensor. - * - * \todo REFACTOR Where should FullTensor live, in the application, - * in the toolkit or a common application header? - */ -struct FullTensor36 : public shards::ArrayDimTag { - - enum { Size = 9 }; - -/* - * Note on Ordering: This is the ordering as used in the old - * Sierra Framework and is somewhat standard in that a symmetric - * tensor is the first six values of a full tensor and a diagonal - * only tensor is the first three values of that. 
- * - * I think this is actually in ERROR in that (XZ,YX,ZY) SHOULD - * be (6,7,8) NOT (8,6,7). But backwards compatibility is useful. - * - * \todo Look at the proper ordering of a full second order tensor. - */ - enum { XX = 0 , XY = 3 , XZ = 8 , - YX = 6 , YY = 1 , YZ = 4 , - ZX = 5 , ZY = 7 , ZZ = 2 }; - - const char * name() const { - static const char n[] = "FullTensor36"; - return n; - } - - static const FullTensor36 & tag() { - static const FullTensor36 self; - return self; - } - -private: - FullTensor36() { - std::cerr << "Warning: The stk::mesh::FullTensor36 type is deprecated and will soon be removed." << std::endl; - } - FullTensor36( const FullTensor36 & ); - FullTensor36 & operator = ( const FullTensor36 & ); -}; - -typedef FullTensor36 FullTensor; - -/** - * \brief Implement an shards::ArrayDimTag for FullTensor. - */ -struct FullTensor22 : public shards::ArrayDimTag { - - enum { Size = 4 }; - - enum { XX = 0 , XY = 2 , - YX = 3 , YY = 1}; - - const char * name() const { - static const char n[] = "FullTensor22"; - return n; - } - - static const FullTensor22 & tag() { - static const FullTensor22 self; - return self; - } - -private: - FullTensor22() { - std::cerr << "Warning: The stk::mesh::FullTensor22 type is deprecated and will soon be removed." << std::endl; - } - FullTensor22( const FullTensor22 & ); - FullTensor22 & operator = ( const FullTensor22 & ); -}; - -//---------------------------------------------------------------------- - -/** - * \brief Implement an shards::ArrayDimTag for SymmetricTensor. - * - * \todo REFACTOR Where should SymmetricTensor live, in the application, - * in the toolkit or a common application header? 
- */ -struct SymmetricTensor33 : public shards::ArrayDimTag { - - enum { Size = 6 }; - - enum { XX = 0 , XY = 3, XZ = 5, - YX = 3 , YY = 1, YZ = 4, - ZX = 5 , ZY = 4, ZZ = 2}; - - const char * name() const { - static const char n[] = "SymmetricTensor33"; - return n; - } - - static const SymmetricTensor33 & tag() { - static const SymmetricTensor33 self; - return self; - } - -private: - SymmetricTensor33() { - std::cerr << "Warning: The stk::mesh::SymmetricTensor33 type is deprecated and will soon be removed." << std::endl; - } - SymmetricTensor33( const SymmetricTensor33 & ); - SymmetricTensor33 & operator = ( const SymmetricTensor33 & ); -}; - -typedef SymmetricTensor33 SymmetricTensor; - -/** - * \brief Implement an shards::ArrayDimTag for SymmetricTensor. - * - * SymmetricTensor31 is an axisymmetric tensor in 3D. It - * has the radius and height of the cylindrical coordinate - * system but with no theta coordinate. - */ -struct SymmetricTensor31 : public shards::ArrayDimTag { - - enum { Size = 4 }; - - enum { rr = 0 , rz = 2 , - zr = 3 , zz = 1}; - - const char * name() const { - static const char n[] = "SymmetricTensor31"; - return n; - } - - static const SymmetricTensor31 & tag() { - static const SymmetricTensor31 self; - return self; - } - -private: - SymmetricTensor31() { - std::cerr << "Warning: The stk::mesh::SymmetricTensor31 type is deprecated and will soon be removed." << std::endl; - } - SymmetricTensor31( const SymmetricTensor31 & ); - SymmetricTensor31 & operator = ( const SymmetricTensor31 & ); -}; - -/** - * \brief Implement an shards::ArrayDimTag for SymmetricTensor. 
- */ -struct SymmetricTensor21 : public shards::ArrayDimTag { - - enum { Size = 3 }; - - enum { XX = 0 , XY = 2 , - YX = 2 , YY = 1 }; - - const char * name() const { - static const char n[] = "SymmetricTensor21"; - return n; - } - - static const SymmetricTensor21 & tag() { - static const SymmetricTensor21 self; - return self; - } - -private: - SymmetricTensor21() { - std::cerr << "Warning: The stk::mesh::SymmetricTensor21 type is deprecated and will soon be removed." << std::endl; - } - SymmetricTensor21( const SymmetricTensor21 & ); - SymmetricTensor21 & operator = ( const SymmetricTensor21 & ); -}; - -/** - * \brief Implement an shards::ArrayDimTag for AsymmetricTensor. - * - * Note: I think by Axymmetric is ment Skew-symmetric. - * Asymmetric would be any non-symmetric tensor while skew-symmetric - * means it is equal to the negative of it's transpose. This - * forces the diagonals to be zero and only the three off-diagonal - * elements are useful. - */ -struct AsymmetricTensor03 : public shards::ArrayDimTag { - - enum { Size = 3 }; - - enum { /* XX = 0 */ XY = 0 , XZ = 2 , - YX = 0 ,/* YY = 0 */ YZ = 1 , - ZX = 2 , ZY = 1 /* ZZ=0 */ }; - - const char * name() const { - static const char n[] = "AsymmetricTensor03"; - return n; - } - - static const AsymmetricTensor03 & tag() { - static const AsymmetricTensor03 self; - return self; - } - -private: - AsymmetricTensor03() { - std::cerr << "Warning: The stk::mesh::AsymmetricTensor03 type is deprecated and will soon be removed." << std::endl; - } - AsymmetricTensor03( const AsymmetricTensor03 & ); - AsymmetricTensor03 & operator = ( const AsymmetricTensor03 & ); -}; - -typedef AsymmetricTensor03 AsymmetricTensor; - -/** - * \brief Implement an shards::ArrayDimTag for Matrix. 
- */ -struct Matrix22 : public shards::ArrayDimTag { - - enum { Size = 4 }; - - enum { XX = 0 , XY = 2 , - YX = 1, YY = 3 }; - - const char * name() const { - static const char n[] = "Matrix22"; - return n; - } - - static const Matrix22 & tag() { - static const Matrix22 self; - return self; - } - -private: - Matrix22() { - std::cerr << "Warning: The stk::mesh::Matrix22 type is deprecated and will soon be removed." << std::endl; - } - Matrix22( const Matrix22 & ); - Matrix22 & operator = ( const Matrix22 & ); -}; - -/** - * \brief Implement an shards::ArrayDimTag for Matrix. - */ -struct Matrix33 : public shards::ArrayDimTag { - - enum { Size = 9 }; - - enum { XX = 0 , XY = 3 , XZ = 6 , - YX = 1 , YY = 4 , YZ = 7 , - ZX = 2 , ZY = 5 , ZZ = 8 }; - - const char * name() const { - static const char n[] = "Matrix33"; - return n; - } - - static const Matrix33 & tag() { - static const Matrix33 self; - return self; - } - -private: - Matrix33() { - std::cerr << "Warning: The stk::mesh::Matrix33 type is deprecated and will soon be removed." << std::endl; - } - Matrix33( const Matrix33 & ); - Matrix33 & operator = ( const Matrix33 & ); -}; - -typedef Matrix33 Matrix; - -//---------------------------------------------------------------------- - -/** \} */ - -} //namespace mesh -} //namespace stk +// Deprecated contents have been removed. Inclusions of this header should +// be removed from application code, and this file will be deprecated and +// removed in the near future. 
#endif //stk_mesh_fem_CoordinateSystems_hpp diff --git a/packages/stk/stk_mesh/stk_mesh/base/CreateEdges.cpp b/packages/stk/stk_mesh/stk_mesh/base/CreateEdges.cpp index 59aa3b71708a..1e0833b40413 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/CreateEdges.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/CreateEdges.cpp @@ -39,7 +39,9 @@ #include // for equal_to #include // for back_insert_iterator, etc #include // for BulkData, EntityLess, etc +#include #include // for Entity, hash_value +#include #include // for MetaData, get_cell_topology #include // for operator&, Selector, etc #include // for EntityVector, etc @@ -155,7 +157,7 @@ struct create_single_edge_impl else { side = iedge->second; } - perm = mesh.find_permutation(elem_topo, elem_nodes, edge_topo, edge_nodes.data(), m_edge_ordinal); + perm = stk::mesh::find_permutation(mesh, elem_topo, elem_nodes, edge_topo, edge_nodes.data(), m_edge_ordinal); STK_ThrowRequireMsg(perm != INVALID_PERMUTATION, "CreateEdges: could not find valid permutation to connect face to element"); mesh.declare_relation(ielem, side, m_edge_ordinal, perm, scratch1, scratch2, scratch3); } @@ -268,7 +270,7 @@ struct create_edge_impl else { side = iedge->second; } - perm = mesh.find_permutation(elem_topo, elem_nodes, edge_topo, edge_nodes.data(), e); + perm = stk::mesh::find_permutation(mesh, elem_topo, elem_nodes, edge_topo, edge_nodes.data(), e); STK_ThrowRequireMsg(perm != INVALID_PERMUTATION, "CreateEdges: could not find valid permutation to connect face to element"); mesh.declare_relation(m_bucket[ielem], side, e, perm, scratch1, scratch2, scratch3); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/CreateFaces.cpp b/packages/stk/stk_mesh/stk_mesh/base/CreateFaces.cpp index 69e7ec829ecf..ddbed0143dfd 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/CreateFaces.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/CreateFaces.cpp @@ -43,6 +43,7 @@ #include #include // for BulkData, EntityLess, etc +#include #include // for Entity, hash_value 
#include // for MetaData, get_cell_topology #include // for operator&, Selector, etc @@ -173,13 +174,13 @@ struct create_face_impl mesh.declare_relation(face,node,n); } - Permutation permut = mesh.find_permutation(elemTopology, elem_nodes, + Permutation permut = stk::mesh::find_permutation(mesh, elemTopology, elem_nodes, faceTopology, &permuted_face_nodes[0], side_ordinal); mesh.declare_relation(m_bucket[ielem], face, side_ordinal, permut); } else { face = iface->second; - Permutation permut = mesh.find_permutation(elemTopology, elem_nodes, + Permutation permut = stk::mesh::find_permutation(mesh, elemTopology, elem_nodes, faceTopology, &permuted_face_nodes[0], side_ordinal); STK_ThrowRequireMsg(permut != INVALID_PERMUTATION, "CreateFaces: could not find valid permutation to connect face to element"); mesh.declare_relation(m_bucket[ielem], face, side_ordinal, permut); diff --git a/packages/stk/stk_mesh/stk_mesh/base/DestroyRelations.cpp b/packages/stk/stk_mesh/stk_mesh/base/DestroyRelations.cpp new file mode 100644 index 000000000000..18130a9c61cf --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/base/DestroyRelations.cpp @@ -0,0 +1,93 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. 
+// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include + +namespace stk { namespace mesh { + +void destroy_relations(stk::mesh::BulkData &bulk, + stk::mesh::Entity entity, + stk::mesh::EntityRank connectedRank) +{ + const int numConn = bulk.num_connectivity(entity, connectedRank); + const Entity* conn = bulk.begin(entity, connectedRank); + const ConnectivityOrdinal* ords = bulk.begin_ordinals(entity, connectedRank); + + switch(numConn) { + case 0: + return; break; + case 1: + if (bulk.entity_rank(entity) > connectedRank) { + bulk.destroy_relation(entity, conn[0], ords[0]); + } + else { + bulk.destroy_relation(conn[0], entity, ords[0]); + } + break; + case 2: + { + Entity tmpEntity = conn[1]; + ConnectivityOrdinal tmpOrd = ords[1]; + if (bulk.entity_rank(entity) > connectedRank) { + bulk.destroy_relation(entity, conn[0], ords[0]); + bulk.destroy_relation(entity, tmpEntity, tmpOrd); + } + else { + bulk.destroy_relation(conn[0], entity, ords[0]); + bulk.destroy_relation(tmpEntity, entity, tmpOrd); + } + break; + } + 
default: + { + stk::mesh::EntityVector connv(conn, bulk.end(entity, connectedRank)); + std::vector ordv(ords, bulk.end_ordinals(entity, connectedRank)); + if (bulk.entity_rank(entity) > connectedRank) { + for(int i=0; i +#include + +namespace stk { +namespace mesh { +class BulkData; + +void destroy_relations(stk::mesh::BulkData &bulk, + stk::mesh::Entity entity, + stk::mesh::EntityRank connectedRank); + +}} // namespace stk::mesh + +#endif diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp index 1604e79a8e61..93056081af0b 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp @@ -311,15 +311,6 @@ class DeviceField : public NgpFieldBase return deviceData(deviceSelectedBucketOffset(index.bucket_id), ORDER_INDICES(index.bucket_ord, component)); } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - template STK_DEPRECATED KOKKOS_FUNCTION - T& get(MeshIndex index, int component, - const char * fileName = DEVICE_DEBUG_FILE_NAME, int lineNumber = DEVICE_DEBUG_LINE_NUMBER) const - { - return deviceData(deviceSelectedBucketOffset(index.bucket->bucket_id()), ORDER_INDICES(index.bucketOrd, component)); - } -#endif - KOKKOS_FUNCTION T& operator()(const FastMeshIndex& index, int component, const char * fileName = DEVICE_DEBUG_FILE_NAME, int lineNumber = DEVICE_DEBUG_LINE_NUMBER) const @@ -328,15 +319,6 @@ class DeviceField : public NgpFieldBase return deviceData(deviceSelectedBucketOffset(index.bucket_id), ORDER_INDICES(index.bucket_ord, component)); } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - template STK_DEPRECATED KOKKOS_FUNCTION - T& operator()(const MeshIndex& index, int component, - const char * fileName = DEVICE_DEBUG_FILE_NAME, int lineNumber = DEVICE_DEBUG_LINE_NUMBER) const - { - return deviceData(deviceSelectedBucketOffset(index.bucket->bucket_id()), ORDER_INDICES(index.bucketOrd, component)); - } -#endif 
- KOKKOS_FUNCTION EntityFieldData operator()(const FastMeshIndex& index, const char * fileName = DEVICE_DEBUG_FILE_NAME, int lineNumber = DEVICE_DEBUG_LINE_NUMBER) const @@ -370,12 +352,6 @@ class DeviceField : public NgpFieldBase const FieldBase* get_field_base() const { return hostField; } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - STK_DEPRECATED void rotate_multistate_data() override - { - } -#endif - void update_bucket_pointer_view() override { Selector selector = selectField(*hostField); @@ -512,7 +488,7 @@ class DeviceField : public NgpFieldBase newDeviceSelectedBucketOffset = UnsignedViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, hostField->name() + "_bucket_offset"), allBuckets.size()); newHostSelectedBucketOffset = - Kokkos::create_mirror_view(Kokkos::WithoutInitializing, Kokkos::HostSpace(), newDeviceSelectedBucketOffset); + Kokkos::create_mirror_view(Kokkos::WithoutInitializing, newDeviceSelectedBucketOffset); for(unsigned i = 0; i < allBuckets.size(); i++) { if(selector(*allBuckets[i])) { diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp index 571f7f8a1850..3041d8b31058 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp @@ -115,10 +115,8 @@ void DeviceBucket::update_entity_data_from_host(const stk::mesh::Bucket &bucket) Kokkos::Profiling::pushRegion("filling host-side Views"); auto hostEntities = HostEntityViewType(bucket.begin(), m_bucketCapacity); - auto hostNodeConnectivity = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, - Kokkos::HostSpace(), m_nodeConnectivity); - auto hostNodeConnectivityOffsets = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, - Kokkos::HostSpace(), m_nodeConnectivityOffsets); + auto hostNodeConnectivity = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, m_nodeConnectivity); + auto hostNodeConnectivityOffsets = 
Kokkos::create_mirror_view(Kokkos::WithoutInitializing, m_nodeConnectivityOffsets); unsigned nodeOffset = 0; for (unsigned iEntity = 0; iEntity < bucket.size(); ++iEntity) { const unsigned nodesPerEntity = bucket.num_nodes(iEntity); @@ -239,7 +237,7 @@ inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, siz if (needGrowth || needShrink) { const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); - hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, Kokkos::HostSpace(), deviceView); + hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); } } diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp index 0b0d97a49b6b..7a5ff75d7541 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp @@ -51,6 +51,7 @@ #include #include #include +#include #include "stk_mesh/baseImpl/DeviceMeshHostData.hpp" @@ -121,13 +122,6 @@ struct DeviceBucket { return m_entities(offsetIntoBucket); } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after 2024/06/26 - STK_DEPRECATED - stk::mesh::Entity host_get_entity(unsigned offsetIntoBucket) const { - return m_hostEntities(offsetIntoBucket); - } -#endif - KOKKOS_FUNCTION bool member(stk::mesh::PartOrdinal partOrdinal) const { @@ -147,10 +141,6 @@ struct DeviceBucket { std::pair scan_entities_for_nodal_connectivity(const stk::mesh::Bucket & bucket); EntityViewType m_entities; -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after 2024/06/26 - HostEntityViewType m_hostEntities; -#endif - BucketConnectivityType m_nodeConnectivity; OrdinalViewType m_nodeConnectivityOffsets; @@ -168,14 +158,6 @@ struct DeviceBucket { stk::mesh::EntityRank m_entityRank; }; -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 -struct STK_DEPRECATED 
DeviceMeshIndex -{ - const DeviceBucket *bucket; - size_t bucketOrd; -}; -#endif - class DeviceMesh : public NgpMeshBase { public: @@ -184,11 +166,7 @@ class DeviceMesh : public NgpMeshBase using ConnectedEntities = DeviceBucket::ConnectedEntities; using ConnectedOrdinals = DeviceBucket::ConnectedOrdinals; using Permutations = DeviceBucket::Permutations; -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - using MeshIndex = DeviceMeshIndex; -#else using MeshIndex = FastMeshIndex; -#endif using BucketType = DeviceBucket; KOKKOS_FUNCTION @@ -257,15 +235,6 @@ class DeviceMesh : public NgpMeshBase return buckets[rank](meshIndex.bucket_id)[meshIndex.bucket_ord]; } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - STK_DEPRECATED - KOKKOS_FUNCTION - ConnectedNodes get_nodes(const DeviceMeshIndex &entity) const - { - return buckets[entity.bucket->entity_rank()](entity.bucket->bucket_id()).get_nodes(entity.bucketOrd); - } -#endif - KOKKOS_FUNCTION ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const { @@ -413,13 +382,6 @@ class DeviceMesh : public NgpMeshBase return deviceMeshIndices(entity.local_offset()); } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 -STK_DEPRECATED const stk::mesh::FastMeshIndex& host_mesh_index(stk::mesh::Entity entity) const - { - return hostMeshIndices(entity.local_offset()); - } -#endif - stk::NgpVector get_bucket_ids(stk::mesh::EntityRank rank, const stk::mesh::Selector &selector) const { return stk::mesh::get_bucket_ids(get_bulk_on_host(), rank, selector); @@ -455,6 +417,7 @@ STK_DEPRECATED const stk::mesh::FastMeshIndex& host_mesh_index(stk::mesh::Entity const stk::mesh::BulkData &get_bulk_on_host() const { + STK_ThrowRequireMsg(bulk != nullptr, "DeviceMesh::get_bulk_on_host, bulk==nullptr"); return *bulk; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/DumpMeshInfo.hpp 
b/packages/stk/stk_mesh/stk_mesh/base/DumpMeshInfo.hpp index 7a7ae03c1826..35d4b9449f22 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DumpMeshInfo.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DumpMeshInfo.hpp @@ -41,7 +41,7 @@ namespace stk::mesh { class BulkData; class MetaData; -class EntityKey; +struct EntityKey; class Bucket; class Ghosting; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Entity.hpp b/packages/stk/stk_mesh/stk_mesh/base/Entity.hpp index c9f1ef39f44e..6ebcb59a4ab8 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Entity.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Entity.hpp @@ -115,6 +115,8 @@ struct Entity KOKKOS_FUNCTION bool operator<(Entity entity) const { return m_value < entity.m_value; } + KOKKOS_FUNCTION + bool operator>(Entity entity) const { return m_value > entity.m_value; } }; std::ostream & operator << ( std::ostream & , const Entity & ); diff --git a/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.cpp b/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.cpp index 3fd933c41faa..3dbd4fab7f02 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.cpp @@ -137,7 +137,7 @@ void unpack_entity_info( EntityKey & key , int & owner , PartVector & parts , - std::vector & relations ) + RelationVector& relations ) { unsigned nparts = 0 ; unsigned nrel = 0 ; diff --git a/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.hpp b/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.hpp index 08dbfa086abb..ba193e062faf 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/EntityCommDatabase.hpp @@ -149,7 +149,7 @@ void unpack_entity_info( EntityKey & key , int & owner , PartVector & parts , - std::vector & relations ); + RelationVector& relations ); void pack_sideset_info(BulkData& mesh, CommBuffer & buf, const Entity entity); diff --git a/packages/stk/stk_mesh/stk_mesh/base/EntityLess.hpp 
b/packages/stk/stk_mesh/stk_mesh/base/EntityLess.hpp index bed6f2fe24f1..ac2953ee5f46 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/EntityLess.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/EntityLess.hpp @@ -34,7 +34,9 @@ #ifndef STK_ENTITYLESS_HPP #define STK_ENTITYLESS_HPP +#include #include +#include namespace stk { namespace mesh { @@ -60,7 +62,93 @@ class EntityLess { #endif }; //struct EntityLess +#ifdef SIERRA_MIGRATION +inline bool +EntityLess::operator()(const Entity lhs, const Entity rhs) const +{ + bool result = false; + if (m_shouldSortFacesByNodeIds && + m_mesh->entity_rank(lhs) == m_sideRank && + m_mesh->entity_rank(rhs) == m_sideRank) + { + unsigned num_nodes_lhs = m_mesh->count_valid_connectivity(lhs, stk::topology::NODE_RANK); + unsigned num_nodes_rhs = m_mesh->count_valid_connectivity(rhs, stk::topology::NODE_RANK); + if (num_nodes_lhs != num_nodes_rhs) + { + result = num_nodes_lhs < num_nodes_rhs; + } + else if (num_nodes_lhs == 0) { + result = m_mesh->identifier(lhs) < m_mesh->identifier(rhs); + } + else + { + const stk::mesh::Entity* nodes_lhs_ptr = m_mesh->begin_nodes(lhs); + const stk::mesh::Entity* nodes_rhs_ptr = m_mesh->begin_nodes(rhs); + unsigned i=0; + while(iidentifier(nodes_lhs_ptr[i]) == m_mesh->identifier(nodes_rhs_ptr[i]))) + { + ++i; + } + result = (iidentifier(nodes_lhs_ptr[i]) < m_mesh->identifier(nodes_rhs_ptr[i])) + : false; + } + } + else + { + const EntityKey lhs_key = m_mesh->entity_key(lhs); + const EntityKey rhs_key = m_mesh->entity_key(rhs); + result = lhs_key < rhs_key; + } + return result; +} + +#else + +inline +EntityLess::EntityLess(const BulkData& mesh) : m_mesh(&mesh) {} + +inline bool +EntityLess::operator()(const Entity lhs, const Entity rhs) const +{ + const EntityKey lhs_key = m_mesh->entity_key(lhs); + const EntityKey rhs_key = m_mesh->entity_key(rhs); + return (lhs_key < rhs_key); } +#endif + +inline bool +EntityLess::operator()(const Entity lhs, const EntityKey & rhs) const +{ + const EntityKey lhs_key = 
m_mesh->entity_key(lhs); + return lhs_key < rhs; } +inline bool +EntityLess::operator()( const EntityProc & lhs, const EntityProc & rhs) const +{ + const EntityKey lhs_key = m_mesh->entity_key(lhs.first); + const EntityKey rhs_key = m_mesh->entity_key(rhs.first); + return lhs_key != rhs_key ? lhs_key < rhs_key : lhs.second < rhs.second ; +} + +inline bool +EntityLess::operator()( const EntityProc & lhs, const Entity rhs) const +{ + const EntityKey lhs_key = m_mesh->entity_key(lhs.first); + const EntityKey rhs_key = m_mesh->entity_key(rhs); + return lhs_key < rhs_key; +} + +inline bool +EntityLess::operator()( const EntityProc & lhs, const EntityKey & rhs) const +{ + const EntityKey lhs_key = m_mesh->entity_key(lhs.first); + return lhs_key < rhs ; +} + +} // namespace mesh +} // namespace stk + #endif diff --git a/packages/stk/stk_mesh/stk_mesh/base/EntityParallelState.hpp b/packages/stk/stk_mesh/stk_mesh/base/EntityParallelState.hpp index 4b2898fae62f..57b40a932996 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/EntityParallelState.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/EntityParallelState.hpp @@ -35,14 +35,11 @@ #define STK_ENTITYPARALLELSTATE_HPP #include -#include #include namespace stk { namespace mesh { -class BulkData; - struct EntityParallelState { int from_proc; EntityState state; diff --git a/packages/stk/stk_mesh/stk_mesh/base/EntitySorterBase.hpp b/packages/stk/stk_mesh/stk_mesh/base/EntitySorterBase.hpp index be0effbacbf6..3129c313ab8a 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/EntitySorterBase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/EntitySorterBase.hpp @@ -39,6 +39,8 @@ namespace stk { namespace mesh { +class BulkData; + class EntitySorterBase { public: diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp index 3438cf035177..1722d5955489 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp @@ -465,55 
+465,44 @@ EquivAndPositive is_equivalent_and_positive(const stk::mesh::BulkData& mesh, stk::topology get_subcell_nodes(const BulkData& mesh, const Entity entity, EntityRank subcell_rank, - unsigned subcell_identifier, + unsigned subcell_ordinal, EntityVector & subcell_nodes) { STK_ThrowAssert(subcell_rank <= stk::topology::ELEMENT_RANK); subcell_nodes.clear(); - // get cell topology stk::topology celltopology = mesh.bucket(entity).topology(); //error checking { -//no celltopology defined + //no celltopology defined if(celltopology == stk::topology::INVALID_TOPOLOGY) { return celltopology; } -// valid ranks fall within the dimension of the cell topology + // valid ranks fall within the dimension of the cell topology const bool bad_rank = static_cast(subcell_rank) >= celltopology.dimension(); STK_ThrowInvalidArgMsgIf( bad_rank, "subcell_rank is >= celltopology dimension\n"); -// subcell_identifier must be less than the subcell count - bool bad_id = subcell_identifier >= celltopology.num_sub_topology(subcell_rank); + // subcell_ordinal must be less than the subcell count + bool bad_id = subcell_ordinal >= celltopology.num_sub_topology(subcell_rank); // FIXME SHELL_SIDE_TOPO if (celltopology.is_shell_with_face_sides() && subcell_rank == stk::topology::FACE_RANK) { - bad_id = (subcell_identifier >= celltopology.num_sides()); + bad_id = (subcell_ordinal >= celltopology.num_sides()); } STK_ThrowInvalidArgMsgIf( bad_id, "subcell_id is >= subcell_count\n"); } - // Get the cell topology of the subcell stk::topology subcell_topology = - celltopology.sub_topology(subcell_rank, subcell_identifier); + celltopology.sub_topology(subcell_rank, subcell_ordinal); - const int num_nodes_in_subcell = subcell_topology.num_nodes(); - - // For the subcell, get it's local nodes ids - std::vector subcell_node_local_ids(num_nodes_in_subcell); - celltopology.sub_topology_node_ordinals(subcell_rank, subcell_identifier, subcell_node_local_ids.data()); + 
subcell_nodes.resize(subcell_topology.num_nodes()); Entity const *node_relations = mesh.begin_nodes(entity); - subcell_nodes.reserve(num_nodes_in_subcell); - - for(int i = 0; i < num_nodes_in_subcell; ++i) - { - subcell_nodes.push_back(node_relations[subcell_node_local_ids[i]]); - } + celltopology.sub_topology_nodes(node_relations, subcell_rank, subcell_ordinal, subcell_nodes.data()); return subcell_topology; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp index 16a1abad3f81..433218726254 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp @@ -120,13 +120,13 @@ EquivAndPositive is_side_equivalent_and_positive(const stk::mesh::BulkData& mesh EquivAndPositive is_equivalent_and_positive(const stk::mesh::BulkData& mesh, stk::mesh::Entity element, unsigned ordinal, stk::mesh::EntityRank subRank, const stk::mesh::Entity* candidateNodes); /** - * Given an entity, subcell_rank, and subcell_id, return the nodes - * that make up the subcell in a correct order for the given polarity. 
+ * Given an entity, subcell_rank, and subcell_ordinal, return the nodes + * that make up the subcell */ stk::topology get_subcell_nodes(const BulkData& mesh, const Entity entity , EntityRank subcell_rank , - unsigned subcell_identifier , + unsigned subcell_ordinal , EntityVector & subcell_nodes ); diff --git a/packages/stk/stk_mesh/stk_mesh/base/Field.hpp b/packages/stk/stk_mesh/stk_mesh/base/Field.hpp index 02118ac768b7..b351d23738ae 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Field.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Field.hpp @@ -38,7 +38,6 @@ //---------------------------------------------------------------------- #include -#include #include #include // for equal_case #include diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldBLAS.hpp b/packages/stk/stk_mesh/stk_mesh/base/FieldBLAS.hpp index ea7ddebfe2e3..c49ee0838715 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldBLAS.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldBLAS.hpp @@ -1443,7 +1443,7 @@ void field_amax(Scalar& result, const FieldBase& xField, const Selector& selecto BucketVector const& buckets = xField.get_mesh().get_buckets(xField.entity_rank(), selector & xField.mesh_meta_data().locally_owned_part()); Scalar priv_tmp; - Scalar local_amax(-1.0); + Scalar local_amax(0.0); int orig_thread_count = fix_omp_threads(); #ifdef OPEN_MP_ACTIVE_FIELDBLAS_HPP @@ -1451,9 +1451,10 @@ void field_amax(Scalar& result, const FieldBase& xField, const Selector& selecto #endif for(size_t i=0; i < buckets.size(); i++) { BucketSpan x(xField, *buckets[i]); + if (x.length == 0) continue; priv_tmp = std::abs(x[FortranBLAS::iamax(x.size(),x.data())]); if (local_amax < priv_tmp) { - local_amax = priv_tmp; + local_amax = priv_tmp; } } diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.cpp b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.cpp index 13be49b8dbb8..11c5abc3e604 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.cpp 
@@ -35,7 +35,6 @@ #include #include // for operator<<, basic_ostream, etc #include // for vector, etc -#include "Shards_Array.hpp" // for ArrayDimTag #include "stk_mesh/base/DataTraits.hpp" // for DataTraits #include "stk_mesh/base/MetaData.hpp" // for FieldRestriction #include "stk_mesh/base/FieldRestriction.hpp" // for FieldRestriction @@ -114,17 +113,8 @@ std::pair check_for_existing_subsets_or_supersets(FieldRestriction& t std::ostream & operator<<(std::ostream & s, const FieldBase & field) { - if (field.mesh_meta_data().is_using_simple_fields()) { - s << "Field<" << field.data_traits().name << ">"; - } - else { - s << "Field<" << field.data_traits().name; - for (unsigned i = 0; i < stk::mesh::legacy::field_array_rank(field); ++i) { - s << "," << stk::mesh::legacy::dimension_tags(field)[i]->name(); - } - s << ">"; - } - s << "[\"" << field.name() << "\", #states: " << field.number_of_states() << "]"; + s << "Field<" << field.data_traits().name << ">[\"" << field.name() << "\", #states: " + << field.number_of_states() << "]"; return s ; } @@ -463,44 +453,6 @@ bool FieldBase::defined_on(const stk::mesh::Part& part) const return (length(part) > 0); } -STK_DEPRECATED_MSG("FieldBase::field_array_rank() is no longer supported since it represents the number of " - "extra Field template parameters, which are being removed.") -unsigned -FieldBase::field_array_rank() const -{ - return legacy_field_array_rank(); -} - -unsigned -FieldBase::legacy_field_array_rank() const -{ - if (m_meta_data->is_using_simple_fields()) { - STK_ThrowErrorMsg("FieldBase::field_array_rank() is no longer supported since it represents" << std::endl - << "the number of extra Field template parameters, which are being removed."); - } - - return m_field_rank; -} - -STK_DEPRECATED_MSG("FieldBase::dimension_tags() is no longer supported since it holds the " - "extra Field template parameters, which are being removed.") -const shards::ArrayDimTag * const * -FieldBase::dimension_tags() const -{ - 
return legacy_dimension_tags(); -} - -const shards::ArrayDimTag * const * -FieldBase::legacy_dimension_tags() const -{ - if (m_meta_data->is_using_simple_fields()) { - STK_ThrowErrorMsg("FieldBase::dimension_tags() is no longer supported since it holds the" << std::endl - << "extra Field template parameters, which are being removed."); - } - - return m_dim_tags; -} - unsigned FieldBase::length(const stk::mesh::Part& part) const { const stk::mesh::FieldRestriction& restriction = stk::mesh::find_restriction(*this, entity_rank(), part); diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp index 58aee32d04e9..e4c12e29d702 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp @@ -73,11 +73,6 @@ void set_ngp_field(const FieldBase & stkField, NgpFieldBase * ngpField); stk::CSet & get_attributes(FieldBase & field); } -namespace legacy { -unsigned field_array_rank(const FieldBase & field); -const shards::ArrayDimTag * const * dimension_tags(const FieldBase & field); -} - struct FieldMetaData { unsigned char* m_data = nullptr; @@ -135,23 +130,8 @@ class FieldBase /** \brief FieldState of this field */ FieldState state() const { return m_this_state; } - /** \brief Multi-dimensional array rank of this field, - * which is zero for a scalar field. - */ - STK_DEPRECATED_MSG("FieldBase::field_array_rank() is no longer supported since it represents the number of " - "extra Field template parameters, which are being removed.") - unsigned field_array_rank() const; - EntityRank entity_rank() const { return m_entity_rank; } - /** \brief Multi-dimensional - * \ref shards::ArrayDimTag "array dimension tags" - * of this field. 
- */ - STK_DEPRECATED_MSG("FieldBase::dimension_tags() is no longer supported since it holds the " - "extra Field template parameters, which are being removed.") - const shards::ArrayDimTag * const * dimension_tags() const; - /** \brief Maximum field data allocation size declared for this * field for the given entity rank. */ @@ -283,9 +263,6 @@ class FieldBase void rotate_multistate_data(bool rotateNgpFieldViews = false); private: - unsigned legacy_field_array_rank() const; - const shards::ArrayDimTag * const * legacy_dimension_tags() const; - stk::ngp::ExecSpace& get_execution_space() const { return m_execSpace; } @@ -365,9 +342,6 @@ class FieldBase friend NgpFieldBase* impl::get_ngp_field(const FieldBase & stkField); friend void impl::set_ngp_field(const FieldBase & stkField, NgpFieldBase * ngpField); - friend unsigned legacy::field_array_rank(const FieldBase & field); - friend const shards::ArrayDimTag * const * legacy::dimension_tags(const FieldBase & field); - template class NgpDebugger> friend class HostField; template class NgpDebugger> friend class DeviceField; template friend class Field; @@ -778,22 +752,6 @@ field_data(const FieldType & f, Entity e, field_meta_data.m_bytesPerEntity * mi.bucket_ordinal); } -namespace legacy { - -// These functions will be removed when the deprecated legacy Field handling is removed. Do not use! 
- -inline unsigned field_array_rank(const FieldBase & field) -{ - return field.legacy_field_array_rank(); -} - -inline const shards::ArrayDimTag * const * dimension_tags(const FieldBase & field) -{ - return field.legacy_dimension_tags(); -} - -} - } //namespace stk::mesh #endif //stk_mesh_base_FieldBase_hpp diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldTraits.hpp b/packages/stk/stk_mesh/stk_mesh/base/FieldTraits.hpp index 9f1ed0adc0ec..4c1c2608b50d 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldTraits.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldTraits.hpp @@ -35,100 +35,8 @@ #ifndef stk_mesh_base_FieldTraits_hpp #define stk_mesh_base_FieldTraits_hpp -#include -#include - -#include - -namespace stk { -namespace mesh { - -/** - * FieldTraits provide an API for making queries about field types. - * Examples: - * - Get the scalar data type contained by a field - * stk::mesh::FieldTraits< field_type >::data_type - * - Get the dimensional rank (number of dimensions) in a field type - * stk::mesh::FieldTraits< field_type >::Rank - */ - -template<> -struct STK_DEPRECATED FieldTraits -{ -public: - typedef shards::array_traits::Helper - Helper ; - - typedef void data_type ; ///< \brief Data type of the field's members - typedef void tag1 ; ///< \brief Array dimension tag - typedef void tag2 ; ///< \brief Array dimension tag - typedef void tag3 ; ///< \brief Array dimension tag - typedef void tag4 ; ///< \brief Array dimension tag - typedef void tag5 ; ///< \brief Array dimension tag - typedef void tag6 ; ///< \brief Array dimension tag - typedef void tag7 ; ///< \brief Array dimension tag - - /** \brief Multidimensional array rank */ - enum { Rank = 0 }; - - static void assign_tags( const shards::ArrayDimTag ** tags ) {} -}; - - -/** \brief Scalar type and multi-dimensional array traits of a Field */ -template< typename Scalar > -struct STK_DEPRECATED FieldTraits< Field > -{ -public: - typedef shards::array_traits::Helper - Helper ; - - typedef Scalar 
data_type ; ///< \brief Data type of the field's members - typedef void tag1 ; ///< \brief Array dimension tag - typedef void tag2 ; ///< \brief Array dimension tag - typedef void tag3 ; ///< \brief Array dimension tag - typedef void tag4 ; ///< \brief Array dimension tag - typedef void tag5 ; ///< \brief Array dimension tag - typedef void tag6 ; ///< \brief Array dimension tag - typedef void tag7 ; ///< \brief Array dimension tag - - /** \brief Multidimensional array rank */ - enum { Rank = 0 }; - - static void assign_tags( const shards::ArrayDimTag ** tags ) {} -}; - -/** \brief Scalar type and multi-dimensional array traits of a Field */ -template< typename Scalar , - class Tag1 , class Tag2 , class Tag3 , class Tag4 , - class Tag5 , class Tag6 , class Tag7 > -struct STK_DEPRECATED FieldTraits< Field > -{ -public: - typedef shards::array_traits::Helper - Helper ; - - typedef Scalar data_type ; ///< \brief Data type of the field's members - typedef Tag1 tag1 ; ///< \brief Array dimension tag - typedef Tag2 tag2 ; ///< \brief Array dimension tag - typedef Tag3 tag3 ; ///< \brief Array dimension tag - typedef Tag4 tag4 ; ///< \brief Array dimension tag - typedef Tag5 tag5 ; ///< \brief Array dimension tag - typedef Tag6 tag6 ; ///< \brief Array dimension tag - typedef Tag7 tag7 ; ///< \brief Array dimension tag - - /** \brief Multidimensional array rank */ - enum { Rank = Helper::Rank }; - - static void assign_tags( const shards::ArrayDimTag ** tags ) - { Helper::assign_tags( tags ); } -}; - - -} //namespace mesh -} //namespace stk +// Deprecated contents have been removed. Inclusions of this header should +// be removed from application code, and this file will be deprecated and +// removed in the near future. 
#endif //stk_mesh_base_FieldTraits_hpp diff --git a/packages/stk/stk_mesh/stk_mesh/base/FindPermutation.cpp b/packages/stk/stk_mesh/stk_mesh/base/FindPermutation.cpp new file mode 100644 index 000000000000..c9d7744b13c3 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/base/FindPermutation.cpp @@ -0,0 +1,82 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include + +namespace stk { +namespace mesh { + +Permutation find_permutation(const BulkData& bulk, + const stk::topology& entityTopology, + const Entity* entityNodes, + const stk::topology& sideTopology, + const Entity* sideNodes, + unsigned sideOrdinal) +{ + Entity expectedNodes[100]; + switch (sideTopology.rank()) + { + case stk::topology::EDGE_RANK: + entityTopology.edge_nodes(entityNodes, sideOrdinal, expectedNodes); + break; + case stk::topology::FACE_RANK: + entityTopology.face_nodes(entityNodes, sideOrdinal, expectedNodes); + break; + default: + return INVALID_PERMUTATION; + } + + stk::EquivalentPermutation equivPerm = sideTopology.is_equivalent(expectedNodes, sideNodes); + return equivPerm.is_equivalent ? 
static_cast(equivPerm.permutation_number) : INVALID_PERMUTATION; +} + +bool check_permutation(const BulkData& bulk, + Entity entity, + Entity subEntity, + unsigned subOrdinal, + Permutation expectedPerm) +{ + const stk::topology entityTopo = bulk.mesh_index(entity).bucket->topology(); + const stk::topology subTopo = bulk.mesh_index(subEntity).bucket->topology(); + const Entity* entityNodes = bulk.begin_nodes(entity); + const Entity* subEntityNodes = bulk.begin_nodes(subEntity); + + Permutation computedPerm = find_permutation(bulk, entityTopo, entityNodes, subTopo, subEntityNodes, subOrdinal); + + return computedPerm == expectedPerm; +} + +}} // namespace stk::mesh + diff --git a/packages/stk/stk_mesh/stk_mesh/base/FindPermutation.hpp b/packages/stk/stk_mesh/stk_mesh/base/FindPermutation.hpp new file mode 100644 index 000000000000..1380ea5dfc1b --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/base/FindPermutation.hpp @@ -0,0 +1,61 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_MESH_FIND_PERMUTATION_HPP +#define STK_MESH_FIND_PERMUTATION_HPP + +#include +#include +#include + +namespace stk { +namespace mesh { +class BulkData; + +Permutation find_permutation(const BulkData& bulk, + const stk::topology& entityTopology, + const Entity* entityNodes, + const stk::topology& sideTopology, + const Entity* sideNodes, + unsigned sideOrdinal); + +bool check_permutation(const BulkData& bulk, + Entity entity, + Entity subEntity, + unsigned subOrdinal, + Permutation expectedPerm); + +}} // namespace stk::mesh + +#endif diff --git a/packages/stk/stk_mesh/stk_mesh/base/GetEntities.cpp b/packages/stk/stk_mesh/stk_mesh/base/GetEntities.cpp index 931e6e622827..c94c2fef94d7 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/GetEntities.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/GetEntities.cpp @@ -36,8 +36,9 @@ #include // for size_t #include // for sort #include "stk_mesh/base/Bucket.hpp" // for Bucket -#include "stk_mesh/base/BulkData.hpp" // for EntityLess, BulkData +#include "stk_mesh/base/BulkData.hpp" // for BulkData #include "stk_mesh/base/Entity.hpp" // for Entity +#include "stk_mesh/base/EntityLess.hpp" // for Entity #include 
"stk_mesh/base/MetaData.hpp" // for MetaData #include "stk_mesh/base/Selector.hpp" // for Selector diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp index 4a1bded44930..8a0cde9ba484 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp @@ -140,26 +140,6 @@ class HostField : public NgpFieldBase return data[component]; } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after April 2024 - STK_DEPRECATED - T& get(HostMesh::MeshIndex entity, int component, - const char * fileName = HOST_DEBUG_FILE_NAME, int lineNumber = HOST_DEBUG_LINE_NUMBER) const - { - T* data = static_cast(stk::mesh::field_data(*field, entity.bucket->bucket_id(), entity.bucketOrd)); - STK_ThrowAssert(data); - return data[component]; - } - - STK_DEPRECATED - T& operator()(const HostMesh::MeshIndex& index, int component, - const char * fileName = HOST_DEBUG_FILE_NAME, int lineNumber = HOST_DEBUG_LINE_NUMBER) const - { - T* data = static_cast(stk::mesh::field_data(*field, index.bucket->bucket_id(), index.bucketOrd)); - STK_ThrowAssert(data); - return data[component]; - } -#endif - T& operator()(const stk::mesh::FastMeshIndex& index, int component, const char * fileName = HOST_DEBUG_FILE_NAME, int lineNumber = HOST_DEBUG_LINE_NUMBER) const { @@ -265,10 +245,6 @@ class HostField : public NgpFieldBase FieldState state() const { return field->state(); } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - STK_DEPRECATED void rotate_multistate_data() override { } -#endif - void update_bucket_pointer_view() override { } void swap_field_views(NgpFieldBase *other) override { } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp index a897cd3bf5e8..a7fbeaa21684 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp @@ -56,23 +56,11 @@ namespace stk { namespace 
mesh { -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 -struct HostMeshIndex -{ - const stk::mesh::Bucket *bucket; - size_t bucketOrd; -}; -#endif - class HostMesh : public NgpMeshBase { public: using MeshExecSpace = stk::ngp::HostExecSpace; -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - using MeshIndex = HostMeshIndex; -#else using MeshIndex = FastMeshIndex; -#endif using BucketType = stk::mesh::Bucket; using ConnectedNodes = util::StridedArray; using ConnectedEntities = util::StridedArray; @@ -132,15 +120,6 @@ class HostMesh : public NgpMeshBase return (*(bulk->buckets(rank)[meshIndex.bucket_id]))[meshIndex.bucket_ord]; } -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - STK_DEPRECATED - ConnectedNodes get_nodes(const MeshIndex &elem) const - { - const stk::mesh::Bucket& bucket = *elem.bucket; - return ConnectedNodes(bucket.begin_nodes(elem.bucketOrd), bucket.num_nodes(elem.bucketOrd)); - } -#endif - ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const { const stk::mesh::Bucket& bucket = get_bucket(rank, entity.bucket_id); @@ -225,13 +204,6 @@ class HostMesh : public NgpMeshBase return stk::mesh::FastMeshIndex{meshIndex.bucket->bucket_id(), static_cast(meshIndex.bucket_ordinal)}; } -#ifndef STK_HIDE_DEPRECATED_CODE -STK_DEPRECATED stk::mesh::FastMeshIndex host_mesh_index(stk::mesh::Entity entity) const - { - return fast_mesh_index(entity); - } -#endif - stk::mesh::FastMeshIndex device_mesh_index(stk::mesh::Entity entity) const { return fast_mesh_index(entity); diff --git a/packages/stk/stk_mesh/stk_mesh/base/LegacyFieldTraits.hpp b/packages/stk/stk_mesh/stk_mesh/base/LegacyFieldTraits.hpp deleted file mode 100644 index c63b8a642557..000000000000 --- a/packages/stk/stk_mesh/stk_mesh/base/LegacyFieldTraits.hpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering -// Solutions of 
Sandia, LLC (NTESS). Under the terms of Contract -// DE-NA0003525 with NTESS, the U.S. Government retains certain rights -// in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// * Neither the name of NTESS nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef stk_mesh_base_LegacyFieldTraits_hpp -#define stk_mesh_base_LegacyFieldTraits_hpp - -#include -#include - -#include - -namespace stk { -namespace mesh { -namespace legacy { - -/** - * FieldTraits provide an API for making queries about field types. 
- * Examples: - * - Get the scalar data type contained by a field - * stk::mesh::FieldTraits< field_type >::data_type - * - Get the dimensional rank (number of dimensions) in a field type - * stk::mesh::FieldTraits< field_type >::Rank - */ - -template<> -struct FieldTraits -{ -public: - typedef shards::array_traits::Helper - Helper ; - - typedef void data_type ; ///< \brief Data type of the field's members - typedef void tag1 ; ///< \brief Array dimension tag - typedef void tag2 ; ///< \brief Array dimension tag - typedef void tag3 ; ///< \brief Array dimension tag - typedef void tag4 ; ///< \brief Array dimension tag - typedef void tag5 ; ///< \brief Array dimension tag - typedef void tag6 ; ///< \brief Array dimension tag - typedef void tag7 ; ///< \brief Array dimension tag - - /** \brief Multidimensional array rank */ - enum { Rank = 0 }; - - static void assign_tags( const shards::ArrayDimTag ** tags ) {} -}; - - -/** \brief Scalar type and multi-dimensional array traits of a Field */ -template< typename Scalar > -struct FieldTraits< Field > -{ -public: - typedef shards::array_traits::Helper - Helper ; - - typedef Scalar data_type ; ///< \brief Data type of the field's members - typedef void tag1 ; ///< \brief Array dimension tag - typedef void tag2 ; ///< \brief Array dimension tag - typedef void tag3 ; ///< \brief Array dimension tag - typedef void tag4 ; ///< \brief Array dimension tag - typedef void tag5 ; ///< \brief Array dimension tag - typedef void tag6 ; ///< \brief Array dimension tag - typedef void tag7 ; ///< \brief Array dimension tag - - /** \brief Multidimensional array rank */ - enum { Rank = 0 }; - - static void assign_tags( const shards::ArrayDimTag ** tags ) {} -}; - -/** \brief Scalar type and multi-dimensional array traits of a Field */ -template< typename Scalar , - class Tag1 , class Tag2 , class Tag3 , class Tag4 , - class Tag5 , class Tag6 , class Tag7 > -struct FieldTraits< Field > -{ -public: - typedef shards::array_traits::Helper - 
Helper ; - - typedef Scalar data_type ; ///< \brief Data type of the field's members - typedef Tag1 tag1 ; ///< \brief Array dimension tag - typedef Tag2 tag2 ; ///< \brief Array dimension tag - typedef Tag3 tag3 ; ///< \brief Array dimension tag - typedef Tag4 tag4 ; ///< \brief Array dimension tag - typedef Tag5 tag5 ; ///< \brief Array dimension tag - typedef Tag6 tag6 ; ///< \brief Array dimension tag - typedef Tag7 tag7 ; ///< \brief Array dimension tag - - /** \brief Multidimensional array rank */ - enum { Rank = Helper::Rank }; - - static void assign_tags( const shards::ArrayDimTag ** tags ) - { Helper::assign_tags( tags ); } -}; - - -} //namespace legacy -} //namespace mesh -} //namespace stk - -#endif //stk_mesh_base_LegacyFieldTraits_hpp diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp index 7e53e06d1cbb..4d7995f8e1e8 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp @@ -148,7 +148,6 @@ MetaData::MetaData(size_t spatial_dimension, const std::vector& ent : m_bulk_data(NULL), m_commit( false ), m_are_late_fields_enabled( false ), - m_use_simple_fields(false), m_part_repo( this ), m_attributes(), m_universal_part( NULL ), @@ -164,10 +163,6 @@ MetaData::MetaData(size_t spatial_dimension, const std::vector& ent const size_t numRanks = stk::topology::NUM_RANKS; STK_ThrowRequireMsg(entity_rank_names.size() <= numRanks, "MetaData: number of entity-ranks (" << entity_rank_names.size() << ") exceeds limit of stk::topology::NUM_RANKS (" << numRanks <<")"); -#ifdef STK_USE_SIMPLE_FIELDS - m_use_simple_fields = true; -#endif - m_universal_part = m_part_repo.universal_part(); m_owns_part = & declare_internal_part("OWNS"); m_shares_part = & declare_internal_part("SHARES"); @@ -180,7 +175,6 @@ MetaData::MetaData() : m_bulk_data(NULL), m_commit( false ), m_are_late_fields_enabled( false ), - m_use_simple_fields(false), m_part_repo( this ), 
m_attributes(), m_universal_part( NULL ), @@ -193,10 +187,6 @@ MetaData::MetaData() m_spatial_dimension( 0 /*invalid spatial dimension*/), m_surfaceToBlock() { -#ifdef STK_USE_SIMPLE_FIELDS - m_use_simple_fields = true; -#endif - // Declare the predefined parts m_universal_part = m_part_repo.universal_part(); @@ -1452,6 +1442,9 @@ FieldBase* MetaData::get_field(stk::mesh::EntityRank entity_rank, const std::str return nullptr; } +void MetaData::use_simple_fields() { } + +bool MetaData::is_using_simple_fields() const { return true; } FieldBase* get_field_by_name( const std::string& name, const MetaData & metaData ) { diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp index 45fc78bd8266..cafd67981813 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp @@ -47,7 +47,6 @@ #include // for pair #include // for vector, vector<>::size_type #include -#include #include // for EntityKey #include // for Part #include // for Selector @@ -75,8 +74,6 @@ namespace stk { namespace mesh { class MetaData; } } namespace stk { namespace mesh { -typedef Field CoordinatesField; - template struct is_field : std::false_type {}; @@ -104,22 +101,6 @@ template constexpr bool is_simple_field_v = is_simple_field::value; -template -struct is_complex_field : std::false_type {}; - -template -struct is_complex_field> : std::negation> {}; - -template -constexpr bool is_complex_field_v = is_complex_field::value; - - -namespace legacy { - template - field_type & - declare_field(MetaData & meta, stk::topology::rank_t entityRank, const std::string & name, unsigned numberOfStates = 1); -} - /** \addtogroup stk_mesh_module * \{ */ @@ -393,13 +374,6 @@ class MetaData { */ bool check_rank(EntityRank rank) const; - template , int> = 0> - STK_DEPRECATED_MSG("Only the datatype template parameter is supported") - field_type * get_field(stk::mesh::EntityRank entity_rank, - const 
std::string & name, - const char * fileName = HOST_DEBUG_FILE_NAME, - int lineNumber = HOST_DEBUG_LINE_NUMBER) const; - // Get a field by name, and return nullptr if it does not exist. // A case-insensitive name search will be performed. An exception // will be thrown if a Field with the specified name exists but @@ -407,7 +381,7 @@ class MetaData { // // stk::mesh::Field * field = meta.get_field(stk::topology::NODE_RANK, "density"); // - template , int> = 0> + template Field * get_field(stk::mesh::EntityRank entity_rank, const std::string & name, const char * fileName = HOST_DEBUG_FILE_NAME, @@ -435,14 +409,6 @@ class MetaData { return m_field_repo.get_fields(rank) ; } - template , int> = 0> - STK_DEPRECATED_MSG("Only the datatype template parameter is supported") - field_type & declare_field(stk::topology::rank_t arg_entity_rank, - const std::string & name, - unsigned number_of_states = 1, - const char * fileName = HOST_DEBUG_FILE_NAME, - int lineNumber = HOST_DEBUG_LINE_NUMBER); - // Declare a stk::mesh::Field by providing the datatype as the template parameter. // For example: // @@ -453,7 +419,7 @@ class MetaData { // calls to stk::mesh::put_field_on_mesh(). Exodus file output subscripting of // multi-component Fields is handled through calls to stk::io::set_field_output_type(). // - template , int> = 0> + template Field & declare_field(stk::topology::rank_t arg_entity_rank, const std::string & name, unsigned number_of_states = 1, @@ -523,12 +489,12 @@ class MetaData { // Enable a mode where an error will be thrown if Fields are registered with // extra template parameters beyond just the datatype, and auto-registered // Fields during a mesh read will be created with only the datatype template - // parameter. This will eventually graduate from an optional behavior to - // the only supported behavior. + // parameter. This is now the only supported behavior, and this function + // will be deprecated and removed in the near future. 
// - void use_simple_fields() { m_use_simple_fields = true; } + void use_simple_fields(); - bool is_using_simple_fields() const { return m_use_simple_fields; } + bool is_using_simple_fields() const; /** \brief Declare a field restriction via runtime type information. */ @@ -635,10 +601,6 @@ class MetaData { bool delete_part_alias_case_insensitive(Part& part, const std::string& alias); std::vector get_part_aliases(const Part& part) const; - template - friend field_type & - legacy::declare_field(MetaData & meta, stk::topology::rank_t entityRank, const std::string & name, unsigned numberOfStates); - protected: Part & declare_internal_part( const std::string & p_name); @@ -650,13 +612,6 @@ class MetaData { MetaData( const MetaData & ); ///< \brief Not allowed MetaData & operator = ( const MetaData & ); ///< \brief Not allowed - template , int> = 0> - field_type & legacy_declare_field(stk::topology::rank_t arg_entity_rank, - const std::string & name, - unsigned number_of_states = 1, - const char * fileName = HOST_DEBUG_FILE_NAME, - int lineNumber = HOST_DEBUG_LINE_NUMBER); - const char** reserved_state_suffix() const; virtual Part & declare_internal_part( const std::string & p_name, EntityRank rank); @@ -672,7 +627,6 @@ class MetaData { BulkData* m_bulk_data; bool m_commit ; bool m_are_late_fields_enabled; - bool m_use_simple_fields; impl::PartRepository m_part_repo ; CSet m_attributes ; @@ -731,23 +685,12 @@ const std::vector& entity_rank_names(); * See Field.hpp for a full discussion of field restrictions. 
*/ -template , int> = 0> -field_type & put_field_on_mesh(field_type & field, - const Part & part, - const typename field_type::value_type* init_value); - -template || is_field_base_v, int> = 0> +template field_type & put_field_on_mesh(field_type & field, const Part & part, const typename field_type::value_type* init_value); - -template , int> = 0> -field_type & put_field_on_mesh(field_type & field, - const Selector & selector, - const typename field_type::value_type* init_value); - -template || is_field_base_v, int> = 0> +template field_type & put_field_on_mesh(field_type & field, const Selector & selector, const typename field_type::value_type* init_value); @@ -765,6 +708,7 @@ field_type & put_field_on_mesh(field_type & field, unsigned n1, const typename field_type::value_type* init_value); + template field_type & put_field_on_mesh(field_type & field, const Part & part, @@ -779,6 +723,7 @@ field_type & put_field_on_mesh(field_type & field, unsigned n2, const typename field_type::value_type* init_value); + template field_type & put_field_on_entire_mesh_with_initial_value(field_type & field, const typename field_type::value_type *initial_value) @@ -832,55 +777,16 @@ Part & MetaData::get_part( unsigned ord ) const return *m_part_repo.get_all_parts()[ord]; } -template , int>> -STK_DEPRECATED_MSG("Only the datatype template parameter is supported") -inline -field_type * MetaData::get_field(stk::mesh::EntityRank arg_entity_rank, - const std::string & name, - const char * fileName, - int lineNumber) const -{ - typedef legacy::FieldTraits< field_type > Traits ; - - if (m_use_simple_fields) { - std::ostringstream os; - os << "Invalid call to MetaData::get_field() for Field '" << name << "'." << std::endl - << " Cannot use a Field as the template parameter:" << std::endl - << " get_field<" << sierra::demangle(typeid(field_type).name()) << ">()" << std::endl - << " Please use only a datatype template parameter, e.g. 
get_field<" - << sierra::demangle(typeid(typename Traits::data_type).name()) << ">()." << std::endl; - - if (lineNumber != -1) { - os << " Called from: " << fileName << ":" << lineNumber << std::endl; - } - else { - os << " Please build with at least gcc-4.8.0 or clang-9.0.0 to see caller location" << std::endl; - } - STK_ThrowErrorMsg(os.str()); - } - - const DataTraits & dt = data_traits< typename Traits::data_type >(); - const DataTraits & dt_void = data_traits< void >(); - - const shards::ArrayDimTag * tags[8] ; - - Traits::assign_tags( tags ); - - FieldBase * const field = m_field_repo.get_field( arg_entity_rank, name , dt , Traits::Rank , tags , 0 ); - - STK_ThrowRequireMsg(field == nullptr || field->data_traits().type_info == dt.type_info || dt_void.type_info == dt.type_info, - "field " << field->name() << " has type " << field->data_traits().type_info.name() << " when expecting type " << dt.type_info.name()); - - return static_cast(field); -} - -template , int>> +template inline Field * MetaData::get_field(stk::mesh::EntityRank arg_entity_rank, const std::string & name, const char * fileName, int lineNumber) const { + static_assert(not is_field_v, "You must use a datatype as the template parameter to MetaData::get_field()," + "and not the Field itself"); + const DataTraits & dt = data_traits(); const DataTraits & dt_void = data_traits(); const int fieldRank = 0; @@ -898,20 +804,7 @@ Field * MetaData::get_field(stk::mesh::EntityRank arg_entity_rank, return static_cast*>(field); } - -template , int>> -STK_DEPRECATED_MSG("Only the datatype template parameter is supported") -field_type & -MetaData::declare_field(stk::topology::rank_t arg_entity_rank, - const std::string & name, - unsigned number_of_states, - const char * fileName, - int lineNumber) -{ - return legacy_declare_field(arg_entity_rank, name, number_of_states, fileName, lineNumber); -} - -template , int>> +template Field & MetaData::declare_field(stk::topology::rank_t arg_entity_rank, const 
std::string & name, @@ -919,6 +812,9 @@ MetaData::declare_field(stk::topology::rank_t arg_entity_rank, const char * fileName, int lineNumber) { + static_assert(not is_field_v, "You must use a datatype as the template parameter to MetaData::declare_field()," + "and not the Field itself"); + const DataTraits & traits = data_traits(); const int fieldRank = 0; @@ -1003,150 +899,16 @@ MetaData::declare_field(stk::topology::rank_t arg_entity_rank, return *f[0]; } -template , int>> -field_type & -MetaData::legacy_declare_field(stk::topology::rank_t arg_entity_rank, - const std::string & name, - unsigned number_of_states, - const char * fileName, - int lineNumber) -{ - typedef legacy::FieldTraits< field_type > Traits ; - - const DataTraits & traits = data_traits< typename Traits::data_type >(); - - const shards::ArrayDimTag * dim_tags[8] ; - - Traits::assign_tags( dim_tags ); - - if (m_use_simple_fields) { - std::ostringstream os; - os << "Invalid call to MetaData::declare_field() for Field '" << name << "'." << std::endl - << " Cannot use a Field as the template parameter:" << std::endl - << " declare_field<" << sierra::demangle(typeid(field_type).name()) << ">()" << std::endl - << " Please use only a datatype template parameter, e.g. declare_field<" - << sierra::demangle(typeid(typename Traits::data_type).name()) << ">()." 
<< std::endl; - - if (lineNumber != -1) { - os << " Called from: " << fileName << ":" << lineNumber << std::endl; - } - else { - os << " Please build with at least gcc-4.8.0 or clang-9.0.0 to see caller location" << std::endl; - } - STK_ThrowErrorMsg(os.str()); - } - - const char** reservedStateSuffix = reserved_state_suffix(); - - // Check that the name does not have a reserved suffix - - for ( unsigned i = 0 ; i < 6 ; ++i ) { - const int len_name = name.size(); - const int len_suffix = std::strlen( reservedStateSuffix[i] ); - const int offset = len_name - len_suffix ; - if ( 0 <= offset ) { - const char * const name_suffix = name.c_str() + offset ; - STK_ThrowErrorMsgIf( equal_case( name_suffix , reservedStateSuffix[i] ), - "For name = \"" << name_suffix << - "\" CANNOT HAVE THE RESERVED STATE SUFFIX \"" << - reservedStateSuffix[i] << "\"" ); - } - } - - // Check that the field of this name has not already been declared - - field_type * f[ MaximumFieldStates ] = {nullptr}; - - FieldBase* rawField = m_field_repo.get_field(arg_entity_rank , name , - traits , Traits::Rank , dim_tags , number_of_states); - - - f[0] = dynamic_cast(rawField); - - if (rawField != nullptr) { - STK_ThrowRequireMsg(f[0] == rawField, "Re-registration of field '" << name << "' with a different template type is not allowed."); - } - - if (f[0] != nullptr) { - for ( unsigned i = 1 ; i < number_of_states ; ++i ) { - f[i] = &(f[0]->field_of_state(static_cast(i))); - } - } - else { - // Field does not exist then create it - - std::string field_names[ MaximumFieldStates ]; - - field_names[0] = name ; - - if ( 2 == number_of_states ) { - field_names[1] = name ; - field_names[1].append( reservedStateSuffix[0] ); - } - else { - for ( unsigned i = 1 ; i < number_of_states ; ++i ) { - field_names[i] = name ; - field_names[i].append( reservedStateSuffix[i] ); - } - } - - for ( unsigned i = 0 ; i < number_of_states ; ++i ) { - - f[i] = new field_type(this, - arg_entity_rank, - 
m_field_repo.get_fields().size(), - field_names[i], - traits, - Traits::Rank, - dim_tags, - number_of_states, - static_cast(i)); - - m_field_repo.add_field( f[i] ); - } - - for ( unsigned i = 0 ; i < number_of_states ; ++i ) { - f[i]->set_field_states( f ); - } - } - - f[0]->set_mesh(m_bulk_data); - - return *f[0] ; -} - - -template , int>> +template inline field_type & put_field_on_mesh(field_type & field, const Part & part, const typename field_type::value_type* init_value) { - MetaData & meta = MetaData::get(field); - - unsigned numScalarsPerEntity = 1; - if (not meta.is_using_simple_fields()) { - typedef legacy::FieldTraits Traits; - typedef typename Traits::Helper Helper; - if (stk::mesh::legacy::field_array_rank(field) > 0) { - unsigned stride[8] = {0,0,0,0,0,0,0,0}; - Helper::assign(stride); - numScalarsPerEntity = stride[0]; - } - } - - unsigned firstDimension = numScalarsPerEntity; - meta.declare_field_restriction(field, part, numScalarsPerEntity, firstDimension, init_value); + static_assert(is_simple_field_v || is_field_base_v, + "You must only call put_field_on_mesh() with a simple field argument (i.e. without template parameters" + " beyond the datatype"); - return field; -} - -template || is_field_base_v, int>> -inline -field_type & put_field_on_mesh(field_type & field, - const Part & part, - const typename field_type::value_type* init_value) -{ MetaData & meta = MetaData::get(field); unsigned numScalarsPerEntity = 1; @@ -1156,38 +918,16 @@ field_type & put_field_on_mesh(field_type & field, return field; } - -template , int>> +template inline field_type & put_field_on_mesh(field_type & field, const Selector & selector, const typename field_type::value_type* init_value) { - MetaData & meta = MetaData::get(field); + static_assert(is_simple_field_v || is_field_base_v, + "You must only call put_field_on_mesh() with a simple field argument (i.e. 
without template parameters" + " beyond the datatype"); - unsigned numScalarsPerEntity = 1; - if (not meta.is_using_simple_fields()) { - typedef legacy::FieldTraits Traits; - typedef typename Traits::Helper Helper; - if (stk::mesh::legacy::field_array_rank(field) > 0) { - unsigned stride[8] = {0,0,0,0,0,0,0,0}; - Helper::assign(stride); - numScalarsPerEntity = stride[0]; - } - } - - unsigned firstDimension = numScalarsPerEntity; - meta.declare_field_restriction(field, selector, numScalarsPerEntity, firstDimension, init_value); - - return field; -} - -template || is_field_base_v, int>> -inline -field_type & put_field_on_mesh(field_type & field, - const Selector & selector, - const typename field_type::value_type* init_value) -{ MetaData & meta = MetaData::get(field); unsigned numScalarsPerEntity = 1; @@ -1356,57 +1096,15 @@ is_auto_declared_part(const Part &part) return stk::mesh::impl::is_internal_part(part); } -template , int> = 0> -STK_DEPRECATED_MSG("Only the datatype template parameter is supported") -field_type * get_field_by_name(const std::string & name, - const MetaData & metaData, - const char * fileName = HOST_DEBUG_FILE_NAME, - int lineNumber = HOST_DEBUG_LINE_NUMBER) -{ - if (metaData.is_using_simple_fields()) { - typedef legacy::FieldTraits Traits; - std::ostringstream os; - os << "Invalid call to get_field_by_name() for Field '" << name << "'." << std::endl - << " Cannot use a Field as the template parameter:" << std::endl - << " get_field_by_name<" << sierra::demangle(typeid(field_type).name()) << ">()" << std::endl - << " Please use only a datatype template parameter, e.g. get_field_by_name<" - << sierra::demangle(typeid(typename Traits::data_type).name()) << ">()." 
<< std::endl; - - if (lineNumber != -1) { - os << " Called from: " << fileName << ":" << lineNumber << std::endl; - } - else { - os << " Please build with at least gcc-4.8.0 or clang-9.0.0 to see caller location" << std::endl; - } - STK_ThrowErrorMsg(os.str()); - } - - field_type* field = nullptr; - unsigned num_nonnull_fields = 0; - for(stk::topology::rank_t i=stk::topology::NODE_RANK; i<=stk::topology::CONSTRAINT_RANK; ++i) { - field_type* thisfield = metaData.get_field(i, name); - if (thisfield != nullptr) { - if (field == nullptr) { - field = thisfield; - } - ++num_nonnull_fields; - } - } - - if (num_nonnull_fields > 1) { - std::cerr << "get_field_by_name WARNING, found "<, int> = 0> +template Field * get_field_by_name(const std::string & name, const MetaData & metaData, const char * fileName = HOST_DEBUG_FILE_NAME, int lineNumber = HOST_DEBUG_LINE_NUMBER) { + static_assert(not is_field_v, "You must use a datatype as the template parameter to get_field_by_name()," + "and not the Field itself"); + Field* field = nullptr; unsigned num_nonnull_fields = 0; for(stk::topology::rank_t i=stk::topology::NODE_RANK; i<=stk::topology::CONSTRAINT_RANK; ++i) { @@ -1429,19 +1127,6 @@ Field * get_field_by_name(const std::string & name, FieldBase* get_field_by_name( const std::string& name, const MetaData & metaData ); -namespace legacy { - -// This function will be removed when the deprecated legacy Field handling is removed. Do not use! 
- -template -inline field_type & -declare_field(MetaData & meta, stk::topology::rank_t entityRank, const std::string & name, unsigned numberOfStates) -{ - return meta.legacy_declare_field(entityRank, name, numberOfStates); -} - -} - } // namespace mesh } // namespace stk diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp index 34124cd90ed6..ff94b25073f0 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -57,129 +58,88 @@ namespace mesh { template inline -void field_fill(const Scalar alpha, const FieldBase& field, const EXEC_SPACE& execSpace, - bool IsDeviceExecSpaceUserOverride = (!std::is_same_v)) +void field_fill(const Scalar alpha, + const FieldBase& field, + int component, + const Selector& selector, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - constexpr bool isActuallyDeviceExecSpace = !Kokkos::SpaceAccessibility::accessible; -#ifdef STK_USE_DEVICE_MESH - constexpr bool operateOnDevice = isActuallyDeviceExecSpace; -#else - constexpr bool operateOnDevice = false; -#endif - - field.clear_sync_state(); - - if constexpr (operateOnDevice) { - NgpField& ngpField = get_updated_ngp_field(field); - impl::field_fill_no_sync_or_mark(alpha, ngpField, execSpace); - } - else { - stk::mesh::field_fill(alpha, field); - } - - const bool markModifiedOnDevice = isActuallyDeviceExecSpace || IsDeviceExecSpaceUserOverride; - if (markModifiedOnDevice) { - field.modify_on_device(); - } - else { - field.modify_on_host(); - } + ngp_field_blas::impl::field_fill_impl(alpha, field, component, &selector, execSpace, isDeviceExecSpaceUserOverride); } -template +template inline -void field_fill(const Scalar alpha, const FieldBase& field, const Selector& selector, const EXEC_SPACE& execSpace, - bool 
IsDeviceExecSpaceUserOverride = (!std::is_same_v)) +void field_fill(const Scalar alpha, + const FieldBase& field, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - constexpr bool isActuallyDeviceExecSpace = !Kokkos::SpaceAccessibility::accessible; -#ifdef STK_USE_DEVICE_MESH - constexpr bool operateOnDevice = isActuallyDeviceExecSpace; -#else - constexpr bool operateOnDevice = false; -#endif - - field.clear_sync_state(); - - if constexpr (operateOnDevice) { - NgpField& ngpField = get_updated_ngp_field(field); - impl::field_fill_no_sync_or_mark(alpha, ngpField, selector, execSpace); - } - else { - stk::mesh::field_fill(alpha, field, selector); - } + ngp_field_blas::impl::field_fill_impl(alpha, field, -1, nullptr, execSpace, isDeviceExecSpaceUserOverride); +} - const bool markModifiedOnDevice = isActuallyDeviceExecSpace || IsDeviceExecSpaceUserOverride; - if (markModifiedOnDevice) { - field.modify_on_device(); - } - else { - field.modify_on_host(); - } +template +inline +void field_fill(const Scalar alpha, + const FieldBase& field, + const Selector& selector, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) +{ + ngp_field_blas::impl::field_fill_impl(alpha, field, -1, &selector, execSpace, isDeviceExecSpaceUserOverride); } template inline -void field_copy(const FieldBase& xField, const FieldBase& yField, const EXEC_SPACE& execSpace, - bool IsDeviceExecSpaceUserOverride = (!std::is_same_v)) +void field_copy(const FieldBase& xField, + const FieldBase& yField, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - constexpr bool isActuallyDeviceExecSpace = !Kokkos::SpaceAccessibility::accessible; -#ifdef STK_USE_DEVICE_MESH - constexpr bool operateOnDevice = isActuallyDeviceExecSpace; -#else - constexpr bool operateOnDevice = false; -#endif - - yField.clear_sync_state(); - - if constexpr (operateOnDevice) { - xField.sync_to_device(); - 
impl::field_copy_no_sync_or_mark(xField, yField, execSpace); - } - else { - xField.sync_to_host(); - stk::mesh::field_copy(xField, yField); - } - - yField.clear_sync_state(); - const bool markModifiedOnDevice = isActuallyDeviceExecSpace || IsDeviceExecSpaceUserOverride; - if (markModifiedOnDevice) { - yField.modify_on_device(); - } - else { - yField.modify_on_host(); - } + ngp_field_blas::impl::field_copy_impl(xField, yField, nullptr, execSpace, isDeviceExecSpaceUserOverride); } template inline -void field_copy(const FieldBase& xField, const FieldBase& yField, const Selector& selector, const EXEC_SPACE& execSpace, - bool IsDeviceExecSpaceUserOverride = (!std::is_same_v)) +void field_copy(const FieldBase& xField, + const FieldBase& yField, + const Selector& selector, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - constexpr bool isActuallyDeviceExecSpace = !Kokkos::SpaceAccessibility::accessible; -#ifdef STK_USE_DEVICE_MESH - constexpr bool operateOnDevice = isActuallyDeviceExecSpace; -#else - constexpr bool operateOnDevice = false; -#endif + ngp_field_blas::impl::field_copy_impl(xField, yField, &selector, execSpace, isDeviceExecSpaceUserOverride); +} - yField.clear_sync_state(); +template +inline void field_axpbyz(const stk::mesh::BulkData& mesh, + const DataType alpha, + const stk::mesh::FieldBase & xField, + const DataType beta, + const stk::mesh::FieldBase & yField, + const stk::mesh::FieldBase & zField, + const stk::mesh::Selector & selector, + const EXEC_SPACE& execSpace, + bool IsDeviceExecSpaceUserOverride = (!std::is_same_v)) +{ + // z = a*x + b*y - if constexpr (operateOnDevice) { - xField.sync_to_device(); - impl::field_copy_no_sync_or_mark(xField, yField, selector, execSpace); - } - else { + if constexpr (ngp_field_blas::impl::operate_on_ngp_mesh()) { + ngp_field_blas::impl::apply_functor_on_field( + mesh, zField, xField, yField, alpha, beta, selector); + } + else { xField.sync_to_host(); - 
stk::mesh::field_copy(xField, yField, selector); + yField.sync_to_host(); + stk::mesh::field_copy(yField, zField, selector); + stk::mesh::field_axpby(alpha, xField, beta, zField, selector); } - yField.clear_sync_state(); - const bool markModifiedOnDevice = isActuallyDeviceExecSpace || IsDeviceExecSpaceUserOverride; - if (markModifiedOnDevice) { - yField.modify_on_device(); + zField.clear_sync_state(); + if (ngp_field_blas::impl::mark_modified_on_device(execSpace, IsDeviceExecSpaceUserOverride)) { + zField.modify_on_device(); } else { - yField.modify_on_host(); + zField.modify_on_host(); } } diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBase.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBase.hpp index 46fbbc9f8622..857f2778b0a9 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBase.hpp @@ -19,9 +19,6 @@ class NgpFieldBase KOKKOS_FUNCTION NgpFieldBase& operator=(const NgpFieldBase&) { return *this; } KOKKOS_FUNCTION NgpFieldBase& operator=(NgpFieldBase&&) { return *this; } KOKKOS_FUNCTION virtual ~NgpFieldBase() {} -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - STK_DEPRECATED virtual void rotate_multistate_data() = 0; -#endif virtual void update_bucket_pointer_view() = 0; virtual void swap_field_views(NgpFieldBase*) = 0; virtual void modify_on_host() = 0; diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpForEachEntity.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpForEachEntity.hpp index 8ff9ff21bd9d..c2b1232d95bd 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpForEachEntity.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpForEachEntity.hpp @@ -48,156 +48,87 @@ namespace mesh { template struct ThreadFunctor { - ThreadFunctor(const typename Mesh::BucketType *b, const AlgorithmPerEntity &f) : - bucket(b), - functor(f) - {} - - void operator()(const int& i) const - { - static_assert(std::is_same::type, stk::mesh::HostMesh>::value, - "Mesh is not of stk::mesh::HostMesh 
type"); - functor(typename stk::mesh::FastMeshIndex{bucket->bucket_id(), static_cast(i)}); - } - - const typename Mesh::BucketType *bucket; - const AlgorithmPerEntity &functor; -}; - -template -struct ThreadFunctor { - using Mesh = stk::mesh::DeviceMesh; - KOKKOS_FUNCTION - ThreadFunctor(const typename Mesh::BucketType *b, const AlgorithmPerEntity &f) : - bucket(b), + ThreadFunctor(const typename Mesh::BucketType *b, const AlgorithmPerEntity &f) + : bucket(b), functor(f) {} KOKKOS_FUNCTION void operator()(const int& i) const { - functor(typename stk::mesh::FastMeshIndex{bucket->bucket_id(), static_cast(i)}); + functor(stk::mesh::FastMeshIndex{bucket->bucket_id(), static_cast(i)}); } const typename Mesh::BucketType *bucket; const AlgorithmPerEntity &functor; }; -template -struct ThreadFunctor { - using Mesh = stk::mesh::DeviceMesh; - - KOKKOS_FUNCTION - ThreadFunctor(const typename Mesh::BucketType *b, const AlgorithmPerEntity &f) : - bucket(b), - functor(f) - {} - - KOKKOS_FUNCTION - void operator()(const int& i) const - { - functor(typename stk::mesh::FastMeshIndex{bucket->bucket_id(), static_cast(i)}); - } - - const typename Mesh::BucketType *bucket; - const AlgorithmPerEntity &functor; -}; - -template +template struct TeamFunctor { - using TeamHandleType = typename stk::ngp::TeamPolicy::member_type; + using TeamHandleType = typename stk::ngp::TeamPolicy::member_type; - TeamFunctor(const Mesh m, const stk::mesh::EntityRank r, stk::NgpVector b, const AlgorithmPerEntity f) : - mesh(m), + KOKKOS_FUNCTION + TeamFunctor(const Mesh& m, const stk::mesh::EntityRank r, const stk::NgpVector& b, const AlgorithmPerEntity& f) + : mesh(m), rank(r), bucketIds(b), functor(f) { } - void operator()(const TeamHandleType& team) const - { - const int bucketIndex = bucketIds.get(team.league_rank()); - const typename Mesh::BucketType &bucket = mesh.get_bucket(rank, bucketIndex); - unsigned numElements = bucket.size(); - static_assert(std::is_same::type, stk::mesh::HostMesh>::value, - 
"Mesh is not of stk::mesh::HostMesh type"); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numElements), ThreadFunctor(&bucket, functor)); - } - - const Mesh mesh; - const stk::mesh::EntityRank rank; - stk::NgpVector bucketIds; - const AlgorithmPerEntity functor; -}; - -template -struct TeamFunctor { - using Mesh = stk::mesh::DeviceMesh; - using TeamHandleType = typename stk::ngp::TeamPolicy::member_type; - - KOKKOS_FUNCTION - TeamFunctor(const Mesh m, const stk::mesh::EntityRank r, stk::NgpVector b, const AlgorithmPerEntity f) : - mesh(m), - rank(r), - bucketIds(b), - functor(f) - {} - KOKKOS_FUNCTION void operator()(const TeamHandleType& team) const { - const int bucketIndex = bucketIds.get(team.league_rank()); + const int bucketIndex = bucketIds.get(team.league_rank()); const typename Mesh::BucketType &bucket = mesh.get_bucket(rank, bucketIndex); - unsigned numElements = bucket.size(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numElements), ThreadFunctor(&bucket, functor)); + unsigned numEntities = bucket.size(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numEntities), ThreadFunctor(&bucket, functor)); } - const Mesh mesh; - const stk::mesh::EntityRank rank; + Mesh mesh; + stk::mesh::EntityRank rank; stk::NgpVector bucketIds; const AlgorithmPerEntity functor; }; -template -struct TeamFunctor { - using Mesh = stk::mesh::DeviceMesh; - using TeamHandleType = typename stk::ngp::TeamPolicy::member_type; +template +void for_each_entity_run(Mesh &mesh, stk::topology::rank_t rank, const stk::mesh::Selector &selector, const AlgorithmPerEntity &functor) +{ + Kokkos::Profiling::pushRegion("for_each_entity_run with selector"); - KOKKOS_FUNCTION - TeamFunctor(const Mesh m, const stk::mesh::EntityRank r, stk::NgpVector b, const AlgorithmPerEntity f) : - mesh(m), - rank(r), - bucketIds(b), - functor(f) - {} + stk::NgpVector bucketIds = mesh.get_bucket_ids(rank, selector); + unsigned numBuckets = bucketIds.size(); - KOKKOS_FUNCTION - void 
operator()(const TeamHandleType& team) const - { - const int bucketIndex = bucketIds.get(team.league_rank()); - const typename Mesh::BucketType &bucket = mesh.get_bucket(rank, bucketIndex); - unsigned numElements = bucket.size(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numElements), ThreadFunctor(&bucket, functor)); - } + using EXEC_SPACE = typename Mesh::MeshExecSpace; + TeamFunctor teamFunctor(mesh, rank, bucketIds, functor); + Kokkos::parallel_for(stk::ngp::TeamPolicy(numBuckets, Kokkos::AUTO), teamFunctor); - const Mesh mesh; - const stk::mesh::EntityRank rank; - stk::NgpVector bucketIds; - const AlgorithmPerEntity functor; -}; + Kokkos::Profiling::popRegion(); +} -template -void for_each_entity_run(Mesh &mesh, stk::topology::rank_t rank, const stk::mesh::Selector &selector, const AlgorithmPerEntity &functor) +template +void for_each_entity_run(Mesh &mesh, stk::topology::rank_t rank, const stk::mesh::Selector &selector, const AlgorithmPerEntity &functor, const EXEC_SPACE& execSpace) { - Kokkos::Profiling::pushRegion("for_each_entity_run with selector"); + Kokkos::Profiling::pushRegion("for_each_entity_run with selector and EXEC_SPACE"); stk::NgpVector bucketIds = mesh.get_bucket_ids(rank, selector); unsigned numBuckets = bucketIds.size(); - Kokkos::parallel_for(stk::ngp::TeamPolicy(numBuckets, Kokkos::AUTO), - TeamFunctor(mesh, rank, bucketIds, functor)); + + using TeamHandleType = typename stk::ngp::TeamPolicy::member_type; + Kokkos::parallel_for(stk::ngp::TeamPolicy(execSpace, numBuckets, Kokkos::AUTO), + KOKKOS_LAMBDA(const TeamHandleType& team){ + const int bucketIndex = bucketIds.get(team.league_rank()); + const typename Mesh::BucketType& bucket = mesh.get_bucket(rank, bucketIndex); + const unsigned numEntities = bucket.size(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numEntities), + [&](const int& idx) { + functor(stk::mesh::FastMeshIndex{bucket.bucket_id(), static_cast(idx)}); + } + ); + } + ); 
Kokkos::Profiling::popRegion(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/PolarityUtil.hpp b/packages/stk/stk_mesh/stk_mesh/base/PolarityUtil.hpp index ea941a4d37f7..fa0fba7742d2 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/PolarityUtil.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/PolarityUtil.hpp @@ -15,7 +15,7 @@ namespace stk { namespace mesh { class BulkData; } } namespace stk { namespace mesh { class MetaData; } } namespace stk { namespace mesh { class Part; } } namespace stk { namespace mesh { struct Entity; } } -namespace stk { namespace mesh { struct SideSet; } } +namespace stk { namespace mesh { class SideSet; } } namespace stk { namespace mesh { class ElemElemGraph; } } namespace stk { namespace mesh { struct GraphEdge; } } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Relation.cpp b/packages/stk/stk_mesh/stk_mesh/base/Relation.cpp index 37345ddcc35b..cac2bbf19c64 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Relation.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Relation.cpp @@ -41,7 +41,6 @@ #include // for MetaData #include // for pair #include "stk_mesh/base/Types.hpp" // for EntityRank, OrdinalVector, etc -#include #include "stk_topology/topology.hpp" // for topology, etc #include "stk_util/util/ReportHandler.hpp" // for ThrowAssertMsg, etc @@ -56,19 +55,6 @@ Relation::RawRelationType & Relation::RawRelationType::operator =(const Relation return *this; } -void get_entities_through_relations( - const BulkData& mesh, - const std::vector & entities , - EntityRank entities_related_rank , - std::vector & entities_related ) -{ - impl::find_entities_these_nodes_have_in_common(mesh, entities_related_rank, - entities.size(), entities.data(), - entities_related); -} - -//---------------------------------------------------------------------- - void induced_part_membership(const BulkData& mesh, const Entity entity , OrdinalVector & induced_parts) @@ -80,16 +66,18 @@ void induced_part_membership(const BulkData& mesh, for (EntityRank irank = 
static_cast(e_rank + 1); irank < end_rank; ++irank) { - int num_rels = mesh.num_connectivity(entity, irank); - Entity const* rels = mesh.begin(entity, irank); + const int num_rels = mesh.num_connectivity(entity, irank); + if (num_rels > 0) { + const Entity * rels = mesh.begin(entity, irank); - const Bucket* prevBucketPtr = nullptr; - for (int j = 0; j < num_rels; ++j) - { - const Bucket* curBucketPtr = mesh.bucket_ptr(rels[j]); - if (prevBucketPtr != curBucketPtr) { - prevBucketPtr = curBucketPtr; - impl::get_part_ordinals_to_induce_on_lower_ranks(mesh, *curBucketPtr, e_rank, induced_parts); + const Bucket* prevBucketPtr = nullptr; + for (int j = 0; j < num_rels; ++j) + { + const Bucket* curBucketPtr = mesh.bucket_ptr(rels[j]); + if (prevBucketPtr != curBucketPtr) { + prevBucketPtr = curBucketPtr; + impl::get_part_ordinals_to_induce_on_lower_ranks(mesh, *curBucketPtr, e_rank, induced_parts); + } } } } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Relation.hpp b/packages/stk/stk_mesh/stk_mesh/base/Relation.hpp index 77e1e759e15b..475043118498 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Relation.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Relation.hpp @@ -39,6 +39,7 @@ #include // for ostream #include // for Entity #include // for RelationType, EntityRank, etc +#include #include // for ThrowAssertMsg #include // for vector namespace stk { namespace mesh { class BulkData; } } @@ -118,13 +119,10 @@ class Relation { enum { rank_digits = 8 , id_digits = 24 , - id_mask = ~(0u) >> rank_digits -#ifdef SIERRA_MIGRATION - , + id_mask = ~(0u) >> rank_digits , fwmk_relation_type_digits = 8, fmwk_permutation_digits = 24, fmwk_permutation_mask = ~(0u) >> fwmk_relation_type_digits -#endif }; union RawRelationType { @@ -174,7 +172,6 @@ class Relation { // Solution: Have framework and STK_Mesh use the same type as its relation_descriptor. // The code below is designed to make this class compatible with the fmwk // Relation class. 
-#ifdef SIERRA_MIGRATION public: // Moved this to enum in struct RelationType. @@ -247,7 +244,6 @@ class Relation { private: bool has_fmwk_state() const { return getRelationType() != RelationType::INVALID; } -#endif // SIERRA_MIGRATION }; //---------------------------------------------------------------------- @@ -275,11 +271,17 @@ RelationIdentifier Relation::relation_ordinal() const * that are related (connected) to all of * the input mesh entities. */ +template void get_entities_through_relations( - const BulkData& mesh, - const std::vector & entities , - EntityRank entities_related_rank , - std::vector & entities_related ); + const BulkData& mesh, + const Vec1Type& entities, + EntityRank relatedEntitiesRank, + Vec2Type& relatedEntities) +{ + impl::find_entities_these_nodes_have_in_common(mesh, relatedEntitiesRank, + entities.size(), entities.data(), + relatedEntities); +} /** \brief Induce an entity's part membership based upon relationships * from other entities. Do not include and parts in the 'omit' list. 
@@ -298,9 +300,7 @@ Relation::Relation() : m_attribute(), m_target_entity() { -#ifdef SIERRA_MIGRATION setRelationType(RelationType::INVALID); -#endif } inline @@ -309,36 +309,26 @@ Relation::Relation( Entity ent, EntityRank entityRank , RelationIdentifier id ) m_attribute(), m_target_entity(ent) { -#ifdef SIERRA_MIGRATION setRelationType(RelationType::INVALID); -#endif } inline bool Relation::operator == ( const Relation & rhs ) const { return m_raw_relation.value == rhs.m_raw_relation.value && m_target_entity == rhs.m_target_entity -#ifdef SIERRA_MIGRATION // compared fmwk state too && m_attribute == rhs.m_attribute -#endif ; } inline bool same_specification(const Relation& lhs, const Relation& rhs) { -#ifdef SIERRA_MIGRATION return lhs.entity_rank() == rhs.entity_rank() && lhs.getRelationType() == rhs.getRelationType() && lhs.getOrdinal() == rhs.getOrdinal(); -#else - return lhs.entity_rank() == rhs.entity_rank(); -#endif // SIERRA_MIGRATION } -#ifdef SIERRA_MIGRATION - inline RelationType back_relation_type(const RelationType relType) @@ -393,17 +383,12 @@ bool internal_is_handled_generically(const RelationType relation_type) } -#endif // SIERRA_MIGRATION - inline Entity Relation::entity() const { return m_target_entity; } - -#ifdef SIERRA_MIGRATION - inline Relation::Relation(EntityRank rel_rank, Entity obj, const unsigned relation_type, const unsigned ordinal, const unsigned permut) : @@ -425,8 +410,6 @@ void Relation::setMeshObj(Entity object, EntityRank object_rank ) // Find relation from mesh_obj to an Entity of certain rank by relation ordinal stk::mesh::Entity find_by_ordinal(stk::mesh::Entity mesh_obj, stk::mesh::EntityRank rank, size_t ordinal, const stk::mesh::BulkData& mesh); -#endif - } // namespace mesh } // namespace stk #endif diff --git a/packages/stk/stk_mesh/stk_mesh/base/SideSetUtil.cpp b/packages/stk/stk_mesh/stk_mesh/base/SideSetUtil.cpp index 7b82705c18aa..b7c9d1f2173f 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/SideSetUtil.cpp +++ 
b/packages/stk/stk_mesh/stk_mesh/base/SideSetUtil.cpp @@ -1,6 +1,7 @@ #include #include #include "stk_mesh/base/BulkData.hpp" +#include "stk_mesh/base/Relation.hpp" #include "stk_mesh/base/MetaData.hpp" #include "stk_mesh/base/Types.hpp" #include "stk_mesh/base/SidesetUpdater.hpp" diff --git a/packages/stk/stk_mesh/stk_mesh/base/SkinMesh.cpp b/packages/stk/stk_mesh/stk_mesh/base/SkinMesh.cpp index 31ed5bc2fefb..00e7f0403170 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/SkinMesh.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/SkinMesh.cpp @@ -40,6 +40,7 @@ #include // for back_insert_iterator, etc #include // for _Rb_tree_const_iterator, etc #include // for BulkData, etc +#include #include // for Selector, operator& #include // for pair, make_pair #include // for vector @@ -263,7 +264,7 @@ void skin_mesh_attach_new_sides_to_connected_entities(BulkData & mesh, // attach side to element Permutation permut = - mesh.find_permutation(element_topology, elem_nodes, + stk::mesh::find_permutation(mesh, element_topology, elem_nodes, side_topology, ordered_side_nodes.data(), side_ordinal); STK_ThrowRequireMsg(permut != INVALID_PERMUTATION, ": skin_mesh_attach_new_sides_to_connected_entities could not find valid permutation to connect face to element"); diff --git a/packages/stk/stk_mesh/stk_mesh/base/TopologyDimensions.hpp b/packages/stk/stk_mesh/stk_mesh/base/TopologyDimensions.hpp index 0b7b7242a440..ede6dd093f53 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/TopologyDimensions.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/TopologyDimensions.hpp @@ -35,75 +35,9 @@ #ifndef stk_mesh_TopologyDimensions_hpp #define stk_mesh_TopologyDimensions_hpp -#include // for ArrayDimTag -#include // for Field -#include // for MetaData -#include // for string -#include "stk_topology/topology.hpp" // for topology, etc - - - -namespace stk { -namespace mesh { - -/** - * The file contains Field typed and ArrayDimTags that are useful for - * setting up various types of field relations. 
- */ - -//---------------------------------------------------------------------- -/** \ingroup stk_mesh_field_dimension_tags - * \brief Define an array dimension of the number of nodes per element. - */ -class ElementNode : public shards::ArrayDimTag { -public: - - const char * name() const { - static const char n[] = "ElementNode"; - return n; - } - - static const ElementNode & tag() { - static const ElementNode self; - return self; - } - -private: - ElementNode() { - std::cerr << "Warning: The stk::mesh::ElementNode type is deprecated and will soon be removed." << std::endl; - } - ElementNode( const ElementNode & ); - ElementNode & operator = ( const ElementNode & ); -}; - -/** \ingroup stk_mesh_relation_stencil - * \brief An element Field defining an array of values, one value per - * node of the element. - */ -STK_DEPRECATED typedef Field ElementNodeField ; - -/** \ingroup stk_mesh_relation_stencil - * \brief A Field defining an array of pointers - * to an element's nodal field data. - */ - -/** \ingroup stk_mesh_relation_stencil - * \brief Declare an element-node field. - */ -STK_DEPRECATED -inline -ElementNodeField & -declare_element_node_field( MetaData & md , const std::string & s ) -{ - ElementNodeField & f = stk::mesh::legacy::declare_field< ElementNodeField >(md, stk::topology::ELEMENT_RANK, s, 1 /* 1 state */ ); - - return f ; -} - -//---------------------------------------------------------------------- - -}//namespace mesh -}//namespace stk +// Deprecated contents have been removed. Inclusions of this header should +// be removed from application code, and this file will be deprecated and +// removed in the near future. 
#endif diff --git a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp index 6a0e7ecc7594..5a6fa5ba0fb8 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp @@ -281,9 +281,11 @@ typedef PairIter PairIterEntityComm ; * a stencil function returns a non-negative integer; * otherwise a stencil function returns a negative value. */ -typedef int ( * relation_stencil_ptr )( EntityRank from_type , +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after July 31 2024 +STK_DEPRECATED typedef int ( * relation_stencil_ptr )( EntityRank from_type , EntityRank to_type , unsigned identifier ); +#endif //---------------------------------------------------------------------- /** \brief Span of a sorted relations for a given domain entity. diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/AuraGhosting.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/AuraGhosting.cpp index 08e8e124e5a7..840f92e883ed 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/AuraGhosting.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/AuraGhosting.cpp @@ -34,9 +34,9 @@ #include #include +#include #include #include -#include #include #include #include @@ -92,11 +92,13 @@ void AuraGhosting::fill_send_aura_entities(BulkData& bulkData, static constexpr EntityRank nextHigherRank = stk::topology::EDGE_RANK; for (EntityRank higherRank = nextHigherRank; higherRank < endRank; ++higherRank) { const unsigned num_rels = bucket.num_connectivity(bucketOrd, higherRank); - const Entity* rels = bucket.begin(bucketOrd, higherRank); + if (num_rels > 0) { + const Entity* rels = bucket.begin(bucketOrd, higherRank); - for (unsigned r = 0; r < num_rels; ++r) { - if (bulk.parallel_rank() == bulk.parallel_owner_rank(rels[r])) { - stk::mesh::impl::insert_upward_relations_for_owned(bulk, rels[r], higherRank, maxRank, sharingProcs, sendAuraEntityProcs); + for (unsigned r = 0; r < num_rels; ++r) { + if (bulk.parallel_rank() == 
bulk.parallel_owner_rank(rels[r])) { + stk::mesh::impl::insert_upward_relations_for_owned(bulk, rels[r], higherRank, maxRank, sharingProcs, sendAuraEntityProcs); + } } } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp new file mode 100644 index 000000000000..bcd7f473247e --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp @@ -0,0 +1,509 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef stk_mesh_impl_BucketConnDynamic_hpp +#define stk_mesh_impl_BucketConnDynamic_hpp + +//---------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------- + +namespace stk { +namespace mesh { + +using ConnectedEntities = util::StridedArray; + +namespace impl { + +class BucketConnDynamic +{ +public: + BucketConnDynamic(unsigned bucketCapacity, bool hasPermutations = false) + : m_bucketCapacity(bucketCapacity), + m_hasPermutations(hasPermutations), + m_offsets(), + m_connectivity(), + m_ordinals(), + m_permutations(), + m_numUnusedEntries(0), + m_compressionThreshold(0.5) + { + STK_ThrowRequireMsg(bucketCapacity > 0, "BucketConnDynamic must have bucketCapacity strictly greater than 0"); + } + + ~BucketConnDynamic() { } + + bool has_permutation() const { return m_hasPermutations; } + + unsigned num_connectivity(unsigned bktOrdinal) const + { + STK_ThrowAssertMsg(bktOrdinal < m_bucketCapacity,"BucketConnDynamic::num_connectivity: bktOrdinal("<> sortedOffsets(m_offsets.size()); + for(unsigned i=0; i 0) { + IndexRange& sRange = sortedOffsets[0].first; + const unsigned gap = sRange.first; + slide_range_and_update(sRange, gap, sortedOffsets[0].second); + } + + for(unsigned i=0; i 0) { + slide_range_and_update(sortedOffsets[i+1].first, gap, sortedOffsets[i+1].second); + } + } + + 
const unsigned oldSize = m_connectivity.size(); + m_connectivity.resize(oldSize-m_numUnusedEntries); + m_ordinals.resize(oldSize-m_numUnusedEntries); + if (m_hasPermutations) { + m_permutations.resize(oldSize-m_numUnusedEntries); + } + m_numUnusedEntries = 0; + const unsigned lastIdx = sortedOffsets.size()-1; + STK_ThrowRequireMsg(sortedOffsets[lastIdx].first.second == m_connectivity.size(), + "Internal BucketConnDynamic::compress_connectivity ERROR, indices out of sync with data."); + } + + void grow_if_necessary(unsigned bktOrdinal) + { + m_bucketCapacity = std::max(bktOrdinal+1, m_bucketCapacity); + if (bktOrdinal >= m_offsets.size()) { + const unsigned candidate = m_offsets.empty() ? bktOrdinal+1 : 2*m_offsets.size(); + const unsigned newSize = std::min(m_bucketCapacity, candidate); + m_offsets.resize(newSize, IndexRange(0u, 0u)); + + if (m_offsets.capacity() > m_bucketCapacity) { + std::vector(m_offsets).swap(m_offsets); + } + } + } + + void increase_bucket_capacity(unsigned newBucketCapacity) + { + STK_ThrowRequireMsg(newBucketCapacity >= m_bucketCapacity, "BucketDynamicConn::increase_bucket_capacity, old capacity="< m_offsets; + std::vector m_connectivity; + std::vector m_ordinals; + std::vector m_permutations; + unsigned m_numUnusedEntries; + double m_compressionThreshold; +}; + +} // namespace impl +} // namespace mesh +} // namespace stk + +//---------------------------------------------------------------------- +//---------------------------------------------------------------------- + +#endif + diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp index 9ef5c3227254..e5a12eb5ca37 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp @@ -39,6 +39,7 @@ #include // for runtime_error #include // for Bucket, raw_part_equal #include // for BulkData, etc +#include #include // for Partition, 
lower_bound #include #include @@ -297,8 +298,6 @@ void BucketRepository::internal_modification_end() STK_ThrowAssert(buckets[j] != nullptr); Bucket &bucket = *buckets[j]; - // Update the hop-saving connectivity data on this bucket. - // for(EntityRank to_rank = stk::topology::NODE_RANK; to_rank < stk::topology::NUM_RANKS; ++to_rank) { if (from_rank == to_rank) { @@ -311,18 +310,18 @@ void BucketRepository::internal_modification_end() bucket.m_fixed_node_connectivity.end_modification(&bucket.m_mesh); break; case stk::topology::EDGE_RANK: - bucket.m_dynamic_edge_connectivity.end_modification(&bucket.m_mesh); + bucket.m_dynamic_edge_connectivity.compress_connectivity(); break; case stk::topology::FACE_RANK: - bucket.m_dynamic_face_connectivity.end_modification(&bucket.m_mesh); + bucket.m_dynamic_face_connectivity.compress_connectivity(); break; case stk::topology::ELEMENT_RANK: - bucket.m_dynamic_element_connectivity.end_modification(&bucket.m_mesh); + bucket.m_dynamic_element_connectivity.compress_connectivity(); break; case stk::topology::INVALID_RANK: break; default: - bucket.m_dynamic_other_connectivity.end_modification(&bucket.m_mesh); + bucket.m_dynamic_other_connectivity.compress_connectivity(); break; } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/ConnectEdgesImpl.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/ConnectEdgesImpl.cpp index 84ec70ea4761..c8f0728fe2a4 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/ConnectEdgesImpl.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/ConnectEdgesImpl.cpp @@ -33,7 +33,9 @@ #include #include +#include #include +#include #include "stk_topology/topology.hpp" // for topology, etc namespace stk { @@ -103,7 +105,7 @@ struct connect_face_impl if (iedge != m_edge_map.end()) { Entity edge = iedge->second; Entity const* original_edge_nodes = mesh.begin_nodes(edge); - Permutation perm = mesh.find_permutation(face_topo, face_nodes, edge_topo, original_edge_nodes, e); + Permutation perm = 
stk::mesh::find_permutation(mesh, face_topo, face_nodes, edge_topo, original_edge_nodes, e); STK_ThrowRequireMsg(perm != INVALID_PERMUTATION, "CreateEdges: could not find valid permutation to connect face to edge"); mesh.declare_relation(m_bucket[iface], edge, e, perm, scratch1, scratch2, scratch3); } @@ -178,7 +180,7 @@ struct connect_face_entity_impl Entity edge = iedge->second; Entity const* original_edge_nodes = m_bulk.begin_nodes(edge); - Permutation perm = m_bulk.find_permutation(face_topo, face_nodes, edge_topo, original_edge_nodes, e); + Permutation perm = stk::mesh::find_permutation(m_bulk, face_topo, face_nodes, edge_topo, original_edge_nodes, e); STK_ThrowRequireMsg(perm != INVALID_PERMUTATION, "Connect face to edge: could not find valid permutation to connect face to edge"); m_bulk.declare_relation(m_face, edge, e, perm, scratch1, scratch2, scratch3); } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/DeletedEntityCache.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/DeletedEntityCache.hpp index 40252923588d..892493d4bf15 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/DeletedEntityCache.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/DeletedEntityCache.hpp @@ -39,6 +39,7 @@ #include #include #include +#include namespace stk { namespace mesh { diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.cpp new file mode 100644 index 000000000000..5945ad646e04 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.cpp @@ -0,0 +1,179 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include +#include +#include +#include +#include +#include + +namespace stk { +namespace mesh { +namespace impl { + +bool is_node_connected_to_active_element_locally(const stk::mesh::BulkData &mesh, + stk::mesh::Entity node, + const stk::mesh::Part &activePart) +{ + bool activeNode = false; + const int numElements = mesh.num_elements(node); + const stk::mesh::Entity * elements = mesh.begin_elements(node); + for (int elementI=0 ; elementI sharedProcs; + stk::CommSparse inquiryComm(bulk.parallel()); + pack_and_communicate(inquiryComm, + [&bulk,&inquiryComm,&nodesToCommunicate,&sharedProcs]() + { + for (stk::mesh::Entity node : nodesToCommunicate) { + const stk::mesh::EntityKey nodeKey = bulk.entity_key(node); + bulk.comm_shared_procs(nodeKey,sharedProcs); + for (int otherProc : sharedProcs) { + inquiryComm.send_buffer(otherProc).pack(nodeKey.id()); + } + } + } + ); + stk::mesh::EntityVector incomingNodes; + unpack_communications(inquiryComm, + [&bulk,&inquiryComm,&incomingNodes](int procId) + { + stk::mesh::EntityId nodeId; + inquiryComm.recv_buffer(procId).unpack(nodeId); + stk::mesh::Entity node = bulk.get_entity(stk::topology::NODE_RANK, nodeId); + STK_ThrowAssertMsg(bulk.is_valid(node),"Error in communication for de-imprinting the active part on nodes of killed elements in element death!"); + incomingNodes.push_back(node); + } + ); + + std::map nodeToActiveStatusMap; + stk::CommSparse answerComm(bulk.parallel()); + pack_and_communicate(answerComm, + [&bulk,&answerComm,&incomingNodes,&nodeToActiveStatusMap,&activePart]() + { + for (stk::mesh::Entity incomingNode : incomingNodes) { + std::vector sharingProcs; + bulk.comm_shared_procs(bulk.entity_key(incomingNode),sharingProcs); + bool activeStatus = is_node_connected_to_active_element_locally(bulk, incomingNode, activePart); + for (int otherProc : sharingProcs) { + answerComm.send_buffer(otherProc).pack(bulk.identifier(incomingNode)); + answerComm.send_buffer(otherProc).pack(activeStatus); + } + auto 
nodeLocationInMap = nodeToActiveStatusMap.find(incomingNode); + if (nodeLocationInMap == nodeToActiveStatusMap.end()) { + nodeToActiveStatusMap.emplace(incomingNode, activeStatus); + } + else { + nodeLocationInMap->second = nodeLocationInMap->second || activeStatus; + } + } + } + ); + + unpack_communications(answerComm, + [&bulk,&answerComm,&nodeToActiveStatusMap](int procId) + { + stk::mesh::EntityId nodeId; + answerComm.recv_buffer(procId).unpack(nodeId); + bool activeStatus = false; + answerComm.recv_buffer(procId).unpack(activeStatus); + stk::mesh::Entity node = bulk.get_entity(stk::topology::NODE_RANK,nodeId); + STK_ThrowAssertMsg(bulk.is_valid(node),"Error in communication for de-imprinting the active part on nodes of killed elements in element death!"); + auto nodeLocationInMap = nodeToActiveStatusMap.find(node); + if (nodeLocationInMap == nodeToActiveStatusMap.end()) { + nodeToActiveStatusMap.emplace(node, activeStatus); + } + else { + nodeLocationInMap->second = nodeLocationInMap->second || activeStatus; + } + } + ); + + for (auto nodeActiveStatusPair : nodeToActiveStatusMap) { + stk::mesh::Entity node = nodeActiveStatusPair.first; + bool nodeIsActiveOnAnyOtherProcessors = nodeActiveStatusPair.second; + if (!nodeIsActiveOnAnyOtherProcessors) { + nodesToDeactivate.push_back(node); + } + } + + return nodesToDeactivate; +} + +} // namespace impl +} // namespace mesh +} // namespace stk + diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.hpp new file mode 100644 index 000000000000..2762f9600067 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/ElemDeathImpl.hpp @@ -0,0 +1,62 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + + +#ifndef stk_mesh_ElemDeathImpl_hpp +#define stk_mesh_ElemDeathImpl_hpp + +#include + +namespace stk { namespace mesh { class BulkData; } } +namespace stk { namespace mesh { class Part; } } + +namespace stk { +namespace mesh { +namespace impl { + +bool +is_node_connected_to_active_element_locally(const stk::mesh::BulkData &mesh, + stk::mesh::Entity node, + const stk::mesh::Part &activePart); + +stk::mesh::EntityVector +get_nodes_to_deactivate(const stk::mesh::BulkData& bulk, + const stk::mesh::EntityVector & deactivatedElements, + const stk::mesh::Part & activePart); + +} // namespace impl +} // namespace mesh +} // namespace stk + +#endif diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/FieldRepository.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/FieldRepository.cpp index c36aa6c93b7a..78bcf4bbb8dd 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/FieldRepository.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/FieldRepository.cpp @@ -86,40 +86,14 @@ FieldRepository::verify_field_type(const FieldBase & arg_field const bool ok_number_states = not arg_num_states || arg_num_states == arg_field.number_of_states(); - if (m_meta.is_using_simple_fields()) { - const bool has_extra_template_parameters = (arg_field.m_field_rank > 0); - - STK_ThrowErrorMsgIf(not ok_traits || not ok_number_states || has_extra_template_parameters, - " verify_field_type FAILED: Existing field = " << - print_field_type(arg_field.data_traits(), arg_field.m_field_rank, arg_field.m_dim_tags) << - "[ name = \"" << arg_field.name() << - "\" , #states = " << arg_field.number_of_states() << " ]" << - " Expected field info = " << - print_field_type(arg_traits, arg_rank, arg_dim_tags) << - "[ #states = " << arg_num_states << " ]"); - } - else { - bool ok_dimension = ! 
arg_rank || arg_rank == stk::mesh::legacy::field_array_rank(arg_field) || - arg_rank + 1 == stk::mesh::legacy::field_array_rank(arg_field) || - arg_rank - 1 == stk::mesh::legacy::field_array_rank(arg_field) ; - - const unsigned check_rank = (arg_rank < stk::mesh::legacy::field_array_rank(arg_field)) ? arg_rank - : stk::mesh::legacy::field_array_rank(arg_field); - - for (unsigned i = 0; i < check_rank && ok_dimension; ++i) { - ok_dimension = arg_dim_tags[i] == stk::mesh::legacy::dimension_tags(arg_field)[i]; - } - - STK_ThrowErrorMsgIf(not ok_traits || not ok_number_states || not ok_dimension, - " verify_field_type FAILED: Existing field = " << - print_field_type(arg_field.data_traits(), stk::mesh::legacy::field_array_rank(arg_field), - stk::mesh::legacy::dimension_tags(arg_field)) << - "[ name = \"" << arg_field.name() << - "\" , #states = " << arg_field.number_of_states() << " ]" << - " Expected field info = " << - print_field_type(arg_traits, arg_rank, arg_dim_tags) << - "[ #states = " << arg_num_states << " ]"); - } + STK_ThrowErrorMsgIf(not ok_traits || not ok_number_states, + " verify_field_type FAILED: Existing field = " << + print_field_type(arg_field.data_traits(), arg_field.m_field_rank, arg_field.m_dim_tags) << + "[ name = \"" << arg_field.name() << + "\" , #states = " << arg_field.number_of_states() << " ]" << + " Expected field info = " << + print_field_type(arg_traits, arg_rank, arg_dim_tags) << + "[ #states = " << arg_num_states << " ]"); } //---------------------------------------------------------------------- diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.cpp new file mode 100644 index 000000000000..7fb0ada23898 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.cpp @@ -0,0 +1,62 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). 
Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include +#include +#include +#include + +namespace stk { +namespace mesh { +namespace impl { + +void GlobalIdEntitySorter::sort(stk::mesh::BulkData &bulk, EntityVector& entityVector) const +{ + auto fastEntityLess = [&bulk](const Entity lhs, const Entity rhs)->bool + {return (bulk.entity_key(lhs) < bulk.entity_key(rhs));}; + + const bool useSlowEntityLess = m_mustSortFacesByNodeIds; + + if (useSlowEntityLess) { + std::sort(entityVector.begin(), entityVector.end(), EntityLess(bulk)); + } + else { + std::sort(entityVector.begin(), entityVector.end(), fastEntityLess); + } +} + +} // namespace impl +} // namespace mesh +} // namespace stk + diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.hpp new file mode 100644 index 000000000000..0a2819eb4355 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/GlobalIdEntitySorter.hpp @@ -0,0 +1,65 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + + +#ifndef stk_mesh_GlobalIdEntitySorter_hpp +#define stk_mesh_GlobalIdEntitySorter_hpp + +#include +#include + +namespace stk { namespace mesh { class BulkData; } } + +namespace stk { +namespace mesh { +namespace impl { + +class GlobalIdEntitySorter : public EntitySorterBase +{ +public: + GlobalIdEntitySorter(bool mustSortFacesByNodeIds=false) + : m_mustSortFacesByNodeIds(mustSortFacesByNodeIds) + {} + + virtual void sort(stk::mesh::BulkData &bulk, EntityVector& entityVector) const; + +private: + bool m_mustSortFacesByNodeIds; +}; + +} // namespace impl +} // namespace mesh +} // namespace stk + +#endif diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommImplUtils.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommImplUtils.cpp index 885aa9afa6c0..055501cbaec0 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommImplUtils.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommImplUtils.cpp @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.cpp index 107f99d27386..e264ab0b7de7 100644 --- 
a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.cpp @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include @@ -721,6 +723,70 @@ bool unpack_not_owned_verify(const BulkData& mesh, return result ; } +bool verify_parallel_attributes(const BulkData& mesh, + const EntityCommDatabase& commDB, + const EntityCommListInfoVector& commList, + const std::function& getEntityComm, + std::ostream & error_log ) +{ + bool result = true ; + + const EntityRank entityRankEnd = static_cast(mesh.mesh_meta_data().entity_rank_count()); + + for ( EntityRank rank = stk::topology::NODE_RANK ; rank < entityRankEnd ; ++rank ) { + const BucketVector & all_buckets = mesh.buckets(rank); + + for(const Bucket* bucketptr : all_buckets) + { + result = result && impl::verify_parallel_attributes_for_bucket(*bucketptr, + getEntityComm, error_log); + } + } + + bool isGloballyConsistentCommList = impl::is_comm_list_globally_consistent(mesh, commDB, commList, error_log); + result = result && isGloballyConsistentCommList; + + return result ; +} + +bool comm_mesh_verify_parallel_consistency(const BulkData& mesh, + const EntityCommDatabase& commDB, + const EntityCommListInfoVector& commList, + const std::function& getEntityComm, + std::ostream & error_log ) +{ + int verified_ok = 1 ; + + // Verify consistency of parallel attributes + + verified_ok = verify_parallel_attributes(mesh, commDB, commList, getEntityComm, error_log ); + if (mesh.parallel_size() > 1) { + all_reduce( mesh.parallel() , ReduceMin<1>( & verified_ok ) ); + } + + // Verify entities against owner. 
+ + if ( verified_ok ) { + CommSparse comm( mesh.parallel() ); + + impl::pack_owned_verify(mesh, commDB, commList, comm); + + comm.allocate_buffers(); + + impl::pack_owned_verify(mesh, commDB, commList, comm); + + comm.communicate(); + + verified_ok = impl::unpack_not_owned_verify(mesh, commList, getEntityComm, comm , error_log ); + + if (mesh.parallel_size() > 1) { + all_reduce( mesh.parallel() , ReduceMin<1>( & verified_ok ) ); + } + } + + return verified_ok == 1 ; +} + void check_matching_parts_count(unsigned partsCount, int rank, int commSize, MPI_Comm comm) { std::vector partsCounts(commSize); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.hpp index 95c57daf6d56..43a6b797de5c 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshCommVerify.hpp @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include @@ -109,6 +111,18 @@ bool unpack_not_owned_verify(const BulkData& mesh, CommSparse& commSparse, std::ostream& error_log); +bool verify_parallel_attributes(const BulkData& mesh, + const EntityCommDatabase& commDB, + const EntityCommListInfoVector& commList, + const std::function& getEntityComm, + std::ostream & error_log ); + +bool comm_mesh_verify_parallel_consistency(const BulkData& mesh, + const EntityCommDatabase& commDB, + const EntityCommListInfoVector& commList, + const std::function& getEntityComm, + std::ostream & error_log ); + void check_matching_parts_count(unsigned partsCount, int rank, int commSize, MPI_Comm comm); void check_matching_parts(const PartVector& parts, unsigned partsCount, int rank, int commSize, MPI_Comm comm); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp index eaec56dc3147..2c0b21aeba93 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp +++ 
b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp @@ -34,6 +34,8 @@ #include #include +#include +#include #include #include #include @@ -56,68 +58,12 @@ namespace stk { namespace mesh { namespace impl { -bool is_in_list(Entity entity, const Entity* begin, const Entity* end) -{ - return std::find(begin, end, entity) != end; -} - -void remove_entities_not_in_list(const Entity* begin, const Entity* end, std::vector& elementsInCommon) -{ - int numElemsFound=0; - for(int j=0, endElemsInCommon=elementsInCommon.size(); j numElemsFound) { - elementsInCommon[numElemsFound] = elementsInCommon[j]; - } - ++numElemsFound; - } - } - elementsInCommon.resize(numElemsFound); -} - -void remove_entities_not_connected_to_other_nodes(const BulkData& mesh, stk::mesh::EntityRank rank, unsigned numNodes, const Entity* nodes, std::vector& elementsInCommon) -{ - for(unsigned i = 1; i < numNodes; ++i) { - const ConnectedEntities conn = mesh.get_connected_entities(nodes[i], rank); - remove_entities_not_in_list(conn.data(), conn.data()+conn.size(), elementsInCommon); - } -} - -void find_entities_these_nodes_have_in_common(const BulkData& mesh, stk::mesh::EntityRank rank, unsigned numNodes, const Entity* nodes, std::vector& elementsInCommon) -{ - elementsInCommon.clear(); - if(numNodes > 0) - { - const ConnectedEntities conn = mesh.get_connected_entities(nodes[0], rank); - elementsInCommon.assign(conn.data(), conn.data()+conn.size()); - remove_entities_not_connected_to_other_nodes(mesh, rank, numNodes, nodes, elementsInCommon); - } -} - - -void fill_owned_entities_with_larger_ids_connected_to_node(const BulkData& mesh, - Entity node, - stk::mesh::EntityRank rank, - stk::mesh::EntityId id, - std::vector& elemsWithLargerIds) -{ - const ConnectedEntities elems = mesh.get_connected_entities(node, rank); - unsigned numElems = elems.size(); - elemsWithLargerIds.reserve(numElems); - - for(unsigned j = 0; j < numElems; ++j) - if(mesh.identifier(elems[j]) > id && mesh.bucket(elems[j]).owned()) 
- elemsWithLargerIds.push_back(elems[j]); -} - void find_entities_with_larger_ids_these_nodes_have_in_common_and_locally_owned(stk::mesh::EntityId id, const BulkData& mesh, stk::mesh::EntityRank rank, unsigned numNodes, const Entity* nodes, std::vector& elementsInCommon) { - elementsInCommon.clear(); - if(numNodes > 0) - { - fill_owned_entities_with_larger_ids_connected_to_node(mesh, nodes[0], rank, id, elementsInCommon); - remove_entities_not_connected_to_other_nodes(mesh, rank, numNodes, nodes, elementsInCommon); - } + find_entities_these_nodes_have_in_common_and(mesh, rank, numNodes, nodes, elementsInCommon, + [&](const Entity& entity) { + return mesh.identifier(entity) > id && mesh.bucket(entity).owned(); + }); } const EntityCommListInfo& find_entity(const BulkData& mesh, @@ -133,25 +79,19 @@ const EntityCommListInfo& find_entity(const BulkData& mesh, bool do_these_nodes_have_any_shell_elements_in_common(BulkData& mesh, unsigned numNodes, const Entity* nodes) { std::vector elems; - find_entities_these_nodes_have_in_common(mesh, stk::topology::ELEMENT_RANK, numNodes, nodes, elems); - bool found_shell = false; - for (unsigned count = 0; count < elems.size(); ++count) { - if (mesh.bucket(elems[count]).topology().is_shell()) { - found_shell = true; - } - } - return found_shell; + find_entities_these_nodes_have_in_common_and(mesh, stk::topology::ELEMENT_RANK, numNodes, nodes, elems, + [&](const Entity& entity) { + return mesh.bucket(entity).topology().is_shell(); + }); + return !elems.empty(); } void find_locally_owned_elements_these_nodes_have_in_common(const BulkData& mesh, unsigned numNodes, const Entity* nodes, std::vector& elems) { - find_entities_these_nodes_have_in_common(mesh, stk::topology::ELEMENT_RANK, numNodes, nodes, elems); - - for(int i=elems.size()-1; i>=0; --i) { - if (!mesh.bucket(elems[i]).owned()) { - elems.erase(elems.begin()+i); - } - } + find_entities_these_nodes_have_in_common_and(mesh, stk::topology::ELEMENT_RANK, numNodes, nodes, elems, + 
[&](const Entity& entity) { + return mesh.bucket(entity).owned(); + }); } bool find_element_edge_ordinal_and_equivalent_nodes(BulkData& mesh, Entity element, unsigned numEdgeNodes, const Entity* edgeNodes, unsigned& elemEdgeOrdinal, Entity* elemEdgeNodes) @@ -309,7 +249,7 @@ void connectUpwardEntityToEntity(stk::mesh::BulkData& mesh, stk::mesh::Entity up STK_ThrowRequireMsg(entity_ordinal !=100000, "Program error. Contact sierra-help for support."); if ((entity_rank > stk::topology::NODE_RANK) && (mesh.entity_rank(upward_entity) > entity_rank)) { - perm = mesh.find_permutation(upward_entity_topology, upward_entity_nodes, entity_top, nodes, entity_ordinal); + perm = stk::mesh::find_permutation(mesh, upward_entity_topology, upward_entity_nodes, entity_top, nodes, entity_ordinal); STK_ThrowRequireMsg(perm != INVALID_PERMUTATION, "find_permutation could not find permutation that produces a match"); } mesh.declare_relation(upward_entity, entity, entity_ordinal, perm, scratch1, scratch2, scratch3); @@ -319,7 +259,7 @@ void connectUpwardEntityToEntity(stk::mesh::BulkData& mesh, stk::mesh::Entity up // Given a vector of local ownership changes, remove duplicates and // sanity check. 
-void internal_clean_and_verify_parallel_change( +bool internal_clean_and_verify_parallel_change( const BulkData & mesh , std::vector & local_change ) { @@ -327,7 +267,7 @@ void internal_clean_and_verify_parallel_change( const int p_size = mesh.parallel_size(); const ParallelMachine p_comm = mesh.parallel(); - size_t error_count = 0 ; + size_t error_and_count[2] = {0, 0} ; std::ostringstream error_msg ; @@ -361,7 +301,7 @@ void internal_clean_and_verify_parallel_change( bad_new_owner_does_not_exist || bad_inconsistent_change) { - ++error_count ; + ++error_and_count[0]; error_msg << " P" << p_rank << ": " ; if ( bad_null ) { error_msg << " NULL ENTITY" ; } @@ -383,14 +323,6 @@ void internal_clean_and_verify_parallel_change( } } - all_reduce( p_comm , ReduceSum<1>( & error_count ) ); - - if ( error_count ) { - all_write_string( p_comm , std::cerr , error_msg.str() ); - - STK_ThrowErrorMsg("Bad change ownership directives\n"); - } - // Filter out non-changes (entity will be NULL { std::vector::iterator i = local_change.begin(), @@ -398,8 +330,19 @@ void internal_clean_and_verify_parallel_change( i = std::remove( i , j , EntityProc(Entity(), 0) ); local_change.erase( i , j ); } -} + error_and_count[1] = local_change.size(); + + all_reduce( p_comm , ReduceSum<2>( error_and_count ) ); + + if ( error_and_count[0] ) { + all_write_string( p_comm , std::cerr , error_msg.str() ); + + STK_ThrowErrorMsg("Bad change ownership directives\n"); + } + + return error_and_count[1] > 0; +} //---------------------------------------------------------------------- // Generate a parallel consistent list of ownership changes: @@ -614,7 +557,7 @@ void find_side_nodes(BulkData& mesh, Entity element, int side_ordinal, EntityVec for (unsigned count=0 ; count(entity_relations[k].getOrdinal()), static_cast(entity_relations[k].attribute()) diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.hpp index 
9795aed6704c..fef5b2ccd7db 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.hpp @@ -39,6 +39,7 @@ #include #include +#include #include #include #include "stk_util/parallel/DistributedIndex.hpp" // for DistributedIndex, etc @@ -59,34 +60,81 @@ struct EntityGhostData; //stk-mesh capabilities. //---------------------------------------------------------------------- -void find_entities_these_nodes_have_in_common(const BulkData& mesh, stk::mesh::EntityRank rank, unsigned numNodes, const Entity* nodes, std::vector& entity_vector); +inline +bool is_in_list(Entity entity, const Entity* begin, const Entity* end) +{ + return std::find(begin, end, entity) != end; +} -void find_entities_with_larger_ids_these_nodes_have_in_common_and_locally_owned(stk::mesh::EntityId id, const BulkData& mesh, stk::mesh::EntityRank rank, unsigned numNodes, const Entity* nodes, std::vector& entity_vector); +template +void +remove_entities_not_in_list(const Entity* beginList, + const Entity* endList, + VecType& entities) +{ + int numFound=0; + for(int j=0, initialSize=entities.size(); j numFound) { + entities[numFound] = entities[j]; + } + ++numFound; + } + } + entities.resize(numFound); +} -void remove_entities_not_connected_to_other_nodes(const BulkData& mesh, EntityRank rank, - unsigned numNodes, const Entity* nodes, - std::vector& elementsInCommon); +template +void +remove_entities_not_connected_to_other_nodes(const BulkData& mesh, + stk::mesh::EntityRank rank, + unsigned numNodes, + const Entity* nodes, + VecType& elementsInCommon) +{ + for(unsigned i = 1; i < numNodes; ++i) { + const ConnectedEntities conn = mesh.get_connected_entities(nodes[i], rank); + remove_entities_not_in_list(conn.data(), conn.data()+conn.size(), elementsInCommon); + } +} -template -void find_entities_these_nodes_have_in_common_and(const BulkData& mesh, EntityRank rank, - unsigned numNodes, const Entity* nodes, - std::vector& 
elementsInCommon, - const Pred& pred) +template +void +find_entities_these_nodes_have_in_common(const BulkData& mesh, + stk::mesh::EntityRank rank, + unsigned numNodes, + const Entity* nodes, + VecType& entitiesInCommon) { - elementsInCommon.clear(); - if(numNodes > 0) - { - const Entity* begin = mesh.begin(nodes[0], rank); - const Entity* end = mesh.end(nodes[0], rank); - elementsInCommon.reserve(std::distance(begin,end)); - for(const Entity* ent = begin; ent != end; ++ent) { - if (pred(*ent)) { - elementsInCommon.push_back(*ent); - } - } + entitiesInCommon.clear(); + if(numNodes > 0) { + const ConnectedEntities conn = mesh.get_connected_entities(nodes[0], rank); + entitiesInCommon.assign(conn.data(), conn.data()+conn.size()); + remove_entities_not_connected_to_other_nodes(mesh, rank, numNodes, nodes, entitiesInCommon); + } +} + +void find_entities_with_larger_ids_these_nodes_have_in_common_and_locally_owned(stk::mesh::EntityId id, const BulkData& mesh, stk::mesh::EntityRank rank, unsigned numNodes, const Entity* nodes, std::vector& entity_vector); - remove_entities_not_connected_to_other_nodes(mesh, rank, numNodes, nodes, elementsInCommon); +template +void +find_entities_these_nodes_have_in_common_and(const BulkData& mesh, EntityRank rank, + unsigned numNodes, const Entity* nodes, + std::vector& entitiesInCommon, + const Pred& pred) +{ + entitiesInCommon.clear(); + if(numNodes > 0) { + const ConnectedEntities conn = mesh.get_connected_entities(nodes[0], rank); + entitiesInCommon.reserve(conn.size()); + for(unsigned i=0; i& node_keys); -void internal_clean_and_verify_parallel_change( +bool internal_clean_and_verify_parallel_change( const BulkData & mesh , std::vector & local_change ); @@ -134,31 +182,6 @@ stk::mesh::EntityId side_id_formula(stk::mesh::EntityId elemId, unsigned sideOrd return 10*elemId + sideOrdinal + 1; } -class GlobalIdEntitySorter : public EntitySorterBase -{ -public: - GlobalIdEntitySorter(bool mustSortFacesByNodeIds=false) - : 
m_mustSortFacesByNodeIds(mustSortFacesByNodeIds) - {} - - virtual void sort(stk::mesh::BulkData &bulk, EntityVector& entityVector) const - { - auto fastEntityLess = [&bulk](const Entity lhs, const Entity rhs)->bool - {return (bulk.entity_key(lhs) < bulk.entity_key(rhs));}; - - const bool useSlowEntityLess = m_mustSortFacesByNodeIds; - - if (useSlowEntityLess) { - std::sort(entityVector.begin(), entityVector.end(), EntityLess(bulk)); - } - else { - std::sort(entityVector.begin(), entityVector.end(), fastEntityLess); - } - } -private: - bool m_mustSortFacesByNodeIds; -}; - stk::parallel::DistributedIndex::KeySpanVector convert_entity_keys_to_spans( const MetaData & meta ); void get_part_ordinals_to_induce_on_lower_ranks_except_for_omits(const BulkData& mesh, diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp index 97c3a2c7a5db..ee62942156bb 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp @@ -3,10 +3,15 @@ #include #include #include +#include #include #include #include +#include +#include +#include #include +#include namespace stk { namespace mesh { @@ -50,7 +55,6 @@ bool MeshModification::modification_begin(const std::string description) const stk::mesh::FieldVector allFields = m_bulkData.mesh_meta_data().get_fields(); for (FieldBase * stkField : allFields) { stkField->sync_to_host(); - stkField->modify_on_host(); if (stkField->has_ngp_field()) { impl::get_ngp_field(*stkField)->debug_modification_begin(); } @@ -207,14 +211,290 @@ bool MeshModification::modification_end_after_node_sharing_resolution() return true; } -void MeshModification::change_entity_owner( const EntityProcVec & arg_change) +bool MeshModification::change_entity_owner( const EntityProcVec & arg_change) { STK_ThrowRequireMsg(in_synchronized_state(), "BulkData::change_entity_owner() must not be called from within a 
modification cycle."); + std::vector local_change( arg_change ); + const bool validChangesOnAnyProc = impl::internal_clean_and_verify_parallel_change(m_bulkData, local_change); + if (!validChangesOnAnyProc) { + return false; + } + + m_bulkData.notifier.notify_elements_about_to_move_procs(local_change); + modification_optimization mod_optimization = MOD_END_SORT; modification_begin("change_entity_owner"); - m_bulkData.internal_change_entity_owner(arg_change, mod_optimization); + internal_change_entity_owner(local_change, mod_optimization); m_bulkData.update_sharing_after_change_entity_owner(); m_bulkData.internal_modification_end_for_change_entity_owner(mod_optimization); + + m_bulkData.notifier.notify_elements_moved_procs(local_change); + + return true; +} + +void MeshModification::internal_change_entity_owner( const std::vector & local_change, + modification_optimization mod_optimization ) +{ + m_bulkData.require_ok_to_modify(); + m_bulkData.m_modSummary.track_change_entity_owner(local_change); + + const MetaData & meta = m_bulkData.mesh_meta_data() ; + const int p_rank = m_bulkData.parallel_rank() ; + const int p_size = m_bulkData.parallel_size() ; + ParallelMachine p_comm = m_bulkData.parallel() ; + + //------------------------------ + // internal_change_entity_owner can assume a clean local change list, it was + // checked in MeshModification::change_entity_owner, which called this method. + + //---------------------------------------- + // Parallel synchronous determination of changing shared and ghosted. + + // The two vectors below will contain changes to ghosted and shared + // entities on this process coming from change-entity-owner requests + // on other processes. + std::vector ghosted_change ; + std::vector shared_change ; + + impl::internal_generate_parallel_change_lists( m_bulkData , local_change , + shared_change , ghosted_change ); + + //------------------------------ + // Have enough information to delete all effected ghosts. 
+ // If the closure of a ghost contains a changing entity + // then that ghost must be deleted. + // Request that all ghost entities in the closure of the ghost be deleted. + + std::set send_closure(m_bulkData); + impl::StoreInEntityProcSet store_entity_proc_in_set(m_bulkData, send_closure); + + // Compute the closure of all the locally changing entities + for (const EntityProc& entityProc : local_change) { + store_entity_proc_in_set.proc = entityProc.second; + impl::VisitClosureGeneral(m_bulkData, entityProc.first, m_bulkData.entity_rank(entityProc.first), store_entity_proc_in_set, store_entity_proc_in_set); + } + + // Calculate all the ghosts that are impacted by the set of ownership + // changes. We look at ghosted, shared, and local changes looking for ghosts + // that are either in the closure of the changing entity, or have the + // changing entity in their closure. All modified ghosts will be removed. + { + impl::OnlyVisitGhostsOnce only_visit_ghosts_once(m_bulkData); + impl::StoreEntity store_entity(m_bulkData); + + std::vector& allChanges = ghosted_change; + allChanges.reserve(allChanges.size()+shared_change.size()+send_closure.size()); + allChanges.insert(allChanges.end(), shared_change.begin(), shared_change.end()); + allChanges.insert(allChanges.end(), local_change.begin(), local_change.end()); + impl::VisitAuraClosureGeneral(m_bulkData,allChanges.begin(),allChanges.end(),store_entity,only_visit_ghosts_once); + + std::vector remove_modified_ghosts; + store_entity.store_visited_entities_in_vec(remove_modified_ghosts); + + std::vector empty_add ; + std::vector removesForThisGhosting; + removesForThisGhosting.reserve(remove_modified_ghosts.size()); + const bool notAddingSendGhosts = true; + + // Skip 'm_ghosting[0]' which is the shared subset. 
+ for (unsigned i=1; i::iterator + i = send_closure.begin() ; i != send_closure.end() ; ++i ) { + CommBuffer & buffer = comm.send_buffer( i->second ); + Entity entity = i->first; + pack_entity_info(m_bulkData, buffer, entity, onlyPackDownwardRelations); + if (!m_bulkData.is_communicated_with_proc(entity, i->second) || + std::binary_search(local_change.begin(), local_change.end(), *i, EntityLess(m_bulkData))) { + buffer.pack(1); + pack_field_values(m_bulkData, buffer , entity ); + } + else { + buffer.pack(0); + } + pack_sideset_info(m_bulkData, buffer , entity ); + + if (unique_list_of_send_closure.empty() || m_bulkData.entity_key(unique_list_of_send_closure.back()) != m_bulkData.entity_key(entity)) { + unique_list_of_send_closure.push_back(entity); + } + } + + comm.allocate_buffers(); + + for ( std::set::iterator + i = send_closure.begin() ; i != send_closure.end() ; ++i ) { + CommBuffer & buffer = comm.send_buffer( i->second ); + Entity entity = i->first; + pack_entity_info(m_bulkData, buffer, entity, onlyPackDownwardRelations); + if (!m_bulkData.is_communicated_with_proc(entity, i->second) || + std::binary_search(local_change.begin(), local_change.end(), *i, EntityLess(m_bulkData))) { + buffer.pack(1); + pack_field_values(m_bulkData, buffer , entity ); + } + else { + buffer.pack(0); + } + pack_sideset_info(m_bulkData, buffer , entity ); + } + + const bool deallocateSendBuffers = true; + comm.communicate(deallocateSendBuffers); + + SideSetHelper helper(m_bulkData, m_bulkData.mesh_meta_data().universal_part()); + for ( std::set::iterator + i = send_closure.begin() ; i != send_closure.end() ; ++i ) { + Entity entity = i->first; + helper.remove_element_entries_from_sidesets(entity); + } + + OrdinalVector partOrdinals; + OrdinalVector scratchOrdinalVec, scratchSpace; + PartVector parts ; + std::vector relations ; + + OrdinalVector removeCustomGhostParts; + const std::vector& ghostingObjs = m_bulkData.ghostings(); + const unsigned firstCustomGhosting = 2; + 
for(unsigned i=firstCustomGhosting; i result = m_bulkData.internal_create_entity( key ); + + Entity entity = result.first; + + // The entity was copied and not created. + partOrdinals.clear(); + for(const stk::mesh::Part* part : parts) { + partOrdinals.push_back(part->mesh_meta_data_ordinal()); + } + + m_bulkData.internal_change_entity_parts( entity , partOrdinals , removeCustomGhostParts, scratchOrdinalVec, scratchSpace ); + for(unsigned i=firstCustomGhosting; iordinal(), p)); + } + + if (m_bulkData.state(entity) == Created) { + set_entity_state(entity.local_offset(), Modified); + } + + m_bulkData.internal_set_owner(entity, owner); + + m_bulkData.internal_declare_relation( entity , relations, scratchOrdinalVec ); + + int shouldUnpackFieldValues = 0; + buf.unpack(shouldUnpackFieldValues); + if ( shouldUnpackFieldValues==1 ) { + if ( ! unpack_field_values(m_bulkData, buf , entity , error_msg ) ) { + ++error_count ; + } + } + + unpack_sideset_info( buf, m_bulkData, entity); + } + } + +#ifndef NDEBUG + all_reduce( p_comm , ReduceSum<1>( & error_count ) ); +#endif + STK_ThrowRequireMsg(error_count==0, error_msg.str() ); + + // Any entity that I sent and is not in an owned closure is deleted. + // The owned closure will be effected by received entities, so can + // only clean up after the newly owned entities have been received. + // Destroy backwards so as not to invalidate closures in the process. + { + for ( EntityVector::reverse_iterator i = unique_list_of_send_closure.rbegin() ; i != unique_list_of_send_closure.rend() ; ++i) { + stk::mesh::Entity entity = *i; + if ( ! 
m_bulkData.owned_closure(entity) ) { + for(unsigned ig=firstCustomGhosting; ig // for MeshIndex, EntityRank, etc #include -#include #include #include "stk_mesh/base/EntityKey.hpp" #include "stk_mesh/base/EntityParallelState.hpp" @@ -81,7 +80,7 @@ class MeshModification bool resolve_node_sharing(); bool modification_end_after_node_sharing_resolution(); - void change_entity_owner( const EntityProcVec & arg_change); + bool change_entity_owner( const EntityProcVec & arg_change); void internal_resolve_shared_modify_delete( const std::vector& remotely_modified_shared_entities, @@ -115,6 +114,8 @@ class MeshModification const DeletedEntityCache& get_deleted_entity_cache() const { return m_deleted_entity_cache; } void delete_shared_entities_which_are_no_longer_in_owned_closure(EntityProcVec& entitiesToRemoveFromSharing); + void internal_change_entity_owner( const std::vector & local_change, + modification_optimization mod_optimization ); private: bool remote_owner_destroyed(EntityKey key, const std::vector& pllStates) const; void reset_shared_entity_changed_parts() { m_did_any_shared_entity_change_parts = false; } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp index 93e9f422d511..bbcc1183b770 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -53,108 +54,196 @@ #include namespace stk { -namespace mesh { - +namespace ngp_field_blas { namespace impl { + //************ implementation detail, not for public use ******************** //************ public functions are in stk_mesh/base/NgpFieldBLAS.hpp ************ -template -inline -void field_fill_no_sync_or_mark(const Scalar alpha, DeviceField& ngpField, const EXEC_SPACE& execSpace) +template +constexpr bool operate_on_ngp_mesh() { - using FieldExecSpace = typename 
DeviceField::ExecSpace::execution_space; - static_assert(Kokkos::SpaceAccessibility::accessible); - auto ngpView = impl::get_device_data(ngpField); - Kokkos::deep_copy(execSpace, ngpView, alpha); -} +#ifdef STK_USE_DEVICE_MESH -template -inline -void field_fill_no_sync_or_mark(const Scalar alpha, DeviceField& ngpField, const Selector& selector, const EXEC_SPACE& execSpace) -{ - using FieldExecSpace = typename DeviceField::ExecSpace::execution_space; - static_assert(Kokkos::SpaceAccessibility::accessible); - auto ngpMesh = get_updated_ngp_mesh(ngpField.get_field_base()->get_mesh()); - for_each_entity_run(ngpMesh, ngpField.get_rank(), selector, KOKKOS_LAMBDA(const FastMeshIndex& entityIndex) { - const unsigned numComponents = ngpField.get_num_components_per_entity(entityIndex); - for(unsigned d=0; d::accessible; + constexpr bool operateOnNgpMesh = isActuallyDeviceExecSpace; +#else + constexpr bool operateOnNgpMesh = true; +#endif + +#else + constexpr bool operateOnNgpMesh = false; +#endif + + return operateOnNgpMesh; } -template -inline -void field_fill_no_sync_or_mark(const Scalar alpha, HostField& ngpField, const EXEC_SPACE& execSpace) +template +bool mark_modified_on_device( + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - using FieldExecSpace = typename DeviceField::ExecSpace::execution_space; - static_assert(Kokkos::SpaceAccessibility::accessible); - stk::mesh::field_fill(alpha, *ngpField.get_field_base()); + return operate_on_ngp_mesh() || isDeviceExecSpaceUserOverride; } -template -inline -void field_fill_no_sync_or_mark(const Scalar alpha, HostField& ngpField, const Selector& selector, const EXEC_SPACE& execSpace) +template +class FieldFill { +public: + FieldFill(const NGP_FIELD_TYPE& field, Scalar inputAlpha) + : ngpField(field), alpha(inputAlpha) + {} + + KOKKOS_FUNCTION + void operator()(const stk::mesh::FastMeshIndex& entityIndex) const + { + const int numComponents = 
ngpField.get_num_components_per_entity(entityIndex); + for(int component=0; component +class FieldFillComponent { +public: + FieldFillComponent(const NGP_FIELD_TYPE& field, Scalar inputAlpha, int inputComponent) + : ngpField(field), alpha(inputAlpha), component(inputComponent) + {} + + KOKKOS_FUNCTION + void operator()(const stk::mesh::FastMeshIndex& entityIndex) const + { + const int numComponents = ngpField.get_num_components_per_entity(entityIndex); + STK_NGP_ThrowRequire(component < numComponents); + ngpField(entityIndex, component) = alpha; + } + + NGP_FIELD_TYPE ngpField; + Scalar alpha; + int component; +}; + +template +void field_fill_for_each_entity(const NGP_MESH_TYPE& ngpMesh, + const NGP_FIELD_TYPE& ngpField, + Scalar alpha, + int component, + const stk::mesh::Selector& selector, + const EXEC_SPACE& execSpace) { - using FieldExecSpace = typename DeviceField::ExecSpace::execution_space; - static_assert(Kokkos::SpaceAccessibility::accessible); - stk::mesh::field_fill(alpha, *ngpField.get_field_base(), selector); + if (component == -1) { + FieldFill fieldFill(ngpField, alpha); + stk::mesh::for_each_entity_run(ngpMesh, ngpField.get_rank(), selector, fieldFill, execSpace); + } + else { + FieldFillComponent fieldFill(ngpField, alpha, component); + stk::mesh::for_each_entity_run(ngpMesh, ngpField.get_rank(), selector, fieldFill, execSpace); + } } -template -void field_copy_no_sync_or_mark_t(const FieldBase& xField, const FieldBase& yField, const EXEC_SPACE& execSpace) +template +void field_fill_impl(const Scalar alpha, + const stk::mesh::FieldBase& field, + int component, + const stk::mesh::Selector* selectorPtr, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride) { -#ifdef STK_USE_DEVICE_MESH - NgpField& ngpX = get_updated_ngp_field(xField); - NgpField& ngpY = get_updated_ngp_field(yField); - using FieldExecSpace = typename DeviceField::ExecSpace::execution_space; - static_assert(Kokkos::SpaceAccessibility::accessible); - auto ngpViewX = 
impl::get_device_data(ngpX); - auto ngpViewY = impl::get_device_data(ngpY); - Kokkos::deep_copy(execSpace, ngpViewY, ngpViewX); -#else - STK_ThrowErrorMsg("field_copy_no_sync_or_mark_t: there should be no way to get here if STK_USE_DEVICE_MESH not defined"); -#endif + field.clear_sync_state(); + + std::unique_ptr fieldSelector; + if (selectorPtr == nullptr) { + fieldSelector = std::make_unique(field); + } + const stk::mesh::Selector& selector = selectorPtr != nullptr ? *selectorPtr : *(fieldSelector.get()); + + if constexpr (operate_on_ngp_mesh()) { + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(field.get_mesh()); + stk::mesh::NgpField& ngpField = stk::mesh::get_updated_ngp_field(field); + field_fill_for_each_entity(ngpMesh, ngpField, alpha, component, selector, execSpace); + } + else { + stk::mesh::HostMesh hostMesh(field.get_mesh()); + stk::mesh::HostField hostField(field.get_mesh(), field); + field_fill_for_each_entity(hostMesh, hostField, alpha, component, selector, execSpace); + } + + if (mark_modified_on_device(execSpace, isDeviceExecSpaceUserOverride)) { + field.modify_on_device(); + } + else { + field.modify_on_host(); + } } -template -void field_copy_no_sync_or_mark_t(const FieldBase& xField, const FieldBase& yField, const Selector& selector, const EXEC_SPACE& execSpace) -{ -#ifdef STK_USE_DEVICE_MESH - NgpField& ngpX = get_updated_ngp_field(xField); - NgpField& ngpY = get_updated_ngp_field(yField); - using FieldExecSpace = typename DeviceField::ExecSpace::execution_space; - static_assert(Kokkos::SpaceAccessibility::accessible); - auto ngpMesh = get_updated_ngp_mesh(xField.get_mesh()); - for_each_entity_run(ngpMesh, xField.entity_rank(), selector, KOKKOS_LAMBDA(const FastMeshIndex& entityIndex) { +template +class FieldCopy { +public: + FieldCopy(const NGP_FIELD_TYPE& ngpXfield, const NGP_FIELD_TYPE& ngpYfield) + : ngpX(ngpXfield), ngpY(ngpYfield) + {} + + KOKKOS_FUNCTION + void operator()(const stk::mesh::FastMeshIndex& entityIndex) const 
+ { const unsigned numComponents = ngpX.get_num_components_per_entity(entityIndex); STK_NGP_ThrowAssert(numComponents == ngpY.get_num_components_per_entity(entityIndex)); for(unsigned d=0; d +void field_copy_no_mark_t(const stk::mesh::FieldBase& xField, + const stk::mesh::FieldBase& yField, + const stk::mesh::Selector& selector, + const EXEC_SPACE& execSpace) +{ + if constexpr (operate_on_ngp_mesh()) { + xField.sync_to_device(); + stk::mesh::NgpField& ngpX = stk::mesh::get_updated_ngp_field(xField); + stk::mesh::NgpField& ngpY = stk::mesh::get_updated_ngp_field(yField); + auto ngpMesh = stk::mesh::get_updated_ngp_mesh(xField.get_mesh()); + FieldCopy> fieldCopy(ngpX, ngpY); + stk::mesh::for_each_entity_run(ngpMesh, xField.entity_rank(), selector, fieldCopy); + } + else { + xField.sync_to_host(); + stk::mesh::HostField hostX(xField.get_mesh(), xField); + stk::mesh::HostField hostY(yField.get_mesh(), yField); + stk::mesh::HostMesh hostMesh(xField.get_mesh()); + FieldCopy> fieldCopy(hostX, hostY); + stk::mesh::for_each_entity_run(hostMesh, xField.entity_rank(), selector, fieldCopy); + } } template -void field_copy_no_sync_or_mark(const FieldBase& xField, const FieldBase& yField, const EXEC_SPACE& execSpace) +void field_copy_no_mark_mod(const stk::mesh::FieldBase& xField, + const stk::mesh::FieldBase& yField, + const stk::mesh::Selector& selector, + const EXEC_SPACE& execSpace) { - const DataTraits& dataTraits = xField.data_traits(); + const stk::mesh::DataTraits& dataTraits = xField.data_traits(); if (dataTraits.type_info == typeid(double)) { - field_copy_no_sync_or_mark_t(xField, yField, execSpace); + field_copy_no_mark_t(xField, yField, selector, execSpace); } else if (dataTraits.type_info == typeid(float)) { - field_copy_no_sync_or_mark_t(xField, yField, execSpace); + field_copy_no_mark_t(xField, yField, selector, execSpace); } else if (dataTraits.type_info == typeid(int)) { - field_copy_no_sync_or_mark_t(xField, yField, execSpace); + field_copy_no_mark_t(xField, 
yField, selector, execSpace); } else if (dataTraits.type_info == typeid(unsigned)) { - field_copy_no_sync_or_mark_t(xField, yField, execSpace); + field_copy_no_mark_t(xField, yField, selector, execSpace); } else { STK_ThrowErrorMsg("field_copy doesn't yet support fields of type "< -void field_copy_no_sync_or_mark(const FieldBase& xField, const FieldBase& yField, const Selector& selector, const EXEC_SPACE& execSpace) +void field_copy_impl(const stk::mesh::FieldBase& xField, + const stk::mesh::FieldBase& yField, + const stk::mesh::Selector* selectorPtr, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride) { - const DataTraits& dataTraits = xField.data_traits(); - - if (dataTraits.type_info == typeid(double)) { - field_copy_no_sync_or_mark_t(xField, yField, selector, execSpace); - } - else if (dataTraits.type_info == typeid(float)) { - field_copy_no_sync_or_mark_t(xField, yField, selector, execSpace); - } - else if (dataTraits.type_info == typeid(int)) { - field_copy_no_sync_or_mark_t(xField, yField, selector, execSpace); + std::unique_ptr fieldSelector; + if (selectorPtr == nullptr) { + fieldSelector = std::make_unique(stk::mesh::Selector(xField) & stk::mesh::Selector(yField)); } - else if (dataTraits.type_info == typeid(unsigned)) { - field_copy_no_sync_or_mark_t(xField, yField, selector, execSpace); + const stk::mesh::Selector& selector = selectorPtr != nullptr ? 
*selectorPtr : *(fieldSelector.get()); + + field_copy_no_mark_mod(xField, yField, selector, execSpace); + + yField.clear_sync_state(); + if (mark_modified_on_device(execSpace, isDeviceExecSpaceUserOverride)) { + yField.modify_on_device(); } else { - STK_ThrowErrorMsg("field_copy doesn't yet support fields of type "< +void apply_functor_on_field(const stk::mesh::BulkData& mesh, + const stk::mesh::FieldBase & zField, + const stk::mesh::FieldBase & xField, + const stk::mesh::FieldBase & yField, + const DataType alpha, + const DataType beta, + const stk::mesh::Selector & select) +{ + const stk::mesh::Selector selector = select & stk::mesh::selectField(zField) & + stk::mesh::selectField(xField) & stk::mesh::selectField(yField); + stk::mesh::EntityRank entityRank = zField.entity_rank(); + xField.sync_to_device(); + yField.sync_to_device(); + zField.sync_to_device(); + stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(mesh); + auto& ngpXField = stk::mesh::get_updated_ngp_field(xField); + auto& ngpYField = stk::mesh::get_updated_ngp_field(yField); + auto& ngpFieldResult = stk::mesh::get_updated_ngp_field(zField); + Functor f(ngpMesh, ngpFieldResult, ngpXField, ngpYField, alpha, beta); + stk::mesh::for_each_entity_run(ngpMesh, entityRank, selector, f); + zField.modify_on_device(); +} + + +struct FieldAXPBYFunctor +{ + FieldAXPBYFunctor(stk::mesh::NgpMesh & my_ngp_mesh, + stk::mesh::NgpField & output_field, + stk::mesh::NgpField & input_x, + stk::mesh::NgpField & input_y, + const double & alpha, + const double & beta) + : my_mesh(my_ngp_mesh), output(output_field), x(input_x), y(input_y), a(alpha), b(beta) + { + } + stk::mesh::NgpMesh my_mesh; + stk::mesh::NgpField output; + stk::mesh::NgpField x; + stk::mesh::NgpField y; + const double a; + const double b; + KOKKOS_FUNCTION + void operator()(stk::mesh::FastMeshIndex f) const + { + unsigned num_components = output.get_num_components_per_entity(f); + unsigned other = x.get_num_components_per_entity(f); + 
num_components = (other < num_components) ? other : num_components; + other = y.get_num_components_per_entity(f); + num_components = (other < num_components) ? other : num_components; + for (unsigned i = 0; i < num_components; ++i) + { + output.get(f, i) = a * x.get(f, i) + b * y.get(f, i); + } + } +}; + //************ end of implementation detail ********************************* } // namespace impl -} // mesh +} // ngp_field_blas } // stk #endif // STK_MESH_BASEIMPL_NGPFIELDBLASIMPL_HPP diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp index 6ce65e6c58c4..59d4e7a10076 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp @@ -34,15 +34,17 @@ #include #include // for operator<<, basic_ostream, etc -#include // for EntityLess, BulkData +#include // for BulkData #include // for topology, operator<<, etc #include "stk_mesh/base/Entity.hpp" // for Entity +#include "stk_mesh/base/EntityLess.hpp" #include "stk_mesh/base/FieldBase.hpp" // for field_bytes_per_entity, etc #include "stk_mesh/base/MetaData.hpp" // for MetaData #include "stk_mesh/base/Part.hpp" // for Part #include "stk_mesh/base/Types.hpp" // for BucketVector, PartOrdinal, etc #include "stk_mesh/baseImpl/BucketRepository.hpp" // for BucketRepository #include +#include #include "stk_util/util/ReportHandler.hpp" // for ThrowAssert, etc namespace stk { namespace mesh { class FieldBase; } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.cpp new file mode 100644 index 000000000000..39d8ec45a7d0 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.cpp @@ -0,0 +1,86 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. 
Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include +#include +#include +#include +#include + +namespace stk { +namespace mesh { +namespace impl { + +bool part_is_connected_to_shell_block(const BulkData& bulk, const stk::mesh::Part &part) +{ + bool connected = false; + const MetaData& meta = bulk.mesh_meta_data(); + std::vector touchingBlocks = meta.get_blocks_touching_surface(&part); + + for(const stk::mesh::Part* touchingBlock : touchingBlocks) { + connected |= meta.get_topology(*touchingBlock).is_shell(); + } + return connected; +} + +void check_sideset_part_constraints(const BulkData& bulk, const stk::mesh::Part &part) +{ + const MetaData& meta = bulk.mesh_meta_data(); + if(part.primary_entity_rank() != meta.side_rank() && + !part_is_connected_to_shell_block(bulk, part)) { + stk::RuntimeWarning() << "create_sideset: part " << part.name() + << " has rank " << part.primary_entity_rank(); + } + if((part.id() == stk::mesh::Part::INVALID_ID) && + (part.name() != "universal_sideset") && !part.subsets().empty()) { + stk::RuntimeWarning() << "create_sideset: part " << part.name() + << " has invalid id "; + } + + for(const stk::mesh::Part* subsetPart : part.subsets()) { + if(subsetPart->primary_entity_rank() == meta.side_rank()) { + if(subsetPart->id() != part.id()) { + stk::RuntimeWarning() << "create_sideset: part " << part.name() + << " with id " << part.id() + << "; subset sideblock part " << subsetPart->name() + << " has different id " << subsetPart->id(); + } + } + } +} + +} // namespace impl +} // namespace mesh +} // namespace stk + diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.hpp new file mode 100644 index 000000000000..c6c0f8fe7330 --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetPartImpl.hpp @@ -0,0 +1,56 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. 
Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + + +#ifndef stk_mesh_SideSetPartImpl_hpp +#define stk_mesh_SideSetPartImpl_hpp + +//---------------------------------------------------------------------- + +namespace stk { namespace mesh { class BulkData; } } +namespace stk { namespace mesh { class Part; } } + +namespace stk { +namespace mesh { +namespace impl { + +bool part_is_connected_to_shell_block(const BulkData& bulk, const stk::mesh::Part &part); + +void check_sideset_part_constraints(const BulkData& bulk, const stk::mesh::Part &part); + +} // namespace impl +} // namespace mesh +} // namespace stk + +#endif diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetUtilImpl.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetUtilImpl.hpp index ad788a2a26ff..11ae81230713 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetUtilImpl.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/SideSetUtilImpl.hpp @@ -13,7 +13,7 @@ namespace stk { namespace mesh { class MetaData; } } namespace stk { namespace mesh { class Part; } } namespace stk { namespace mesh { class Selector; } } namespace stk { namespace mesh { struct Entity; } } -namespace stk { namespace mesh { struct SideSet; } } +namespace stk { namespace mesh { class SideSet; } } namespace stk { namespace mesh { class ElemElemGraph; } } namespace stk { namespace mesh { struct GraphEdge; } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Visitors.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Visitors.hpp index ad9fe7ff19c7..86347533b143 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Visitors.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Visitors.hpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/packages/stk/stk_middle_mesh/Jamfile b/packages/stk/stk_middle_mesh/Jamfile index 32b1ca559c81..b714c0bbab1a 100644 --- a/packages/stk/stk_middle_mesh/Jamfile +++ b/packages/stk/stk_middle_mesh/Jamfile @@ -47,10 +47,12 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - 
STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_middle_mesh-root-inc) : usage-requirements - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_middle_mesh-root-inc) : build-dir $(stk_middle_mesh-builddir) ; diff --git a/packages/stk/stk_middle_mesh/stk_middle_mesh/CMakeLists.txt b/packages/stk/stk_middle_mesh/stk_middle_mesh/CMakeLists.txt index 5bc850e979d2..a4a07159577a 100644 --- a/packages/stk/stk_middle_mesh/stk_middle_mesh/CMakeLists.txt +++ b/packages/stk/stk_middle_mesh/stk_middle_mesh/CMakeLists.txt @@ -58,6 +58,9 @@ else() add_library(stk_middle_mesh ${SOURCES} ${PRED_SOURCES}) target_link_libraries(stk_middle_mesh PUBLIC stk_util_parallel) target_link_libraries(stk_middle_mesh PUBLIC stk_search) + find_package(CBLAS REQUIRED) + find_package(Lapack REQUIRED) + target_link_libraries(stk_middle_mesh PUBLIC lapack) endif() target_include_directories(stk_middle_mesh PUBLIC @@ -86,5 +89,5 @@ INSTALL(FILES ${PRED_HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_middle_mesh/predicates) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_middle_mesh DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_middle_mesh EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_middle_mesh/stk_middle_mesh/mesh_agglomerator.cpp b/packages/stk/stk_middle_mesh/stk_middle_mesh/mesh_agglomerator.cpp index c10c5627303a..7f819c46b303 100644 --- a/packages/stk/stk_middle_mesh/stk_middle_mesh/mesh_agglomerator.cpp +++ b/packages/stk/stk_middle_mesh/stk_middle_mesh/mesh_agglomerator.cpp @@ -9,14 +9,12 @@ namespace impl { std::vector MeshAgglomerator::get_group_idxs(SetType& vertsIn, const int nthres) { std::vector idxs; - int nsearches = 0; for (int i = 0; i < get_num_groups(); ++i) { int nfound = 0; int nvertsRemaining = vertsIn.size(); for (auto& v : vertsIn) { - nsearches++; if (contains_entity_sorted(m_verts[i], v)) { nfound += 1; diff --git a/packages/stk/stk_middle_mesh_util/Jamfile 
b/packages/stk/stk_middle_mesh_util/Jamfile index d97edcd78b3c..e6ce84c1434c 100644 --- a/packages/stk/stk_middle_mesh_util/Jamfile +++ b/packages/stk/stk_middle_mesh_util/Jamfile @@ -44,7 +44,8 @@ project votd : requirements $(sierra-warnings) $(stk_middle_mesh_util-root-inc) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM : usage-requirements $(stk_middle_mesh_util-root-inc) : build-dir $(stk_middle_mesh_util-builddir) diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/CMakeLists.txt b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/CMakeLists.txt index b480b4ed3cbc..f458d1592a65 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/CMakeLists.txt +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/CMakeLists.txt @@ -64,6 +64,6 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_middle_mesh_util/) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_middle_mesh_util DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_middle_mesh_util EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.hpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.hpp index 0d7e1b1b118a..29fb4da91af1 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.hpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.hpp @@ -44,7 +44,6 @@ class StkMeshCreator m_metaDataPtr(m_bulkDataPtr->mesh_meta_data_ptr()), m_autodecompMethod(autodecompMethod) { - m_metaDataPtr->use_simple_fields(); declare_stk_vert_field(); load_mesh(fname); } diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/exodus_writer.hpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/exodus_writer.hpp index 4778413e4bcf..46c5a890d682 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/exodus_writer.hpp +++ 
b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/exodus_writer.hpp @@ -29,7 +29,6 @@ class ExodusWriter , m_mesh(mesh) , m_meshFields(fields) { - m_metaDataOutPtr->use_simple_fields(); initialize_output_mesh(); create_part("block_1"); declare_fields(); diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_interface.hpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_interface.hpp index 46b9ae74552b..260e797adf94 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_interface.hpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_interface.hpp @@ -36,7 +36,6 @@ class StkInterface , m_bulkDataOutPtr(stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create()) , m_metaDataOutPtr(m_bulkDataOutPtr->mesh_meta_data_ptr()) { - m_metaDataOutPtr->use_simple_fields(); check_sideset_size(); check_sideset_uniqueness(); initialize_output_mesh(); diff --git a/packages/stk/stk_ngp_test/Jamfile b/packages/stk/stk_ngp_test/Jamfile index 213728060c00..4d72e7fe6957 100644 --- a/packages/stk/stk_ngp_test/Jamfile +++ b/packages/stk/stk_ngp_test/Jamfile @@ -47,7 +47,8 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM STK_SHOW_DEPRECATED_WARNINGS STK_HIDE_DEPRECATED_CODE SIERRA_MIGRATION diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/CMakeLists.txt b/packages/stk/stk_ngp_test/stk_ngp_test/CMakeLists.txt index 2fa8bdfcbb5d..c5d31e96edbc 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/CMakeLists.txt +++ b/packages/stk/stk_ngp_test/stk_ngp_test/CMakeLists.txt @@ -56,5 +56,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_ngp_test/) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_ngp_test DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_ngp_test EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git 
a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp index 1e7bd867656f..0f25e3439677 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp @@ -1,9 +1,12 @@ #ifndef STK_NGP_TEST_NGP_TEST_HPP #define STK_NGP_TEST_NGP_TEST_HPP + #include #include "GlobalReporter.hpp" -#include "Reporter.hpp" #include +#if KOKKOS_VERSION >= 40200 +#include +#endif #include namespace ngp_testing { diff --git a/packages/stk/stk_performance_tests/Jamfile b/packages/stk/stk_performance_tests/Jamfile index 8681f54cb3b6..9a7c552010a1 100644 --- a/packages/stk/stk_performance_tests/Jamfile +++ b/packages/stk/stk_performance_tests/Jamfile @@ -42,7 +42,8 @@ project votd $(sierra-warnings) $(stk_performance_tests-root) $(stk_performance_tests-root)/.. - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM STK_HIDE_DEPRECATED_CODE : usage-requirements $(stk_performance_tests-root) diff --git a/packages/stk/stk_performance_tests/stk_balance/balanceHexesEdgesNodes.cpp b/packages/stk/stk_performance_tests/stk_balance/balanceHexesEdgesNodes.cpp index ee053b3e021c..c580770d8167 100644 --- a/packages/stk/stk_performance_tests/stk_balance/balanceHexesEdgesNodes.cpp +++ b/packages/stk/stk_performance_tests/stk_balance/balanceHexesEdgesNodes.cpp @@ -41,11 +41,11 @@ #include #include -class BalanceHexesEdgesNodes : public stk::unit_test_util::simple_fields::MeshFixture +class BalanceHexesEdgesNodes : public stk::unit_test_util::MeshFixture { public: BalanceHexesEdgesNodes() - : stk::unit_test_util::simple_fields::MeshFixture() + : stk::unit_test_util::MeshFixture() { } void setup_host_mesh(stk::mesh::BulkData::AutomaticAuraOption auraOption) diff --git a/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp b/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp index 92052255fe0a..0c37e9f2ecee 100644 --- 
a/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp +++ b/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp @@ -54,7 +54,7 @@ TEST(StkIo, meshRead_hex_noAura) const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 10; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 80); + int ELEMS_PER_DIM = stk::unit_test_util::get_command_line_option("--ne", 80); std::string elems = std::to_string(ELEMS_PER_DIM); std::string meshSpec = "generated:"+elems+"x"+elems+"x"+elems; @@ -75,7 +75,6 @@ TEST(StkIo, meshRead_hex_noAura) std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) .set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA) .create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::io::fill_mesh(meshSpec, *bulkPtr); } @@ -90,7 +89,7 @@ TEST(StkIo, meshRead_hex_shells_sidesets_aura) const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 10; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 80); + int ELEMS_PER_DIM = stk::unit_test_util::get_command_line_option("--ne", 80); std::string elems = std::to_string(ELEMS_PER_DIM); std::string meshSpec = "generated:"+elems+"x"+elems+"x"+elems+"|shell:xyzXYZ|sideset:xyzXYZ"; @@ -110,7 +109,6 @@ TEST(StkIo, meshRead_hex_shells_sidesets_aura) for(unsigned i=0; i bulkPtr = stk::mesh::MeshBuilder(MPI_COMM_WORLD) .create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::io::fill_mesh(meshSpec, *bulkPtr); } diff --git a/packages/stk/stk_performance_tests/stk_mesh/ChangeEntityPartPerfTest.cpp b/packages/stk/stk_performance_tests/stk_mesh/ChangeEntityPartPerfTest.cpp index 05f22ed876c2..3f3aec723e96 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/ChangeEntityPartPerfTest.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/ChangeEntityPartPerfTest.cpp @@ -45,10 +45,10 @@ namespace { -class ChangePartsTest : public stk::unit_test_util::simple_fields::MeshFixture +class ChangePartsTest : public 
stk::unit_test_util::MeshFixture { public: - ChangePartsTest() : stk::unit_test_util::simple_fields::MeshFixture(), + ChangePartsTest() : stk::unit_test_util::MeshFixture(), batchTimer(get_comm()), elementsOnBlock1(true) { @@ -165,8 +165,8 @@ TEST_F(ChangePartsTest, changeEntityPartsUsingEntityVectorSimplePerfTest) if(get_parallel_size() > 1) { GTEST_SKIP(); } const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - numElemPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); + const unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + numElemPerDim = stk::unit_test_util::get_command_line_option("-e", 50); numBlocks = 1; batchTimer.initialize_batch_timer(); @@ -194,8 +194,8 @@ TEST_F(ChangePartsTest, changeEntityPartsUsingSelectorSimplePerfTest) if(get_parallel_size() > 1) { GTEST_SKIP(); } const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 200); - numElemPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); + const unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 200); + numElemPerDim = stk::unit_test_util::get_command_line_option("-e", 50); numBlocks = 1; batchTimer.initialize_batch_timer(); @@ -222,9 +222,9 @@ TEST_F(ChangePartsTest, cacheRemovalImpactChangeEntityPartsWithEntityVector) if(get_parallel_size() > 1) { GTEST_SKIP(); } const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 500000); - numElemPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 80); - numBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-b", 125); + const unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 500000); + numElemPerDim = stk::unit_test_util::get_command_line_option("-e", 80); + numBlocks = 
stk::unit_test_util::get_command_line_option("-b", 125); batchTimer.initialize_batch_timer(); @@ -250,9 +250,9 @@ TEST_F(ChangePartsTest, cacheRemovalImpactChangeEntityPartsWithSelector) if(get_parallel_size() > 1) { GTEST_SKIP(); } const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 500000); - numElemPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 80); - numBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-b", 125); + const unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 500000); + numElemPerDim = stk::unit_test_util::get_command_line_option("-e", 80); + numBlocks = stk::unit_test_util::get_command_line_option("-b", 125); batchTimer.initialize_batch_timer(); diff --git a/packages/stk/stk_performance_tests/stk_mesh/CommunicateFieldData.cpp b/packages/stk/stk_performance_tests/stk_mesh/CommunicateFieldData.cpp index 975df7d51d9e..2b7981cb0178 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/CommunicateFieldData.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/CommunicateFieldData.cpp @@ -110,7 +110,7 @@ void createMetaAndBulkData(stk::io::StkMeshIoBroker &exodusFileReader, unsigned numBlocks, unsigned numFields) { - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "NO_FILE_SPECIFIED"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "NO_FILE_SPECIFIED"); if (exodusFileName == "NO_FILE_SPECIFIED") { exodusFileName = genMeshSpec; } @@ -220,7 +220,6 @@ void test_communicate_field_data_all_ghosting(stk::ParallelMachine communicator, batchTimer.initialize_batch_timer(); stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); std::string genMeshSpec = "generated:60x60x48|sideset:xXyY"; const unsigned numBlocks = 1; @@ -289,7 +288,6 @@ void test_communicate_field_data_ghosting(MPI_Comm communicator, 
batchTimer.initialize_batch_timer(); stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); std::string genMeshSpec = "generated:100x100x48|sideset:xXyY"; createMetaAndBulkData(exodusFileReader,genMeshSpec, numBlocks, numFields); @@ -322,7 +320,7 @@ void test_communicate_field_data_ghosting(MPI_Comm communicator, void test_communicate_field_data_ngp_ghosting(int num_iters, bool syncToHostEveryIter) { - const int meshDim = stk::unit_test_util::simple_fields::get_command_line_option("-s", 100); + const int meshDim = stk::unit_test_util::get_command_line_option("-s", 100); std::string meshDimStr = std::to_string(meshDim); std::string meshSpec = "generated:" + meshDimStr + "x" + meshDimStr + "x" + meshDimStr; @@ -331,7 +329,6 @@ void test_communicate_field_data_ngp_ghosting(int num_iters, bool syncToHostEver batchTimer.initialize_batch_timer(); stk::io::StkMeshIoBroker exodusFileReader(comm); - exodusFileReader.use_simple_fields(); const unsigned numBlocks = 1; const unsigned numFields = 8; @@ -409,8 +406,8 @@ TEST(CommunicateFieldData, NgpGhosting) { if (stk::parallel_machine_size(MPI_COMM_WORLD) < 2) { GTEST_SKIP(); } - int iter = stk::unit_test_util::simple_fields::get_command_line_option("-t", 1000); - bool syncToHostEveryIter = stk::unit_test_util::simple_fields::get_command_line_option("-h", false); + int iter = stk::unit_test_util::get_command_line_option("-t", 1000); + bool syncToHostEveryIter = stk::unit_test_util::get_command_line_option("-h", false); test_communicate_field_data_ngp_ghosting(iter, syncToHostEveryIter); } diff --git a/packages/stk/stk_performance_tests/stk_mesh/GatherGears.cpp b/packages/stk/stk_performance_tests/stk_mesh/GatherGears.cpp index 01af42ce2ade..58f7131aeed9 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/GatherGears.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/GatherGears.cpp @@ -111,8 +111,8 @@ void do_stk_gather_gears_test(stk::mesh::BulkData& bulk, std::vector& su 
TEST(gather_gears, gather_gears) { - stk::mesh::fixtures::simple_fields::GearsFixture fixture(MPI_COMM_WORLD, 1, - stk::mesh::fixtures::simple_fields::GearParams(0.01, 0.4, 1.5, -0.4, 0.4)); + stk::mesh::fixtures::GearsFixture fixture(MPI_COMM_WORLD, 1, + stk::mesh::fixtures::GearParams(0.01, 0.4, 1.5, -0.4, 0.4)); fixture.meta_data.commit(); double start_time = stk::cpu_time(); diff --git a/packages/stk/stk_performance_tests/stk_mesh/GearsSkinning.cpp b/packages/stk/stk_performance_tests/stk_mesh/GearsSkinning.cpp index 0552c1a948b6..baaa738f3074 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/GearsSkinning.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/GearsSkinning.cpp @@ -77,7 +77,7 @@ namespace { static const stk::mesh::EntityRank NODE_RANK = stk::topology::NODE_RANK; -typedef stk::mesh::fixtures::simple_fields::GearsFixture::CartesianField CartesianField; +typedef stk::mesh::fixtures::GearsFixture::CartesianField CartesianField; typedef stk::mesh::Field IntField; // @@ -91,7 +91,7 @@ typedef stk::mesh::Field IntField; // if do_separate_wedge == true then wedge must be nonnull pointer void separate_wedge( bool do_separate_wedge, - stk::mesh::fixtures::simple_fields::GearsFixture & fixture, + stk::mesh::fixtures::GearsFixture & fixture, stk::mesh::Entity wedge, CartesianField & velocity_field, stk::mesh::Part & skin_part @@ -193,7 +193,7 @@ void separate_wedge( * in the wedges argument. */ void find_and_shuffle_wedges_to_separate( - stk::mesh::fixtures::simple_fields::GearsFixture & fixture, + stk::mesh::fixtures::GearsFixture & fixture, stk::mesh::EntityVector & wedges ) { @@ -230,7 +230,7 @@ void find_and_shuffle_wedges_to_separate( * continue flying through the air. 
*/ void move_detached_wedges( - stk::mesh::fixtures::simple_fields::GearsFixture & fixture, + stk::mesh::fixtures::GearsFixture & fixture, CartesianField & velocity_field ) { @@ -267,7 +267,7 @@ void move_detached_wedges( //----------------------------------------------------------------------------- // -void populate_processor_id_field_data( stk::mesh::fixtures::simple_fields::GearsFixture & fixture, +void populate_processor_id_field_data( stk::mesh::fixtures::GearsFixture & fixture, IntField & processor_field ) { @@ -300,8 +300,8 @@ TEST( gears_skinning, gears_skinning ) const size_t NUM_GEARS = 1; double start_time = stk::wall_time(); - stk::mesh::fixtures::simple_fields::GearsFixture fixture(MPI_COMM_WORLD, NUM_GEARS, - stk::mesh::fixtures::simple_fields::GearParams(0.025, 0.6, 1.05, -0.4, 0.4)); + stk::mesh::fixtures::GearsFixture fixture(MPI_COMM_WORLD, NUM_GEARS, + stk::mesh::fixtures::GearParams(0.025, 0.6, 1.05, -0.4, 0.4)); const unsigned p_rank = fixture.bulk_data.parallel_rank(); std::srand(p_rank); // Seed pseudo-random generator based on processor rank. @@ -393,9 +393,9 @@ TEST( gears_skinning, gears_skinning ) const double x = 0; const double y = 0; const double z = 0; - const stk::mesh::fixtures::simple_fields::GearMovement gear_movement_data(rotation,x,y,z); + const stk::mesh::fixtures::GearMovement gear_movement_data(rotation,x,y,z); - stk::mesh::fixtures::simple_fields::Gear & gear = fixture.get_gear(0); + stk::mesh::fixtures::Gear & gear = fixture.get_gear(0); // Iterate over the time steps, updating the locations of the entities and // writing the current mesh state to output files. 
diff --git a/packages/stk/stk_performance_tests/stk_mesh/ManyBlocksSidesetsPerformance.cpp b/packages/stk/stk_performance_tests/stk_mesh/ManyBlocksSidesetsPerformance.cpp index 5c81554dad1a..cba039e1afbe 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/ManyBlocksSidesetsPerformance.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/ManyBlocksSidesetsPerformance.cpp @@ -58,7 +58,7 @@ namespace stk_perf_many_blocks { -class ManyBlocksSidesets : public stk::unit_test_util::simple_fields::MeshFixture +class ManyBlocksSidesets : public stk::unit_test_util::MeshFixture { public: ManyBlocksSidesets() @@ -135,7 +135,7 @@ class ManyBlocksSidesets : public stk::unit_test_util::simple_fields::MeshFixtur void output_mesh(stk::mesh::BulkData & bulk, const std::string & fileName) { - std::string writeOutput = stk::unit_test_util::simple_fields::get_option("--output", "off"); + std::string writeOutput = stk::unit_test_util::get_option("--output", "off"); if (writeOutput == "on") { stk::io::write_mesh(fileName, bulk); } @@ -210,9 +210,9 @@ TEST_F(ManyBlocksSidesets, disconnect_blocks_face_creation) const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 1; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 400); - int NUM_BLOCKS = stk::unit_test_util::simple_fields::get_command_line_option("--nb", 10); - bool verbose = stk::unit_test_util::simple_fields::has_option("--v"); + int ELEMS_PER_DIM = stk::unit_test_util::get_command_line_option("--ne", 400); + int NUM_BLOCKS = stk::unit_test_util::get_command_line_option("--nb", 10); + bool verbose = stk::unit_test_util::has_option("--v"); batchTimer.initialize_batch_timer(); for (unsigned j = 0; j < NUM_RUNS; j++) { @@ -284,9 +284,9 @@ TEST_F(ManyBlocksSidesets, sidesets_writeRestart) const unsigned NUM_RUNS = 1; const unsigned NUM_ITERS = 5; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 200); - int NUM_BLOCKS = 
stk::unit_test_util::simple_fields::get_command_line_option("--nb", 200); - int NUM_FIELDS = stk::unit_test_util::simple_fields::get_command_line_option("--nf", 100); + int ELEMS_PER_DIM = stk::unit_test_util::get_command_line_option("--ne", 200); + int NUM_BLOCKS = stk::unit_test_util::get_command_line_option("--nb", 200); + int NUM_FIELDS = stk::unit_test_util::get_command_line_option("--nf", 100); batchTimer.initialize_batch_timer(); for (unsigned j = 0; j < NUM_RUNS; j++) { @@ -316,9 +316,9 @@ TEST_F(ManyBlocksSidesets, noSidesets_writeRestart) const unsigned NUM_RUNS = 1; const unsigned NUM_ITERS = 5; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 200); - int NUM_BLOCKS = stk::unit_test_util::simple_fields::get_command_line_option("--nb", 200); - int NUM_FIELDS = stk::unit_test_util::simple_fields::get_command_line_option("--nf", 100); + int ELEMS_PER_DIM = stk::unit_test_util::get_command_line_option("--ne", 200); + int NUM_BLOCKS = stk::unit_test_util::get_command_line_option("--nb", 200); + int NUM_FIELDS = stk::unit_test_util::get_command_line_option("--nf", 100); batchTimer.initialize_batch_timer(); for (unsigned j = 0; j < NUM_RUNS; j++) { @@ -351,8 +351,8 @@ TEST_F(ManyBlocksSidesets, find_restriction) unsigned numElemsX = 400; unsigned numElemsY = 1; unsigned numElemsZ = 2; - int NUM_BLOCKS = stk::unit_test_util::simple_fields::get_command_line_option("--nb", 400); - int NUM_FIELDS = stk::unit_test_util::simple_fields::get_command_line_option("--nf", 50); + int NUM_BLOCKS = stk::unit_test_util::get_command_line_option("--nb", 400); + int NUM_FIELDS = stk::unit_test_util::get_command_line_option("--nf", 50); unsigned initialBucketCapacity = 1; unsigned maxBucketCapacity = 8; @@ -395,8 +395,8 @@ TEST_F(ManyBlocksSidesets, find_restriction_part_union) unsigned numElemsX = 400; unsigned numElemsY = 1; unsigned numElemsZ = 2; - int NUM_BLOCKS = stk::unit_test_util::simple_fields::get_command_line_option("--nb", 
400); - int NUM_FIELDS = stk::unit_test_util::simple_fields::get_command_line_option("--nf", 50); + int NUM_BLOCKS = stk::unit_test_util::get_command_line_option("--nb", 400); + int NUM_FIELDS = stk::unit_test_util::get_command_line_option("--nf", 50); unsigned initialBucketCapacity = 1; unsigned maxBucketCapacity = 8; @@ -439,8 +439,8 @@ TEST_F(ManyBlocksSidesets, selectUnion) unsigned numElemsX = 400; unsigned numElemsY = 1; unsigned numElemsZ = 2; - int NUM_BLOCKS = stk::unit_test_util::simple_fields::get_command_line_option("--nb", 400); - int NUM_FIELDS = stk::unit_test_util::simple_fields::get_command_line_option("--nf", 50); + int NUM_BLOCKS = stk::unit_test_util::get_command_line_option("--nb", 400); + int NUM_FIELDS = stk::unit_test_util::get_command_line_option("--nf", 50); unsigned initialBucketCapacity = 1; unsigned maxBucketCapacity = 8; diff --git a/packages/stk/stk_performance_tests/stk_mesh/MeshOperations.cpp b/packages/stk/stk_performance_tests/stk_mesh/MeshOperations.cpp index 0f9361ab381d..afbd48e6875c 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/MeshOperations.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/MeshOperations.cpp @@ -46,7 +46,7 @@ #include #include -class MeshOperations : public stk::unit_test_util::simple_fields::MeshFixture { +class MeshOperations : public stk::unit_test_util::MeshFixture { public: MeshOperations() : p_rank(get_parallel_rank()), @@ -148,7 +148,6 @@ TEST_F( MeshOperations, PerformanceTimings ) batchTimer.start_batch_timer(); stk::io::StkMeshIoBroker broker(get_comm()); - broker.use_simple_fields(); initialize_meta(broker); initialize_bulk(broker); diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAccess.cpp b/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAccess.cpp index 6a3f3c8fe69d..afb163cd5f13 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAccess.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAccess.cpp @@ -56,7 +56,7 @@ namespace 
ngp_field_perf_test { -class NgpFieldAccess : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldAccess : public stk::unit_test_util::MeshFixture { public: NgpFieldAccess() @@ -86,7 +86,7 @@ class NgpFieldAccess : public stk::unit_test_util::simple_fields::MeshFixture void setup_multi_block_mesh(unsigned numElemsPerDim, unsigned numBlocks) { stk::performance_tests::setup_multiple_blocks(get_meta(), numBlocks); - stk::io::fill_mesh(stk::unit_test_util::simple_fields::get_mesh_spec(numElemsPerDim), get_bulk()); + stk::io::fill_mesh(stk::unit_test_util::get_mesh_spec(numElemsPerDim), get_bulk()); stk::performance_tests::move_elements_to_other_blocks(get_bulk(), numElemsPerDim); } @@ -134,7 +134,7 @@ TEST_F(NgpFieldAccess, Centroid) setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); declare_centroid_field(); - stk::io::fill_mesh(stk::unit_test_util::simple_fields::get_mesh_spec(ELEMS_PER_DIM), get_bulk()); + stk::io::fill_mesh(stk::unit_test_util::get_mesh_spec(ELEMS_PER_DIM), get_bulk()); for (unsigned j = 0; j < NUM_RUNS; j++) { batchTimer.start_batch_timer(); @@ -159,7 +159,7 @@ TEST_F(NgpFieldAccess, HostCentroid) setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); declare_centroid_field(); - stk::io::fill_mesh(stk::unit_test_util::simple_fields::get_mesh_spec(ELEMS_PER_DIM), get_bulk()); + stk::io::fill_mesh(stk::unit_test_util::get_mesh_spec(ELEMS_PER_DIM), get_bulk()); for (unsigned j = 0; j < NUM_RUNS; j++) { batchTimer.start_batch_timer(); @@ -215,7 +215,7 @@ TEST_F(NgpFieldAccess, CentroidPartialBlock) const int NUM_ITERS = 250; const int ELEMS_PER_DIM = 100; const int NUM_BLOCKS = 100; - int BLOCKS = stk::unit_test_util::simple_fields::get_command_line_option("-n", 50); + int BLOCKS = stk::unit_test_util::get_command_line_option("-n", 50); BLOCKS = std::max(BLOCKS, 1); BLOCKS = std::min(BLOCKS, NUM_BLOCKS); diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAsync.cpp 
b/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAsync.cpp index a03af5bd6206..482e8dbda38a 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAsync.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NgpFieldAsync.cpp @@ -58,11 +58,11 @@ #define SPEEDUP_DELTA 1.0 -class NgpFieldAsyncTest : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldAsyncTest : public stk::unit_test_util::MeshFixture { public: NgpFieldAsyncTest() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), m_numBlocks(1), m_numElemsPerDim(100), m_numElements(std::pow(m_numElemsPerDim, 3)), @@ -309,10 +309,10 @@ TEST_F(NgpFieldAsyncTest, SyncToDeviceAsyncTiming) if(get_parallel_size() != 1) return; unsigned NUM_RUNS = 5; - unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - unsigned numStreams = stk::unit_test_util::simple_fields::get_command_line_option("-s", 3); - unsigned numElemsPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); - unsigned waitIteration = stk::unit_test_util::simple_fields::get_command_line_option("-p", 100); + unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + unsigned numStreams = stk::unit_test_util::get_command_line_option("-s", 3); + unsigned numElemsPerDim = stk::unit_test_util::get_command_line_option("-e", 50); + unsigned waitIteration = stk::unit_test_util::get_command_line_option("-p", 100); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer2(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); @@ -392,10 +392,10 @@ TEST_F(NgpFieldAsyncTest, SyncToHostAsyncTiming) if(get_parallel_size() != 1) return; unsigned NUM_RUNS = 5; - unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - unsigned numStreams = stk::unit_test_util::simple_fields::get_command_line_option("-s", 3); - unsigned numElemsPerDim = 
stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); - unsigned waitIteration = stk::unit_test_util::simple_fields::get_command_line_option("-p", 100); + unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + unsigned numStreams = stk::unit_test_util::get_command_line_option("-s", 3); + unsigned numElemsPerDim = stk::unit_test_util::get_command_line_option("-e", 50); + unsigned waitIteration = stk::unit_test_util::get_command_line_option("-p", 100); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer2(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); @@ -482,10 +482,10 @@ TEST_F(NgpFieldAsyncTest, SyncAsyncTiming) if(get_parallel_size() != 1) return; unsigned NUM_RUNS = 5; - unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - unsigned numStreams = stk::unit_test_util::simple_fields::get_command_line_option("-s", 3); - unsigned numElemsPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); - unsigned waitIteration = stk::unit_test_util::simple_fields::get_command_line_option("-p", 100); + unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + unsigned numStreams = stk::unit_test_util::get_command_line_option("-s", 3); + unsigned numElemsPerDim = stk::unit_test_util::get_command_line_option("-e", 50); + unsigned waitIteration = stk::unit_test_util::get_command_line_option("-p", 100); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer2(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); @@ -568,14 +568,14 @@ TEST_F(NgpFieldAsyncTest, PartialSyncToDeviceAsyncTiming) if(get_parallel_size() != 1) return; unsigned NUM_RUNS = 5; - unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - unsigned numStreams = stk::unit_test_util::simple_fields::get_command_line_option("-s", 3); - unsigned numFields = 
stk::unit_test_util::simple_fields::get_command_line_option("-f", 3); - unsigned numBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-b", 3); - unsigned numBlocksToSync = stk::unit_test_util::simple_fields::get_command_line_option("-c", 1); + unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + unsigned numStreams = stk::unit_test_util::get_command_line_option("-s", 3); + unsigned numFields = stk::unit_test_util::get_command_line_option("-f", 3); + unsigned numBlocks = stk::unit_test_util::get_command_line_option("-b", 3); + unsigned numBlocksToSync = stk::unit_test_util::get_command_line_option("-c", 1); EXPECT_TRUE(numBlocksToSync <= numBlocks && numBlocksToSync >= 1); - unsigned numElemsPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); - unsigned waitIteration = stk::unit_test_util::simple_fields::get_command_line_option("-p", 100); + unsigned numElemsPerDim = stk::unit_test_util::get_command_line_option("-e", 50); + unsigned waitIteration = stk::unit_test_util::get_command_line_option("-p", 100); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer2(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); @@ -663,14 +663,14 @@ TEST_F(NgpFieldAsyncTest, PartialSyncToHostAsyncTiming) if(get_parallel_size() != 1) { GTEST_SKIP(); } unsigned NUM_RUNS = 5; - unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - unsigned numStreams = stk::unit_test_util::simple_fields::get_command_line_option("-s", 3); - unsigned numFields = stk::unit_test_util::simple_fields::get_command_line_option("-f", 3); - unsigned numBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-b", 3); - unsigned numBlocksToSync = stk::unit_test_util::simple_fields::get_command_line_option("-c", 1); + unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + unsigned numStreams = 
stk::unit_test_util::get_command_line_option("-s", 3); + unsigned numFields = stk::unit_test_util::get_command_line_option("-f", 3); + unsigned numBlocks = stk::unit_test_util::get_command_line_option("-b", 3); + unsigned numBlocksToSync = stk::unit_test_util::get_command_line_option("-c", 1); EXPECT_TRUE(numBlocksToSync <= numBlocks && numBlocksToSync >= 1); - unsigned numElemsPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 50); - unsigned waitIteration = stk::unit_test_util::simple_fields::get_command_line_option("-p", 100); + unsigned numElemsPerDim = stk::unit_test_util::get_command_line_option("-e", 50); + unsigned waitIteration = stk::unit_test_util::get_command_line_option("-p", 100); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer2(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); @@ -764,13 +764,13 @@ TEST_F(NgpFieldAsyncTest, AsyncDeepCopyTiming) if(get_parallel_size() != 1) { GTEST_SKIP(); } unsigned NUM_RUNS = 5; - unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 50); - unsigned numStreams = stk::unit_test_util::simple_fields::get_command_line_option("-s", 10); - unsigned numFields = stk::unit_test_util::simple_fields::get_command_line_option("-f", 10); - unsigned numBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-b", 1); - unsigned numElemsPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 100); - unsigned sleepTime = stk::unit_test_util::simple_fields::get_command_line_option("-m", 50); - unsigned waitIteration = stk::unit_test_util::simple_fields::get_command_line_option("-p", 20); + unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 50); + unsigned numStreams = stk::unit_test_util::get_command_line_option("-s", 10); + unsigned numFields = stk::unit_test_util::get_command_line_option("-f", 10); + unsigned numBlocks = 
stk::unit_test_util::get_command_line_option("-b", 1); + unsigned numElemsPerDim = stk::unit_test_util::get_command_line_option("-e", 100); + unsigned sleepTime = stk::unit_test_util::get_command_line_option("-m", 50); + unsigned waitIteration = stk::unit_test_util::get_command_line_option("-p", 20); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer2(MPI_COMM_WORLD); stk::unit_test_util::BatchTimer batchTimer3(MPI_COMM_WORLD); diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpFieldUpdate.cpp b/packages/stk/stk_performance_tests/stk_mesh/NgpFieldUpdate.cpp index a2fb15096ef4..f4485d0a65f3 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpFieldUpdate.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NgpFieldUpdate.cpp @@ -51,10 +51,10 @@ #include #include -class NgpFieldSyncTest : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldSyncTest : public stk::unit_test_util::MeshFixture { public: - NgpFieldSyncTest() : stk::unit_test_util::simple_fields::MeshFixture() + NgpFieldSyncTest() : stk::unit_test_util::MeshFixture() {} void setup_mesh_with_many_blocks_many_elements(unsigned numBlocks, unsigned numElemPerDim) @@ -124,11 +124,11 @@ class NgpFieldSyncTest : public stk::unit_test_util::simple_fields::MeshFixture } }; -class NgpFieldUpdateFixture : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldUpdateFixture : public stk::unit_test_util::MeshFixture { public: NgpFieldUpdateFixture() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), tensorField(nullptr), vectorField(nullptr), tensorFieldSizePerElem(72), @@ -181,7 +181,7 @@ class NgpFieldUpdateFixture : public stk::unit_test_util::simple_fields::MeshFix stk::mesh::FieldBase* field = &get_meta().declare_field(stk::topology::ELEMENT_RANK, "FieldA"); stk::mesh::put_field_on_mesh(*field, get_meta().universal_part(), &init); - 
stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_mesh_with_fields(const std::string &meshSpecification) @@ -406,15 +406,15 @@ TEST_F(NgpFieldSyncTest, PartialSyncTiming) if(get_parallel_size() != 1) return; const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 20000000); - unsigned numComponents = stk::unit_test_util::simple_fields::get_command_line_option("-c", 1); - unsigned numBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-b", 20); - unsigned numBlocksToSync = stk::unit_test_util::simple_fields::get_command_line_option("-s", 5); - unsigned numElemPerDim = stk::unit_test_util::simple_fields::get_command_line_option("-e", 100); - unsigned tensorFieldSizePerElem = stk::unit_test_util::simple_fields::get_command_line_option("--tensorField", 72); - unsigned vectorFieldSizePerElem = stk::unit_test_util::simple_fields::get_command_line_option("-vectorField", 8); - bool justSyncAll = stk::unit_test_util::simple_fields::get_command_line_option("-a", false); - bool contiguousBlocks = stk::unit_test_util::simple_fields::get_command_line_option("-t", true); + const unsigned NUM_ITERS = stk::unit_test_util::get_command_line_option("-r", 20000000); + unsigned numComponents = stk::unit_test_util::get_command_line_option("-c", 1); + unsigned numBlocks = stk::unit_test_util::get_command_line_option("-b", 20); + unsigned numBlocksToSync = stk::unit_test_util::get_command_line_option("-s", 5); + unsigned numElemPerDim = stk::unit_test_util::get_command_line_option("-e", 100); + unsigned tensorFieldSizePerElem = stk::unit_test_util::get_command_line_option("--tensorField", 72); + unsigned vectorFieldSizePerElem = stk::unit_test_util::get_command_line_option("-vectorField", 8); + bool justSyncAll = stk::unit_test_util::get_command_line_option("-a", false); + bool contiguousBlocks = 
stk::unit_test_util::get_command_line_option("-t", true); numBlocksToSync = std::min(numBlocks, numBlocksToSync); stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp index c9f70608f477..42dbab9fc055 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp @@ -44,11 +44,11 @@ #include #include -class NgpMeshChangeElementPartMembership : public stk::unit_test_util::simple_fields::MeshFixture +class NgpMeshChangeElementPartMembership : public stk::unit_test_util::MeshFixture { public: NgpMeshChangeElementPartMembership() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), newPartName("block2") { } @@ -97,11 +97,11 @@ class NgpMeshChangeElementPartMembership : public stk::unit_test_util::simple_fi unsigned numElements; }; -class NgpMeshCreateEntity : public stk::unit_test_util::simple_fields::MeshFixture +class NgpMeshCreateEntity : public stk::unit_test_util::MeshFixture { public: NgpMeshCreateEntity() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), numElements(1000000) { } @@ -127,11 +127,11 @@ class NgpMeshCreateEntity : public stk::unit_test_util::simple_fields::MeshFixtu int numElements; }; -class NgpMeshGhosting : public stk::unit_test_util::simple_fields::MeshFixture +class NgpMeshGhosting : public stk::unit_test_util::MeshFixture { public: NgpMeshGhosting() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), ghostingName("testGhosting") { } @@ -272,7 +272,7 @@ TEST_F( NgpMeshGhosting, Timing ) { if (get_parallel_size() != 2) return; - std::string perfCheck = stk::unit_test_util::simple_fields::get_option("-perf_check", "PERF_CHECK"); + std::string perfCheck = 
stk::unit_test_util::get_option("-perf_check", "PERF_CHECK"); #ifdef NDEBUG const int NUM_INNER_ITERS = (perfCheck=="NO_PERF_CHECK" ? 1 : 100); #else diff --git a/packages/stk/stk_performance_tests/stk_mesh/NodalFieldPerf.cpp b/packages/stk/stk_performance_tests/stk_mesh/NodalFieldPerf.cpp index e067078e2590..7c2589b6df23 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NodalFieldPerf.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NodalFieldPerf.cpp @@ -57,7 +57,7 @@ namespace double initial_value[3] = {-1, 2, -0.3}; -class NgpFieldAccessPerformance : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldAccessPerformance : public stk::unit_test_util::MeshFixture { public: using DoubleVecField = stk::mesh::Field; @@ -85,7 +85,6 @@ class NgpFieldAccessPerformance : public stk::unit_test_util::simple_fields::Mes if(nullptr == metaData) { metaData = builder.create_meta_data(); - metaData->use_simple_fields(); } if(nullptr == bulkData) { @@ -249,7 +248,7 @@ TEST_F(NgpFieldAccessPerformance, pureHost_vectorSum_DefaultFieldDataManager) batchTimer.initialize_batch_timer(); setup_empty_mesh_with_field_data_manager(stk::mesh::BulkData::NO_AUTO_AURA, std::move(fieldDataManager)); createNodalVectorFields(); - stk::io::fill_mesh(stk::unit_test_util::simple_fields::get_mesh_spec(numElemsPerDim), *bulkData); + stk::io::fill_mesh(stk::unit_test_util::get_mesh_spec(numElemsPerDim), *bulkData); const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 1000; @@ -274,7 +273,7 @@ TEST_F(NgpFieldAccessPerformance, host_vectorSum_DefaultFieldDataManager) batchTimer.initialize_batch_timer(); setup_empty_mesh_with_field_data_manager(stk::mesh::BulkData::NO_AUTO_AURA, std::move(fieldDataManager)); createNodalVectorFields(); - stk::io::fill_mesh(stk::unit_test_util::simple_fields::get_mesh_spec(numElemsPerDim), *bulkData); + stk::io::fill_mesh(stk::unit_test_util::get_mesh_spec(numElemsPerDim), *bulkData); const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 
1000; @@ -291,9 +290,8 @@ TEST_F(NgpFieldAccessPerformance, host_vectorSum_DefaultFieldDataManager) void fill_mesh(stk::mesh::BulkData& bulk, unsigned numElemsPerDim) { stk::io::StkMeshIoBroker stkIo(MPI_COMM_WORLD); - stkIo.use_simple_fields(); stkIo.set_bulk_data(bulk); - stkIo.add_mesh_database(stk::unit_test_util::simple_fields::get_mesh_spec(numElemsPerDim), stk::io::READ_MESH); + stkIo.add_mesh_database(stk::unit_test_util::get_mesh_spec(numElemsPerDim), stk::io::READ_MESH); stkIo.create_input_mesh(); const bool delayFieldDataAllocation = true; stkIo.populate_mesh(delayFieldDataAllocation); diff --git a/packages/stk/stk_performance_tests/stk_mesh/ParallelSum.cpp b/packages/stk/stk_performance_tests/stk_mesh/ParallelSum.cpp index d848699e998f..3539d763c041 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/ParallelSum.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/ParallelSum.cpp @@ -84,7 +84,6 @@ void do_stk_test(bool with_ghosts=false) std::shared_ptr bulkPtr = builder.create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk = *bulkPtr; - meta.use_simple_fields(); if (parallel_rank == 0) { std::cerr << "Mesh: " << oss.str() << std::endl; diff --git a/packages/stk/stk_performance_tests/stk_mesh/Selector.cpp b/packages/stk/stk_performance_tests/stk_mesh/Selector.cpp index 80a72b51a69e..07909726a861 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/Selector.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/Selector.cpp @@ -49,7 +49,7 @@ namespace { -using stk::mesh::fixtures::simple_fields::VariableSelectorFixture; +using stk::mesh::fixtures::VariableSelectorFixture; } @@ -105,7 +105,7 @@ TEST(Verify, selectorAlgorithmicComplexity) // and the complexity is not meaningful. 
// - stk::mesh::fixtures::simple_fields::SelectorFixture fix; + stk::mesh::fixtures::SelectorFixture fix; fix.m_meta_data.commit(); fix.m_bulk_data.modification_begin(); fix.generate_mesh(); diff --git a/packages/stk/stk_performance_tests/stk_mesh/SkinningLargeCube.cpp b/packages/stk/stk_performance_tests/stk_mesh/SkinningLargeCube.cpp index 48a35c5ff949..300a5de9b34e 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/SkinningLargeCube.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/SkinningLargeCube.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -384,7 +385,7 @@ TEST( skinning_large_cube, skinning_large_cube) //create the mesh start_time = stk::wall_time(); - stk::mesh::fixtures::simple_fields::HexFixture fixture(pm,NX,NY,NZ); + stk::mesh::fixtures::HexFixture fixture(pm,NX,NY,NZ); const EntityRank element_rank = stk::topology::ELEMENT_RANK; const EntityRank side_rank = fixture.m_meta.side_rank(); @@ -552,7 +553,7 @@ double run_skinning_large_cube_test(bool createEdges, unsigned numRuns, std::vec double skinningTime = 0.0; for (unsigned testRun = 0; testRun < numRuns; ++testRun) { - stk::mesh::fixtures::simple_fields::HexFixture fixture(pm,NX,NY,NZ); + stk::mesh::fixtures::HexFixture fixture(pm,NX,NY,NZ); stk::mesh::MetaData & fem_meta = fixture.m_meta; diff --git a/packages/stk/stk_performance_tests/stk_mesh/TetSTKfaces.cpp b/packages/stk/stk_performance_tests/stk_mesh/TetSTKfaces.cpp index 9a44f45c53a0..5dad3b413b8d 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/TetSTKfaces.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/TetSTKfaces.cpp @@ -74,7 +74,7 @@ TEST(tet_faces, tet_faces) double start_time = stk::cpu_time(); - stk::mesh::fixtures::simple_fields::TetFixture fixture( MPI_COMM_WORLD, mesh_dims[0], mesh_dims[1], mesh_dims[2]); + stk::mesh::fixtures::TetFixture fixture( MPI_COMM_WORLD, mesh_dims[0], mesh_dims[1], mesh_dims[2]); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -140,7 
+140,7 @@ TEST(tet_faces, minimal_tet_faces) double start_time = stk::cpu_time(); - stk::mesh::fixtures::simple_fields::TetFixture fixture( MPI_COMM_WORLD, mesh_dims[0], mesh_dims[1], mesh_dims[2]); + stk::mesh::fixtures::TetFixture fixture( MPI_COMM_WORLD, mesh_dims[0], mesh_dims[1], mesh_dims[2]); fixture.m_meta.commit(); fixture.generate_mesh(); diff --git a/packages/stk/stk_performance_tests/stk_mesh/entity_sorting/entitySorting.cpp b/packages/stk/stk_performance_tests/stk_mesh/entity_sorting/entitySorting.cpp index b79ee36ca37b..778f7e0cfb4a 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/entity_sorting/entitySorting.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/entity_sorting/entitySorting.cpp @@ -3,7 +3,7 @@ #include #include #include // for BucketRepository -#include +#include #include // for BulkDataTester #include #include @@ -11,11 +11,11 @@ namespace { -class EntitySortingPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class EntitySortingPerformance : public stk::unit_test_util::PerformanceTester { public: EntitySortingPerformance(stk::unit_test_util::BulkDataTester &bulk) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk) { } @@ -58,7 +58,6 @@ class SortEntitiesCustomLess : public ::testing::Test TEST_F(SortEntitiesCustomLess, test_entity_sorting_performance) { stk::mesh::MetaData meta; - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta, MPI_COMM_WORLD); stk::io::fill_mesh("generated:100x100x100",bulk); run_entity_sort_performance_test(bulk); diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfCommNeighbors.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfCommNeighbors.cpp index 4fbda23626d4..a52e3ad2df1e 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/perfCommNeighbors.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/perfCommNeighbors.cpp @@ -127,7 +127,7 @@ 
void fill_small_mesh_with_big_ids(stk::mesh::BulkData& bulkData) bulkData.modification_end(); } -class StkPerfComm : public stk::unit_test_util::simple_fields::MeshFixture +class StkPerfComm : public stk::unit_test_util::MeshFixture { typedef stk::mesh::Field ScalarField; typedef stk::mesh::Field VectorField; @@ -408,7 +408,7 @@ class StkPerfComm : public stk::unit_test_util::simple_fields::MeshFixture if (get_bulk().parallel_rank()==0) { print_mesh_stats("Before rebalance: ",stats); } - // stk::unit_test_util::simple_fields::write_mesh_using_stk_io("before_rebal.e",get_bulk(), get_bulk().parallel()); + // stk::unit_test_util::write_mesh_using_stk_io("before_rebal.e",get_bulk(), get_bulk().parallel()); rebalance(get_bulk()); @@ -416,7 +416,7 @@ class StkPerfComm : public stk::unit_test_util::simple_fields::MeshFixture if (get_bulk().parallel_rank()==0) { print_mesh_stats("After rebalance: ",stats); } - // stk::unit_test_util::simple_fields::write_mesh_using_stk_io("after_rebal.e",get_bulk(), get_bulk().parallel()); + // stk::unit_test_util::write_mesh_using_stk_io("after_rebal.e",get_bulk(), get_bulk().parallel()); } void generate_and_rebalance_small_mesh_with_big_ids() @@ -433,7 +433,7 @@ class StkPerfComm : public stk::unit_test_util::simple_fields::MeshFixture if (get_bulk().parallel_rank()==0) { print_mesh_stats("Before rebalance: ",stats); } - // stk::unit_test_util::simple_fields::write_mesh_using_stk_io("before_rebal.e",get_bulk(), get_bulk().parallel()); + // stk::unit_test_util::write_mesh_using_stk_io("before_rebal.e",get_bulk(), get_bulk().parallel()); rebalance(get_bulk()); @@ -441,12 +441,12 @@ class StkPerfComm : public stk::unit_test_util::simple_fields::MeshFixture if (get_bulk().parallel_rank()==0) { print_mesh_stats("After rebalance: ",stats); } - // stk::unit_test_util::simple_fields::write_mesh_using_stk_io("after_rebal.e",get_bulk(), get_bulk().parallel()); + // stk::unit_test_util::write_mesh_using_stk_io("after_rebal.e",get_bulk(), 
get_bulk().parallel()); } std::string get_mesh_spec() { - return stk::unit_test_util::simple_fields::get_option("-mesh", "NO_MESH_SPECIFIED"); + return stk::unit_test_util::get_option("-mesh", "NO_MESH_SPECIFIED"); } ScalarField& nodeFieldScalar; diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfCreateFaces.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfCreateFaces.cpp index 93af0e5e896c..beadedec9c23 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/perfCreateFaces.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/perfCreateFaces.cpp @@ -18,11 +18,11 @@ size_t get_num_global_faces(const stk::mesh::BulkData &bulk) } //============================================================================== -class CreateFacesClassicPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class CreateFacesClassicPerformance : public stk::unit_test_util::PerformanceTester { public: CreateFacesClassicPerformance(stk::mesh::BulkData &bulk) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk), locallyOwnedElements(bulk.mesh_meta_data().locally_owned_part()) { @@ -44,11 +44,11 @@ class CreateFacesClassicPerformance : public stk::unit_test_util::simple_fields: }; //============================================================================== -class CreateFacesPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class CreateFacesPerformance : public stk::unit_test_util::PerformanceTester { public: CreateFacesPerformance(stk::mesh::BulkData &bulk) : - stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk), locallyOwnedElements(bulk.mesh_meta_data().locally_owned_part()) { @@ -70,7 +70,7 @@ class CreateFacesPerformance : public stk::unit_test_util::simple_fields::Perfor }; 
//============================================================================== -class CreateFacesClassicPerformanceTest : public stk::unit_test_util::simple_fields::MeshFixture +class CreateFacesClassicPerformanceTest : public stk::unit_test_util::MeshFixture { protected: void run_create_faces_perf_test() @@ -81,12 +81,12 @@ class CreateFacesClassicPerformanceTest : public stk::unit_test_util::simple_fie std::string get_mesh_spec() { - return stk::unit_test_util::simple_fields::get_option("-file", "NO_FILE_SPECIFIED"); + return stk::unit_test_util::get_option("-file", "NO_FILE_SPECIFIED"); } }; //============================================================================== -class CreateFacesPerformanceTest : public stk::unit_test_util::simple_fields::MeshFixture +class CreateFacesPerformanceTest : public stk::unit_test_util::MeshFixture { protected: void run_create_faces_perf_test() @@ -97,7 +97,7 @@ class CreateFacesPerformanceTest : public stk::unit_test_util::simple_fields::Me std::string get_mesh_spec() { - return stk::unit_test_util::simple_fields::get_option("-file", "NO_FILE_SPECIFIED"); + return stk::unit_test_util::get_option("-file", "NO_FILE_SPECIFIED"); } }; @@ -112,7 +112,7 @@ TEST_F(CreateFacesClassicPerformanceTest, read_mesh) TEST_F(CreateFacesPerformanceTest, read_mesh_with_auto_decomp) { allocate_bulk(stk::mesh::BulkData::AUTO_AURA); - stk::unit_test_util::simple_fields::read_from_serial_file_and_decompose(get_mesh_spec(), get_bulk(), "rcb"); + stk::unit_test_util::read_from_serial_file_and_decompose(get_mesh_spec(), get_bulk(), "rcb"); run_create_faces_perf_test(); } diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfDeleteElementTopology.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfDeleteElementTopology.cpp index ec328cf137eb..d0648d715cbd 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/perfDeleteElementTopology.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/perfDeleteElementTopology.cpp @@ -10,7 +10,7 @@ 
namespace { -class DestroyElementTopologyPerformanceTest : public stk::unit_test_util::simple_fields::MeshFixture +class DestroyElementTopologyPerformanceTest : public stk::unit_test_util::MeshFixture { protected: DestroyElementTopologyPerformanceTest() @@ -41,11 +41,11 @@ class DestroyElementTopologyPerformanceTest : public stk::unit_test_util::simple }; -class DestroyElementTopologyPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class DestroyElementTopologyPerformance : public stk::unit_test_util::PerformanceTester { public: DestroyElementTopologyPerformance(stk::mesh::BulkData &bulk) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk) { } protected: @@ -64,11 +64,11 @@ TEST_F(DestroyElementTopologyPerformanceTest, DestroyElementTopology) } -class DestroyAllElementsIndividuallyPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class DestroyAllElementsIndividuallyPerformance : public stk::unit_test_util::PerformanceTester { public: DestroyAllElementsIndividuallyPerformance(stk::mesh::BulkData &bulk) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk) { } protected: diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfElemGraph.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfElemGraph.cpp index 1121cc69accd..967791952bfa 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/perfElemGraph.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/perfElemGraph.cpp @@ -1,5 +1,6 @@ #include "gtest/gtest.h" // for AssertHelper, EXPECT_EQ, etc #include // for BulkData +#include #include // for count_selected_entities #include #include @@ -76,7 +77,6 @@ class AnimateMeshDeletion m_statusField(statusField), m_animationFileName(animationFileName) { - m_stkIo.use_simple_fields(); initialize_status_field(); 
create_output_mesh(); } @@ -132,7 +132,7 @@ class AnimateMeshDeletion void declare_animation_field(stk::mesh::MetaData &metaData, std::string &animationFile) { - animationFile = stk::unit_test_util::simple_fields::get_option("-animationFile", ""); + animationFile = stk::unit_test_util::get_option("-animationFile", ""); if("" != animationFile) { @@ -388,11 +388,11 @@ void get_perforated_mesh_ids(const stk::mesh::BulkData &bulkData, const stk::mes print_delete_info(bulkData, meshInfo); } -class ElementGraphPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class ElementGraphPerformance : public stk::unit_test_util::PerformanceTester { public: ElementGraphPerformance(stk::mesh::BulkData &bulk, const std::string &fileSpec) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk), meshSpec(fileSpec) { @@ -508,13 +508,7 @@ class PerforatedElementGraphPerformance : public ElementGraphPerformance for(stk::mesh::Entity elem : elems) { - unsigned numSides = bulkData.num_sides(elem); - const stk::mesh::Entity* sides = bulkData.begin(elem, meta.side_rank()); - const stk::mesh::ConnectivityOrdinal* side_ords = bulkData.begin_ordinals(elem, meta.side_rank()); - for(unsigned i=0; i // for BulkData +#include #include // for count_selected_entities #include // for skin_mesh #include @@ -16,11 +17,11 @@ size_t get_num_global_faces(const stk::mesh::BulkData &bulk) return meshCounts[stk::topology::FACE_RANK]; } -class ExposedBlockBoundaryPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class ExposedBlockBoundaryPerformance : public stk::unit_test_util::PerformanceTester { public: ExposedBlockBoundaryPerformance(stk::mesh::BulkData &bulk) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk), thingToSkin(bulk.mesh_meta_data().universal_part()), 
skinPart(bulk.mesh_meta_data().declare_part("skinPart")) @@ -37,13 +38,7 @@ class ExposedBlockBoundaryPerformance : public stk::unit_test_util::simple_field for(stk::mesh::Entity elem : elems) { - unsigned numSides = bulkData.num_sides(elem); - const stk::mesh::Entity* sides = bulkData.begin(elem, meta.side_rank()); - const stk::mesh::ConnectivityOrdinal* side_ords = bulkData.begin_ordinals(elem, meta.side_rank()); - for(unsigned i=0; i // for BulkData +#include #include // for count_selected_entities #include // for skin_mesh #include @@ -16,11 +17,11 @@ size_t get_num_global_faces(const stk::mesh::BulkData &bulk) return meshCounts[stk::topology::FACE_RANK]; } -class SkinMeshPerformance : public stk::unit_test_util::simple_fields::PerformanceTester +class SkinMeshPerformance : public stk::unit_test_util::PerformanceTester { public: SkinMeshPerformance(stk::mesh::BulkData &bulk) - : stk::unit_test_util::simple_fields::PerformanceTester(bulk.parallel()), + : stk::unit_test_util::PerformanceTester(bulk.parallel()), bulkData(bulk), thingToSkin(bulk.mesh_meta_data().universal_part()), skinPart(bulk.mesh_meta_data().declare_part("skinPart")) @@ -36,13 +37,7 @@ class SkinMeshPerformance : public stk::unit_test_util::simple_fields::Performan for(stk::mesh::Entity elem : elems) { - unsigned numSides = bulkData.num_sides(elem); - const stk::mesh::Entity* sides = bulkData.begin(elem, meta.side_rank()); - const stk::mesh::ConnectivityOrdinal* side_ords = bulkData.begin_ordinals(elem, meta.side_rank()); - for(unsigned i=0; i; -class StressEntityKeyMapping : public stk::unit_test_util::simple_fields::MeshFixture +class StressEntityKeyMapping : public stk::unit_test_util::MeshFixture { public: StressEntityKeyMapping() diff --git a/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshConstruction.cpp b/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshConstruction.cpp index 82c383f0f6ef..45c619c27d0f 100644 --- 
a/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshConstruction.cpp +++ b/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshConstruction.cpp @@ -42,8 +42,8 @@ TEST(MiddleMesh, StkMeshCreator) { stk::ParallelMachine commWorld = MPI_COMM_WORLD; - std::string meshFileName = stk::unit_test_util::simple_fields::get_command_line_option("--mesh", "generated:2x1000x1000|sideset:x"); - std::string surfacePartName = stk::unit_test_util::simple_fields::get_command_line_option("--surface", "surface_1"); + std::string meshFileName = stk::unit_test_util::get_command_line_option("--mesh", "generated:2x1000x1000|sideset:x"); + std::string surfacePartName = stk::unit_test_util::get_command_line_option("--surface", "surface_1"); const unsigned NUM_RUNS = 5; const int NUM_ITERS = 1; diff --git a/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshEntityOps.cpp b/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshEntityOps.cpp index 01498e322077..853c0b630550 100644 --- a/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshEntityOps.cpp +++ b/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshEntityOps.cpp @@ -53,14 +53,12 @@ TEST(MiddleMeshOps, GetAndDeleteDown) if (stk::middle_mesh::utils::impl::comm_size(comm) > 1) GTEST_SKIP(); - const int NUM_RUNS = 2000; + const int NUM_RUNS = 40; const int NUM_ITERS = 1; - stk::middle_mesh::mesh::impl::MeshSpec spec{100, 100, 0, 1, 0, 1}; + stk::middle_mesh::mesh::impl::MeshSpec spec{1000, 1000, 0, 1, 0, 1}; auto func = [&](stk::middle_mesh::utils::Point const& pt) { return pt; }; - //auto mesh = stk::middle_mesh::mesh::impl::create_mesh(spec, func); - std::cout << "finished creating mesh" << std::endl; stk::unit_test_util::BatchTimer batchTimer(comm); batchTimer.initialize_batch_timer(); for (unsigned i = 0; i < NUM_RUNS; ++i) { @@ -101,10 +99,10 @@ TEST(MiddleMeshOps, GetAndDeleteUp) if (stk::middle_mesh::utils::impl::comm_size(comm) > 1) GTEST_SKIP(); - 
const int NUM_RUNS = 2000; + const int NUM_RUNS = 40; const int NUM_ITERS = 1; - stk::middle_mesh::mesh::impl::MeshSpec spec{100, 100, 0, 1, 0, 1}; + stk::middle_mesh::mesh::impl::MeshSpec spec{1000, 1000, 0, 1, 0, 1}; auto func = [&](stk::middle_mesh::utils::Point const& pt) { return pt; }; stk::unit_test_util::BatchTimer batchTimer(comm); diff --git a/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshQualityImprover.cpp b/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshQualityImprover.cpp index fb048cd6a8cc..c91fe1198ad1 100644 --- a/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshQualityImprover.cpp +++ b/packages/stk/stk_performance_tests/stk_middle_mesh/perfMiddleMeshQualityImprover.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// #include #include @@ -92,15 +92,15 @@ TEST(MiddleMeshQualityImprover, IncrementalBoundarySnapperAnnulusRotation) { stk::ParallelMachine comm = MPI_COMM_WORLD; - const int NUM_RUNS = 300; + const int NUM_RUNS = 3; const int NUM_ITERS = 1; stk::unit_test_util::BatchTimer batchTimer(comm); batchTimer.initialize_batch_timer(); for (unsigned run = 0; run < NUM_RUNS; ++run) { std::cout << "run = " << run << std::endl; - auto mesh1 = make_annulus_mesh(10, 10, 0.5, 1.5, 0); - auto mesh2 = make_annulus_mesh(13, 13, 0.5, 1.5, pi/32); + auto mesh1 = make_annulus_mesh(35, 35, 0.5, 1.5, 0); + auto mesh2 = make_annulus_mesh(71, 71, 0.5, 1.5, pi/32); batchTimer.start_batch_timer(); auto bsnapper = stk::middle_mesh::mesh::impl::make_incremental_boundary_snapper(mesh1, mesh2, comm); @@ -121,8 +121,8 @@ TEST(MiddleMeshQualityImprover, IncrementalBoundarySnapperLargeElemCount) batchTimer.initialize_batch_timer(); for (unsigned run = 0; run < NUM_RUNS; ++run) { - stk::middle_mesh::mesh::impl::MeshSpec spec{100, 100, 0, 1, 0, 1}; - stk::middle_mesh::mesh::impl::MeshSpec spec2{10, 10, 0, 1, 0, 1}; + stk::middle_mesh::mesh::impl::MeshSpec spec{300, 300, 0, 1, 0, 1}; + stk::middle_mesh::mesh::impl::MeshSpec spec2{30, 30, 0, 1, 0, 1}; auto func = [&](stk::middle_mesh::utils::Point const& pt) { return pt; }; auto mesh1 = create_mesh(spec, func); diff --git a/packages/stk/stk_performance_tests/stk_search/SurfaceToSurface.cpp b/packages/stk/stk_performance_tests/stk_search/SurfaceToSurface.cpp index 6fde8e7bd742..84b4ef702f81 100644 --- a/packages/stk/stk_performance_tests/stk_search/SurfaceToSurface.cpp +++ b/packages/stk/stk_performance_tests/stk_search/SurfaceToSurface.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. 
-// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include @@ -115,10 +115,10 @@ void run_imported_surface_to_surface_test(const std::string& boxFileBaseName, BoxVectorType toolBoxes = read_boxes_from_file(boxFileBaseName + ".txt_tool", comm); for (unsigned run = 0; run < NUM_RUNS; ++run) { - SearchResults searchResults; batchTimer.start_batch_timer(); for (int i = 0; i < numIterations; ++i) { + SearchResults searchResults; stk::search::coarse_search(diceBoxes, toolBoxes, searchMethod, comm, searchResults, enforceSearchResultSymmetry); } batchTimer.stop_batch_timer(); @@ -147,7 +147,7 @@ void run_imported_surface_to_surface_test_with_views(const std::string& boxFileB BoxVectorType toolBoxesVector = read_boxes_from_file(boxFileBaseName + ".txt_tool", comm); Kokkos::View diceBoxes("diceBoxes", diceBoxesVector.size()); - Kokkos::View toolBoxes("diceBoxes", toolBoxesVector.size()); + Kokkos::View toolBoxes("toolBoxes", toolBoxesVector.size()); auto diceBoxesHost = Kokkos::create_mirror_view(diceBoxes); auto toolBoxesHost = Kokkos::create_mirror_view(toolBoxes); @@ -167,10 +167,9 @@ void run_imported_surface_to_surface_test_with_views(const std::string& boxFileB Kokkos::deep_copy(toolBoxes, toolBoxesHost); for (unsigned run = 0; run < NUM_RUNS; ++run) { - Kokkos::View searchResults; - batchTimer.start_batch_timer(); for (int i = 0; i < numIterations; ++i) { + Kokkos::View 
searchResults; stk::search::coarse_search(diceBoxes, toolBoxes, searchMethod, comm, searchResults, ExecSpace{}, enforceSearchResultSymmetry); } @@ -484,6 +483,7 @@ TEST(StkSearch_SurfaceToSurface, tractorTrailerCrash_floatBox_with_views_ARBORX) const int numIterations = 20; run_imported_surface_to_surface_test_with_views(boxFileBaseName, numIterations, stk::search::ARBORX); } + template BoxVectorType read_local_boxes_from_file_and_number(const std::string& baseName) { @@ -502,6 +502,33 @@ BoxVectorType read_local_boxes_from_file_and_number(const std::string& baseName) iss >> box; } + unsigned long id = 0; + for (auto & boxIdent : boxVector) { + boxIdent.second = id; + id++; + } + + return boxVector; +} + +template +BoxVectorType read_local_boxes_from_file_and_number(const std::string& baseName, MPI_Comm comm) +{ + const std::string fileName = get_parallel_filename(baseName, comm); + std::ifstream infile(fileName); + STK_ThrowRequireMsg(infile.good(), "Unable to open file " + fileName); + + BoxVectorType boxVector; + std::string line; + while (std::getline(infile, line)) { + if (line.size() == 0) continue; + if (line[0] == '#') continue; + std::istringstream iss(line); + boxVector.emplace_back(); + auto & [box, ident] = boxVector.back(); + iss >> box; + } + unsigned long id = 0; for (auto & boxIdent : boxVector) { boxIdent.second = id; @@ -510,6 +537,7 @@ BoxVectorType read_local_boxes_from_file_and_number(const std::string& baseName) return boxVector; } + template void run_imported_surface_to_surface_test_local(const std::string& boxFileBaseName, const int numIterations, @@ -524,10 +552,10 @@ void run_imported_surface_to_surface_test_local(const std::string& boxFileBaseNa BoxVectorType toolBoxes = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_tool"); for (unsigned run = 0; run < NUM_RUNS; ++run) { - LocalSearchResults searchResults; batchTimer.start_batch_timer(); for (int i = 0; i < numIterations; ++i) { + LocalSearchResults searchResults; 
stk::search::local_coarse_search(diceBoxes, toolBoxes, searchMethod, searchResults); } batchTimer.stop_batch_timer(); @@ -554,6 +582,57 @@ void run_imported_surface_to_surface_test_local_with_views(const std::string& bo BoxVectorType diceBoxesVector = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_dice"); BoxVectorType toolBoxesVector = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_tool"); + Kokkos::View diceBoxes("diceBoxes", diceBoxesVector.size()); + Kokkos::View toolBoxes("toolBoxes", toolBoxesVector.size()); + auto diceBoxesHost = Kokkos::create_mirror_view(diceBoxes); + auto toolBoxesHost = Kokkos::create_mirror_view(toolBoxes); + + for (unsigned i = 0; i < diceBoxesVector.size(); i++) { + auto boxIdentPair = diceBoxesVector[i]; + BoxIdentType domainBoxIdent{boxIdentPair.first, boxIdentPair.second}; + diceBoxesHost(i) = domainBoxIdent; + } + + for (unsigned i = 0; i < toolBoxesVector.size(); i++) { + auto boxIdentPair = toolBoxesVector[i]; + BoxIdentType rangeBoxIdent{boxIdentPair.first, boxIdentPair.second}; + toolBoxesHost(i) = rangeBoxIdent; + } + + Kokkos::deep_copy(diceBoxes, diceBoxesHost); + Kokkos::deep_copy(toolBoxes, toolBoxesHost); + + for (unsigned run = 0; run < NUM_RUNS; ++run) { + + batchTimer.start_batch_timer(); + for (int i = 0; i < numIterations; ++i) { + Kokkos::View searchResults; + stk::search::local_coarse_search(diceBoxes, toolBoxes, searchMethod, searchResults, ExecSpace{}); + } + batchTimer.stop_batch_timer(); + } + + batchTimer.print_batch_timing(numIterations); +} + +template +void run_imported_surface_to_surface_test_pll_local_with_views(const std::string& boxFileBaseName, + const int numIterations, + stk::search::SearchMethod searchMethod) +{ + using BoxType = typename BoxIdentType::box_type; + using IdentType = typename BoxIdentType::second_type; + using BoxVectorType = typename std::vector>; + using ExecSpace = Kokkos::DefaultExecutionSpace; + + MPI_Comm comm = MPI_COMM_WORLD; + const unsigned 
NUM_RUNS = 5; + stk::unit_test_util::BatchTimer batchTimer(comm); + batchTimer.initialize_batch_timer(); + + BoxVectorType diceBoxesVector = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_dice", comm); + BoxVectorType toolBoxesVector = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_tool", comm); + Kokkos::View diceBoxes("diceBoxes", diceBoxesVector.size()); Kokkos::View toolBoxes("diceBoxes", toolBoxesVector.size()); auto diceBoxesHost = Kokkos::create_mirror_view(diceBoxes); @@ -575,10 +654,10 @@ void run_imported_surface_to_surface_test_local_with_views(const std::string& bo Kokkos::deep_copy(toolBoxes, toolBoxesHost); for (unsigned run = 0; run < NUM_RUNS; ++run) { - Kokkos::View searchResults; batchTimer.start_batch_timer(); for (int i = 0; i < numIterations; ++i) { + Kokkos::View searchResults; stk::search::local_coarse_search(diceBoxes, toolBoxes, searchMethod, searchResults, ExecSpace{}); } batchTimer.stop_batch_timer(); @@ -587,6 +666,105 @@ void run_imported_surface_to_surface_test_local_with_views(const std::string& bo batchTimer.print_batch_timing(numIterations); } +using MemSpace = stk::ngp::ExecSpace::memory_space; + +template +void run_imported_surface_to_surface_test_local_with_views_rawArborX(const std::string& boxFileBaseName, + const int numIterations, + stk::search::SearchMethod searchMethod) +{ + using BoxType = typename BoxIdentType::box_type; + using IdentType = typename BoxIdentType::second_type; + using BoxVectorType = typename std::vector>; + using ExecSpace = Kokkos::DefaultExecutionSpace; + + MPI_Comm comm = MPI_COMM_WORLD; + const unsigned NUM_RUNS = 5; + stk::unit_test_util::BatchTimer batchTimer(comm); + batchTimer.initialize_batch_timer(); + + BoxVectorType diceBoxesVector = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_dice", comm); + BoxVectorType toolBoxesVector = read_local_boxes_from_file_and_number(boxFileBaseName + ".txt_tool", comm); + + Kokkos::View diceBoxes("diceBoxes", 
diceBoxesVector.size()); + Kokkos::View toolBoxes("diceBoxes", toolBoxesVector.size()); + auto diceBoxesHost = Kokkos::create_mirror_view(diceBoxes); + auto toolBoxesHost = Kokkos::create_mirror_view(toolBoxes); + + for (unsigned i = 0; i < diceBoxesVector.size(); i++) { + auto boxIdentPair = diceBoxesVector[i]; + auto stkMinCorner = boxIdentPair.first.min_corner(); + auto stkMaxCorner = boxIdentPair.first.max_corner(); + ArborX::Point min_point(stkMinCorner[0], stkMinCorner[1], stkMinCorner[2]); + ArborX::Point max_point(stkMaxCorner[0], stkMaxCorner[1], stkMaxCorner[2]); + diceBoxesHost(i) = {min_point, max_point}; + } + + for (unsigned i = 0; i < toolBoxesVector.size(); i++) { + auto boxIdentPair = toolBoxesVector[i]; + auto stkMinCorner = boxIdentPair.first.min_corner(); + auto stkMaxCorner = boxIdentPair.first.max_corner(); + ArborX::Point min_point(stkMinCorner[0], stkMinCorner[1], stkMinCorner[2]); + ArborX::Point max_point(stkMaxCorner[0], stkMaxCorner[1], stkMaxCorner[2]); + toolBoxesHost(i) = {min_point, max_point}; + } + + Kokkos::deep_copy(diceBoxes, diceBoxesHost); + Kokkos::deep_copy(toolBoxes, toolBoxesHost); + + for (unsigned run = 0; run < NUM_RUNS; ++run) { + batchTimer.start_batch_timer(); + ExecSpace execSpace{}; + for (int i = 0; i < numIterations; ++i) { + Kokkos::Profiling::pushRegion("Raw ArborX"); + Kokkos::View indices("ArborX::indices", 0); + Kokkos::View offsets("ArborX::offsets", 0); + + ArborX::BVH bvh{execSpace, toolBoxes}; + const int numQueries = diceBoxes.extent(0); + Kokkos::View *, MemSpace> queries(Kokkos::ViewAllocateWithoutInitializing("queries"), numQueries); + + Kokkos::parallel_for("setup_queries", Kokkos::RangePolicy(0, numQueries), + KOKKOS_LAMBDA(int i) { queries(i) = ArborX::intersects(diceBoxes(i)); }); + Kokkos::fence(); + bvh.query(execSpace, queries, indices, offsets); + Kokkos::Profiling::popRegion(); + } + batchTimer.stop_batch_timer(); + } + + batchTimer.print_batch_timing(numIterations); +} + 
+TEST(StkSearch_SurfaceToSurface, a001_intent_strong_link_floatBox_local_with_views_rawARBORX) +{ + std::string boxFileBaseName = stk::unit_test_util::get_option("-m", "none-specified"); + if (boxFileBaseName == "none-specified") GTEST_SKIP(); + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 40) GTEST_SKIP(); + + const int numIterations = 20; + run_imported_surface_to_surface_test_local_with_views_rawArborX(boxFileBaseName, numIterations, stk::search::ARBORX); +} + +TEST(StkSearch_SurfaceToSurface, a001_intent_strong_link_floatBox_local_with_views_ARBORX) +{ + std::string boxFileBaseName = stk::unit_test_util::get_option("-m", "none-specified"); + if (boxFileBaseName == "none-specified") GTEST_SKIP(); + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 40) GTEST_SKIP(); + + const int numIterations = 20; + run_imported_surface_to_surface_test_pll_local_with_views(boxFileBaseName, numIterations, stk::search::ARBORX); +} + +TEST(StkSearch_SurfaceToSurface, a001_intent_strong_link_floatBox_local_with_views_MORTON_LBVH) +{ + std::string boxFileBaseName = stk::unit_test_util::get_option("-m", "none-specified"); + if (boxFileBaseName == "none-specified") GTEST_SKIP(); + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 40) GTEST_SKIP(); + + const int numIterations = 20; + run_imported_surface_to_surface_test_pll_local_with_views(boxFileBaseName, numIterations, stk::search::MORTON_LBVH); +} TEST(StkSearch_SurfaceToSurface, ecsl_floatBox_local_KDTREE) { diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp index 7a801b523c8d..3cc7847388fd 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source 
code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include @@ -54,8 +54,6 @@ void run_volume_to_one_test(const std::string& meshFileName, stk::search::SearchMethod searchMethod, bool enforceSearchResultSymmetry = true) { - using BoxType = typename BoxVectorType::value_type::first_type; - MPI_Comm comm = MPI_COMM_WORLD; const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 1000; @@ -82,10 +80,9 @@ void run_volume_to_one_test(const std::string& meshFileName, stk::search::add_to_box(supersetBox, box); } - SearchResults searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + SearchResults searchResults; stk::search::coarse_search(elemBoxes, supersetBoxVec, searchMethod, comm, searchResults, enforceSearchResultSymmetry); } batchTimer.stop_batch_timer(); @@ -125,10 +122,10 @@ void run_volume_to_one_test_with_views(const std::string& meshFileName, stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); } - Kokkos::View searchResults; batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; stk::search::coarse_search(elemBoxes, supersetBoxes, searchMethod, comm, searchResults, ExecSpace{}, enforceSearchResultSymmetry); } @@ -167,8 +164,6 @@ template void 
run_volume_to_one_test_local(const std::string& meshFileName, stk::search::SearchMethod searchMethod) { - using BoxType = typename BoxVectorType::value_type::first_type; - MPI_Comm comm = MPI_COMM_WORLD; const unsigned NUM_RUNS = 5; const unsigned NUM_ITERS = 1000; @@ -186,16 +181,15 @@ void run_volume_to_one_test_local(const std::string& meshFileName, createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK, elemBoxes); BoxVectorType supersetBoxVec { elemBoxes[0] }; - auto & [supersetBox, ident] = supersetBoxVec[0]; + auto & [supersetBox, supersetIdent] = supersetBoxVec[0]; for (const auto & [box, ident] : elemBoxes) { stk::search::add_to_box(supersetBox, box); } - LocalSearchResults searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + LocalSearchResults searchResults; stk::search::local_coarse_search(elemBoxes, supersetBoxVec, searchMethod, searchResults); } batchTimer.stop_batch_timer(); @@ -233,10 +227,9 @@ void run_volume_to_one_test_local_with_views(const std::string& meshFileName, stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); } - Kokkos::View searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; stk::search::local_coarse_search(elemBoxes, supersetBoxes, searchMethod, searchResults, ExecSpace{}); } batchTimer.stop_batch_timer(); @@ -245,6 +238,46 @@ void run_volume_to_one_test_local_with_views(const std::string& meshFileName, batchTimer.print_batch_timing(NUM_ITERS); } +template +void run_one_to_volume_test_local_with_views(const std::string& meshFileName, + stk::search::SearchMethod searchMethod) +{ + + using ExecSpace = Kokkos::DefaultExecutionSpace; + + MPI_Comm comm = MPI_COMM_WORLD; + const unsigned NUM_RUNS = 5; + const unsigned NUM_ITERS = 1000; + stk::unit_test_util::BatchTimer batchTimer(comm); + batchTimer.initialize_batch_timer(); + + for (unsigned run = 0; run < NUM_RUNS; ++run) { + + stk::mesh::MeshBuilder 
builder(comm); + std::shared_ptr bulkPtr = builder.create(); + + stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); + + Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + + Kokkos::View supersetBoxes("Range Boxes", 1); + supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; + + for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { + stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + } + + batchTimer.start_batch_timer(); + for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; + stk::search::local_coarse_search(supersetBoxes, elemBoxes, searchMethod, searchResults, ExecSpace{}); + } + batchTimer.stop_batch_timer(); + } + + batchTimer.print_batch_timing(NUM_ITERS); +} + TEST(StkSearch_VolumeToOne, generatedMesh_floatBox_local_KDTREE) { run_volume_to_one_test_local("generated:40x80x20|sideset:xXyYzZ", stk::search::KDTREE); @@ -269,5 +302,16 @@ TEST(StkSearch_VolumeToOne, generatedMesh_floatBox_local_with_views_ARBORX) { run_volume_to_one_test_local_with_views("generated:40x80x20|sideset:xXyYzZ", stk::search::ARBORX); } + +TEST(StkSearch_OneToVolume, generatedMesh_floatBox_local_with_views_ARBORX) +{ + run_one_to_volume_test_local_with_views("generated:40x80x20|sideset:xXyYzZ", stk::search::ARBORX); +} + +TEST(StkSearch_OneToVolume, generatedMesh_floatBox_local_with_views_MORTON_LBVH) +{ + run_one_to_volume_test_local_with_views("generated:40x80x20|sideset:xXyYzZ", stk::search::MORTON_LBVH); +} + } // namespace diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp index 16c820018756..afe29d623127 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp @@ -71,10 +71,9 @@ void run_volume_to_surface_test(const std::string& meshFileName, BoxVectorType sideBoxes; 
createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK, sideBoxes); - SearchResults searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + SearchResults searchResults; stk::search::coarse_search(elemBoxes, sideBoxes, searchMethod, comm, searchResults, enforceSearchResultSymmetry); } batchTimer.stop_batch_timer(); @@ -109,10 +108,9 @@ void run_volume_to_surface_test_with_views(const std::string& meshFileName, Kokkos::View sideBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK); - Kokkos::View searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; stk::search::coarse_search(elemBoxes, sideBoxes, searchMethod, comm, searchResults, ExecSpace{}, enforceSearchResultSymmetry); } @@ -211,10 +209,9 @@ void run_volume_to_surface_test_local(const std::string& meshFileName, BoxVectorType sideBoxes; createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK, sideBoxes); - LocalSearchResults searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + LocalSearchResults searchResults; stk::search::local_coarse_search(elemBoxes, sideBoxes, searchMethod, searchResults); } batchTimer.stop_batch_timer(); @@ -248,10 +245,9 @@ void run_volume_to_surface_test_local_with_views(const std::string& meshFileName Kokkos::View sideBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK); - Kokkos::View searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; stk::search::local_coarse_search(elemBoxes, sideBoxes, searchMethod, searchResults, ExecSpace{}); } batchTimer.stop_batch_timer(); @@ -429,11 +425,10 @@ void distributed_arborx_coarse_search(Kokkos::View elem Kokkos::View sideBoxes, MPI_Comm comm) { - std::cerr << "start of distributed raw ArborX test" << std::endl; + ExecSpace execSpace; ArborX::DistributedTree tree(comm, 
execSpace, elemBoxes); - std::cerr << "after DistributedTree construction" << std::endl; const int numQueries = sideBoxes.extent(0); Kokkos::View *, MemSpace> queries(Kokkos::ViewAllocateWithoutInitializing("queries"), numQueries); @@ -441,25 +436,22 @@ void distributed_arborx_coarse_search(Kokkos::View elem Kokkos::parallel_for("setup_queries", Kokkos::RangePolicy(0, numQueries), KOKKOS_LAMBDA(int i) { queries(i) = ArborX::intersects(sideBoxes(i)); }); Kokkos::fence(); - std::cerr << "after queries population" << std::endl; Kokkos::View values("indicesAndRanks", 0); Kokkos::View offsets("offsets", 0); - - std::cerr << "before tree query" << std::endl; + tree.query(execSpace, queries, values, offsets); - std::cerr << "after tree query" << std::endl; } void run_search_test_distributed_arborx(const std::string& meshFileName) { const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = 5; + const unsigned NUM_ITERS = 100; stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); for (unsigned j = 0; j < NUM_RUNS; j++) { - std::cerr << "batch timer iteration " << j << std::endl; + stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); std::shared_ptr bulkPtr = builder.create(); @@ -470,7 +462,6 @@ void run_search_test_distributed_arborx(const std::string& meshFileName) batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { - std::cerr << "inner iteration " << i << std::endl; distributed_arborx_coarse_search(elemBoxes, sideBoxes, MPI_COMM_WORLD); } batchTimer.stop_batch_timer(); diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToVolume.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToVolume.cpp index cbf78bcaa67e..2fb8cdd45d1f 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToVolume.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToVolume.cpp @@ -102,10 +102,9 @@ void run_volume_to_volume_test_with_views(const std::string& meshFileName, Kokkos::View elemBoxes = 
createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); - Kokkos::View searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; stk::search::coarse_search(elemBoxes, elemBoxes, searchMethod, comm, searchResults, ExecSpace{}, enforceSearchResultSymmetry); } @@ -185,10 +184,9 @@ void run_volume_to_volume_test_local(const std::string& meshFileName, BoxVectorType elemBoxes; createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK, elemBoxes); - LocalSearchResults searchResults; - batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + LocalSearchResults searchResults; stk::search::local_coarse_search(elemBoxes, elemBoxes, searchMethod, searchResults); } batchTimer.stop_batch_timer(); @@ -219,10 +217,10 @@ void run_volume_to_volume_test_local_with_views(const std::string& meshFileName, Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); - Kokkos::View searchResults; batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + Kokkos::View searchResults; stk::search::local_coarse_search(elemBoxes, elemBoxes, searchMethod, searchResults, ExecSpace{}); } batchTimer.stop_batch_timer(); diff --git a/packages/stk/stk_performance_tests/stk_util/perfParallelExchange.cpp b/packages/stk/stk_performance_tests/stk_util/perfParallelExchange.cpp index b0c47eec71b3..a76a7d0c4413 100644 --- a/packages/stk/stk_performance_tests/stk_util/perfParallelExchange.cpp +++ b/packages/stk/stk_performance_tests/stk_util/perfParallelExchange.cpp @@ -107,10 +107,10 @@ TEST(PerfPllDataExchange, nonsym_knownsizes) if (numProcs < 3) { GTEST_SKIP(); } const unsigned NUM_BATCHES = 5; - const unsigned NUM_RUNS = stk::unit_test_util::simple_fields::get_command_line_option("-r", 5); + const unsigned NUM_RUNS = stk::unit_test_util::get_command_line_option("-r", 5); const unsigned defaultBandwidth = std::max(2, numProcs/5); - const unsigned 
BANDWIDTH = stk::unit_test_util::simple_fields::get_command_line_option("-b", defaultBandwidth); - const unsigned NUM_VALUES = stk::unit_test_util::simple_fields::get_command_line_option("-v", 20); + const unsigned BANDWIDTH = stk::unit_test_util::get_command_line_option("-b", defaultBandwidth); + const unsigned NUM_VALUES = stk::unit_test_util::get_command_line_option("-v", 20); const int myProc = stk::parallel_machine_rank(comm); diff --git a/packages/stk/stk_search/Jamfile b/packages/stk/stk_search/Jamfile index 88d9adb1dbe3..195b136d715d 100644 --- a/packages/stk/stk_search/Jamfile +++ b/packages/stk/stk_search/Jamfile @@ -44,7 +44,8 @@ project votd : requirements $(sierra-warnings) $(stk_search-root-inc) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM : usage-requirements $(stk_search-root-inc) : build-dir $(stk_search-builddir) diff --git a/packages/stk/stk_search/stk_search/BoxIdent.hpp b/packages/stk/stk_search/stk_search/BoxIdent.hpp index 7db8fd016798..d3252f74f028 100644 --- a/packages/stk/stk_search/stk_search/BoxIdent.hpp +++ b/packages/stk/stk_search/stk_search/BoxIdent.hpp @@ -35,6 +35,7 @@ #ifndef BOXIDENT_HPP #define BOXIDENT_HPP +#include "Kokkos_Core.hpp" namespace stk::search { template @@ -80,6 +81,7 @@ struct IdentIntersection return domainIdent < rhs.domainIdent || (!(rhs.domainIdent < domainIdent) && rangeIdent < rhs.rangeIdent); } + }; template @@ -98,6 +100,7 @@ struct IdentProcIntersection return domainIdentProc < rhs.domainIdentProc || (!(rhs.domainIdentProc < domainIdentProc) && rangeIdentProc < rhs.rangeIdentProc); } + }; } diff --git a/packages/stk/stk_search/stk_search/CMakeLists.txt b/packages/stk/stk_search/stk_search/CMakeLists.txt index 45b6b7b9d45b..4a64bd432acc 100644 --- a/packages/stk/stk_search/stk_search/CMakeLists.txt +++ b/packages/stk/stk_search/stk_search/CMakeLists.txt @@ -38,7 +38,8 @@ FILE(GLOB HEADERS_KDTREE ${CMAKE_CURRENT_SOURCE_DIR}/kdtree/*.hpp) FILE(GLOB HEADERS_MORTON 
${CMAKE_CURRENT_SOURCE_DIR}/morton_lbvh/*.hpp) - +FILE(GLOB HEADERS_ARBORX + ${CMAKE_CURRENT_SOURCE_DIR}/arborx/*.hpp) FILE(GLOB HEADERS *.hpp) FILE(GLOB SOURCES_KDTREE @@ -61,6 +62,8 @@ else() if(STK_HAS_ARBORX) target_link_libraries(stk_search PUBLIC ArborX::ArborX) + INSTALL(FILES ${HEADERS_ARBORX} DESTINATION + ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_search/arborx) endif() # find_package(KokkosKernels REQUIRED) @@ -80,5 +83,5 @@ INSTALL(FILES ${HEADERS_MORTON} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_search/morton_lbvh) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_search DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_search EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_search/stk_search/CoarseSearch.hpp b/packages/stk/stk_search/stk_search/CoarseSearch.hpp index 1b3dba2b55b8..4bae8287d1af 100644 --- a/packages/stk/stk_search/stk_search/CoarseSearch.hpp +++ b/packages/stk/stk_search/stk_search/CoarseSearch.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// #ifndef stk_search_CoarseSearch_hpp #define stk_search_CoarseSearch_hpp @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -75,7 +76,6 @@ namespace stk::search { // potentially swapping the domain and range vectors. The final results should // be independent of this flag. // -//BEGINcoarse_search_impl template void coarse_search(std::vector> const & domain, std::vector> const & range, @@ -112,18 +112,20 @@ void coarse_search(std::vector> co } } } -//ENDcoarse_search_impl -template -void coarse_search(Kokkos::View*, ExecutionSpace> const & domain, - Kokkos::View*, ExecutionSpace> const & range, +template +void coarse_search(DomainView const & domain, + RangeView const & range, SearchMethod method, stk::ParallelMachine comm, - Kokkos::View*, ExecutionSpace>& intersections, + ResultView& intersections, ExecutionSpace const& execSpace = ExecutionSpace{}, bool enforceSearchResultSymmetry = true, bool autoSwapDomainAndRange = true) { + check_coarse_search_types_parallel(); + Kokkos::Profiling::pushRegion("STK coarse search with Views"); + switch (method) { case ARBORX: { #ifdef STK_HAS_ARBORX @@ -145,6 +147,8 @@ void coarse_search(Kokkos::View STK_ThrowErrorMsg("Unsupported coarse_search method supplied. 
Choices are: KDTREE, MORTON_LBVH, or ARBORX."); } } + + Kokkos::Profiling::popRegion(); } } // namespace stk::search diff --git a/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp b/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp index eee1434aaece..836a088b0853 100644 --- a/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp +++ b/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp @@ -46,6 +46,7 @@ #include "stk_search/BoxIdent.hpp" #include "stk_search/kdtree/KDTree_BoundingBox.hpp" #include "stk_search/kdtree/KDTree.hpp" +#include "DeviceMPIUtils.hpp" namespace stk::search { @@ -119,6 +120,24 @@ inline void all_gather_helper(const DomainBox& localBox, std::vector MPI_Allgather(localData, sizeof(DomainBox), MPI_CHAR, global_box_array.data(), sizeof(DomainBox), MPI_CHAR, comm); } +template +void all_gather_helper(const DomainBox& localBox, Kokkos::View global_box_array, + MPI_Comm comm) +{ + int commSize = stk::parallel_machine_size(comm); + if (static_cast(global_box_array.extent(0)) < commSize) + { + Kokkos::resize(global_box_array, commSize); + } + auto global_box_array_host = Kokkos::create_mirror_view(global_box_array); + + impl::check_view_c_layout(global_box_array_host); + + MPI_Allgather(&localBox, sizeof(DomainBox), MPI_CHAR, + global_box_array_host.data(), sizeof(DomainBox), MPI_CHAR, comm); + Kokkos::deep_copy(global_box_array, global_box_array_host); +} + template inline void concatenate_thread_lists(const std::vector> &vectorIn, std::vector &vectorOut) @@ -318,6 +337,100 @@ bool constexpr is_stk_point = std::is_same_v> || std::is_base_of_v, T>; } +template +class SearchResultCommunication +{ + using ValueType = typename SearchResultsType::value_type; + using DeviceBufferAppender = impl::DeviceMPIBufferAppender; + using DeviceBuffers = impl::DeviceMPIBuffers; + + public: + struct FillBuffer {}; + struct UnpackBuffer {}; + + + //Note: unlike communicate_views(), this assumes search_relations is + // exactly the size it needs to be 
(no unused entries at the end) + SearchResultCommunication(stk::ParallelMachine comm, + SearchResultsType searchResults, + ExecutionSpace execSpace, + bool enforceSearchResultSymmetry = true) : + m_comm(comm), + m_searchResults(searchResults), + m_execSpace(execSpace), + m_enforceSearchResultSymmetry(enforceSearchResultSymmetry), + m_bufferAppender(stk::parallel_machine_size(comm), execSpace), + m_commRank(stk::parallel_machine_rank(comm)), + m_numLocalResults(searchResults.size()) + {} + + SearchResultsType run() + { + Kokkos::Profiling::pushRegion("Filling MPI Buffers"); + Kokkos::RangePolicy packPolicy(m_execSpace, 0, m_searchResults.extent(0)); + Kokkos::parallel_for("mpi_buffer_sizing", packPolicy, *this); + m_execSpace.fence(); + + m_bufferAppender.allocate_buffers(); + + Kokkos::parallel_for("mpi_buffer_fill", packPolicy, *this); + m_execSpace.fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Parallel Communication"); + impl::DeviceDataExchangeUnknownPattern exchanger(m_bufferAppender.getBuffers(), m_execSpace, m_comm); + m_recvBuffers = exchanger.communicate(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Unpacking buffers"); + Kokkos::resize(Kokkos::WithoutInitializing, m_searchResults, m_numLocalResults + m_recvBuffers.buffers.extent(0)); + Kokkos::RangePolicy unpackPolicy(m_execSpace, 0, m_recvBuffers.buffers.extent(0)); + Kokkos::parallel_for("mpi_buffer_unpack", unpackPolicy, *this); + m_execSpace.fence(); + Kokkos::Profiling::popRegion(); + + return m_searchResults; + } + + KOKKOS_INLINE_FUNCTION + void operator()(FillBuffer /*tag*/, int idx) const + { + ValueType searchResult = m_searchResults(idx); + int domainProc = static_cast(searchResult.domainIdentProc.proc()); + + if (domainProc != m_commRank) { + m_bufferAppender.push_back(domainProc, searchResult); + } + + if (m_enforceSearchResultSymmetry) + { + int rangeProc = static_cast(searchResult.rangeIdentProc.proc()); + if (rangeProc != m_commRank 
&& + rangeProc != domainProc) { + m_bufferAppender.push_back(rangeProc, searchResult); + } + } + } + + KOKKOS_INLINE_FUNCTION + void operator()(UnpackBuffer /*tag*/, int idx) const + { + m_searchResults(idx + m_numLocalResults) = m_recvBuffers.buffers(idx); + } + + + private: + stk::ParallelMachine m_comm; + SearchResultsType m_searchResults; + ExecutionSpace m_execSpace; + bool m_enforceSearchResultSymmetry; + + DeviceBufferAppender m_bufferAppender; + int m_commRank; + int m_numLocalResults; + DeviceBuffers m_recvBuffers; +}; + } // end namespace stk::search #endif diff --git a/packages/stk/stk_search/stk_search/DeviceMPIUtils.hpp b/packages/stk/stk_search/stk_search/DeviceMPIUtils.hpp new file mode 100644 index 000000000000..ac7dbe77ea17 --- /dev/null +++ b/packages/stk/stk_search/stk_search/DeviceMPIUtils.hpp @@ -0,0 +1,338 @@ +#ifndef STK_SEARCH_DEVICE_MPI_UTILS_HPP +#define STK_SEARCH_DEVICE_MPI_UTILS_HPP + +#include +#include "Kokkos_Core.hpp" +#include "stk_util/parallel/DeviceAwareMPI.hpp" +#include "stk_util/util/ReportHandler.hpp" +#include "stk_util/parallel/MPITagManager.hpp" +#include "stk_util/parallel/ParallelComm.hpp" +#include "stk_util/parallel/DeviceAwareMPI.hpp" + +namespace stk::search { + +namespace impl { + +// throws an error if the view data layout is different than a C array (or std::vector). +// This can happen if Kokkos puts padding in, for example for memory alignment reasons. +// If no error is thrown, the view can be passed into MPI as a send or receive buffer. 
+template +void check_view_c_layout(ViewType view) +{ + static_assert(ViewType::rank() == 1); + if (view.extent(0) == 0) + { + return; + } + + size_t viewSizeBytes = (&(view(view.extent(0) - 1)) - &(view(0)) + 1) * sizeof(typename ViewType::value_type); + size_t cArraySize = sizeof(typename ViewType::value_type) * view.extent(0); + STK_ThrowRequireMsg(viewSizeBytes == cArraySize, "The view must not have any padding"); +} + +template +void check_buffer_sizes(const BufferSizesView bufferSizes, const BuffersView buffers) +{ + using ExecutionSpace = typename BufferSizesView::execution_space; + Kokkos::RangePolicy policy(0, bufferSizes.extent(0)); + auto func = KOKKOS_LAMBDA(int i, int& lsum) + { + lsum += bufferSizes(i); + }; + + int totalSize = 0; + Kokkos::parallel_reduce("check DeviceMPIBuffers size", policy, func, totalSize); + STK_ThrowRequireMsg(size_t(totalSize) == buffers.extent(0), "the buffer view must be the the total size of the buffers"); +} + + +template +struct DeviceMPIBuffers +{ + using BufferSizesView = Kokkos::View; + using BufferView = Kokkos::View; + using ValueType = T; + + DeviceMPIBuffers() : + bufferSizes("mpi_buffer_sizes", 0), + buffers("mpi_buffers", 0) + {} + + DeviceMPIBuffers(int numRanks) : + bufferSizes("mpi_buffer_sizes", numRanks), + buffers("mpi_buffers", 0) + {} + + DeviceMPIBuffers(BufferSizesView _bufferSizes, BufferView _buffers) : + bufferSizes(_bufferSizes), + buffers(_buffers) + { +#ifndef NDEBUG + check_buffer_sizes(bufferSizes, buffers); +#endif + } + + BufferSizesView bufferSizes; + BufferView buffers; +}; + +template +class DeviceMPIBufferAppender +{ + public: + using DeviceBuffers = DeviceMPIBuffers; + + DeviceMPIBufferAppender() = default; + + DeviceMPIBufferAppender(int numBuffers, ExecutionSpace execSpace) : + m_deviceBuffers(numBuffers), + m_bufferIdxs("mpi_buffer_current_idx", numBuffers), + m_execSpace(execSpace), + m_areBuffersSized("areBuffersSized") + { + Kokkos::deep_copy(m_areBuffersSized, false); + } + + 
KOKKOS_INLINE_FUNCTION + void push_back(int rank, const T& val) const + { + if (!(m_areBuffersSized())) + { + Kokkos::atomic_add(&m_deviceBuffers.bufferSizes(rank), 1); + } else + { + int idx = Kokkos::atomic_fetch_add(&m_bufferIdxs(rank), 1); +#ifndef NDEBUG + int offset = 0; + for (int i=0; i < rank; ++i) + { + offset += m_deviceBuffers.bufferSizes(i); + } + KOKKOS_ASSERT(idx >= offset && idx < (m_deviceBuffers.bufferSizes(rank) + offset)) +#endif + m_deviceBuffers.buffers(idx) = val; + } + } + + void allocate_buffers() + { + bool areBuffersSized; + Kokkos::deep_copy(areBuffersSized, m_areBuffersSized); + STK_ThrowRequireMsg(!areBuffersSized, "DeviceMPIBufferAppender can only be used once"); + + Kokkos::View totalSize("total_size"); + + Kokkos::RangePolicy policy(m_execSpace, 0, m_bufferIdxs.extent(0)); + int lastIdx = m_deviceBuffers.bufferSizes.extent(0) - 1; + auto deviceBuffersLocal = m_deviceBuffers; + auto bufferIdxsLocal = m_bufferIdxs; + auto func = KOKKOS_LAMBDA(int i, int& update, bool final) + { + if (final) + { + bufferIdxsLocal(i) = update; + if (i == lastIdx) + { + totalSize() = update + deviceBuffersLocal.bufferSizes(lastIdx); + } + } + + update += deviceBuffersLocal.bufferSizes(i); + }; + + Kokkos::parallel_scan("set_buffer_offsets", policy, func); + m_execSpace.fence(); + + int totalSizeHost; + Kokkos::deep_copy(totalSizeHost, totalSize); + Kokkos::resize(Kokkos::WithoutInitializing, m_deviceBuffers.buffers, totalSizeHost); + Kokkos::deep_copy(m_areBuffersSized, true); + } + + DeviceBuffers getBuffers() const { return m_deviceBuffers; } + + private: + DeviceBuffers m_deviceBuffers; + Kokkos::View m_bufferIdxs; + ExecutionSpace m_execSpace; + + Kokkos::View m_areBuffersSized; +}; + +template +class DeviceDataExchangeUnknownPattern +{ + public: + using DeviceBuffers = impl::DeviceMPIBuffers; + using HostMirrorSpace = typename DeviceBuffers::BufferSizesView::host_mirror_space; + + using BufferSizesHostView = typename 
DeviceBuffers::BufferSizesView::HostMirror; + using BufferHostView = typename DeviceBuffers::BufferView::HostMirror; + + // Note: running the ConeCrush and Jenga performance tests on Vortex showed device MPI + // was never faster than host MPI and sometimes ~5% slower. Setting useDeviceMPI + // to false for now, maybe things will be better on ATS-4 + explicit DeviceDataExchangeUnknownPattern(const DeviceBuffers& sendBuffers, ExecutionSpace execSpace, + MPI_Comm comm, bool useDeviceMPI=false) : + m_sendBuffers(sendBuffers), + m_recvBuffers(stk::parallel_machine_size(comm)), + m_sendBufferSizesHost(Kokkos::create_mirror_view_and_copy(HostMirrorSpace{}, sendBuffers.bufferSizes)), + m_sendBufferHost(), + m_comm(comm), + m_useDeviceMPI(useDeviceMPI) + { + if (useDeviceMPI && !stk::have_device_aware_mpi()) + { + STK_ThrowRequireMsg(false, "STK did detect device-aware MPI on this system"); + } + + if (!useDeviceMPI) + { + m_sendBufferHost = Kokkos::create_mirror_view_and_copy(HostMirrorSpace{}, sendBuffers.buffers); + } + } + + DeviceBuffers communicate() + { + int commSize = stk::parallel_machine_size(m_comm); + MPITag tag = get_mpi_tag_manager().get_tag(m_comm); + + Kokkos::Profiling::pushRegion("Compute Recv lists"); + std::vector recvSizes = getRecvCounts(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Create recv buffers"); + createReceiveBuffers(recvSizes); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Post sends and receives"); + std::vector sendRequests(commSize, MPI_REQUEST_NULL); + std::vector recvRequests(commSize, MPI_REQUEST_NULL); + + postReceives(recvSizes, recvRequests, tag); + postSends(sendRequests, tag); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Wait for MPI completion"); + MPI_Waitall(commSize, recvRequests.data(), MPI_STATUSES_IGNORE); + if (!m_useDeviceMPI) + { + Kokkos::deep_copy(m_execSpace, m_recvBuffers.buffers, m_recvBufferHost); + } + MPI_Waitall(commSize, 
sendRequests.data(), MPI_STATUSES_IGNORE); + Kokkos::Profiling::popRegion(); + + return m_recvBuffers; + } + + private: + void createReceiveBuffers(const std::vector& recvSizes) + { + using BufferSizesVectorWrap = Kokkos::View>; + BufferSizesVectorWrap recvBufferSizesHost(Kokkos::view_wrap(recvSizes.data()), recvSizes.size()); + Kokkos::deep_copy(m_recvBuffers.bufferSizes, recvBufferSizesHost); + + int totalSize = std::accumulate(recvSizes.begin(), recvSizes.end(), int(0)); + Kokkos::resize(Kokkos::WithoutInitializing, m_recvBuffers.buffers, totalSize); + if (!m_useDeviceMPI) + { + m_recvBufferHost = Kokkos::create_mirror_view(m_recvBuffers.buffers); + } + } + + int postReceives(const std::vector& recvSizes, std::vector& recvRequests, stk::MPITag tag) + { + size_t recvBufOffset = 0; + int numRecvs = 0; + T* recvBufferData = m_useDeviceMPI ? m_recvBuffers.buffers.data() : m_recvBufferHost.data(); + for (int rank=0; rank < int(recvRequests.size()); ++rank) + { + if (recvSizes[rank] > 0) + { + MPI_Irecv(recvBufferData + recvBufOffset, recvSizes[rank]*sizeof(T), MPI_CHAR, rank, tag, m_comm, &recvRequests[rank]); + numRecvs++; + } + + recvBufOffset += recvSizes[rank]; + } + + return numRecvs; + } + + std::vector getRecvCounts() + { + auto tag = stk::get_mpi_tag_manager().get_tag(m_comm); + check_view_c_layout(m_sendBufferSizesHost); + + int numItems; + MPI_Reduce_scatter_block(&(m_sendBufferSizesHost(0)), &numItems, 1, MPI_INT, MPI_SUM, m_comm); + + int numProcs = m_sendBufferSizesHost.extent(0); + std::vector recvCounts(numProcs, 0); + std::vector sendReqs(numProcs, MPI_REQUEST_NULL); + + for (int proc=0; proc < numProcs; ++proc) + { + if (m_sendBufferSizesHost(proc) > 0) + { + MPI_Isend(&(m_sendBufferSizesHost(proc)), 1, MPI_INT, proc, tag, m_comm, &(sendReqs[proc])); + } + } + + //TODO: this doesn't need to be a separate step. 
We could do this + // as part of the receives, and then run a kernel to permute + // the data (or modify the API of DeviceBuffers to allow the data to be permuted) + int recvCount = 0; + while (recvCount != numItems) + { + MPI_Status status; + int itemCount = 0; + MPI_Recv(&itemCount, 1, MPI_INT, MPI_ANY_SOURCE, tag, m_comm, &status); + int sender = status.MPI_SOURCE; + + recvCounts[sender] = itemCount; + recvCount += itemCount; + } + + MPI_Waitall(numProcs, sendReqs.data(), MPI_STATUSES_IGNORE); + + return recvCounts; + } + + int postSends(std::vector& sendRequests, stk::MPITag tag) + { + size_t sendBufOffset = 0; + int numSends = 0; + const T* sendBufferData = m_useDeviceMPI ? m_sendBuffers.buffers.data() : m_sendBufferHost.data(); + for (int rank=0; rank < int(sendRequests.size()); ++rank) + { + if (m_sendBufferSizesHost(rank) > 0) + { + MPI_Isend(sendBufferData + sendBufOffset, m_sendBufferSizesHost(rank)*sizeof(T), MPI_CHAR, rank, tag, m_comm, &sendRequests[rank]); + numSends++; + } + + sendBufOffset += m_sendBufferSizesHost(rank); + } + + return numSends; + } + + + DeviceBuffers m_sendBuffers; + DeviceBuffers m_recvBuffers; + + BufferSizesHostView m_sendBufferSizesHost; + BufferHostView m_sendBufferHost; + + BufferHostView m_recvBufferHost; + + ExecutionSpace m_execSpace; + MPI_Comm m_comm; + bool m_useDeviceMPI; +}; + +} +} + +#endif diff --git a/packages/stk/stk_search/stk_search/FilterCoarseSearch.hpp b/packages/stk/stk_search/stk_search/FilterCoarseSearch.hpp index 926b5aaaf57a..7b1c6525f938 100644 --- a/packages/stk/stk_search/stk_search/FilterCoarseSearch.hpp +++ b/packages/stk/stk_search/stk_search/FilterCoarseSearch.hpp @@ -257,19 +257,11 @@ double get_distance_from_centroid(MESH& mesh, const typename MESH::EntityKey k, template void remove_non_local_range_entities(EntityProcRelationVec& rangeToDomain, int localProc) { - size_t keep = 0; - for(size_t i=0; i keep) { - rangeToDomain[keep] = rangeToDomain[i]; - } - ++keep; - } - } - 
if(keep(x.first.proc()) != localProc); }), + rangeToDomain.end()); } template diff --git a/packages/stk/stk_search/stk_search/HelperTraits.hpp b/packages/stk/stk_search/stk_search/HelperTraits.hpp new file mode 100644 index 000000000000..24674151321a --- /dev/null +++ b/packages/stk/stk_search/stk_search/HelperTraits.hpp @@ -0,0 +1,187 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef STK_SEARCH_HELPER_TRAITS_H +#define STK_SEARCH_HELPER_TRAITS_H + +#include +#include "BoxIdent.hpp" +#include "Kokkos_Core.hpp" +#include "stk_search/SearchMethod.hpp" + +#include + +namespace stk::search { + +template +struct is_box_ident : std::false_type +{}; + +template +struct is_box_ident> : std::true_type +{}; + +template +struct is_box_ident_proc : std::false_type +{}; + +template +struct is_box_ident_proc> : std::true_type +{}; + +template +struct is_ident_intersection : std::false_type +{}; + +template +struct is_ident_intersection> : std::true_type +{}; + +template +struct is_ident_proc_intersection : std::false_type +{}; + +template +struct is_ident_proc_intersection> : std::true_type +{}; + + +template +struct value_type_or_void +{ + using type = void; +}; + +template +struct value_type_or_void> +{ + using type = typename T::value_type; +}; + +template +using value_type_or_void_t = typename value_type_or_void>::type; + + +// replace with std::remove_cvref in c++20 +template +struct remove_cvref +{ + using type = std::remove_cv_t>; +}; + +template +using remove_cvref_t = typename remove_cvref::type; + +template +constexpr bool is_box_ident_v = is_box_ident>::value; + +template +constexpr bool is_box_ident_container_v = is_box_ident_v>>; + +template +constexpr bool is_box_ident_proc_v = is_box_ident_proc>::value; + +template +constexpr bool is_box_ident_proc_container_v = is_box_ident_proc_v>>; + +template 
+constexpr bool is_ident_intersection_v = is_ident_intersection>::value; + +template +constexpr bool is_ident_intersection_container_v = is_ident_intersection_v>; + +template +constexpr bool is_ident_proc_intersection_v = is_ident_proc_intersection>::value; + +template +constexpr bool is_ident_proc_intersection_container_v = is_ident_proc_intersection_v>; + +template +constexpr bool is_modifiable_v = !std::is_const_v>; + +template +constexpr bool is_modifiable_view_v = Kokkos::is_view_v> && + is_modifiable_v::value_type>; + +template +constexpr void check_view_is_usable_from() +{ + using ExecSpace = std::remove_cv_t; + using ViewType = std::remove_cv_t; + static_assert(Kokkos::is_execution_space_v, "type passed as ExecSpace is not an ExecutionSpace"); + static_assert(Kokkos::is_view_v, "type passed in as ViewType is not a view"); + static_assert(Kokkos::SpaceAccessibility::accessible, "ViewType is not accesible from ExecSpace"); +} + +template +constexpr void check_domain_or_range_view_types_local() +{ + check_view_is_usable_from(); + static_assert(DomainOrRangeView::rank() == 1, "View must be a rank 1 View"); + static_assert(is_box_ident_container_v, "View must be a View of BoxIdent"); +} + + +template +constexpr void check_coarse_search_types_local() +{ + check_domain_or_range_view_types_local(); + check_domain_or_range_view_types_local(); + check_view_is_usable_from(); + static_assert(ResultView::rank() == 1, "ResultView must be a rank 1 View"); + static_assert(is_ident_intersection_container_v, "ResultView must be a View of IdentIntersection"); + static_assert(is_modifiable_view_v, "ResultView must not be const (ie. 
View not View)"); +} + +template +constexpr void check_domain_or_range_view_parallel() +{ + check_view_is_usable_from(); + static_assert(DomainOrRangeView::rank() == 1, "View must be a rank 1 View"); + static_assert(is_box_ident_proc_container_v, "View must be a View of BoxIdentProc"); +} + +template +constexpr void check_coarse_search_types_parallel() +{ + check_domain_or_range_view_parallel(); + check_domain_or_range_view_parallel(); + check_view_is_usable_from(); + static_assert(ResultView::rank() == 1, "ResultView must be a rank 1 View"); + static_assert(is_ident_proc_intersection_container_v, "ResultView must be a View of IdentProcIntersection"); + static_assert(is_modifiable_view_v, "ResultView must not be const (ie. View not View)"); +} + +} + +#endif \ No newline at end of file diff --git a/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp b/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp index 8edd8d2c75f5..cbc0ba445782 100644 --- a/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp +++ b/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp @@ -79,15 +79,18 @@ void local_coarse_search( } -template + +template void local_coarse_search( - Kokkos::View*, ExecutionSpace> const & domain, - Kokkos::View*, ExecutionSpace> const & range, + DomainView const & domain, + RangeView const & range, SearchMethod method, - Kokkos::View*, ExecutionSpace> & intersections, + ResultView & intersections, ExecutionSpace const& execSpace = ExecutionSpace{}) + { + check_coarse_search_types_local(); + switch (method) { case ARBORX: { #ifdef STK_HAS_ARBORX diff --git a/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp b/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp index 8e5d2f537b04..e7ad3f271847 100644 --- a/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp +++ b/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp @@ -41,6 +41,7 @@ #include "Kokkos_StdAlgorithms.hpp" #include 
"stk_search/CommonSearchUtil.hpp" #include "stk_search/arborx/StkToArborX.hpp" +#include "stk_search/HelperTraits.hpp" #include "stk_util/util/ReportHandler.hpp" #include "stk_util/util/SortAndUnique.hpp" #include "stk_search/arborx/AccessTraits.hpp" @@ -241,25 +242,29 @@ inline void coarse_search_arborx(std::vector +template inline void coarse_search_arborx( - Kokkos::View*, ExecutionSpace> const& localDomain, - Kokkos::View*, ExecutionSpace> const& localRange, + DomainView const& localDomain, + RangeView const& localRange, MPI_Comm comm, - Kokkos::View*, ExecutionSpace>& searchResults, + ResultView& searchResults, ExecutionSpace const& execSpace = ExecutionSpace{}, bool enforceSearchResultSymmetry = true) { + check_coarse_search_types_parallel(); using HostSpace = Kokkos::DefaultHostExecutionSpace; using ExecSpace = ExecutionSpace; using MemSpace = typename ExecSpace::memory_space; - using DomainValueType = typename DomainBoxType::value_type; - using RangeValueType = typename RangeBoxType::value_type; - using ArborXDomainType = typename impl::StkToArborX::ArborXType; + using DomainBoxType = typename DomainView::value_type::box_type; + using DomainIdentProcType = typename DomainView::value_type::ident_proc_type; + using RangeBoxType = typename RangeView::value_type::box_type; + using RangeIdentProcType = typename RangeView::value_type::ident_proc_type; + using DomainValueType = typename DomainBoxType::value_type; + using RangeValueType = typename RangeBoxType::value_type; + using ArborXDomainType = typename impl::StkToArborX::ArborXType; STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); diff --git a/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp b/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp index 50e53c068a95..ba38362a6fab 100644 --- a/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp +++ 
b/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp @@ -42,6 +42,7 @@ #include "ArborX.hpp" #include "Kokkos_Core.hpp" #include "stk_search/BoxIdent.hpp" +#include "stk_search/HelperTraits.hpp" #include "stk_search/CommonSearchUtil.hpp" #include "stk_search/arborx/StkToArborX.hpp" #include "stk_util/util/ReportHandler.hpp" @@ -53,13 +54,10 @@ namespace stk::search namespace impl { -template +template class SearchResultsInserter { public: - static_assert(Kokkos::is_view_v); - static_assert(Kokkos::is_view_v); - using DomainBoxIdent = typename DomainViewType::value_type; using RangeBoxIdent = typename RangeViewType::value_type; using DomainBox = typename DomainBoxIdent::box_type; @@ -67,31 +65,62 @@ class SearchResultsInserter using RangeBox = typename RangeBoxIdent::box_type; using RangeIdent = typename RangeBoxIdent::ident_type; using ExecutionSpace = typename DomainViewType::execution_space; - using SearchResultsView = Kokkos::View*, ExecutionSpace>; using ArborXPredicateWithIndex = typename ArborX::AccessTraits, ArborX::PredicatesTag>::ArborXPredicateWithIndex; static bool constexpr isSphere = impl::is_stk_sphere || impl::is_stk_sphere; - SearchResultsInserter(DomainViewType localDomain, RangeViewType localRange, SearchResultsView searchResults) : + SearchResultsInserter(DomainViewType localDomain, RangeViewType localRange, + ResultView searchResults, bool swapRangeDomain) : m_localDomain(localDomain), m_localRange(localRange), m_searchResults(searchResults), - m_counter("counter"), - m_searchResultsSize(0) - {} + m_counter(Kokkos::ViewAllocateWithoutInitializing("counter")), + m_searchResultsSize(m_searchResults.extent(0)), + m_swapRangeDomain(swapRangeDomain) + { + check_domain_or_range_view_types_local(); + check_domain_or_range_view_types_local(); + static_assert(is_ident_intersection_container_v, "ResultView must be a View"); + + Kokkos::deep_copy(m_counter, 0); + + Kokkos::Profiling::pushRegion("SearchResultsInserter pre-allocation"); + 
if (m_searchResults.extent(0) == 0) { + unsigned initialCapacity = std::max(localDomain.extent(0), localRange.extent(0)) * 16; + Kokkos::resize(Kokkos::WithoutInitializing, m_searchResults, initialCapacity); + m_searchResultsSize = initialCapacity; + } + Kokkos::Profiling::popRegion(); + } KOKKOS_INLINE_FUNCTION void operator()(const ArborXPredicateWithIndex& predicate, int rangeBoxIdx) const { - int domainBoxIdx = ArborX::getData(predicate); + const int domainBoxIdx = ArborX::getData(predicate); const DomainBoxIdent domainBoxIdent = m_localDomain(domainBoxIdx); const RangeBoxIdent rangeBoxIdent = m_localRange(rangeBoxIdx); - if (!(isSphere) || intersects(domainBoxIdent.box, rangeBoxIdent.box)) + + if constexpr (isSphere) { + if (intersects(domainBoxIdent.box, rangeBoxIdent.box)) { + insert_result(domainBoxIdent.ident, rangeBoxIdent.ident); + } + } + else { + insert_result(domainBoxIdent.ident, rangeBoxIdent.ident); + } + } + + KOKKOS_INLINE_FUNCTION + void insert_result(typename DomainBoxIdent::ident_type domainIdent, typename RangeBoxIdent::ident_type rangeIdent) const + { + int idx = Kokkos::atomic_fetch_add(&(m_counter()), 1); + if (idx < m_searchResultsSize) { - int idx = Kokkos::atomic_fetch_add(&(m_counter()), 1); - if (idx < m_searchResultsSize) - { - m_searchResults(idx) = {domainBoxIdent.ident, rangeBoxIdent.ident}; + if (m_swapRangeDomain) { + m_searchResults(idx) = {rangeIdent, domainIdent}; + } + else { + m_searchResults(idx) = {domainIdent, rangeIdent}; } } } @@ -102,7 +131,7 @@ class SearchResultsInserter Kokkos::deep_copy(m_searchResultsSize, m_counter); bool needToRunAgain = m_searchResultsSize > initialResultsSize; - Kokkos::resize(m_searchResults, m_searchResultsSize); + Kokkos::resize(Kokkos::WithoutInitializing, m_searchResults, m_searchResultsSize); if (needToRunAgain) { Kokkos::deep_copy(m_counter, 0); @@ -111,23 +140,17 @@ class SearchResultsInserter return needToRunAgain; } - SearchResultsView getSearchResults() { return m_searchResults; } 
+ ResultView getSearchResults() { return m_searchResults; } private: DomainViewType m_localDomain; RangeViewType m_localRange; - SearchResultsView m_searchResults; + ResultView m_searchResults; Kokkos::View m_counter; int m_searchResultsSize; + bool m_swapRangeDomain; }; -template -SearchResultsInserter create_results_inserter(DomainViewType localDomain, RangeViewType localRange, - typename SearchResultsInserter::SearchResultsView searchResults) -{ - return SearchResultsInserter(localDomain, localRange, searchResults); -} - } // namespace impl template @@ -172,41 +195,63 @@ inline void local_coarse_search_arborx(const std::vector +template inline void local_coarse_search_arborx( - const Kokkos::View*, ExecutionSpace>& localDomain, - const Kokkos::View*, ExecutionSpace>& localRange, - Kokkos::View*, ExecutionSpace>& searchResults, + const DomainView& localDomain, + const RangeView& localRange, + ResultView& searchResults, ExecutionSpace const& execSpace = ExecutionSpace{}) { - using ExecSpace = ExecutionSpace; - using MemSpace = typename ExecSpace::memory_space; - using DomainValueType = typename DomainBoxType::value_type; - using RangeValueType = typename RangeBoxType::value_type; + check_coarse_search_types_local(); + using MemSpace = typename ExecutionSpace::memory_space; + using DomainValueType = typename DomainView::value_type::box_type::value_type; + using RangeValueType = typename RangeView::value_type::box_type::value_type; STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); Kokkos::Profiling::pushRegion("STK call arborx"); - auto localRangeWrapped = impl::wrap_view_for_arborx(localRange); - auto localDomainWrapped = impl::wrap_view_for_arborx(localDomain); - auto callback = impl::create_results_inserter(localDomain, localRange, searchResults); + const bool rangeIsSmaller = localRange.extent(0) < localDomain.extent(0); + const bool swapRangeDomain = !rangeIsSmaller; - ArborX::BVH bvh(execSpace, 
localRangeWrapped); - bvh.query(execSpace, localDomainWrapped, callback); + if (rangeIsSmaller) { + auto localRangeWrapped = impl::wrap_view_for_arborx(localRange); + auto localDomainWrapped = impl::wrap_view_for_arborx(localDomain); - bool runSecondPass = callback.resizeSearchResults(); - if (runSecondPass) - { + impl::SearchResultsInserter callback(localDomain, localRange, searchResults, swapRangeDomain); + + ArborX::BVH bvh(execSpace, localRangeWrapped); bvh.query(execSpace, localDomainWrapped, callback); + + bool runSecondPass = callback.resizeSearchResults(); + if (runSecondPass) + { + bvh.query(execSpace, localDomainWrapped, callback); + } + + searchResults = callback.getSearchResults(); } + else { + auto localRangeWrapped = impl::wrap_view_for_arborx(localRange); + auto localDomainWrapped = impl::wrap_view_for_arborx(localDomain); + + impl::SearchResultsInserter callback(localRange, localDomain, searchResults, swapRangeDomain); - searchResults = callback.getSearchResults(); + ArborX::BVH bvh(execSpace, localDomainWrapped); + bvh.query(execSpace, localRangeWrapped, callback); + + bool runSecondPass = callback.resizeSearchResults(); + if (runSecondPass) + { + bvh.query(execSpace, localRangeWrapped, callback); + } + + searchResults = callback.getSearchResults(); + } Kokkos::fence(); Kokkos::Profiling::popRegion(); diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp index 82d9bd3d4a1c..7a11fd4bac28 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp @@ -44,6 +44,7 @@ #include "stk_search/IdentProc.hpp" #include "stk_search/CommonSearchUtil.hpp" #include "stk_search/BoundingBox.hpp" +#include "stk_search/HelperTraits.hpp" #include "stk_util/parallel/Parallel.hpp" #include "stk_util/parallel/ParallelReduce.hpp" #include 
"stk_util/util/ReportHandler.hpp" @@ -54,6 +55,72 @@ namespace stk::search { +namespace impl { +template +class MortonCoarseSearchVectorCallback +{ + using DomainVec = std::vector>; + using RangeVec = std::vector>; + using ExtendedRangeVec = std::vector; + using RemoteRangeIdentProcVec = std::vector; + using ResultVec = std::vector>; + + static bool constexpr isSearchExact = !(impl::is_stk_sphere || impl::is_stk_sphere); + + public: + MortonCoarseSearchVectorCallback(const DomainVec& localDomain, const RangeVec& localRange, + const ExtendedRangeVec& extendedRangeBoxes, + const RemoteRangeIdentProcVec& remoteRangeIdentProcs, + ResultVec& searchResults) : + m_localDomain(localDomain), + m_localRange(localRange), + m_extendedRangeBoxes(extendedRangeBoxes), + m_remoteRangeIdentProcs(remoteRangeIdentProcs), + m_numLocalRange(localRange.size()), + m_searchResults(searchResults) + { + m_searchResults.resize(0); + } + + void operator()(int domainIdx, int rangeIdx) const + { + if constexpr (isSearchExact) + { + insert_result(domainIdx, rangeIdx); + } else + { + if (intersects(m_localDomain[domainIdx].first, m_extendedRangeBoxes[rangeIdx])) + { + insert_result(domainIdx, rangeIdx); + } + } + } + + bool resize_for_second_pass() + { + return false; + } + + private: + void insert_result(int domainIdx, int rangeIdx) const + { + if (rangeIdx < m_numLocalRange) { + m_searchResults.emplace_back(m_localDomain[domainIdx].second, m_localRange[rangeIdx].second); + } + else { + m_searchResults.emplace_back(m_localDomain[domainIdx].second, m_remoteRangeIdentProcs[rangeIdx - m_numLocalRange]); + } + } + + const DomainVec& m_localDomain; + const RangeVec& m_localRange; + const ExtendedRangeVec& m_extendedRangeBoxes; + const RemoteRangeIdentProcVec& m_remoteRangeIdentProcs; + int m_numLocalRange; + ResultVec& m_searchResults; +}; +} + template inline void coarse_search_morton_lbvh(std::vector> const & localDomain, std::vector> const & localRange, @@ -62,6 +129,7 @@ inline void 
coarse_search_morton_lbvh(std::vector; STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); @@ -84,163 +152,203 @@ inline void coarse_search_morton_lbvh(std::vector collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, HostSpace{}); - collisionList.sync_from_device(); + Callback callback(localDomain, localRange, extendedRangeBoxes, remoteRangeIdentProcs, searchResults); + stk::search::morton_lbvh_search(domainTree, rangeTree, callback, HostSpace{}); Kokkos::Profiling::popRegion(); - Kokkos::Profiling::pushRegion("Aggregate search results"); - searchResults.clear(); - const unsigned numCollisions = collisionList.hm_idx(); - searchResults.reserve(numCollisions); + if (enforceSearchResultSymmetry) { + Kokkos::Profiling::pushRegion("Enforce results symmetry"); + stk::search::communicate_vector(comm, searchResults, enforceSearchResultSymmetry); + std::sort(searchResults.begin(), searchResults.end()); + Kokkos::Profiling::popRegion(); + } +} - const unsigned numLocalRange = localRange.size(); +namespace impl { +template +class BoundingShapeIntersectionCheckFunctor +{ + public: + using DomainBoxType = typename DomainView::value_type::box_type; + using RangeBoxType = typename RangeView::value_type::box_type; + using DomainIdentProcType = typename DomainView::value_type::ident_proc_type; + using RangeIdentProcType = typename RangeView::value_type::ident_proc_type; + + using ValueType = typename DomainBoxType::value_type; + + static constexpr bool isBoundingBoxSearchExact = (std::is_base_of_v> || std::is_base_of_v>) && + (std::is_base_of_v> || std::is_base_of_v>); + + BoundingShapeIntersectionCheckFunctor( + DomainView localDomain, + RangeView localRange, + const Kokkos::View extendedRangeBoxes, + const Kokkos::View remoteIdentProc, + ResultView searchResults) : + m_localDomain(localDomain), + m_localRange(localRange), + m_extendedRangeBoxes(extendedRangeBoxes), + 
m_remoteIdentProc(remoteIdentProc), + m_searchResults(searchResults), + m_searchResultIdx("index_in_search_results") + { + check_coarse_search_types_parallel(); + } - auto insert_into_results = [&, &remoteRangeIdentProcs=remoteRangeIdentProcs](unsigned domainIdx, unsigned rangeIdx) { - if (rangeIdx < numLocalRange) { - searchResults.emplace_back(localDomain[domainIdx].second, localRange[rangeIdx].second); + KOKKOS_INLINE_FUNCTION + void operator()(int domainIdx, int rangeIdx) const + { + if constexpr (isBoundingBoxSearchExact) + { + assign_to_results(domainIdx, rangeIdx); + } else + { + DomainBoxType domainBox = m_localDomain(domainIdx).box; + RangeBoxType rangeBox = m_extendedRangeBoxes(rangeIdx); + if (intersects(domainBox, rangeBox)) + { + assign_to_results(domainIdx, rangeIdx); + } + } } - else { - searchResults.emplace_back(localDomain[domainIdx].second, remoteRangeIdentProcs[rangeIdx - numLocalRange]); + + KOKKOS_INLINE_FUNCTION + void assign_to_results(int domainIdx, int rangeIdx) const + { + unsigned int idx = Kokkos::atomic_fetch_inc(&m_searchResultIdx()); + if (idx < m_searchResults.size()) + { + if (size_t(rangeIdx) < m_localRange.extent(0)) + { + m_searchResults(idx) = {m_localDomain(domainIdx).identProc, m_localRange(rangeIdx).identProc}; + } else + { + m_searchResults(idx) = {m_localDomain(domainIdx).identProc, m_remoteIdentProc(rangeIdx - m_localRange.extent(0))}; + } + } } - }; - for (unsigned i = 0; i < numCollisions; ++i) { - const unsigned domainIdx = collisionList.hm_data(i, 0); - const unsigned rangeIdx = collisionList.hm_data(i, 1); + bool resize_for_second_pass() + { + unsigned int numResults = 0; + Kokkos::deep_copy(numResults, m_searchResultIdx); + bool needSecondPass = numResults > m_searchResults.size(); + Kokkos::resize(m_searchResults, numResults); + Kokkos::deep_copy(m_searchResultIdx, 0); - if constexpr ((std::is_same_v> || std::is_same_v>) && - (std::is_same_v> || std::is_same_v>)) { - insert_into_results(domainIdx, rangeIdx); - } - 
else { - if (intersects(localDomain[domainIdx].first, extendedRangeBoxes[rangeIdx])) { - insert_into_results(domainIdx, rangeIdx); - } + return needSecondPass; } - } - Kokkos::Profiling::popRegion(); - if (enforceSearchResultSymmetry) { - Kokkos::Profiling::pushRegion("Enforce results symmetry"); - stk::search::communicate_vector(comm, searchResults, enforceSearchResultSymmetry); - std::sort(searchResults.begin(), searchResults.end()); - Kokkos::Profiling::popRegion(); - } + ResultView get_search_results() const { return m_searchResults; } + + private: + DomainView m_localDomain; + RangeView m_localRange; + Kokkos::View m_extendedRangeBoxes; + Kokkos::View m_remoteIdentProc; + ResultView m_searchResults; + + Kokkos::View m_searchResultIdx; +}; + } -template + +template inline void coarse_search_morton_lbvh( - Kokkos::View*, ExecutionSpace> const& localDomain, - Kokkos::View*, ExecutionSpace> const& localRange, + DomainView const& localDomain, + RangeView const& localRange, MPI_Comm comm, - Kokkos::View*, ExecutionSpace>& searchResults, + ResultView& searchResults, ExecutionSpace const& execSpace = ExecutionSpace{}, - bool enforceSearchResultSymmetry = true) + bool enforceSearchResultSymmetry = true, + bool doParallelConsistencyOnHost = false) { + check_coarse_search_types_parallel(); using HostSpace = Kokkos::DefaultHostExecutionSpace; - - STK_ThrowRequireMsg((std::is_same_v), - "The domain and range boxes must have the same floating-point precision"); - + using DomainBoxType = typename DomainView::value_type::box_type; + using RangeBoxType = typename RangeView::value_type::box_type; + using RangeIdentProcType = typename RangeView::value_type::ident_proc_type; using ValueType = typename DomainBoxType::value_type; + using BoundingShapeIntersectionChecker = impl::BoundingShapeIntersectionCheckFunctor; + using ExtendedRangeBoxView = Kokkos::View; + using ExtendedRangeIdentProcView = Kokkos::View; - Kokkos::Profiling::pushRegion("Move device results to host and convert 
into compatible data type."); - auto localDomainHost = Kokkos::create_mirror_view_and_copy(HostSpace{}, localDomain); - auto localRangeHost = Kokkos::create_mirror_view_and_copy(HostSpace{}, localRange); - - std::vector> localDomainVec(localDomainHost.size()); - std::vector> localRangeVec(localRangeHost.size()); - - for (unsigned i = 0; i < localDomainHost.size(); i++) { - auto hostPair = localDomainHost(i); - std::pair domainPair{hostPair.box, hostPair.identProc}; - localDomainVec[i] = domainPair; - } - - for (unsigned i = 0; i < localRangeHost.size(); i++) { - auto hostPair = localRangeHost(i); - std::pair rangePair{hostPair.box, hostPair.identProc}; - localRangeVec[i] = rangePair; - } - Kokkos::Profiling::popRegion(); + static_assert(std::is_same_v, + "The domain and range boxes must have the same floating-point precision"); Kokkos::Profiling::pushRegion("Parallel consistency: extend range box list"); - const auto [extendedRangeBoxes, remoteRangeIdentProcs] = - morton_extend_local_range_with_remote_boxes_that_might_intersect(localDomainVec, localRangeVec, comm, execSpace); + ExtendedRangeBoxView extendedRangeBoxes; + ExtendedRangeIdentProcView remoteRangeIdentProcs; + + bool expectedParallelConsistencyFasterOnHost = (localDomain.size() + localRange.size()) < 30000U; + if (expectedParallelConsistencyFasterOnHost || doParallelConsistencyOnHost) + { + HostSpace hostSpace{}; + auto localDomainHost = Kokkos::create_mirror_view_and_copy(hostSpace, localDomain); + auto localRangeHost = Kokkos::create_mirror_view_and_copy(hostSpace, localRange); + + const auto [extendedRangeBoxesHost, remoteRangeIdentProcsHost] = + morton_extend_local_range_with_remote_boxes_that_might_intersect(localDomainHost, localRangeHost, hostSpace, comm); + + Kokkos::resize(extendedRangeBoxes, extendedRangeBoxesHost.size()); + Kokkos::resize(remoteRangeIdentProcs, remoteRangeIdentProcsHost.size()); + + Kokkos::deep_copy(execSpace, extendedRangeBoxes, extendedRangeBoxesHost); + 
Kokkos::deep_copy(execSpace, remoteRangeIdentProcs, remoteRangeIdentProcsHost); + execSpace.fence(); + } else + { + std::tie(extendedRangeBoxes, remoteRangeIdentProcs) = + morton_extend_local_range_with_remote_boxes_that_might_intersect(localDomain, localRange, execSpace, comm); + } Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("Fill domain and range trees"); - stk::search::MortonAabbTree domainTree("Domain Tree", localDomainHost.size()); - stk::search::MortonAabbTree rangeTree("Range Tree", extendedRangeBoxes.size()); - stk::search::export_from_box_ident_proc_vec_to_morton_tree(localDomainVec, domainTree); - stk::search::export_from_box_vec_to_morton_tree(extendedRangeBoxes, rangeTree); + bool setBoxesOnHost = false; + stk::search::MortonAabbTree domainTree("Domain Tree", localDomain.extent(0), setBoxesOnHost); + stk::search::MortonAabbTree rangeTree("Range Tree", extendedRangeBoxes.extent(0), setBoxesOnHost); + + stk::search::export_box_ident_view_to_morton_tree(localDomain, domainTree, execSpace); + stk::search::export_box_view_to_morton_tree(extendedRangeBoxes, rangeTree, execSpace); + execSpace.fence(); domainTree.sync_to_device(); rangeTree.sync_to_device(); Kokkos::Profiling::popRegion(); - Kokkos::Profiling::pushRegion("Perform Morton query"); - stk::search::CollisionList collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, execSpace); - collisionList.sync_from_device(); - Kokkos::Profiling::popRegion(); - - Kokkos::Profiling::pushRegion("Aggregate search results"); - const unsigned numCollisions = collisionList.hm_idx(); - searchResults = Kokkos::View*, ExecutionSpace>( - Kokkos::ViewAllocateWithoutInitializing(searchResults.label()), numCollisions); - - auto searchResultsHost = Kokkos::create_mirror_view_and_copy(HostSpace{}, searchResults); - - const unsigned numLocalRange = localRangeHost.size(); - unsigned searchResultIndex = 0; - - auto insert_into_results = [&, 
&remoteRangeIdentProcs=remoteRangeIdentProcs](unsigned domainIdx, unsigned rangeIdx, unsigned& searchResultIdx) { - if (rangeIdx < numLocalRange) { - searchResultsHost(searchResultIdx) = {localDomainHost(domainIdx).identProc, localRangeHost(rangeIdx).identProc}; - searchResultIdx++; - } - else { - searchResultsHost(searchResultIdx) = {localDomainHost(domainIdx).identProc, remoteRangeIdentProcs[rangeIdx - numLocalRange]}; - searchResultIdx++; - } - }; - - for (unsigned i = 0; i < numCollisions; ++i) { - const unsigned domainIdx = collisionList.hm_data(i, 0); - const unsigned rangeIdx = collisionList.hm_data(i, 1); - - if constexpr ((std::is_same_v> || std::is_same_v>) && - (std::is_same_v> || std::is_same_v>)) { - insert_into_results(domainIdx, rangeIdx, searchResultIndex); - } - else { - if (intersects(localDomainHost(domainIdx).box, extendedRangeBoxes[rangeIdx])) { - insert_into_results(domainIdx, rangeIdx, searchResultIndex); - } - } + if (searchResults.size() == 0) + { + size_t sizeGuess = std::max(localDomain.size(), extendedRangeBoxes.size()) * COLLISION_SCALE_FACTOR; + Kokkos::resize(Kokkos::WithoutInitializing, searchResults, sizeGuess); } - Kokkos::resize(searchResultsHost, searchResultIndex); + Kokkos::Profiling::pushRegion("Inner morton search"); + BoundingShapeIntersectionChecker intersectionChecker(localDomain, localRange, extendedRangeBoxes, + remoteRangeIdentProcs, searchResults); + stk::search::morton_lbvh_search(domainTree, rangeTree, intersectionChecker); + searchResults = intersectionChecker.get_search_results(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Kokkos sort"); + Kokkos::sort(searchResults); Kokkos::Profiling::popRegion(); if (enforceSearchResultSymmetry) { Kokkos::Profiling::pushRegion("Enforce results symmetry"); - communicate_views(comm, searchResultsHost, enforceSearchResultSymmetry); + SearchResultCommunication resultComm(comm, searchResults, execSpace, enforceSearchResultSymmetry); + searchResults = 
resultComm.run(); Kokkos::Profiling::popRegion(); } - - Kokkos::resize(Kokkos::WithoutInitializing, searchResults, searchResultsHost.extent(0)); - Kokkos::deep_copy(searchResults, searchResultsHost); - Kokkos::sort(searchResults); } } diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp index 7553b9fe9bf2..bfa2bf6df944 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp @@ -37,6 +37,7 @@ #include "stk_search/Box.hpp" #include "stk_search/BoxIdent.hpp" #include "stk_search/CommonSearchUtil.hpp" +#include "stk_search/HelperTraits.hpp" #include "stk_search/morton_lbvh/MortonLBVH_Search.hpp" #include "stk_search/morton_lbvh/MortonLBVH_Tree.hpp" @@ -47,123 +48,50 @@ namespace stk::search { -template -void insert_intersections_into_results(const std::vector>& domain, - const std::vector>& range, - const stk::search::CollisionList rawIntersections, - std::vector>& intersections) -{ - const int numCollisions = rawIntersections.get_num_collisions(); - - for (int i = 0; i < numCollisions; ++i) { - const unsigned domainIdx = rawIntersections.m_data(i, 0); - const unsigned rangeIdx = rawIntersections.m_data(i, 1); - intersections.emplace_back(domain[domainIdx].second, range[rangeIdx].second); - }; -} - -template -void insert_intersections_into_results( - const Kokkos::View*, ExecutionSpace> & domain, - const Kokkos::View*, ExecutionSpace> & range, - const stk::search::CollisionList rawIntersections, - Kokkos::View*, ExecutionSpace> & intersections, - ExecutionSpace const& execSpace) -{ - const int numCollisions = rawIntersections.get_num_collisions(); - - Kokkos::parallel_for(Kokkos::RangePolicy(execSpace, 0, numCollisions), - KOKKOS_LAMBDA(int index) { - const unsigned domainIdx = rawIntersections.m_data(index, 0); - const unsigned 
rangeIdx = rawIntersections.m_data(index, 1); - intersections[index] = {domain[domainIdx].ident, range[rangeIdx].ident}; - }); -} - -template -void insert_only_confirmed_intersections_into_results( - const std::vector>& domain, - const std::vector>& range, - const stk::search::CollisionList rawIntersections, - std::vector>& intersections) +namespace impl { +template +class LocalMortonCoarseSearchVectorCallback { - constexpr DomainIdentType INVALID_DOMAIN_IDENT = std::numeric_limits::max(); - constexpr RangeIdentType INVALID_RANGE_IDENT = std::numeric_limits::max(); - const int numCollisions = rawIntersections.get_num_collisions(); - - for (int index = 0; index < numCollisions; ++index) { - const unsigned domainIdx = rawIntersections.m_data(index, 0); - const unsigned rangeIdx = rawIntersections.m_data(index, 1); - const auto& domainBoxIdent = domain[domainIdx]; - const auto& rangeBoxIdent = range[rangeIdx]; - if (intersects(domainBoxIdent.first, rangeBoxIdent.first)) { - intersections.emplace_back(domainBoxIdent.second, rangeBoxIdent.second); - } else { - intersections.emplace_back(INVALID_DOMAIN_IDENT, INVALID_RANGE_IDENT); + using DomainVec = std::vector>; + using RangeVec = std::vector>; + using ResultVec = std::vector>; + + static bool constexpr isSearchExact = !(impl::is_stk_sphere || impl::is_stk_sphere); + + public: + LocalMortonCoarseSearchVectorCallback(const DomainVec& domain, const RangeVec& range, ResultVec& searchResults) : + m_domain(domain), + m_range(range), + m_searchResults(searchResults) + { + m_searchResults.resize(0); } - }; - - int numActualIntersections = 0; - int destIndex = 0; - for (int sourceIndex = 0; sourceIndex < numCollisions; ++sourceIndex) { - if (intersections[sourceIndex].first != INVALID_DOMAIN_IDENT) { - intersections[destIndex++] = intersections[sourceIndex]; - numActualIntersections++; - } - } - intersections.resize(numActualIntersections); -} - -template -void insert_only_confirmed_intersections_into_results( - const 
Kokkos::View*, ExecutionSpace> & domain, - const Kokkos::View*, ExecutionSpace> & range, - const stk::search::CollisionList rawIntersections, - Kokkos::View*, ExecutionSpace> & intersections, - ExecutionSpace const& execSpace) -{ - static bool constexpr isSphere = impl::is_stk_sphere || impl::is_stk_sphere; - - const int numCollisions = rawIntersections.get_num_collisions(); - - Kokkos::View counter("counter"); - Kokkos::parallel_for(Kokkos::RangePolicy(execSpace, 0, numCollisions), - KOKKOS_LAMBDA(int index) { - const unsigned domainIdx = rawIntersections.m_data(index, 0); - const unsigned rangeIdx = rawIntersections.m_data(index, 1); - const auto domainBoxIdent = domain[domainIdx]; - const auto rangeBoxIdent = range[rangeIdx]; - - if (isSphere) - { - if (intersects(domainBoxIdent.box, rangeBoxIdent.box)) - { - int outputIdx = Kokkos::atomic_fetch_add(&(counter()), 1); - intersections[outputIdx] = {domainBoxIdent.ident, rangeBoxIdent.ident}; - } - } else + void operator()(int domainIdx, int rangeIdx) const + { + if constexpr (isSearchExact) + { + m_searchResults.push_back({m_domain[domainIdx].second, m_range[rangeIdx].second}); + } else + { + if (intersects(m_domain[domainIdx].first, m_range[rangeIdx].first)) { - intersections[index] = {domainBoxIdent.ident, rangeBoxIdent.ident}; + m_searchResults.push_back({m_domain[domainIdx].second, m_range[rangeIdx].second}); } - }); + } + } + + bool resize_for_second_pass() + { + return false; + } + + private: + const DomainVec& m_domain; + const RangeVec& m_range; + ResultVec& m_searchResults; +}; - if constexpr (isSphere) - { - int numActualIntersections; - Kokkos::deep_copy(numActualIntersections, counter); - Kokkos::resize(intersections, numActualIntersections); - } } template @@ -177,6 +105,7 @@ void local_coarse_search_morton_lbvh( using ValueType = typename DomainBoxType::value_type; using HostSpace = Kokkos::DefaultHostExecutionSpace; + using Callback = impl::LocalMortonCoarseSearchVectorCallback; 
Kokkos::Profiling::pushRegion("Fill domain and range trees"); const bool supportHostBoxes = false; @@ -188,66 +117,119 @@ void local_coarse_search_morton_lbvh( Kokkos::Profiling::popRegion(); stk::search::CollisionList collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, HostSpace{}); + Callback callback(domain, range, searchResults); + stk::search::morton_lbvh_search(domainTree, rangeTree, callback, HostSpace{}); +} - Kokkos::Profiling::pushRegion("Aggregate search results"); - const int numCollisions = collisionList.get_num_collisions(); - searchResults.reserve(numCollisions); +namespace impl { +template +class LocalMortonCoarseSearchViewCallback +{ - if constexpr ((std::is_same_v> || std::is_same_v>) && - (std::is_same_v> || std::is_same_v>)) - { - insert_intersections_into_results(domain, range, collisionList, searchResults); - } - else { - insert_only_confirmed_intersections_into_results(domain, range, collisionList, searchResults); - } + using DomainBoxIdent = typename DomainView::value_type; + using RangeBoxIdent = typename RangeView::value_type; + using DomainBox = typename DomainBoxIdent::box_type; + using RangeBox = typename RangeBoxIdent::box_type; + static bool constexpr isSearchExact = !(impl::is_stk_sphere || impl::is_stk_sphere); + + public: + LocalMortonCoarseSearchViewCallback(const DomainView& domain, const RangeView& range, ResultView& searchResults) : + m_domain(domain), + m_range(range), + m_searchResults(searchResults), + m_idx(Kokkos::ViewAllocateWithoutInitializing("result_idx")) + { + check_coarse_search_types_local(); + Kokkos::deep_copy(m_idx, 0); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int domainIdx, int rangeIdx) const + { + if constexpr (isSearchExact) + { + unsigned int idx = Kokkos::atomic_fetch_inc(&m_idx()); + if (idx < m_searchResults.size()) + { + m_searchResults(idx) = {m_domain(domainIdx).ident, m_range(rangeIdx).ident}; + } + } else + { + DomainBoxIdent 
domainBoxIdent = m_domain(domainIdx); + RangeBoxIdent rangeBoxIdent = m_range(rangeIdx); + if (intersects(domainBoxIdent.box, rangeBoxIdent.box)) + { + unsigned int idx = Kokkos::atomic_fetch_inc(&m_idx()); + if (idx < m_searchResults.size()) + { + m_searchResults(idx) = {domainBoxIdent.ident, rangeBoxIdent.ident}; + } + } + } + } + + bool resize_for_second_pass() + { + unsigned int numResults = 0; + Kokkos::deep_copy(numResults, m_idx); + bool needSecondPass = numResults > m_searchResults.size(); + Kokkos::resize(Kokkos::WithoutInitializing, m_searchResults, numResults); + Kokkos::deep_copy(m_idx, 0); + + return needSecondPass; + } + + ResultView get_search_results() const { return m_searchResults; } + + private: + DomainView m_domain; + RangeView m_range; + ResultView m_searchResults; + Kokkos::View m_idx; +}; - Kokkos::Profiling::popRegion(); } -template void local_coarse_search_morton_lbvh( - const Kokkos::View*, ExecutionSpace> & domain, - const Kokkos::View*, ExecutionSpace> & range, - Kokkos::View*, ExecutionSpace> & searchResults, + const DomainView & domain, + const RangeView & range, + ResultView & searchResults, ExecutionSpace const& execSpace = ExecutionSpace{}) { + Kokkos::Profiling::pushRegion("local_coarse_search_morton_lbvh"); + check_coarse_search_types_local(); + using DomainBoxType = typename DomainView::value_type::box_type; + using RangeBoxType = typename RangeView::value_type::box_type; STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); - using ValueType = typename DomainBoxType::value_type; + using Callback = impl::LocalMortonCoarseSearchViewCallback; Kokkos::Profiling::pushRegion("STK Fill domain and range trees"); const bool supportHostBoxes = false; stk::search::MortonAabbTree domainTree("Domain Tree", domain.extent(0), supportHostBoxes); stk::search::MortonAabbTree rangeTree("Range Tree", range.extent(0), supportHostBoxes); - 
stk::search::export_from_box_ident_view_to_morton_tree(domain, domainTree); - stk::search::export_from_box_ident_view_to_morton_tree(range, rangeTree); - domainTree.sync_to_device(); - rangeTree.sync_to_device(); + Kokkos::Profiling::pushRegion("STK Export box ident views to trees"); + stk::search::export_box_ident_view_to_morton_tree(domain, domainTree, execSpace); + stk::search::export_box_ident_view_to_morton_tree(range, rangeTree, execSpace); + execSpace.fence(); + Kokkos::Profiling::popRegion(); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("STK Morton Search"); - stk::search::CollisionList collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, execSpace); - - Kokkos::Profiling::pushRegion("STK Aggregate search results"); - const int numCollisions = collisionList.get_num_collisions(); - searchResults = Kokkos::View*, ExecutionSpace>( - Kokkos::ViewAllocateWithoutInitializing(searchResults.label()), numCollisions); - - if constexpr ((std::is_same_v> || std::is_same_v>) && - (std::is_same_v> || std::is_same_v>)) + if (searchResults.size() == 0) { - insert_intersections_into_results(domain, range, collisionList, searchResults, execSpace); - } - else { - insert_only_confirmed_intersections_into_results(domain, range, collisionList, searchResults, execSpace); + size_t sizeGuess = std::max(domain.size(), range.size()) * COLLISION_SCALE_FACTOR; + Kokkos::resize(Kokkos::WithoutInitializing, searchResults, sizeGuess); } + Callback callback(domain, range, searchResults); + stk::search::morton_lbvh_search(domainTree, rangeTree, callback, execSpace); + searchResults = callback.get_search_results(); + Kokkos::Profiling::popRegion(); Kokkos::Profiling::popRegion(); } diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp index 08a8cca0ce33..d8c8e297c484 100644 --- 
a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp @@ -1,12 +1,16 @@ #ifndef MORTONLBVH_PARALLELCONSISTENCYUTILS_HPP #define MORTONLBVH_PARALLELCONSISTENCYUTILS_HPP -#include "stk_util/parallel/Parallel.hpp" +#include "stk_search/BoxIdent.hpp" +#include "stk_search/HelperTraits.hpp" #include "stk_search/morton_lbvh/MortonLBVH_Tree.hpp" #include "stk_search/morton_lbvh/MortonLBVH_Search.hpp" #include "stk_search/Box.hpp" #include "stk_search/BoundingBox.hpp" #include "stk_search/CommonSearchUtil.hpp" +#include "stk_search/DeviceMPIUtils.hpp" +#include "stk_search/HelperTraits.hpp" +#include "Kokkos_Core.hpp" #include #include @@ -30,6 +34,95 @@ gather_all_processor_superset_domain_boxes(const std::vector +class BoundingBoxReduction +{ + public: + using DomainBoxType = typename DomainView::value_type::box_type; + using Real = typename DomainBoxType::value_type; + using ResultBoxType = Box; + + using value_type = ResultBoxType; + + + BoundingBoxReduction(DomainView localDomain) : + m_localDomain(localDomain) + { + check_domain_or_range_view_parallel(); + } + + ResultBoxType run(ExecutionSpace execSpace) + { + Kokkos::RangePolicy execPolicy(execSpace, size_t(0), m_localDomain.extent(0)); + + ResultBoxType outputBox; + Kokkos::parallel_reduce("local_bounding_box_reduction", execPolicy, *this, outputBox); + execSpace.fence(); + + return outputBox; + } + + KOKKOS_INLINE_FUNCTION + void init(ResultBoxType& val) const + { + constexpr Real min_val = Kokkos::Experimental::finite_min_v; + constexpr Real max_val = Kokkos::Experimental::finite_max_v; + + val = ResultBoxType(max_val, max_val, max_val, + min_val, min_val, min_val); + } + + KOKKOS_INLINE_FUNCTION + void join(ResultBoxType& dest, const ResultBoxType& src) const + { + stk::search::add_to_box(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i, ResultBoxType& reductionBox) 
const + { + if constexpr (std::is_same_v) + { + ResultBoxType box = m_localDomain(i).box; + join(reductionBox, box); + } else + { + DomainBoxType inputBox = m_localDomain(i).box; + ResultBoxType outputBox(inputBox.get_x_min(), inputBox.get_y_min(), inputBox.get_z_min(), + inputBox.get_x_max(), inputBox.get_y_max(), inputBox.get_z_max()); + join(reductionBox, outputBox); + } + } + + private: + DomainView m_localDomain; +}; + +} + +template +Kokkos::View*, ExecutionSpace> +gather_all_processor_superset_domain_boxes(const DomainView & localDomain, + ExecutionSpace execSpace, + MPI_Comm comm) +{ + check_domain_or_range_view_parallel(); + + using BoundingBoxReductionType = impl::BoundingBoxReduction; + using BoxType = typename BoundingBoxReductionType::ResultBoxType; + + BoundingBoxReductionType func(localDomain); + BoxType outputBox = func.run(execSpace); + + std::string name = "global_superset_boxes"; + Kokkos::View globalSupersetBoxes(Kokkos::view_alloc(name, Kokkos::WithoutInitializing), stk::parallel_machine_size(comm)); + stk::search::all_gather_helper(outputBox, globalSupersetBoxes, comm); + + return globalSupersetBoxes; +} + + template std::pair, std::vector> morton_extend_local_range_with_remote_boxes_that_might_intersect( @@ -95,6 +188,136 @@ morton_extend_local_range_with_remote_boxes_that_might_intersect( return result; } +namespace impl { + +template +class FillGhostBoxBuffers +{ + public: + using CollisionListType = CollisionList; + using SendDataType = typename RangeView::value_type; + using DeviceBufferAppender = impl::DeviceMPIBufferAppender; + using DeviceBuffers = typename DeviceBufferAppender::DeviceBuffers; + + FillGhostBoxBuffers(CollisionListType collisionList, + const RangeView& rangeBoxIdentProc, + ExecutionSpace execSpace, + MPI_Comm comm) : + m_collisionList(collisionList), + m_rangeBoxIdentProc(rangeBoxIdentProc), + m_execSpace(execSpace), + m_commRank(stk::parallel_machine_rank(comm)), + 
m_deviceBufferAppender(stk::parallel_machine_size(comm), execSpace) + { + check_domain_or_range_view_parallel(); + } + + DeviceBuffers run() + { + Kokkos::RangePolicy policy(m_execSpace, 0, m_collisionList.get_num_collisions()); + Kokkos::parallel_for("mpi_buffer_size_calc", policy, *this); + m_execSpace.fence(); + + m_deviceBufferAppender.allocate_buffers(); + + Kokkos::parallel_for("mpi_buffer_fill", policy, *this); + m_execSpace.fence(); + + return m_deviceBufferAppender.getBuffers(); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int idx) const + { + const int myProcId = m_commRank; + const int localBoxIdx = m_collisionList.m_data(idx, 0); + const int remoteProcId = m_collisionList.m_data(idx, 1); + + if (remoteProcId != myProcId) + { + m_deviceBufferAppender.push_back(remoteProcId, m_rangeBoxIdentProc(localBoxIdx)); + } + } + + private: + CollisionListType m_collisionList; + RangeView m_rangeBoxIdentProc; + ExecutionSpace m_execSpace; + int m_commRank; + + DeviceBufferAppender m_deviceBufferAppender; +}; +} + +template +std::pair, + Kokkos::View> +morton_extend_local_range_with_remote_boxes_that_might_intersect( + const DomainView & localDomain, + const RangeView & localRange, + ExecutionSpace execSpace, + MPI_Comm comm) +{ + check_domain_or_range_view_parallel(); + check_domain_or_range_view_parallel(); + using Real = typename DomainView::value_type::box_type::value_type; + using RangeBoxType = typename RangeView::value_type::box_type; + using RangeIdentProcType = typename RangeView::value_type::ident_proc_type; + using ViewType = Kokkos::View*, ExecutionSpace>; + + + using FillMPIBuffersType = impl::FillGhostBoxBuffers; + using DeviceBuffers = typename FillMPIBuffersType::DeviceBuffers; + + ViewType globalSupersetBoxes = gather_all_processor_superset_domain_boxes(localDomain, execSpace, comm); + + const bool setBoxesOnHost = false; + stk::search::MortonAabbTree domainTree("Proc Domain Tree", + localRange.size(), setBoxesOnHost); + 
stk::search::MortonAabbTree rangeTree("Proc Range Tree", + globalSupersetBoxes.size(), setBoxesOnHost); + + export_box_ident_view_to_morton_tree(localRange, domainTree, execSpace); + export_box_view_to_morton_tree(globalSupersetBoxes, rangeTree, execSpace); + execSpace.fence(); + domainTree.sync_to_device(); + rangeTree.sync_to_device(); + + stk::search::CollisionList collisionList("Proc Collision List"); + stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList); + + FillMPIBuffersType fill_buffers(collisionList, localRange, execSpace, comm); + DeviceBuffers deviceSendBuffers = fill_buffers.run(); + impl::DeviceDataExchangeUnknownPattern exchanger(deviceSendBuffers, execSpace, comm); + DeviceBuffers deviceRecvBuffers = exchanger.communicate(); + + size_t extendedRangeSize = localRange.size() + deviceRecvBuffers.buffers.extent(0); + Kokkos::View extendedRangeBoxes(Kokkos::ViewAllocateWithoutInitializing("extended_range_boxes"), + extendedRangeSize); + Kokkos::View extendedRangeIdentProcs(Kokkos::ViewAllocateWithoutInitializing("extended_range_ident_procs"), + deviceRecvBuffers.buffers.extent(0)); + + Kokkos::RangePolicy policy(execSpace, 0, extendedRangeSize); + size_t numLocalBoxes = localRange.extent(0); + auto fillExtendedRange = KOKKOS_LAMBDA (size_t i) + { + if (i < numLocalBoxes) + { + extendedRangeBoxes(i) = localRange(i).box; + } else + { + size_t bufferIdx = i - numLocalBoxes; + extendedRangeBoxes(i) = deviceRecvBuffers.buffers(bufferIdx).box; + extendedRangeIdentProcs(bufferIdx) = deviceRecvBuffers.buffers(bufferIdx).identProc; + } + }; + + Kokkos::parallel_for("fill_extended_range", policy, fillExtendedRange); + execSpace.fence(); + + return std::make_pair(extendedRangeBoxes, extendedRangeIdentProcs); +} + } #endif // MORTONLBVH_PARALLELCONSISTENCYUTILS_HPP diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp index 2e6e89c79626..75d98a2ffb72 
100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ namespace stk::search { +constexpr size_t COLLISION_SCALE_FACTOR = 16; + template inline void determine_mas_calling(const MortonAabbTree &partialTree1, const MortonAabbTree &partialTree2, @@ -111,6 +114,30 @@ inline void export_from_box_ident_proc_vec_to_morton_tree( } } +template +inline void export_box_ident_view_to_morton_tree( + const DomainView& boxIdentProcs, + MortonAabbTree& tree, + ExecutionSpace execSpace) +{ + check_view_is_usable_from(); + static_assert(is_box_ident_proc_container_v || is_box_ident_container_v, + "view must be a View or View"); + using BoxType = typename DomainView::value_type::box_type; + tree.reset(boxIdentProcs.extent(0)); + + Kokkos::RangePolicy policy(execSpace, 0, boxIdentProcs.extent(0)); + auto func = KOKKOS_LAMBDA(int index) + { + const BoxType box = boxIdentProcs(index).box; + tree.device_set_box(index, box.get_x_min(), box.get_x_max(), + box.get_y_min(), box.get_y_max(), + box.get_z_min(), box.get_z_max()); + }; + + Kokkos::parallel_for("export box-ident view to tree", policy, func); +} + template inline void export_from_box_ident_vector_to_morton_tree( const std::vector> &boxIdentList, MortonAabbTree &tree) @@ -127,22 +154,6 @@ inline void export_from_box_ident_vector_to_morton_tree( }); } -template -inline void export_from_box_ident_view_to_morton_tree( - const Kokkos::View*, ExecutionSpace> & boxIdentList, - MortonAabbTree & tree) -{ - int numBoxes = static_cast(boxIdentList.extent(0)); - tree.reset(numBoxes); - - Kokkos::parallel_for(Kokkos::RangePolicy(0, numBoxes), - KOKKOS_LAMBDA(int index) { - const auto & box = boxIdentList[index].box; - tree.device_set_box(index, box.get_x_min(), box.get_x_max(), - box.get_y_min(), box.get_y_max(), - box.get_z_min(), 
box.get_z_max()); - }); -} template inline void export_from_box_vec_to_morton_tree(const std::vector &boxVec, @@ -157,18 +168,39 @@ inline void export_from_box_vec_to_morton_tree(const std::vector &boxVe } } -template +template +inline void export_box_view_to_morton_tree(const BoxView boxes, + MortonAabbTree& tree, + ExecutionSpace execSpace) +{ + check_view_is_usable_from(); + using BoxType = typename BoxView::value_type; + tree.reset(boxes.extent(0)); + + Kokkos::RangePolicy policy(execSpace, 0, boxes.extent(0)); + auto func = KOKKOS_LAMBDA (int index) + { + const BoxType box = boxes(index); + tree.device_set_box(index, box.get_x_min(), box.get_x_max(), + box.get_y_min(), box.get_y_max(), + box.get_z_min(), box.get_z_max()); + }; + + Kokkos::parallel_for(policy, func); +} + +template inline void morton_lbvh_search(MortonAabbTree &tree1, MortonAabbTree &tree2, - CollisionList &searchResults, + Callback& resultCallback, ExecutionSpace const& execSpace = ExecutionSpace{}) { - Kokkos::Profiling::pushRegion("Initialization"); + Kokkos::Profiling::pushRegion("Initialize the trees"); Kokkos::Profiling::pushRegion("Get global bounds"); // Get total bounds TotalBoundsFunctor::apply(tree1, execSpace); TotalBoundsFunctor::apply(tree2, execSpace); - Kokkos::fence(); + execSpace.fence(); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("Determine need to sort/flip trees"); @@ -180,7 +212,7 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, Kokkos::Profiling::pushRegion("Morton encoding of leaves"); MortonEncoder::apply(tree1, execSpace, sortTree1); MortonEncoder::apply(tree2, execSpace, sortTree2); - Kokkos::fence(); + execSpace.fence(); Kokkos::Profiling::popRegion(); // Sort the leaves if appropriate @@ -190,10 +222,10 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, SortByCode::apply(tree1, execSpace); } if (sortTree2) { - // printf("Sorting tree with %d leaves\n", tree1.hm_numLeaves()); + // printf("Sorting tree with %d leaves\n", 
tree2.hm_numLeaves()); SortByCode::apply(tree2, execSpace); } - Kokkos::fence(); + execSpace.fence(); Kokkos::Profiling::popRegion(); // Build the tree structures, if appropriate, following Karras's algorithm @@ -206,7 +238,7 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, if (buildTree2) { BuildRadixTree::apply(tree2, execSpace); } - Kokkos::fence(); + execSpace.fence(); Kokkos::Profiling::popRegion(); // Augment the trees to be bounding volume (box) hierarchies @@ -217,22 +249,41 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, if (buildTree2) { UpdateInteriorNodeBVs::apply(tree2, execSpace); } - Kokkos::fence(); + execSpace.fence(); Kokkos::Profiling::popRegion(); Kokkos::Profiling::popRegion(); // Test the boxes from the non-tree against the tree that was built. Kokkos::Profiling::pushRegion("Search query"); + if (flipOrder) { - Traverse_MASTB_BVH_Functor::apply_tree(tree2, tree1, searchResults, execSpace, true); - } - else { - Traverse_MASTB_BVH_Functor::apply_tree(tree1, tree2, searchResults, execSpace); + search_tree(tree2, tree1, resultCallback, execSpace, true); + } else { + search_tree(tree1, tree2, resultCallback, execSpace); } - Kokkos::fence(); + execSpace.fence(); Kokkos::Profiling::popRegion(); } +template +inline void morton_lbvh_search(MortonAabbTree &tree1, + MortonAabbTree &tree2, + CollisionList &searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}) +{ + if (searchResults.get_capacity() == 0) { + const int numDomainLeaves = tree1.hm_numLeaves(); + const int numRangeLeaves = tree2.hm_numLeaves(); + + const int collisionEstimate = std::max(numDomainLeaves, numRangeLeaves) * COLLISION_SCALE_FACTOR; + searchResults.reset(collisionEstimate); + } + + CollisionListCallback resultCallback(searchResults); + morton_lbvh_search(tree1, tree2, resultCallback, execSpace); + searchResults = resultCallback.get_collision_list(); +} + template inline void morton_lbvh_search(const std::vector &boxA, const std::vector &boxB, diff 
--git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp index 2aca828b2aa0..db2fb78f3b49 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp @@ -80,8 +80,15 @@ #include #include #include +#include #include #include "Kokkos_Sort.hpp" +//#if KOKKOS_VERSION < 40300 +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCTHRUST) +#include +#include +#endif +//#endif #include #include #include @@ -106,8 +113,6 @@ namespace stk::search { -constexpr size_t COLLISION_SCALE_FACTOR = 16; - template struct TotalBoundsFunctor { @@ -427,7 +432,21 @@ struct SortByCode SortByCodeIdPair::apply(tree); } else { - Kokkos::Experimental::sort_by_key(execSpace, tree.m_leafCodes, tree.m_leafIds); +//#if KOKKOS_VERSION >= 40300 +// Kokkos::Experimental::sort_by_key(execSpace, tree.m_leafCodes, tree.m_leafIds); +//#elif defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCTHRUST) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCTHRUST) + const int n = tree.m_leafIds.extent(0); + + morton_code_t *rawLeafCodes = tree.m_leafCodes.data(); + thrust::device_ptr rawLeafCodesThr = thrust::device_pointer_cast(rawLeafCodes); + LocalOrdinal *rawLeafIds = tree.m_leafIds.data(); + thrust::device_ptr rawLeafIdsThr = thrust::device_pointer_cast(rawLeafIds); + //thrust::stable_sort_by_key(rawLeafCodesThr, rawLeafCodesThr + n, rawLeafIdsThr); + thrust::sort_by_key(rawLeafCodesThr, rawLeafCodesThr + n, rawLeafIdsThr); +#else + STK_ThrowErrorMsg("shouldn't be able to get here"); // SortByCodeIdPair::apply(tree); +#endif } } }; @@ -692,10 +711,42 @@ void UpdateInteriorNodeBVs::get_box(RealType bvMinMax[ } -template +template +class CollisionListCallback +{ + public: + CollisionListCallback(CollisionList& collisionList) : + 
m_collisionList(collisionList) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(int domainIdx, int rangeIdx) const + { + m_collisionList.push_back(domainIdx, rangeIdx); + } + + bool resize_for_second_pass() + { + int numActualCollisions = m_collisionList.get_num_collisions(); + bool needSecondPass = numActualCollisions > m_collisionList.get_capacity(); + if (needSecondPass) + { + m_collisionList.reset(numActualCollisions); + } + + return needSecondPass; + } + + CollisionList get_collision_list() const { return m_collisionList; } + + private: + CollisionList m_collisionList; +}; + + +template struct Traverse_MASTB_BVH_Functor { - using value_type = int; using LBVH_types = MortonLbvhTypes; using kokkos_aabb_types = MortonAabbTypes; using local_ordinals_tmt = typename LBVH_types::local_ordinals_tmt; @@ -706,20 +757,11 @@ struct Traverse_MASTB_BVH_Functor Traverse_MASTB_BVH_Functor(bboxes_3d_view_t domainMinMaxs, local_ordinals_tmt domainIds, const MortonAabbTree &rangeTree, - collision_list_type &collisions, + Callback& callback, bool flippedResults = false); KOKKOS_INLINE_FUNCTION - void init(value_type &update) const { update = 0; } - - static void apply_tree(const MortonAabbTree &domainTree, - const MortonAabbTree &rangeTree, - collision_list_type &collisions, - ExecutionSpace const& execSpace, - bool flipOutputPairs = false); - - KOKKOS_INLINE_FUNCTION - void operator()(unsigned domainIdx, value_type &update) const; + void operator()(unsigned domainIdx) const; KOKKOS_FORCEINLINE_FUNCTION bool overlaps_range(RealType bvMinMax[6], LocalOrdinal rangeIdx) const; @@ -730,11 +772,16 @@ struct Traverse_MASTB_BVH_Functor KOKKOS_FORCEINLINE_FUNCTION void get_box(RealType bvMinMax[6], LocalOrdinal idx, const bboxes_const_3d_view_t &boxMinMaxs) const; - KOKKOS_INLINE_FUNCTION - void join(value_type &update, const value_type &input) const { update = (input < update ? 
input : update); } - std::ostream &stream_pair(LocalOrdinal domainIdx, bool overlap, LocalOrdinal rangeIdx, std::ostream &os) const; + KOKKOS_INLINE_FUNCTION + void record_result(LocalOrdinal domainIdx, LocalOrdinal rangeIdx, bool flip) const + { + LocalOrdinal domainIdxFlipped = flip ? rangeIdx : domainIdx; + LocalOrdinal rangeIdxFlipped = flip ? domainIdx : rangeIdx; + m_callback(domainIdxFlipped, rangeIdxFlipped); + } + bboxes_const_3d_view_t m_domainMinMaxs; typename LBVH_types::local_ordinals_tmt tm_domainIds; @@ -744,15 +791,15 @@ struct Traverse_MASTB_BVH_Functor typename LBVH_types::local_ordinals_tmt tm_rangeLeafIds; const bool m_flippedResults; - collision_list_type m_results; + Callback m_callback; }; -template -Traverse_MASTB_BVH_Functor::Traverse_MASTB_BVH_Functor( +template +Traverse_MASTB_BVH_Functor::Traverse_MASTB_BVH_Functor( bboxes_3d_view_t domainMinMaxs, local_ordinals_tmt domainIds, const MortonAabbTree &rangeTree, - collision_list_type &collisions, + Callback& callback, bool flippedResults) : m_domainMinMaxs(domainMinMaxs), tm_domainIds(domainIds), @@ -761,49 +808,42 @@ Traverse_MASTB_BVH_Functor::Traverse_MASTB_BVH_Functor tm_rangeNodeChildren(rangeTree.m_nodeChildren), tm_rangeLeafIds(rangeTree.m_leafIds), m_flippedResults(flippedResults), - m_results(collisions) + m_callback(callback) {} -template -void Traverse_MASTB_BVH_Functor::apply_tree( + +template +void search_tree( const MortonAabbTree &domainTree, const MortonAabbTree &rangeTree, - collision_list_type &collisions, + Callback& callback, ExecutionSpace const& execSpace, - bool flipOutputPairs) + bool flipOutputPairs = false) { + Kokkos::Profiling::pushRegion("search_tree"); if ((domainTree.hm_numLeaves() == 0) || (rangeTree.hm_numLeaves() == 0)) { + callback.resize_for_second_pass(); return; } - int retCode = 0; - const int numDomainLeaves = domainTree.hm_numLeaves(); - const int numRangeLeaves = rangeTree.hm_numLeaves(); + const Traverse_MASTB_BVH_Functor op(domainTree.m_minMaxs, 
domainTree.m_leafIds, rangeTree, + callback, flipOutputPairs); + auto policy = Kokkos::RangePolicy(execSpace, 0, domainTree.hm_numLeaves()); + Kokkos::parallel_for("Traverse_MASTB_BVH_Functor", policy, op); + execSpace.fence(); - if (collisions.get_capacity() == 0) { - const int collisionEstimate = std::max(numDomainLeaves, numRangeLeaves) * COLLISION_SCALE_FACTOR; - collisions.reset(collisionEstimate); + if (callback.resize_for_second_pass()) { + const Traverse_MASTB_BVH_Functor op2(domainTree.m_minMaxs, domainTree.m_leafIds, rangeTree, + callback, flipOutputPairs); + Kokkos::parallel_for("Traverse_MASTB_BVH_Functor - pass2", policy, op2); } - const Traverse_MASTB_BVH_Functor op(domainTree.m_minMaxs, domainTree.m_leafIds, rangeTree, - collisions, flipOutputPairs); - auto policy = Kokkos::RangePolicy(execSpace, 0, numDomainLeaves); - Kokkos::parallel_reduce(policy, op, retCode); - - int numActualCollisions = collisions.get_num_collisions(); - - if ((retCode < 0) && (numActualCollisions > collisions.get_capacity())) { - collisions.reset(numActualCollisions); - retCode = 0; - const Traverse_MASTB_BVH_Functor op2(domainTree.m_minMaxs, domainTree.m_leafIds, rangeTree, - collisions, flipOutputPairs); - Kokkos::parallel_reduce(policy, op2, retCode); - } + execSpace.fence(); + Kokkos::Profiling::popRegion(); } -template -KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor::operator()(unsigned argDomainIdx, - value_type& update) const +template +KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor::operator()(unsigned argDomainIdx) const { LocalOrdinal domainIdx = tm_domainIds(argDomainIdx); @@ -815,7 +855,6 @@ KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor int* stackPtr = ridxStack; *stackPtr++ = -1; - int result = 0; int nodeIdx = m_rangeRoot; do { // Check each child node for overlap. @@ -829,7 +868,7 @@ KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor // Query overlaps a leaf node => report collision. 
if (overlapL) { if (is_range_leaf(childL)) { - result = m_results.push_back(domainIdx, childL, m_flippedResults) ? result : -1; + record_result(domainIdx, childL, m_flippedResults); } else { traverseL = true; @@ -840,7 +879,7 @@ KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor // Query overlaps and internal node => traverse. if (overlapR) { if (is_range_leaf(childR)) { - result = m_results.push_back(domainIdx, childR, m_flippedResults) ? result : -1; + record_result(domainIdx, childR, m_flippedResults); if (!traverseL) { nodeIdx = *--stackPtr; // pop } @@ -858,30 +897,20 @@ KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor nodeIdx = *--stackPtr; // pop } } while (nodeIdx >= 0); - - if (result < update) { - update = result; - } } else { // Degenerate case of only one leaf node - int result = 0; bool overlap = overlaps_range(bvMinMax, 0); if (overlap) { - bool ok = m_results.push_back(domainIdx, 0, m_flippedResults); - result = (ok ? result : -1); - } - - if (result < update) { - update = result; + record_result(domainIdx, 0, m_flippedResults); } } } -template +template KOKKOS_FORCEINLINE_FUNCTION -bool Traverse_MASTB_BVH_Functor::overlaps_range(RealType bvMinMax[6], - LocalOrdinal rangeIdx) const +bool Traverse_MASTB_BVH_Functor::overlaps_range(RealType bvMinMax[6], + LocalOrdinal rangeIdx) const { return (bvMinMax[3] < tm_rangeMinMaxs(rangeIdx, 0) || bvMinMax[4] < tm_rangeMinMaxs(rangeIdx, 1) || @@ -891,10 +920,10 @@ bool Traverse_MASTB_BVH_Functor::overlaps_range(RealTy bvMinMax[2] > tm_rangeMinMaxs(rangeIdx, 5)) ? 
false : true; } -template +template KOKKOS_FORCEINLINE_FUNCTION -void Traverse_MASTB_BVH_Functor::get_box(RealType bvMinMax[6], LocalOrdinal idx, - const bboxes_const_3d_view_t &boxMinMaxs) const +void Traverse_MASTB_BVH_Functor::get_box(RealType bvMinMax[6], LocalOrdinal idx, + const bboxes_const_3d_view_t &boxMinMaxs) const { bvMinMax[0] = boxMinMaxs(idx, 0); bvMinMax[1] = boxMinMaxs(idx, 1); @@ -904,8 +933,8 @@ void Traverse_MASTB_BVH_Functor::get_box(RealType bvMi bvMinMax[5] = boxMinMaxs(idx, 5); } -template -std::ostream &Traverse_MASTB_BVH_Functor::stream_pair(LocalOrdinal domainIdx, bool overlap, +template +std::ostream &Traverse_MASTB_BVH_Functor::stream_pair(LocalOrdinal domainIdx, bool overlap, LocalOrdinal rangeIdx, std::ostream &os) const { os << " {(" << m_domainMinMaxs(domainIdx, 0) << "," << m_domainMinMaxs(domainIdx, 1) << "," << m_domainMinMaxs(domainIdx, 2) diff --git a/packages/stk/stk_search_util/Jamfile b/packages/stk/stk_search_util/Jamfile index 63e5ab0474cf..dd020dab7fd5 100644 --- a/packages/stk/stk_search_util/Jamfile +++ b/packages/stk/stk_search_util/Jamfile @@ -44,7 +44,8 @@ project votd : requirements $(sierra-warnings) $(stk_search_util-root-inc) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM : usage-requirements $(stk_search_util-root-inc) : build-dir $(stk_search_util-builddir) diff --git a/packages/stk/stk_search_util/stk_search_util/CMakeLists.txt b/packages/stk/stk_search_util/stk_search_util/CMakeLists.txt index b2ce11a12ff0..a1e86155e818 100644 --- a/packages/stk/stk_search_util/stk_search_util/CMakeLists.txt +++ b/packages/stk/stk_search_util/stk_search_util/CMakeLists.txt @@ -60,6 +60,6 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_search_util) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_search_util DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_search_util EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git 
a/packages/stk/stk_simd/Jamfile b/packages/stk/stk_simd/Jamfile index b2e19b1c3138..3d40926ef5b5 100644 --- a/packages/stk/stk_simd/Jamfile +++ b/packages/stk/stk_simd/Jamfile @@ -41,7 +41,8 @@ import path ; project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_simd-root) STK_VOLATILE_SIMD power:USE_STK_SIMD_NONE diff --git a/packages/stk/stk_simd/stk_simd/CMakeLists.txt b/packages/stk/stk_simd/stk_simd/CMakeLists.txt index 3340cba4841a..a06fed371aad 100644 --- a/packages/stk/stk_simd/stk_simd/CMakeLists.txt +++ b/packages/stk/stk_simd/stk_simd/CMakeLists.txt @@ -52,6 +52,8 @@ ELSE() $ $ ) + INSTALL(TARGETS stk_simd EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) + ENDIF() INSTALL(FILES ${HEADERS} DESTINATION diff --git a/packages/stk/stk_tools/Jamfile b/packages/stk/stk_tools/Jamfile index dd3ac218ccde..cc02b92134a1 100644 --- a/packages/stk/stk_tools/Jamfile +++ b/packages/stk/stk_tools/Jamfile @@ -47,7 +47,8 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM STK_SHOW_DEPRECATED_WARNINGS STK_HIDE_DEPRECATED_CODE SIERRA_MIGRATION diff --git a/packages/stk/stk_tools/stk_tools/CMakeLists.txt b/packages/stk/stk_tools/stk_tools/CMakeLists.txt index a93b4294ebe0..92669929f25b 100644 --- a/packages/stk/stk_tools/stk_tools/CMakeLists.txt +++ b/packages/stk/stk_tools/stk_tools/CMakeLists.txt @@ -136,11 +136,11 @@ endif() INSTALL(TARGETS stk_block_extractor COMPONENT ${PACKAGE_NAME} RUNTIME DESTINATION ${${PROJECT_NAME}_INSTALL_RUNTIME_DIR}) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_transfer_utils_lib DESTINATION ${STK_INSTALL_LIBDIR}) - INSTALL(TARGETS stk_tools_lib DESTINATION ${STK_INSTALL_LIBDIR}) - INSTALL(TARGETS stk_block_extractor DESTINATION ${STK_INSTALL_BINDIR}) + INSTALL(TARGETS stk_transfer_utils_lib EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_tools_lib EXPORT 
stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_block_extractor EXPORT stkTargets DESTINATION ${STK_INSTALL_BINDIR}) if (STK_HAS_SEACAS_NEMESIS) - INSTALL(TARGETS stk_pmesh_lib DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_pmesh_lib EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() endif() diff --git a/packages/stk/stk_tools/stk_tools/block_extractor/ExtractBlocks.cpp b/packages/stk/stk_tools/stk_tools/block_extractor/ExtractBlocks.cpp index 7659394ea6f5..46a5ceaea700 100644 --- a/packages/stk/stk_tools/stk_tools/block_extractor/ExtractBlocks.cpp +++ b/packages/stk/stk_tools/stk_tools/block_extractor/ExtractBlocks.cpp @@ -98,12 +98,7 @@ void extract_blocks_and_ns_from_file(const std::string &inFile, stk::mesh::MeshBuilder builder(comm); builder.set_aura_option(stk::mesh::BulkData::AUTO_AURA); std::shared_ptr inBulk = builder.create(); - stk::mesh::MetaData& inMeta = inBulk->mesh_meta_data(); - inMeta.use_simple_fields(); - std::shared_ptr outBulk = builder.create(); - stk::mesh::MetaData& outMeta = outBulk->mesh_meta_data(); - outMeta.use_simple_fields(); stk::io::StkMeshIoBroker stkInput; stk::io::fill_mesh_preexisting(stkInput, inFile, *inBulk); diff --git a/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.cpp b/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.cpp index 0e98b834f135..0d64f4c3d2dd 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.cpp +++ b/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.cpp @@ -143,9 +143,12 @@ void copy_meta(const stk::mesh::MetaData &inputMeta, stk::mesh::MetaData &output // Query the coordinate field, to figure out the final name (if none set by the user) inputMeta.coordinate_field(); - outputMeta.initialize(inputMeta.spatial_dimension(), - inputMeta.entity_rank_names(), - inputMeta.coordinate_field_name()); + if (!outputMeta.is_initialized()){ + outputMeta.initialize(inputMeta.spatial_dimension(), + inputMeta.entity_rank_names(), + 
inputMeta.coordinate_field_name()); + } + copy_parts(inputMeta, outputMeta); copy_fields(inputMeta, outputMeta); copy_surface_to_block_mapping(inputMeta, outputMeta); diff --git a/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.hpp b/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.hpp index 4fdf10591456..136204131811 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.hpp +++ b/packages/stk/stk_tools/stk_tools/mesh_clone/MeshClone.hpp @@ -45,9 +45,11 @@ namespace tools { void copy_meta_with_io_attributes(const stk::mesh::MetaData &inputMeta, stk::mesh::MetaData &outputMeta); void copy_bulk(const stk::mesh::BulkData &inputBulk, stk::mesh::Selector selector, stk::mesh::BulkData &outputBulk); - +void copy_fields(const stk::mesh::MetaData &oldMeta, stk::mesh::MetaData &newMeta); void copy_mesh(const stk::mesh::BulkData &inputBulk, stk::mesh::Selector selector, stk::mesh::BulkData &outputBulk); +void copy_meta(const stk::mesh::MetaData &inputMeta, stk::mesh::MetaData &outputMeta); + } } diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp index 73fd8e531503..b135928869ee 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp +++ b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp @@ -90,22 +90,6 @@ void disconnect_user_blocks_locally(stk::mesh::BulkData& bulk, const BlockPairVe disconnect_and_reconnect_blocks(bulk, sortedBlocksToDisconnect, blockPairsToReconnect, info); } -void disconnect_user_blocks_globally(stk::mesh::BulkData& bulk, const BlockPairVector& blocksToDisconnect, - LinkInfo& info) -{ - info.startTime = stk::wall_time(); - - stk::mesh::PartVector allBlocksInMesh; - BlockPairVector orderedBlockPairsInMesh; - BlockPairVector blockPairsToReconnect; - - get_all_blocks_in_mesh(bulk, allBlocksInMesh); - fill_ordered_block_pairs(allBlocksInMesh, orderedBlockPairsInMesh); - populate_blocks_to_reconnect(bulk, 
orderedBlockPairsInMesh, blocksToDisconnect, blockPairsToReconnect); - - disconnect_and_reconnect_blocks(bulk, orderedBlockPairsInMesh, blockPairsToReconnect, info); -} - void snip_hinges(stk::mesh::BulkData& bulk, impl::HingeNodeVector& preservedHingeNodes, const BlockPairVector& blocksToDisconnect, LinkInfo& info) { stk::mesh::EntityVector affectedNodes = get_affected_nodes(bulk, blocksToDisconnect); @@ -161,26 +145,30 @@ void disconnect_all_blocks(stk::mesh::BulkData & bulk, impl::LinkInfo& info, boo } void disconnect_user_blocks(stk::mesh::BulkData& bulk, const BlockPairVector& blocksToDisconnect, - DisconnectBlocksOption options) + SnipOption snipOption) { impl::LinkInfo info; - info.preserveOrphans = true; + info.preserveOrphans = (snipOption == PRESERVE_INITIAL_HINGES) ? true : false; impl::HingeNodeVector preservedHingeNodes; - if(options.snipOption == PRESERVE_INITIAL_HINGES) { + if(info.preserveOrphans) { impl::populate_hinge_node_list(bulk, blocksToDisconnect, preservedHingeNodes); } - if(options.disconnectOption == DISCONNECT_GLOBAL) { - impl::disconnect_user_blocks_globally(bulk, blocksToDisconnect, info); - } else { - impl::disconnect_user_blocks_locally(bulk, blocksToDisconnect, info); - } + impl::disconnect_user_blocks_locally(bulk, blocksToDisconnect, info); impl::snip_hinges(bulk, preservedHingeNodes, blocksToDisconnect, info); // impl::print_timings(bulk, info); } +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Sep 2024 +void disconnect_user_blocks(stk::mesh::BulkData& bulk, const BlockPairVector& blockPairsToDisconnect, + DisconnectBlocksOption options) +{ + disconnect_user_blocks(bulk, blockPairsToDisconnect, options.snipOption); +} +#endif + } } diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.hpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.hpp index 72b5e2a3bdaf..60dbf1671d36 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.hpp +++ 
b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.hpp @@ -41,17 +41,22 @@ namespace stk { namespace mesh { class BulkData; } } namespace stk { namespace tools { -enum DisconnectOption { - DISCONNECT_GLOBAL, - DISCONNECT_LOCAL -}; - enum SnipOption { PRESERVE_INITIAL_HINGES, SNIP_ALL_HINGES }; -struct DisconnectBlocksOption { +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Sep 2024 +enum STK_DEPRECATED +DisconnectOption +{ + DISCONNECT_GLOBAL, + DISCONNECT_LOCAL +}; + +struct STK_DEPRECATED +DisconnectBlocksOption +{ DisconnectBlocksOption() : disconnectOption(DISCONNECT_GLOBAL), snipOption(PRESERVE_INITIAL_HINGES) @@ -66,10 +71,16 @@ struct DisconnectBlocksOption { SnipOption snipOption; }; +STK_DEPRECATED +void disconnect_user_blocks(stk::mesh::BulkData& bulk, const BlockPairVector& blockPairsToDisconnect, + DisconnectBlocksOption options = DisconnectBlocksOption()); +#endif + void disconnect_all_blocks(stk::mesh::BulkData& bulk, bool preserveOrphans = false); void disconnect_all_blocks(stk::mesh::BulkData & bulk, impl::LinkInfo& info, bool preserveOrphans = false); void disconnect_user_blocks(stk::mesh::BulkData& bulk, const BlockPairVector& blockPairsToDisconnect, - DisconnectBlocksOption options = DisconnectBlocksOption()); + SnipOption snipOption = PRESERVE_INITIAL_HINGES); + } } diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp index aa20d13a9f50..da12ddfc1838 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp +++ b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp @@ -1,5 +1,6 @@ #include "stk_io/IossBridge.hpp" #include "stk_mesh/base/BulkData.hpp" +#include "stk_mesh/base/DestroyRelations.hpp" #include "stk_mesh/base/FEMHelpers.hpp" #include "stk_mesh/base/GetEntities.hpp" #include "stk_mesh/base/Types.hpp" @@ -344,8 +345,9 @@ bool update_disconnected_entity_relation(stk::mesh::BulkData& 
bulk, stk::mesh::E stk::mesh::ConnectivityOrdinal const * nodeOrdinals = bulk.begin_ordinals(entity, stk::topology::NODE_RANK); for (unsigned iNode = 0; iNode < numNodes; ++iNode) { if (entityNodes[iNode] == node) { - bulk.destroy_relation(entity, node, nodeOrdinals[iNode]); - bulk.declare_relation(entity, newNode, nodeOrdinals[iNode]); + stk::mesh::ConnectivityOrdinal nodeOrd = nodeOrdinals[iNode]; + bulk.destroy_relation(entity, node, nodeOrd); + bulk.declare_relation(entity, newNode, nodeOrd); updatedNode = true; } } @@ -506,19 +508,8 @@ void connect_element_side_to_internal_face(stk::mesh::BulkData& bulk, stk::mesh::ConstPartVector& addSurfaces, stk::mesh::ConstPartVector& removeSurfaces) { - auto numElems = bulk.num_elements(face); - auto elems = bulk.begin_elements(face); - auto elemOrdinals = bulk.begin_ordinals(face, stk::topology::ELEM_RANK); - for(unsigned i=0; i= 0; --i) { stk::mesh::Entity elem = elems[i]; - if(bulk.bucket(elem).owned()) { - numLocallyOwnedElems++; + if(!bulk.bucket(elem).owned()) { + bulk.destroy_entity(elem); + ++numDestroyedElems; } } - if(numLocallyOwnedElems == 0) { - for(unsigned j = 0; j < numElems; j++) { - stk::mesh::Entity elem = elems[j]; - bulk.destroy_entity(elem); - } + if(numDestroyedElems == numElems) { bulk.destroy_entity(node); } } diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.hpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.hpp index 9060775f246f..2a1970a019ac 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.hpp +++ b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.hpp @@ -178,32 +178,32 @@ struct InternalFaceInfo operator stk::mesh::Entity() const { return internalFace; } - bool operator<(const InternalFaceInfo &rhs) + bool operator<(const InternalFaceInfo &rhs) const { return internalFace < rhs.internalFace; }; - bool operator<(const stk::mesh::Entity &rhs) + bool operator<(const stk::mesh::Entity &rhs) const { return 
internalFace < rhs; }; - bool operator==(const InternalFaceInfo &rhs) + bool operator==(const InternalFaceInfo &rhs) const { return internalFace == rhs.internalFace; }; - bool operator==(const stk::mesh::Entity &rhs) + bool operator==(const stk::mesh::Entity &rhs) const { return internalFace == rhs; }; - bool operator!=(const InternalFaceInfo &rhs) + bool operator!=(const InternalFaceInfo &rhs) const { return internalFace != rhs.internalFace; }; - bool operator!=(const stk::mesh::Entity &rhs) + bool operator!=(const stk::mesh::Entity &rhs) const { return internalFace != rhs; }; diff --git a/packages/stk/stk_tools/stk_tools/pmesh_lib/UnitTest/UnitTestPmesh.cpp b/packages/stk/stk_tools/stk_tools/pmesh_lib/UnitTest/UnitTestPmesh.cpp index 061b067ae4b2..fde76f067341 100644 --- a/packages/stk/stk_tools/stk_tools/pmesh_lib/UnitTest/UnitTestPmesh.cpp +++ b/packages/stk/stk_tools/stk_tools/pmesh_lib/UnitTest/UnitTestPmesh.cpp @@ -34,7 +34,6 @@ TEST(PMESH, a) std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh("cube.exo.1.0", *bulk); size_t numElements = stk::mesh::count_selected_entities(meta.locally_owned_part(), bulk->buckets(stk::topology::ELEM_RANK)); diff --git a/packages/stk/stk_tools/stk_tools/pmesh_lib/makeparfiles.cpp b/packages/stk/stk_tools/stk_tools/pmesh_lib/makeparfiles.cpp index 0fa24c4698dc..f4743abebe90 100644 --- a/packages/stk/stk_tools/stk_tools/pmesh_lib/makeparfiles.cpp +++ b/packages/stk/stk_tools/stk_tools/pmesh_lib/makeparfiles.cpp @@ -386,13 +386,13 @@ void MakeParFile(const int& my_proc_id, const int& num_procs, const int& ncuts_x num_nodes_globalx, num_nodes_globaly, num_nodes_globalz); // Write element block attributes. Only 1 block here. 
- ierr = ex_put_elem_block(exoid, block_id, elem_type, num_elem, num_nodes_per_elem, num_attr); + ierr = ex_put_block(exoid, EX_ELEM_BLOCK, block_id, elem_type, num_elem, num_nodes_per_elem, 0, 0, num_attr); assert ( ierr>=0 ); // Write element block connectivity - ierr = ex_put_elem_conn(exoid, block_id, conn); + ierr = ex_put_conn(exoid, EX_ELEM_BLOCK, block_id, conn, nullptr, nullptr); assert ( ierr>=0 ); @@ -403,10 +403,10 @@ void MakeParFile(const int& my_proc_id, const int& num_procs, const int& ncuts_x // Write proc_id as element variable - ierr = ex_put_var_param(exoid, "e", 1); + ierr = ex_put_variable_param(exoid, EX_ELEM_BLOCK, 1); assert ( ierr >= 0 ); - ierr = ex_put_var_name(exoid, "e", 1, "Domain #"); + ierr = ex_put_variable_name(exoid, EX_ELEM_BLOCK, 1, "Domain #"); assert ( ierr >= 0 ); double time_value = 1.0; @@ -425,7 +425,7 @@ void MakeParFile(const int& my_proc_id, const int& num_procs, const int& ncuts_x values[ii] = colorVal; } - ierr = ex_put_elem_var(exoid, 1, 1, 1, num_elem, values); + ierr = ex_put_var(exoid, 1, EX_ELEM_BLOCK, 1, 1, num_elem, values); assert ( ierr >= 0 ); delete[] values; values = 0; @@ -922,7 +922,7 @@ void WriteNodeElemMaps(int num_nodes, int nelem_per_edge, int num_elem, } assert( counter == num_nodes); - int ierr = ex_put_node_num_map(exoid, nodemap); + int ierr = ex_put_id_map(exoid, EX_NODE_MAP, nodemap); assert(ierr>=0); (void)(ierr); delete[] nodemap; @@ -945,7 +945,7 @@ void WriteNodeElemMaps(int num_nodes, int nelem_per_edge, int num_elem, assert( counter == num_elem); - ierr = ex_put_elem_num_map(exoid, elemmap); + ierr = ex_put_id_map(exoid, EX_ELEM_MAP, elemmap); assert( ierr>=0); @@ -965,30 +965,30 @@ void WriteNodesets(int xc, int exoid, int num_nodes_in_set, int ierr = 0; if(xc == 0) // left_face { - ierr = ex_put_node_set_param(exoid, 1, num_nodes_in_set, num_nodes_in_set); + ierr = ex_put_set_param(exoid, EX_NODE_SET, 1, num_nodes_in_set, num_nodes_in_set); assert( ierr >= 0); - ierr = 
ex_put_node_set(exoid, 1, nodelist1); + ierr = ex_put_set(exoid, EX_NODE_SET, 1, nodelist1, nullptr); assert( ierr>= 0); - ierr = ex_put_node_set_dist_fact(exoid, 1, df_ns); + ierr = ex_put_set_dist_fact(exoid, EX_NODE_SET, 1, df_ns); assert( ierr>= 0); } else // if no nodeset on this subdomain { - ierr = ex_put_node_set_param(exoid, 1, 0, 0); + ierr = ex_put_set_param(exoid, EX_NODE_SET, 1, 0, 0); assert( ierr >= 0); } if(xc == ncuts_x - 1) // right_face { - ierr = ex_put_node_set_param(exoid, 2, num_nodes_in_set, num_nodes_in_set); + ierr = ex_put_set_param(exoid, EX_NODE_SET, 2, num_nodes_in_set, num_nodes_in_set); assert( ierr >= 0); - ierr = ex_put_node_set(exoid, 2, nodelist2); + ierr = ex_put_set(exoid, EX_NODE_SET, 2, nodelist2, nullptr); assert( ierr >= 0); - ierr = ex_put_node_set_dist_fact(exoid, 2, df_ns); + ierr = ex_put_set_dist_fact(exoid, EX_NODE_SET, 2, df_ns); assert( ierr >= 0); } else // if no nodeset on this subdomain { - ierr = ex_put_node_set_param(exoid, 2, 0, 0); + ierr = ex_put_set_param(exoid, EX_NODE_SET, 2, 0, 0); if (ierr < 0) { assert( ierr >= 0); } diff --git a/packages/stk/stk_topology/stk_topology/CMakeLists.txt b/packages/stk/stk_topology/stk_topology/CMakeLists.txt index feea7dc428ae..3e2d5bd2ff88 100644 --- a/packages/stk/stk_topology/stk_topology/CMakeLists.txt +++ b/packages/stk/stk_topology/stk_topology/CMakeLists.txt @@ -64,5 +64,5 @@ INSTALL(FILES ${DETAIL_HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_topology/topology_detail) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_topology DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_topology EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_transfer/Jamfile b/packages/stk/stk_transfer/Jamfile index c6953a181144..e97893c8d06b 100644 --- a/packages/stk/stk_transfer/Jamfile +++ b/packages/stk/stk_transfer/Jamfile @@ -43,7 +43,8 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) - 
STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM $(stk_transfer-root-inc) : usage-requirements $(stk_transfer-root-inc) diff --git a/packages/stk/stk_transfer/stk_transfer/CMakeLists.txt b/packages/stk/stk_transfer/stk_transfer/CMakeLists.txt index e4809a4297f7..593d015217cd 100644 --- a/packages/stk/stk_transfer/stk_transfer/CMakeLists.txt +++ b/packages/stk/stk_transfer/stk_transfer/CMakeLists.txt @@ -30,6 +30,7 @@ else() if (STK_ENABLE_STKMiddle_mesh) target_link_libraries(stk_transfer PUBLIC stk_middle_mesh) endif() + INSTALL(TARGETS stk_transfer EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() target_include_directories(stk_transfer PUBLIC diff --git a/packages/stk/stk_transfer_util/Jamfile b/packages/stk/stk_transfer_util/Jamfile index 702b38d321ef..f5da80017268 100644 --- a/packages/stk/stk_transfer_util/Jamfile +++ b/packages/stk/stk_transfer_util/Jamfile @@ -44,7 +44,8 @@ project votd : requirements $(sierra-warnings) $(stk_transfer_util-root-inc) - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM : usage-requirements $(stk_transfer_util-root-inc) : build-dir $(stk_transfer_util-builddir) diff --git a/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt b/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt index e3cd4b9a6654..c9fb864f034d 100644 --- a/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt +++ b/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt @@ -51,23 +51,22 @@ else() target_link_libraries(stk_transfer_util PUBLIC stk_search_util) target_link_libraries(stk_transfer_util PUBLIC stk_util_env) - if(USE_SIERRA_BLAS_LAPACK) - target_link_libraries(stk_transfer_util PUBLIC sierra_blas_lapack) + if(STK_BUILT_FOR_SIERRA) + find_package(SierraLapack REQUIRED) + target_link_libraries(stk_util_util PUBLIC SierraLapack::sierra_blas_lapack) else() - find_package(LAPACK REQUIRED) - target_link_libraries(stk_transfer_util PUBLIC LAPACK::LAPACK) + find_package(BLAS 
REQUIRED) + target_link_libraries(stk_util_util PUBLIC BLAS::BLAS) endif() target_include_directories(stk_transfer_util PUBLIC $ $ ) + INSTALL(TARGETS stk_transfer_util EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_transfer_util) - -if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_transfer_util DESTINATION ${STK_INSTALL_LIBDIR}) -endif() + diff --git a/packages/stk/stk_transfer_util/stk_transfer_util/Patch.hpp b/packages/stk/stk_transfer_util/stk_transfer_util/Patch.hpp index 3c2d938c4834..4d4e5648f7bf 100644 --- a/packages/stk/stk_transfer_util/stk_transfer_util/Patch.hpp +++ b/packages/stk/stk_transfer_util/stk_transfer_util/Patch.hpp @@ -16,6 +16,7 @@ #include // for lower_bound #include #include +#include #include #include // for vector, vector<>::iterator, etc diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BuildMesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BuildMesh.hpp index 6de8d3d9550f..b1e71a6337ac 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BuildMesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BuildMesh.hpp @@ -46,7 +46,6 @@ std::shared_ptr build_mesh(stk::ParallelMachine comm, stk::mesh::MeshBuilder builder(comm); builder.set_aura_option(auraOption); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } @@ -59,7 +58,6 @@ std::shared_ptr build_mesh(unsigned spatialDim, builder.set_spatial_dimension(spatialDim); builder.set_aura_option(auraOption); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BulkDataTester.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BulkDataTester.hpp index 897f9827b773..074e44266c14 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BulkDataTester.hpp +++ 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/BulkDataTester.hpp @@ -43,6 +43,7 @@ #include // for MeshIndex, EntityRank, etc #include // for BucketRepository #include +#include #include #include @@ -143,7 +144,7 @@ class BulkDataTester : public stk::mesh::BulkData void my_internal_change_entity_owner( const std::vector & arg_change, bool regenerate_aura = true, stk::mesh::ModEndOptimizationFlag mod_optimization = stk::mesh::ModEndOptimizationFlag::MOD_END_SORT ) { - this->internal_change_entity_owner(arg_change,mod_optimization); + this->m_meshModification.internal_change_entity_owner(arg_change,mod_optimization); } stk::mesh::Entity my_generate_new_entity(unsigned preferred_offset = 0) @@ -228,7 +229,7 @@ class BulkDataTester : public stk::mesh::BulkData bool my_comm_mesh_verify_parallel_consistency(std::ostream & error_log) { - return comm_mesh_verify_parallel_consistency(error_log); + return mesh::impl::comm_mesh_verify_parallel_consistency(*this, internal_comm_db(), internal_comm_list(), [&](stk::mesh::Entity entity){return internal_entity_comm_map(entity);}, error_log); } void my_internal_resolve_shared_modify_delete() diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/CMakeLists.txt b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/CMakeLists.txt index a2383f7ebf31..fee3d39136c0 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/CMakeLists.txt +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/CMakeLists.txt @@ -11,6 +11,7 @@ SET(HEADERS_NEED_IO ElemGraphMultipleSharedSidesUtils.hpp BulkDataTester.hpp FaceCreationTestUtils.hpp + FaceTestingUtils.hpp GeneratedMeshToFile.hpp ReadWriteSidesetTester.hpp StkMeshFromGeneratedMesh.hpp @@ -47,6 +48,7 @@ SET(SOURCES_NEED_IO SET(HEADERS_NEED_MESH_BUT_NOT_IO BuildMesh.hpp BucketTester.hpp + FieldEvaluator.hpp MeshFixture.hpp MeshFileFixture.hpp MeshUtilsForBoundingVolumes.hpp @@ -72,6 +74,7 @@ SET(SOURCES_NEED_MPI SET(HEADERS + algorithmTimer.hpp AssemblyUtils.hpp 
CommandLineArgs.hpp getOption.h @@ -108,9 +111,6 @@ ENDIF() IF(${PROJECT_NAME}_ENABLE_STKTransfer AND ${PROJECT_NAME}_ENABLE_STKMiddle_mesh) ADD_SUBDIRECTORY(stk_transfer_fixtures) ENDIF() - -INSTALL(FILES ${HEADERS} DESTINATION - ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_unit_test_utils/) if(HAVE_STK_Trilinos) TRIBITS_ADD_LIBRARY(stk_unit_test_utils @@ -119,6 +119,8 @@ if(HAVE_STK_Trilinos) ${Gtest_NO_INSTALL_LIB_OR_HEADERS_ARG} ) else() + LIST(APPEND HEADERS ConservativeTransferUserExample.hpp) + add_library(stk_unit_test_utils ${SOURCES}) if(STK_ENABLE_STKMesh) @@ -137,8 +139,13 @@ else() find_package(GTest REQUIRED) target_link_libraries(stk_unit_test_utils PUBLIC GTest::gtest) + + INSTALL(TARGETS stk_unit_test_utils EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() +INSTALL(FILES ${HEADERS} DESTINATION + ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_unit_test_utils/) + target_include_directories(stk_unit_test_utils PUBLIC $ $ @@ -169,6 +176,8 @@ if (STK_ENABLE_UnitMain) target_link_libraries(stk_unit_main PUBLIC stk_util_env) target_link_libraries(stk_unit_main PUBLIC stk_util_parallel) target_link_libraries(stk_unit_main PUBLIC Kokkos::kokkos) + + INSTALL(TARGETS stk_unit_main EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() endif() diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.cpp index a62946f8c4a6..e7789cd3fca8 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.cpp @@ -34,7 +34,10 @@ namespace stk { namespace unit_test_util { -void ConstructedMesh::create_block_elements_and_nodes(stk::mesh::BulkData& bulk, const ConstructedElementBlock& block, const unsigned elemIdOffset) + +void ConstructedMesh::create_block_elements_and_nodes(stk::mesh::BulkData& bulk, + const ConstructedElementBlock& block, + const unsigned 
elemIdOffset) { stk::mesh::Part* part = bulk.mesh_meta_data().get_part(block.name); STK_ThrowRequire(nullptr != part); @@ -75,9 +78,9 @@ void ConstructedMesh::populate_bulk_data(stk::mesh::BulkData& bulk) meta.set_part_id(block, elemBlock.id); } - stk::mesh::Field & coordsField = - stk::mesh::legacy::declare_field>(meta, stk::topology::NODE_RANK, "coordinates", 1); + stk::mesh::Field & coordsField = meta.declare_field(stk::topology::NODE_RANK, "coordinates", 1); stk::mesh::put_field_on_mesh(coordsField, meta.universal_part(), m_spatialDimension, nullptr); + stk::io::set_field_output_type(coordsField, stk::io::FieldOutputType::VECTOR_3D); bulk.modification_begin(); if(bulk.parallel_rank() == 0) { diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.hpp index 9dc4db95cc6c..60f53458223a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ConstructedMesh.hpp @@ -54,58 +54,59 @@ namespace stk { namespace unit_test_util { - struct ConstructedElementBlock + +struct ConstructedElementBlock +{ + ConstructedElementBlock() + : topology(stk::topology::INVALID_TOPOLOGY), + name(""), + id(-1) + { } + + ConstructedElementBlock(stk::topology topology_, const std::string& name_, int id_) + : topology(topology_), + name(name_), + id(id_) + { } + + ConstructedElementBlock(stk::topology topology_, const std::string& name_, int id_, const std::vector< std::vector >& connectivityIndex_) + : topology(topology_), + name(name_), + id(id_), + connectivityIndex(connectivityIndex_) + { } + + ConstructedElementBlock(const ConstructedElementBlock& block) + : topology(block.topology), + name(block.name), + id(block.id), + connectivityIndex(block.connectivityIndex) + { } + + void add_connectivity(const std::vector& connectivity) { - ConstructedElementBlock() - : topology(stk::topology::INVALID_TOPOLOGY) 
- , name("") - , id(-1) - { } - - ConstructedElementBlock(stk::topology topology_, const std::string& name_, int id_) - : topology(topology_) - , name(name_) - , id(id_) - { } - - ConstructedElementBlock(stk::topology topology_, const std::string& name_, int id_, const std::vector< std::vector >& connectivityIndex_) - : topology(topology_) - , name(name_) - , id(id_) - , connectivityIndex(connectivityIndex_) - { } - - ConstructedElementBlock(const ConstructedElementBlock& block) - : topology(block.topology) - , name(block.name) - , id(block.id) - , connectivityIndex(block.connectivityIndex) - { } - - void add_connectivity(const std::vector& connectivity) - { - ASSERT_EQ(topology.num_nodes(), connectivity.size()); - connectivityIndex.push_back(connectivity); - } + ASSERT_EQ(topology.num_nodes(), connectivity.size()); + connectivityIndex.push_back(connectivity); + } - void set_connectivity(const std::vector< std::vector >& connectivities) - { - for(const std::vector& connectivity : connectivities) { - add_connectivity(connectivity); - } + void set_connectivity(const std::vector< std::vector >& connectivities) + { + for(const std::vector& connectivity : connectivities) { + add_connectivity(connectivity); } + } - stk::topology topology; - std::string name; - int id; - std::vector< std::vector > connectivityIndex; - }; + stk::topology topology; + std::string name; + int id; + std::vector< std::vector > connectivityIndex; +}; class ConstructedMesh { public: ConstructedMesh(unsigned spatialDimension) - : m_spatialDimension(spatialDimension) + : m_spatialDimension(spatialDimension) { } void set_x_coordinates(const std::vector< double >& xCoordinates) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp index d83cd4f77943..2957958ca339 100644 --- 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp @@ -70,17 +70,19 @@ class TwoElemMultipleSharedSideTester : public ::testing::Test const std::vector nodeIDs[2], const std::vector &sharedNodeIds, stk::mesh::BulkData::AutomaticAuraOption auraOption) - : bulkDataPtr(stk::unit_test_util::build_mesh_no_simple_fields(spatialDim, MPI_COMM_WORLD, auraOption)), + : bulkDataPtr(stk::unit_test_util::build_mesh(spatialDim, MPI_COMM_WORLD, auraOption)), bulkData(*bulkDataPtr), meta(bulkData.mesh_meta_data()), skinPart(meta.declare_part_with_topology("skin", get_side_topology())), block1(meta.declare_part_with_topology("block_1", get_element_topology())), - activePart(meta.declare_part("active")), - coordField(stk::mesh::legacy::declare_field>(meta, stk::topology::NODE_RANK, "coordinates")) + activePart(meta.declare_part("active")) { + coordField = &meta.declare_field(stk::topology::NODE_RANK, "coordinates"); + if (bulkData.parallel_size() <= 2) { - stk::mesh::put_field_on_mesh(coordField, meta.universal_part(), nullptr); + stk::mesh::put_field_on_mesh(*coordField, meta.universal_part(), meta.spatial_dimension(), nullptr); + stk::io::set_field_output_type(*coordField, stk::io::FieldOutputType::VECTOR_3D); make_mesh_2_elems_connected_through_multiple_sides(nodeIDs, sharedNodeIds); } } @@ -147,7 +149,7 @@ class TwoElemMultipleSharedSideTester : public ::testing::Test stk::mesh::Part& skinPart; stk::mesh::Part& block1; stk::mesh::Part& activePart; - stk::mesh::Field &coordField; + stk::mesh::Field *coordField; private: }; @@ -182,10 +184,10 @@ class TwoElemTwoSharedSideTester : public TwoElemMultipleSharedSideTester stk::io::put_io_part_attribute(skinPart); for(size_t i = 0; i < twoElemTwoSharedSideCoordinates.size(); i++) - set_node_coords(coordField, i+1, twoElemTwoSharedSideCoordinates[i]); + set_node_coords(*coordField, i+1, 
twoElemTwoSharedSideCoordinates[i]); } - void set_node_coords(stk::mesh::Field& coordField, stk::mesh::EntityId id, const std::vector &coords) + void set_node_coords(stk::mesh::Field& coordField, stk::mesh::EntityId id, const std::vector &coords) { stk::mesh::Entity node = bulkData.get_entity(stk::topology::NODE_RANK, id); if(bulkData.is_valid(node) && bulkData.bucket(node).owned()) @@ -339,7 +341,8 @@ class TwoElem2dTwoSharedSideTester : public TwoElemMultipleSharedSideTester namespace simple_fields { -class TwoElemMultipleSharedSideTester : public ::testing::Test +class +TwoElemMultipleSharedSideTester : public ::testing::Test { public: TwoElemMultipleSharedSideTester(size_t spatialDim, @@ -353,7 +356,6 @@ class TwoElemMultipleSharedSideTester : public ::testing::Test block1(meta.declare_part_with_topology("block_1", get_element_topology())), activePart(meta.declare_part("active")) { - meta.use_simple_fields(); coordField = &meta.declare_field(stk::topology::NODE_RANK, "coordinates"); if (bulkData.parallel_size() <= 2) @@ -437,7 +439,8 @@ static const std::vector twoElemTwoSharedSideNodeIDs[2] = }; static const std::vector twoElemTwoSharedSideSharedNodeIds = {2, 3, 4, 6, 7, 8}; -class TwoElemTwoSharedSideTester : public TwoElemMultipleSharedSideTester +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TwoElemTwoSharedSideTester : public TwoElemMultipleSharedSideTester { public: TwoElemTwoSharedSideTester() @@ -586,7 +589,9 @@ static const std::vector twoElemThreeSharedSideNodeIDs[2] = {3, 2, 1, 4, 7, 6, 10, 8} }; static const std::vector twoElemThreeSharedSideSharedNodeIds = {1, 2, 3, 4, 6, 7, 8}; -class TwoElemThreeSharedSideTester : public TwoElemMultipleSharedSideTester + +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TwoElemThreeSharedSideTester : public TwoElemMultipleSharedSideTester { public: TwoElemThreeSharedSideTester() : @@ -594,7 +599,8 @@ 
class TwoElemThreeSharedSideTester : public TwoElemMultipleSharedSideTester {} }; -class TwoElemThreeSharedSideNoAuraTester : public TwoElemMultipleSharedSideTester +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TwoElemThreeSharedSideNoAuraTester : public TwoElemMultipleSharedSideTester { public: TwoElemThreeSharedSideNoAuraTester() : @@ -607,8 +613,8 @@ static std::vector twoQuadTwoSharedSideNodeIDs[2] = { {2, 5, 3, 4} }; static std::vector twoQuadTwoSharedSideSharedNodeIds = {2, 3, 4}; - -class TwoElem2dTwoSharedSideTester : public TwoElemMultipleSharedSideTester +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TwoElem2dTwoSharedSideTester : public TwoElemMultipleSharedSideTester { public: TwoElem2dTwoSharedSideTester() : diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphTestUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphTestUtils.hpp index c3becb029b2c..b1811815deea 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphTestUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphTestUtils.hpp @@ -171,7 +171,8 @@ inline void test_num_faces_per_element(const stk::mesh::BulkData& bulkData, cons namespace simple_fields { -class ElementDeathBulkDataTester : public ElemGraphTestUtils::ElementDeathBulkDataTester {}; +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +ElementDeathBulkDataTester : public ElemGraphTestUtils::ElementDeathBulkDataTester {}; } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceCreationTestUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceCreationTestUtils.hpp index ddc54c7c7ec0..1bae7f7755cc 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceCreationTestUtils.hpp +++ 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceCreationTestUtils.hpp @@ -205,7 +205,8 @@ class SideCreationTester namespace simple_fields { -class SideCreationTester +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +SideCreationTester { public: SideCreationTester(MPI_Comm comm) : communicator(comm) {} diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.cpp index 8a08e4e81d44..f4cd44f56fb9 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.cpp @@ -268,14 +268,14 @@ unsigned read_file_create_faces_count_sides(std::string filename) std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); stk::mesh::create_all_sides(*mesh, mesh->mesh_meta_data().universal_part(), {}, false); - return count_sides_in_mesh(*mesh); + return ::count_sides_in_mesh(*mesh); } unsigned read_file_count_sides(std::string filename) { std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); - return count_sides_in_mesh(*mesh); + return ::count_sides_in_mesh(*mesh); } bool fully_connected_elements_to_faces(const stk::mesh::BulkData& bulk) @@ -295,14 +295,14 @@ unsigned read_file_create_faces_fully_connected_stk(std::string filename) std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); stk::mesh::create_all_sides(*mesh, mesh->mesh_meta_data().universal_part(), {}, false); - return fully_connected_elements_to_faces(*mesh); + return ::fully_connected_elements_to_faces(*mesh); } unsigned read_file_fully_connected_stk(std::string filename) { std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); - return fully_connected_elements_to_faces(*mesh); + return ::fully_connected_elements_to_faces(*mesh); 
} unsigned count_shared_faces_between_different_elements(const stk::mesh::BulkData& bulk) @@ -311,7 +311,7 @@ unsigned count_shared_faces_between_different_elements(const stk::mesh::BulkData stk::mesh::for_each_entity_run(bulk, stk::topology::FACE_RANK, [&shared_face_count](const stk::mesh::BulkData& mesh, stk::mesh::Entity face) { - if (is_face_shared_between_different_elements(mesh, face)) + if (::is_face_shared_between_different_elements(mesh, face)) ++shared_face_count; } ); @@ -323,14 +323,14 @@ unsigned read_file_create_faces_shared_faces_different_elements_stk(std::string std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); stk::mesh::create_all_sides(*mesh, mesh->mesh_meta_data().universal_part(), {}, false); - return count_shared_faces_between_different_elements(*mesh); + return ::count_shared_faces_between_different_elements(*mesh); } unsigned read_file_shared_faces_different_elements_stk(std::string filename) { std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); - return count_shared_faces_between_different_elements(*mesh); + return ::count_shared_faces_between_different_elements(*mesh); } unsigned count_shared_faces_between_same_element(const stk::mesh::BulkData& bulk) @@ -339,7 +339,7 @@ unsigned count_shared_faces_between_same_element(const stk::mesh::BulkData& bulk stk::mesh::for_each_entity_run(bulk, stk::topology::FACE_RANK, [&shared_face_count](const stk::mesh::BulkData& mesh, stk::mesh::Entity face) { - if (is_face_shared_between_same_element(mesh,face)) + if (::is_face_shared_between_same_element(mesh,face)) ++shared_face_count; } ); @@ -351,19 +351,19 @@ unsigned read_file_create_faces_shared_faces_same_elements_stk(std::string filen std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); stk::mesh::create_all_sides(*mesh, mesh->mesh_meta_data().universal_part(), {}, false); - return count_shared_faces_between_same_element(*mesh); + return 
::count_shared_faces_between_same_element(*mesh); } unsigned read_file_shared_faces_same_elements_stk(std::string filename) { std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); - return count_shared_faces_between_same_element(*mesh); + return ::count_shared_faces_between_same_element(*mesh); } bool check_face_elem_connectivity(const stk::mesh::BulkData& mesh, const std::set& gold_faces) { - std::set current_faces = get_face_connectivity_at_x_equal_half(mesh); + std::set current_faces = ::get_face_connectivity_at_x_equal_half(mesh); if (current_faces == gold_faces) { return true; } @@ -376,7 +376,7 @@ bool read_file_create_faces_check_face_elem_connectivity_stk(std::string filenam std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); stk::mesh::create_all_sides(*mesh, mesh->mesh_meta_data().universal_part(), {}, false); - return check_face_elem_connectivity(*mesh, counts); + return ::check_face_elem_connectivity(*mesh, counts); } @@ -384,7 +384,7 @@ bool read_file_check_face_elem_connectivity_stk(std::string filename, const std: { std::shared_ptr mesh = build_mesh(MPI_COMM_WORLD); stk::io::fill_mesh(filename, *mesh); - return check_face_elem_connectivity(*mesh, counts); + return ::check_face_elem_connectivity(*mesh, counts); } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.hpp index 6adeec26a88b..2182720af005 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/FaceTestingUtils.hpp @@ -77,20 +77,49 @@ bool read_file_check_face_elem_connectivity_stk(std::string filename, const std: namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned count_sides_in_mesh(const stk::mesh::BulkData& mesh); + +STK_DEPRECATED_MSG("Please 
use the non-simple_fields-namespaced version of this function instead") unsigned read_file_create_faces_count_sides(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_count_sides(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") bool fully_connected_elements_to_faces(const stk::mesh::BulkData& mesh); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_create_faces_fully_connected_stk(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_fully_connected_stk(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned count_shared_faces_between_different_elements(const stk::mesh::BulkData& mesh); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_create_faces_shared_faces_different_elements_stk(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_shared_faces_different_elements_stk(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned count_shared_faces_between_same_element(const stk::mesh::BulkData& mesh); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_create_faces_shared_faces_same_elements_stk(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") unsigned read_file_shared_faces_same_elements_stk(std::string filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version 
of this function instead") bool check_face_elem_connectivity(const stk::mesh::BulkData& mesh, const std::set& counts); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") bool read_file_create_faces_check_face_elem_connectivity_stk(std::string filename, const std::set& counts); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") bool read_file_check_face_elem_connectivity_stk(std::string filename, const std::set& counts); } // namespace simple_fields @@ -115,18 +144,21 @@ stk::mesh::Part *get_surface_part_with_id(const stk::mesh::MetaData &meta, int i namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Entity declare_element_side_with_nodes(stk::mesh::BulkData &mesh, stk::mesh::Entity elem, const stk::mesh::EntityVector &nodes, stk::mesh::EntityId globalId, stk::mesh::Part &part); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Entity declare_element_to_edge_with_nodes(stk::mesh::BulkData &mesh, stk::mesh::Entity elem, const stk::mesh::EntityVector &sub_topology_nodes, stk::mesh::EntityId global_sub_topology_id, stk::mesh::Part &part); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Part *get_surface_part_with_id(const stk::mesh::MetaData &meta, int id); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.cpp index 26bfa4bfcb73..1608aac2107c 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.cpp @@ -52,11 +52,11 @@ stk::mesh::PartVector create_sideset_parts(stk::mesh::MetaData &meta, const std: int id = 1; 
for(const std::string &name : names) { - stk::mesh::Part &part = meta.declare_part_with_topology(name, stk::topology::QUAD_4); - meta.set_part_id(part, id); - stk::io::put_io_part_attribute(part); - parts.push_back(&part); - ++id; + stk::mesh::Part &part = meta.declare_part_with_topology(name, stk::topology::QUAD_4); + meta.set_part_id(part, id); + stk::io::put_io_part_attribute(part); + parts.push_back(&part); + ++id; } return parts; @@ -64,42 +64,42 @@ stk::mesh::PartVector create_sideset_parts(stk::mesh::MetaData &meta, const std: void create_AA_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering) { - std::string meshDesc; - if (elemOrdering == INCREASING) { - meshDesc = "0,1,HEX_8,1,2,3,4,5, 6, 7, 8,block_1\n" - "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1"; - } - else if (elemOrdering == DECREASING) { - meshDesc = "0,1,HEX_8,5,6,7,8,9,10,11,12,block_1\n" - "0,2,HEX_8,1,2,3,4,5, 6, 7, 8,block_1"; - } + std::string meshDesc; + if (elemOrdering == INCREASING) { + meshDesc = "0,1,HEX_8,1,2,3,4,5, 6, 7, 8,block_1\n" + "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1"; + } + else if (elemOrdering == DECREASING) { + meshDesc = "0,1,HEX_8,5,6,7,8,9,10,11,12,block_1\n" + "0,2,HEX_8,1,2,3,4,5, 6, 7, 8,block_1"; + } - std::vector coordinates = { 0,0,0, 1,0,0, 1,1,0, 0,1,0, - 0,0,1, 1,0,1, 1,1,1, 0,1,1, - 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; + std::vector coordinates = { 0,0,0, 1,0,0, 1,1,0, 0,1,0, + 0,0,1, 1,0,1, 1,1,1, 0,1,1, + 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; - bulk.initialize_face_adjacent_element_graph(); - stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); + bulk.initialize_face_adjacent_element_graph(); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void create_AB_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering) { - std::string meshDesc; - if (elemOrdering == INCREASING) { - meshDesc = "0,1,HEX_8,1,2,3,4,5, 6, 7, 8,block_1\n" - 
"0,2,HEX_8,5,6,7,8,9,10,11,12,block_2"; - } - else if (elemOrdering == DECREASING) { - meshDesc = "0,1,HEX_8,5,6,7,8,9,10,11,12,block_2\n" - "0,2,HEX_8,1,2,3,4,5, 6, 7, 8,block_1"; - } + std::string meshDesc; + if (elemOrdering == INCREASING) { + meshDesc = "0,1,HEX_8,1,2,3,4,5, 6, 7, 8,block_1\n" + "0,2,HEX_8,5,6,7,8,9,10,11,12,block_2"; + } + else if (elemOrdering == DECREASING) { + meshDesc = "0,1,HEX_8,5,6,7,8,9,10,11,12,block_2\n" + "0,2,HEX_8,1,2,3,4,5, 6, 7, 8,block_1"; + } - std::vector coordinates = { 0,0,0, 1,0,0, 1,1,0, 0,1,0, - 0,0,1, 1,0,1, 1,1,1, 0,1,1, - 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; + std::vector coordinates = { 0,0,0, 1,0,0, 1,1,0, 0,1,0, + 0,0,1, 1,0,1, 1,1,1, 0,1,1, + 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; - bulk.initialize_face_adjacent_element_graph(); - stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); + bulk.initialize_face_adjacent_element_graph(); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void populate_elem_sides(SidesetDirection direction, @@ -107,44 +107,44 @@ void populate_elem_sides(SidesetDirection direction, stk::mesh::EntityIdVector &elem, std::vector &ordinal) { - const stk::mesh::EntityId leftElementId = (elemOrdering == INCREASING) ? 1 : 2; - const stk::mesh::EntityId rightElementId = (elemOrdering == INCREASING) ? 2 : 1; + const stk::mesh::EntityId leftElementId = (elemOrdering == INCREASING) ? 1 : 2; + const stk::mesh::EntityId rightElementId = (elemOrdering == INCREASING) ? 
2 : 1; - if (direction == LEFT) - { - elem.push_back(leftElementId); - ordinal.push_back(5); - } - else if (direction == RIGHT) - { - elem.push_back(rightElementId); - ordinal.push_back(4); - } - else - { - elem.push_back(leftElementId); - ordinal.push_back(5); + if (direction == LEFT) + { + elem.push_back(leftElementId); + ordinal.push_back(5); + } + else if (direction == RIGHT) + { + elem.push_back(rightElementId); + ordinal.push_back(4); + } + else + { + elem.push_back(leftElementId); + ordinal.push_back(5); - elem.push_back(rightElementId); - ordinal.push_back(4); - } + elem.push_back(rightElementId); + ordinal.push_back(4); + } } void populate_sideset_names(SidesetDirection direction, std::vector &names) { - if(direction == LEFT) - { - names = {"surface_1", "surface_block_1_QUAD4_1"}; - } - else if(direction == RIGHT) - { - names = {"surface_1", "surface_block_2_QUAD4_1"}; - } - else - { - names = {"surface_1", "surface_block_1_QUAD4_1", "surface_block_2_QUAD4_1"}; - } + if(direction == LEFT) + { + names = {"surface_1", "surface_block_1_QUAD4_1"}; + } + else if(direction == RIGHT) + { + names = {"surface_1", "surface_block_2_QUAD4_1"}; + } + else + { + names = {"surface_1", "surface_block_1_QUAD4_1", "surface_block_2_QUAD4_1"}; + } } void populate_AA_sideset(stk::mesh::BulkData& bulk, @@ -152,42 +152,42 @@ void populate_AA_sideset(stk::mesh::BulkData& bulk, ElementOrdering elemOrdering, const stk::mesh::PartVector& parts) { - stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - stk::mesh::EntityIdVector elem; - std::vector ordinal; - populate_elem_sides(direction, elemOrdering, elem, ordinal); - STK_ThrowRequire(elem.size() == ordinal.size()); - - stk::mesh::SideSet& sideSet = bulk.create_sideset(*parts[0]); - sideSet.set_accept_all_internal_non_coincident_entries(false); - for (unsigned i = 0; i < elem.size(); ++i) { - sideSet.add( { bulk.get_entity(stk::topology::ELEMENT_RANK, elem[i]), - ordinal[i] }); - } + stk::mesh::MetaData &meta = 
bulk.mesh_meta_data(); + stk::mesh::EntityIdVector elem; + std::vector ordinal; + populate_elem_sides(direction, elemOrdering, elem, ordinal); + STK_ThrowRequire(elem.size() == ordinal.size()); + + stk::mesh::SideSet& sideSet = bulk.create_sideset(*parts[0]); + sideSet.set_accept_all_internal_non_coincident_entries(false); + for (unsigned i = 0; i < elem.size(); ++i) { + sideSet.add( { bulk.get_entity(stk::topology::ELEMENT_RANK, elem[i]), + ordinal[i] }); + } - stk::mesh::Part* block_1 = meta.get_part("block_1"); - EXPECT_TRUE(block_1 != nullptr); + stk::mesh::Part* block_1 = meta.get_part("block_1"); + EXPECT_TRUE(block_1 != nullptr); - meta.set_part_id(*block_1, 1); - std::vector touchingParts { block_1 }; - meta.set_surface_to_block_mapping(parts[0], touchingParts); + meta.set_part_id(*block_1, 1); + std::vector touchingParts { block_1 }; + meta.set_surface_to_block_mapping(parts[0], touchingParts); - // tookusa: Order is important for the incremental sideset updater ... surface to block mapping must be set first - bulk.create_side_entities(sideSet, parts); + // tookusa: Order is important for the incremental sideset updater ... 
surface to block mapping must be set first + bulk.create_side_entities(sideSet, parts); } stk::mesh::Part* create_AA_mesh_with_sideset(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering) { - stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - stk::mesh::PartVector parts = create_sideset_parts(meta, std::vector{"surface_1"}); + stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + stk::mesh::PartVector parts = create_sideset_parts(meta, std::vector{"surface_1"}); - create_AA_mesh(bulk, elemOrdering); + create_AA_mesh(bulk, elemOrdering); - populate_AA_sideset(bulk, direction, elemOrdering, parts); + populate_AA_sideset(bulk, direction, elemOrdering, parts); - return parts[0]; + return parts[0]; } stk::mesh::Part* create_AA_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk, @@ -195,19 +195,19 @@ stk::mesh::Part* create_AA_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk ElementOrdering elemOrdering, const std::string & fieldName) { - stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - stk::mesh::PartVector parts = create_sideset_parts(meta, std::vector{"surface_1"}); + stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + stk::mesh::PartVector parts = create_sideset_parts(meta, std::vector{"surface_1"}); - const unsigned numberOfStates = 1; - stk::mesh::Field & field = stk::mesh::legacy::declare_field>(meta, stk::topology::NODE_RANK, fieldName, numberOfStates); - const double initValue = 123; - stk::mesh::put_field_on_mesh(field, meta.universal_part(), &initValue); + const unsigned numberOfStates = 1; + stk::mesh::Field & field = meta.declare_field(stk::topology::NODE_RANK, fieldName, numberOfStates); + const double initValue = 123; + stk::mesh::put_field_on_mesh(field, meta.universal_part(), &initValue); - create_AA_mesh(bulk, elemOrdering); + create_AA_mesh(bulk, elemOrdering); - populate_AA_sideset(bulk, direction, elemOrdering, parts); + populate_AA_sideset(bulk, direction, elemOrdering, parts); - return parts[0]; + 
return parts[0]; } void populate_AB_sideset(stk::mesh::BulkData& bulk, @@ -215,78 +215,78 @@ void populate_AB_sideset(stk::mesh::BulkData& bulk, ElementOrdering elemOrdering, const stk::mesh::PartVector& parts) { - stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - stk::mesh::EntityIdVector elem; - std::vector ordinal; - - populate_elem_sides(direction, elemOrdering, elem, ordinal); - STK_ThrowRequire(elem.size() == ordinal.size()); - - stk::mesh::SideSet &sideSet = bulk.create_sideset(*parts[0]); - sideSet.set_accept_all_internal_non_coincident_entries(false); - for(unsigned i=0; i ordinal; + + populate_elem_sides(direction, elemOrdering, elem, ordinal); + STK_ThrowRequire(elem.size() == ordinal.size()); + + stk::mesh::SideSet &sideSet = bulk.create_sideset(*parts[0]); + sideSet.set_accept_all_internal_non_coincident_entries(false); + for(unsigned i=0; i touchingParts; + std::vector touchingParts; - if(direction == LEFT) - { - touchingParts = {block_1}; - meta.set_surface_to_block_mapping(parts[1], touchingParts); - } - else if(direction == RIGHT) - { - touchingParts = {block_2}; - meta.set_surface_to_block_mapping(parts[1], touchingParts); - } - else - { - touchingParts = {block_1}; - meta.set_surface_to_block_mapping(parts[1], touchingParts); + if(direction == LEFT) + { + touchingParts = {block_1}; + meta.set_surface_to_block_mapping(parts[1], touchingParts); + } + else if(direction == RIGHT) + { + touchingParts = {block_2}; + meta.set_surface_to_block_mapping(parts[1], touchingParts); + } + else + { + touchingParts = {block_1}; + meta.set_surface_to_block_mapping(parts[1], touchingParts); - touchingParts = {block_2}; - meta.set_surface_to_block_mapping(parts[2], touchingParts); - } + touchingParts = {block_2}; + meta.set_surface_to_block_mapping(parts[2], touchingParts); + } - // tookusa: Order is important for the incremental sideset updater ... 
surface to block mapping must be set first - bulk.create_side_entities(sideSet, parts); + // tookusa: Order is important for the incremental sideset updater ... surface to block mapping must be set first + bulk.create_side_entities(sideSet, parts); } stk::mesh::Part* create_AB_mesh_with_sideset(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering) { - stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - std::vector sideSetNames; - populate_sideset_names(direction, sideSetNames); + stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + std::vector sideSetNames; + populate_sideset_names(direction, sideSetNames); - stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); + stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); - for(unsigned i = 0; i sideSetNames; - populate_sideset_names(direction, sideSetNames); + stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + std::vector sideSetNames; + populate_sideset_names(direction, sideSetNames); - stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); + stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); - for(unsigned i = 0; i & field = stk::mesh::legacy::declare_field>(meta, stk::topology::NODE_RANK, fieldName, numberOfStates); - const double initValue = 123; - stk::mesh::put_field_on_mesh(field, meta.universal_part(), &initValue); + const unsigned numberOfStates = 1; + stk::mesh::Field & field = meta.declare_field(stk::topology::NODE_RANK, fieldName, numberOfStates); + const double initValue = 123; + stk::mesh::put_field_on_mesh(field, meta.universal_part(), &initValue); - create_AB_mesh(bulk, elemOrdering); + create_AB_mesh(bulk, elemOrdering); - populate_AB_sideset(bulk, direction, elemOrdering, parts); + populate_AB_sideset(bulk, direction, elemOrdering, parts); - return parts[0]; + return parts[0]; } stk::mesh::Part* create_AB_mesh_with_sideset_and_distribution_factors(stk::mesh::BulkData &bulk, @@ -326,39 +326,39 @@ 
stk::mesh::Part* create_AB_mesh_with_sideset_and_distribution_factors(stk::mesh: const std::string & fieldName, const double initValue) { - stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - std::vector sideSetNames; - populate_sideset_names(direction, sideSetNames); + stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + std::vector sideSetNames; + populate_sideset_names(direction, sideSetNames); - stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); + stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); - for(unsigned i = 0; i & ssField = stk::mesh::legacy::declare_field>(meta, stk::topology::FACE_RANK, fieldName, numberOfStates); - for (stk::mesh::Part* part : parts) - { - stk::io::set_distribution_factor_field(*part, ssField); - } - stk::topology sideTopo = sidePart->topology(); - STK_ThrowRequireMsg(sideTopo != stk::topology::INVALID_TOPOLOGY, "sidePart "<name()<<" has invalid topology."); - unsigned numNodes = sideTopo.num_nodes(); - std::vector initValVec(numNodes, initValue); - stk::mesh::put_field_on_mesh(ssField, *sidePart, numNodes, initValVec.data()); + stk::mesh::Part* sidePart = parts[0]; + STK_ThrowRequire(nullptr != sidePart); + const unsigned numberOfStates = 1; + stk::mesh::Field & ssField = meta.declare_field(stk::topology::FACE_RANK, fieldName, numberOfStates); + for (stk::mesh::Part* part : parts) + { + stk::io::set_distribution_factor_field(*part, ssField); + } + stk::topology sideTopo = sidePart->topology(); + STK_ThrowRequireMsg(sideTopo != stk::topology::INVALID_TOPOLOGY, "sidePart "<name()<<" has invalid topology."); + unsigned numNodes = sideTopo.num_nodes(); + std::vector initValVec(numNodes, initValue); + stk::mesh::put_field_on_mesh(ssField, *sidePart, numNodes, initValVec.data()); - create_AB_mesh(bulk, elemOrdering); + create_AB_mesh(bulk, elemOrdering); - populate_AB_sideset(bulk, direction, elemOrdering, parts); + populate_AB_sideset(bulk, direction, elemOrdering, parts); - return parts[0]; + 
return parts[0]; } namespace simple_fields { @@ -397,7 +397,7 @@ void create_AA_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering) 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; bulk.initialize_face_adjacent_element_graph(); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void create_AB_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering) @@ -417,7 +417,7 @@ void create_AB_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering) 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; bulk.initialize_face_adjacent_element_graph(); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void populate_elem_sides(SidesetDirection direction, @@ -473,7 +473,7 @@ void populate_AA_sideset(stk::mesh::BulkData& bulk, stk::mesh::MetaData &meta = bulk.mesh_meta_data(); stk::mesh::EntityIdVector elem; std::vector ordinal; - simple_fields::populate_elem_sides(direction, elemOrdering, elem, ordinal); + stk::unit_test_util::populate_elem_sides(direction, elemOrdering, elem, ordinal); STK_ThrowRequire(elem.size() == ordinal.size()); stk::mesh::SideSet& sideSet = bulk.create_sideset(*parts[0]); @@ -501,9 +501,9 @@ stk::mesh::Part* create_AA_mesh_with_sideset(stk::mesh::BulkData &bulk, stk::mesh::MetaData &meta = bulk.mesh_meta_data(); stk::mesh::PartVector parts = create_sideset_parts(meta, std::vector{"surface_1"}); - simple_fields::create_AA_mesh(bulk, elemOrdering); + stk::unit_test_util::create_AA_mesh(bulk, elemOrdering); - simple_fields::populate_AA_sideset(bulk, direction, elemOrdering, parts); + stk::unit_test_util::populate_AA_sideset(bulk, direction, elemOrdering, parts); return 
parts[0]; } @@ -521,9 +521,9 @@ stk::mesh::Part* create_AA_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk const double initValue = 123; stk::mesh::put_field_on_mesh(field, meta.universal_part(), &initValue); - simple_fields::create_AA_mesh(bulk, elemOrdering); + stk::unit_test_util::create_AA_mesh(bulk, elemOrdering); - simple_fields::populate_AA_sideset(bulk, direction, elemOrdering, parts); + stk::unit_test_util::populate_AA_sideset(bulk, direction, elemOrdering, parts); return parts[0]; } @@ -537,7 +537,7 @@ void populate_AB_sideset(stk::mesh::BulkData& bulk, stk::mesh::EntityIdVector elem; std::vector ordinal; - simple_fields::populate_elem_sides(direction, elemOrdering, elem, ordinal); + stk::unit_test_util::populate_elem_sides(direction, elemOrdering, elem, ordinal); STK_ThrowRequire(elem.size() == ordinal.size()); stk::mesh::SideSet &sideSet = bulk.create_sideset(*parts[0]); @@ -588,7 +588,7 @@ stk::mesh::Part* create_AB_mesh_with_sideset(stk::mesh::BulkData &bulk, { stk::mesh::MetaData &meta = bulk.mesh_meta_data(); std::vector sideSetNames; - simple_fields::populate_sideset_names(direction, sideSetNames); + stk::unit_test_util::populate_sideset_names(direction, sideSetNames); stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); @@ -600,9 +600,9 @@ stk::mesh::Part* create_AB_mesh_with_sideset(stk::mesh::BulkData &bulk, meta.declare_part_subset(*parts[0], *parts[i]); } - simple_fields::create_AB_mesh(bulk, elemOrdering); + stk::unit_test_util::create_AB_mesh(bulk, elemOrdering); - simple_fields::populate_AB_sideset(bulk, direction, elemOrdering, parts); + stk::unit_test_util::populate_AB_sideset(bulk, direction, elemOrdering, parts); return parts[0]; } @@ -614,7 +614,7 @@ stk::mesh::Part* create_AB_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk { stk::mesh::MetaData &meta = bulk.mesh_meta_data(); std::vector sideSetNames; - simple_fields::populate_sideset_names(direction, sideSetNames); + 
stk::unit_test_util::populate_sideset_names(direction, sideSetNames); stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); @@ -631,9 +631,9 @@ stk::mesh::Part* create_AB_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk const double initValue = 123; stk::mesh::put_field_on_mesh(field, meta.universal_part(), &initValue); - simple_fields::create_AB_mesh(bulk, elemOrdering); + stk::unit_test_util::create_AB_mesh(bulk, elemOrdering); - simple_fields::populate_AB_sideset(bulk, direction, elemOrdering, parts); + stk::unit_test_util::populate_AB_sideset(bulk, direction, elemOrdering, parts); return parts[0]; } @@ -646,7 +646,7 @@ stk::mesh::Part* create_AB_mesh_with_sideset_and_distribution_factors(stk::mesh: { stk::mesh::MetaData &meta = bulk.mesh_meta_data(); std::vector sideSetNames; - simple_fields::populate_sideset_names(direction, sideSetNames); + stk::unit_test_util::populate_sideset_names(direction, sideSetNames); stk::mesh::PartVector parts = create_sideset_parts(meta, sideSetNames); @@ -672,9 +672,9 @@ stk::mesh::Part* create_AB_mesh_with_sideset_and_distribution_factors(stk::mesh: std::vector initValVec(numNodes, initValue); stk::mesh::put_field_on_mesh(ssField, *sidePart, numNodes, initValVec.data()); - simple_fields::create_AB_mesh(bulk, elemOrdering); + stk::unit_test_util::create_AB_mesh(bulk, elemOrdering); - simple_fields::populate_AB_sideset(bulk, direction, elemOrdering, parts); + stk::unit_test_util::populate_AB_sideset(bulk, direction, elemOrdering, parts); return parts[0]; } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.hpp index 10bc4c8a9230..09df60d1faa9 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GenerateALefRAMesh.hpp @@ -34,6 +34,7 @@ #ifndef GENERATE_ALEFRA_MESH_HPP #define GENERATE_ALEFRA_MESH_HPP 
+#include #include namespace stk { @@ -69,19 +70,29 @@ stk::mesh::Part* create_AB_mesh_with_sideset_and_distribution_factors(stk::mesh: namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void create_AA_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering = INCREASING); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void create_AB_mesh(stk::mesh::BulkData &bulk, ElementOrdering elemOrdering = INCREASING); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Part* create_AA_mesh_with_sideset(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering = INCREASING); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Part* create_AB_mesh_with_sideset(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering = INCREASING); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Part* create_AA_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering, const std::string & fieldName); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Part* create_AB_mesh_with_sideset_and_field(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering, const std::string & fieldName); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::Part* create_AB_mesh_with_sideset_and_distribution_factors(stk::mesh::BulkData &bulk, SidesetDirection direction, ElementOrdering elemOrdering, diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.cpp 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.cpp index f1c10bb8950c..4302f5da805e 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.cpp @@ -23,7 +23,7 @@ namespace unit_test_util GeneratedMeshToFile::GeneratedMeshToFile(stk::ParallelMachine comm, stk::mesh::BulkData::AutomaticAuraOption auraOption) - : bulkPtr(build_mesh_no_simple_fields(3, comm, auraOption)), + : bulkPtr(build_mesh(3, comm, auraOption)), bulk(*bulkPtr), meta(bulk.mesh_meta_data()) { @@ -55,11 +55,13 @@ GeneratedMeshToFileWithTransientFields::GeneratedMeshToFileWithTransientFields(s stk::topology::rank_t rank) : GeneratedMeshToFile(comm, auraOption), fieldRank(rank), - scalarField(stk::mesh::legacy::declare_field >(meta, fieldRank, fieldBaseName+"_scalar", 1)), - vectorField(stk::mesh::legacy::declare_field >(meta, fieldRank, fieldBaseName+"_vector", 1)) + scalarField(meta.declare_field(fieldRank, fieldBaseName+"_scalar", 1)), + vectorField(meta.declare_field(fieldRank, fieldBaseName+"_vector", 1)) { stk::mesh::put_field_on_mesh(scalarField, meta.universal_part(), nullptr); stk::mesh::put_field_on_mesh(vectorField, meta.universal_part(), 3, nullptr); + + stk::io::set_field_output_type(vectorField, stk::io::FieldOutputType::VECTOR_3D); } void GeneratedMeshToFileWithTransientFields::write_mesh_with_field(const std::vector& timeSteps, @@ -98,7 +100,6 @@ GeneratedMeshToFile::GeneratedMeshToFile(stk::ParallelMachine comm, bulk(*bulkPtr), meta(bulk.mesh_meta_data()) { - meta.use_simple_fields(); } void GeneratedMeshToFile::write_mesh() diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.hpp index 39bc39afab79..559bbd72b247 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.hpp +++ 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GeneratedMeshToFile.hpp @@ -94,7 +94,7 @@ class GeneratedMeshToFileWithTransientFields : public GeneratedMeshToFile protected: stk::topology::rank_t fieldRank; stk::mesh::Field &scalarField; - stk::mesh::Field &vectorField; + stk::mesh::Field &vectorField; private: GeneratedMeshToFileWithTransientFields(); @@ -102,7 +102,8 @@ class GeneratedMeshToFileWithTransientFields : public GeneratedMeshToFile namespace simple_fields { -class GeneratedMeshToFile +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +GeneratedMeshToFile { public: GeneratedMeshToFile(stk::ParallelMachine comm, stk::mesh::BulkData::AutomaticAuraOption auraOption); @@ -123,7 +124,8 @@ class GeneratedMeshToFile GeneratedMeshToFile(); }; -class GeneratedMeshToFileWithTransientFields : public GeneratedMeshToFile +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +GeneratedMeshToFileWithTransientFields : public GeneratedMeshToFile { public: GeneratedMeshToFileWithTransientFields(stk::ParallelMachine comm, diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.cpp new file mode 100644 index 000000000000..3a9f9f2176e0 --- /dev/null +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.cpp @@ -0,0 +1,70 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // * Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. 
+ // + // * Redistributions in binary form must reproduce the above + // copyright notice, this list of conditions and the following + // disclaimer in the documentation and/or other materials provided + // with the distribution. + // +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// + // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include + +namespace stk +{ +namespace unit_test_util +{ + +namespace simple_fields { + +std::string get_mesh_spec(unsigned dim) { + return stk::unit_test_util::get_mesh_spec(dim); +} + +std::string get_mesh_spec(const std::string &optionName) +{ + return stk::unit_test_util::get_mesh_spec(optionName); +} + +std::vector get_many_block_coordinates(unsigned numBlocks) +{ + return stk::unit_test_util::get_many_block_coordinates(numBlocks); +} + +void get_block_proc_distribution(unsigned numBlocks, unsigned numProc, std::vector& procs) +{ + stk::unit_test_util::get_block_proc_distribution(numBlocks, numProc, procs); +} + +std::string get_many_block_mesh_desc(unsigned numBlocks, unsigned numProc) +{ + return stk::unit_test_util::get_many_block_mesh_desc(numBlocks, numProc); +} + +} // namespace simple_fields + +} +} diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.hpp index 8ddcf10a7597..3c32ae882813 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/GetMeshSpec.hpp @@ -36,7 +36,9 @@ #define STK_GET_MESH_SPEC_H #include +#include #include +#include namespace stk { @@ -128,34 +130,20 @@ inline std::string get_many_block_mesh_desc(unsigned numBlocks, unsigned numProc namespace simple_fields { -inline -std::string get_mesh_spec(unsigned dim) { - return stk::unit_test_util::get_mesh_spec(dim); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +std::string get_mesh_spec(unsigned dim); -inline -std::string get_mesh_spec(const std::string &optionName) -{ - return stk::unit_test_util::get_mesh_spec(optionName); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +std::string get_mesh_spec(const std::string &optionName); -inline -std::vector get_many_block_coordinates(unsigned 
numBlocks) -{ - return stk::unit_test_util::get_many_block_coordinates(numBlocks); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +std::vector get_many_block_coordinates(unsigned numBlocks); -inline -void get_block_proc_distribution(unsigned numBlocks, unsigned numProc, std::vector& procs) -{ - stk::unit_test_util::get_block_proc_distribution(numBlocks, numProc, procs); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void get_block_proc_distribution(unsigned numBlocks, unsigned numProc, std::vector& procs); -inline -std::string get_many_block_mesh_desc(unsigned numBlocks, unsigned numProc = 1) -{ - return stk::unit_test_util::get_many_block_mesh_desc(numBlocks, numProc); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +std::string get_many_block_mesh_desc(unsigned numBlocks, unsigned numProc = 1); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFileFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFileFixture.hpp index 530ae43fe91b..2503b2a815a5 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFileFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFileFixture.hpp @@ -34,10 +34,7 @@ #ifndef UNITTEST_MESHFILEFIXTURE_HPP #define UNITTEST_MESHFILEFIXTURE_HPP -#include "mpi.h" - #include - #include #include // for BulkData #include // for MetaData, put_field @@ -47,7 +44,6 @@ namespace stk { - namespace unit_test_util { @@ -73,7 +69,8 @@ class MeshFileFixture : public MeshFixture namespace simple_fields { -class MeshFileFixture : public MeshFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +MeshFileFixture : public MeshFixture { protected: MeshFileFixture() diff --git 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFixture.hpp index 7dbad9fd0889..24576ef8fd8d 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshFixture.hpp @@ -55,36 +55,36 @@ class MeshFixtureNoTest MeshFixtureNoTest() : communicator(MPI_COMM_WORLD), m_spatialDim(3), - m_entityRankNames(), - metaData(nullptr), bulkData() + m_entityRankNames() { } MeshFixtureNoTest(unsigned spatial_dim) : communicator(MPI_COMM_WORLD), m_spatialDim(spatial_dim), - m_entityRankNames(), - metaData(nullptr), bulkData() + m_entityRankNames() { } MeshFixtureNoTest(unsigned spatial_dim, const std::vector& entityRankNames) : communicator(MPI_COMM_WORLD), m_spatialDim(spatial_dim), - m_entityRankNames(entityRankNames), - metaData(nullptr), bulkData() + m_entityRankNames(entityRankNames) { } - MeshFixtureNoTest( - unsigned spatial_dim, stk::mesh::BulkData::AutomaticAuraOption auraOption, MPI_Comm comm = MPI_COMM_WORLD) - : communicator(comm), m_spatialDim(spatial_dim), m_entityRankNames(), metaData(nullptr), bulkData() + MeshFixtureNoTest(unsigned spatial_dim, stk::mesh::BulkData::AutomaticAuraOption auraOption, + MPI_Comm comm = MPI_COMM_WORLD) + : communicator(comm), + m_spatialDim(spatial_dim), + m_entityRankNames() { setup_empty_mesh(auraOption); } virtual ~MeshFixtureNoTest() { + } void set_spatial_dimension(unsigned spatialDim) @@ -125,7 +125,7 @@ class MeshFixtureNoTest void reset_mesh() { bulkData.reset(); - metaData = nullptr; + metaData.reset(); } int get_parallel_rank() const @@ -161,22 +161,51 @@ class MeshFixtureNoTest builder.set_initial_bucket_capacity(initialBucketCapacity); builder.set_maximum_bucket_capacity(maximumBucketCapacity); - bulkData = builder.create(); - metaData = &(bulkData->mesh_meta_data()); + if(nullptr == metaData) { + metaData = builder.create_meta_data(); + } + + if(nullptr == 
bulkData) { + bulkData = builder.create(metaData); + m_auraOption = auraOption; + m_initialBucketCapacity = initialBucketCapacity; + m_maximumBucketCapacity = maximumBucketCapacity; + } + + STK_ThrowRequireMsg((auraOption == m_auraOption) && + (initialBucketCapacity == m_initialBucketCapacity) && + (maximumBucketCapacity == m_maximumBucketCapacity), + "allocate_bulk() being called with different arguments from previous call:\n" + " auraOption = " << auraOption << " (previously: " << m_auraOption << ")\n" + " initialBucketCapacity = " << initialBucketCapacity << " (previously: " << m_initialBucketCapacity << ")\n" + " maximumBucketCapacity = " << maximumBucketCapacity << " (previously: " << m_maximumBucketCapacity << ")"); + } + + void set_meta(std::shared_ptr inMetaData) + { + STK_ThrowRequireMsg(metaData==nullptr, "Unit test error. Trying to reset non NULL meta data."); + metaData = inMetaData; } void set_bulk(std::shared_ptr inBulkData) { STK_ThrowRequireMsg(bulkData==nullptr, "Unit test error. Trying to reset non NULL bulk data."); bulkData = inBulkData; + + STK_ThrowRequireMsg(metaData==nullptr || metaData==bulkData->mesh_meta_data_ptr(), + "Unit test error. 
Trying to reset non NULL meta data."); } protected: MPI_Comm communicator; unsigned m_spatialDim; std::vector m_entityRankNames; - stk::mesh::MetaData *metaData = nullptr; + std::shared_ptr metaData; std::shared_ptr bulkData; + + stk::mesh::BulkData::AutomaticAuraOption m_auraOption{stk::mesh::BulkData::AUTO_AURA}; + unsigned m_initialBucketCapacity = 0; + unsigned m_maximumBucketCapacity = 0; }; class MeshFixture : public MeshFixtureNoTest, public ::ngp_testing::Test { @@ -231,7 +260,8 @@ inline void delete_mesh(const std::string & baseFileName) namespace simple_fields { -class MeshFixtureNoTest +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +MeshFixtureNoTest { protected: MeshFixtureNoTest() @@ -296,7 +326,7 @@ class MeshFixtureNoTest unsigned maximumBucketCapacity = mesh::get_default_maximum_bucket_capacity()) { allocate_bulk(auraOption, initialBucketCapacity, maximumBucketCapacity); - stk::unit_test_util::simple_fields::generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp(meshSpecification,*bulkData,"cyclic"); + stk::unit_test_util::generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp(meshSpecification,*bulkData,"cyclic"); } MPI_Comm get_comm() const @@ -345,7 +375,6 @@ class MeshFixtureNoTest if(nullptr == metaData) { metaData = builder.create_meta_data(); - metaData->use_simple_fields(); } if(nullptr == bulkData) { @@ -391,7 +420,8 @@ class MeshFixtureNoTest unsigned m_maximumBucketCapacity = 0; }; -class MeshFixture : public MeshFixtureNoTest, public ::ngp_testing::Test { +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +MeshFixture : public MeshFixtureNoTest, public ::ngp_testing::Test { protected: MeshFixture(){} MeshFixture(unsigned spatial_dim) : MeshFixtureNoTest(spatial_dim) {} @@ -400,12 +430,14 @@ class MeshFixture : public MeshFixtureNoTest, public ::ngp_testing::Test { }; -class MeshFixture2D : public 
MeshFixtureNoTest, public ::ngp_testing::Test { +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +MeshFixture2D : public MeshFixtureNoTest, public ::ngp_testing::Test { protected: MeshFixture2D() : MeshFixtureNoTest(2) {} }; -class MeshTestFixture : public MeshFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +MeshTestFixture : public MeshFixture { protected: void run_test_on_num_procs(int numProcs, stk::mesh::BulkData::AutomaticAuraOption auraOption) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp new file mode 100644 index 000000000000..051f1452cd5e --- /dev/null +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp @@ -0,0 +1,383 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "stk_unit_test_utils/MeshUtilsForBoundingVolumes.hpp" + +namespace simple_fields { + +void findBoundingBoxCoordinates(const std::vector &coordinates, std::vector& boxCoordinates) +{ + int spatialDim = 3; + double *minCoordinates = boxCoordinates.data(); + double *maxCoordinates = &boxCoordinates[spatialDim]; + + for (int j=0;j& domainBoxes) +{ + stk::mesh::ExodusTranslator exoTranslator(bulk); + size_t numberBoundingBoxes = 0; + std::vector sidesetIds; + exoTranslator.fill_side_set_ids(sidesetIds); + + for (size_t i=0;i boxCoordinates(6); + for (size_t ssetCounter=0;ssetCounter coordinates(3*num_nodes_per_side,0); + for(unsigned k=0;k(stk::mesh::field_data(*coords, nodes[k])); + coordinates[3*k] = data[0]; + coordinates[3*k+1] = data[1]; + coordinates[3*k+2] = data[2]; + } + ::findBoundingBoxCoordinates(coordinates, boxCoordinates); + domainBoxes[boxCounter].set_box(boxCoordinates[0], boxCoordinates[1], boxCoordinates[2], + boxCoordinates[3], boxCoordinates[4], boxCoordinates[5]); + boxCounter++; + } + } + + STK_ThrowRequireMsg(boxCounter == numberBoundingBoxes, "Program error. 
Please contact sierra-help for support"); +} + +void fillBoxesUsingSidesetsFromFile(MPI_Comm comm, const std::string& filename, std::vector &domainBoxes) +{ + std::shared_ptr bulk = build_mesh(3, comm); + stk::io::fill_mesh(filename, *bulk); + + ::createBoundingBoxesForSidesInSidesets(*bulk, domainBoxes); +} + +int openFileAndGetId(const int numBoxes, const int num_element_blocks, const std::string &filename) +{ + int CPU_word_size = sizeof(double); + int IO_word_size = 8; + int exoid = ex_create (filename.c_str(), EX_CLOBBER, &CPU_word_size, &IO_word_size); + int num_dim = 3; + int num_elements = numBoxes; + int num_nodes_per_element = 8; + int num_nodes = num_nodes_per_element*num_elements; + + int num_ns = 0, num_ss = 0; + ex_put_init(exoid, "Boxes", num_dim, num_nodes, num_elements, num_element_blocks, num_ns, num_ss); + return exoid; +} + +void setHexCoordinates(const double &xmin, const double &ymin, const double &zmin, + const double &xmax, const double &ymax, const double &zmax, + double* hexCoordinates) +{ +// int ordering[8] = { 4, 3, 2, 1, 8, 7, 6, 5 }; // one based! 
+ int ordering[8] = { 3, 2, 1, 0, 7, 6, 5, 4 }; + + hexCoordinates[3*ordering[0]+0] = xmin; + hexCoordinates[3*ordering[0]+1] = ymin; + hexCoordinates[3*ordering[0]+2] = zmin; + + hexCoordinates[3*ordering[1]+0] = xmax; + hexCoordinates[3*ordering[1]+1] = ymin; + hexCoordinates[3*ordering[1]+2] = zmin; + + hexCoordinates[3*ordering[2]+0] = xmax; + hexCoordinates[3*ordering[2]+1] = ymin; + hexCoordinates[3*ordering[2]+2] = zmax; + + hexCoordinates[3*ordering[3]+0] = xmin; + hexCoordinates[3*ordering[3]+1] = ymin; + hexCoordinates[3*ordering[3]+2] = zmax; + + hexCoordinates[3*ordering[4]+0] = xmin; + hexCoordinates[3*ordering[4]+1] = ymax; + hexCoordinates[3*ordering[4]+2] = zmin; + + hexCoordinates[3*ordering[5]+0] = xmax; + hexCoordinates[3*ordering[5]+1] = ymax; + hexCoordinates[3*ordering[5]+2] = zmin; + + hexCoordinates[3*ordering[6]+0] = xmax; + hexCoordinates[3*ordering[6]+1] = ymax; + hexCoordinates[3*ordering[6]+2] = zmax; + + hexCoordinates[3*ordering[7]+0] = xmin; + hexCoordinates[3*ordering[7]+1] = ymax; + hexCoordinates[3*ordering[7]+2] = zmax; +} + +void putCoordinatesInFile(const int exoid, const std::vector& boxes) +{ + const int num_nodes_per_element = 8; + const int spatialDim = 3; + + std::vector x(num_nodes_per_element*boxes.size()); + std::vector y(num_nodes_per_element*boxes.size()); + std::vector z(num_nodes_per_element*boxes.size()); + + for (size_t i=0; i &numElementsPerBlock) +{ + const int minNumElementsPer = 1; + const int maxNumElementsPer = 1000; + + int numElementsPer = num_elements / 100; + + numElementsPer = std::max( numElementsPer, minNumElementsPer ); + numElementsPer = std::min( numElementsPer, maxNumElementsPer ); + + for (int i = 0; i < num_elements; i += numElementsPer) + { + int numElementsThisBlock = (i+numElementsPer) < num_elements ? 
numElementsPer : num_elements-i; + numElementsPerBlock.push_back(numElementsThisBlock); + } +} + +void writeExodusFileUsingBoxes(const std::vector& boxes, const std::string &filename) +{ + if ( boxes.size() == 0 ) + { + // std::cerr << "Skipping writing of file. No boxes to write.\n"; + return; + } + + const int num_nodes_per_elem = 8; + const int num_attr = 0; + const unsigned num_elements = boxes.size(); + std::vector numElementsPerBlock; + ::fillNumElementsPerBlock(num_elements, numElementsPerBlock); + const int num_blocks = numElementsPerBlock.size(); + const int exoid = ::openFileAndGetId(boxes.size(), num_blocks, filename); + ::putCoordinatesInFile(exoid, boxes); + + std::vector connect(numElementsPerBlock[0]*num_nodes_per_elem); + int ordering[8] = { 4, 3, 2, 1, 8, 7, 6, 5 }; // one based! + unsigned offset = 0; + for (int blockId=1;blockId<=num_blocks;blockId++) + { + const int num_elements_this_block = numElementsPerBlock[blockId-1]; + ex_put_elem_block(exoid, blockId, "HEX", num_elements_this_block, num_nodes_per_elem, num_attr); + + for (int j=0;j +fillDomainBoxes(MPI_Comm comm) +{ + std::vector domainBoxes; + std::string filename = stk::unit_test_util::get_option("-i", "input.exo"); + ::fillBoxesUsingSidesetsFromFile(comm, filename, domainBoxes); + + std::string exodusFilename = stk::unit_test_util::get_option("-o", "boxes.exo"); + if ( exodusFilename != "skip" ) + { + ::writeExodusFileUsingBoxes(domainBoxes, exodusFilename); + } + + return domainBoxes; +} + +void fillStkBoxesUsingFloatBoxes(const std::vector &domainBoxes, const int procId, StkBoxIdentProcVector& stkBoxes) +{ + for (size_t i=0;i bulk = build_mesh(3, comm); + stk::io::fill_mesh(volumeFilename, *bulk); + + ::createBoundingBoxesForElementsInElementBlocks(*bulk, domainBoxes); +} + +void fillBoundingVolumesUsingNodesFromFile( + MPI_Comm comm, const std::string& sphereFilename, std::vector< std::pair > &spheres) +{ + std::shared_ptr bulkPtr = build_mesh(3, comm); + stk::mesh::MetaData& meta = 
bulkPtr->mesh_meta_data(); + stk::mesh::BulkData& bulk = *bulkPtr; + + stk::io::fill_mesh(sphereFilename, bulk); + + stk::mesh::EntityVector nodes; + const bool sortById = true; + stk::mesh::get_entities(bulk, stk::topology::NODE_RANK, meta.locally_owned_part(), nodes, sortById); + + spheres.clear(); + spheres.resize(nodes.size()); + + stk::mesh::FieldBase const * coords = meta.coordinate_field(); + + for (size_t i=0;i(stk::mesh::field_data(*coords, node)); + + double x=data[0]; + double y=data[1]; + double z=data[2]; + + double radius=1e-5; + unsigned id = bulk.identifier(node); + spheres[i] = std::make_pair(Sphere(Point(x,y,z), radius), IdentProc(id, bulk.parallel_rank())); + } +} + +void fillBoundingVolumesUsingNodesFromFile(MPI_Comm comm, const std::string& sphereFilename, FloatBoxIdentProcVector &spheres) +{ + std::shared_ptr bulkPtr = build_mesh(3, comm); + stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); + stk::mesh::BulkData& bulk = *bulkPtr; + + stk::io::fill_mesh(sphereFilename, bulk); + + stk::mesh::EntityVector nodes; + const bool sortById = true; + stk::mesh::get_entities(bulk, stk::topology::NODE_RANK, meta.locally_owned_part(), nodes, sortById); + + spheres.clear(); + spheres.resize(nodes.size()); + + stk::mesh::FieldBase const * coords = meta.coordinate_field(); + + for (size_t i=0;i(stk::mesh::field_data(*coords, node)); + + double x=data[0]; + double y=data[1]; + double z=data[2]; + + double radius=1e-5; + unsigned id = bulk.identifier(node); + FloatBox box(x-radius, y-radius, z-radius, x+radius, y+radius, z+radius); + spheres[i] = std::make_pair(box, IdentProc(id, bulk.parallel_rank())); + } +} + +} // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.hpp index 25710cb16352..469554f166f5 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.hpp +++ 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.hpp @@ -273,7 +273,7 @@ inline void writeExodusFileUsingBoxes(const std::vector& boxes, const for (int blockId=1;blockId<=num_blocks;blockId++) { const int num_elements_this_block = numElementsPerBlock[blockId-1]; - ex_put_elem_block(exoid, blockId, "HEX", num_elements_this_block, num_nodes_per_elem, num_attr); + ex_put_block(exoid, EX_ELEM_BLOCK, blockId, "HEX", num_elements_this_block, num_nodes_per_elem, num_attr, 0, 0); for (int j=0;j& boxes, const } offset += num_elements_this_block*num_nodes_per_elem; - ex_put_elem_conn(exoid, blockId, connect.data()); + ex_put_conn(exoid, EX_ELEM_BLOCK, blockId, connect.data(), nullptr, nullptr); } ex_close(exoid); @@ -482,364 +482,66 @@ inline void fillBoundingVolumesUsingNodesFromFile(MPI_Comm comm, const std::stri for (size_t i=0;i(stk::mesh::field_data(*coords, node)); + const double *data = static_cast(stk::mesh::field_data(*coords, node)); - double x=data[0]; - double y=data[1]; - double z=data[2]; + const double x=data[0]; + const double y=data[1]; + const double z=data[2]; - double radius=1e-5; - unsigned id = bulk->identifier(node); + constexpr double radius=1e-5; + const unsigned id = bulk->identifier(node); FloatBox box(x-radius, y-radius, z-radius, x+radius, y+radius, z+radius); spheres[i] = std::make_pair(box, IdentProc(id, bulk->parallel_rank())); + STK_ThrowRequire(spheres[i].first == box); } } namespace simple_fields { -inline void findBoundingBoxCoordinates(const std::vector &coordinates, std::vector& boxCoordinates) -{ - int spatialDim = 3; - double *minCoordinates = boxCoordinates.data(); - double *maxCoordinates = &boxCoordinates[spatialDim]; - - for (int j=0;j& domainBoxes) -{ - stk::mesh::ExodusTranslator exoTranslator(bulk); - size_t numberBoundingBoxes = 0; - std::vector sidesetIds; - exoTranslator.fill_side_set_ids(sidesetIds); - - for (size_t i=0;i &coordinates, std::vector& boxCoordinates); - stk::mesh::FieldBase 
const * coords = bulk.mesh_meta_data().coordinate_field(); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void createBoundingBoxesForSidesInSidesets(const stk::mesh::BulkData& bulk, std::vector& domainBoxes); - size_t boxCounter = 0; +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void fillBoxesUsingSidesetsFromFile(MPI_Comm comm, const std::string& filename, std::vector &domainBoxes); - std::vector boxCoordinates(6); - for (size_t ssetCounter=0;ssetCounter coordinates(3*num_nodes_per_side,0); - for(unsigned k=0;k(stk::mesh::field_data(*coords, nodes[k])); - coordinates[3*k] = data[0]; - coordinates[3*k+1] = data[1]; - coordinates[3*k+2] = data[2]; - } - findBoundingBoxCoordinates(coordinates, boxCoordinates); - domainBoxes[boxCounter].set_box(boxCoordinates[0], boxCoordinates[1], boxCoordinates[2], - boxCoordinates[3], boxCoordinates[4], boxCoordinates[5]); - boxCounter++; - } - } - - STK_ThrowRequireMsg(boxCounter == numberBoundingBoxes, "Program error. 
Please contact sierra-help for support"); -} - -inline void fillBoxesUsingSidesetsFromFile(MPI_Comm comm, const std::string& filename, std::vector &domainBoxes) -{ - std::shared_ptr bulk = build_mesh(3, comm); - stk::io::fill_mesh(filename, *bulk); - - createBoundingBoxesForSidesInSidesets(*bulk, domainBoxes); -} - -inline int openFileAndGetId(const int numBoxes, const int num_element_blocks, const std::string &filename) -{ - int CPU_word_size = sizeof(double); - int IO_word_size = 8; - int exoid = ex_create (filename.c_str(), EX_CLOBBER, &CPU_word_size, &IO_word_size); - int num_dim = 3; - int num_elements = numBoxes; - int num_nodes_per_element = 8; - int num_nodes = num_nodes_per_element*num_elements; - - int num_ns = 0, num_ss = 0; - ex_put_init(exoid, "Boxes", num_dim, num_nodes, num_elements, num_element_blocks, num_ns, num_ss); - return exoid; -} - -inline void setHexCoordinates(const double &xmin, const double &ymin, const double &zmin, +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void setHexCoordinates(const double &xmin, const double &ymin, const double &zmin, const double &xmax, const double &ymax, const double &zmax, - double* hexCoordinates) -{ -// int ordering[8] = { 4, 3, 2, 1, 8, 7, 6, 5 }; // one based! 
- int ordering[8] = { 3, 2, 1, 0, 7, 6, 5, 4 }; - - hexCoordinates[3*ordering[0]+0] = xmin; - hexCoordinates[3*ordering[0]+1] = ymin; - hexCoordinates[3*ordering[0]+2] = zmin; - - hexCoordinates[3*ordering[1]+0] = xmax; - hexCoordinates[3*ordering[1]+1] = ymin; - hexCoordinates[3*ordering[1]+2] = zmin; - - hexCoordinates[3*ordering[2]+0] = xmax; - hexCoordinates[3*ordering[2]+1] = ymin; - hexCoordinates[3*ordering[2]+2] = zmax; - - hexCoordinates[3*ordering[3]+0] = xmin; - hexCoordinates[3*ordering[3]+1] = ymin; - hexCoordinates[3*ordering[3]+2] = zmax; - - hexCoordinates[3*ordering[4]+0] = xmin; - hexCoordinates[3*ordering[4]+1] = ymax; - hexCoordinates[3*ordering[4]+2] = zmin; - - hexCoordinates[3*ordering[5]+0] = xmax; - hexCoordinates[3*ordering[5]+1] = ymax; - hexCoordinates[3*ordering[5]+2] = zmin; - - hexCoordinates[3*ordering[6]+0] = xmax; - hexCoordinates[3*ordering[6]+1] = ymax; - hexCoordinates[3*ordering[6]+2] = zmax; - - hexCoordinates[3*ordering[7]+0] = xmin; - hexCoordinates[3*ordering[7]+1] = ymax; - hexCoordinates[3*ordering[7]+2] = zmax; -} - -inline void putCoordinatesInFile(const int exoid, const std::vector& boxes) -{ - const int num_nodes_per_element = 8; - const int spatialDim = 3; - - std::vector x(num_nodes_per_element*boxes.size()); - std::vector y(num_nodes_per_element*boxes.size()); - std::vector z(num_nodes_per_element*boxes.size()); + double* hexCoordinates); - for (size_t i=0; i &numElementsPerBlock) -{ - const int minNumElementsPer = 1; - const int maxNumElementsPer = 1000; - - int numElementsPer = num_elements / 100; - - numElementsPer = std::max( numElementsPer, minNumElementsPer ); - numElementsPer = std::min( numElementsPer, maxNumElementsPer ); - - for (int i = 0; i < num_elements; i += numElementsPer) - { - int numElementsThisBlock = (i+numElementsPer) < num_elements ? 
numElementsPer : num_elements-i; - numElementsPerBlock.push_back(numElementsThisBlock); - } -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void putCoordinatesInFile(const int exoid, const std::vector& boxes); -inline void writeExodusFileUsingBoxes(const std::vector& boxes, const std::string &filename) -{ - if ( boxes.size() == 0 ) - { - // std::cerr << "Skipping writing of file. No boxes to write.\n"; - return; - } +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void fillNumElementsPerBlock(const int num_elements, std::vector &numElementsPerBlock); - const int num_nodes_per_elem = 8; - const int num_attr = 0; - const unsigned num_elements = boxes.size(); - std::vector numElementsPerBlock; - fillNumElementsPerBlock(num_elements, numElementsPerBlock); - const int num_blocks = numElementsPerBlock.size(); - const int exoid = openFileAndGetId(boxes.size(), num_blocks, filename); - putCoordinatesInFile(exoid, boxes); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void writeExodusFileUsingBoxes(const std::vector& boxes, const std::string &filename); - std::vector connect(numElementsPerBlock[0]*num_nodes_per_elem); - int ordering[8] = { 4, 3, 2, 1, 8, 7, 6, 5 }; // one based! 
- unsigned offset = 0; - for (int blockId=1;blockId<=num_blocks;blockId++) - { - const int num_elements_this_block = numElementsPerBlock[blockId-1]; - ex_put_elem_block(exoid, blockId, "HEX", num_elements_this_block, num_nodes_per_elem, num_attr); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +std::vector fillDomainBoxes(MPI_Comm comm); - for (int j=0;j &domainBoxes, const int procId, StkBoxIdentProcVector& stkBoxes); - ex_put_elem_conn(exoid, blockId, connect.data()); - } +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void createBoundingBoxesForElementsInElementBlocks(const stk::mesh::BulkData &bulk, FloatBoxIdentProcVector& domainBoxes); - ex_close(exoid); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void fillBoxesUsingElementBlocksFromFile(MPI_Comm comm, const std::string& volumeFilename, FloatBoxIdentProcVector &domainBoxes); -inline std::vector -fillDomainBoxes(MPI_Comm comm) -{ - std::vector domainBoxes; - std::string filename = stk::unit_test_util::get_option("-i", "input.exo"); - simple_fields::fillBoxesUsingSidesetsFromFile(comm, filename, domainBoxes); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void fillBoundingVolumesUsingNodesFromFile( + MPI_Comm comm, const std::string& sphereFilename, std::vector< std::pair > &spheres); - std::string exodusFilename = stk::unit_test_util::get_option("-o", "boxes.exo"); - if ( exodusFilename != "skip" ) - { - writeExodusFileUsingBoxes(domainBoxes, exodusFilename); - } - - return domainBoxes; -} - -inline void fillStkBoxesUsingFloatBoxes(const std::vector &domainBoxes, const int procId, StkBoxIdentProcVector& stkBoxes) -{ - for (size_t i=0;i bulk = build_mesh(3, comm); - stk::io::fill_mesh(volumeFilename, *bulk); - - createBoundingBoxesForElementsInElementBlocks(*bulk, domainBoxes); -} - -inline void 
fillBoundingVolumesUsingNodesFromFile( - MPI_Comm comm, const std::string& sphereFilename, std::vector< std::pair > &spheres) -{ - std::shared_ptr bulkPtr = build_mesh(3, comm); - stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - stk::mesh::BulkData& bulk = *bulkPtr; - - stk::io::fill_mesh(sphereFilename, bulk); - - stk::mesh::EntityVector nodes; - const bool sortById = true; - stk::mesh::get_entities(bulk, stk::topology::NODE_RANK, meta.locally_owned_part(), nodes, sortById); - - spheres.clear(); - spheres.resize(nodes.size()); - - stk::mesh::FieldBase const * coords = meta.coordinate_field(); - - for (size_t i=0;i(stk::mesh::field_data(*coords, node)); - - double x=data[0]; - double y=data[1]; - double z=data[2]; - - double radius=1e-5; - unsigned id = bulk.identifier(node); - spheres[i] = std::make_pair(Sphere(Point(x,y,z), radius), IdentProc(id, bulk.parallel_rank())); - } -} - -inline void fillBoundingVolumesUsingNodesFromFile(MPI_Comm comm, const std::string& sphereFilename, FloatBoxIdentProcVector &spheres) -{ - std::shared_ptr bulkPtr = build_mesh(3, comm); - stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - stk::mesh::BulkData& bulk = *bulkPtr; - - stk::io::fill_mesh(sphereFilename, bulk); - - stk::mesh::EntityVector nodes; - const bool sortById = true; - stk::mesh::get_entities(bulk, stk::topology::NODE_RANK, meta.locally_owned_part(), nodes, sortById); - - spheres.clear(); - spheres.resize(nodes.size()); - - stk::mesh::FieldBase const * coords = meta.coordinate_field(); - - for (size_t i=0;i(stk::mesh::field_data(*coords, node)); - - double x=data[0]; - double y=data[1]; - double z=data[2]; - - double radius=1e-5; - unsigned id = bulk.identifier(node); - FloatBox box(x-radius, y-radius, z-radius, x+radius, y+radius, z+radius); - spheres[i] = std::make_pair(box, IdentProc(id, bulk.parallel_rank())); - } -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void 
fillBoundingVolumesUsingNodesFromFile(MPI_Comm comm, const std::string& sphereFilename, FloatBoxIdentProcVector &spheres); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.cpp index 27b76453c41f..84bd9c45a36d 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.cpp @@ -56,7 +56,7 @@ class MinimalistPrinter : public ::testing::EmptyTestEventListener { if(mProcId == 0) { -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA printf("*** Starting test %s.%s from %s:%d\n", test_info.test_case_name(), test_info.name(), @@ -126,7 +126,7 @@ class MinimalistPrinter : public ::testing::EmptyTestEventListener { if(numTotalFailures == 0) { -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA ::testing::internal::ColoredPrintf(::testing::internal::COLOR_GREEN, "[ OK ] "); #else //newer versions of gtest don't allow external access to ColoredPrintf @@ -142,7 +142,7 @@ class MinimalistPrinter : public ::testing::EmptyTestEventListener printf("%s.%s", test_info.test_case_name(), test_info.name()); if ( should_print_time() ) { -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA size_t millis = test_info.result() != nullptr ? 
test_info.result()->elapsed_time() : 0; #else size_t millis = 0; @@ -214,7 +214,7 @@ class MinimalistPrinter : public ::testing::EmptyTestEventListener void print_failed(const std::string &message) { -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA ::testing::internal::ColoredPrintf(::testing::internal::COLOR_RED, "[ FAILED ] "); #else //newer versions of gtest don't allow external access to ColoredPrintf @@ -225,7 +225,7 @@ class MinimalistPrinter : public ::testing::EmptyTestEventListener void print_passed(const std::string &message) { -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA ::testing::internal::ColoredPrintf(::testing::internal::COLOR_GREEN, "[ PASSED ] "); #else //newer versions of gtest don't allow external access to ColoredPrintf diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.hpp index 965ed76d4ae8..4d763517d6df 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParallelGtestOutput.hpp @@ -49,7 +49,10 @@ void create_parallel_output_with_comm(int procId, MPI_Comm comm); namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void create_parallel_output(int procId); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void create_parallel_output_with_comm(int procId, MPI_Comm comm); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParticleUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParticleUtils.hpp index 707a93eef7fe..28b465181421 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParticleUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ParticleUtils.hpp @@ -72,7 +72,7 @@ class Particle stk::mesh::Entity 
m_owningElement; }; -typedef std::vector > ParticleVector; +typedef std::vector> ParticleVector; class ParticleManager { @@ -122,7 +122,8 @@ class Particle typedef std::vector> ParticleVector; -class ParticleManager +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +ParticleManager { public: ParticleManager() = default; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.cpp new file mode 100644 index 000000000000..43afee040cd5 --- /dev/null +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.cpp @@ -0,0 +1,56 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // * Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // * Redistributions in binary form must reproduce the above + // copyright notice, this list of conditions and the following + // disclaimer in the documentation and/or other materials provided + // with the distribution. + // +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// + // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +namespace stk +{ +namespace unit_test_util +{ + +namespace simple_fields { + +void print_output_for_pass_fail_test(double duration, unsigned iterCount, MPI_Comm communicator) +{ + stk::unit_test_util::print_output_for_pass_fail_test(duration, iterCount, communicator); +} + +void print_output_for_graph_generation(double duration, const stk::diag::Timer &rootTimer, MPI_Comm communicator) +{ + stk::unit_test_util::print_output_for_graph_generation(duration, rootTimer, communicator); +} + +} // namespace simple_fields + +} +} diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.hpp index fb1213b695e6..8ccab31a6438 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PerformanceTester.hpp @@ -39,6 +39,7 @@ #include #include #include +#include namespace stk { @@ -64,68 +65,65 @@ inline void print_output_for_graph_generation(double duration, const stk::diag:: class PerformanceTester { public: - void run_performance_test() - { - time_algorithm(); - generate_output(); - } + void run_performance_test() + { + time_algorithm(); + generate_output(); + } protected: - PerformanceTester(MPI_Comm comm) : - duration(0.0), - enabledTimerSet(CHILDMASK1), - rootTimer(createRootTimer("totalTestRuntime", enabledTimerSet)), - 
childTimer("timed algorithm", CHILDMASK1, rootTimer), - communicator(comm) - { - rootTimer.start(); - } - - virtual ~PerformanceTester() - { - stk::diag::deleteRootTimer(rootTimer); - } - - virtual void run_algorithm_to_time() = 0; - virtual size_t get_value_to_output_as_iteration_count() = 0; - - double duration; + PerformanceTester(MPI_Comm comm) + : duration(0.0), + enabledTimerSet(CHILDMASK1), + rootTimer(createRootTimer("totalTestRuntime", enabledTimerSet)), + childTimer("timed algorithm", CHILDMASK1, rootTimer), + communicator(comm) + { + rootTimer.start(); + } + + virtual ~PerformanceTester() + { + stk::diag::deleteRootTimer(rootTimer); + } + + virtual void run_algorithm_to_time() = 0; + virtual size_t get_value_to_output_as_iteration_count() = 0; + + double duration; private: - const int CHILDMASK1 = 1; - stk::diag::TimerSet enabledTimerSet; - stk::diag::Timer rootTimer; - stk::diag::Timer childTimer; - MPI_Comm communicator; - - void time_algorithm() - { - stk::diag::TimeBlockSynchronized timerStartSynchronizedAcrossProcessors(childTimer, communicator); - double startTime = stk::wall_time(); - run_algorithm_to_time(); - duration += stk::wall_time() - startTime; - } - - void generate_output() - { - print_output_for_pass_fail_test(duration, get_value_to_output_as_iteration_count(), communicator); - print_output_for_graph_generation(duration, rootTimer, communicator); - } + const int CHILDMASK1 = 1; + stk::diag::TimerSet enabledTimerSet; + stk::diag::Timer rootTimer; + stk::diag::Timer childTimer; + MPI_Comm communicator; + + void time_algorithm() + { + stk::diag::TimeBlockSynchronized timerStartSynchronizedAcrossProcessors(childTimer, communicator); + double startTime = stk::wall_time(); + run_algorithm_to_time(); + duration += stk::wall_time() - startTime; + } + + void generate_output() + { + print_output_for_pass_fail_test(duration, get_value_to_output_as_iteration_count(), communicator); + print_output_for_graph_generation(duration, rootTimer, 
communicator); + } }; namespace simple_fields { -inline void print_output_for_pass_fail_test(double duration, unsigned iterCount, MPI_Comm communicator) -{ - stk::unit_test_util::print_output_for_pass_fail_test(duration, iterCount, communicator); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void print_output_for_pass_fail_test(double duration, unsigned iterCount, MPI_Comm communicator); -inline void print_output_for_graph_generation(double duration, const stk::diag::Timer &rootTimer, MPI_Comm communicator) -{ - stk::unit_test_util::print_output_for_graph_generation(duration, rootTimer, communicator); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void print_output_for_graph_generation(double duration, const stk::diag::Timer &rootTimer, MPI_Comm communicator); -class PerformanceTester +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +PerformanceTester { public: void run_performance_test() @@ -172,8 +170,8 @@ class PerformanceTester void generate_output() { - print_output_for_pass_fail_test(duration, get_value_to_output_as_iteration_count(), communicator); - print_output_for_graph_generation(duration, rootTimer, communicator); + stk::unit_test_util::print_output_for_pass_fail_test(duration, get_value_to_output_as_iteration_count(), communicator); + stk::unit_test_util::print_output_for_graph_generation(duration, rootTimer, communicator); } }; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PrintType.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PrintType.hpp index 55095536c594..210fd559dd07 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PrintType.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/PrintType.hpp @@ -31,6 +31,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF 
THE POSSIBILITY OF SUCH DAMAGE. +#include #include #ifdef __GNUG__ #include @@ -61,6 +62,7 @@ void print_type(T& t) namespace simple_fields { template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void print_type(T& t) { stk::unit_test_util::print_type(t); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ReadWriteSidesetTester.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ReadWriteSidesetTester.cpp index 30632a7ce47f..e1f8c5c3d8ff 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ReadWriteSidesetTester.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ReadWriteSidesetTester.cpp @@ -118,19 +118,18 @@ void compare_sidesets(const std::string& input_file_name, void test_reading_writing_sideset_from_file(stk::ParallelMachine comm, const std::string& input_file_name, const std::string& output_file_name) { - auto meta1 = std::make_shared(); + stk::mesh::MetaData meta1; BulkDataTester bulk1(meta1, comm); read_exo_file( bulk1, input_file_name, READ_SERIAL_AND_DECOMPOSE); write_exo_file( bulk1, output_file_name); - auto meta2 = std::make_shared(); + stk::mesh::MetaData meta2; BulkDataTester bulk2(meta2, comm); read_exo_file( bulk2, output_file_name, READ_ALREADY_DECOMPOSED); compare_sidesets(input_file_name, bulk1, bulk2); unlink(output_file_name.c_str()); } - namespace simple_fields { stk::mesh::SideSet* get_stk_side_set(stk::mesh::BulkData &bulk, const ElemIdSideVector &ss) @@ -149,7 +148,7 @@ stk::unit_test_util::sideset::SideSetData get_stk_side_set_data(stk::mesh::BulkD for(size_t i=0; iwrite_output_mesh(bulk_data(), attributeFieldOrderingByPartOrdinal); - } + virtual void write_output_mesh(size_t output_file_index) + { + m_outputFiles[output_file_index]->write_output_mesh(bulk_data(), attributeFieldOrderingByPartOrdinal); + } }; class BulkDataTester : public stk::mesh::BulkData { public: - BulkDataTester(std::shared_ptr mesh_meta_data, stk::ParallelMachine 
parallel) - : stk::mesh::BulkData(mesh_meta_data, parallel, stk::mesh::BulkData::AUTO_AURA) - { } + BulkDataTester(stk::mesh::MetaData & mesh_meta_data, stk::ParallelMachine parallel) + : stk::mesh::BulkData(std::shared_ptr(&mesh_meta_data, [](auto pointerWeWontDelete){}), parallel, stk::mesh::BulkData::AUTO_AURA) + { + } }; stk::mesh::SideSet* get_stk_side_set(stk::mesh::BulkData &bulk, const ElemIdSideVector &ss); @@ -132,6 +133,7 @@ void test_reading_writing_sideset_from_file(stk::ParallelMachine comm, const std namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void test_reading_writing_sideset_from_file(stk::ParallelMachine comm, const std::string& inputFileName, const std::string& outputFileName); } // namespace simple_fields @@ -172,15 +174,25 @@ class BulkDataTester : public stk::mesh::BulkData } }; +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") stk::mesh::SideSet* get_stk_side_set(stk::mesh::BulkData &bulk, const ElemIdSideVector &ss); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") SideSetData get_stk_side_set_data(stk::mesh::BulkData &bulk, const SideSetIdAndElemIdSidesVector &ssData); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void write_exo_file(BulkDataTester &bulkData, const std::string &filename); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void read_exo_file( stk::mesh::BulkData &bulkData, std::string filename, ReadMode read_mode); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void load_mesh_and_fill_sideset_data(StkMeshIoBrokerTester &stkIo); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void setup_io_broker_for_read(stk::io::StkMeshIoBroker 
&stkIo, stk::mesh::BulkData &bulkData, std::string filename, ReadMode readMode); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void test_reading_writing_sideset_from_file(stk::ParallelMachine comm, const std::string& inputFileName, const std::string& outputFileName); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.cpp new file mode 100644 index 000000000000..e2e4738a0b2a --- /dev/null +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.cpp @@ -0,0 +1,61 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + + +#include + +namespace stk { +namespace unit_test_util { + +namespace simple_fields { + +size_t getGoldValueForTest() +{ + return stk::unit_test_util::getGoldValueForTest(); +} + +void gatherResultstoProcZero(MPI_Comm comm, SearchResults& boxIdPairResults) +{ + stk::unit_test_util::gatherResultstoProcZero(comm, boxIdPairResults); +} + +void printPeformanceStats(double elapsedTime, MPI_Comm comm) +{ + stk::unit_test_util::printPeformanceStats(elapsedTime, comm); +} + +} // namespace simple_fields + +} // namespace unit_test_util +} // namespace stk diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.hpp index e2e9f79d9b43..035fd18ef0d7 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/Search_UnitTestUtils.hpp @@ -125,6 +125,55 @@ std::pair generateBoundingVolume(double x, double y, doub return std::make_pair(generateBoundingVolume(x,y,z,radius), IdentProc(id,proc)); } +template +KOKKOS_FUNCTION +BoxType device_generateBox(double x, double y, double z, double radius); + +template<> +KOKKOS_INLINE_FUNCTION +Point device_generateBox(double x, double y, double z, double /*radius*/) +{ + return Point(x,y,z); +} + +template<> +KOKKOS_INLINE_FUNCTION +Sphere device_generateBox(double x, double y, double z, double radius) 
+{ + return Sphere(Point(x, y, z), radius); +} + +template<> +KOKKOS_INLINE_FUNCTION +StkBox device_generateBox(double x, double y, double z, double radius) +{ + Point min_corner(x-radius, y-radius, z-radius); + Point max_corner(x+radius, y+radius, z+radius); + return StkBox(min_corner, max_corner); +} + +template +KOKKOS_FUNCTION +stk::search::BoxIdentProc device_generateBoxIdentProc(double x, double y, double z, + double radius, int id, int proc) +{ + return stk::search::BoxIdentProc{device_generateBox(x, y, z, radius), + IdentProcType{id, proc}}; +} + +template +KOKKOS_FUNCTION +stk::search::BoxIdent device_generateBoxIdent(double x, double y, double z, + double radius, IdentType id) +{ + return stk::search::BoxIdent{device_generateBox(x, y, z, radius), id}; +} + +template +auto box_ident_to_pair(BoxIdent const& ident) { + return std::make_pair(ident.box, ident.ident); +} + //====================== inline size_t getGoldValueForTest() @@ -214,9 +263,11 @@ inline void printPeformanceStats(double elapsedTime, MPI_Comm comm) namespace simple_fields { template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") VolumeType generateBoundingVolume(double x, double y, double z, double radius); template<> +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") inline Point generateBoundingVolume(double x, double y, double z, double /*radius*/) { @@ -224,6 +275,7 @@ Point generateBoundingVolume(double x, double y, double z, double /*radiu } template<> +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") inline Sphere generateBoundingVolume(double x, double y, double z, double radius) { @@ -239,6 +291,7 @@ Sphere generateBoundingVolume(double x, double y, double z, double radiu // ------------ // width = 2*radius template<> +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") inline StkBox 
generateBoundingVolume< StkBox >(double x, double y, double z, double radius) { @@ -248,6 +301,7 @@ StkBox generateBoundingVolume< StkBox >(double x, double y, double z, double rad } template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") std::pair generateBoundingVolume(double x, double y, double z, double radius, int id, int proc) { return std::make_pair(generateBoundingVolume(x,y,z,radius), IdentProc(id,proc)); @@ -255,10 +309,12 @@ std::pair generateBoundingVolume(double x, double y, doub template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") KOKKOS_FUNCTION BoxType device_generateBox(double x, double y, double z, double radius); template<> +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") KOKKOS_INLINE_FUNCTION Point device_generateBox(double x, double y, double z, double /*radius*/) { @@ -266,6 +322,7 @@ Point device_generateBox(double x, double y, double z, double /*radius*/) } template<> +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") KOKKOS_INLINE_FUNCTION Sphere device_generateBox(double x, double y, double z, double radius) { @@ -273,6 +330,7 @@ Sphere device_generateBox(double x, double y, double z, double radius) } template<> +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") KOKKOS_INLINE_FUNCTION StkBox device_generateBox(double x, double y, double z, double radius) { @@ -282,6 +340,7 @@ StkBox device_generateBox(double x, double y, double z, double radius) } template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") KOKKOS_FUNCTION stk::search::BoxIdentProc device_generateBoxIdentProc(double x, double y, double z, double radius, int id, int proc) @@ -291,6 +350,7 @@ stk::search::BoxIdentProc device_generateBoxIdentProc(do } template 
+STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") KOKKOS_FUNCTION stk::search::BoxIdent device_generateBoxIdent(double x, double y, double z, double radius, IdentType id) @@ -299,26 +359,21 @@ stk::search::BoxIdent device_generateBoxIdent(double x, doub } template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") auto box_ident_to_pair(BoxIdent const& ident) { return std::make_pair(ident.box, ident.ident); } //====================== -inline size_t getGoldValueForTest() -{ - return stk::unit_test_util::getGoldValueForTest(); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +size_t getGoldValueForTest(); -inline void gatherResultstoProcZero(MPI_Comm comm, SearchResults& boxIdPairResults) -{ - stk::unit_test_util::gatherResultstoProcZero(comm, boxIdPairResults); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void gatherResultstoProcZero(MPI_Comm comm, SearchResults& boxIdPairResults); -inline void printPeformanceStats(double elapsedTime, MPI_Comm comm) -{ - stk::unit_test_util::printPeformanceStats(elapsedTime, comm); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void printPeformanceStats(double elapsedTime, MPI_Comm comm); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkBalanceUnitTestSettings.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkBalanceUnitTestSettings.hpp index 32ab2996160a..5e83b2ed21bd 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkBalanceUnitTestSettings.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkBalanceUnitTestSettings.hpp @@ -20,7 +20,8 @@ class StkBalanceUnitTestSettings : public stk::balance::StkBalanceSettings namespace simple_fields { -class StkBalanceUnitTestSettings : 
public stk::unit_test_util::StkBalanceUnitTestSettings {}; +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +StkBalanceUnitTestSettings : public stk::unit_test_util::StkBalanceUnitTestSettings {}; } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkMeshFromGeneratedMesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkMeshFromGeneratedMesh.hpp index 4e17b96feec9..8c4dfb662f65 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkMeshFromGeneratedMesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkMeshFromGeneratedMesh.hpp @@ -46,7 +46,8 @@ class StkMeshCreator namespace simple_fields { -class StkMeshCreator +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +StkMeshCreator { public: StkMeshCreator(const std::string& generatedMeshSpec, MPI_Comm communicator) @@ -55,7 +56,6 @@ class StkMeshCreator { const int spatialDim = 3; m_stkMeshMetaData = new stk::mesh::MetaData(spatialDim); - m_stkMeshMetaData->use_simple_fields(); m_stkMeshBulkData = new stk::unit_test_util::BulkDataTester(*m_stkMeshMetaData, communicator); readExodusFileIntoStkMesh(generatedMeshSpec, *m_stkMeshBulkData, communicator); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkReportRedirector.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkReportRedirector.hpp index 387ea88f55a9..98d16268e54d 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkReportRedirector.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/StkReportRedirector.hpp @@ -9,7 +9,7 @@ #ifndef STK_STK_UNIT_TEST_UTILS_STKREPORTREDIRECTOR_HPP_ #define STK_STK_UNIT_TEST_UTILS_STKREPORTREDIRECTOR_HPP_ #include - +#include "stk_util/stk_config.h" namespace stk { @@ -28,7 +28,8 @@ class StkReportRedirector { namespace simple_fields { -class StkReportRedirector : public 
stk::unit_test_util::StkReportRedirector {}; +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +StkReportRedirector : public stk::unit_test_util::StkReportRedirector {}; } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.cpp index 9d6203cd1922..437be5a1e2be 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.cpp @@ -23,10 +23,8 @@ #include // for Field #include // for get_entities #include // for MetaData, etc -#include "stk_mesh/base/TopologyDimensions.hpp" // for ElementNode #include "stk_mesh/base/FieldParallel.hpp" #include "stk_mesh/base/CompositeRank.hpp" -#include "stk_mesh/base/CoordinateSystems.hpp" // for Cartesian #include "stk_mesh/base/Entity.hpp" // for Entity #include "stk_mesh/base/FieldBase.hpp" // for field_data #include "stk_mesh/base/Types.hpp" // for EntityId, etc @@ -66,7 +64,7 @@ class MetaDataInitializer void setup() { declare_parts(); - declare_coordinate_field(); + declare_coordinate_field(); declare_nodeset_distribution_factor_fields(); declare_sideset_distribution_factor_fields(); } @@ -79,13 +77,8 @@ class MetaDataInitializer std::string nodesetDistFieldName = "distribution_factors_" + nodesetData.name; - stk::mesh::Field * distributionFactorsFieldPerNodeset = nullptr; - if (m_meta.is_using_simple_fields()) { - distributionFactorsFieldPerNodeset = &m_meta.declare_field(stk::topology::NODE_RANK, nodesetDistFieldName); - } - else { - distributionFactorsFieldPerNodeset = &stk::mesh::legacy::declare_field>(m_meta, stk::topology::NODE_RANK, nodesetDistFieldName); - } + stk::mesh::Field * distributionFactorsFieldPerNodeset = + &m_meta.declare_field(stk::topology::NODE_RANK, nodesetDistFieldName); stk::io::set_field_role(*distributionFactorsFieldPerNodeset, Ioss::Field::MESH); 
stk::mesh::put_field_on_mesh(*distributionFactorsFieldPerNodeset, *part, nullptr); @@ -93,18 +86,7 @@ class MetaDataInitializer } void declare_sideblock_distribution_factor_field(const SideBlockInfo& sideBlock, - stk::mesh::Field* distributionFactorsField) - { - stk::mesh::Part* sideBlockPart = m_meta.get_part(sideBlock.name); - - if (nullptr != distributionFactorsField) { - stk::io::set_distribution_factor_field(*sideBlockPart, *distributionFactorsField); - stk::mesh::put_field_on_mesh(*distributionFactorsField, *sideBlockPart, sideBlock.numNodesPerSide, nullptr); - } - } - - void declare_simple_sideblock_distribution_factor_field(const SideBlockInfo& sideBlock, - stk::mesh::Field* distributionFactorsField) + stk::mesh::Field* distributionFactorsField) { stk::mesh::Part* sideBlockPart = m_meta.get_part(sideBlock.name); @@ -114,26 +96,7 @@ class MetaDataInitializer } } - stk::mesh::Field* declare_sideset_distribution_factor_field(const SidesetData& sidesetData) - { - stk::mesh::Field* distributionFactorsField = nullptr; - stk::mesh::Part* sidesetPart = m_meta.get_part(sidesetData.name); - - SplitType splitType = sidesetData.get_split_type(); - if (splitType != SplitType::NO_SPLIT) { - std::string fieldName = sidesetData.name + "_df"; - - distributionFactorsField = - &stk::mesh::legacy::declare_field>(m_meta, m_meta.side_rank(), fieldName); - - stk::io::set_field_role(*distributionFactorsField, Ioss::Field::MESH); - stk::io::set_distribution_factor_field(*sidesetPart, *distributionFactorsField); - } - - return distributionFactorsField; - } - - stk::mesh::Field* declare_simple_sideset_distribution_factor_field(const SidesetData& sidesetData) + stk::mesh::Field* declare_sideset_distribution_factor_field(const SidesetData& sidesetData) { stk::mesh::Field* distributionFactorsField = nullptr; stk::mesh::Part* sidesetPart = m_meta.get_part(sidesetData.name); @@ -153,25 +116,12 @@ class MetaDataInitializer void declare_sideset_distribution_factor_fields() { - if 
(m_meta.is_using_simple_fields()) { - for (const SidesetData& sidesetData : m_data.sidesets.get_group_data()) { - stk::mesh::Field* distributionFactorsField = declare_simple_sideset_distribution_factor_field(sidesetData); - std::vector sideBlocks = sidesetData.get_side_block_info(); + for (const SidesetData& sidesetData : m_data.sidesets.get_group_data()) { + stk::mesh::Field* distributionFactorsField = declare_sideset_distribution_factor_field(sidesetData); + std::vector sideBlocks = sidesetData.get_side_block_info(); - for (const auto& sideBlock : sideBlocks) { - declare_simple_sideblock_distribution_factor_field(sideBlock, distributionFactorsField); - } - } - } - else { - for (const SidesetData& sidesetData : m_data.sidesets.get_group_data()) { - stk::mesh::Field* distributionFactorsField = - declare_sideset_distribution_factor_field(sidesetData); - std::vector sideBlocks = sidesetData.get_side_block_info(); - - for (const auto& sideBlock : sideBlocks) { - declare_sideblock_distribution_factor_field(sideBlock, distributionFactorsField); - } + for (const auto& sideBlock : sideBlocks) { + declare_sideblock_distribution_factor_field(sideBlock, distributionFactorsField); } } } @@ -335,30 +285,8 @@ class MetaDataInitializer declare_assembly_parts(); } - void declare_coordinate_field() - { - if (m_meta.is_using_simple_fields()) { - declare_coordinate_field_with_datatype(); - } - else { - if (m_data.spatialDim == 3) { - declare_coordinate_field_with_type(); - } - else if (m_data.spatialDim == 2) { - declare_coordinate_field_with_type>(); - } - } - } - - template - void declare_coordinate_field_with_type() - { - F& coordsField = stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, m_meta.coordinate_field_name()); - stk::mesh::put_field_on_mesh(coordsField, m_meta.universal_part(), m_data.spatialDim, nullptr); - } - template - void declare_coordinate_field_with_datatype() + void declare_coordinate_field() { stk::mesh::Field& coordsField = 
m_meta.declare_field(stk::topology::NODE_RANK, m_meta.coordinate_field_name()); stk::mesh::put_field_on_mesh(coordsField, m_meta.universal_part(), m_data.spatialDim, nullptr); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.hpp index b995e97b6b27..848e3a810a71 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMesh.hpp @@ -34,6 +34,7 @@ #ifndef TextMesh_hpp #define TextMesh_hpp +#include "stk_util/stk_config.h" #include #include namespace stk { namespace mesh { class BulkData; }} @@ -48,7 +49,10 @@ void setup_text_mesh(stk::mesh::BulkData& bulkData, const std::string& meshDesc) namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") std::string get_full_text_mesh_desc(const std::string& textMeshConnectivityDesc, const std::vector& coordVec); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void setup_text_mesh(stk::mesh::BulkData& bulkData, const std::string& meshDesc); } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.cpp index 5ce524c8c8cb..873c439d1f7d 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.cpp @@ -27,7 +27,7 @@ struct PartLessByName { } TextMeshFixture::TextMeshFixture(unsigned spatialDim) - : stk::unit_test_util::MeshFixture(spatialDim) + : stk::unit_test_util::MeshFixture(spatialDim) { m_topologyMapping.initialize_topology_map(); } @@ -259,13 +259,14 @@ void TextMeshFixture::verify_elements_on_part(stk::mesh::Part* blockPart, const } } -TextMeshFixture::CoordinateVerifier::CoordinateVerifier( - const stk::mesh::BulkData& b, const 
stk::mesh::EntityIdVector& ids, const std::vector& coords) - : bulk(b), - meta(bulk.mesh_meta_data()), - spatialDim(meta.spatial_dimension()), - goldNodeIds(ids), - goldCoordinates(coords) +TextMeshFixture::CoordinateVerifier::CoordinateVerifier(const stk::mesh::BulkData& b, + const stk::mesh::EntityIdVector& ids, + const std::vector& coords) + : bulk(b), + meta(bulk.mesh_meta_data()), + spatialDim(meta.spatial_dimension()), + goldNodeIds(ids), + goldCoordinates(coords) { } @@ -293,8 +294,8 @@ void TextMeshFixture::CoordinateVerifier::verify_num_nodes() const double* TextMeshFixture::CoordinateVerifier::get_nodal_coordinates(const stk::mesh::EntityId& nodeId) { - const stk::mesh::CoordinatesField& coordsField = - static_cast(*meta.coordinate_field()); + const stk::mesh::Field& coordsField = + static_cast&>(*meta.coordinate_field()); return stk::mesh::field_data(coordsField, get_node(nodeId)); } @@ -321,7 +322,7 @@ std::string TextMeshFixture::CoordinateVerifier::error_message(const stk::mesh:: namespace simple_fields { TextMeshFixture::TextMeshFixture(unsigned spatialDim) - : stk::unit_test_util::simple_fields::MeshFixture(spatialDim) + : stk::unit_test_util::MeshFixture(spatialDim) { m_topologyMapping.initialize_topology_map(); } @@ -333,7 +334,7 @@ std::string TextMeshFixture::get_topology_name(const std::string& textMeshTopolo void TextMeshFixture::setup_text_mesh(const std::string& meshDesc) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void TextMeshFixture::verify_shared_nodes(const stk::mesh::EntityIdVector& nodeIds, int sharingProc) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.hpp index b60c726f0bcb..2ba6f395113a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.hpp +++ 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshFixture.hpp @@ -62,6 +62,7 @@ namespace stk { namespace unit_test_util { + class TextMeshFixture : public stk::unit_test_util::MeshFixture { protected: @@ -155,7 +156,8 @@ class TextMeshFixture : public stk::unit_test_util::MeshFixture namespace simple_fields { -class TextMeshFixture : public stk::unit_test_util::simple_fields::MeshFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TextMeshFixture : public stk::unit_test_util::MeshFixture { protected: TextMeshFixture(unsigned spatialDim); @@ -243,7 +245,7 @@ class TextMeshFixture : public stk::unit_test_util::simple_fields::MeshFixture const std::vector& goldCoordinates; }; - StkTopologyMapping m_topologyMapping; + stk::unit_test_util::StkTopologyMapping m_topologyMapping; }; } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp index 2ffabd6d6113..1f52825509ad 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp @@ -209,9 +209,11 @@ class StkTopologyMapping : public text_mesh::TopologyMapping + +namespace unitTestUtils +{ +namespace exampleMeshes +{ + +namespace simple_fields { + +void fillDataForUnitCube(std::vector &coordinates) { + unitTestUtils::exampleMeshes::fillDataForUnitCube(coordinates); +} + +void fillDataForRectangloid(std::vector &coordinates) { + unitTestUtils::exampleMeshes::fillDataForRectangloid(coordinates); +} + +Iogn::ExodusData createExodusDataForDisconnectedHex8s(int numberOfHexes) { + return unitTestUtils::exampleMeshes::createExodusDataForDisconnectedHex8s(numberOfHexes); +} + +} // namespace simple_fields + +} +} diff --git 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/exampleMeshes.h b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/exampleMeshes.h index 46fb91ba91ad..6eb8da0e8c52 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/exampleMeshes.h +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/exampleMeshes.h @@ -1,6 +1,7 @@ #ifndef EXAMPLEMESHES_H_ #define EXAMPLEMESHES_H_ +#include "stk_util/stk_config.h" #include #include @@ -105,18 +106,14 @@ Iogn::ExodusData createExodusDataForDisconnectedHex8s(int numberOfHexes) namespace simple_fields { -inline void fillDataForUnitCube(std::vector &coordinates) { - unitTestUtils::exampleMeshes::fillDataForUnitCube(coordinates); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void fillDataForUnitCube(std::vector &coordinates); -inline void fillDataForRectangloid(std::vector &coordinates) { - unitTestUtils::exampleMeshes::fillDataForRectangloid(coordinates); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void fillDataForRectangloid(std::vector &coordinates); -inline -Iogn::ExodusData createExodusDataForDisconnectedHex8s(int numberOfHexes) { - return unitTestUtils::exampleMeshes::createExodusDataForDisconnectedHex8s(numberOfHexes); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +Iogn::ExodusData createExodusDataForDisconnectedHex8s(int numberOfHexes); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.cpp new file mode 100644 index 000000000000..f426760f4558 --- /dev/null +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.cpp @@ -0,0 +1,21 @@ +#include + +namespace stk +{ +namespace unit_test_util +{ + +namespace simple_fields { + +bool has_option(const std::string& option) { + return 
stk::unit_test_util::has_option(option); +} + +std::string get_option(const std::string& option, const std::string defaultString) { + return stk::unit_test_util::get_option(option, defaultString); +} + +} // namespace simple_fields + +} +} diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.h b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.h index 3d64b8f87cc0..6b6909bf4997 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.h +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/getOption.h @@ -2,6 +2,7 @@ #define UNITTESTUTILS_OPTIONS_PARSING #include +#include "stk_util/stk_config.h" #include #include @@ -56,17 +57,14 @@ T get_command_line_option(const std::string &option, const T &defaultValue) namespace simple_fields { -inline -bool has_option(const std::string& option) { - return stk::unit_test_util::has_option(option); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +bool has_option(const std::string& option); -inline -std::string get_option(const std::string& option, const std::string defaultString="no") { - return stk::unit_test_util::get_option(option, defaultString); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +std::string get_option(const std::string& option, const std::string defaultString="no"); template +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") T get_command_line_option(const std::string &option, const T &defaultValue) { return stk::unit_test_util::get_command_line_option(option, defaultValue); } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.cpp index 2d6b0a23d928..d4f3b3b75716 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.cpp +++ 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.cpp @@ -50,29 +50,17 @@ void text_mesh_to_file_in_serial(const std::string& meshDesc, const std::string& } } -void IdAndTimeFieldValueSetter::populate_field(stk::mesh::BulkData &bulk, stk::mesh::FieldBase* field, const unsigned step, const double time) const +void generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp(const std::string &meshSizeSpec, stk::mesh::BulkData &mesh, const std::string &decompositionMethod) { - stk::mesh::EntityRank fieldRank = field->entity_rank(); - - std::vector entities; - stk::mesh::get_entities(bulk, fieldRank, entities); - - stk::mesh::FieldVector allTransientFields = stk::io::get_transient_fields(bulk.mesh_meta_data()); + // meshSizeSpec should NOT include generated:, just "2x2x1" for example. + // decomposition methods: "linear", "rcb", "rib", "hsfc", "block", "cyclic", "random", "kway", "geom_kway", "metis_sfc" + const std::string tempFilename = "exodus_" + meshSizeSpec + ".e"; + generated_mesh_to_file_in_serial(meshSizeSpec,tempFilename); - for(stk::mesh::FieldBase * transientField : allTransientFields) - { - for(size_t i = 0; i < entities.size(); i++) - { - unsigned numEntriesPerEntity = stk::mesh::field_scalars_per_entity(*transientField, entities[i]); - double value = 100.0 * time + static_cast(bulk.identifier(entities[i])); - double *data = static_cast (stk::mesh::field_data(*transientField, entities[i])); - for(unsigned j=0; j& timeSteps, const FieldValueSetter &fieldValueSetter) { if (stk::parallel_machine_rank(MPI_COMM_WORLD) == 0) { - GeneratedMeshToFileWithTransientFields gMesh(MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA, fieldName, stk::topology::NODE_RANK); + GeneratedMeshToFileWithTransientFields gMesh(MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA, fieldName, + fieldRank); gMesh.setup_mesh(meshSizeSpec, fileName); gMesh.write_mesh_with_field(timeSteps, fieldValueSetter, globalVariableName); @@ -294,15 +283,26 @@ void 
read_from_serial_file_and_decompose(const std::string& fileName, stk::mesh: broker.populate_bulk_data(); } -void generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp(const std::string &meshSizeSpec, stk::mesh::BulkData &mesh, const std::string &decompositionMethod) +void IdAndTimeFieldValueSetter::populate_field(stk::mesh::BulkData &bulk, stk::mesh::FieldBase* field, const unsigned step, const double time) const { - // meshSizeSpec should NOT include generated:, just "2x2x1" for example. - // decomposition methods: "linear", "rcb", "rib", "hsfc", "block", "cyclic", "random", "kway", "geom_kway", "metis_sfc" - const std::string tempFilename = "exodus_" + meshSizeSpec + ".e"; - generated_mesh_to_file_in_serial(meshSizeSpec,tempFilename); + stk::mesh::EntityRank fieldRank = field->entity_rank(); - read_from_serial_file_and_decompose(tempFilename, mesh, decompositionMethod); - unlink(tempFilename.c_str()); + std::vector entities; + stk::mesh::get_entities(bulk, fieldRank, entities); + + stk::mesh::FieldVector allTransientFields = stk::io::get_transient_fields(bulk.mesh_meta_data()); + + for(stk::mesh::FieldBase * transientField : allTransientFields) + { + for(size_t i = 0; i < entities.size(); i++) + { + unsigned numEntriesPerEntity = stk::mesh::field_scalars_per_entity(*transientField, entities[i]); + double value = 100.0 * time + static_cast(bulk.identifier(entities[i])); + double *data = static_cast (stk::mesh::field_data(*transientField, entities[i])); + for(unsigned j=0; jmesh_meta_data()), broker() { - meta.use_simple_fields(); } void @@ -543,7 +542,7 @@ void generated_mesh_with_transient_data_to_file_in_serial(const std::string &mes { if (stk::parallel_machine_rank(MPI_COMM_WORLD) == 0) { - GeneratedMeshToFileWithTransientFields gMesh(MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA, fieldName, + stk::unit_test_util::GeneratedMeshToFileWithTransientFields gMesh(MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA, fieldName, fieldRank); 
gMesh.setup_mesh(meshSizeSpec, fileName); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.hpp index 59344d35fdd7..09894f545f74 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ioUtils.hpp @@ -129,14 +129,24 @@ class TransientVerifier void generated_mesh_with_transient_data_to_file_in_serial(const std::string &meshSizeSpec, const std::string &fileName, const std::string& fieldName, + stk::topology::rank_t fieldRank, const std::string& globalVariableName, const std::vector& timeSteps, const FieldValueSetter &fieldValueSetter); +void read_from_serial_file_and_decompose(const std::string& fileName, stk::mesh::BulkData &mesh, + const std::string &decompositionMethod); + + namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void generated_mesh_to_file_in_serial(const std::string& meshSizeSpec, const std::string& fileName); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void text_mesh_to_file_in_serial(const std::string& meshDesc, const std::string& fileName); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp(const std::string &meshSizeSpec, stk::mesh::BulkData & mesh, const std::string &decompositionMethod); class MeshFromFile @@ -158,7 +168,8 @@ class MeshFromFile stk::io::StkMeshIoBroker broker; }; -class TransientVerifier +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TransientVerifier { public: TransientVerifier(const MPI_Comm& c); @@ -189,6 +200,7 @@ class TransientVerifier const double m_epsilon; }; +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this 
function instead") void generated_mesh_with_transient_data_to_file_in_serial(const std::string &meshSizeSpec, const std::string &fileName, const std::string& fieldName, @@ -197,6 +209,7 @@ void generated_mesh_with_transient_data_to_file_in_serial(const std::string &mes const std::vector& timeSteps, const FieldValueSetter &fieldValueSetter); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void read_from_serial_file_and_decompose(const std::string& fileName, stk::mesh::BulkData &mesh, const std::string &decompositionMethod); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.cpp index f363ef16a57f..833151f9de91 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.cpp @@ -27,9 +27,9 @@ size_t write_mesh_data__field_1__field_2__field_3(const std::string & filename, stkIo.create_input_mesh(); stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - stk::mesh::Field & field1 = stk::mesh::legacy::declare_field>(meta, stk::topology::ELEM_RANK, "field_1", 1); - stk::mesh::Field & field2 = stk::mesh::legacy::declare_field>(meta, stk::topology::ELEM_RANK, "field_2", 1); - stk::mesh::Field & field3 = stk::mesh::legacy::declare_field>(meta, stk::topology::ELEM_RANK, "field_3", 1); + stk::mesh::Field & field1 = meta.declare_field(stk::topology::ELEM_RANK, "field_1", 1); + stk::mesh::Field & field2 = meta.declare_field(stk::topology::ELEM_RANK, "field_2", 1); + stk::mesh::Field & field3 = meta.declare_field(stk::topology::ELEM_RANK, "field_3", 1); double fieldValues[] = {1.0, 2.0, 3.0}; stk::mesh::put_field_on_mesh(field1, meta.universal_part(), fieldValues); @@ -50,7 +50,7 @@ void create_mesh_without_time_steps(const std::string & filename, MPI_Comm commu { std::shared_ptr bulk = build_mesh(communicator, 
stk::mesh::BulkData::AUTO_AURA); stk::io::StkMeshIoBroker stkIoWriter(communicator); - stk::unit_test_util::write_mesh_data__field_1__field_2__field_3(filename, communicator, *bulk, stkIoWriter); + write_mesh_data__field_1__field_2__field_3(filename, communicator, *bulk, stkIoWriter); } void create_mesh_with__field_1__field_2__field_3(const std::string & filename, MPI_Comm communicator) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.hpp index a406c2e5a798..8451ff074bef 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/meshCreationHelpers.hpp @@ -46,7 +46,10 @@ void create_mesh_with__field_1__field_2__field_3(const std::string & filename, M namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void create_mesh_without_time_steps(const std::string & filename, MPI_Comm communicator); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void create_mesh_with__field_1__field_2__field_3(const std::string & filename, MPI_Comm communicator); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.cpp index f515d9d18d4c..ad5295d9b0cc 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.cpp @@ -58,7 +58,8 @@ BoxFixture::BoxFixture( stk::ParallelMachine pm , m_comm_size( stk::parallel_machine_size( pm ) ), m_elem_part( m_fem_meta.declare_part_with_topology("elem_part", stk::topology::HEX_8) ), m_elem_topology( stk::topology::HEX_8 ) -{} +{ +} void 
BoxFixture::generate_boxes( const BOX root_box, BOX local_box ) @@ -236,7 +237,6 @@ BoxFixture::BoxFixture( stk::ParallelMachine pm , m_elem_part( m_fem_meta.declare_part_with_topology("elem_part", stk::topology::HEX_8) ), m_elem_topology( stk::topology::HEX_8 ) { - m_fem_meta.use_simple_fields(); } void BoxFixture::generate_boxes( const BOX root_box, @@ -293,7 +293,7 @@ void BoxFixture::generate_boxes( const BOX root_box, for (int en_i = 0; en_i < 8; ++en_i) { nodes[en_i] = m_bulk_data.declare_node(node_ids[en_i] , no_parts); m_bulk_data.declare_relation(elem, nodes[en_i], en_i); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_ids[en_i], nodes[en_i]); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_ids[en_i], nodes[en_i]); } } } @@ -339,7 +339,7 @@ void BoxFixture::fill_node_map(int proc_rank, const BOX root_box) node_ids[7]= 1 + (i+0) + (j+1) * (ngx+1) + (k+1) * (ngx+1) * (ngy+1); for (int en_i = 0; en_i < 8; ++en_i) { - AddToNodeProcsMMap(m_nodes_to_procs, node_ids[en_i], proc_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, node_ids[en_i], proc_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.hpp index 093f0a47553f..bc976d5206eb 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/BoxFixture.hpp @@ -58,7 +58,7 @@ static const size_t spatial_dimension = 3; /** * A fixture that creates a "box" mesh of hexes */ -class BoxFixture { +class BoxFixture { public: BoxFixture(stk::ParallelMachine pm = MPI_COMM_WORLD, stk::mesh::BulkData::AutomaticAuraOption autoAuraOption = stk::mesh::BulkData::AUTO_AURA, @@ -127,7 +127,8 @@ static const size_t spatial_dimension = 3; /** * A fixture that creates a "box" mesh of hexes */ -class BoxFixture { +class 
STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +BoxFixture { public: BoxFixture(stk::ParallelMachine pm = MPI_COMM_WORLD, stk::mesh::BulkData::AutomaticAuraOption autoAuraOption = stk::mesh::BulkData::AUTO_AURA, diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/CMakeLists.txt b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/CMakeLists.txt index 738da333ceec..052316a06f15 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/CMakeLists.txt +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/CMakeLists.txt @@ -82,5 +82,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_unit_test_utils/stk_mesh_fixtures) if(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_mesh_fixtures DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_mesh_fixtures EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) endif() diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/FixtureNodeSharing.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/FixtureNodeSharing.hpp index f54aa5729ef6..5ac568b45f2a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/FixtureNodeSharing.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/FixtureNodeSharing.hpp @@ -56,8 +56,10 @@ void DoAddNodeSharings(BulkData &bulk_data, NodeToProcsMMap &nodes_to_procs, Ent namespace simple_fields { +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void AddToNodeProcsMMap(NodeToProcsMMap &nodes_to_procs, EntityId node_id, int proc_rank); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void DoAddNodeSharings(BulkData &bulk_data, NodeToProcsMMap &nodes_to_procs, EntityId node_id, Entity node); } // namespace simple_fields 
diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Gear.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Gear.hpp index 082c31ef8a0c..2779827f685f 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Gear.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Gear.hpp @@ -82,8 +82,8 @@ struct GearMovement class Gear { - typedef Field CartesianField; - typedef Field CylindricalField; + typedef Field CartesianField; + typedef Field CylindricalField; enum { SpatialDimension = 3 }; @@ -169,7 +169,7 @@ class Gear { CartesianField & translation_field ; CylindricalField & cylindrical_coord_field ; -private: + private: EntityVector gear_entities; Gear(const Gear &); @@ -196,7 +196,8 @@ struct GearMovement {} }; -class Gear { +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +Gear { typedef Field CartesianField; typedef Field CylindricalField; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.cpp index b218c6a5dda7..1929e3a8d6ec 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.cpp @@ -75,17 +75,23 @@ GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, GearParams gear cylindrical_coord_part( meta_data.declare_part("cylindrical_coord_part", stk::topology::ELEMENT_RANK)), hex_part( meta_data.declare_part_with_topology("hex8_part", stk::topology::HEX_8)), wedge_part( meta_data.declare_part_with_topology("wedge6_part", stk::topology::WEDGE_6)), - cartesian_coord_field( stk::mesh::legacy::declare_field(meta_data, stk::topology::NODE_RANK, "coordinates", ONE_STATE)), - displacement_field( 
stk::mesh::legacy::declare_field(meta_data, stk::topology::NODE_RANK, "displacement", TWO_STATE)), - translation_field( stk::mesh::legacy::declare_field(meta_data, stk::topology::NODE_RANK, "translation", ONE_STATE)), - cylindrical_coord_field( stk::mesh::legacy::declare_field(meta_data, stk::topology::NODE_RANK, "cylindrical_coordinates", ONE_STATE)), m_gears() { + cartesian_coord_field = &meta_data.declare_field(stk::topology::NODE_RANK, "coordinates", ONE_STATE); + displacement_field = &meta_data.declare_field(stk::topology::NODE_RANK, "displacement", TWO_STATE); + translation_field = &meta_data.declare_field(stk::topology::NODE_RANK, "translation", ONE_STATE); + cylindrical_coord_field = &meta_data.declare_field(stk::topology::NODE_RANK, "cylindrical_coordinates", ONE_STATE); - put_field_on_mesh(cartesian_coord_field, meta_data.universal_part(), SpatialDimension, nullptr); - put_field_on_mesh(displacement_field, meta_data.universal_part(), SpatialDimension, nullptr); - put_field_on_mesh(translation_field, cylindrical_coord_part, SpatialDimension, nullptr); - put_field_on_mesh(cylindrical_coord_field, cylindrical_coord_part, SpatialDimension, nullptr); + put_field_on_mesh(*cartesian_coord_field, meta_data.universal_part(), SpatialDimension, nullptr); + stk::io::set_field_output_type(*cartesian_coord_field, stk::io::FieldOutputType::VECTOR_3D); + + put_field_on_mesh(*displacement_field, meta_data.universal_part(), SpatialDimension, nullptr); + stk::io::set_field_output_type(*displacement_field, stk::io::FieldOutputType::VECTOR_3D); + + put_field_on_mesh(*translation_field, cylindrical_coord_part, SpatialDimension, nullptr); + stk::io::set_field_output_type(*translation_field, stk::io::FieldOutputType::VECTOR_3D); + + put_field_on_mesh(*cylindrical_coord_field, cylindrical_coord_part, SpatialDimension, nullptr); m_gears.resize(NUM_GEARS); @@ -98,10 +104,10 @@ GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, GearParams gear cylindrical_coord_part, 
hex_part, wedge_part, - cartesian_coord_field, - displacement_field, - translation_field, - cylindrical_coord_field, + *cartesian_coord_field, + *displacement_field, + *translation_field, + *cylindrical_coord_field, gear_params.element_size, gear_params.radius_min, gear_params.radius_max, @@ -147,7 +153,7 @@ void GearsFixture::generate_mesh() { for ( size_t i = 0 ; i < m_gears.size() ; ++i ) { // Parallel collective call: - distribute_gear_across_processors(get_gear(i),cylindrical_coord_field); + distribute_gear_across_processors(get_gear(i), *cylindrical_coord_field); } } @@ -158,11 +164,11 @@ void GearsFixture::communicate_model_fields() std::vector< const FieldBase *> fields; - fields.push_back(& cartesian_coord_field); - fields.push_back(& translation_field); - fields.push_back(& cylindrical_coord_field); - fields.push_back(& displacement_field.field_of_state(stk::mesh::StateNew)); - fields.push_back(& displacement_field.field_of_state(stk::mesh::StateOld)); + fields.push_back(cartesian_coord_field); + fields.push_back(translation_field); + fields.push_back(cylindrical_coord_field); + fields.push_back(&displacement_field->field_of_state(stk::mesh::StateNew)); + fields.push_back(&displacement_field->field_of_state(stk::mesh::StateOld)); // Parallel collective call: #if defined( STK_HAS_MPI) @@ -282,7 +288,7 @@ unsigned destination_processor(const Gear & gear, double rad, double angle, doub namespace simple_fields { -GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, GearParams gear_params) +GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, stk::mesh::fixtures::GearParams gear_params) : NUM_GEARS(num_gears), bulk_data_ptr(stk::unit_test_util::build_mesh(SpatialDimension, pm)), bulk_data(*bulk_data_ptr), @@ -292,7 +298,6 @@ GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, GearParams gear wedge_part( meta_data.declare_part_with_topology("wedge6_part", stk::topology::WEDGE_6)), m_gears() { - meta_data.use_simple_fields(); 
cartesian_coord_field = &meta_data.declare_field(stk::topology::NODE_RANK, "coordinates", ONE_STATE); displacement_field = &meta_data.declare_field(stk::topology::NODE_RANK, "displacement", TWO_STATE); translation_field = &meta_data.declare_field(stk::topology::NODE_RANK, "translation", ONE_STATE); @@ -314,7 +319,7 @@ GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, GearParams gear for ( size_t i = 0; i < NUM_GEARS; ++i) { std::ostringstream oss; oss << "Gear_" << i; - m_gears[i] = new Gear(meta_data, + m_gears[i] = new stk::mesh::fixtures::Gear(meta_data, bulk_data, meta_data.declare_part(oss.str(),static_cast(SpatialDimension)), cylindrical_coord_part, @@ -334,7 +339,7 @@ GearsFixture::GearsFixture(ParallelMachine pm, size_t num_gears, GearParams gear GearsFixture::~GearsFixture() { - for (std::vector::iterator i = m_gears.begin(); i != m_gears.end(); ++i) { + for (std::vector::iterator i = m_gears.begin(); i != m_gears.end(); ++i) { delete *i; *i = nullptr; } @@ -351,7 +356,7 @@ void GearsFixture::generate_mesh() { //create the gears on a line for( size_t i = 0; i < m_gears.size(); ++i) { if (( (i*p_size)/m_gears.size()) == p_rank) { - Gear & gear = get_gear(i); + stk::mesh::fixtures::Gear & gear = get_gear(i); gear.generate_gear(); } else { // Parallel synchronization: @@ -369,7 +374,7 @@ void GearsFixture::generate_mesh() { for ( size_t i = 0 ; i < m_gears.size() ; ++i ) { // Parallel collective call: - distribute_gear_across_processors(get_gear(i), *cylindrical_coord_field); + stk::mesh::fixtures::distribute_gear_across_processors(get_gear(i), *cylindrical_coord_field); } } @@ -405,7 +410,7 @@ double scale_angle_2pi(double angle) { void select_nodal_data(const BulkData& mesh, - GearsFixture::CylindricalField & cylindrical_coord_field, + stk::mesh::fixtures::GearsFixture::CylindricalField & cylindrical_coord_field, Entity element, double & radius, double & angle, @@ -432,7 +437,8 @@ void select_nodal_data(const BulkData& mesh, } // Parallel 
collective call: -void distribute_gear_across_processors(Gear & gear, GearsFixture::CylindricalField & cylindrical_coord_field) +void distribute_gear_across_processors(stk::mesh::fixtures::Gear & gear, + stk::mesh::fixtures::GearsFixture::CylindricalField & cylindrical_coord_field) { BulkData & bulk_data = gear.bulk_data; @@ -453,7 +459,7 @@ void distribute_gear_across_processors(Gear & gear, GearsFixture::CylindricalFie double angle = 0.0; double height = 0.0; select_nodal_data(bulk_data, cylindrical_coord_field, element, radius, angle, height); - unsigned destination_processor_rank = destination_processor(gear,radius,angle,height,p_rank,p_size); + unsigned destination_processor_rank = stk::mesh::fixtures::destination_processor(gear,radius,angle,height,p_rank,p_size); elements_to_change_owner.push_back(EntityProc(element,destination_processor_rank)); // Now add all related nodes to list to move to this processor: @@ -490,7 +496,7 @@ void scale_p_rank(unsigned & p_rank, unsigned p_size) } } -unsigned destination_processor(const Gear & gear, double rad, double angle, double height, unsigned p_rank, unsigned p_size) +unsigned destination_processor(const stk::mesh::fixtures::Gear & gear, double rad, double angle, double height, unsigned p_rank, unsigned p_size) { unsigned result = 0; // Distribute elements across angles: (not working perfectly yet) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.hpp index 714ebf18825c..e0aeefe2c8fb 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GearsFixture.hpp @@ -81,8 +81,8 @@ struct GearParams { class GearsFixture { public: - typedef Field CylindricalField ; - typedef Field CartesianField ; + typedef Field CylindricalField; + typedef Field CartesianField; enum { 
SpatialDimension = 3 }; @@ -102,10 +102,10 @@ class GearsFixture { Part & hex_part; Part & wedge_part; - CartesianField & cartesian_coord_field ; - CartesianField & displacement_field ; - CartesianField & translation_field ; - CylindricalField & cylindrical_coord_field; + CartesianField * cartesian_coord_field ; + CartesianField * displacement_field ; + CartesianField * translation_field ; + CylindricalField * cylindrical_coord_field; Gear & get_gear(size_t i) { return * m_gears[i]; @@ -131,7 +131,8 @@ unsigned destination_processor(const Gear & gear, double rad, double angle, doub namespace simple_fields { -struct GearParams { +struct STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +GearParams { GearParams() : element_size(0.1), radius_min(0.6), @@ -159,7 +160,8 @@ struct GearParams { double height_max; }; -class GearsFixture { +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +GearsFixture { public: typedef Field CylindricalField; typedef Field CartesianField; @@ -167,7 +169,7 @@ class GearsFixture { enum { SpatialDimension = 3 }; GearsFixture( stk::ParallelMachine pm, size_t num_gears, - GearParams gear_params=GearParams()); + stk::mesh::fixtures::GearParams gear_params=stk::mesh::fixtures::GearParams()); ~GearsFixture(); void generate_mesh(); @@ -187,11 +189,11 @@ class GearsFixture { CartesianField * translation_field ; CylindricalField * cylindrical_coord_field; - Gear & get_gear(size_t i) { + stk::mesh::fixtures::Gear & get_gear(size_t i) { return * m_gears[i]; } - const Gear & get_gear(size_t i) const{ + const stk::mesh::fixtures::Gear & get_gear(size_t i) const{ return * m_gears[i]; } @@ -199,15 +201,19 @@ class GearsFixture { private: - std::vector m_gears; + std::vector m_gears; GearsFixture( const GearsFixture & ); GearsFixture & operator=( const GearsFixture & ); }; /// \brief Distribute gears across processors -void 
distribute_gear_across_processors(Gear & gear, GearsFixture::CylindricalField & cylindrical_coord_field); -unsigned destination_processor(const Gear & gear, double rad, double angle, double height, unsigned p_rank, unsigned p_size); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +void distribute_gear_across_processors(stk::mesh::fixtures::Gear & gear, + stk::mesh::fixtures::GearsFixture::CylindricalField & cylindrical_coord_field); + +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +unsigned destination_processor(const stk::mesh::fixtures::Gear & gear, double rad, double angle, double height, unsigned p_rank, unsigned p_size); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.cpp index 00ba295a419d..acc54fbb1c22 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.cpp @@ -65,12 +65,13 @@ namespace fixtures { GridFixture::GridFixture(stk::ParallelMachine pm) : m_spatial_dimension(2), - m_bulk_data_ptr(stk::unit_test_util::build_mesh_no_simple_fields(2, pm)), + m_bulk_data_ptr(stk::unit_test_util::build_mesh(2, pm)), m_bulk_data(*m_bulk_data_ptr), m_fem_meta(m_bulk_data.mesh_meta_data()), m_quad_part( m_fem_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_4_2D) ), m_dead_part( m_fem_meta.declare_part("dead_part")) -{} +{ +} GridFixture::~GridFixture() { } @@ -189,7 +190,6 @@ GridFixture::GridFixture(stk::ParallelMachine pm) m_quad_part( m_fem_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_4_2D) ), m_dead_part( m_fem_meta.declare_part("dead_part")) { - m_fem_meta.use_simple_fields(); } GridFixture::~GridFixture() @@ -250,7 +250,7 @@ void 
GridFixture::generate_grid() node_id += stencil_for_4x4_quad_mesh[chg_itr]; Entity node = m_bulk_data.declare_node(node_id, no_parts); m_bulk_data.declare_relation( face , node , chg_itr); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_id, node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_id, node); } } } @@ -293,7 +293,7 @@ void GridFixture::fill_node_map(unsigned num_nodes, unsigned num_quad_faces, int for (unsigned chg_itr = 0; chg_itr < num_nodes_per_quad; ++chg_itr) { node_id += stencil_for_4x4_quad_mesh[chg_itr]; - AddToNodeProcsMMap(m_nodes_to_procs, node_id, p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, node_id, p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.hpp index 92e6fdb6783a..32af48a9241d 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/GridFixture.hpp @@ -77,7 +77,8 @@ class GridFixture namespace simple_fields { -class GridFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +GridFixture { public: GridFixture(stk::ParallelMachine pm); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.cpp index 44511a95774b..ffe349e36504 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.cpp @@ -68,10 +68,11 @@ Hex20Fixture::Hex20Fixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( 
stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } Hex20Fixture::Hex20Fixture(stk::ParallelMachine pm, @@ -91,12 +92,13 @@ Hex20Fixture::Hex20Fixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_20) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -118,12 +120,13 @@ Hex20Fixture::Hex20Fixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_20) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); 
//put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -231,7 +234,7 @@ void Hex20Fixture::generate_mesh(std::vector & hex_range_on_this_process size_t nx = 0, ny = 0, nz = 0; node_x_y_z(elem_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, nz); } @@ -340,7 +343,6 @@ Hex20Fixture::Hex20Fixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_20) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -369,7 +371,6 @@ Hex20Fixture::Hex20Fixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_20) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -476,7 +477,7 @@ void Hex20Fixture::generate_mesh(std::vector & hex_range_on_this_process STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0, nz = 0; @@ -541,7 +542,7 @@ void Hex20Fixture::fill_node_map(int p_rank) elem_nodes[19] = node_id( ix , iy+1 , iz+2 ); 
for (size_t i = 0; i<20; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.hpp index c87275f1b93a..09766044ad71 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex20Fixture.hpp @@ -71,8 +71,8 @@ namespace fixtures { class Hex20Fixture { public: - typedef double Scalar; - typedef Field CoordFieldType; + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -124,7 +124,7 @@ class Hex20Fixture BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field; + CoordFieldType * m_coord_field; stk::topology m_elem_topology = stk::topology::HEX_20; stk::topology m_face_topology = stk::topology::QUAD_8; @@ -195,7 +195,8 @@ class Hex20Fixture namespace simple_fields { -class Hex20Fixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +Hex20Fixture { public: typedef double Scalar; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.cpp index 585c3578977e..0b35c2937d48 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.cpp @@ -68,10 +68,11 @@ Hex27Fixture::Hex27Fixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), 
- m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } Hex27Fixture::Hex27Fixture(stk::ParallelMachine pm, @@ -91,12 +92,13 @@ Hex27Fixture::Hex27Fixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_27) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -118,12 +120,13 @@ Hex27Fixture::Hex27Fixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_27) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = 
&m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -242,7 +245,7 @@ void Hex27Fixture::generate_mesh(std::vector & hex_range_on_this_process size_t nx = 0, ny = 0, nz = 0; node_x_y_z(elem_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, nz); } @@ -358,7 +361,6 @@ Hex27Fixture::Hex27Fixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_27) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -387,7 +389,6 @@ Hex27Fixture::Hex27Fixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_27) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -505,7 +506,7 @@ void Hex27Fixture::generate_mesh(std::vector & hex_range_on_this_process STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0, nz = 0; @@ -577,7 +578,7 @@ void 
Hex27Fixture::fill_node_map(int p_rank) elem_nodes[26] = node_id( ix+1 , iy+2 , iz+1 ); for (size_t i = 0; i<27; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.hpp index 1a6e67c412cf..8fa85785b05f 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Hex27Fixture.hpp @@ -71,8 +71,8 @@ namespace fixtures { class Hex27Fixture { public: - typedef double Scalar; - typedef Field CoordFieldType; + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -124,7 +124,7 @@ class Hex27Fixture BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field ; stk::topology m_elem_topology = stk::topology::HEX_27; stk::topology m_face_topology = stk::topology::QUAD_9; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.cpp index db74e06fb16b..edbdb72c8cc6 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.cpp @@ -70,11 +70,11 @@ HexFixture::HexFixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, 
"Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), owns_mesh(false) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } HexFixture::HexFixture(stk::ParallelMachine pm, @@ -93,13 +93,12 @@ HexFixture::HexFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); - + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } HexFixture::HexFixture(stk::ParallelMachine pm, @@ -119,11 +118,12 @@ HexFixture::HexFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordinate_name) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordinate_name); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, 
m_meta.universal_part(), m_spatial_dimension, nullptr); } HexFixture::HexFixture(stk::ParallelMachine pm, @@ -143,13 +143,12 @@ HexFixture::HexFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); - + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } HexFixture::~HexFixture() @@ -249,7 +248,7 @@ void HexFixture::fill_node_map( int p_rank) elem_node[7] = node_id( ix , iy+1 , iz+1 ); for (int ien = 0; ien < 8; ++ien) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], p_rank); } } } @@ -284,7 +283,7 @@ void HexFixture::fill_node_map(const std::map > ¶l elem_node[7] = node_id( ix , iy+1 , iz+1 ); for (int ien = 0; ien < 8; ++ien) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], proc); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], proc); } } } @@ -331,7 +330,7 @@ void HexFixture::generate_mesh(std::vector & element_ids_on_this_proce stk::mesh::Entity const node = m_bulk_data.get_entity( stk::topology::NODE_RANK , node_id ); m_bulk_data.change_entity_parts(node, m_node_parts); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_id, node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_id, node); STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should 
know about the nodes that make up its element"); @@ -340,7 +339,7 @@ void HexFixture::generate_mesh(std::vector & element_ids_on_this_proce size_t nx = 0, ny = 0, nz = 0; node_x_y_z(elem_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, nz); } @@ -394,7 +393,6 @@ HexFixture::HexFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -420,7 +418,6 @@ HexFixture::HexFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordinate_name); //put coord-field on all nodes: @@ -446,7 +443,6 @@ HexFixture::HexFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("hex_part", stk::topology::HEX_8) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -550,7 +546,7 @@ void HexFixture::fill_node_map( int p_rank) elem_node[7] = node_id( ix , iy+1 , iz+1 ); for (int ien = 0; ien < 8; ++ien) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], p_rank); } } } @@ -585,7 +581,7 @@ void HexFixture::fill_node_map(const std::map > ¶l elem_node[7] = node_id( ix , iy+1 , iz+1 ); for (int ien = 0; ien < 8; 
++ien) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], proc); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_node[ien], proc); } } } @@ -632,7 +628,7 @@ void HexFixture::generate_mesh(std::vector & element_ids_on_this_proce stk::mesh::Entity const node = m_bulk_data.get_entity( stk::topology::NODE_RANK , node_id ); m_bulk_data.change_entity_parts(node, m_node_parts); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_id, node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, node_id, node); STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.hpp index 436929c6bae5..ad4c6d701d9b 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/HexFixture.hpp @@ -70,8 +70,9 @@ namespace fixtures { class HexFixture { public: - typedef double Scalar; - typedef Field CoordFieldType; + static std::string name() { return "HexFixture"; } + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -124,7 +125,7 @@ class HexFixture BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field ; bool owns_mesh = true; stk::topology m_elem_topology = stk::topology::HEX_8; stk::topology m_face_topology = stk::topology::QUAD_4; @@ -205,7 +206,6 @@ class HexFixture //} private: - NodeToProcsMMap m_nodes_to_procs; HexFixture(); @@ -232,6 +232,7 @@ class HexFixture * Set up meta data to support this fixture. Meta data is left uncommitted * to allow additional modifications by the client. 
*/ + STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") HexFixture(MetaData& meta, BulkData& bulk, size_t nx, @@ -240,17 +241,20 @@ class HexFixture size_t nid_start, size_t eid_start); + STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") HexFixture(stk::ParallelMachine pm, size_t nx, size_t ny, size_t nz); + STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") HexFixture(stk::ParallelMachine pm, size_t nx, size_t ny, size_t nz, const std::string& coordinate_name); + STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") HexFixture(stk::ParallelMachine pm, size_t nx, size_t ny, diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.cpp index 66a8b4ec48dd..2b722aaa166a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.cpp @@ -68,11 +68,12 @@ PyramidFixture::PyramidFixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), owns_mesh(false) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } PyramidFixture::PyramidFixture(stk::ParallelMachine pm, @@ -92,12 +93,13 @@ 
PyramidFixture::PyramidFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("pyramid_part", stk::topology::PYRAMID_5) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -119,12 +121,13 @@ PyramidFixture::PyramidFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("pyramid_part", stk::topology::PYRAMID_5) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordinate_name) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordinate_name); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -237,7 +240,7 @@ void PyramidFixture::generate_mesh(std::vector & hex_range_on_this_proce size_t nx = 0, ny = 0, nz = 0; node_x_y_z(pyramid_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( 
m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, nz); @@ -358,7 +361,6 @@ PyramidFixture::PyramidFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("pyramid_part", stk::topology::PYRAMID_5) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -387,7 +389,6 @@ PyramidFixture::PyramidFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("pyramid_part", stk::topology::PYRAMID_5) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordinate_name); //put coord-field on all nodes: @@ -499,7 +500,7 @@ void PyramidFixture::generate_mesh(std::vector & hex_range_on_this_proce STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, pyramid_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, pyramid_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0, nz = 0; @@ -573,7 +574,7 @@ void PyramidFixture::fill_node_map(int p_rank) pyramid_nodes[4] = elem_nodes[pyramid_vert[pyr][4]]; for (size_t i = 0; i<5; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, pyramid_nodes[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, pyramid_nodes[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.hpp index 8d01fa9cc31a..7ee12ccc0e75 100644 --- 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/PyramidFixture.hpp @@ -65,8 +65,9 @@ namespace fixtures { class PyramidFixture { public: - typedef double Scalar; - typedef Field CoordFieldType; + static std::string name() { return "PyramidFixture"; } + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -119,7 +120,7 @@ class PyramidFixture BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field ; bool owns_mesh = true; stk::topology m_elem_topology = stk::topology::PYRAMID_5; stk::topology m_face_topology = stk::topology::QUAD_4; @@ -198,7 +199,8 @@ namespace simple_fields { * A coordinate field will be added to all nodes, a coordinate-gather field * will be added to all elements. */ -class PyramidFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +PyramidFixture { public: static std::string name() { return "PyramidFixture"; } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadFixture.cpp index 6e4233f5043f..db6223172c4f 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadFixture.cpp @@ -62,14 +62,14 @@ QuadFixture::QuadFixture( MetaData& meta, BulkData& bulk, size_t nx, size_t ny, m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_4_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, 
"Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ), m_node_id_start(nid_start), m_elem_id_start(eid_start) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } QuadFixture::QuadFixture( stk::ParallelMachine pm , @@ -78,16 +78,17 @@ QuadFixture::QuadFixture( stk::ParallelMachine pm , : m_spatial_dimension(2), m_bulk_p( MeshBuilder(pm).set_spatial_dimension(2).set_entity_rank_names(rank_names).create() ), m_meta( m_bulk_p->mesh_meta_data() ), - m_bulk_data( *m_bulk_p ), + m_bulk_data(*m_bulk_p), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_4_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } QuadFixture::QuadFixture( stk::ParallelMachine pm , @@ -97,35 +98,36 @@ QuadFixture::QuadFixture( stk::ParallelMachine pm , : m_spatial_dimension(2), m_bulk_p( MeshBuilder(pm).set_spatial_dimension(2).set_entity_rank_names(rank_names).create() ), m_meta( m_bulk_p->mesh_meta_data() ), - m_bulk_data( *m_bulk_p ), + m_bulk_data(*m_bulk_p), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_4_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, 
stk::topology::NODE_RANK, coordsName) ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } QuadFixture::QuadFixture( stk::ParallelMachine pm , unsigned nx , unsigned ny, bool auraOn ) : m_spatial_dimension(2), - m_bulk_p( build_mesh_no_simple_fields(2, pm, (auraOn ? stk::mesh::BulkData::AUTO_AURA : stk::mesh::BulkData::NO_AUTO_AURA)) ), + m_bulk_p( build_mesh(2, pm, (auraOn ? stk::mesh::BulkData::AUTO_AURA : stk::mesh::BulkData::NO_AUTO_AURA)) ), m_meta( m_bulk_p->mesh_meta_data() ), m_bulk_data(*m_bulk_p), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_4_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } void QuadFixture::node_x_y( EntityId entity_id, unsigned &x , unsigned &y ) const @@ -187,45 +189,47 @@ void QuadFixture::generate_mesh(std::vector & element_ids_on_this_proc m_bulk_data.modification_begin(); - // Declare the elements that belong on this process + { + // Declare the elements that belong on this process - std::vector::const_iterator ib = element_ids_on_this_processor.begin(); - const std::vector::const_iterator ie = element_ids_on_this_processor.end(); - stk::mesh::EntityIdVector elem_nodes(4) ; + std::vector::const_iterator ib = 
element_ids_on_this_processor.begin(); + const std::vector::const_iterator ie = element_ids_on_this_processor.end(); + stk::mesh::EntityIdVector elem_nodes(4) ; - for (; ib != ie; ++ib) { - EntityId entity_id = *ib; - unsigned ix = 0, iy = 0; - elem_x_y(entity_id, ix, iy); + for (; ib != ie; ++ib) { + EntityId entity_id = *ib; + unsigned ix = 0, iy = 0; + elem_x_y(entity_id, ix, iy); - elem_nodes[0] = node_id( ix , iy ); - elem_nodes[1] = node_id( ix+1 , iy ); - elem_nodes[2] = node_id( ix+1 , iy+1 ); - elem_nodes[3] = node_id( ix , iy+1 ); + elem_nodes[0] = node_id( ix , iy ); + elem_nodes[1] = node_id( ix+1 , iy ); + elem_nodes[2] = node_id( ix+1 , iy+1 ); + elem_nodes[3] = node_id( ix , iy+1 ); - stk::mesh::declare_element( m_bulk_data, m_elem_parts, elem_id( ix , iy ) , elem_nodes); + stk::mesh::declare_element( m_bulk_data, m_elem_parts, elem_id( ix , iy ) , elem_nodes); - for (unsigned i = 0; i<4; ++i) { - stk::mesh::Entity const node = m_bulk_data.get_entity( stk::topology::NODE_RANK , elem_nodes[i] ); - m_bulk_data.change_entity_parts(node, m_node_parts); + for (unsigned i = 0; i<4; ++i) { + stk::mesh::Entity const node = m_bulk_data.get_entity( stk::topology::NODE_RANK , elem_nodes[i] ); + m_bulk_data.change_entity_parts(node, m_node_parts); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); + DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); - STK_ThrowRequireMsg( m_bulk_data.is_valid(node), + STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - // Compute and assign coordinates to the node - unsigned nx = 0, ny = 0; - node_x_y(elem_nodes[i], nx, ny); + // Compute and assign coordinates to the node + unsigned nx = 0, ny = 0; + node_x_y(elem_nodes[i], nx, ny); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); - // The CoordinateMappings are used for 2D and 3D so make 
sure we give it enough space to write to. - std::array temp; - coordMap.getNodeCoordinates(temp.data(), nx, ny, 0); + // The CoordinateMappings are used for 2D and 3D so make sure we give it enough space to write to. + std::array temp; + coordMap.getNodeCoordinates(temp.data(), nx, ny, 0); - data[0] = temp[0]; - data[1] = temp[1] ; + data[0] = temp[0]; + data[1] = temp[1] ; + } } } @@ -236,6 +240,7 @@ void QuadFixture::generate_mesh(std::vector & element_ids_on_this_proc void QuadFixture::fill_node_map(int p_rank) { + std::vector element_ids_on_this_processor; const size_t p_size = m_bulk_data.parallel_size(); @@ -248,22 +253,25 @@ void QuadFixture::fill_node_map(int p_rank) element_ids_on_this_processor.push_back(i); } - std::vector::const_iterator ib = element_ids_on_this_processor.begin(); - const std::vector::const_iterator ie = element_ids_on_this_processor.end(); - for (; ib != ie; ++ib) { - EntityId entity_id = *ib; - unsigned ix = 0, iy = 0; - elem_x_y(entity_id, ix, iy); + { - stk::mesh::EntityId elem_nodes[4] ; + std::vector::const_iterator ib = element_ids_on_this_processor.begin(); + const std::vector::const_iterator ie = element_ids_on_this_processor.end(); + for (; ib != ie; ++ib) { + EntityId entity_id = *ib; + unsigned ix = 0, iy = 0; + elem_x_y(entity_id, ix, iy); - elem_nodes[0] = node_id( ix , iy ); - elem_nodes[1] = node_id( ix+1 , iy ); - elem_nodes[2] = node_id( ix+1 , iy+1 ); - elem_nodes[3] = node_id( ix , iy+1 ); + stk::mesh::EntityId elem_nodes[4] ; - for (unsigned i = 0; i<4; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + elem_nodes[0] = node_id( ix , iy ); + elem_nodes[1] = node_id( ix+1 , iy ); + elem_nodes[2] = node_id( ix+1 , iy+1 ); + elem_nodes[3] = node_id( ix , iy+1 ); + + for (unsigned i = 0; i<4; ++i) { + AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + } } } } @@ -275,14 +283,14 @@ Quad9Fixture::Quad9Fixture( MetaData& meta, BulkData& bulk, size_t nx, size_t ny m_quad_part( 
m_meta.declare_part_with_topology("quad9_part", stk::topology::QUAD_9_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ), m_node_id_start(nid_start), m_elem_id_start(eid_start) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , @@ -291,16 +299,17 @@ Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , : m_spatial_dimension(2), m_bulk_p( MeshBuilder(pm).set_spatial_dimension(2).set_entity_rank_names(rank_names).create() ), m_meta( m_bulk_p->mesh_meta_data() ), - m_bulk_data( *m_bulk_p ), + m_bulk_data(*m_bulk_p), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_9_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , @@ -310,35 +319,36 @@ Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , : m_spatial_dimension(2), m_bulk_p( MeshBuilder(pm).set_spatial_dimension(2).set_entity_rank_names(rank_names).create() ), m_meta( m_bulk_p->mesh_meta_data() ), - m_bulk_data( *m_bulk_p ), + 
m_bulk_data(*m_bulk_p), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_9_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , unsigned nx , unsigned ny, bool auraOn ) : m_spatial_dimension(2), - m_bulk_p( build_mesh_no_simple_fields(2, pm, (auraOn ? stk::mesh::BulkData::AUTO_AURA : stk::mesh::BulkData::NO_AUTO_AURA)) ), + m_bulk_p( build_mesh(2, pm, (auraOn ? stk::mesh::BulkData::AUTO_AURA : stk::mesh::BulkData::NO_AUTO_AURA)) ), m_meta( m_bulk_p->mesh_meta_data() ), m_bulk_data(*m_bulk_p), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::QUAD_9_2D ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); } void Quad9Fixture::elem_x_y( EntityId entity_id, unsigned &x , unsigned &y ) const @@ -410,7 +420,7 @@ void Quad9Fixture::generate_mesh(std::vector & element_ids_on_this_pro STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its 
element"); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); data[0] = nodeCoords[2*i+0]; data[1] = nodeCoords[2*i+1] ; @@ -544,7 +554,6 @@ QuadFixture::QuadFixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -565,7 +574,6 @@ QuadFixture::QuadFixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -585,7 +593,6 @@ QuadFixture::QuadFixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -674,7 +681,7 @@ void QuadFixture::generate_mesh(std::vector & element_ids_on_this_proc stk::mesh::Entity const node = m_bulk_data.get_entity( stk::topology::NODE_RANK , elem_nodes[i] ); m_bulk_data.change_entity_parts(node, m_node_parts); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); @@ -732,7 +739,7 @@ void QuadFixture::fill_node_map(int p_rank) elem_nodes[3] = node_id( ix , iy+1 ); for (unsigned i = 0; i<4; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); } } } @@ -768,7 +775,6 @@ Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -789,7 +795,6 @@ 
Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -809,7 +814,6 @@ Quad9Fixture::Quad9Fixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -880,7 +884,7 @@ void Quad9Fixture::generate_mesh(std::vector & element_ids_on_this_pro stk::mesh::Entity const node = m_bulk_data.get_entity( stk::topology::NODE_RANK , elemNodes[i] ); m_bulk_data.change_entity_parts(node, m_node_parts); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elemNodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elemNodes[i], node); STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); @@ -918,7 +922,7 @@ void Quad9Fixture::fill_node_map(int p_rank) stk::mesh::EntityIdVector elem_nodes = node_ids( entity_id ); for (unsigned i = 0; i CoordFieldType; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -89,7 +89,7 @@ class QuadFixture const unsigned m_spatial_dimension; private: - std::shared_ptr m_bulk_p; + std::shared_ptr m_bulk_p; public: MetaData & m_meta; @@ -97,7 +97,7 @@ class QuadFixture Part & m_quad_part; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field; + CoordFieldType * m_coord_field; const unsigned m_nx; const unsigned m_ny; const size_t m_node_id_start = 1; @@ -181,7 +181,7 @@ class Quad9Fixture { public: typedef double Scalar; - typedef Field CoordFieldType; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. 
Meta data is left uncommitted @@ -211,7 +211,7 @@ class Quad9Fixture Part & m_quad_part; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field; + CoordFieldType * m_coord_field; const unsigned m_nx; const unsigned m_ny; const size_t m_node_id_start = 1; @@ -417,9 +417,12 @@ class QuadFixture void fill_node_map( int proc_rank); }; -using Quad4Fixture = QuadFixture; +using +Quad4Fixture +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") = QuadFixture; -class Quad9Fixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +Quad9Fixture { public: typedef double Scalar; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.cpp index c71ac7130a69..945930063f69 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.cpp @@ -67,7 +67,7 @@ QuadShellFixture::QuadShellFixture(MetaData& meta, m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::SHELL_QUAD_4 ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), m_node_id_start(nid_start), m_elem_id_start(eid_start), owns_mesh(false), @@ -75,7 +75,8 @@ QuadShellFixture::QuadShellFixture(MetaData& meta, m_ny( ny ) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + 
stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , @@ -88,12 +89,14 @@ QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::SHELL_QUAD_4 ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , @@ -107,30 +110,34 @@ QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::SHELL_QUAD_4 ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , unsigned nx , unsigned ny, bool auraOn ) - : m_bulk_p( build_mesh_no_simple_fields(3, pm, (auraOn ? 
stk::mesh::BulkData::AUTO_AURA : stk::mesh::BulkData::NO_AUTO_AURA)) ), + : m_bulk_p( build_mesh(3, pm, (auraOn ? stk::mesh::BulkData::AUTO_AURA : stk::mesh::BulkData::NO_AUTO_AURA)) ), m_spatial_dimension(3), m_meta( m_bulk_p->mesh_meta_data() ), m_bulk_data( *m_bulk_p ), m_quad_part( m_meta.declare_part_with_topology("quad_part", stk::topology::SHELL_QUAD_4 ) ), m_elem_parts(1, &m_quad_part), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), m_nx( nx ), m_ny( ny ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } QuadShellFixture::~QuadShellFixture() @@ -229,7 +236,7 @@ void QuadShellFixture::generate_mesh(std::vector & element_ids_on_this unsigned nx = 0, ny = 0; node_x_y(elem_nodes[i], nx, ny); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, 0); } @@ -318,7 +325,6 @@ QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -340,7 +346,6 @@ QuadShellFixture::QuadShellFixture( stk::ParallelMachine pm , m_nx( nx ), m_ny( ny ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -455,7 +460,7 @@ void QuadShellFixture::generate_mesh(std::vector & element_ids_on_this stk::mesh::Entity const node = 
m_bulk_data.get_entity( stk::topology::NODE_RANK , elem_nodes[i] ); m_bulk_data.change_entity_parts(node, m_node_parts); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, elem_nodes[i], node); STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); @@ -508,7 +513,7 @@ void QuadShellFixture::fill_node_map(int p_rank) elem_nodes[3] = node_id( ix , iy+1 ); for (unsigned i = 0; i<4; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.hpp index 03d23845c2e2..e2812aa4eadc 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/QuadShellFixture.hpp @@ -67,7 +67,7 @@ class QuadShellFixture { public: typedef double Scalar; - typedef Field CoordFieldType; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -99,7 +99,7 @@ class QuadShellFixture Part & m_quad_part ; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field ; const unsigned m_node_id_start = 1; const unsigned m_elem_id_start = 1; bool owns_mesh = true; @@ -185,7 +185,8 @@ namespace simple_fields { * A coordinate field will be added to all nodes, a coordinate-gather field * will be added to all elements. 
*/ -class QuadShellFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +QuadShellFixture { public: typedef double Scalar; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.cpp index 9ce1b451eeff..4dd7e9363704 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.cpp @@ -67,6 +67,7 @@ RingFixture::RingFixture( stk::ParallelMachine pm , m_element_ids(), m_beam_2_part( m_meta_data.declare_part_with_topology("beam_2_part", stk::topology::BEAM_2 ) ) { + if ( use_element_parts ) { m_element_parts.resize( num_element_per_proc ); for ( unsigned i = 0 ; i < num_element_per_proc ; ++i ) { @@ -196,7 +197,6 @@ RingFixture::RingFixture( stk::ParallelMachine pm , m_element_ids(), m_beam_2_part( m_meta_data.declare_part_with_topology("beam_2_part", stk::topology::BEAM_2 ) ) { - m_meta_data.use_simple_fields(); if ( use_element_parts ) { m_element_parts.resize( num_element_per_proc ); @@ -253,8 +253,8 @@ void RingFixture::generate_mesh( ) Entity e_element = m_bulk_data.declare_element(m_element_ids[i], add_parts); m_bulk_data.declare_relation( e_element , e_node_0 , 0 ); m_bulk_data.declare_relation( e_element , e_node_1 , 1 ); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, m_node_ids[n0], e_node_0); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, m_node_ids[n1], e_node_1); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, m_node_ids[n0], e_node_0); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, m_node_ids[n1], e_node_1); } } } @@ -283,8 +283,8 @@ void RingFixture::fill_node_map(int p_rank) add_parts[1] = m_element_parts[ i % m_element_parts.size() ]; } - AddToNodeProcsMMap(m_nodes_to_procs, 
m_node_ids[n0] , p_rank); - AddToNodeProcsMMap(m_nodes_to_procs, m_node_ids[n1] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, m_node_ids[n0] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, m_node_ids[n1] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.hpp index fb69ee2909c7..6f1380115f8f 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/RingFixture.hpp @@ -100,7 +100,8 @@ class RingFixture namespace simple_fields { -class RingFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +RingFixture { public: const int m_spatial_dimension; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.cpp index b0c6edc0b53d..8cad5af95917 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.cpp @@ -63,15 +63,16 @@ SelectorFixture::SelectorFixture() m_entity2( ), m_entity3( ), m_entity4( ), - m_entity5( ), - m_fieldA(stk::mesh::legacy::declare_field >(m_meta_data, stk::topology::NODE_RANK, "FieldA")), - m_fieldABC(stk::mesh::legacy::declare_field >(m_meta_data, stk::topology::NODE_RANK, "FieldABC")) + m_entity5( ) { - stk::mesh::put_field_on_mesh(m_fieldA, m_partA, nullptr); + m_fieldA = &m_meta_data.declare_field(stk::topology::NODE_RANK, "FieldA"); + m_fieldABC = &m_meta_data.declare_field(stk::topology::NODE_RANK, "FieldABC"); - stk::mesh::put_field_on_mesh(m_fieldABC, m_partA, nullptr); - 
stk::mesh::put_field_on_mesh(m_fieldABC, m_partB, nullptr); - stk::mesh::put_field_on_mesh(m_fieldABC, m_partC, nullptr); + stk::mesh::put_field_on_mesh(*m_fieldA, m_partA, nullptr); + + stk::mesh::put_field_on_mesh(*m_fieldABC, m_partA, nullptr); + stk::mesh::put_field_on_mesh(*m_fieldABC, m_partB, nullptr); + stk::mesh::put_field_on_mesh(*m_fieldABC, m_partC, nullptr); } void SelectorFixture::generate_mesh() @@ -120,12 +121,13 @@ void SelectorFixture::generate_mesh() VariableSelectorFixture::~VariableSelectorFixture() {} VariableSelectorFixture::VariableSelectorFixture(int NumParts) - : m_MetaDataPtr(MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).set_entity_rank_names(std::vector(4, std::string("MyEntityRank"))).create_meta_data()) - , m_MetaData( *m_MetaDataPtr ) - , m_BulkDataPtr( MeshBuilder(MPI_COMM_WORLD).create(m_MetaDataPtr) ) - , m_BulkData( *m_BulkDataPtr ) - , m_declared_part_vector() +: m_MetaDataPtr(MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).set_entity_rank_names(std::vector(4, std::string("MyEntityRank"))).create_meta_data()) +, m_MetaData( *m_MetaDataPtr ) +, m_BulkDataPtr( MeshBuilder(MPI_COMM_WORLD).create(m_MetaDataPtr) ) +, m_BulkData( *m_BulkDataPtr ) +, m_declared_part_vector() { + // Create Parts and commit: std::string myPartName; stk::mesh::EntityRank myRank = stk::topology::NODE_RANK; @@ -177,7 +179,6 @@ SelectorFixture::SelectorFixture() m_entity4( ), m_entity5( ) { - m_meta_data.use_simple_fields(); m_fieldA = &m_meta_data.declare_field(stk::topology::NODE_RANK, "FieldA"); m_fieldABC = &m_meta_data.declare_field(stk::topology::NODE_RANK, "FieldABC"); @@ -240,7 +241,6 @@ VariableSelectorFixture::VariableSelectorFixture(int NumParts) , m_BulkData( *m_BulkDataPtr ) , m_declared_part_vector() { - m_MetaData.use_simple_fields(); // Create Parts and commit: std::string myPartName; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.hpp 
b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.hpp index 273a7bc2bb2c..ee703f132936 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.hpp @@ -99,8 +99,8 @@ class SelectorFixture stk::mesh::Entity m_entity4 ; stk::mesh::Entity m_entity5 ; - stk::mesh::Field& m_fieldA; - stk::mesh::Field& m_fieldABC; + stk::mesh::Field* m_fieldA; + stk::mesh::Field* m_fieldABC; void generate_mesh(); @@ -130,7 +130,8 @@ class VariableSelectorFixture namespace simple_fields { -class SelectorFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +SelectorFixture { public: SelectorFixture(); @@ -167,7 +168,8 @@ class SelectorFixture SelectorFixture & operator = ( const SelectorFixture & ); }; -class VariableSelectorFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +VariableSelectorFixture { public: VariableSelectorFixture(int NumParts); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TestHexFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TestHexFixture.hpp index 9e9e01848e17..752fcb66ad3b 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TestHexFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TestHexFixture.hpp @@ -52,13 +52,13 @@ namespace fixtures { class TestHexFixture : public ::ngp_testing::Test { protected: - TestHexFixture() : m_bulk(stk::mesh::MeshBuilder(MPI_COMM_WORLD).create()), m_meta(m_bulk->mesh_meta_data()), - m_coord_field(stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates")), m_hexFixture(nullptr) - {} + { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); + } virtual 
~TestHexFixture() { delete m_hexFixture; } @@ -66,7 +66,7 @@ class TestHexFixture : public ::ngp_testing::Test const std::vector& entityRankNames = std::vector()) { STK_ThrowRequireMsg(m_hexFixture == nullptr, "TestHexFixture::setup_mesh may only be called once."); - m_meta.initialize(3, entityRankNames, m_coord_field.name()); + m_meta.initialize(3, entityRankNames, m_coord_field->name()); m_hexFixture = new HexFixture(m_meta, *m_bulk, nx, ny, nz, 1, 1); m_hexFixture->m_meta.commit(); m_hexFixture->generate_mesh(); @@ -84,18 +84,19 @@ class TestHexFixture : public ::ngp_testing::Test stk::mesh::BulkData& get_bulk() { return *m_bulk; } const stk::mesh::BulkData& get_bulk() const { return *m_bulk; } - HexFixture::CoordFieldType& get_coord_field() { return m_coord_field; } + HexFixture::CoordFieldType& get_coord_field() { return *m_coord_field; } private: std::shared_ptr m_bulk; stk::mesh::MetaData& m_meta; - HexFixture::CoordFieldType& m_coord_field; + HexFixture::CoordFieldType* m_coord_field; HexFixture* m_hexFixture; }; namespace simple_fields { -class TestHexFixture : public ::ngp_testing::Test +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TestHexFixture : public ::ngp_testing::Test { protected: TestHexFixture() @@ -103,7 +104,6 @@ class TestHexFixture : public ::ngp_testing::Test m_meta(m_bulk->mesh_meta_data()), m_hexFixture(nullptr) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); } @@ -114,7 +114,7 @@ class TestHexFixture : public ::ngp_testing::Test { STK_ThrowRequireMsg(m_hexFixture == nullptr, "TestHexFixture::setup_mesh may only be called once."); m_meta.initialize(3, entityRankNames, m_coord_field->name()); - m_hexFixture = new HexFixture(m_meta, *m_bulk, nx, ny, nz, 1, 1); + m_hexFixture = new stk::mesh::fixtures::HexFixture(m_meta, *m_bulk, nx, ny, nz, 1, 1); m_hexFixture->m_meta.commit(); m_hexFixture->generate_mesh(); } @@ -131,13 
+131,13 @@ class TestHexFixture : public ::ngp_testing::Test stk::mesh::BulkData& get_bulk() { return *m_bulk; } const stk::mesh::BulkData& get_bulk() const { return *m_bulk; } - HexFixture::CoordFieldType& get_coord_field() { return *m_coord_field; } + stk::mesh::Field& get_coord_field() { return *m_coord_field; } private: std::shared_ptr m_bulk; stk::mesh::MetaData& m_meta; - HexFixture::CoordFieldType* m_coord_field; - HexFixture* m_hexFixture; + stk::mesh::Field* m_coord_field; + stk::mesh::fixtures::HexFixture* m_hexFixture; }; } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.cpp index 3bd67c368d21..38a514a21b62 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.cpp @@ -68,10 +68,11 @@ Tet10Fixture::Tet10Fixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } Tet10Fixture::Tet10Fixture(stk::ParallelMachine pm, @@ -90,12 +91,13 @@ Tet10Fixture::Tet10Fixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("tet_part", stk::topology::TET_10) ), - m_node_parts( 1, 
&m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -116,12 +118,13 @@ Tet10Fixture::Tet10Fixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("tet_part", stk::topology::TET_10) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -251,7 +254,7 @@ void Tet10Fixture::generate_mesh(std::vector & hex_range_on_this_process size_t nx = 0, ny = 0, nz = 0; node_x_y_z(tet_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, nz); } @@ -367,7 +370,6 @@ Tet10Fixture::Tet10Fixture(stk::ParallelMachine pm, m_elem_parts( 1, 
&m_meta.declare_part_with_topology("tet_part", stk::topology::TET_10) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -395,7 +397,6 @@ Tet10Fixture::Tet10Fixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("tet_part", stk::topology::TET_10) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -524,7 +525,7 @@ void Tet10Fixture::generate_mesh(std::vector & hex_range_on_this_process STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, tet_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, tet_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0, nz = 0; @@ -597,7 +598,7 @@ void Tet10Fixture::fill_node_map(int p_rank) elem_nodes[26] = node_id( ix+1 , iy+2 , iz+2 ); for (size_t i = 0; i<27; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, elem_nodes[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.hpp index 1756a6a56ec5..f37abbd5fcc3 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/Tet10Fixture.hpp @@ -71,8 +71,8 @@ namespace fixtures { class Tet10Fixture { public: - typedef double Scalar; - typedef Field CoordFieldType; + 
typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -124,7 +124,7 @@ class Tet10Fixture BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field ; stk::topology m_elem_topology = stk::topology::TET_10; stk::topology m_face_topology = stk::topology::TRI_6; @@ -188,7 +188,8 @@ class Tet10Fixture namespace simple_fields { -class Tet10Fixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +Tet10Fixture { public: typedef double Scalar; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.cpp index 4149b212cd92..9c3727874335 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.cpp @@ -68,11 +68,12 @@ TetFixture::TetFixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), owns_mesh(false) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } TetFixture::TetFixture(stk::ParallelMachine pm, @@ -91,12 +92,13 @@ TetFixture::TetFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, 
&m_meta.declare_part_with_topology("tet_part", stk::topology::TET_4) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } TetFixture::TetFixture(stk::ParallelMachine pm, @@ -116,12 +118,13 @@ TetFixture::TetFixture(stk::ParallelMachine pm, m_meta(m_bulk_p->mesh_meta_data()), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("tet_part", stk::topology::TET_4) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -230,7 +233,7 @@ void TetFixture::generate_mesh(std::vector & hex_range_on_this_processor size_t nx = 0, ny = 0, nz = 0; node_x_y_z(tet_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, 
nx, ny, nz); } @@ -342,7 +345,6 @@ TetFixture::TetFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("tet_part", stk::topology::TET_4) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -369,7 +371,6 @@ TetFixture::TetFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("tet_part", stk::topology::TET_4) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -477,7 +478,7 @@ void TetFixture::generate_mesh(std::vector & hex_range_on_this_processor STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, tet_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, tet_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0, nz = 0; @@ -543,7 +544,7 @@ void TetFixture::fill_node_map(int p_rank) tet_node[3] = elem_node[tet_vert[tet][3]]; for (size_t i = 0; i<4; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, tet_node[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, tet_node[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.hpp index ca755690abd5..d54e74403e29 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TetFixture.hpp @@ -70,8 +70,10 @@ namespace fixtures { class TetFixture 
{ public: - typedef double Scalar; - typedef Field CoordFieldType; + static std::string name() { return "TetFixture"; } + + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -115,15 +117,15 @@ class TetFixture return 6*(m_nx)*(m_ny)*(m_nz); } - private: +private: std::shared_ptr m_bulk_p; - public: +public: MetaData& m_meta; BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field ; bool owns_mesh = true; stk::topology m_elem_topology = stk::topology::TET_4; stk::topology m_face_topology = stk::topology::TRI_3; @@ -181,8 +183,7 @@ class TetFixture // m_node_parts.insert(m_node_parts.end(), itr, itr + num); //} - private: - +private: typedef std::multimap NodeToProcsMMap; NodeToProcsMMap m_nodes_to_procs; @@ -196,7 +197,8 @@ class TetFixture namespace simple_fields { -class TetFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +TetFixture { public: static std::string name() { return "TetFixture"; } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.cpp index c17a426dd227..a5a3e614b09e 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.cpp @@ -70,14 +70,15 @@ TriFixtureImpl::TriFixtureImpl(MetaData& meta, m_elem_parts( 1, &m_meta.declare_part_with_topology("tri_part", DIM == 2 ? 
stk::topology::TRI_3_2D : stk::topology::SHELL_TRI_3) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), m_node_id_start(nid_start), m_elem_id_start(eid_start), m_elem_topology( DIM == 2 ? stk::topology::TRI_3_2D : stk::topology::SHELL_TRI_3), m_face_topology( DIM == 2 ? stk::topology::LINE_2 : stk::topology::TRI_3) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } template @@ -107,12 +108,14 @@ TriFixtureImpl::TriFixtureImpl(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("tri_part", DIM == 2 ? stk::topology::TRI_3_2D : stk::topology::SHELL_TRI_3) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordsName) ), m_elem_topology( DIM == 2 ? stk::topology::TRI_3_2D : stk::topology::SHELL_TRI_3), m_face_topology( DIM == 2 ? 
stk::topology::LINE_2 : stk::topology::TRI_3) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); + //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -162,7 +165,7 @@ void TriFixtureImpl::quad_x_y( EntityId entity_id, size_t &x , size_t &y ) template void TriFixtureImpl::generate_mesh(std::vector & quad_range_on_this_processor, - const CoordinateMapping & coordMap) + const CoordinateMapping & coordMap) { { //sort and unique the input elements @@ -216,7 +219,7 @@ void TriFixtureImpl::generate_mesh(std::vector & quad_range_on_this size_t nx = 0, ny = 0; node_x_y(tri_nodes[i], nx, ny); - Scalar * data = stk::mesh::field_data( m_coord_field , node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); // The CoordinateMappings are used for 2D and 3D so make sure we give it enough space to write to. std::array temp; @@ -235,7 +238,6 @@ void TriFixtureImpl::generate_mesh(std::vector & quad_range_on_this template void TriFixtureImpl::fill_node_map(int p_rank) { - std::vector element_ids_on_this_processor; const size_t p_size = m_bulk_data.parallel_size(); @@ -344,7 +346,6 @@ TriFixtureImpl::TriFixtureImpl(stk::ParallelMachine pm, m_elem_topology( DIM == 2 ? stk::topology::TRI_3_2D : stk::topology::SHELL_TRI_3), m_face_topology( DIM == 2 ? 
stk::topology::LINE_2 : stk::topology::TRI_3) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordsName); //put coord-field on all nodes: @@ -447,7 +448,7 @@ void TriFixtureImpl::generate_mesh(std::vector & quad_range_on_this STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, tri_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, tri_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0; @@ -509,7 +510,7 @@ void TriFixtureImpl::fill_node_map(int p_rank) tri_node[2] = elem_node[tri_vert[tri][2]]; for (size_t i = 0; i<3; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, tri_node[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, tri_node[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.hpp index e15be926f4c0..98d9e790795a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/TriFixture.hpp @@ -68,8 +68,8 @@ template class TriFixtureImpl { public: - typedef double Scalar; - typedef Field CoordFieldType; + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. 
Meta data is left uncommitted @@ -101,7 +101,7 @@ class TriFixtureImpl BulkData & m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field ; + CoordFieldType * m_coord_field; const size_t m_node_id_start = 1; const size_t m_elem_id_start = 1; stk::topology m_elem_topology; @@ -270,8 +270,10 @@ class TriFixtureImpl } // impl -using TriFixture = impl::TriFixtureImpl<2>; -using TriShellFixture = impl::TriFixtureImpl<3>; +using TriFixture +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") = impl::TriFixtureImpl<2>; +using TriShellFixture +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") = impl::TriFixtureImpl<3>; } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.cpp index eb85030a9371..c2c62c97cc0d 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.cpp @@ -71,11 +71,12 @@ WedgeFixture::WedgeFixture(MetaData& meta, m_bulk_data( bulk ), m_elem_parts( ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ), + m_coord_field( &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates") ), owns_mesh(false) { //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } WedgeFixture::WedgeFixture(stk::ParallelMachine pm, @@ -94,12 +95,13 @@ 
WedgeFixture::WedgeFixture(stk::ParallelMachine pm, m_meta( m_bulk_p->mesh_meta_data() ), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("wedge_part", stk::topology::WEDGE_6) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, "Coordinates") ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -120,12 +122,13 @@ WedgeFixture::WedgeFixture(stk::ParallelMachine pm, m_meta( m_bulk_p->mesh_meta_data() ), m_bulk_data(*m_bulk_p), m_elem_parts( 1, &m_meta.declare_part_with_topology("wedge_part", stk::topology::WEDGE_6) ), - m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ), - m_coord_field( stk::mesh::legacy::declare_field(m_meta, stk::topology::NODE_RANK, coordinate_name) ) + m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { + m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordinate_name); //put coord-field on all nodes: - put_field_on_mesh(m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + put_field_on_mesh(*m_coord_field, m_meta.universal_part(), m_spatial_dimension, nullptr); + stk::io::set_field_output_type(*m_coord_field, stk::io::FieldOutputType::VECTOR_3D); } @@ -233,7 +236,7 @@ void WedgeFixture::generate_mesh(std::vector & hex_range_on_this_process size_t nx = 0, ny = 0, nz = 0; node_x_y_z(wedge_nodes[i], nx, ny, nz); - Scalar * data = stk::mesh::field_data( m_coord_field 
, node ); + Scalar * data = stk::mesh::field_data( *m_coord_field , node ); coordMap.getNodeCoordinates(data, nx, ny, nz); } @@ -342,7 +345,6 @@ WedgeFixture::WedgeFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("wedge_part", stk::topology::WEDGE_6) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, "Coordinates"); //put coord-field on all nodes: @@ -370,7 +372,6 @@ WedgeFixture::WedgeFixture(stk::ParallelMachine pm, m_elem_parts( 1, &m_meta.declare_part_with_topology("wedge_part", stk::topology::WEDGE_6) ), m_node_parts( 1, &m_meta.declare_part_with_topology("node_part", stk::topology::NODE) ) { - m_meta.use_simple_fields(); m_coord_field = &m_meta.declare_field(stk::topology::NODE_RANK, coordinate_name); //put coord-field on all nodes: @@ -477,7 +478,7 @@ void WedgeFixture::generate_mesh(std::vector & hex_range_on_this_process STK_ThrowRequireMsg( m_bulk_data.is_valid(node), "This process should know about the nodes that make up its element"); - DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, wedge_nodes[i], node); + stk::mesh::fixtures::DoAddNodeSharings(m_bulk_data, m_nodes_to_procs, wedge_nodes[i], node); // Compute and assign coordinates to the node size_t nx = 0, ny = 0, nz = 0; @@ -540,7 +541,7 @@ void WedgeFixture::fill_node_map(int p_rank) wedge_node[5] = elem_node[wedge_vert[wed][5]]; for (size_t i = 0; i<6; ++i) { - AddToNodeProcsMMap(m_nodes_to_procs, wedge_node[i] , p_rank); + stk::mesh::fixtures::AddToNodeProcsMMap(m_nodes_to_procs, wedge_node[i] , p_rank); } } } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.hpp index affe72174af4..8f57a4444c24 100644 --- 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/WedgeFixture.hpp @@ -65,8 +65,9 @@ namespace fixtures { class WedgeFixture { public: - typedef double Scalar; - typedef Field CoordFieldType; + static std::string name() { return "WedgeFixture"; } + typedef double Scalar; + typedef Field CoordFieldType; /** * Set up meta data to support this fixture. Meta data is left uncommitted @@ -118,7 +119,7 @@ class WedgeFixture BulkData& m_bulk_data; PartVector m_elem_parts; PartVector m_node_parts; - CoordFieldType & m_coord_field; + CoordFieldType * m_coord_field; bool owns_mesh = true; stk::topology m_elem_topology = stk::topology::WEDGE_6; @@ -179,7 +180,6 @@ class WedgeFixture //} private: - typedef std::multimap NodeToProcsMMap; NodeToProcsMMap m_nodes_to_procs; @@ -193,7 +193,8 @@ class WedgeFixture namespace simple_fields { -class WedgeFixture +class STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this class instead") +WedgeFixture { public: static std::string name() { return "WedgeFixture"; } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.cpp index 8244d83667f1..3fe85c257c9a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.cpp @@ -127,7 +127,7 @@ static const stk::mesh::EntityIdVector hex_node_ids[number_hex] { void degenerate_mesh_bulk_data(stk::mesh::BulkData & bulk_data, const VectorFieldType & node_coord) { - static const char method[] = "stk_mesh::fixtures::heterogenous_mesh_bulk_data" ; + static const char method[] = "stk_mesh::fixtures::heterogenous_mesh_bulk_data"; bulk_data.modification_begin(); diff --git 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.hpp index 218e30be632b..09fe6a703d67 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/degenerate_mesh.hpp @@ -44,7 +44,7 @@ class MetaData; class BulkData; namespace fixtures { -typedef mesh::Field VectorFieldType ; +typedef mesh::Field VectorFieldType; void degenerate_mesh_meta_data(stk::mesh::MetaData & meta_data, VectorFieldType & node_coord); @@ -54,8 +54,10 @@ namespace simple_fields { typedef mesh::Field VectorFieldType; +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void degenerate_mesh_meta_data(stk::mesh::MetaData & meta_data, VectorFieldType & node_coord); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void degenerate_mesh_bulk_data(stk::mesh::BulkData & bulk_data, const VectorFieldType & node_coord); } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.cpp index 8c841ac64da4..08c0949806f0 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.cpp @@ -66,7 +66,7 @@ void heterogeneous_mesh_meta_data(stk::mesh::MetaData & meta_data, const VectorF stk::io::put_io_part_attribute(meta_data.declare_part_with_topology("pyramids", stk::topology::PYRAMID_5)); stk::io::put_io_part_attribute(meta_data.declare_part_with_topology("quad_shells", stk::topology::SHELL_QUAD_4)); stk::io::put_io_part_attribute(meta_data.declare_part_with_topology("tri_shells", 
stk::topology::SHELL_TRI_3)); - + const stk::mesh::FieldBase::Restriction & res = stk::mesh::find_restriction(node_coord, stk::topology::NODE_RANK , universal ); if ( res.num_scalars_per_entity() != 3 ) { @@ -225,7 +225,7 @@ void heterogeneous_mesh_bulk_data(stk::mesh::BulkData & bulk_data, const VectorF for ( unsigned i = 0 ; i < number_shell_tri ; ++i , ++elem_id ) { stk::mesh::declare_element( bulk_data, tri_shell_block, elem_id, shell_tri_node_ids[i] ); } - + for ( unsigned i = 0 ; i < node_count ; ++i ) { stk::mesh::Entity const node = bulk_data.get_entity( stk::topology::NODE_RANK , i + 1 ); diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.hpp index b1488123e7cd..1dbd4f12fd7a 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_mesh_fixtures/heterogeneous_mesh.hpp @@ -44,7 +44,7 @@ class MetaData; class BulkData; namespace fixtures { -typedef mesh::Field VectorFieldType; +typedef mesh::Field VectorFieldType; void heterogeneous_mesh_meta_data(stk::mesh::MetaData & meta_data, const VectorFieldType & node_coord); @@ -53,8 +53,10 @@ void heterogeneous_mesh_bulk_data(stk::mesh::BulkData & bulk_data, const VectorF namespace simple_fields { typedef mesh::Field VectorFieldType; +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void heterogeneous_mesh_meta_data(stk::mesh::MetaData & meta_data, const VectorFieldType & node_coord); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") void heterogeneous_mesh_bulk_data(stk::mesh::BulkData & bulk_data, const VectorFieldType & node_coord); } // namespace simple_fields diff --git 
a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_transfer_fixtures/CMakeLists.txt b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_transfer_fixtures/CMakeLists.txt index 4b6bba7e36c7..e104df98f276 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_transfer_fixtures/CMakeLists.txt +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stk_transfer_fixtures/CMakeLists.txt @@ -49,25 +49,29 @@ IF(HAVE_STK_Trilinos) NOINSTALLHEADERS ${HEADERS} SOURCES ${SOURCES} ${Gtest_NO_INSTALL_LIB_OR_HEADERS_ARG} - ) + ) ELSE() add_library(stk_transfer_fixtures ${SOURCES}) target_link_libraries(stk_transfer_fixtures PUBLIC stk_middle_mesh) target_include_directories(stk_transfer_fixtures PUBLIC - $ - $ + $ + $ ) target_include_directories(stk_transfer_fixtures PUBLIC - $ - $ + $ + $ ) target_include_directories(stk_transfer_fixtures PUBLIC - $ - $ + $ + $ ) + find_package(GTest) + target_link_libraries(stk_transfer_fixtures PUBLIC stk_unit_test_utils) + + INSTALL(TARGETS stk_transfer_fixtures EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() INSTALL(FILES ${HEADERS} DESTINATION - ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_unit_test_utils/stk_transfer_fixtures) + ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_unit_test_utils/stk_transfer_fixtures) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.cpp index a34c1bf4ab23..f9fb472b1e42 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.cpp @@ -51,6 +51,11 @@ bool areStringsEqualWithToleranceForNumbers(const std::string &expectedString, c namespace simple_fields { +bool isNear(double a, double b, double tolerance) +{ + return stk::unit_test_util::isNear(a, b, tolerance); +} + bool approximatelyEqualAsNumbers(const std::string 
&expectedWord, const std::string &actualWord, double tol) { return stk::unit_test_util::approximatelyEqualAsNumbers(expectedWord, actualWord, tol); } diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.hpp index 8145e5a64d3a..5c59c6e9546f 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/stringAndNumberComparisons.hpp @@ -34,6 +34,7 @@ #ifndef STRING_AND_NUMBER_COMPARISONS_HPP #define STRING_AND_NUMBER_COMPARISONS_HPP +#include "stk_util/stk_config.h" #include #include @@ -57,13 +58,13 @@ bool areStringsEqualWithToleranceForNumbers(const std::string &expectedString, c namespace simple_fields { -inline bool isNear(double a, double b, double tolerance) -{ - return stk::unit_test_util::isNear(a, b, tolerance); -} +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") +bool isNear(double a, double b, double tolerance); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") bool approximatelyEqualAsNumbers(const std::string &expectedWord, const std::string &actualWord, double tol); +STK_DEPRECATED_MSG("Please use the non-simple_fields-namespaced version of this function instead") bool areStringsEqualWithToleranceForNumbers(const std::string &expectedString, const std::string &actualString, double tol); } // namespace simple_fields diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/timer.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/timer.hpp index aedd101775cf..5d6317308af9 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/timer.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/timer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are 
permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_PERFORMANCE_TIMER_HPP #define STK_PERFORMANCE_TIMER_HPP @@ -100,7 +100,7 @@ class BatchTimer minBatchTime(std::numeric_limits::max()), batchBaselineHwm(0), batchBaselineGpuUsage(0), - maxBatchHwm(1) + minBatchHwm(std::numeric_limits::max()) { } void initialize_batch_timer() @@ -131,13 +131,13 @@ class BatchTimer } size_t batchCpuMemUsage = stk::get_max_hwm_across_procs(communicator) - batchBaselineHwm; size_t batchHwm = batchGpuMemUsage + batchCpuMemUsage; - maxBatchHwm = std::max(maxBatchHwm, batchHwm); + minBatchHwm = std::min(minBatchHwm, batchHwm); } void print_batch_timing(unsigned iterationCount, size_t userProvidedHwm = 0) { - size_t hwmToPrint = userProvidedHwm > 0 ? userProvidedHwm : maxBatchHwm; + size_t hwmToPrint = userProvidedHwm > 0 ? 
userProvidedHwm : minBatchHwm; stk::print_stats_for_performance_compare(std::cout, minBatchTime, hwmToPrint, iterationCount, communicator); } @@ -160,7 +160,7 @@ class BatchTimer double minBatchTime; size_t batchBaselineHwm; size_t batchBaselineGpuUsage; - size_t maxBatchHwm; + size_t minBatchHwm; std::vector batchBaselineMemBuffer; }; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/unittestMeshUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/unittestMeshUtils.hpp index 3450f6b623ed..5634263a66f8 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/unittestMeshUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/unittestMeshUtils.hpp @@ -63,14 +63,18 @@ void put_elements_into_part(stk::mesh::BulkData& bulkData, const std::vector & entries); } // namespace simple_fields diff --git a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureDecomposer.hpp b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureDecomposer.hpp index 0b6449a5e2f8..7b65765f9ad8 100644 --- a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureDecomposer.hpp +++ b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureDecomposer.hpp @@ -43,7 +43,7 @@ #include #include -class MeshFixtureDecomposer : public stk::unit_test_util::simple_fields::MeshFixture +class MeshFixtureDecomposer : public stk::unit_test_util::MeshFixture { protected: MeshFixtureDecomposer() diff --git a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NDecomposer.hpp b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NDecomposer.hpp index e0b074c91eef..504393a7d6c5 100644 --- a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NDecomposer.hpp +++ b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NDecomposer.hpp @@ -44,7 +44,7 @@ #include #include -class MeshFixtureM2NDecomposer : public stk::unit_test_util::simple_fields::MeshFixture +class MeshFixtureM2NDecomposer : public stk::unit_test_util::MeshFixture { protected: MeshFixtureM2NDecomposer() diff --git 
a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NRebalance.hpp b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NRebalance.hpp index 451574ac235d..35d153097033 100644 --- a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NRebalance.hpp +++ b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureM2NRebalance.hpp @@ -62,7 +62,7 @@ struct AssemblyGrouping { }; -class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshFixture +class MeshFixtureM2NRebalance : public stk::unit_test_util::MeshFixture { protected: MeshFixtureM2NRebalance() @@ -83,7 +83,7 @@ class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshF void setup_initial_mesh(const std::string & inputMeshSpec) { - stk::unit_test_util::simple_fields::generated_mesh_to_file_in_serial(inputMeshSpec, get_input_file_name()); + stk::unit_test_util::generated_mesh_to_file_in_serial(inputMeshSpec, get_input_file_name()); read_serial_mesh_with_auto_decomp(); } @@ -91,7 +91,7 @@ class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshF const std::vector& originalTopologies) { if (get_parallel_rank() == 0) { - stk::unit_test_util::simple_fields::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); tMesh.setup_mesh(inputMeshDesc, get_input_file_name()); for (const OriginalTopology & ot : originalTopologies) { @@ -108,7 +108,7 @@ class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshF const std::vector& assemblies) { if (get_parallel_rank() == 0) { - stk::unit_test_util::simple_fields::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); tMesh.setup_mesh(inputMeshDesc, get_input_file_name()); for (const AssemblyGrouping & ag : assemblies) { @@ -131,7 +131,7 @@ class MeshFixtureM2NRebalance : public 
stk::unit_test_util::simple_fields::MeshF void setup_initial_mesh_textmesh(const std::string & inputMeshDesc) { - stk::unit_test_util::simple_fields::text_mesh_to_file_in_serial(inputMeshDesc, get_input_file_name()); + stk::unit_test_util::text_mesh_to_file_in_serial(inputMeshDesc, get_input_file_name()); allocate_bulk(stk::mesh::BulkData::AUTO_AURA); m_ioBroker.property_add(Ioss::Property("DECOMPOSITION_METHOD", "RCB")); @@ -143,7 +143,7 @@ class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshF m_transientTimeSteps = {0.0, 1.0, 2.0}; m_transientFieldName = "transient_field"; m_globalVariableName = "global_variable"; - stk::unit_test_util::simple_fields::generated_mesh_with_transient_data_to_file_in_serial(inputMeshSpec, + stk::unit_test_util::generated_mesh_with_transient_data_to_file_in_serial(inputMeshSpec, get_input_file_name(), m_transientFieldName, stk::topology::NODE_RANK, @@ -191,10 +191,10 @@ class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshF if (get_parallel_rank() == 0) { for (size_t i = 0; i < elemsPerProc.size(); ++i) { - stk::unit_test_util::simple_fields::MeshFromFile finalMesh(MPI_COMM_SELF); + stk::unit_test_util::MeshFromFile finalMesh(MPI_COMM_SELF); finalMesh.fill_from_serial(get_subdomain_filename(elemsPerProc.size(), i)); - stk::unit_test_util::simple_fields::TransientVerifier verifier(MPI_COMM_SELF); + stk::unit_test_util::TransientVerifier verifier(MPI_COMM_SELF); verifier.verify_time_steps(finalMesh, m_transientTimeSteps); verifier.verify_global_variables_at_each_time_step(finalMesh, m_globalVariableName, m_transientTimeSteps); verifier.verify_num_transient_fields(finalMesh, 2); @@ -230,7 +230,7 @@ class MeshFixtureM2NRebalance : public stk::unit_test_util::simple_fields::MeshF { if (get_parallel_rank() == 0) { for (size_t subdomain = 0; subdomain < m_numFinalProcs; ++subdomain) { - stk::unit_test_util::simple_fields::MeshFromFile finalMesh(MPI_COMM_SELF); + 
stk::unit_test_util::MeshFromFile finalMesh(MPI_COMM_SELF); finalMesh.fill_from_serial(get_subdomain_filename(m_numFinalProcs, subdomain)); for (const AssemblyGrouping & ag : expectedAssemblies) { diff --git a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureRebalance.hpp b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureRebalance.hpp index 6a8020a731b9..17dba076216f 100644 --- a/packages/stk/stk_unit_tests/stk_balance/MeshFixtureRebalance.hpp +++ b/packages/stk/stk_unit_tests/stk_balance/MeshFixtureRebalance.hpp @@ -62,7 +62,7 @@ struct AssemblyGrouping { }; -class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixture +class MeshFixtureRebalance : public stk::unit_test_util::MeshFixture { protected: MeshFixtureRebalance() @@ -85,7 +85,7 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt void setup_initial_mesh(const std::string & inputMeshSpec) { - stk::unit_test_util::simple_fields::generated_mesh_to_file_in_serial(inputMeshSpec, get_input_file_name()); + stk::unit_test_util::generated_mesh_to_file_in_serial(inputMeshSpec, get_input_file_name()); read_serial_mesh_with_auto_decomp(); } @@ -105,7 +105,7 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt const std::vector& originalTopologies) { if (get_parallel_rank() == 0) { - stk::unit_test_util::simple_fields::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); tMesh.setup_mesh(inputMeshDesc, get_input_file_name()); for (const OriginalTopology & ot : originalTopologies) { @@ -122,7 +122,7 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt const std::vector& assemblies) { if (get_parallel_rank() == 0) { - stk::unit_test_util::simple_fields::TextMeshToFile tMesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::TextMeshToFile tMesh(MPI_COMM_SELF, 
stk::mesh::BulkData::AUTO_AURA); tMesh.setup_mesh(inputMeshDesc, get_input_file_name()); for (const AssemblyGrouping & ag : assemblies) { @@ -145,7 +145,7 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt void setup_initial_mesh_textmesh(const std::string & inputMeshDesc) { - stk::unit_test_util::simple_fields::text_mesh_to_file_in_serial(inputMeshDesc, get_input_file_name()); + stk::unit_test_util::text_mesh_to_file_in_serial(inputMeshDesc, get_input_file_name()); allocate_bulk(stk::mesh::BulkData::AUTO_AURA); m_ioBroker.property_add(Ioss::Property("DECOMPOSITION_METHOD", "RCB")); @@ -157,7 +157,7 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt m_transientTimeSteps = {0.0, 1.0, 2.0}; m_transientFieldName = "transient_field"; m_globalVariableName = "global_variable"; - stk::unit_test_util::simple_fields::generated_mesh_with_transient_data_to_file_in_serial(inputMeshSpec, + stk::unit_test_util::generated_mesh_with_transient_data_to_file_in_serial(inputMeshSpec, get_input_file_name(), m_transientFieldName, stk::topology::NODE_RANK, @@ -204,10 +204,10 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt if (get_parallel_rank() == 0) { for (size_t i = 0; i < elemsPerProc.size(); ++i) { - stk::unit_test_util::simple_fields::MeshFromFile finalMesh(MPI_COMM_SELF); + stk::unit_test_util::MeshFromFile finalMesh(MPI_COMM_SELF); finalMesh.fill_from_serial(get_subdomain_filename(elemsPerProc.size(), i)); - stk::unit_test_util::simple_fields::TransientVerifier verifier(MPI_COMM_SELF); + stk::unit_test_util::TransientVerifier verifier(MPI_COMM_SELF); verifier.verify_time_steps(finalMesh, m_transientTimeSteps); verifier.verify_global_variables_at_each_time_step(finalMesh, m_globalVariableName, m_transientTimeSteps); verifier.verify_num_transient_fields(finalMesh, 2); @@ -243,7 +243,7 @@ class MeshFixtureRebalance : public stk::unit_test_util::simple_fields::MeshFixt { if 
(get_parallel_rank() == 0) { for (size_t subdomain = 0; subdomain < m_balanceSettings.get_num_output_processors(); ++subdomain) { - stk::unit_test_util::simple_fields::MeshFromFile finalMesh(MPI_COMM_SELF); + stk::unit_test_util::MeshFromFile finalMesh(MPI_COMM_SELF); finalMesh.fill_from_serial(get_subdomain_filename(m_balanceSettings.get_num_output_processors(), subdomain)); for (const AssemblyGrouping & ag : expectedAssemblies) { diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceFromField.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceFromField.cpp index 6eb811e1b2dc..5099e07f7711 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceFromField.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceFromField.cpp @@ -91,7 +91,7 @@ class BalanceFromField : public MeshFixtureRebalance m_transientTimeSteps = {0.0, 1.0, 2.0}; m_transientFieldName = "weight_field"; m_globalVariableName = "global_variable"; - stk::unit_test_util::simple_fields::generated_mesh_with_transient_data_to_file_in_serial(inputMeshSpec, + stk::unit_test_util::generated_mesh_with_transient_data_to_file_in_serial(inputMeshSpec, get_input_file_name(), m_transientFieldName, stk::topology::ELEM_RANK, diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceNodes.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceNodes.cpp index 4c7181c45811..2d23297bd802 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceNodes.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestBalanceNodes.cpp @@ -7,7 +7,7 @@ namespace { -class TestBalanceNodes : public stk::unit_test_util::simple_fields::MeshFixture +class TestBalanceNodes : public stk::unit_test_util::MeshFixture { protected: }; diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestBlockWeights.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestBlockWeights.cpp index 0be15648a19c..e022f8b00b6e 100644 --- 
a/packages/stk/stk_unit_tests/stk_balance/UnitTestBlockWeights.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestBlockWeights.cpp @@ -59,7 +59,7 @@ class BalanceSettingsTester : public stk::balance::GraphCreationSettings using BlockWeightsMap = std::map; -class TestBlockWeights : public stk::unit_test_util::simple_fields::MeshFixture +class TestBlockWeights : public stk::unit_test_util::MeshFixture { protected: void set_up_1x1x8_mesh_one_block() diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch.cpp index 205b87b308e7..b756682fe826 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch.cpp @@ -7,7 +7,7 @@ #include "stk_util/parallel/ParallelReduce.hpp" #include "stk_util/parallel/ParallelReduceBool.hpp" -class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture +class BoundingBoxSearch : public stk::unit_test_util::MeshFixture { protected: void make_4_unit_quad_shell_connected_mesh() @@ -45,8 +45,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture 0,1,0, 1,1,0, 2,1,0, 3,1,0, 4,1,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_4_unit_quad_shell_single_gap_mesh(double gapSize) @@ -84,8 +84,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture 0,1,0, 1,1,0, 2,1,0, 2+gapSize,1,0, 3+gapSize,1,0, 4+gapSize,1,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), 
stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_4_unit_quad_shell_all_gap_mesh(double gapSize) @@ -123,8 +123,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture 0,1,0, 1,1,0, 1+gapSize,1,0, 2+gapSize,1,0, 2+2*gapSize,1,0, 3+2*gapSize,1,0, 3+3*gapSize,1,0, 4+3*gapSize,1,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_6_unit_tri_shell_connected_mesh() @@ -170,8 +170,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture 0,1,0, 1,1,0, 2,1,0, 3,1,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_6_unit_tri_shell_single_gap_mesh(double gapSize) @@ -218,8 +218,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture 0,1,0, 1,1,0, 1+gapSize,1,0, 2+gapSize,1,0, 3+gapSize,1,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_4_unit_hex_single_gap_mesh(double gapSize) @@ -261,8 +261,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture 4+gapSize,0,0, 4+gapSize,1,0, 4+gapSize,1,1, 4+gapSize,0,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + 
get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_two_particle_mesh() @@ -280,8 +280,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture } std::vector coordinates = { 0,0,0, 2,0,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_particle_unit_quad_shell_mesh() @@ -299,8 +299,8 @@ class BoundingBoxSearch : public stk::unit_test_util::simple_fields::MeshFixture } std::vector coordinates = { 0,0,0, 1,0,0, 1,1,0, 0,1,0, 2,0,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } bool check_edges(const std::vector & graphEdges, @@ -350,7 +350,7 @@ TEST_F(BoundingBoxSearch, fourUnitQuadShell_connected) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -376,7 +376,7 @@ TEST_F(BoundingBoxSearch, fourUnitQuadShell_tooLargeTolerance) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(1.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); 
get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -402,7 +402,7 @@ TEST_F(BoundingBoxSearch, fourUnitQuadShell_smallGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -429,7 +429,7 @@ TEST_F(BoundingBoxSearch, fourUnitQuadShell_largeGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.6); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -456,7 +456,7 @@ TEST_F(BoundingBoxSearch, fourUnitQuadShell_allGaps) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -483,7 +483,7 @@ TEST_F(BoundingBoxSearch, sixUnitTriShell_connected) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -509,7 +509,7 @@ TEST_F(BoundingBoxSearch, sixUnitTriShell_smallGap) // if 
(stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.09); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -536,7 +536,7 @@ TEST_F(BoundingBoxSearch, sixUnitTriShell_largeGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -575,7 +575,7 @@ TEST_F(BoundingBoxSearch, fourUnitHex_smallGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -602,7 +602,7 @@ TEST_F(BoundingBoxSearch, particleParticle_smallTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForParticleSearch(2.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -628,7 +628,7 @@ TEST_F(BoundingBoxSearch, particleParticle_largeTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + 
stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForParticleSearch(4.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -654,7 +654,7 @@ TEST_F(BoundingBoxSearch, particleQuadShell_smallTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.6); balanceSettings.setToleranceForParticleSearch(1.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); @@ -681,7 +681,7 @@ TEST_F(BoundingBoxSearch, particleQuadShell_smallParticleTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(1.1); balanceSettings.setToleranceForParticleSearch(1.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); @@ -708,7 +708,7 @@ TEST_F(BoundingBoxSearch, particleQuadShell_smallFaceTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.6); balanceSettings.setToleranceForParticleSearch(2.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); @@ -735,7 +735,7 @@ TEST_F(BoundingBoxSearch, particleQuadShell_largeTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(1.1); balanceSettings.setToleranceForParticleSearch(2.2); 
setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch2D.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch2D.cpp index 1535741c9ab1..891357a3c2f5 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch2D.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestBoundingBoxSearch2D.cpp @@ -7,7 +7,7 @@ #include "stk_util/parallel/ParallelReduce.hpp" #include "stk_util/parallel/ParallelReduceBool.hpp" -class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixture2D +class BoundingBoxSearch2D : public stk::unit_test_util::MeshFixture2D { protected: void make_4_unit_quad_connected_mesh() @@ -45,8 +45,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu 0,1, 1,1, 2,1, 3,1, 4,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_4_unit_quad_single_gap_mesh(double gapSize) @@ -84,8 +84,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu 0,1, 1,1, 2,1, 2+gapSize,1, 3+gapSize,1, 4+gapSize,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_4_unit_quad_all_gap_mesh(double gapSize) @@ -123,8 +123,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu 0,1, 1,1, 1+gapSize,1, 2+gapSize,1, 2+2*gapSize,1, 3+2*gapSize,1, 3+3*gapSize,1, 4+3*gapSize,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_6_unit_tri_connected_mesh() @@ -170,8 +170,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu 0,1, 1,1, 2,1, 3,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_6_unit_tri_single_gap_mesh(double gapSize) @@ -217,8 +217,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu 0,1, 1,1, 1+gapSize,1, 2+gapSize,1, 3+gapSize,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_two_particle_mesh() @@ -236,8 +236,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu } std::vector coordinates = { 0,0, 2,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void make_particle_unit_quad_mesh() @@ -255,8 +255,8 @@ class BoundingBoxSearch2D : public stk::unit_test_util::simple_fields::MeshFixtu } std::vector coordinates = { 0,0, 1,0, 1,1, 0,1, 2,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), 
stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } bool check_edges(const std::vector & graphEdges, @@ -306,7 +306,7 @@ TEST_F(BoundingBoxSearch2D, fourUnitQuad_connected) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -332,7 +332,7 @@ TEST_F(BoundingBoxSearch2D, fourUnitQuad_tooLargeTolerance) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(1.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -358,7 +358,7 @@ TEST_F(BoundingBoxSearch2D, fourUnitQuad_smallGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -385,7 +385,7 @@ TEST_F(BoundingBoxSearch2D, fourUnitQuad_largeGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.6); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -412,7 +412,7 @@ 
TEST_F(BoundingBoxSearch2D, fourUnitQuad_allGaps) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -439,7 +439,7 @@ TEST_F(BoundingBoxSearch2D, sixUnitTri_connected) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.7); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -465,7 +465,7 @@ TEST_F(BoundingBoxSearch2D, sixUnitTri_smallGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.09); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -492,7 +492,7 @@ TEST_F(BoundingBoxSearch2D, sixUnitTri_largeGap) // if (stk::parallel_machine_size(get_comm()) > 4) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -519,7 +519,7 @@ TEST_F(BoundingBoxSearch2D, particleParticle_smallTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - 
stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForParticleSearch(2.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -545,7 +545,7 @@ TEST_F(BoundingBoxSearch2D, particleParticle_largeTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForParticleSearch(4.1); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); get_meta().set_coordinate_field_name(balanceSettings.getCoordinateFieldName()); @@ -571,7 +571,7 @@ TEST_F(BoundingBoxSearch2D, particleQuad_smallTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(0.6); balanceSettings.setToleranceForParticleSearch(1.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); @@ -598,7 +598,7 @@ TEST_F(BoundingBoxSearch2D, particleQuad_smallParticleTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(1.1); balanceSettings.setToleranceForParticleSearch(1.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); @@ -625,7 +625,7 @@ TEST_F(BoundingBoxSearch2D, particleQuad_smallFaceTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; 
balanceSettings.setToleranceForFaceSearch(0.6); balanceSettings.setToleranceForParticleSearch(2.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); @@ -652,7 +652,7 @@ TEST_F(BoundingBoxSearch2D, particleQuad_largeTolerance) // if (stk::parallel_machine_size(get_comm()) > 2) return; - stk::unit_test_util::simple_fields::StkBalanceUnitTestSettings balanceSettings; + stk::unit_test_util::StkBalanceUnitTestSettings balanceSettings; balanceSettings.setToleranceForFaceSearch(1.1); balanceSettings.setToleranceForParticleSearch(2.2); setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestColoring.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestColoring.cpp index 8945a43b9a89..8de5407a1db8 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestColoring.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestColoring.cpp @@ -19,7 +19,7 @@ namespace { using stk::unit_test_util::build_mesh; -class BasicColoring : public stk::unit_test_util::simple_fields::MeshFixture {}; +class BasicColoring : public stk::unit_test_util::MeshFixture {}; void test_adjacent_elements_have_different_coloring(const stk::mesh::BulkData& bulk) { @@ -143,15 +143,15 @@ TEST(ColorByTopology, colorHeterogeneousMesh) std::shared_ptr bulk = build_mesh(3, MPI_COMM_WORLD); stk::mesh::MetaData& meta = bulk->mesh_meta_data(); - stk::mesh::fixtures::simple_fields::VectorFieldType & node_coord = meta.declare_field(stk::topology::NODE_RANK, + stk::mesh::fixtures::VectorFieldType & node_coord = meta.declare_field(stk::topology::NODE_RANK, "coordinates"); stk::mesh::put_field_on_mesh(node_coord, meta.universal_part(), 3, nullptr); - stk::mesh::fixtures::simple_fields::heterogeneous_mesh_meta_data( meta , node_coord ); + stk::mesh::fixtures::heterogeneous_mesh_meta_data( meta , node_coord ); declare_color_fields(meta); meta.commit(); - stk::mesh::fixtures::simple_fields::heterogeneous_mesh_bulk_data( *bulk , node_coord ); + 
stk::mesh::fixtures::heterogeneous_mesh_bulk_data( *bulk , node_coord ); stk::balance::BasicColoringByTopologySettings coloringSettings; // ColorMeshWithColoringFieldsSettings coloringSettings; @@ -188,7 +188,7 @@ void quad_tri_mesh_meta_data(stk::mesh::MetaData & meta_data, } } -class Color2DMesh : public stk::unit_test_util::simple_fields::MeshFixture2D {}; +class Color2DMesh : public stk::unit_test_util::MeshFixture2D {}; TEST_F(Color2DMesh, colorHeterogeneousMeshWithQuadsSurroundingTriangles) { @@ -216,8 +216,8 @@ TEST_F(Color2DMesh, colorHeterogeneousMeshWithQuadsSurroundingTriangles) std::vector coordinates = { 0,3, 1,3, 2,3, 3,3, 0,2, 1,2, 2,2, 3,2, 0,1, 1,1, 2,1, 3,1, 0,0, 1,0, 2,0, 3,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); ColorMeshWithColoringFieldsSettings coloringSettings; bool meshIsColored = stk::balance::colorStkMesh(coloringSettings, get_bulk()); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestCommandLineParsing.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestCommandLineParsing.cpp index 506ecf6c0512..a0404b91e07a 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestCommandLineParsing.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestCommandLineParsing.cpp @@ -9,7 +9,7 @@ #include "stk_mesh/base/GetEntities.hpp" #include "stk_balance/setup/DefaultSettings.hpp" -class BalanceCommandLine : public stk::unit_test_util::simple_fields::MeshFixture +class BalanceCommandLine : public stk::unit_test_util::MeshFixture { protected: BalanceCommandLine() diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestCrossProcessorEdge.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestCrossProcessorEdge.cpp index 14a2ac0855cb..fb712a5d7579 100644 --- 
a/packages/stk/stk_unit_tests/stk_balance/UnitTestCrossProcessorEdge.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestCrossProcessorEdge.cpp @@ -25,7 +25,7 @@ bool are_edges_equal(const Edge &edge, const stk::balance::GraphEdge &graphEdge) //////////////////////////////////////////////////////////////////////////////////////////// -class GraphCrossProc : public stk::unit_test_util::simple_fields::MeshFixture {}; +class GraphCrossProc : public stk::unit_test_util::MeshFixture {}; TEST_F(GraphCrossProc, checkEdgeWithAura) { diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestDiagnosticsComputation.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestDiagnosticsComputation.cpp index c0227f360e5e..bca04706f40a 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestDiagnosticsComputation.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestDiagnosticsComputation.cpp @@ -44,7 +44,7 @@ #include #include -class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::MeshFixture +class TestDiagnosticsComputation : public stk::unit_test_util::MeshFixture { protected: TestDiagnosticsComputation() @@ -108,7 +108,7 @@ class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me void build_mesh(const std::string & meshDesc) { setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); MPI_Barrier(get_comm()); } @@ -116,7 +116,7 @@ class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me stk::io::StkMeshIoBroker & ioBroker) { const std::string tempFileName = "tempFile.g"; - stk::unit_test_util::simple_fields::TextMeshToFile tMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::TextMeshToFile tMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA); tMesh.setup_mesh(meshDesc, tempFileName); tMesh.write_mesh(); @@ -149,7 +149,7 @@ class 
TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me 0,0,0, 1,0,0, 2,0,0, 3,-1,0, 3,1,0 }; - return stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates); + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates); } std::string mesh_desc_four_shells_in_square() { @@ -164,7 +164,7 @@ class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me 0,2,0, 1,2,0, 2,2,0 }; - return stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates); + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates); } std::string mesh_desc_three_hex_in_row() { @@ -179,7 +179,7 @@ class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me 3,0,0, 3,1,0, 3,1,1, 3,0,1 }; - return stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates); + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates); } std::string mesh_desc_four_hex_in_square() { @@ -197,7 +197,7 @@ class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me 0,1,2, 1,1,2, 2,1,2 }; - return stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates); + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates); } std::string mesh_desc_hex_pyramid_tet() { @@ -214,7 +214,7 @@ class TestDiagnosticsComputation : public stk::unit_test_util::simple_fields::Me 2,0,0, 2,1,0, 2,1,1, 2,0,1 }; - return stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates); + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates); } template diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestElementConnectivity.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestElementConnectivity.cpp index 4c107a98a5e1..b7fbf15592cc 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestElementConnectivity.cpp +++ 
b/packages/stk/stk_unit_tests/stk_balance/UnitTestElementConnectivity.cpp @@ -8,11 +8,11 @@ namespace { -class ElementConnectivity : public stk::unit_test_util::simple_fields::MeshFixture +class ElementConnectivity : public stk::unit_test_util::MeshFixture { protected: ElementConnectivity() - : stk::unit_test_util::simple_fields::MeshFixture(3) + : stk::unit_test_util::MeshFixture(3) { setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); } @@ -24,7 +24,7 @@ TEST_F(ElementConnectivity, NumSharedNodes_DisconnectedHexes) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "0,2,HEX_8,9,10,11,12,13,14,15,16"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); @@ -39,7 +39,7 @@ TEST_F(ElementConnectivity, NumSharedNodes_HexesConnectedAtOneNode) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "0,2,HEX_8,8,9,10,11,12,13,14,15"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); @@ -54,7 +54,7 @@ TEST_F(ElementConnectivity, NumSharedNodes_HexesConnectedAtTwoNodes) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "0,2,HEX_8,7,8,9,10,11,12,13,14"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); @@ -69,7 +69,7 @@ TEST_F(ElementConnectivity, NumSharedNodes_HexesConnectedAtThreeNodes) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n" 
"0,2,HEX_8,6,7,8,9,10,11,12,13"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); @@ -84,7 +84,7 @@ TEST_F(ElementConnectivity, NumSharedNodes_HexesConnectedAtFourNodes) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n" "0,2,HEX_8,5,6,7,8,9,10,11,12"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); @@ -99,7 +99,7 @@ TEST_F(ElementConnectivity, NumSharedNodes_DegenerateHexAndTet) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,5,5,5\n" "0,2,TET_4,4,5,6,7"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestFileNames.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestFileNames.cpp index 4f6fc4a05089..4a8ebfd9a0b1 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestFileNames.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestFileNames.cpp @@ -36,7 +36,7 @@ #include "stk_balance/setup/FileValidator.hpp" #include "stk_balance/setup/Parser.hpp" -class InputSanity : public stk::unit_test_util::simple_fields::MeshFixture +class InputSanity : public stk::unit_test_util::MeshFixture { public: InputSanity() diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestGeometricMethodsWithSelector.cpp 
b/packages/stk/stk_unit_tests/stk_balance/UnitTestGeometricMethodsWithSelector.cpp index 0d73eecfa441..2efc8f227130 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestGeometricMethodsWithSelector.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestGeometricMethodsWithSelector.cpp @@ -28,7 +28,7 @@ class GeometricBalanceSettingsTester : public stk::balance::GraphCreationSetting const std::string& m_method; }; -class ZoltanGeometricMethods : public stk::unit_test_util::simple_fields::MeshFixture +class ZoltanGeometricMethods : public stk::unit_test_util::MeshFixture { protected: diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestLastStepFieldWriter.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestLastStepFieldWriter.cpp index 9ab8065fa5a7..0b663ce8a482 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestLastStepFieldWriter.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestLastStepFieldWriter.cpp @@ -119,7 +119,7 @@ size_t get_global_num_nodes_parallel(const std::string& inputFilename, MPI_Comm TEST(Stk_Balance, checkParallelAndSerialNumNodesConsistency) { MPI_Comm comm = MPI_COMM_WORLD; - std::string inputFilename = stk::unit_test_util::simple_fields::get_option("-i", "generated:4x4x4"); + std::string inputFilename = stk::unit_test_util::get_option("-i", "generated:4x4x4"); size_t goldGlobalNumNodes = get_global_num_nodes_serial(inputFilename); size_t numNodesWIthComm = get_global_num_nodes_parallel(inputFilename, comm); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestLearningZoltan2.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestLearningZoltan2.cpp index f82e734f56b1..d0e4add64640 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestLearningZoltan2.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestLearningZoltan2.cpp @@ -339,7 +339,7 @@ class LearningZoltan2Adapter : public Zoltan2::MeshAdapter 
//////////////////////////////////////////////////////////////////////////////////////////// -class UsingZoltan2 : public stk::unit_test_util::simple_fields::MeshFixture +class UsingZoltan2 : public stk::unit_test_util::MeshFixture { protected: void run_decomp_with_method(const std::string& method, int nparts, stk::mesh::EntityRank primary_rank, stk::mesh::EntityRank secondary_rank) diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestLifeCycle.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestLifeCycle.cpp index 15e793afaa18..fd8ac490de7b 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestLifeCycle.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestLifeCycle.cpp @@ -34,7 +34,7 @@ class Args const char** m_argv; }; -class TestLifeCycle : public stk::unit_test_util::simple_fields::MeshFixture +class TestLifeCycle : public stk::unit_test_util::MeshFixture { protected: TestLifeCycle() @@ -44,19 +44,19 @@ class TestLifeCycle : public stk::unit_test_util::simple_fields::MeshFixture } void build_serial_mesh() { - stk::unit_test_util::simple_fields::generated_mesh_to_file_in_serial("1x1x4", m_inFile); + stk::unit_test_util::generated_mesh_to_file_in_serial("1x1x4", m_inFile); MPI_Barrier(get_comm()); } void build_parallel_mesh() { - stk::unit_test_util::simple_fields::GeneratedMeshToFile gMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::GeneratedMeshToFile gMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA); const bool useBigIds = false; gMesh.setup_mesh("1x1x4", m_inFile, useBigIds); gMesh.write_mesh(); } void build_parallel_mesh_with_big_ids() { - stk::unit_test_util::simple_fields::GeneratedMeshToFile gMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::GeneratedMeshToFile gMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA); const bool useBigIds = true; gMesh.setup_mesh("1x1x4", m_inFile, useBigIds); gMesh.write_mesh(); @@ -66,7 +66,7 @@ class TestLifeCycle : public 
stk::unit_test_util::simple_fields::MeshFixture std::vector transientTimeSteps = {0.0, 1.0, 2.0}; std::string transientFieldName = "transient_field"; std::string globalVariableName = "global_variable"; - stk::unit_test_util::simple_fields::generated_mesh_with_transient_data_to_file_in_serial("1x1x4", + stk::unit_test_util::generated_mesh_with_transient_data_to_file_in_serial("1x1x4", m_inFile, transientFieldName, stk::topology::NODE_RANK, @@ -80,7 +80,7 @@ class TestLifeCycle : public stk::unit_test_util::simple_fields::MeshFixture std::vector transientTimeSteps = {0.0, 1.0, 2.0}; std::string transientFieldName = "transient_field"; std::string globalVariableName = "global_variable"; - stk::unit_test_util::simple_fields::GeneratedMeshToFileWithTransientFields gMesh(get_comm(), + stk::unit_test_util::GeneratedMeshToFileWithTransientFields gMesh(get_comm(), stk::mesh::BulkData::AUTO_AURA, transientFieldName, stk::topology::NODE_RANK); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestLogFile.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestLogFile.cpp index 85c30d043dc9..a2151787dcec 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestLogFile.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestLogFile.cpp @@ -42,7 +42,7 @@ namespace { -class TestLogFile : public stk::unit_test_util::simple_fields::MeshFixture +class TestLogFile : public stk::unit_test_util::MeshFixture { protected: void clean_up_file(const std::string & fileName) @@ -55,7 +55,7 @@ class TestLogFile : public stk::unit_test_util::simple_fields::MeshFixture void make_dummy_mesh(const std::string & meshFileName) { if (get_parallel_rank() == 0) { - stk::unit_test_util::simple_fields::TextMeshToFile mesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); + stk::unit_test_util::TextMeshToFile mesh(MPI_COMM_SELF, stk::mesh::BulkData::AUTO_AURA); mesh.setup_mesh("0,1,HEX_8,1,2,3,4,5,6,7,8,block_1", meshFileName); mesh.write_mesh(); } diff --git 
a/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NCommandLineParsing.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NCommandLineParsing.cpp index d351864eb2f7..a02fdc073bef 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NCommandLineParsing.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NCommandLineParsing.cpp @@ -5,7 +5,7 @@ namespace { -class M2NBalanceCommandLine : public stk::unit_test_util::simple_fields::MeshFixture +class M2NBalanceCommandLine : public stk::unit_test_util::MeshFixture { protected: M2NBalanceCommandLine() diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NLogFile.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NLogFile.cpp index 72c981f9d223..b32601ef1c24 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NLogFile.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestM2NLogFile.cpp @@ -44,7 +44,7 @@ namespace { -class TestM2NLogFile : public stk::unit_test_util::simple_fields::MeshFixture +class TestM2NLogFile : public stk::unit_test_util::MeshFixture { protected: void clean_up_file(const std::string & fileName) diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestMechanismBuster.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestMechanismBuster.cpp index 88572f19c98c..c03d3b8cc816 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestMechanismBuster.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestMechanismBuster.cpp @@ -14,7 +14,7 @@ namespace { -class MechanismMesh2x2 : public stk::unit_test_util::simple_fields::MeshFixture +class MechanismMesh2x2 : public stk::unit_test_util::MeshFixture { protected: void setup_mechanistic_mesh(stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -115,7 +115,7 @@ TEST_F(MechanismMesh2x2, move_components) } -class LotsOfComponentsMesh : public stk::unit_test_util::simple_fields::MeshFixture +class LotsOfComponentsMesh : public stk::unit_test_util::MeshFixture { protected: void 
setup_mechanistic_mesh(stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -173,7 +173,7 @@ TEST_F(LotsOfComponentsMesh, detection_without_aura) } -class GlobalMeshWithMechanism : public stk::unit_test_util::simple_fields::MeshFixture +class GlobalMeshWithMechanism : public stk::unit_test_util::MeshFixture { protected: void setup_mechanistic_mesh(stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -227,11 +227,11 @@ TEST_F(GlobalMeshWithMechanism, detection_without_aura) } } -class MeshTest : public stk::unit_test_util::simple_fields::MeshFixture {}; +class MeshTest : public stk::unit_test_util::MeshFixture {}; TEST_F(MeshTest, forExodusFile) { - std::string filename = stk::unit_test_util::simple_fields::get_option("-i", "generated:1x1x100"); + std::string filename = stk::unit_test_util::get_option("-i", "generated:1x1x100"); setup_mesh(filename, stk::mesh::BulkData::AUTO_AURA); stk::balance::GraphCreationSettings graphSettings; diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestSearchTolerance.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestSearchTolerance.cpp index 0f1a82662803..b88eaa30ef4d 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestSearchTolerance.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestSearchTolerance.cpp @@ -14,7 +14,7 @@ namespace { -class SearchToleranceTest : public stk::unit_test_util::simple_fields::MeshFixture {}; +class SearchToleranceTest : public stk::unit_test_util::MeshFixture {}; TEST_F(SearchToleranceTest, faceOfCube) { @@ -69,7 +69,7 @@ TEST_F(SearchToleranceTest, faceWithDifferentEdgeLengths) } } -class SearchToleranceTester : public stk::unit_test_util::simple_fields::MeshFixture +class SearchToleranceTester : public stk::unit_test_util::MeshFixture { protected: @@ -99,8 +99,8 @@ class SearchToleranceTester : public stk::unit_test_util::simple_fields::MeshFix 1,eps+2,1, 0,eps+2,1, }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } unsigned get_num_search_results_with_app_settings(const stk::balance::GraphCreationSettings &balanceSettings) diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestSettingVertexWeights.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestSettingVertexWeights.cpp index 4686694122a6..2c75e3092c6f 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestSettingVertexWeights.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestSettingVertexWeights.cpp @@ -14,7 +14,7 @@ namespace //////////////////////////////////////////////////////////////////////////////////////////// -class VertexWeightSettings : public stk::unit_test_util::simple_fields::MeshFixture +class VertexWeightSettings : public stk::unit_test_util::MeshFixture { public: diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderElements.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderElements.cpp index af9affe8b612..f50bc0acb4f1 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderElements.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderElements.cpp @@ -11,7 +11,7 @@ namespace { -class SpiderElement : public stk::unit_test_util::simple_fields::MeshFixture +class SpiderElement : public stk::unit_test_util::MeshFixture { protected: SpiderElement() @@ -603,7 +603,7 @@ void compare_identical_volume_decompositions(stk::balance::BalanceMesh & meshNod TEST_F(SpiderElement, cubeMeshWithSpider_ParticleBodyInsensitivity) { - const unsigned meshSize = stk::unit_test_util::simple_fields::get_command_line_option("--size", 2); + const unsigned meshSize = stk::unit_test_util::get_command_line_option("--size", 2); bool addParticleBody = false; const std::string fileNameNode = "cube_spider_node.g"; diff --git 
a/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderMeshSetup.hpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderMeshSetup.hpp index d278c1c403da..677c522afa90 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderMeshSetup.hpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestSpiderMeshSetup.hpp @@ -25,7 +25,7 @@ void make_mesh_non_spider_no_volume_elements(stk::mesh::BulkData & bulk) 1,1,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -58,7 +58,7 @@ void make_mesh_non_spider_not_enough_legs(stk::mesh::BulkData & bulk) 2,2,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -92,7 +92,7 @@ void make_mesh_one_spider_no_body_element(stk::mesh::BulkData & bulk) 2,2,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -127,7 +127,7 @@ void make_mesh_one_spider_particle_body(stk::mesh::BulkData & bulk) 2,2,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -163,7 +163,7 @@ void make_mesh_one_spider_beam_body(stk::mesh::BulkData & bulk) 2,3,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -205,7 +205,7 @@ void make_mesh_compound_spider_beam_body(stk::mesh::BulkData & bulk) 6,2,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -248,7 +248,7 @@ void make_mesh_two_spiders_particle_body(stk::mesh::BulkData & bulk) 6,2,0.5 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } inline @@ -260,7 +260,6 @@ void write_serial_cube_mesh_with_spider(unsigned meshSize, bool addParticleBody, builder.set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA); auto bulk = builder.create(); stk::mesh::MetaData & meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part & block2Part = meta.declare_part_with_topology("block_2", stk::topology::BEAM_2); stk::mesh::Part & block3Part = meta.declare_part_with_topology("block_3", stk::topology::PARTICLE); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalanceDecomposition.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalanceDecomposition.cpp index dd1402fe49a9..0615411d0e62 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalanceDecomposition.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalanceDecomposition.cpp @@ -43,7 +43,7 @@ #include #include -class StkBalanceDecomposition : public stk::unit_test_util::simple_fields::MeshFixture +class StkBalanceDecomposition : public stk::unit_test_util::MeshFixture { protected: 
StkBalanceDecomposition() @@ -120,7 +120,7 @@ TEST_F(StkBalanceDecomposition, 6Elem2ProcMesh_OneElem) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}}); balance_mesh({*get_meta().get_part("partA")}); test_mesh_element_distribution({2, 4}); // Moved the one elem from p0 to p1 @@ -131,7 +131,7 @@ TEST_F(StkBalanceDecomposition, 6Elem2ProcMesh_TwoElems) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); balance_mesh({*get_meta().get_part("partA")}); test_mesh_element_distribution({2, 4}); // Moved one of the two elems from p0 to p1 @@ -142,7 +142,7 @@ TEST_F(StkBalanceDecomposition, 6Elem2ProcMesh_TwoElemsEachProc) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); balance_mesh({*get_meta().get_part("partA")}); test_mesh_element_distribution({3, 3}); @@ -153,7 +153,7 @@ TEST_F(StkBalanceDecomposition, 6Elem2ProcMesh_TwoElemsAcrossProcBoundary) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{3, "partA"}, {4, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{3, "partA"}, {4, "partA"}}); balance_mesh({*get_meta().get_part("partA")}); test_mesh_element_distribution({3, 3}); @@ -164,7 +164,7 @@ 
TEST_F(StkBalanceDecomposition, 6Elem2ProcMesh_TwoElemsEachProcWithOneAdjacentTo if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}, {4, "partA"}, {6, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}, {4, "partA"}, {6, "partA"}}); balance_mesh({*get_meta().get_part("partA")}); test_mesh_element_distribution({3, 3}); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalancePartitioning.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalancePartitioning.cpp index cadf387fc432..9bd58fc9ea03 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalancePartitioning.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestStkBalancePartitioning.cpp @@ -9,7 +9,7 @@ #include #include -class StkBalancePartitioning : public stk::unit_test_util::simple_fields::MeshFixture +class StkBalancePartitioning : public stk::unit_test_util::MeshFixture { protected: StkBalancePartitioning() @@ -40,8 +40,8 @@ class StkBalancePartitioning : public stk::unit_test_util::simple_fields::MeshFi 0,1,1, 1,1,1, 2,1,1, 0,2,1, 1,2,1, 2,2,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void balance_mesh(const stk::ParallelMachine & decompCommunicator, @@ -125,7 +125,7 @@ TEST_F(StkBalancePartitioning, 6Elem2ProcMesh_OneElem) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}}); balance_mesh(get_bulk().parallel(), 2, 
{*get_meta().get_part("partA")}); test_partition_element_distribution({0, 1}); @@ -136,7 +136,7 @@ TEST_F(StkBalancePartitioning, 6Elem2ProcMesh_TwoElems) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); balance_mesh(get_bulk().parallel(), 2, {*get_meta().get_part("partA")}); test_partition_element_distribution({1, 1}); @@ -147,7 +147,7 @@ TEST_F(StkBalancePartitioning, 6Elem2ProcMesh_TwoElemsEachProc) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); balance_mesh(get_bulk().parallel(), 2, {*get_meta().get_part("partA")}); test_partition_element_distribution({2, 2}); @@ -158,7 +158,7 @@ TEST_F(StkBalancePartitioning, 6Elem2ProcMesh_TwoElemsAcrossProcBoundary) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{3, "partA"}, {4, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{3, "partA"}, {4, "partA"}}); balance_mesh(get_bulk().parallel(), 2, {*get_meta().get_part("partA")}); test_partition_element_distribution({1, 1}); @@ -169,7 +169,7 @@ TEST_F(StkBalancePartitioning, 6Elem2ProcMesh_TwoElemsEachProcWithOneAdjacentToP if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}, {4, "partA"}, {6, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), 
{{1, "partA"}, {3, "partA"}, {4, "partA"}, {6, "partA"}}); balance_mesh(get_bulk().parallel(), 2, {*get_meta().get_part("partA")}); test_partition_element_distribution({2, 2}); @@ -220,7 +220,7 @@ TEST_F(StkBalancePartitioning, 6Elem2to1ProcMesh_HalfDomain) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {3, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {3, "partA"}}); balance_mesh(get_bulk().parallel(), 1, {*get_meta().get_part("partA")}); test_partition_element_distribution({3}); @@ -241,8 +241,8 @@ TEST_F(StkBalancePartitioning, 4Elem2ProcMeshWithContact_EmptyOnOneProc) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_4hex_contact_perpendicular_to_proc_boundary(); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}}); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{2, "partB"}, {4, "partB"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{2, "partB"}, {4, "partB"}}); balance_mesh(get_bulk().parallel(), 2, {*get_meta().get_part("partB")}); test_partition_element_distribution({1, 1}); @@ -253,7 +253,7 @@ TEST_F(StkBalancePartitioning, 4Elem2ProcMesh_SeparateCommunicator) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x4"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {3, "partB"}, {4, "partB"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {3, "partB"}, {4, "partB"}}); if (get_parallel_rank() == 0) { balance_mesh(MPI_COMM_SELF, 2, {*get_meta().get_part("partA")}); } @@ -269,7 +269,7 @@ TEST_F(StkBalancePartitioning, 
4Elem2to1ProcMesh_SeparateCommunicator) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x4"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {3, "partB"}, {4, "partB"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {3, "partB"}, {4, "partB"}}); if (get_parallel_rank() == 0) { balance_mesh(MPI_COMM_SELF, 1, {*get_meta().get_part("partA")}); } @@ -285,7 +285,7 @@ TEST_F(StkBalancePartitioning, 4Elem2ProcMesh_SeparateCommunicator_EmptyOnOnePro if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x4"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); balance_mesh(MPI_COMM_SELF, 2, {*get_meta().get_part("partA")}); test_partition_element_distribution({1, 1}); @@ -296,7 +296,7 @@ TEST_F(StkBalancePartitioning, 4Elem2ProcMesh_Geometric_SeparateCommunicator_Emp if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x4"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); stk::balance::BasicGeometricSettings balanceSettings; balance_mesh(MPI_COMM_SELF, 2, {*get_meta().get_part("partA")}, balanceSettings); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestTransientFieldTransferById.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestTransientFieldTransferById.cpp index a0bde092178e..a64a6c3c4bf2 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestTransientFieldTransferById.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestTransientFieldTransferById.cpp @@ -110,7 +110,7 @@ bool are_bulk_data_equivalent(const stk::mesh::BulkData& bulk1, const stk::mesh: 
return false; } - stk::mesh::EntityIdVector entityCountVec1, entityCountVec2; + std::vector entityCountVec1, entityCountVec2; stk::mesh::count_entities(meta1.locally_owned_part(), bulk1, entityCountVec1); stk::mesh::count_entities(meta2.locally_owned_part(), bulk2, entityCountVec2); diff --git a/packages/stk/stk_unit_tests/stk_balance/UnitTestZoltanGraphGeneration.cpp b/packages/stk/stk_unit_tests/stk_balance/UnitTestZoltanGraphGeneration.cpp index 016d8905368a..ed121961aaa5 100644 --- a/packages/stk/stk_unit_tests/stk_balance/UnitTestZoltanGraphGeneration.cpp +++ b/packages/stk/stk_unit_tests/stk_balance/UnitTestZoltanGraphGeneration.cpp @@ -45,7 +45,7 @@ struct ElementAndPart { std::string partName; }; -class ZoltanGraphGeneration : public stk::unit_test_util::simple_fields::MeshFixture +class ZoltanGraphGeneration : public stk::unit_test_util::MeshFixture { protected: ZoltanGraphGeneration() @@ -156,7 +156,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Coloring_OneElem) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}}); fill_zoltan_graph_for_decomp(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -176,7 +176,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Coloring_TwoElems) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); fill_zoltan_graph_for_coloring(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -196,7 +196,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Coloring_TwoElemsEachProc) if (stk::parallel_machine_size(get_comm()) != 2) return; 
setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); fill_zoltan_graph_for_coloring(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -235,7 +235,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Decomp_OneElem) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}}); fill_zoltan_graph_for_decomp(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -255,7 +255,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Decomp_TwoElems) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}}); fill_zoltan_graph_for_decomp(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -275,7 +275,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Decomp_TwoElemsEachProc) if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {2, "partA"}, {4, "partA"}, {5, "partA"}}); fill_zoltan_graph_for_decomp(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -295,7 +295,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Decomp_TwoElemsAcrossProcBoundary) if (stk::parallel_machine_size(get_comm()) != 2) return; 
setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{3, "partA"}, {4, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{3, "partA"}, {4, "partA"}}); fill_zoltan_graph_for_decomp(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { @@ -315,7 +315,7 @@ TEST_F(ZoltanGraphGeneration, 6Elem2ProcMesh_Decomp_TwoElemsEachProcWithOneAdjac if (stk::parallel_machine_size(get_comm()) != 2) return; setup_initial_mesh("generated:1x1x6"); - stk::unit_test_util::simple_fields::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}, {4, "partA"}, {6, "partA"}}); + stk::unit_test_util::put_elements_into_part(get_bulk(), {{1, "partA"}, {3, "partA"}, {4, "partA"}, {6, "partA"}}); fill_zoltan_graph_for_decomp(*get_meta().get_part("partA")); if (get_parallel_rank() == 0) { diff --git a/packages/stk/stk_unit_tests/stk_expreval/UnitTestEvaluator.cpp b/packages/stk/stk_unit_tests/stk_expreval/UnitTestEvaluator.cpp index 9795488a04eb..ea98a54d788d 100644 --- a/packages/stk/stk_unit_tests/stk_expreval/UnitTestEvaluator.cpp +++ b/packages/stk/stk_unit_tests/stk_expreval/UnitTestEvaluator.cpp @@ -1165,6 +1165,7 @@ TEST(UnitTestEvaluator, testFunctionSyntax) EXPECT_TRUE(isValidFunction("random(1)")); EXPECT_TRUE(isValidFunction("random(time())")); EXPECT_TRUE(isValidFunction("cosine_ramp(x,y)")); + EXPECT_TRUE(isValidFunction("linear_ramp(x,y,z)")); EXPECT_TRUE(isValidFunction("sign(x)")); EXPECT_TRUE(isValidFunction("weibull_pdf(x, alpha, beta)")); EXPECT_TRUE(isValidFunction("normal_pdf(x, alpha, beta)")); @@ -2827,6 +2828,17 @@ TEST(UnitTestEvaluator, testFunction_cosine_ramp3) EXPECT_DOUBLE_EQ(evaluate("cosine_ramp(1.5, 0, 1)"), 1); } +TEST(UnitTestEvaluator, testFunction_linear_ramp3) +{ + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(-0.5, 0, 1)"), 0); + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(0, 0, 1)"), 0); + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(1/4, 0, 1)"), 0.25); + 
EXPECT_DOUBLE_EQ(evaluate("linear_ramp(0.5, 0, 1)"), 0.5); + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(3/4, 0, 1)"), 0.75); + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(1, 0, 1)"), 1); + EXPECT_DOUBLE_EQ(evaluate("linear_ramp(1.5, 0, 1)"), 1); +} + TEST(UnitTestEvaluator, Ngp_testFunction_cosine_ramp3) { EXPECT_DOUBLE_EQ(device_evaluate("cosine_ramp(-0.5, 0, 1)"), 0); @@ -2838,6 +2850,17 @@ TEST(UnitTestEvaluator, Ngp_testFunction_cosine_ramp3) EXPECT_DOUBLE_EQ(device_evaluate("cosine_ramp(1.5, 0, 1)"), 1); } +TEST(UnitTestEvaluator, Ngp_testFunction_linear_ramp3) +{ + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(-0.5, 0, 1)"), 0); + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(0, 0, 1)"), 0); + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(1/4, 0, 1)"), 0.25); + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(0.5, 0, 1)"), 0.5); + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(3/4, 0, 1)"), 0.75); + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(1, 0, 1)"), 1); + EXPECT_DOUBLE_EQ(device_evaluate("linear_ramp(1.5, 0, 1)"), 1); +} + TEST(UnitTestEvaluator, testFunction_cosine_ramp2) { EXPECT_DOUBLE_EQ(evaluate("cosine_ramp(-0.5, 1)"), 0); diff --git a/packages/stk/stk_unit_tests/stk_io/Assembly.hpp b/packages/stk/stk_unit_tests/stk_io/Assembly.hpp index 46b475c4f51d..4bba0acada39 100644 --- a/packages/stk/stk_unit_tests/stk_io/Assembly.hpp +++ b/packages/stk/stk_unit_tests/stk_io/Assembly.hpp @@ -245,7 +245,6 @@ class Assembly : public IOMeshFixture { stk::mesh::Selector meshSubsetSelector = !stk::mesh::Selector(stk::mesh::selectUnion(outputPartsToExclude)); stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); size_t outputFileIndex = stkIo.create_output_mesh(fileName, stk::io::WRITE_RESULTS); stkIo.set_output_selector(outputFileIndex, rank, meshSubsetSelector); diff --git a/packages/stk/stk_unit_tests/stk_io/IOMeshFixture.hpp b/packages/stk/stk_unit_tests/stk_io/IOMeshFixture.hpp index 6e1d85ddd594..6b1e75b5a4e8 100644 --- 
a/packages/stk/stk_unit_tests/stk_io/IOMeshFixture.hpp +++ b/packages/stk/stk_unit_tests/stk_io/IOMeshFixture.hpp @@ -55,7 +55,7 @@ namespace io namespace unit_test { -class IOMeshFixture : public stk::unit_test_util::simple_fields::MeshFixture +class IOMeshFixture : public stk::unit_test_util::MeshFixture { protected: stk::mesh::Part& create_io_part(const std::string& partName, diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestAccessCommSet.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestAccessCommSet.cpp index 0fbead557248..5fce555252a2 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestAccessCommSet.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestAccessCommSet.cpp @@ -53,7 +53,6 @@ TEST(UnitTestAccessCommSet, basicNodeComm) stk::ParallelMachine communicator = MPI_COMM_WORLD; stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("generated:1x1x4", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestAttributes.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestAttributes.cpp index 9467dfddd30c..098074183d58 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestAttributes.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestAttributes.cpp @@ -11,7 +11,7 @@ namespace using stk::unit_test_util::build_mesh; using stk::unit_test_util::build_mesh_no_simple_fields; -class AttributesInFile : public stk::unit_test_util::simple_fields::MeshFixture +class AttributesInFile : public stk::unit_test_util::MeshFixture { protected: const std::string filename = "fileWithAttr.e"; @@ -31,11 +31,9 @@ class AttributesInFile : public stk::unit_test_util::simple_fields::MeshFixture { setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); stk::io::StkMeshIoBroker stkIo(get_comm()); - stkIo.use_simple_fields(); stk::io::fill_mesh_preexisting(stkIo, filename, get_bulk()); stk::io::StkMeshIoBroker 
outputStkIo(get_comm()); - outputStkIo.use_simple_fields(); outputStkIo.set_bulk_data(get_bulk()); size_t outputFileIndex = outputStkIo.create_output_mesh(outputFilename, stk::io::WRITE_RESULTS); outputStkIo.set_attribute_field_ordering_stored_by_part_ordinal(stkIo.get_attribute_field_ordering_stored_by_part_ordinal()); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestCustomMeshBuilder.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestCustomMeshBuilder.cpp index 5868ff8a9445..a48e065ef7dc 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestCustomMeshBuilder.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestCustomMeshBuilder.cpp @@ -114,7 +114,6 @@ TEST(StkMeshIoBroker, useExternalBulkData) { stk::mesh::MeshBuilder meshBuilder(MPI_COMM_WORLD); std::shared_ptr bulk = meshBuilder.set_spatial_dimension(3).create(); - bulk->mesh_meta_data().use_simple_fields(); stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); stkMeshIoBroker.set_bulk_data(bulk); @@ -128,7 +127,6 @@ TEST(StkMeshIoBroker, useExternalBulkData) TEST(StkMeshIoBroker, useDefaultMeshBuilder) { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("generated:1x1x8", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -141,7 +139,6 @@ TEST(StkMeshIoBroker, useCustomMeshBuilder) { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); stkMeshIoBroker.set_mesh_builder(std::make_shared(MPI_COMM_WORLD)); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("generated:1x1x8", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -157,14 +154,12 @@ TEST(StkMeshIoBroker, useCustomMeshBuilder_afterInternalAlreadyGenerated) { { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("generated:1x1x8", stk::io::READ_MESH); 
stkMeshIoBroker.create_input_mesh(); EXPECT_ANY_THROW(stkMeshIoBroker.set_mesh_builder(std::make_shared(MPI_COMM_WORLD))); } { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database("generated:1x1x8", stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -176,7 +171,6 @@ TEST(StkMeshIoBroker, useCustomMeshBuilder_afterSetExternal) { stk::mesh::MeshBuilder meshBuilder(MPI_COMM_WORLD); std::shared_ptr bulk = meshBuilder.set_spatial_dimension(3).create(); - bulk->mesh_meta_data().use_simple_fields(); stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); stkMeshIoBroker.set_bulk_data(bulk); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestFieldNames.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestFieldNames.cpp index 5bc8adedde98..324fe09b53b8 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestFieldNames.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestFieldNames.cpp @@ -93,7 +93,6 @@ TEST(FieldNamesTest, FieldNameRenameTwice) std::vector outputFieldNames; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; size_t index = stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(index); @@ -131,7 +130,6 @@ TEST(FieldNamesTest, FieldNameWithRestart) const std::string internalClientFieldName = "Field0"; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; size_t index = stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(index); @@ -168,7 +166,6 @@ TEST(FieldNamesTest, FieldNameWithResultsAndRestart) const std::string internalClientFieldName = "Field0"; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; size_t index = 
stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(index); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestFieldTypes.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestFieldTypes.cpp index 3426814648e0..51ce96df15f7 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestFieldTypes.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestFieldTypes.cpp @@ -196,8 +196,8 @@ void test_output_field(const stk::mesh::MetaData & meta, stk::mesh::FieldBase & size_t numCopies = 1; const Ioss::CompositeVariableType* compositeVarType = dynamic_cast(varType); if (compositeVarType != nullptr) { - const Ioss::VariableType * baseVarType = compositeVarType->GetBaseType(); - numCopies = compositeVarType->GetNumCopies(); + const Ioss::VariableType * baseVarType = compositeVarType->get_base_type(); + numCopies = compositeVarType->get_num_copies(); numComponents = baseVarType->component_count(); } @@ -221,7 +221,6 @@ void create_and_test_output_field(const FieldConfig & fieldConfig, { const int spatialDimension = 3; stk::mesh::MetaData meta(spatialDimension); - meta.use_simple_fields(); stk::mesh::FieldBase & field = create_stk_field(meta, fieldConfig); @@ -238,7 +237,6 @@ void create_and_test_output_field_with_copy(const FieldConfig & fieldConfig, { const int spatialDimension = 3; stk::mesh::MetaData meta(spatialDimension); - meta.use_simple_fields(); stk::mesh::FieldBase & field = create_stk_field(meta, fieldConfig); @@ -267,7 +265,6 @@ void create_and_test_custom_output_field(const FieldConfig & fieldConfig, { const int spatialDimension = 3; stk::mesh::MetaData meta(spatialDimension); - meta.use_simple_fields(); stk::mesh::FieldBase & field = create_custom_stk_field(meta, fieldConfig); @@ -1136,10 +1133,8 @@ Ioss::Field create_io_field(const FieldConfig & fieldConfig, Ioss::Field::BasicT void create_and_test_stk_field(stk::mesh::MetaData & meta, const FieldConfig & fieldConfig, const Ioss::Field & ioField) { - const bool useCartesianForScalar = 
false; const stk::mesh::FieldBase * field = stk::io::impl::declare_stk_field_internal(meta, stk::topology::NODE_RANK, - meta.universal_part(), ioField, - useCartesianForScalar); + meta.universal_part(), ioField); ASSERT_NE(field, nullptr); const Ioss::VariableType * varType = ioField.transformed_storage(); @@ -1148,9 +1143,9 @@ void create_and_test_stk_field(stk::mesh::MetaData & meta, const FieldConfig & f const Ioss::CompositeVariableType* compositeVarType = dynamic_cast(varType); if (compositeVarType != nullptr) { - const Ioss::VariableType * baseVarType = compositeVarType->GetBaseType(); + const Ioss::VariableType * baseVarType = compositeVarType->get_base_type(); numComponents = baseVarType->component_count(); - numCopies = compositeVarType->GetNumCopies(); + numCopies = compositeVarType->get_num_copies(); varType = baseVarType; } @@ -1178,7 +1173,6 @@ void create_and_test_input_field(const FieldConfig & fieldConfig, const int spatialDimension = 3; stk::mesh::MetaData meta(spatialDimension); - meta.use_simple_fields(); Ioss::Field ioField = create_io_field(fieldConfig, dataType); create_and_test_stk_field(meta, fieldConfig, ioField); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestGlobalVariables.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestGlobalVariables.cpp index 8faeb22020ae..b4a02225fc76 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestGlobalVariables.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestGlobalVariables.cpp @@ -73,7 +73,6 @@ void testGlobalVarOnFile(const std::string &outputFileName, const int stepNumber const std::vector goldGlobalVarValue, DataType goldGlobalScale, MPI_Comm comm) { stk::io::StkMeshIoBroker stkIo(comm); - stkIo.use_simple_fields(); stkIo.add_mesh_database(outputFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); stkIo.populate_bulk_data(); @@ -122,7 +121,6 @@ TEST(GlobalVariablesTest, OneGlobalDouble) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - 
stkIo.use_simple_fields(); stkIo.property_add(Ioss::Property("MAXIMUM_NAME_LENGTH", 64)); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); @@ -157,7 +155,6 @@ TEST(GlobalVariablesTest, InvalidGlobalRequest) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -178,7 +175,6 @@ TEST(GlobalVariablesTest, InvalidGlobalRequest) { double global_value = 0.0; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(outputFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); stkIo.populate_bulk_data(); @@ -208,7 +204,6 @@ TEST(GlobalVariablesTest, OneGlobalDoubleVector3) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -245,7 +240,6 @@ TEST(GlobalVariablesTest, OneGlobalIntegerVector3) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -282,7 +276,6 @@ TEST(GlobalVariablesTest, OneGlobalDouble10) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -313,7 +306,6 @@ void testTwoGlobals(const std::string &outputFileName, const std::vector globalVarValues = {13, 14}; { stk::io::StkMeshIoBroker stkIo(communicator); - 
stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -393,7 +385,6 @@ TEST(GlobalVariablesTest, GlobalDoubleWithFieldMultipleTimeSteps) } { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -445,7 +436,6 @@ TEST(GlobalVariablesTest, OneGlobalDoubleRestart) MPI_Comm communicator = MPI_COMM_WORLD; { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); @@ -464,7 +454,6 @@ TEST(GlobalVariablesTest, OneGlobalDoubleRestart) { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); stkIo.add_mesh_database(restartFileName, stk::io::READ_RESTART); stkIo.create_input_mesh(); stkIo.populate_bulk_data(); @@ -501,7 +490,6 @@ TEST(GlobalVariablesTest, OneGlobalDoubleWithFieldRestart) { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp index 41f8f52630b5..cd873a952e1c 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp @@ -56,7 +56,7 @@ TEST(UnitTestGmeshFixture, testUnit) std::string config_mesh = std::to_string(num_x) + "x" + std::to_string(num_y) + "x" + std::to_string(num_z) + "|sideset:xXyYzZ"; - stk::io::util::simple_fields::Gmesh_STKmesh_Fixture fixture(MPI_COMM_WORLD, config_mesh); + stk::io::util::Gmesh_STKmesh_Fixture 
fixture(MPI_COMM_WORLD, config_mesh); fixture.commit(); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestInvalidCallOrdering.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestInvalidCallOrdering.cpp index 03d523c77c5e..66b3cd4b40ae 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestInvalidCallOrdering.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestInvalidCallOrdering.cpp @@ -53,7 +53,6 @@ TEST(StkMeshIoBroker, CheckInvalidCallOrdering) MPI_Comm communicator = MPI_COMM_WORLD; stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string exodusFileName = "generated:1x1x8"; size_t input_index = stkIo.add_mesh_database(exodusFileName, stk::io::READ_MESH); stkIo.set_active_mesh(input_index); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestMeshData.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestMeshData.cpp index f2c5f2279198..3c03c1b6ed62 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestMeshData.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestMeshData.cpp @@ -104,7 +104,6 @@ TEST( StkMeshIoBroker, iofixture ) stk::ParallelMachine pm = MPI_COMM_WORLD; stk::io::StkMeshIoBroker fixture(pm); - fixture.use_simple_fields(); std::string input_base_filename = "unit_test.g"; @@ -144,7 +143,6 @@ TEST( StkMeshIoBroker, testModifyTopology ) if (stk::parallel_machine_size(comm) == 1) { stk::io::StkMeshIoBroker fixture(comm); - fixture.use_simple_fields(); std::string generated_mesh_spec = "generated:1x1x2"; fixture.add_mesh_database(generated_mesh_spec, stk::io::READ_MESH); fixture.create_input_mesh(); @@ -194,7 +192,6 @@ TEST( StkMeshIoBroker, active_only ) stk::ParallelMachine pm = MPI_COMM_WORLD; stk::io::StkMeshIoBroker fixture(pm); - fixture.use_simple_fields(); std::string input_base_filename = "unit_test.g"; @@ -248,7 +245,6 @@ TEST( StkMeshIoBroker, active_and_all ) return; } stk::io::StkMeshIoBroker fixture(pm); - fixture.use_simple_fields(); std::string input_base_filename = "unit_test.g"; @@ -313,7 
+309,6 @@ TEST( StkMeshIoBroker, large_mesh_test ) return; } stk::io::StkMeshIoBroker fixture(pm); - fixture.use_simple_fields(); std::string input_base_filename = "1mCube_20x20x20.g"; @@ -424,7 +419,6 @@ TEST(DeclareIossField, reRegisterWithDifferentNumCopies) { if(stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { return; } stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& myPart = declare_elem_part(meta, "myPart"); stk::mesh::Part& myOtherPart = declare_elem_part(meta, "myOtherPart"); @@ -438,7 +432,7 @@ TEST(DeclareIossField, reRegisterWithDifferentNumCopies) Ioss::Field* iossField1copy = create_ioss_field(fieldName, numScalarComponentsPerField, numFieldCopiesPerEntity); const stk::mesh::FieldBase* stkField = stk::io::impl::declare_stk_field_internal(meta, stk::topology::ELEM_RANK, myPart, - *iossField1copy, false); + *iossField1copy); unsigned expectedMaxSize = numFieldCopiesPerEntity*numScalarComponentsPerField; EXPECT_EQ(expectedMaxSize, stkField->max_size()); @@ -447,7 +441,7 @@ TEST(DeclareIossField, reRegisterWithDifferentNumCopies) Ioss::Field* iossField9copies = create_ioss_field(fieldName, numScalarComponentsPerField, numFieldCopiesPerEntity); - stkField = stk::io::impl::declare_stk_field_internal(meta, stk::topology::ELEM_RANK, myOtherPart, *iossField9copies, false); + stkField = stk::io::impl::declare_stk_field_internal(meta, stk::topology::ELEM_RANK, myOtherPart, *iossField9copies); expectedMaxSize = numFieldCopiesPerEntity*numScalarComponentsPerField; EXPECT_EQ(expectedMaxSize, stkField->max_size()); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestMeshGroupingEntity.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestMeshGroupingEntity.cpp index 275c2ba4ab25..5bb9e4613533 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestMeshGroupingEntity.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestMeshGroupingEntity.cpp @@ -79,7 +79,6 @@ TEST(MeshGroupingEntity, universalSideset_get_entity_rank) Ioss::SideBlock* 
usideblk = new Ioss::SideBlock(db_io, "universal_sideset", "unknown", "unknown", 1); usideset->add(usideblk); stk::mesh::MetaData meta(3); - meta.use_simple_fields(); EXPECT_EQ(stk::topology::FACE_RANK, stk::io::get_entity_rank(usideset, meta)); EXPECT_EQ(stk::topology::FACE_RANK, stk::io::get_entity_rank(usideblk, meta)); delete usideset; @@ -230,7 +229,6 @@ TEST(MeshGroupingEntity, matchhingNameAndType) TEST(MeshGroupingEntity, iossEntityTypesForElementRank) { stk::mesh::MetaData meta(2); - meta.use_simple_fields(); std::vector entityTypeVec = stk::io::get_ioss_entity_types(meta, stk::topology::ELEMENT_RANK); @@ -242,9 +240,7 @@ TEST(MeshGroupingEntity, iossEntityTypesForElementRank) TEST(MeshGroupingEntity, iossEntityTypesForFaceRank) { stk::mesh::MetaData meta2D(2); - meta2D.use_simple_fields(); stk::mesh::MetaData meta3D(3); - meta3D.use_simple_fields(); std::vector entityTypeVec = stk::io::get_ioss_entity_types(meta2D, stk::topology::FACE_RANK); @@ -269,7 +265,6 @@ TEST(MeshGroupingEntity, iossEntityTypesForFaceRank) TEST(MeshGroupingEntity, iossEntityTypesForConstraintRank) { stk::mesh::MetaData meta(2); - meta.use_simple_fields(); std::vector entityTypeVec = stk::io::get_ioss_entity_types(meta, stk::topology::CONSTRAINT_RANK); @@ -279,7 +274,6 @@ TEST(MeshGroupingEntity, iossEntityTypesForConstraintRank) TEST(MeshGroupingEntity, iossEntityTypesForNodeRank) { stk::mesh::MetaData meta; - meta.use_simple_fields(); std::vector entityTypeVec = stk::io::get_ioss_entity_types(meta, stk::topology::NODE_RANK); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestNodeBucketsHaveValidTopology.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestNodeBucketsHaveValidTopology.cpp index 8c543123c505..6125b84cbedf 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestNodeBucketsHaveValidTopology.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestNodeBucketsHaveValidTopology.cpp @@ -43,7 +43,6 @@ TEST(UnitTestNodeBucketsHaveValidTopology, testUnit) stk::ParallelMachine 
comm = MPI_COMM_WORLD; stk::io::StkMeshIoBroker meshReader(comm); - meshReader.use_simple_fields(); meshReader.add_mesh_database(generated_mesh, stk::io::READ_MESH); meshReader.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp index 5a7dadd7adb6..c2957d78e73e 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp @@ -48,7 +48,7 @@ void write_mesh_with_transient_field_data(const std::string & fileName, const std::string & transientFieldName) { std::string globalVariableName = "global_variable"; - stk::unit_test_util::simple_fields::GeneratedMeshToFileWithTransientFields gMesh(MPI_COMM_WORLD, + stk::unit_test_util::GeneratedMeshToFileWithTransientFields gMesh(MPI_COMM_WORLD, stk::mesh::BulkData::AUTO_AURA, transientFieldName, stk::topology::NODE_RANK); @@ -58,11 +58,11 @@ void write_mesh_with_transient_field_data(const std::string & fileName, globalVariableName); } -void verify_transient_field_data(stk::unit_test_util::simple_fields::MeshFromFile & mesh, +void verify_transient_field_data(stk::unit_test_util::MeshFromFile & mesh, const std::vector & transientTimeSteps, const std::string & transientFieldName) { - stk::unit_test_util::simple_fields::TransientVerifier verifier(MPI_COMM_WORLD); + stk::unit_test_util::TransientVerifier verifier(MPI_COMM_WORLD); verifier.verify_time_steps(mesh, transientTimeSteps); verifier.verify_num_transient_fields(mesh, 2); verifier.verify_transient_field_names(mesh, transientFieldName); @@ -77,7 +77,7 @@ TEST(StkMeshIoBroker, readTransientFieldData) { write_mesh_with_transient_field_data(fieldDataFile, transientTimeSteps, transientFieldName); - stk::unit_test_util::simple_fields::MeshFromFile meshWithFieldData(MPI_COMM_WORLD); + stk::unit_test_util::MeshFromFile meshWithFieldData(MPI_COMM_WORLD); 
meshWithFieldData.fill_from_parallel(fieldDataFile); verify_transient_field_data(meshWithFieldData, transientTimeSteps, transientFieldName); @@ -90,11 +90,44 @@ TEST(StkMeshIoBroker, readTransientFieldData_withCache) { write_mesh_with_transient_field_data(fieldDataFile, transientTimeSteps, transientFieldName); - stk::unit_test_util::simple_fields::MeshFromFile meshWithFieldData(MPI_COMM_WORLD); + stk::unit_test_util::MeshFromFile meshWithFieldData(MPI_COMM_WORLD); meshWithFieldData.broker.cache_entity_list_for_transient_steps(true); meshWithFieldData.fill_from_parallel(fieldDataFile); verify_transient_field_data(meshWithFieldData, transientTimeSteps, transientFieldName); } +TEST(StkMeshIoBroker, missingInputField) { + const std::string fieldDataFile = "meshWithMissingFieldData.e"; + std::vector transientTimeSteps = {0.0, 1.0, 2.0}; + std::string transientFieldName = "transient_field"; + + write_mesh_with_transient_field_data(fieldDataFile, transientTimeSteps, transientFieldName); + + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + stk::mesh::MetaData& meta = bulk->mesh_meta_data(); + + const stk::mesh::EntityRank rank = stk::topology::NODE_RANK; + const std::string fieldName = transientFieldName+"_scalar"; + const std::string dbFieldName = fieldName + "_missingField"; + + stk::mesh::Field &scalarField = meta.declare_field(rank, fieldName, 1); + stk::mesh::put_field_on_mesh(scalarField, meta.universal_part(), nullptr); + + stk::io::MeshField meshField(&scalarField, dbFieldName); + stk::io::StkMeshIoBroker broker(MPI_COMM_WORLD); + + broker.set_throw_on_missing_input_fields(true); + broker.set_bulk_data(*bulk); + broker.add_mesh_database(fieldDataFile, stk::io::READ_MESH); + broker.create_input_mesh(); + broker.add_input_field(meshField); + broker.populate_bulk_data(); + + std::vector missingFields; + EXPECT_THROW(broker.read_defined_input_fields(0.0, &missingFields), std::logic_error); + + 
unlink(fieldDataFile.c_str()); +} + } diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteDistributionFactors.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteDistributionFactors.cpp index 868c6f510aa8..52801d4a3a92 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteDistributionFactors.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteDistributionFactors.cpp @@ -40,7 +40,7 @@ namespace { -class DistributionFactor : public stk::unit_test_util::simple_fields::MeshFixture +class DistributionFactor : public stk::unit_test_util::MeshFixture { public: DistributionFactor() @@ -76,7 +76,6 @@ class DistributionFactor : public stk::unit_test_util::simple_fields::MeshFixtur builder.set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA); auto bulk = builder.create(); stk::mesh::MetaData & meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); for (const auto & dfFieldInfo : dfFieldMapping) { stk::mesh::FieldBase & dfField = meta.declare_field(stk::topology::FACE_RANK, dfFieldInfo.first); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.cpp index cfc7c3a05c58..aae7bb0c1003 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.cpp @@ -146,7 +146,6 @@ void StkEdgeIoTest::test_faces(const stk::mesh::BulkData& bulk) void StkEdgeIoTest::output_mesh() { stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); size_t outputFileIndex = stkIo.create_output_mesh(fileName, stk::io::WRITE_RESULTS); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.hpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.hpp index 6f7e7fe7dfea..2cad9ecc7e81 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.hpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdges.hpp @@ -54,11 +54,11 @@ #include 
#include "UnitTestReadWriteUtils.hpp" -class StkEdgeIoTest : public stk::unit_test_util::simple_fields::MeshFixture +class StkEdgeIoTest : public stk::unit_test_util::MeshFixture { public: StkEdgeIoTest(unsigned spatialDim=3) - : stk::unit_test_util::simple_fields::MeshFixture(spatialDim) + : stk::unit_test_util::MeshFixture(spatialDim) { } diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdgesForFieldIO.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdgesForFieldIO.cpp index ddf2388fd0f3..9b3b4a8e5516 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdgesForFieldIO.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteEdgesForFieldIO.cpp @@ -142,7 +142,6 @@ class StkEdgeIoTestForRestart : public StkEdgeIoTestForResultOutput void load_output_mesh(stk::mesh::BulkData& bulk) override { stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stk::io::fill_mesh_preexisting(stkIo, fileName, bulk, stk::io::READ_RESTART); int numSteps = stkIo.get_num_time_steps(); EXPECT_EQ(1, numSteps); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteFaces.hpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteFaces.hpp index 403b993f30f0..7af7c9d2fd94 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteFaces.hpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteFaces.hpp @@ -53,11 +53,11 @@ #include #include "UnitTestReadWriteUtils.hpp" -class StkFaceIoTest : public stk::unit_test_util::simple_fields::MeshFixture +class StkFaceIoTest : public stk::unit_test_util::MeshFixture { public: StkFaceIoTest() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), stkIoInput(), stkIoOutput() { diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteSideSets.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteSideSets.cpp index c97bec045e33..21cf866f92ff 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteSideSets.cpp +++ 
b/packages/stk/stk_unit_tests/stk_io/UnitTestReadWriteSideSets.cpp @@ -33,7 +33,6 @@ class StkIoSubset : public stk::io::unit_test::IOMeshFixture const std::string fileName("meshSubset.g"); stk::mesh::Selector meshSubsetSelector = create_block_subset_selector({blockToExclude}); stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); size_t outputFileIndex = stkIo.create_output_mesh(fileName, stk::io::WRITE_RESULTS); stkIo.set_output_selector(outputFileIndex, stk::topology::ELEM_RANK, meshSubsetSelector); @@ -429,8 +428,7 @@ void test_output_sideset(stk::unit_test_util::sideset::BulkDataTester &bulk, stk::unit_test_util::sideset::write_exo_file(bulk, outputFileName); auto meta2 = std::make_shared(); - meta2->use_simple_fields(); - stk::unit_test_util::sideset::BulkDataTester bulk2(meta2, bulk.parallel()); + stk::unit_test_util::sideset::BulkDataTester bulk2(*meta2, bulk.parallel()); stk::unit_test_util::sideset::read_exo_file(bulk2, outputFileName, readMode); EXPECT_NO_THROW(stk::unit_test_util::sideset::compare_sidesets(outputFileName, bulk, bulk2)); } @@ -491,8 +489,7 @@ void test_create_and_write_new_sideset(stk::ParallelMachine pm, const stk::unit_test_util::sideset::ElemIdSideVector &newSideSet) { auto meta = std::make_shared(3); - meta->use_simple_fields(); - stk::unit_test_util::sideset::BulkDataTester bulk(meta, pm); + stk::unit_test_util::sideset::BulkDataTester bulk(*meta, pm); stk::mesh::PartVector parts = create_parts(*meta); load_mesh_with_no_sidesets(bulk, inputFileName); @@ -552,8 +549,7 @@ void test_read_and_modify_sideset(stk::ParallelMachine pm, const stk::unit_test_util::sideset::ElemIdSideVector &expectedOutputElemIdSides) { auto meta = std::make_shared(); - meta->use_simple_fields(); - stk::unit_test_util::sideset::BulkDataTester bulk(meta, pm); + stk::unit_test_util::sideset::BulkDataTester bulk(*meta, pm); int inputId = 1; read_and_test_preexisting_sidesets(bulk, inputFileName, inputId, 
expectedInputElemIdSides); @@ -586,8 +582,7 @@ TEST(StkIo, parallel_transform_AA_to_ADA_to_ARA) if(p_size == 2) { auto meta = std::make_shared(3); - meta->use_simple_fields(); - stk::unit_test_util::sideset::BulkDataTester bulk(meta, pm); + stk::unit_test_util::sideset::BulkDataTester bulk(*meta, pm); stk::mesh::PartVector parts = create_parts(*meta); load_mesh_with_no_sidesets(bulk, "AA.e"); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestRestart.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestRestart.cpp index 9f32e7c006fb..5511c19f94f5 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestRestart.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestRestart.cpp @@ -194,13 +194,12 @@ TEST(StkIo, EmptyLocalBlock_beam2) 1,0,0, 1,1,0, 1,2,0, 0,3,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::io::write_mesh("shellq4_beam.g", *bulk, stk::io::WRITE_RESTART); stk::parallel_machine_barrier(MPI_COMM_WORLD); stk::io::StkMeshIoBroker ioBroker(MPI_COMM_SELF); - ioBroker.use_simple_fields(); ioBroker.set_mesh_builder(std::make_shared()); std::string pllFileName = "shellq4_beam.g.2." + std::to_string(stk::parallel_machine_rank(MPI_COMM_WORLD)); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestResultsOutputMeshMod.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestResultsOutputMeshMod.cpp new file mode 100644 index 000000000000..d41903686812 --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestResultsOutputMeshMod.cpp @@ -0,0 +1,128 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include +#include +#include +#include +#include "stk_topology/topology.hpp" // for topology, etc +#include // for Field +#include // for MetaData, put_field +#include // for BulkData +#include +#include +#include // for StkMeshIoBroker + + +namespace { + +size_t open_results_file(const std::string& resultsFileName, + stk::io::StkMeshIoBroker& stkIo, + std::shared_ptr bulkPtr, + stk::mesh::Field& elemField) +{ + size_t resultsFileIndex = stkIo.create_output_mesh(resultsFileName, stk::io::WRITE_RESULTS); + stkIo.set_bulk_data(bulkPtr); + stkIo.add_field(resultsFileIndex, elemField); + stkIo.write_output_mesh(resultsFileIndex); + return resultsFileIndex; +} + +void remove_entity_from_mesh(stk::mesh::BulkData& bulk, + stk::mesh::EntityRank rank, + stk::mesh::EntityId entityId) +{ + bulk.modification_begin(); + stk::mesh::Entity entity = bulk.get_entity(rank, entityId); + EXPECT_TRUE(bulk.is_valid(entity)); + bulk.destroy_entity(entity); + bulk.modification_end(); +} + +void test_results_output(MPI_Comm comm, + const std::string& fileName, + int goldNumSteps) +{ + std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(comm) + .set_spatial_dimension(3).create(); + stk::io::StkMeshIoBroker stkIo; + stk::io::fill_mesh_preexisting(stkIo, fileName, *bulkPtr); + EXPECT_EQ(goldNumSteps, stkIo.get_num_time_steps()); + unlink(fileName.c_str()); +} + +TEST(StkIoResultsOutputMeshMod, writeResultsElemDelete) +{ + MPI_Comm comm = MPI_COMM_WORLD; + if (stk::parallel_machine_size(comm) > 1) { GTEST_SKIP(); } + + std::shared_ptr bulkPtr = stk::mesh::MeshBuilder(comm) + .set_spatial_dimension(3).create(); + stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); + + stk::mesh::Field& elemField = meta.declare_field(stk::topology::ELEM_RANK, "myElementField"); + stk::mesh::put_field_on_mesh(elemField, meta.universal_part(), 1, nullptr); + + const std::string meshDesc("generated:1x1x2|sideset:x"); + stk::io::fill_mesh(meshDesc, *bulkPtr); + + const std::string 
resultsFileName("results.e"); + const std::string resultsFile2Name("results.e-s0002"); + + stk::io::StkMeshIoBroker stkIo(comm); + size_t resultsFileIndex = open_results_file(resultsFileName, stkIo, bulkPtr, elemField); + + constexpr int nSteps = 5; + for(int i = 1; i <= nSteps; ++i) { + if (i == 3) { + stk::mesh::EntityId elemId = 2; + remove_entity_from_mesh(*bulkPtr, stk::topology::ELEM_RANK, elemId); + + stkIo.close_output_mesh(resultsFileIndex); + resultsFileIndex = open_results_file(resultsFile2Name, stkIo, bulkPtr, elemField); + } + + stkIo.process_output_request(resultsFileIndex, (double)(i-1)); + } + + stkIo.close_output_mesh(resultsFileIndex); + + constexpr int nSteps_before_meshMod = 2; + constexpr int nSteps_after_meshMod = 3; + test_results_output(comm, resultsFileName, nSteps_before_meshMod); + test_results_output(comm, resultsFile2Name, nSteps_after_meshMod); +} + +} + diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestUtils.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestUtils.cpp index 42e1f66f5508..d31a91f0cf71 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestUtils.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestUtils.cpp @@ -94,7 +94,6 @@ TEST(CheckElemBlockTopology, invalidTopology) if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& block2 = meta.declare_part("block_2", stk::topology::ELEM_RANK); stk::io::put_io_part_attribute(block2); EXPECT_THROW(stk::io::throw_if_any_elem_block_has_invalid_topology(meta, "test"), std::runtime_error); diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestWriteSTKMesh.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestWriteSTKMesh.cpp index f162a5bf4f3f..75e180bd1c8b 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestWriteSTKMesh.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestWriteSTKMesh.cpp @@ -215,7 +215,7 @@ TEST(StkIo, write_stk_mesh_to_file) } //EndDocTest1 -class StkIoResultsOutput : public 
stk::unit_test_util::simple_fields::MeshFixture +class StkIoResultsOutput : public stk::unit_test_util::MeshFixture { protected: void setup_mesh(const std::string & meshSpec, @@ -256,7 +256,6 @@ TEST_F(StkIoResultsOutput, close_output_mesh_makes_it_invalid) { setup_mesh(meshSpec, stk::mesh::BulkData::NO_AUTO_AURA); stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); std::string fileName1 = "output1.e"; @@ -284,7 +283,6 @@ TEST_F(StkIoResultsOutput, write_nodal_face_variable_multiple_procs) const std::string fileName = "nodal_field_as_face_variable.e"; stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); size_t outputFileIndex = stkIo.create_output_mesh(fileName, stk::io::WRITE_RESULTS); stkIo.use_nodeset_for_sideset_nodes_fields(outputFileIndex, true); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTest3Tets3Procs.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTest3Tets3Procs.cpp index 912198132fc7..3a5f68ac2571 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTest3Tets3Procs.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTest3Tets3Procs.cpp @@ -153,9 +153,8 @@ TEST(ThreeTet4sOn3Procs, deleteMiddleTetOnP1AndRecreateOnP2_works) builder.set_spatial_dimension(3); std::unique_ptr bulkPtr = builder.create(); stk::mesh::BulkData &bulk = *bulkPtr; - bulk.mesh_meta_data().use_simple_fields(); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); const int proc = bulk.parallel_rank(); stk::mesh::Part *partWithTopology = bulk.mesh_meta_data().get_part("block_TETRAHEDRON_4"); @@ -225,9 +224,8 @@ TEST(ThreeTet10sOn3Procs, deleteMiddleTetOnP1AndRecreateOnP2_works) builder.set_spatial_dimension(3); std::unique_ptr bulkPtr = builder.create(); stk::mesh::BulkData &bulk = *bulkPtr; 
- bulk.mesh_meta_data().use_simple_fields(); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); const int proc = bulk.parallel_rank(); stk::mesh::Part *partWithTopology = bulk.mesh_meta_data().get_part("block_TETRAHEDRON_10"); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestAddNodeSharingWithInternalSideset.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestAddNodeSharingWithInternalSideset.cpp index 58e057cf2dbf..6549a0e75d0a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestAddNodeSharingWithInternalSideset.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestAddNodeSharingWithInternalSideset.cpp @@ -14,7 +14,7 @@ namespace { -class TwoHexWithInternalSideset : public stk::unit_test_util::simple_fields::MeshFixture +class TwoHexWithInternalSideset : public stk::unit_test_util::MeshFixture { protected: void create_mesh_with_internal_side() diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestAura.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestAura.cpp index add711a9bba4..2300b69b1895 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestAura.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestAura.cpp @@ -731,7 +731,7 @@ void test_aura_partially_disconnect_elem_from_shared_not_owned_nodes(stk::mesh:: } } -class Aura2Hex2Proc : public stk::unit_test_util::simple_fields::MeshFixture +class Aura2Hex2Proc : public stk::unit_test_util::MeshFixture { public: Aura2Hex2Proc() @@ -885,7 +885,7 @@ TEST(BulkData, aura_moveElem1FromProc0ToProc1_NoUpwardConnectivity) } } -class BulkDataAura : public stk::unit_test_util::simple_fields::MeshFixture +class BulkDataAura : public stk::unit_test_util::MeshFixture { public: void verify_no_aura() diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoundaryAnalysis.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoundaryAnalysis.cpp index 66019b7f316c..a28af8121c21 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoundaryAnalysis.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoundaryAnalysis.cpp @@ -115,7 +115,7 @@ void UnitTestStkMeshBoundaryAnalysis::test_boundary_analysis() } // set up grid_mesh - stk::mesh::fixtures::simple_fields::GridFixture grid_mesh(MPI_COMM_WORLD); + stk::mesh::fixtures::GridFixture grid_mesh(MPI_COMM_WORLD); stk::mesh::MetaData& fem_meta = grid_mesh.fem_meta(); stk::mesh::BulkData& bulk_data = grid_mesh.bulk_data(); @@ -269,7 +269,6 @@ TEST(BoundaryAnalysis, get_adjacent_entities) } stk::io::StkMeshIoBroker meshReader(comm); - meshReader.use_simple_fields(); std::string mesh_spec("generated:3x3x3"); meshReader.add_mesh_database(mesh_spec, stk::io::READ_MESH); meshReader.create_input_mesh(); @@ -277,9 +276,6 @@ TEST(BoundaryAnalysis, get_adjacent_entities) stk::mesh::BulkData& stkMeshBulkData = meshReader.bulk_data(); - // unsigned file_index = meshReader.create_output_mesh("alan.exo", stk::io::WRITE_RESULTS); - // meshReader.write_output_mesh(file_index); - unsigned numEntitiesToTest = 4; stk::mesh::EntityId ids[] = { 14, 1, 2, 5 }; unsigned goldNumConnectedEntities[] = { 6, 3, 4, 5 }; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoxFixture.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoxFixture.cpp index 90e01cdd1a43..eca26cc41393 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoxFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBoxFixture.cpp @@ -54,7 +54,7 @@ using stk::mesh::Selector; using stk::mesh::Entity; using stk::mesh::EntityId; using stk::mesh::EntityRank; -using stk::mesh::fixtures::simple_fields::BoxFixture; +using stk::mesh::fixtures::BoxFixture; namespace { diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp index 38af622e3595..dd064bd2b7a7 100644 --- 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp @@ -96,7 +96,7 @@ TEST(UnitTestingOfBucket, testBucket) // Create MetaData, BulkData unsigned max_bucket_size = 4; - stk::mesh::fixtures::simple_fields::BoxFixture fixture(pm, stk::mesh::BulkData::AUTO_AURA, max_bucket_size, entity_names); + stk::mesh::fixtures::BoxFixture fixture(pm, stk::mesh::BulkData::AUTO_AURA, max_bucket_size, entity_names); MetaData& meta = fixture.fem_meta(); BulkData& bulk = fixture.bulk_data(); // Create two scalar fields, temperature and volume. Put temperature @@ -155,7 +155,6 @@ TEST(UnitTestingOfBucket, bucketSortChangeEntityId) builder.set_spatial_dimension(spatialDim); std::shared_ptr bulkPtr = builder.create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part& part = meta.declare_part_with_topology("node_part", stk::topology::NODE); meta.commit(); stk::mesh::BulkData& bulk = *bulkPtr; @@ -248,7 +247,7 @@ bool does_rank_have_permutation(stk::mesh::EntityRank rank) return rank > stk::topology::NODE_RANK && rank < stk::topology::CONSTRAINT_RANK; } -class BucketHex : public stk::mesh::fixtures::simple_fields::TestHexFixture {}; +class BucketHex : public stk::mesh::fixtures::TestHexFixture {}; TEST_F(BucketHex, testing_valid_permutation_on_various_ranks) { @@ -291,7 +290,7 @@ TEST_F(BucketHex, testing_valid_permutation_on_various_ranks) edge_nodes[FIRST_NODE] = nodes[FIRST_NODE]; edge_nodes[SECOND_NODE] = nodes[SECOND_NODE]; - entities[stk::topology::EDGE_RANK] = stk::unit_test_util::simple_fields::declare_element_to_edge_with_nodes(bulk, entities[stk::topology::ELEM_RANK], + entities[stk::topology::EDGE_RANK] = stk::unit_test_util::declare_element_to_edge_with_nodes(bulk, entities[stk::topology::ELEM_RANK], edge_nodes, id, meta.get_topology_root_part(stk::topology::LINE_2)); const unsigned num_nodes_on_face = 4; @@ -301,7 +300,7 @@ TEST_F(BucketHex, 
testing_valid_permutation_on_various_ranks) face_nodes[THIRD_NODE] = nodes[FOURTH_NODE]; face_nodes[FOURTH_NODE] = nodes[THIRD_NODE]; - entities[stk::topology::FACE_RANK] = stk::unit_test_util::simple_fields::declare_element_side_with_nodes(bulk, entities[stk::topology::ELEM_RANK], + entities[stk::topology::FACE_RANK] = stk::unit_test_util::declare_element_side_with_nodes(bulk, entities[stk::topology::ELEM_RANK], face_nodes, id, meta.get_topology_root_part(stk::topology::QUAD_4)); bulk.modification_end(); @@ -332,7 +331,6 @@ TEST_F(BucketHex, changing_conn_on_bucket_for_face_to_element) { setup_mesh(1, 1, 1); stk::mesh::MetaData& meta = get_meta(); - meta.use_simple_fields(); stk::mesh::BulkData& bulk = get_bulk(); unsigned face_node_ids[] = { 5, 6, 8, 7 }; @@ -344,7 +342,7 @@ TEST_F(BucketHex, changing_conn_on_bucket_for_face_to_element) stk::mesh::Entity elem = bulk.get_entity(stk::topology::ELEM_RANK, 1); bulk.modification_begin(); - stk::mesh::Entity side = stk::unit_test_util::simple_fields::declare_element_side_with_nodes(bulk, elem, nodes, 1, meta.get_topology_root_part(stk::topology::QUAD_4)); + stk::mesh::Entity side = stk::unit_test_util::declare_element_side_with_nodes(bulk, elem, nodes, 1, meta.get_topology_root_part(stk::topology::QUAD_4)); bulk.modification_end(); test_nodes_and_permutation(bulk, elem, side, nodes); @@ -403,7 +401,6 @@ TEST_F(BucketHex, changing_conn_on_bucket_for_edge_to_element) { setup_mesh(1, 1, 1); stk::mesh::MetaData& meta = get_meta(); - meta.use_simple_fields(); stk::mesh::BulkData& bulk = get_bulk(); unsigned edge_node_ids[] = { 5, 6 }; @@ -413,7 +410,7 @@ TEST_F(BucketHex, changing_conn_on_bucket_for_edge_to_element) stk::mesh::Entity elem = bulk.get_entity(stk::topology::ELEM_RANK, 1); bulk.modification_begin(); - stk::mesh::Entity edge = stk::unit_test_util::simple_fields::declare_element_to_edge_with_nodes(bulk, elem, nodes, 1, meta.get_topology_root_part(stk::topology::LINE_2)); + stk::mesh::Entity edge = 
stk::unit_test_util::declare_element_to_edge_with_nodes(bulk, elem, nodes, 1, meta.get_topology_root_part(stk::topology::LINE_2)); bulk.modification_end(); test_nodes_and_permutation(bulk, elem, edge, nodes); @@ -489,7 +486,7 @@ void do_modifying_entity_creation(stk::mesh::BulkData & bulk, const stk::mesh::F const stk::mesh::MetaData & meta = bulk.mesh_meta_data(); stk::mesh::Entity elem = bulk.get_entity(stk::topology::ELEM_RANK, 1); bulk.modification_begin(); - stk::unit_test_util::simple_fields::declare_element_side_with_nodes(bulk, elem, nodes, 1, meta.get_topology_root_part(stk::topology::QUAD_4)); + stk::unit_test_util::declare_element_side_with_nodes(bulk, elem, nodes, 1, meta.get_topology_root_part(stk::topology::QUAD_4)); bulk.modification_end(); const stk::mesh::BucketVector & buckets = bulk.buckets(stk::topology::NODE_RANK); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketConnectivity.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketConnectivity.cpp index 81e4d791a248..bfa84a7e41da 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketConnectivity.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketConnectivity.cpp @@ -45,6 +45,7 @@ #include "stk_mesh/base/BucketConnectivity.hpp" // for BucketConnectivity #include "stk_mesh/base/Entity.hpp" // for Entity, operator<< #include "stk_mesh/base/Types.hpp" // for ConnectivityOrdinal, etc +#include "stk_mesh/baseImpl/BucketConnDynamic.hpp" #include "stk_topology/topology.hpp" // for topology, etc namespace stk { namespace mesh { class BulkData; } } @@ -53,18 +54,12 @@ using namespace stk::mesh; namespace { typedef impl::BucketConnectivity fixed_conn; -typedef impl::BucketConnectivity dynamic_conn; void check_uninit_conn_size(fixed_conn& conn, unsigned num_conn, unsigned ordinal) { EXPECT_EQ(conn.num_connectivity(ordinal), num_conn); } -void check_uninit_conn_size(dynamic_conn& conn, unsigned num_conn, unsigned ordinal) -{ - EXPECT_EQ(conn.num_connectivity(ordinal), 
0u); -} - void check_even_conn_removed(fixed_conn& conn, unsigned num_conn, unsigned ordinal) { EXPECT_EQ(conn.num_connectivity(ordinal), num_conn); @@ -81,19 +76,6 @@ void check_even_conn_removed(fixed_conn& conn, unsigned num_conn, unsigned ordin } } -void check_even_conn_removed(dynamic_conn& conn, unsigned num_conn, unsigned ordinal) -{ - EXPECT_EQ(conn.num_connectivity(ordinal), num_conn / 2); - - Entity const* targets = conn.begin(ordinal); - ConnectivityOrdinal const* ordinals = conn.begin_ordinals(ordinal); - for (unsigned i = 0; i < num_conn / 2; ++i) { - Entity e_to(ordinal * num_conn + ((2*i) + 1) + 1); - EXPECT_EQ(targets[i], e_to); - EXPECT_EQ(ordinals[i], static_cast((2*i) + 1)); - } -} - template void test_simple_add(Connectivity& connectivity, unsigned num_entities_to_add, unsigned num_to_add) { @@ -223,17 +205,6 @@ TEST(BucketConnectivity, fixed_simple_add) conn.end_modification(bulk); } -TEST(BucketConnectivity, dynamic_simple_add) -{ - const unsigned num_to_add = 8; - const unsigned num_entities = 100; - BulkData * bulk = NULL; - dynamic_conn conn(stk::topology::ELEMENT_RANK, bulk); - - test_simple_add(conn, num_entities, num_to_add); - conn.end_modification(bulk); -} - TEST(BucketConnectivity, fixed_complex_add) { const unsigned num_to_add = 8; @@ -245,101 +216,678 @@ TEST(BucketConnectivity, fixed_complex_add) conn.end_modification(bulk); } -TEST(BucketConnectivity, dynamic_complex_add) +TEST(BucketConnectivity, fixed_remove) { - const unsigned num_to_add = 8; + const unsigned num_to_add = 8; const unsigned num_entities = 100; BulkData * bulk = NULL; - dynamic_conn conn(stk::topology::ELEMENT_RANK, bulk); + fixed_conn conn(num_to_add); - test_complex_add(conn, num_entities, num_to_add); + test_remove(conn, num_entities, num_to_add); conn.end_modification(bulk); } -TEST(BucketConnectivity, fixed_remove) +TEST(BucketConnectivity, fixed_intra_conn_copy) { const unsigned num_to_add = 8; const unsigned num_entities = 100; BulkData * bulk = NULL; 
fixed_conn conn(num_to_add); - test_remove(conn, num_entities, num_to_add); + test_intra_conn_copy(conn, num_entities, num_to_add); conn.end_modification(bulk); } -TEST(BucketConnectivity, dynamic_remove) +TEST(BucketConnectivity, fixed_inter_conn_copy) { - const unsigned num_to_add = 8; + const unsigned num_to_add = 8; const unsigned num_entities = 100; BulkData * bulk = NULL; - dynamic_conn conn(stk::topology::ELEMENT_RANK, bulk); + fixed_conn conn(num_to_add); - test_remove(conn, num_entities, num_to_add); + test_inter_conn_copy(conn, num_entities, num_to_add); conn.end_modification(bulk); } -TEST(BucketConnectivity, fixed_intra_conn_copy) +TEST(BucketConnectivity, fixed_mod_end) { const unsigned num_to_add = 8; const unsigned num_entities = 100; BulkData * bulk = NULL; fixed_conn conn(num_to_add); - test_intra_conn_copy(conn, num_entities, num_to_add); + test_mod_end(conn, num_entities, num_to_add); conn.end_modification(bulk); } -TEST(BucketConnectivity, dynamic_intra_conn_copy) +TEST(BucketConnDynamic, basic) { - const unsigned num_to_add = 8; - const unsigned num_entities = 100; - BulkData * bulk = NULL; - dynamic_conn conn(stk::topology::ELEMENT_RANK, bulk); + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + unsigned maxOrdinal = bucketCapacity-1; + conn.grow_if_necessary(maxOrdinal); + for(unsigned i=0; i ordinals = {2, 7, 5}; + + for(unsigned i=0; i ordinals = { + 2, 1, 2, 2, + 1, 2, 1, 1 + }; + + for(unsigned i=0; i ordinals = {2, 7, 5}; + + for(unsigned i=0; i(1); + stk::mesh::Permutation perm2 = static_cast(2); + stk::mesh::Permutation perm7 = static_cast(7); + stk::mesh::Permutation perm8 = static_cast(8); + + EXPECT_TRUE(conn.add_connectivity(0, entity1, ordinal1, perm1)); + EXPECT_TRUE(conn.add_connectivity(1, entity11, ordinal7, perm7)); + EXPECT_TRUE(conn.add_connectivity(0, entity2, ordinal2, perm2)); + EXPECT_TRUE(conn.add_connectivity(2, entity12, ordinal8, perm8)); + + EXPECT_EQ(2u, 
conn.num_connectivity(0)); + EXPECT_EQ(entity1, conn.begin(0)[0]); + EXPECT_EQ(ordinal1, conn.begin_ordinals(0)[0]); + EXPECT_EQ(perm1, conn.begin_permutations(0)[0]); + EXPECT_EQ(entity2, conn.begin(0)[1]); + EXPECT_EQ(ordinal2, conn.begin_ordinals(0)[1]); + EXPECT_EQ(perm2, conn.begin_permutations(0)[1]); + + EXPECT_EQ(1u, conn.num_connectivity(1)); + EXPECT_EQ(entity11, conn.begin(1)[0]); + EXPECT_EQ(ordinal7, conn.begin_ordinals(1)[0]); + EXPECT_EQ(perm7, conn.begin_permutations(1)[0]); + + EXPECT_EQ(1u, conn.num_connectivity(2)); + EXPECT_EQ(entity12, conn.begin(2)[0]); + EXPECT_EQ(ordinal8, conn.begin_ordinals(2)[0]); + EXPECT_EQ(perm8, conn.begin_permutations(2)[0]); +} + +TEST(BucketConnDynamic, addAndRemoveMultipleConnectivity) +{ + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + + stk::mesh::Entity entity1(1), entity2(2), entity11(11), entity12(12); + stk::mesh::ConnectivityOrdinal ordinal1 = 1, ordinal2 = 2, ordinal7 = 7, ordinal8 = 8; + + EXPECT_TRUE(conn.add_connectivity(0, entity1, ordinal1)); + EXPECT_TRUE(conn.add_connectivity(1, entity11, ordinal7)); + EXPECT_TRUE(conn.add_connectivity(0, entity2, ordinal2)); + EXPECT_TRUE(conn.add_connectivity(1, entity12, ordinal8)); + + EXPECT_TRUE(conn.remove_connectivity(0, entity2, ordinal2)); + EXPECT_EQ(1u, conn.num_connectivity(0)); + EXPECT_EQ(entity1, conn.begin(0)[0]); + EXPECT_EQ(ordinal1, conn.begin_ordinals(0)[0]); + + EXPECT_TRUE(conn.remove_connectivity(1, entity11, ordinal7)); + EXPECT_EQ(1u, conn.num_connectivity(1)); + EXPECT_EQ(entity12, conn.begin(1)[0]); + EXPECT_EQ(ordinal8, conn.begin_ordinals(1)[0]); + + EXPECT_TRUE(conn.remove_connectivity(1, entity12, ordinal8)); + EXPECT_EQ(0u, conn.num_connectivity(1)); + EXPECT_EQ(conn.begin(1), conn.end(1)); + EXPECT_EQ(conn.begin_ordinals(1), conn.begin_ordinals(1)); + + EXPECT_FALSE(conn.remove_connectivity(0, entity2, ordinal2)); + EXPECT_EQ(1u, conn.num_connectivity(0)); + EXPECT_EQ(entity1, 
conn.begin(0)[0]); + EXPECT_EQ(ordinal1, conn.begin_ordinals(0)[0]); +} + +TEST(BucketConnDynamic, removeAllConnectivityForBktOrdinal) +{ + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + + stk::mesh::Entity entity1(1), entity2(2), entity11(11), entity12(12); + stk::mesh::ConnectivityOrdinal ordinal1 = 1, ordinal2 = 2, ordinal7 = 7, ordinal8 = 8; + + EXPECT_TRUE(conn.add_connectivity(0, entity1, ordinal1)); + EXPECT_TRUE(conn.add_connectivity(1, entity11, ordinal7)); + EXPECT_TRUE(conn.add_connectivity(0, entity2, ordinal2)); + EXPECT_TRUE(conn.add_connectivity(1, entity12, ordinal8)); + + EXPECT_TRUE(conn.remove_connectivity(0)); + EXPECT_EQ(0u, conn.num_connectivity(0)); + EXPECT_EQ(conn.begin(0), conn.end(0)); + EXPECT_EQ(conn.begin_ordinals(0), conn.begin_ordinals(0)); + + EXPECT_EQ(2u, conn.num_connectivity(1)); + EXPECT_EQ(entity11, conn.begin(1)[0]); + EXPECT_EQ(ordinal7, conn.begin_ordinals(1)[0]); + EXPECT_EQ(entity12, conn.begin(1)[1]); + EXPECT_EQ(ordinal8, conn.begin_ordinals(1)[1]); +} + +TEST(BucketConnDynamic, makeHoleThenFillHoleWithoutRaisingCapacity) +{ + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + + stk::mesh::Entity entity1(1), entity2(2), entity11(11), entity12(12); + stk::mesh::Entity entity21(21), entity22(22); + stk::mesh::ConnectivityOrdinal ordinal1 = 1, ordinal2 = 2, ordinal7 = 7, ordinal8 = 8; + stk::mesh::ConnectivityOrdinal ordinal14 = 14, ordinal15 = 15; + + EXPECT_TRUE(conn.add_connectivity(0, entity1, ordinal1)); + EXPECT_TRUE(conn.add_connectivity(0, entity2, ordinal2)); + EXPECT_TRUE(conn.add_connectivity(1, entity11, ordinal7)); + EXPECT_TRUE(conn.add_connectivity(1, entity12, ordinal8)); + EXPECT_TRUE(conn.add_connectivity(2, entity21, ordinal14)); + EXPECT_TRUE(conn.add_connectivity(2, entity22, ordinal15)); + + size_t totalCapacity = conn.total_capacity(); + size_t totalNumConnectivity = conn.total_num_connectivity(); 
+ size_t numUnused = conn.num_unused_entries(); + + EXPECT_TRUE(conn.remove_connectivity(1)); + EXPECT_EQ(0u, conn.num_connectivity(1)); + EXPECT_EQ(conn.begin(1), conn.end(1)); + EXPECT_EQ(conn.begin_ordinals(1), conn.begin_ordinals(1)); + + EXPECT_TRUE(conn.add_connectivity(1, entity11, ordinal7)); + EXPECT_TRUE(conn.add_connectivity(1, entity12, ordinal8)); + + EXPECT_TRUE(totalCapacity == conn.total_capacity()); + EXPECT_TRUE(totalNumConnectivity == conn.total_num_connectivity()); + EXPECT_TRUE(numUnused == conn.num_unused_entries()); } + +TEST(BucketConnDynamic, replaceConnectivity_sameLength) +{ + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + + std::vector> entities = { + {stk::mesh::Entity(1), stk::mesh::Entity(2)}, + {stk::mesh::Entity(11), stk::mesh::Entity(12)}, + {stk::mesh::Entity(21), stk::mesh::Entity(22)} + }; + std::vector> ordinals = { + {1, 2}, + {7, 8}, + {14, 15} + }; + + for(unsigned bktOrdinal=0; bktOrdinal newEntities = {Entity(31), Entity(32)}; + std::vector newOrdinals = {24, 25}; + const stk::mesh::Permutation* perms = nullptr; + + EXPECT_TRUE(conn.replace_connectivity(1, newEntities.size(), newEntities.data(), newOrdinals.data(), perms)); + EXPECT_EQ(2u, conn.num_connectivity(1)); + EXPECT_EQ(newEntities[0], conn.begin(1)[0]); + EXPECT_EQ(newEntities[1], conn.begin(1)[1]); + EXPECT_EQ(newOrdinals[0], conn.begin_ordinals(1)[0]); + EXPECT_EQ(newOrdinals[1], conn.begin_ordinals(1)[1]); +} + +TEST(BucketConnDynamic, replaceConnectivity_longer) +{ + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + + std::vector> entities = { + {stk::mesh::Entity(1), stk::mesh::Entity(2)}, + {stk::mesh::Entity(11), stk::mesh::Entity(12)}, + {stk::mesh::Entity(21), stk::mesh::Entity(22)} + }; + std::vector> ordinals = { + {1, 2}, + {7, 8}, + {14, 15} + }; + + for(unsigned bktOrdinal=0; bktOrdinal newEntities = {Entity(31), Entity(32), Entity(33)}; + 
std::vector newOrdinals = {24, 25, 26}; + const stk::mesh::Permutation* perms = nullptr; + + EXPECT_TRUE(conn.replace_connectivity(1, newEntities.size(), newEntities.data(), newOrdinals.data(), perms)); + EXPECT_EQ(3u, conn.num_connectivity(1)); + EXPECT_EQ(newEntities[0], conn.begin(1)[0]); + EXPECT_EQ(newEntities[1], conn.begin(1)[1]); + EXPECT_EQ(newEntities[2], conn.begin(1)[2]); + EXPECT_EQ(newOrdinals[0], conn.begin_ordinals(1)[0]); + EXPECT_EQ(newOrdinals[1], conn.begin_ordinals(1)[1]); + EXPECT_EQ(newOrdinals[2], conn.begin_ordinals(1)[2]); +} + +TEST(BucketConnDynamic, replaceConnectivity_shorter) +{ + constexpr unsigned bucketCapacity = 10; + stk::mesh::impl::BucketConnDynamic conn(bucketCapacity); + + std::vector> entities = { + {stk::mesh::Entity(1), stk::mesh::Entity(2)}, + {stk::mesh::Entity(11), stk::mesh::Entity(12)}, + {stk::mesh::Entity(21), stk::mesh::Entity(22)} + }; + std::vector> ordinals = { + {1, 2}, + {7, 8}, + {14, 15} + }; + + for(unsigned bktOrdinal=0; bktOrdinal newEntities = {Entity(31)}; + std::vector newOrdinals = {24}; + const stk::mesh::Permutation* perms = nullptr; + + EXPECT_TRUE(conn.replace_connectivity(1, newEntities.size(), newEntities.data(), newOrdinals.data(), perms)); + EXPECT_EQ(1u, conn.num_connectivity(1)); + EXPECT_EQ(newEntities[0], conn.begin(1)[0]); + EXPECT_EQ(newOrdinals[0], conn.begin_ordinals(1)[0]); +} + diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketRepository.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketRepository.cpp index 8e590633a995..ab55a5d45d03 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketRepository.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucketRepository.cpp @@ -54,7 +54,6 @@ TEST(BucketRepositoryTest, createBuckets) stk::ParallelMachine comm = MPI_COMM_WORLD; size_t spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim, stk::mesh::entity_rank_names()); - stkMeshMetaData.use_simple_fields(); stk::mesh::OrdinalVector parts, 
scratch; parts.push_back(stkMeshMetaData.universal_part().mesh_meta_data_ordinal()); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData.cpp index 3f7c64e51040..0e50a4221bea 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData.cpp @@ -40,6 +40,7 @@ #include // for logic_error, runtime_error #include // for sort #include // for BulkData, etc +#include #include // for count_entities, etc #include #include @@ -104,8 +105,8 @@ using stk::mesh::EntityId; using stk::mesh::EntityKey; using stk::mesh::EntityVector; using stk::mesh::EntityRank; -using stk::mesh::fixtures::simple_fields::RingFixture; -using stk::mesh::fixtures::simple_fields::BoxFixture; +using stk::mesh::fixtures::RingFixture; +using stk::mesh::fixtures::BoxFixture; //==================== @@ -183,7 +184,8 @@ void donate_one_element(stk::unit_test_util::BulkDataTester & mesh) } } - mesh.change_entity_owner(change); + const bool someEntitiesMoved = mesh.change_entity_owner(change); + ASSERT_TRUE(someEntitiesMoved); count_entities(select_owned, mesh, after_count); @@ -303,7 +305,8 @@ TEST(BulkData, testChangeOwner_nodes) std::vector change; - bulk.change_entity_owner(change); + const bool anyEntitiesMoved = bulk.change_entity_owner(change); + ASSERT_FALSE(anyEntitiesMoved); for(unsigned i = 0; i < id_total; ++i) { @@ -994,7 +997,6 @@ TEST(BulkData, testChangeOwner_box) const int spatial_dimension = 3; MetaData meta(spatial_dimension); - meta.use_simple_fields(); meta.commit(); @@ -1606,7 +1608,6 @@ TEST(BulkData, testFamilyTreeGhosting) builder.set_entity_rank_names(entity_rank_names); std::shared_ptr bulkPtr = builder.create(); MetaData& meta_data = bulkPtr->mesh_meta_data(); - meta_data.use_simple_fields(); const unsigned nodes_per_elem = 4, nodes_per_side = 2; Part &elem_part = meta_data.declare_part_with_topology("elem_part", stk::topology::QUAD_4_2D); 
meta_data.commit(); @@ -1933,7 +1934,7 @@ void testParallelSideCreation(stk::mesh::BulkData::AutomaticAuraOption autoAuraO // Create local version of side on each proc Entity side = mesh.declare_element_side(elem, local_side_ordinal, stk::mesh::ConstPartVector{&side_part}); - stk::mesh::Permutation perm1 = mesh.find_permutation( + stk::mesh::Permutation perm1 = stk::mesh::find_permutation(mesh, elem_top, nodes.data(), elem_top.side_topology(local_side_ordinal), side_nodes.data(), local_side_ordinal); ASSERT_TRUE(perm1 != stk::mesh::Permutation::INVALID_PERMUTATION); mesh.modification_end(); @@ -1966,7 +1967,7 @@ void testParallelSideCreation(stk::mesh::BulkData::AutomaticAuraOption autoAuraO } side = mesh.declare_element_side(elem, local_side_ordinal, stk::mesh::ConstPartVector{&side_part}); - stk::mesh::Permutation perm2 = mesh.find_permutation( + stk::mesh::Permutation perm2 = stk::mesh::find_permutation(mesh, elem_top, nodes.data(), elem_top.side_topology(local_side_ordinal), side_nodes.data(), local_side_ordinal); ASSERT_TRUE(perm2 != stk::mesh::Permutation::INVALID_PERMUTATION); @@ -2005,7 +2006,7 @@ TEST(BulkData, testParallelSideCreationWithoutAura) //---------------------------------------------------------------------- //---------------------------------------------------------------------- -class BulkDataWithHexes : public stk::mesh::fixtures::simple_fields::TestHexFixture {}; +class BulkDataWithHexes : public stk::mesh::fixtures::TestHexFixture {}; // Testing of field_data_footprint(.) 
TEST_F(BulkDataWithHexes, test_total_field_data_footprint ) @@ -2190,7 +2191,6 @@ TEST(BulkData, testFieldComm) const int spatial_dimension = 3; MetaData meta(spatial_dimension); - meta.use_simple_fields(); meta.commit(); @@ -2228,7 +2228,7 @@ TEST(BulkData, testFieldComm) // 2d, not so much if(p_size <= 4) { - stk::mesh::fixtures::simple_fields::QuadFixture fixture(pm, 2 /*nx*/, 2 /*ny*/); + stk::mesh::fixtures::QuadFixture fixture(pm, 2 /*nx*/, 2 /*ny*/); PressureFieldType& p_field = fixture.m_meta.declare_field(stk::topology::NODE_RANK, "p"); stk::mesh::put_field_on_mesh(p_field, fixture.m_meta.universal_part(), nullptr); fixture.m_meta.commit(); @@ -2303,7 +2303,7 @@ TEST(BulkData, testCommList) //------------------------------ // test begin/end pair { - stk::mesh::fixtures::simple_fields::QuadFixture fixture(pm, 2 /*nx*/, 2 /*ny*/); + stk::mesh::fixtures::QuadFixture fixture(pm, 2 /*nx*/, 2 /*ny*/); fixture.m_meta.commit(); fixture.generate_mesh(); stk::mesh::BulkData & bulk = fixture.m_bulk_data; @@ -2314,7 +2314,7 @@ TEST(BulkData, testCommList) //------------------------------ // test begin/end pair with mesh mods { - stk::mesh::fixtures::simple_fields::QuadFixture fixture(pm, 2 /*nx*/, 2 /*ny*/); + stk::mesh::fixtures::QuadFixture fixture(pm, 2 /*nx*/, 2 /*ny*/); fixture.m_meta.commit(); fixture.generate_mesh(); stk::mesh::BulkData & bulk = fixture.m_bulk_data; @@ -2434,7 +2434,6 @@ TEST(BulkData, get_ghost_data) if(psize == 3) { // Skip unless we're on 3 processors stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const string generatedMeshSpecification = "generated:1x1x3"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -2582,7 +2581,6 @@ TEST(DocTestBulkData, onlyTheOwnerCanChangeEntityParts) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = 
"generated:1x1x2"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -2629,7 +2627,6 @@ TEST(BulkData, onlyKeepTheOwnersParts) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x2"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -2706,7 +2703,6 @@ TEST(BulkData, newSharedNodeGetMergedPartsFromElements) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x2"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -2779,7 +2775,6 @@ TEST(BulkData, mayCreateRelationsToNodesDifferently) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x2"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -2915,7 +2910,6 @@ TEST(DocTestBulkData, inducedPartMembershipIgnoredForNonOwnedHigherRankedEntitie const int myRank = stk::parallel_machine_rank(communicator); stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); stk::mesh::PartVector pv = setupFixture(stkMeshIoBroker); stk::mesh::Part & partA = *pv[0]; stk::mesh::Part & partB = *pv[1]; @@ -3102,17 +3096,15 @@ TEST(BulkData, ModificationEnd) { const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester *stkMeshBulkData = new stk::unit_test_util::BulkDataTester(stkMeshMetaData, communicator); - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "generated:1x1x4"); + 
std::string exodusFileName = stk::unit_test_util::get_option("-i", "generated:1x1x4"); // STK IO module will be described in separate chapter. // It is used here to read the mesh data from the Exodus file and populate an STK Mesh. // The order of the following lines in {} are important { stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); // Inform STK IO which STK Mesh objects to populate later exodusFileReader.set_bulk_data(*stkMeshBulkData); @@ -3180,12 +3172,10 @@ TEST(BulkData, resolve_ownership_of_modified_entities_trivial) const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(stkMeshMetaData, communicator); - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "generated:1x1x3"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "generated:1x1x3"); { stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); exodusFileReader.set_bulk_data(mesh); exodusFileReader.add_mesh_database(exodusFileName, stk::io::READ_MESH); exodusFileReader.create_input_mesh(); @@ -3227,17 +3217,15 @@ TEST(BulkData, verify_closure_count_is_correct) { const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester *stkMeshBulkData = new stk::unit_test_util::BulkDataTester(stkMeshMetaData, communicator); - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "generated:1x1x2"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "generated:1x1x2"); // STK IO module will be described in separate chapter. // It is used here to read the mesh data from the Exodus file and populate an STK Mesh. 
// The order of the following lines in {} are important { stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); // Inform STK IO which STK Mesh objects to populate later exodusFileReader.set_bulk_data(*stkMeshBulkData); @@ -3370,7 +3358,6 @@ TEST(BulkData, orphaned_node_closure_count_shared_nodes_non_owner_adds_element) const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta,communicator); stk::mesh::Part& element_part = meta.declare_part_with_topology("Beam2Part", stk::topology::BEAM_2); @@ -3448,7 +3435,6 @@ TEST(BulkData, orphaned_node_closure_count_shared_nodes_owner_deletes) const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta,communicator); bulk.modification_begin(); @@ -3488,7 +3474,6 @@ TEST(BulkData, orphaned_node_closure_count_shared_nodes_change_entity_owner_3pro const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta,communicator); bulk.modification_begin(); @@ -3549,7 +3534,6 @@ TEST(BulkData, orphaned_node_closure_count_shared_nodes_change_entity_owner_2pro const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta,communicator); bulk.modification_begin(); @@ -3596,7 +3580,6 @@ TEST(BulkData, orphaned_node_closure_count_shared_nodes_owner_adds_element) const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta,communicator); stk::mesh::Part& element_part = meta.declare_part_with_topology("Beam2Part", stk::topology::BEAM_2); @@ -3647,7 +3630,6 @@ TEST(BulkData, change_entity_owner_no_aura_check) const int spatial_dimension = 2; 
stk::mesh::MetaData meta( spatial_dimension ); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk( meta, pm, stk::mesh::BulkData::NO_AUTO_AURA); std::vector elems; @@ -3685,7 +3667,6 @@ TEST(BulkData, modification_end_and_change_entity_owner_no_aura_check) const int spatial_dimension = 2; stk::mesh::MetaData meta( spatial_dimension ); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh( meta, pm, stk::mesh::BulkData::NO_AUTO_AURA); CEOUtils::fillMeshfor2Elem2ProcFlipAndTest_no_ghost(mesh, meta); @@ -3739,7 +3720,6 @@ TEST(BulkData, change_entity_owner_2Elem2ProcMove) const int spatial_dimension = 2; stk::mesh::MetaData meta( spatial_dimension ); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk( meta, pm); std::vector elems; @@ -3777,7 +3757,6 @@ TEST(BulkData, change_entity_owner_2Elem2ProcFlip) const int spatial_dimension = 2; stk::mesh::MetaData meta( spatial_dimension ); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh( meta, pm); CEOUtils::fillMeshfor2Elem2ProcFlipAndTest(mesh, meta); @@ -3812,7 +3791,6 @@ TEST(BulkData, change_entity_owner_3Elem2ProcMoveRight) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta_data, pm); int p_rank = mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -3855,7 +3833,6 @@ TEST(BulkData, change_entity_owner_3Elem2ProcMoveLeft) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta_data, pm); int p_rank = mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -3910,7 +3887,6 @@ TEST(BulkData, change_entity_owner_4Elem4ProcEdge) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta_data, pm); 
int p_rank = mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -3982,7 +3958,6 @@ TEST(BulkData, change_entity_owner_8Elem4ProcMoveTop) unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); CEOUtils::fillMeshfor8Elem4ProcMoveTopAndTest(mesh, meta); @@ -4031,7 +4006,6 @@ TEST(BulkData, change_entity_owner_4Elem4ProcRotate) unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); const int p_rank = mesh.parallel_rank(); @@ -4099,7 +4073,6 @@ TEST(BulkData, change_entity_owner_3Elem4Proc1Edge3D) unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); const int p_rank = mesh.parallel_rank(); CEOUtils::fillMeshfor3Elem4Proc1Edge3DAndTest(mesh, meta); @@ -4137,7 +4110,6 @@ TEST(BulkData, test_find_ghosted_nodes_that_need_to_be_shared) { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& elem_part = meta.declare_part_with_topology("beam2", stk::topology::BEAM_2); meta.commit(); @@ -4232,7 +4204,6 @@ TEST(BulkData, show_how_one_could_add_a_shared_node) { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& elem_part = meta.declare_part_with_topology("triangle", stk::topology::SHELL_TRIANGLE_3); meta.commit(); @@ -4354,7 +4325,6 @@ TEST(BulkData, show_how_one_could_add_a_shared_node) void write_mesh(const std::string& filename, stk::mesh::BulkData& mesh) { stk::io::StkMeshIoBroker writer(mesh.parallel()); - writer.use_simple_fields(); writer.set_bulk_data(mesh); size_t output_handle = writer.create_output_mesh(filename, stk::io::WRITE_RESULTS); writer.write_output_mesh(output_handle); @@ -4425,7 +4395,6 @@ TEST(BulkData, can_we_create_shared_nodes) { unsigned spatialDim = 3; stk::mesh::MetaData 
meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Selector all_nodes = meta.universal_part(); typedef stk::mesh::Field CoordFieldType; @@ -5141,7 +5110,6 @@ TEST(BulkData, show_API_for_batch_create_child_nodes) { unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& elem_part = meta.declare_part_with_topology("triangle", stk::topology::TRIANGLE_3_2D); meta.commit(); @@ -5660,7 +5628,6 @@ TEST(BulkData, generate_new_ids) int psize = stk::parallel_machine_size(communicator); stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); std::ostringstream os; os << "generated:10x10x" << psize; const std::string generatedMeshSpec = os.str(); @@ -5730,7 +5697,6 @@ TEST(BulkData, test_generate_new_entities) int psize = stk::parallel_machine_size(communicator); stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); std::ostringstream os; os << "generated:10x10x" << psize; const std::string generatedMeshSpec = os.str(); @@ -5801,7 +5767,6 @@ TEST(BulkData, test_destroy_ghosted_entity_then_create_locally_owned_entity_with if ( psize == 2 ) { stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); std::ostringstream os; os << "generated:2x2x2"; const std::string generatedMeshSpec = os.str(); @@ -5849,7 +5814,6 @@ TEST(FaceCreation, test_face_creation_2Hexes_2procs) if (numProcs==2) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::unit_test_util::BulkDataFaceSharingTester mesh(meta, MPI_COMM_WORLD); const std::string generatedMeshSpec = "generated:1x1x2"; @@ -5882,7 +5846,7 @@ TEST(FaceCreation, test_face_creation_2Hexes_2procs) mesh.modification_begin(); - stk::mesh::Entity side = stk::unit_test_util::simple_fields::declare_element_side_with_nodes(mesh, elem, nodes, 1+procId, meta.get_topology_root_part(stk::topology::QUAD_4)); + stk::mesh::Entity side = 
stk::unit_test_util::declare_element_side_with_nodes(mesh, elem, nodes, 1+procId, meta.get_topology_root_part(stk::topology::QUAD_4)); EXPECT_TRUE(mesh.is_valid(side)); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataAura.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataAura.cpp index 11a82dddc23e..a8cc2677f934 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataAura.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataAura.cpp @@ -100,7 +100,6 @@ TEST(UnitTestingOfBulkData, aura1DRing_RestoreDeletedAuraEntity) std::shared_ptr bulkPtr = builder.create(); BulkData& mesh = *bulkPtr; MetaData& meta_data = mesh.mesh_meta_data(); - meta_data.use_simple_fields(); Part & elem_part = meta_data.declare_part_with_topology("elem_part", stk::topology::LINE_2_1D); Part & node_part = meta_data.declare_part_with_topology("node_part", stk::topology::NODE); @@ -113,8 +112,8 @@ TEST(UnitTestingOfBulkData, aura1DRing_RestoreDeletedAuraEntity) { for (unsigned ielem=0; ielem < nelems; ielem++) { int e_owner = static_cast(elems_0[ielem][3]); - stk::mesh::fixtures::simple_fields::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][2], e_owner); - stk::mesh::fixtures::simple_fields::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][1], e_owner); + stk::mesh::fixtures::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][2], e_owner); + stk::mesh::fixtures::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][1], e_owner); } } @@ -143,8 +142,8 @@ TEST(UnitTestingOfBulkData, aura1DRing_RestoreDeletedAuraEntity) mesh.declare_relation( elem, nodes[1], 1 ); // Node sharing - stk::mesh::fixtures::simple_fields::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[0]), nodes[0]); - stk::mesh::fixtures::simple_fields::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[1]), nodes[1]); + stk::mesh::fixtures::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[0]), nodes[0]); + stk::mesh::fixtures::DoAddNodeSharings(mesh, 
nodes_to_procs, mesh.identifier(nodes[1]), nodes[1]); } } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataIdMapper.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataIdMapper.cpp index ab98f6edef2c..563ac6a5df80 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataIdMapper.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataIdMapper.cpp @@ -8,7 +8,7 @@ namespace { -class BulkDataIdMapperTest : public stk::unit_test_util::simple_fields::MeshFixture +class BulkDataIdMapperTest : public stk::unit_test_util::MeshFixture { protected: BulkDataIdMapperTest() diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataNotifications.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataNotifications.cpp index 47ff264321f0..dda2b8bccd9a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataNotifications.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataNotifications.cpp @@ -57,7 +57,6 @@ TEST(BulkDataMod, synchronized_count_basic_fill_mesh) if (stk::parallel_machine_size(comm) > 2) { GTEST_SKIP(); } std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); - bulk->mesh_meta_data().use_simple_fields(); const std::string generatedMeshSpec = "generated:1x1x2"; stk::io::fill_mesh(generatedMeshSpec, *bulk); @@ -71,7 +70,6 @@ TEST(BulkDataMod, synchronized_count_empty_mod_cycle) if (stk::parallel_machine_size(comm) > 2) { GTEST_SKIP(); } std::shared_ptr bulk = stk::mesh::MeshBuilder(comm).create(); - bulk->mesh_meta_data().use_simple_fields(); const std::string generatedMeshSpec = "generated:1x1x2"; stk::io::fill_mesh(generatedMeshSpec, *bulk); @@ -91,7 +89,6 @@ TEST(BulkDataNotifications, test_listener_buckets_changed) if (numProcs==2) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, comm, stk::mesh::BulkData::NO_AUTO_AURA); const std::string generatedMeshSpec = "generated:1x1x2"; diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataSharing.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataSharing.cpp index d6ea8115e9ef..14f4e7f7f135 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataSharing.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkDataSharing.cpp @@ -122,7 +122,6 @@ TEST(UnitTestingOfBulkData, node_sharing) std::shared_ptr meshPtr = builder.create(); BulkData& mesh = *meshPtr; MetaData& meta_data = mesh.mesh_meta_data(); - meta_data.use_simple_fields(); stk::mesh::Part& elem_part = meta_data.declare_part_with_topology("elem_part", stk::topology::QUAD_4_2D); stk::mesh::Part& node_part = meta_data.declare_part_with_topology("node_part", stk::topology::NODE); meta_data.commit(); @@ -282,8 +281,6 @@ TEST(UnitTestingOfBulkData, sharedProcsIntersection) builder.set_spatial_dimension(spatialDim); std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& bulk = *bulkPtr; - stk::mesh::MetaData& meta = bulk.mesh_meta_data(); - meta.use_simple_fields(); stk::io::fill_mesh("generated:1x1x4", bulk); stk::mesh::Entity sharedNode9 = bulk.get_entity(stk::topology::NODE_RANK, 9); @@ -391,7 +388,6 @@ TEST(UnitTestingOfBulkData, node_sharing_with_dangling_nodes) std::shared_ptr meshPtr = builder.create(); BulkData& mesh = *meshPtr; MetaData& meta_data = mesh.mesh_meta_data(); - meta_data.use_simple_fields(); stk::mesh::Part& elem_part = meta_data.declare_part_with_topology("elem_part", stk::topology::QUAD_4_2D); stk::mesh::Part& node_part = meta_data.declare_part_with_topology("node_part", stk::topology::NODE); meta_data.commit(); @@ -621,7 +617,6 @@ TEST(UnitTestBulkData, resolveSharedModifiedFaceNextToUnmodifiedFaces) std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& bulk = *bulkPtr; stk::mesh::MetaData& meta = bulk.mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part& block2 = meta.declare_part_with_topology("block_2", stk::topology::HEX_8); stk::io::fill_mesh("generated:3x1x2", 
bulk); stk::mesh::Part& block1 = *meta.get_part("block_1"); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_ChangeParts.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_ChangeParts.cpp index 6704e4c3a092..7b597c648f5a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_ChangeParts.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_ChangeParts.cpp @@ -53,7 +53,7 @@ using stk::mesh::BulkData; using stk::mesh::Entity; using stk::mesh::Selector; using stk::mesh::PartVector; -using stk::mesh::fixtures::simple_fields::RingFixture; +using stk::mesh::fixtures::RingFixture; //---------------------------------------------------------------------- diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_Destroy.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_Destroy.cpp index 53db8d358c4c..60edb44ee4ae 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_Destroy.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_Destroy.cpp @@ -62,7 +62,7 @@ using stk::mesh::Entity; using stk::mesh::Selector; using stk::mesh::PartVector; using stk::mesh::EntityId; -using stk::mesh::fixtures::simple_fields::RingFixture; +using stk::mesh::fixtures::RingFixture; using stk::unit_test_util::build_mesh; //---------------------------------------------------------------------- @@ -169,7 +169,6 @@ TEST(UnitTestingOfBulkData, testDestroy_ring) const int spatial_dimension = 3; MetaData meta( spatial_dimension ); - meta.use_simple_fields(); meta.commit(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_new.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_new.cpp index afe8dd559f45..6fb3e70d6156 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_new.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkData_new.cpp @@ -79,7 +79,7 @@ namespace { * is similar to the BoxFixture it inherits from, with the only difference * being the extra parts that this 
fixture declares for testing purposes. */ -struct TestBoxFixture : public fixtures::simple_fields::BoxFixture +struct TestBoxFixture : public fixtures::BoxFixture { TestBoxFixture(stk::ParallelMachine pm = MPI_COMM_WORLD) : BoxFixture(pm, stk::mesh::BulkData::AUTO_AURA), @@ -203,7 +203,7 @@ TEST ( UnitTestBulkData_new , verifyDetectsNonOwnerChange ) int p_size = stk::parallel_machine_size(pm); int p_rank = stk::parallel_machine_rank(pm); - fixtures::simple_fields::QuadFixture fixture(pm, 1 /*nx*/, p_size /*ny*/); + fixtures::QuadFixture fixture(pm, 1 /*nx*/, p_size /*ny*/); fixture.m_meta.commit(); fixture.generate_mesh(); BulkData & bulk = fixture.m_bulk_data; @@ -248,11 +248,8 @@ TEST ( UnitTestBulkData_new , verifyExplicitAddInducedPart ) cell_part_vector.push_back ( &fixture.m_cell_part ); bulk.change_entity_parts ( new_cell , cell_part_vector ); -#ifdef SIERRA_MIGRATION bulk.change_entity_parts ( new_node , cell_part_vector ); -#else - ASSERT_THROW ( bulk.change_entity_parts ( new_node , cell_part_vector ) , std::runtime_error ); -#endif + EXPECT_TRUE(bulk.bucket(new_node).member(fixture.m_cell_part)); } TEST ( UnitTestBulkData_new , verifyDefaultPartAddition ) @@ -688,7 +685,7 @@ TEST ( UnitTestBulkData_new , verifyBoxGhosting ) const int p_size = stk::parallel_machine_size( MPI_COMM_WORLD ); if ( 8 < p_size ) { return ; } - fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, 2, 2, 2 ); + fixtures::HexFixture fixture( MPI_COMM_WORLD, 2, 2, 2 ); fixture.m_meta.commit(); fixture.generate_mesh(); const BulkData& mesh = fixture.m_bulk_data; @@ -700,7 +697,7 @@ TEST ( UnitTestBulkData_new , verifyBoxGhosting ) ASSERT_TRUE( mesh.is_valid(node) ); ASSERT_TRUE( fixture.node_id(ix,iy,iz) == mesh.identifier(node) ); - fixtures::simple_fields::HexFixture::Scalar * const node_coord = stk::mesh::field_data(*fixture.m_coord_field, node); + fixtures::HexFixture::Scalar * const node_coord = stk::mesh::field_data(*fixture.m_coord_field, node); ASSERT_TRUE( node_coord 
!= NULL ); } } @@ -745,7 +742,6 @@ TEST ( UnitTestBulkData_new , testUninitializedMetaData ) std::shared_ptr bulk = stk::mesh::MeshBuilder(pm).create(); MetaData& meta = bulk->mesh_meta_data(); - meta.use_simple_fields(); meta.initialize(2); @@ -795,7 +791,6 @@ TEST ( UnitTestBulkData_new , testGhostHandleRemainsValidAfterRefresh ) builder.set_entity_rank_names(entity_rank_names); std::shared_ptr bulkPtr = builder.create(); MetaData& meta_data = bulkPtr->mesh_meta_data(); - meta_data.use_simple_fields(); Part & elem_part = meta_data.declare_part_with_topology("elem_part", stk::topology::LINE_2_1D); Part & node_part = meta_data.declare_part_with_topology("node_part", stk::topology::NODE); @@ -811,8 +806,8 @@ TEST ( UnitTestBulkData_new , testGhostHandleRemainsValidAfterRefresh ) { for (unsigned ielem=0; ielem < nelems; ielem++) { int e_owner = static_cast(elems_0[ielem][3]); - stk::mesh::fixtures::simple_fields::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][2], e_owner); - stk::mesh::fixtures::simple_fields::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][1], e_owner); + stk::mesh::fixtures::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][2], e_owner); + stk::mesh::fixtures::AddToNodeProcsMMap(nodes_to_procs, elems_0[ielem][1], e_owner); } } @@ -840,8 +835,8 @@ TEST ( UnitTestBulkData_new , testGhostHandleRemainsValidAfterRefresh ) mesh.declare_relation( elem, nodes[1], 1 ); // Node sharing - stk::mesh::fixtures::simple_fields::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[0]), nodes[0]); - stk::mesh::fixtures::simple_fields::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[1]), nodes[1]); + stk::mesh::fixtures::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[0]), nodes[0]); + stk::mesh::fixtures::DoAddNodeSharings(mesh, nodes_to_procs, mesh.identifier(nodes[1]), nodes[1]); } } @@ -901,7 +896,6 @@ TEST ( UnitTestBulkData_new , testCustomBucketCapacity ) std::shared_ptr bulk = builder.create(); MetaData& meta = 
bulk->mesh_meta_data(); - meta.use_simple_fields(); Part & node_part = meta.declare_part_with_topology("node_part", stk::topology::NODE); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkModification.cpp index a266b2330453..c123d5e67210 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBulkModification.cpp @@ -67,7 +67,7 @@ using stk::mesh::BucketIterator; using stk::mesh::Entity; using stk::mesh::EntityRank; using stk::mesh::BucketVector; -using stk::mesh::fixtures::simple_fields::RingFixture; +using stk::mesh::fixtures::RingFixture; class UnitTestStkMeshBulkModification { public: diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEO.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEO.cpp index 581104c530f1..93782a7a7d73 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEO.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEO.cpp @@ -62,8 +62,8 @@ namespace stk { namespace mesh { class Ghosting; } } namespace stk { namespace mesh { class Part; } } namespace stk { namespace mesh { class Selector; } } -namespace stk { namespace mesh { namespace fixtures { namespace simple_fields { class BoxFixture; } } } } -namespace stk { namespace mesh { namespace fixtures { namespace simple_fields { class RingFixture; } } } } +namespace stk { namespace mesh { namespace fixtures { class BoxFixture; } } } +namespace stk { namespace mesh { namespace fixtures { class RingFixture; } } } namespace stk { @@ -85,8 +85,8 @@ using stk::mesh::EntityId; using stk::mesh::EntityKey; using stk::mesh::EntityVector; using stk::mesh::EntityRank; -using stk::mesh::fixtures::simple_fields::RingFixture; -using stk::mesh::fixtures::simple_fields::BoxFixture; +using stk::mesh::fixtures::RingFixture; +using stk::mesh::fixtures::BoxFixture; namespace { @@ -105,7 +105,6 @@ TEST(CEO, change_entity_owner_2Elem2ProcMove) 
const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta, pm); stk::mesh::EntityVector elems; @@ -155,10 +154,8 @@ TEST(CEO, change_entity_owner_2ElemWithSideset) { builder.set_spatial_dimension(3); std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& bulk = *bulkPtr; - stk::mesh::MetaData& meta = bulk.mesh_meta_data(); - meta.use_simple_fields(); - stk::unit_test_util::simple_fields::create_AB_mesh_with_sideset_and_field(bulk, stk::unit_test_util::LEFT, stk::unit_test_util::DECREASING, "dummyField"); + stk::unit_test_util::create_AB_mesh_with_sideset_and_field(bulk, stk::unit_test_util::LEFT, stk::unit_test_util::DECREASING, "dummyField"); if (pRank == 0) { @@ -210,8 +207,6 @@ void test_change_entity_owner_3Elem3Proc_WithCustomGhosts(stk::mesh::BulkData::A builder.set_entity_rank_names(rankNames); std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& stkMeshBulkData = *bulkPtr; - stk::mesh::MetaData& stkMeshMetaData = stkMeshBulkData.mesh_meta_data(); - stkMeshMetaData.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x6"; @@ -311,7 +306,6 @@ TEST(CEO,moveElem_fieldDataOfNodes) std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& bulk = *bulkPtr; stk::mesh::MetaData& meta = bulk.mesh_meta_data(); - meta.use_simple_fields(); auto &field1 = meta.declare_field(stk::topology::NODE_RANK, "field1"); stk::mesh::put_field_on_entire_mesh(field1); stk::io::fill_mesh("generated:1x1x2", bulk); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEOME.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEOME.cpp index d789a40350ac..64b405254fd8 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEOME.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCEOME.cpp @@ -99,8 +99,8 @@ using stk::mesh::EntityId; using stk::mesh::EntityKey; using stk::mesh::EntityVector; using stk::mesh::EntityRank; -using 
stk::mesh::fixtures::simple_fields::RingFixture; -using stk::mesh::fixtures::simple_fields::BoxFixture; +using stk::mesh::fixtures::RingFixture; +using stk::mesh::fixtures::BoxFixture; void printMemoryStats(MPI_Comm comm) { @@ -184,7 +184,6 @@ TEST(CEOME, change_entity_owner_2Elem2ProcMove) const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta, pm); stk::mesh::EntityVector elems; @@ -225,7 +224,6 @@ TEST(CEOME, change_entity_owner_2Elem2ProcFlip) } const int spatial_dimension = 2; stk::mesh::MetaData meta(spatial_dimension); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); CEOUtils::fillMeshfor2Elem2ProcFlipAndTest(mesh, meta); @@ -281,7 +279,6 @@ TEST(CEOME, change_entity_owner_3Elem2ProcMoveRight) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta_data, pm); int p_rank = mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -334,7 +331,6 @@ TEST(CEOME, change_entity_owner_3Elem2ProcMoveLeft) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta_data, pm); int p_rank = mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -379,7 +375,6 @@ TEST(CEOME, TwoElemGiveAllEntitiesToOneProcAndCheckParts) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta(spatial_dim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); int p_rank = mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -444,7 +439,6 @@ TEST(CEOME, change_entity_owner_4Elem4ProcEdge) // Set up meta and bulk data const unsigned spatial_dim = 2; MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta_data, pm); int p_rank = 
mesh.parallel_rank(); int p_size = mesh.parallel_size(); @@ -568,7 +562,6 @@ TEST(CEOME, change_entity_owner_8Elem4ProcMoveTop) unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); CEOUtils::fillMeshfor8Elem4ProcMoveTopAndTest(mesh, meta); @@ -627,7 +620,6 @@ TEST(CEOME, change_entity_owner_4Elem4ProcRotate) unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); const int p_rank = mesh.parallel_rank(); CEOUtils::fillMeshfor4Elem4ProcRotateAndTest(mesh, meta); @@ -708,7 +700,6 @@ TEST(CEOME, change_entity_owner_3Elem4Proc1Edge3D) unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester mesh(meta, pm); const int p_rank = mesh.parallel_rank(); CEOUtils::fillMeshfor3Elem4Proc1Edge3DAndTest(mesh, meta); @@ -763,7 +754,6 @@ TEST(CEOME, test_node_ownership_change_that_causes_ghosted_node_to_be_marked_as_ if(psize == 2) { stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x2x2"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityId.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityId.cpp index f7e5a051e0df..f3734d6980aa 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityId.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityId.cpp @@ -130,7 +130,7 @@ TEST( UnitTestChangeEntityId, change_id_large ) const unsigned NZ = 20; const unsigned num_elems = NX * NY * NZ; - fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); Field & simple_nodal_field = 
hf.m_meta.declare_field(stk::topology::NODE_RANK, "simple_nodal_field"); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityOwnerCommMaps.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityOwnerCommMaps.cpp index 71c16b5fc291..b06fe701172c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityOwnerCommMaps.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeEntityOwnerCommMaps.cpp @@ -62,8 +62,8 @@ #include "stk_mesh/baseImpl/MeshImplUtils.hpp" namespace stk { namespace mesh { class BulkData; } } -namespace stk { namespace mesh { namespace fixtures { namespace simple_fields { class BoxFixture; } } } } -namespace stk { namespace mesh { namespace fixtures { namespace simple_fields { class RingFixture; } } } } +namespace stk { namespace mesh { namespace fixtures { class BoxFixture; } } } +namespace stk { namespace mesh { namespace fixtures { class RingFixture; } } } namespace stk { namespace mesh { struct EntityKey; } } namespace stk @@ -86,8 +86,8 @@ using stk::mesh::EntityId; using stk::mesh::EntityKey; using stk::mesh::EntityVector; using stk::mesh::EntityRank; -using stk::mesh::fixtures::simple_fields::RingFixture; -using stk::mesh::fixtures::simple_fields::BoxFixture; +using stk::mesh::fixtures::RingFixture; +using stk::mesh::fixtures::BoxFixture; namespace { @@ -106,7 +106,6 @@ class FieldMgr m_auraCommMapElementFieldName("ElementCommInfo"), m_auraCommMapElementField(NULL) { - m_stkMeshMetaData.use_simple_fields(); } ~FieldMgr() {} @@ -184,7 +183,6 @@ TEST(UnitTestChangeEntityOwner, changeEntityOwnerCase1) std::string exodusFileName = "generated:1x1x6"; const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, comm); stk::io::StkMeshIoBroker exodusFileReader(comm); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeParts.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeParts.cpp index 84cc65b0e645..c55edaf4185c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeParts.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestChangeParts.cpp @@ -61,7 +61,6 @@ std::shared_ptr build_mesh(unsigned spatialDim, builder.set_spatial_dimension(spatialDim); builder.set_aura_option(auraOption); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } @@ -113,7 +112,7 @@ TEST(UnitTestChangeParts, test_batch_part_change) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; stk::mesh::Part& part = metaData.declare_part_with_topology("new_part", stk::topology::NODE); - stk::unit_test_util::simple_fields::setup_text_mesh(bulkData, meshDesc); + stk::unit_test_util::setup_text_mesh(bulkData, meshDesc); stk::mesh::Entity elem1 = bulkData.get_entity(stk::topology::ELEM_RANK, 1u); EXPECT_TRUE(bulkData.is_valid(elem1)); @@ -165,7 +164,7 @@ TEST(UnitTestChangeParts, test_superset_and_subset_part_change) stk::mesh::put_field_on_mesh(field, supersetPart, nullptr); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulkData, meshDesc); + stk::unit_test_util::setup_text_mesh(bulkData, meshDesc); stk::mesh::Entity node1 = bulkData.get_entity(stk::topology::NODE_RANK, 1u); EXPECT_TRUE(bulkData.is_valid(node1)); @@ -303,11 +302,11 @@ TEST(ChangeElemParts, addThenRemoveElemPart_checkSharedNode) EXPECT_EQ(stk::mesh::Modified, bulk.state(node5)); } -class TestChangePartsWithSelector : public stk::unit_test_util::simple_fields::MeshFixture +class TestChangePartsWithSelector : public stk::unit_test_util::MeshFixture { public: TestChangePartsWithSelector() - : stk::unit_test_util::simple_fields::MeshFixture(3) + : stk::unit_test_util::MeshFixture(3) { setenv("STK_MESH_RUN_CONSISTENCY_CHECK", "ON", 1); setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckOwnedOrphanedSidesOrEdges.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckOwnedOrphanedSidesOrEdges.cpp index 097aa569f09b..7cd9096acc91 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckOwnedOrphanedSidesOrEdges.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckOwnedOrphanedSidesOrEdges.cpp @@ -7,7 +7,7 @@ namespace { -class MeshCheckerOwnedOrphans : public stk::unit_test_util::simple_fields::MeshFixture +class MeshCheckerOwnedOrphans : public stk::unit_test_util::MeshFixture { protected: MeshCheckerOwnedOrphans() @@ -66,7 +66,7 @@ TEST_F(MeshCheckerOwnedOrphans, check_mesh_with_orphaned_owned_sides) } } -class MeshCheckerWithElements : public stk::unit_test_util::simple_fields::MeshFixture +class MeshCheckerWithElements : public stk::unit_test_util::MeshFixture { protected: void delete_element5_on_proc0() diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckUniqueGlobalIds.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckUniqueGlobalIds.cpp index a897ac4685ae..fc332841fa47 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckUniqueGlobalIds.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCheckUniqueGlobalIds.cpp @@ -9,7 +9,7 @@ namespace { -class MeshChecker : public stk::unit_test_util::simple_fields::MeshFixture +class MeshChecker : public stk::unit_test_util::MeshFixture { public: }; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCommInfoObserver.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCommInfoObserver.cpp index 80c8b0ca1f48..9f95efe5d274 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCommInfoObserver.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCommInfoObserver.cpp @@ -57,7 +57,6 @@ class CommInfoObserverTest : public ::testing::Test meta(bulkPtr->mesh_meta_data()), bulk(*bulkPtr) { - meta.use_simple_fields(); stk::io::fill_mesh("generated:1x1x4", bulk); observer = std::make_shared(); diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCrackMesh.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCrackMesh.cpp index 5e4d6946f4e0..1ad6764a49a6 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCrackMesh.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCrackMesh.cpp @@ -63,7 +63,7 @@ TEST ( UnitTestCrackMesh , VerifyDestroy2D ) for ( unsigned iy = 0 ; iy < ny ; ++iy ) { for ( unsigned ix = 0 ; ix < nx ; ++ix ) { - stk::mesh::fixtures::simple_fields::QuadFixture fixture( pm , nx , ny ); + stk::mesh::fixtures::QuadFixture fixture( pm , nx , ny ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -94,7 +94,7 @@ TEST ( UnitTestCrackMesh , VerifyDestroy3D ) for ( unsigned iz = 0 ; iz < nz ; ++iz ) { for ( unsigned iy = 0 ; iy < ny ; ++iy ) { for ( unsigned ix = 0 ; ix < nx ; ++ix ) { - stk::mesh::fixtures::simple_fields::HexFixture fixture( pm , nx , ny , nz ); + stk::mesh::fixtures::HexFixture fixture( pm , nx , ny , nz ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -121,7 +121,7 @@ TEST ( UnitTestCrackMesh , verifyBoxGhosting ) // if all (incl ghosted) copies get updated. 
// Make the hex fixture - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, 2,2,2 ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, 2,2,2 ); fixture.m_meta.commit(); fixture.generate_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateAdjacentEntities.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateAdjacentEntities.cpp index d31c021d1490..b17ec2bcda71 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateAdjacentEntities.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateAdjacentEntities.cpp @@ -56,7 +56,7 @@ TEST( UnitTestStkMeshSkinning , testCreateAdjacentEntities3x1x1 ) const size_t NY = 1; const size_t NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -97,7 +97,7 @@ TEST( UnitTestStkMeshSkinning , testCreateAdjacentEntities3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -175,7 +175,7 @@ TEST( UnitTestStkMeshSkinning , testCreateAdjacentEntities3x3 ) const size_t NX = 3; const size_t NY = 3; - stk::mesh::fixtures::simple_fields::QuadFixture fixture(MPI_COMM_WORLD, NX, NY); + stk::mesh::fixtures::QuadFixture fixture(MPI_COMM_WORLD, NX, NY); fixture.m_meta.commit(); fixture.generate_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateEdges.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateEdges.cpp index ac1c6300fb57..69e573afdfce 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateEdges.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateEdges.cpp @@ -64,7 +64,7 @@ using stk::unit_test_util::build_mesh; TEST ( UnitTestCreateEdges, Quad_2x1 ) { - 
stk::mesh::fixtures::simple_fields::QuadFixture fixture( MPI_COMM_WORLD, 2, 1); + stk::mesh::fixtures::QuadFixture fixture( MPI_COMM_WORLD, 2, 1); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -94,7 +94,7 @@ TEST ( UnitTestCreateEdges, Quad_2x1 ) TEST ( UnitTestCreateEdges, Quad9_2x1 ) { - stk::mesh::fixtures::simple_fields::Quad9Fixture fixture( MPI_COMM_WORLD, 2, 1); + stk::mesh::fixtures::Quad9Fixture fixture( MPI_COMM_WORLD, 2, 1); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -124,7 +124,7 @@ TEST ( UnitTestCreateEdges, Quad9_2x1 ) TEST ( UnitTestCreateEdges, Quad_3x1 ) { - stk::mesh::fixtures::simple_fields::QuadFixture fixture( MPI_COMM_WORLD, 3, 1); + stk::mesh::fixtures::QuadFixture fixture( MPI_COMM_WORLD, 3, 1); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -155,7 +155,7 @@ TEST ( UnitTestCreateEdges, Quad_3x1 ) TEST ( UnitTestCreateEdges, Quad9_3x1 ) { - stk::mesh::fixtures::simple_fields::Quad9Fixture fixture( MPI_COMM_WORLD, 3, 1); + stk::mesh::fixtures::Quad9Fixture fixture( MPI_COMM_WORLD, 3, 1); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -185,7 +185,7 @@ TEST ( UnitTestCreateEdges, Quad9_3x1 ) TEST ( UnitTestCreateEdges, Hex_2x1x1 ) { - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, 2, 1, 1); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, 2, 1, 1); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -215,7 +215,7 @@ TEST ( UnitTestCreateEdges, Hex_2x1x1 ) TEST( UnitTestCreateEdges , Hex_3x1x1 ) { - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, 3, 1, 1); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, 3, 1, 1); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -254,7 +254,7 @@ TEST( UnitTestCreateEdges , testCreateEdges3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); 
fixture.m_meta.commit(); fixture.generate_mesh(); @@ -355,7 +355,7 @@ TEST( UnitTestCreateEdges , testCreateEdges3x3 ) const size_t NX = 3; const size_t NY = 3; - stk::mesh::fixtures::simple_fields::QuadFixture fixture(MPI_COMM_WORLD, NX, NY); + stk::mesh::fixtures::QuadFixture fixture(MPI_COMM_WORLD, NX, NY); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -416,7 +416,7 @@ TEST( UnitTestCreateEdges , hex1x1x4 ) if(procCount == 2) { const std::string generatedMeshSpec = "generated:1x1x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::mesh::BulkData &stkMeshBulkData = *stkMesh.getBulkData(); @@ -546,7 +546,7 @@ TEST( UnitTestCreateEdges, hybrid_HexPyrTet ) TEST ( UnitTestCreateEdges, Hex_2x1x1_select_out_a_face ) { - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, 2, 1, 1); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, 2, 1, 1); stk::mesh::Part & facePart = fixture.m_meta.declare_part_with_topology("face_part_to_exclude", stk::topology::QUADRILATERAL_4, true); stk::mesh::PartVector facePartVector(1, &facePart); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateFaces.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateFaces.cpp index e80bc09ebb34..9f446199215a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateFaces.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestCreateFaces.cpp @@ -113,7 +113,7 @@ TEST ( UnitTestCreateFaces, Hex_2x1x1 ) const size_t NY = 1; const size_t NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -148,7 +148,7 @@ TEST ( UnitTestCreateFaces, Tet_2x1x1 ) const size_t NY = 1; const size_t NZ = 1; - 
stk::mesh::fixtures::simple_fields::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -183,7 +183,7 @@ TEST( UnitTestCreateFaces , Hex_3x1x1 ) const size_t NY = 1; const size_t NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -217,7 +217,7 @@ TEST( UnitTestCreateFaces , Tet_3x1x1 ) const size_t NY = 1; const size_t NZ = 1; - stk::mesh::fixtures::simple_fields::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -251,7 +251,7 @@ TEST( UnitTestCreateFaces , testCreateFaces3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -370,7 +370,7 @@ TEST( UnitTestCreateFaces , testCreateTetFaces3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -485,8 +485,8 @@ TEST( UnitTestCreateFaces , testCreateTetFaces3x3x3 ) TEST ( UnitTestCreateFaces, Gears ) { - stk::mesh::fixtures::simple_fields::GearsFixture fixture( MPI_COMM_WORLD, 1, - stk::mesh::fixtures::simple_fields::GearParams(0.1, 0.4, 1.0, -0.4, 0.4)); + stk::mesh::fixtures::GearsFixture fixture( MPI_COMM_WORLD, 1, + stk::mesh::fixtures::GearParams(0.1, 0.4, 1.0, -0.4, 0.4)); fixture.meta_data.commit(); fixture.generate_mesh(); @@ -524,14 +524,14 @@ void 
heterogeneous_create_faces_test(stk::mesh::BulkData::AutomaticAuraOption au std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_WORLD, autoAuraOption); stk::mesh::MetaData& meta_data = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk_data = *bulkPtr; - stk::mesh::fixtures::simple_fields::VectorFieldType & node_coord = + stk::mesh::fixtures::VectorFieldType & node_coord = meta_data.declare_field(stk::topology::NODE_RANK, "coordinates"); stk::mesh::put_field_on_mesh( node_coord , meta_data.universal_part() , 3, nullptr); - stk::mesh::fixtures::simple_fields::heterogeneous_mesh_meta_data( meta_data , node_coord ); + stk::mesh::fixtures::heterogeneous_mesh_meta_data( meta_data , node_coord ); meta_data.commit(); - stk::mesh::fixtures::simple_fields::heterogeneous_mesh_bulk_data( bulk_data , node_coord ); + stk::mesh::fixtures::heterogeneous_mesh_bulk_data( bulk_data , node_coord ); /* * Three hexes, three wedges, three tets, two pyramids, @@ -617,14 +617,14 @@ TEST ( UnitTestCreateFaces, Degenerate ) std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_WORLD); stk::mesh::MetaData& meta_data = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk_data = *bulkPtr; - stk::mesh::fixtures::simple_fields::VectorFieldType & node_coord = + stk::mesh::fixtures::VectorFieldType & node_coord = meta_data.declare_field(stk::topology::NODE_RANK, "coordinates"); stk::mesh::put_field_on_mesh( node_coord , meta_data.universal_part() , 3, nullptr); - stk::mesh::fixtures::simple_fields::degenerate_mesh_meta_data( meta_data , node_coord ); + stk::mesh::fixtures::degenerate_mesh_meta_data( meta_data , node_coord ); meta_data.commit(); - stk::mesh::fixtures::simple_fields::degenerate_mesh_bulk_data( bulk_data , node_coord ); + stk::mesh::fixtures::degenerate_mesh_bulk_data( bulk_data , node_coord ); /* * Z = 0 plane: diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDebugPrinting.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDebugPrinting.cpp index 622441f2a222..67b8e9ce119e 100644 
--- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDebugPrinting.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDebugPrinting.cpp @@ -49,7 +49,7 @@ TEST(UnitTestDebugDump, dump_all_meta_info) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); // Doesn't check anything, but at least makes sure it builds and runs @@ -64,7 +64,7 @@ TEST(UnitTestDebugDump, dump_all_mesh_info) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); @@ -80,7 +80,7 @@ TEST(UnitTestDebugDump, dump_mesh_per_proc) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); @@ -96,7 +96,7 @@ TEST(UnitTestDebugDump, dump_partition_summary) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); @@ -112,7 +112,7 @@ TEST(UnitTestDebugDump, dump_partition_summary_per_proc) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); @@ -128,7 +128,7 @@ TEST(UnitTestDebugDump, dump_bucket_size_histogram) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - 
stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); @@ -144,7 +144,7 @@ TEST(UnitTestDebugDump, dump_bucket_size_histogram_per_proc) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); @@ -160,7 +160,7 @@ TEST(UnitTestDebugDump, dump_global_bucket_size_histogram) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD, NX, NY, NZ); hf.m_meta.commit(); hf.generate_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeclareElement.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeclareElement.cpp index d50b1a90f2a4..3ca7a4e10071 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeclareElement.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeclareElement.cpp @@ -54,7 +54,7 @@ TEST( UnitTestDeclareElement , inject_shell ) // Create the fixture, adding a part for the shell - stk::mesh::fixtures::simple_fields::HexFixture fixture( pm , 2 , 1 , 1 ); + stk::mesh::fixtures::HexFixture fixture( pm , 2 , 1 , 1 ); const int p_rank = fixture.m_bulk_data.parallel_rank(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeleteEntities.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeleteEntities.cpp index 7bff9886fffb..5a7e8a580e83 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeleteEntities.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeleteEntities.cpp @@ -29,7 +29,7 @@ void expect_num_elements_and_faces_and_nodes(stk::mesh::BulkData &bulk, size_t g EXPECT_EQ(goldNumNodes, entityCounts[stk::topology::NODE_RANK]); } -class HexShellHexMesh 
: public stk::unit_test_util::simple_fields::MeshFixture +class HexShellHexMesh : public stk::unit_test_util::MeshFixture { protected: HexShellHexMesh() @@ -48,7 +48,7 @@ class HexShellHexMesh : public stk::unit_test_util::simple_fields::MeshFixture 1,2,HEX_8,5,6,7,8,9,10,11,12\n\ 0,3,SHELL_QUAD_4,5,6,7,8"; } - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } }; @@ -59,7 +59,7 @@ TEST_F(HexShellHexMesh, DeleteShell_OnlyHexesRemain) expect_num_elements_and_nodes(get_bulk(), 2u, 12u); } -class HexHexShellMesh : public stk::unit_test_util::simple_fields::MeshFixture +class HexHexShellMesh : public stk::unit_test_util::MeshFixture { protected: HexHexShellMesh() @@ -78,7 +78,7 @@ class HexHexShellMesh : public stk::unit_test_util::simple_fields::MeshFixture 1,2,HEX_8,5,6,7,8,9,10,11,12\n\ 0,3,SHELL_QUAD_4,9,10,11,12"; } - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } }; @@ -94,7 +94,7 @@ TEST_F(HexHexShellMesh, DeleteAllHexes_OnlyShellRemains) EXPECT_TRUE(!get_bulk().bucket(node).shared()); } -class HexWedgeHexMesh : public stk::unit_test_util::simple_fields::MeshFixture +class HexWedgeHexMesh : public stk::unit_test_util::MeshFixture { protected: HexWedgeHexMesh() @@ -113,7 +113,7 @@ class HexWedgeHexMesh : public stk::unit_test_util::simple_fields::MeshFixture 1,2,WEDGE_6,5,9,8,6,10,7\n\ 1,3,HEX_8,11,12,13,14,5,9,10,6"; } - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } }; @@ -185,7 +185,7 @@ TEST_F(HexWedgeHexMesh, CreateFacesThenDeleteWedgeThenCreateFaces_TwoHexesRemain expect_num_elements_and_faces_and_nodes(get_bulk(), 2u, 12u, 14u); } -class SingleHexMesh : public stk::unit_test_util::simple_fields::MeshFixture +class SingleHexMesh : public stk::unit_test_util::MeshFixture { protected: const 
stk::mesh::EntityId firstHexId = 1; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeletedEntityCache.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeletedEntityCache.cpp index 959275ccb0fc..bcf2965c6efe 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeletedEntityCache.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDeletedEntityCache.cpp @@ -16,7 +16,7 @@ class DeletedEntityCacheTester : public stk::unit_test_util::MeshFixture "0,1,HEX_8,1,2,3,4,5,6,7,8\n\ 0,2,HEX_8,2,9,10,3,6,11,12,7"; setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); stk::mesh::BucketVector const& buckets = bulkData->get_buckets(stk::topology::NODE_RANK, metaData->universal_part()); for (auto& bucket : buckets) diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDestroyElements.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDestroyElements.cpp index a8fae94fa8fa..b9528d885cff 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDestroyElements.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDestroyElements.cpp @@ -55,7 +55,7 @@ stk::mesh::EntityVector get_faces_for_entity(const stk::mesh::BulkData &bulk, co return entityFaces; } -class HexMesh : public stk::unit_test_util::simple_fields::MeshTestFixture +class HexMesh : public stk::unit_test_util::MeshTestFixture { protected: HexMesh() @@ -69,7 +69,7 @@ class HexMesh : public stk::unit_test_util::simple_fields::MeshTestFixture std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n\ 0,2,HEX_8,2,9,10,3,6,11,12,7"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void run_test(stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -137,7 +137,7 @@ TEST_F(HexMesh, DeleteOnProcOneWithSharedNodes_NoAura) run_test_on_num_procs(2, stk::mesh::BulkData::NO_AUTO_AURA); } 
-class TetMesh : public stk::unit_test_util::simple_fields::MeshFixture +class TetMesh : public stk::unit_test_util::MeshFixture { protected: TetMesh() @@ -157,7 +157,7 @@ class TetMesh : public stk::unit_test_util::simple_fields::MeshFixture meshDesc = "0,1,TET_4,1,2,3,4\n\ 1,2,TET_4,2,5,3,4"; } - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } }; @@ -360,7 +360,7 @@ TEST_F(TetMesh, DeleteGhostedElement) } } -class BeamMesh : public stk::unit_test_util::simple_fields::MeshTestFixture +class BeamMesh : public stk::unit_test_util::MeshTestFixture { protected: BeamMesh() @@ -374,7 +374,7 @@ class BeamMesh : public stk::unit_test_util::simple_fields::MeshTestFixture std::string meshDesc = "0,1,BEAM_2,1,2\n\ 0,2,BEAM_2,2,3"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void run_test(stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -402,7 +402,7 @@ TEST_F(BeamMesh, DeleteOneElement) run_test_on_num_procs(1, stk::mesh::BulkData::NO_AUTO_AURA); } -class QuadMesh : public stk::unit_test_util::simple_fields::MeshTestFixture +class QuadMesh : public stk::unit_test_util::MeshTestFixture { protected: QuadMesh() @@ -416,7 +416,7 @@ class QuadMesh : public stk::unit_test_util::simple_fields::MeshTestFixture std::string meshDesc = "0,1,QUAD_4_2D,1,2,3,4\n\ 1,2,QUAD_4_2D,2,5,6,3"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void run_test(stk::mesh::BulkData::AutomaticAuraOption auraOption) @@ -469,7 +469,7 @@ TEST_F(QuadMesh, DeleteProcBoundaryElementWithAura) run_test_on_num_procs(2, stk::mesh::BulkData::AUTO_AURA); } -class HexShellHex : public stk::unit_test_util::simple_fields::MeshFixture +class HexShellHex : public stk::unit_test_util::MeshFixture { public: void make_hex_shell_hex_mesh() 
diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDistributedIndexWithBulkData.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDistributedIndexWithBulkData.cpp index d9cc1fbe91e4..9c28c607bbad 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestDistributedIndexWithBulkData.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestDistributedIndexWithBulkData.cpp @@ -97,7 +97,7 @@ TEST( UnderstandingDistributedIndex, WithoutStkMeshBulkData) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x2|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::mesh::MetaData &stkMeshMetaData = *stkMesh.getMetaData(); stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -172,7 +172,7 @@ TEST( UnderstandingDistributedIndex, ViaStkMeshBulkData) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x2|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::mesh::MetaData &stkMeshMetaData = *stkMesh.getMetaData(); stk::mesh::Part &line2_part = stkMeshMetaData.get_topology_root_part(stk::topology::LINE_2); @@ -296,7 +296,7 @@ TEST(UnderstandingDistributedIndex, TestSharedAndGhostedAndOwnedEntitiesWithoutA if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x2|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -423,7 +423,7 @@ TEST(UnderstandingDistributedIndex, GhostAnElement) if(procCount == 2) { const std::string generatedMeshSpec = 
"generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -496,7 +496,7 @@ TEST(UnderstandingDistributedIndex, KillAGhostedElement) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -564,7 +564,7 @@ TEST(UnderstandingDistributedIndex, CreateDisconnectedElement) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -652,7 +652,7 @@ TEST(UnderstandingDistributedIndex, MoveAnElement) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -744,7 +744,7 @@ TEST(UnderstandingDistributedIndex, GhostANode) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); 
stk::unit_test_util::BulkDataTester &stkMeshBulkData = *stkMesh.getBulkData(); @@ -975,7 +975,7 @@ TEST(UnderstandingDistributedIndex, MultipleCustomGhostings) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); int owningProc = 0; int ghostReceivingProc = 1; @@ -1021,7 +1021,7 @@ TEST(UnderstandingDistributedIndex, MultipleCustomGhostingsWithDestroy) if(procCount == 2) { const std::string generatedMeshSpec = "generated:2x2x4|sideset:xXyYzZ|nodeset:xXyYzZ"; - stk::unit_test_util::simple_fields::StkMeshCreator stkMesh(generatedMeshSpec, communicator); + stk::unit_test_util::StkMeshCreator stkMesh(generatedMeshSpec, communicator); int owningProc = 0; int ghostReceivingProc = 1; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestElemGraphCoincidentElements.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestElemGraphCoincidentElements.cpp index 8813bc984851..59850a58f49d 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestElemGraphCoincidentElements.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestElemGraphCoincidentElements.cpp @@ -27,7 +27,7 @@ void setup_node_sharing(stk::mesh::BulkData &mesh, const std::vector< std::vecto } } -class HexShellShell : public stk::unit_test_util::simple_fields::MeshFixture +class HexShellShell : public stk::unit_test_util::MeshFixture { protected: HexShellShell() @@ -462,7 +462,7 @@ class ElemElemGraphTester : public stk::mesh::ElemElemGraph const stk::mesh::impl::SparseGraph& my_get_coincident_graph() {return m_coincidentGraph; } }; -class ShellMeshModification : public stk::unit_test_util::simple_fields::MeshFixture +class ShellMeshModification : public stk::unit_test_util::MeshFixture { protected: ShellMeshModification() diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestEntitiesNodesHaveInCommon.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestEntitiesNodesHaveInCommon.cpp index 4f2eb5954bef..243aaa389a2f 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestEntitiesNodesHaveInCommon.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestEntitiesNodesHaveInCommon.cpp @@ -256,7 +256,7 @@ std::string vec_to_string(const std::vector &vec) return s + " ]"; } -class EntitiesNodesHaveInCommon : public stk::unit_test_util::simple_fields::MeshFixture +class EntitiesNodesHaveInCommon : public stk::unit_test_util::MeshFixture { protected: void expect_nodes_have_elems_in_common(stk::mesh::EntityId elemId, diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMHelper.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMHelper.cpp index 51dd1d80c723..5ba419da84ae 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMHelper.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMHelper.cpp @@ -70,7 +70,6 @@ TEST(FEMHelper, get_ordinal_and_permutation) unsigned gold_num_nodes = 4; stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); std::string name = "generated:1x1x2"; stkMeshIoBroker.add_mesh_database(name, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -128,7 +127,6 @@ TEST(FEMHelper, check_permutation_consistency_using_FEMHelper_parallel) unsigned gold_num_nodes = 4; stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); std::string name = "generated:1x1x2"; stkMeshIoBroker.add_mesh_database(name, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -153,7 +151,7 @@ TEST(FEMHelper, check_permutation_consistency_using_FEMHelper_parallel) stk::mesh::Part &part = mesh.mesh_meta_data().get_topology_root_part(stk::topology::QUAD_4); mesh.modification_begin(); - stk::mesh::Entity side = stk::unit_test_util::simple_fields::declare_element_side_with_nodes(mesh, elem, 
side_nodes, global_side_id, part); + stk::mesh::Entity side = stk::unit_test_util::declare_element_side_with_nodes(mesh, elem, side_nodes, global_side_id, part); EXPECT_NO_THROW(mesh.modification_end()); std::vector mesh_counts; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMMetaData.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMMetaData.cpp index 1e66764fa734..689783d5b76f 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMMetaData.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFEMMetaData.cpp @@ -50,7 +50,6 @@ using stk::mesh::MetaData; TEST ( UnitTestMetaData, create ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); EXPECT_TRUE ( true ); EXPECT_FALSE( fem_meta.is_initialized() ); EXPECT_EQ( fem_meta.spatial_dimension(), 0u ); @@ -65,7 +64,6 @@ TEST ( UnitTestMetaData, create ) TEST( UnitTestMetaData, initialize ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); EXPECT_TRUE( fem_meta.is_initialized() ); @@ -75,7 +73,6 @@ TEST( UnitTestMetaData, initialize ) TEST( UnitTestMetaData, initialize_only_once ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); ASSERT_THROW( fem_meta.initialize(2), std::runtime_error ); @@ -84,7 +81,6 @@ TEST( UnitTestMetaData, initialize_only_once ) TEST( UnitTestMetaData, entity_ranks_1 ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 1; fem_meta.initialize(spatial_dimension); EXPECT_EQ( fem_meta.side_rank(), stk::topology::NODE_RANK ); @@ -93,7 +89,6 @@ TEST( UnitTestMetaData, entity_ranks_1 ) TEST( UnitTestMetaData, entity_ranks_2 ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 2; fem_meta.initialize(spatial_dimension); EXPECT_EQ( fem_meta.side_rank(), stk::topology::EDGE_RANK ); @@ -102,7 +97,6 @@ 
TEST( UnitTestMetaData, entity_ranks_2 ) TEST( UnitTestMetaData, entity_ranks_3 ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); EXPECT_EQ( fem_meta.side_rank(), stk::topology::FACE_RANK ); @@ -111,7 +105,6 @@ TEST( UnitTestMetaData, entity_ranks_3 ) TEST( UnitTestMetaData, get_topology_trivial ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); stk::mesh::Part & hex_part = fem_meta.get_topology_root_part(stk::topology::HEX_8); @@ -131,7 +124,6 @@ TEST( UnitTestMetaData, get_topology_trivial ) TEST( UnitTestMetaData, cell_topology_subsetting ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); stk::mesh::Part & element_part = fem_meta.declare_part("element part", stk::topology::ELEM_RANK ); @@ -207,7 +199,6 @@ TEST( UnitTestMetaData, cell_topology_subsetting ) TEST( UnitTestMetaData, topology_test_1 ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); stk::mesh::Part & HR = fem_meta.get_topology_root_part(stk::topology::HEX_8); @@ -222,7 +213,6 @@ TEST( UnitTestMetaData, topology_test_1 ) TEST( UnitTestMetaData, topology_test_2a ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); stk::mesh::Part & HR = fem_meta.get_topology_root_part(stk::topology::HEX_8); @@ -235,7 +225,6 @@ TEST( UnitTestMetaData, topology_test_2a ) TEST( UnitTestMetaData, topology_test_2b ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -255,7 +244,6 @@ TEST( UnitTestMetaData, topology_test_2b ) TEST( UnitTestMetaData, topology_test_3a ) { stk::mesh::MetaData 
fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -273,7 +261,6 @@ TEST( UnitTestMetaData, topology_test_3a ) TEST( UnitTestMetaData, topology_test_3b ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -296,7 +283,6 @@ TEST( UnitTestMetaData, topology_test_3b ) TEST( UnitTestMetaData, topology_test_3c ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -321,7 +307,6 @@ TEST( UnitTestMetaData, topology_test_3c ) TEST( UnitTestMetaData, topology_test_4a ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -343,7 +328,6 @@ TEST( UnitTestMetaData, topology_test_4a ) TEST( UnitTestMetaData, topology_test_4b ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -367,7 +351,6 @@ TEST( UnitTestMetaData, topology_test_4b ) TEST( UnitTestMetaData, topology_test_5a ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -387,7 +370,6 @@ TEST( UnitTestMetaData, topology_test_5a ) TEST( UnitTestMetaData, topology_test_5b ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -411,7 +393,6 @@ TEST( UnitTestMetaData, topology_test_5b ) TEST( UnitTestMetaData, topology_test_5c ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -434,7 +415,6 @@ TEST( UnitTestMetaData, topology_test_5c ) TEST(UnitTestMetaData, subsetRankRequirements) { stk::mesh::MetaData meta(3); - 
meta.use_simple_fields(); stk::mesh::Part& elemPart = meta.declare_part("elemPart", stk::topology::ELEM_RANK); stk::mesh::Part& facePart = meta.declare_part("facePart", stk::topology::FACE_RANK); @@ -445,7 +425,6 @@ TEST(UnitTestMetaData, subsetRankRequirements) TEST(UnitTestMetaData, subsetRankTopologyRequirements) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& elemPart = meta.declare_part("elemPart", stk::topology::ELEM_RANK); stk::mesh::Part& quadPart = meta.declare_part_with_topology("quadPart", stk::topology::QUAD_4); @@ -456,7 +435,6 @@ TEST(UnitTestMetaData, subsetRankTopologyRequirements) TEST( MetaData, register_topology_duplicate ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 3; fem_meta.initialize(spatial_dimension); @@ -467,7 +445,6 @@ TEST( MetaData, register_topology_duplicate ) TEST( MetaData, get_topology_root_part_invalid ) { stk::mesh::MetaData fem_meta; - fem_meta.use_simple_fields(); const size_t spatial_dimension = 2; fem_meta.initialize(spatial_dimension); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestField.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestField.cpp index 80e254503376..1dced60a826b 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestField.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestField.cpp @@ -344,7 +344,6 @@ TEST(UnitTestField, writeFieldsWithSameName) // Create the mesh with fields with the same name { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); const std::string generatedFileName = "generated:4x4x16"; size_t index = stkIo.add_mesh_database(generatedFileName, stk::io::READ_MESH); @@ -406,7 +405,6 @@ TEST(UnitTestField, writeFieldsWithSameName) // Verify that we can read the mesh back into memory correctly { stk::io::StkMeshIoBroker stkIo(communicator); - stkIo.use_simple_fields(); size_t index = stkIo.add_mesh_database(mesh_name, stk::io::READ_MESH); stkIo.set_active_mesh(index); @@ 
-453,7 +451,7 @@ TEST(UnitTestField, writeFieldsWithSameName) EXPECT_TRUE( &nodeField == myFieldBase); } - stk::unit_test_util::simple_fields::delete_mesh(mesh_name); + stk::unit_test_util::delete_mesh(mesh_name); } ////////////////////// @@ -606,7 +604,7 @@ void verify_fields_are_on_entities(const std::string& filename, stk::mesh::Entit } } -class FieldFixture : public stk::unit_test_util::simple_fields::MeshFixture +class FieldFixture : public stk::unit_test_util::MeshFixture { protected: void test_solution_case_with_rank(stk::mesh::EntityRank rank) @@ -620,7 +618,6 @@ class FieldFixture : public stk::unit_test_util::simple_fields::MeshFixture stk::io::fill_mesh("generated:1x1x2", get_bulk()); stk::io::StkMeshIoBroker stkIo; - stkIo.use_simple_fields(); stkIo.set_bulk_data(get_bulk()); stk::mesh::EntityVector locallyOwnedEntities; @@ -633,7 +630,7 @@ class FieldFixture : public stk::unit_test_util::simple_fields::MeshFixture std::string filename = "junk-" + solnCases.get_solution_case_names()[solnIndex] + ".g"; EXPECT_NO_THROW(write_mesh_with_fields(stkIo, filename, rank, solnCases.get_fields_for_case(solnIndex))); verify_fields_are_on_entities(filename, rank, solnCases.get_fields_for_case(solnIndex), locallyOwnedEntities.size()); - stk::unit_test_util::simple_fields::delete_mesh(filename); + stk::unit_test_util::delete_mesh(filename); } verify_acceleration_is_not_on_entities(get_bulk(), rank); @@ -654,7 +651,7 @@ TEST_F(FieldFixture, totalNgpFieldDataBytes) stk::mesh::put_field_on_mesh(field, partB, 5, vectorInitValue); const int numElemsPerDim = 10; - stk::io::fill_mesh(stk::unit_test_util::simple_fields::get_mesh_spec(numElemsPerDim), get_bulk()); + stk::io::fill_mesh(stk::unit_test_util::get_mesh_spec(numElemsPerDim), get_bulk()); const int totalNumElements = numElemsPerDim * numElemsPerDim * numElemsPerDim; stk::mesh::EntityVector elements; @@ -704,7 +701,7 @@ TEST_F(FieldFixture, fenceWithoutNgpField) EXPECT_NO_THROW(field.fence()); } -class 
LateFieldFixtureNoTest : public stk::unit_test_util::simple_fields::MeshFixtureNoTest +class LateFieldFixtureNoTest : public stk::unit_test_util::MeshFixtureNoTest { protected: LateFieldFixtureNoTest() {} @@ -1427,7 +1424,7 @@ TEST(SharedSidesetField, verifySidesetFieldAfterMeshRead) { std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_SELF); stk::mesh::BulkData& bulk = *bulkPtr; - stk::unit_test_util::simple_fields::create_AB_mesh_with_sideset_and_distribution_factors(bulk, + stk::unit_test_util::create_AB_mesh_with_sideset_and_distribution_factors(bulk, stk::unit_test_util::LEFT, stk::unit_test_util::DECREASING, fieldName, @@ -1877,7 +1874,6 @@ class VariableCapacityFieldData : public ::testing::TestWithParammesh_meta_data(); - m_meta->use_simple_fields(); } int expected_bytes_allocated_host(const stk::mesh::BucketVector & buckets, int dataSize) diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldBLAS.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldBLAS.cpp index 267acd096251..756105b9976b 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldBLAS.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldBLAS.cpp @@ -102,7 +102,6 @@ BLASFixture::BLASFixture(const A init1, const A init2, const A init3) const unsigned int meshSizeZ = 4; stkMeshIoBroker = new stk::io::StkMeshIoBroker(my_comm); - stkMeshIoBroker->use_simple_fields(); stk::io::StkMeshIoBroker & io = *stkMeshIoBroker; std::ostringstream osstr; osstr << "generated:" << meshSizeX << "x" << meshSizeY << "x" << meshSizeZ; @@ -1899,7 +1898,6 @@ BLASFixture3d::BLASFixture3d(A* init1_input, A* init2_input, A* init3_input) MPI_Comm my_comm = MPI_COMM_WORLD; stkMeshIoBroker = new stk::io::StkMeshIoBroker(my_comm); - stkMeshIoBroker->use_simple_fields(); stk::io::StkMeshIoBroker & io = *stkMeshIoBroker; std::ostringstream osstr; osstr<<"generated:"< bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::MetaData& meshMetaData = 
bulkDataPtr->mesh_meta_data(); - meshMetaData.use_simple_fields(); initializeTestField(meshMetaData); size_t numNodes = 20; @@ -286,7 +285,6 @@ TYPED_TEST(TestDefaultFieldDataManager, AllocateFieldDataTwoBuckets) auto * localFieldDataManager = fieldDataManager.get(); std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::MetaData& meshMetaData = bulkDataPtr->mesh_meta_data(); - meshMetaData.use_simple_fields(); initializeTestField(meshMetaData); const size_t numNodes = 700; @@ -306,7 +304,6 @@ TYPED_TEST(TestDefaultFieldDataManager, TwoEntitiesTwoBuckets) auto * localFieldDataManager = fieldDataManager.get(); std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::MetaData& meshMetaData = bulkDataPtr->mesh_meta_data(); - meshMetaData.use_simple_fields(); createPart(meshMetaData); initializeTestField(meshMetaData); @@ -328,7 +325,6 @@ TYPED_TEST(TestContiguousFieldDataManager, AllocateFieldData) auto * localFieldDataManager = fieldDataManager.get(); std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::MetaData& meshMetaData = bulkDataPtr->mesh_meta_data(); - meshMetaData.use_simple_fields(); initializeTestField(meshMetaData); size_t numNodes = 20; const size_t extraCapacity = localFieldDataManager->get_extra_capacity(); @@ -347,7 +343,6 @@ TYPED_TEST(TestContiguousFieldDataManager, AllocateFieldDataAndReorderBuckets) auto * localFieldDataManager = fieldDataManager.get(); std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::MetaData& meshMetaData = bulkDataPtr->mesh_meta_data(); - meshMetaData.use_simple_fields(); initializeTestField(meshMetaData); size_t numNodes = 10000; const size_t extraCapacity = localFieldDataManager->get_extra_capacity(); @@ -373,7 +368,6 @@ TYPED_TEST(TestContiguousFieldDataManager, TwoEntitiesTwoBuckets) auto * 
localFieldDataManager = fieldDataManager.get(); std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::MetaData& meshMetaData = bulkDataPtr->mesh_meta_data(); - meshMetaData.use_simple_fields(); createPart(meshMetaData); initializeTestField(meshMetaData); @@ -431,7 +425,6 @@ TYPED_TEST(TestContiguousFieldDataManager, nodalFieldNotOnAllNodeBuckets) std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::BulkData& bulkData = *bulkDataPtr; stk::mesh::MetaData& meshMetaData = bulkData.mesh_meta_data(); - meshMetaData.use_simple_fields(); initialize2Parts2Fields(meshMetaData); bulkData.deactivate_field_updating(); @@ -494,7 +487,6 @@ TYPED_TEST(TestContiguousFieldDataManager, allocate_bucket_field_data) std::shared_ptr bulkDataPtr = build_mesh(spatialDim, MPI_COMM_WORLD, std::move(fieldDataManager)); stk::mesh::BulkData& bulkData = *bulkDataPtr; stk::mesh::MetaData& meshMetaData = bulkData.mesh_meta_data(); - meshMetaData.use_simple_fields(); initialize2Parts2Fields(meshMetaData); const stk::mesh::FieldVector &allFields = meshMetaData.get_fields(); @@ -616,7 +608,6 @@ void allocate_bucket_field_data_tableBased(stk::mesh::FieldDataManager & fieldDa const size_t spatialDim = 3; stk::mesh::MetaData meshMetaData(spatialDim, stk::mesh::entity_rank_names()); - meshMetaData.use_simple_fields(); initialize2Parts2Fields(meshMetaData); @@ -724,7 +715,6 @@ TYPED_TEST(TestContiguousFieldDataManager, add_field_data_for_entity) stk::mesh::ContiguousFieldDataManager fieldDataManager; const size_t spatialDim = 3; stk::mesh::MetaData meshMetaData(spatialDim, stk::mesh::entity_rank_names()); - meshMetaData.use_simple_fields(); testAddingSingleEntity(meshMetaData, fieldDataManager); @@ -797,7 +787,6 @@ TYPED_TEST(TestContiguousFieldDataManager, deallocate_nonempty_bucket) stk::mesh::ContiguousFieldDataManager fieldDataManager; const size_t spatialDim = 3; stk::mesh::MetaData 
meshMetaData(spatialDim, stk::mesh::entity_rank_names()); - meshMetaData.use_simple_fields(); testAddingSingleEntity(meshMetaData, fieldDataManager); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldImpl.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldImpl.cpp index 14d94e2a4a78..cbca27ac9295 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldImpl.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldImpl.cpp @@ -64,7 +64,6 @@ class UnitTestFieldImpl : public ::testing::Test pC(meta_data.declare_part( std::string("C") , stk::topology::NODE_RANK)), pD(meta_data.declare_part( std::string("D") , stk::topology::NODE_RANK)) { - meta_data.use_simple_fields(); } void testFieldRestriction(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldParallel.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldParallel.cpp index 0e6ec513515a..27b640060c60 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldParallel.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldParallel.cpp @@ -63,7 +63,7 @@ namespace stk { namespace mesh { class FieldBase; } } namespace { using namespace stk::mesh; -using stk::mesh::fixtures::simple_fields::HexFixture; +using stk::mesh::fixtures::HexFixture; template T do_operation(Operation Op, T lhs, T rhs) diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldQueryFunctions.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldQueryFunctions.cpp index 02c53dcbc990..c1d8f1915641 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldQueryFunctions.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldQueryFunctions.cpp @@ -49,7 +49,7 @@ namespace { -class FieldQueryFunctions : public stk::unit_test_util::simple_fields::MeshFixture +class FieldQueryFunctions : public stk::unit_test_util::MeshFixture { public: FieldQueryFunctions() @@ -72,7 +72,7 @@ class FieldQueryFunctions : public stk::unit_test_util::simple_fields::MeshFixtu const std::string meshDesc = 
"0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_mesh_vector_fields_one_block() { @@ -87,7 +87,7 @@ class FieldQueryFunctions : public stk::unit_test_util::simple_fields::MeshFixtu const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_mesh_vector_fields_four_copies_one_block() { @@ -102,7 +102,7 @@ class FieldQueryFunctions : public stk::unit_test_util::simple_fields::MeshFixtu const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_mesh_vector_fields_not_on_all_blocks() { @@ -118,7 +118,7 @@ class FieldQueryFunctions : public stk::unit_test_util::simple_fields::MeshFixtu const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_2\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_mesh_vector_fields_variable_num_copies() { @@ -136,7 +136,7 @@ class FieldQueryFunctions : public stk::unit_test_util::simple_fields::MeshFixtu const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_2\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } stk::mesh::Field * m_doubleField; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldRestriction.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldRestriction.cpp index 215724d1abb2..90af4c1ba4c8 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldRestriction.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestFieldRestriction.cpp @@ -53,7 +53,6 @@ TEST( UnitTestFieldRestriction, defaultConstruct ) TEST( UnitTestFieldRestriction, construct ) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::FieldRestriction fr(part_a); @@ -71,7 +70,6 @@ TEST( UnitTestFieldRestriction, construct ) TEST( UnitTestFieldRestriction, copyConstruct ) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::FieldRestriction fr(part_a); @@ -89,7 +87,6 @@ TEST( UnitTestFieldRestriction, copyConstruct ) TEST( UnitTestFieldRestriction, selects_part) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::Part& part_b = meta.declare_part("b"); @@ -101,7 +98,6 @@ TEST( UnitTestFieldRestriction, selects_part) TEST( UnitTestFieldRestriction, union_selects_part) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::Part& part_b = meta.declare_part("b"); @@ -114,7 +110,6 @@ TEST( UnitTestFieldRestriction, union_selects_part) TEST( UnitTestFieldRestriction, operatorEqual ) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::Part& part_b = meta.declare_part("b"); @@ -137,7 +132,6 @@ TEST( UnitTestFieldRestriction, operatorEqual ) TEST( UnitTestFieldRestriction, operatorLess ) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::Part& part_b = meta.declare_part("b"); @@ -168,7 +162,6 @@ TEST( UnitTestFieldRestriction, operatorLessInvalid ) TEST( UnitTestFieldRestriction, 
operatorEqualEqual_and_NotEqual ) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part& part_a = meta.declare_part("a"); stk::mesh::Part& part_b = meta.declare_part("b"); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGenIds.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGenIds.cpp index 5aa5b8b544f3..5082078f4df7 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGenIds.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGenIds.cpp @@ -116,7 +116,7 @@ TEST(GeneratedIds, StkMeshApproach1) { MpiInfo mpiInfo(MPI_COMM_WORLD); - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "generated:10x10x10"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "generated:10x10x10"); const int spatialDim = 3; std::shared_ptr bulkPtr = build_mesh(spatialDim, mpiInfo.getMpiComm()); stk::mesh::MetaData& stkMeshMetaData = bulkPtr->mesh_meta_data(); @@ -203,7 +203,7 @@ TEST(GeneratedIds, StkMeshApproach2) { MpiInfo mpiInfo(MPI_COMM_WORLD); - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", "generated:10x10x10"); + std::string exodusFileName = stk::unit_test_util::get_option("-i", "generated:10x10x10"); const int spatialDim = 3; std::shared_ptr bulkPtr = build_mesh(spatialDim, mpiInfo.getMpiComm()); stk::mesh::BulkData& stkMeshBulkData = *bulkPtr; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetBuckets.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetBuckets.cpp index 86f5071d54f7..37e71b7f1a09 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetBuckets.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetBuckets.cpp @@ -72,7 +72,7 @@ TEST( UnitTestGetBuckets, ExampleFixture ) // Generate mesh - stk::mesh::fixtures::simple_fields::SelectorFixture fix ; + stk::mesh::fixtures::SelectorFixture fix ; fix.m_meta_data.commit(); fix.m_bulk_data.modification_begin(); diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetEntities.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetEntities.cpp index 83b3bbe089d4..ad2917d3d3af 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetEntities.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetEntities.cpp @@ -6,7 +6,7 @@ namespace { -class GetEntitiesTest : public stk::unit_test_util::simple_fields::MeshFixture { }; +class GetEntitiesTest : public stk::unit_test_util::MeshFixture { }; TEST_F(GetEntitiesTest, get_num_entities) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetFieldByName.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetFieldByName.cpp index 649940d88dba..a138ef995ac1 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetFieldByName.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGetFieldByName.cpp @@ -50,7 +50,6 @@ TEST(UnitTestGetFieldByName, test1) { size_t spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); //declare fields on different ranks with names that are unique within a rank but not unique overall: // diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithModification.cpp index 4e7e393bea73..d2d70277a480 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithModification.cpp @@ -182,7 +182,6 @@ TEST(UnitTestGhosting, WithDeclareConstraintRelatedToRecvGhostNode) builder.set_entity_rank_names(rank_names); std::shared_ptr bulkPtr = builder.create(); stk::mesh::MetaData& stkMeshMetaData = bulkPtr->mesh_meta_data(); - stkMeshMetaData.use_simple_fields(); stk::mesh::BulkData& stkMeshBulkData = *bulkPtr; const std::string generatedMeshSpecification = "generated:1x1x3|sideset:xXyYzZ"; stk::io::fill_mesh(generatedMeshSpecification, stkMeshBulkData); diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithShared.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithShared.cpp index 86c8e5da2898..22baba6ad362 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithShared.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGhostingWithShared.cpp @@ -73,7 +73,6 @@ TEST(UnitTestGhosting, ThreeElemSendElemWithNonOwnedNodes) unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::unit_test_util::BulkDataTester bulk(meta, communicator); const std::string generatedMeshSpecification = "generated:1x1x3"; stk::io::fill_mesh(generatedMeshSpecification, bulk); @@ -232,7 +231,6 @@ TEST(UnitTestGhosting, WithSharedFiltered) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x6"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -297,7 +295,6 @@ TEST(UnitTestGhosting, WithShared) } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x6"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGloballyShared.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGloballyShared.cpp index 3ae9ed50d461..318e77653a22 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGloballyShared.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGloballyShared.cpp @@ -84,7 +84,7 @@ TEST( UnitTestGloballyShared, DISABLED_keyhole_3x1 ) } // Create the fixture - stk::mesh::fixtures::simple_fields::QuadFixture qf(MPI_COMM_WORLD,NX,NY); + stk::mesh::fixtures::QuadFixture qf(MPI_COMM_WORLD,NX,NY); qf.m_meta.commit(); if (p_rank <= 1) { 
qf.generate_mesh(parallel_distribution[p_rank]); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGridFixture.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGridFixture.cpp index 88fb9c255266..7e71a3cbfef5 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestGridFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestGridFixture.cpp @@ -64,7 +64,7 @@ TEST( UnitTestGridFixture, test_gridfixture ) //Coverage of GridFixture, Hexfixture, BoxFixture,QuadFixture //and RingFixture in fixture directory for more than one //processor. - stk::mesh::fixtures::simple_fields::GridFixture grid_mesh(MPI_COMM_WORLD); + stk::mesh::fixtures::GridFixture grid_mesh(MPI_COMM_WORLD); stk::mesh::BulkData& bulk_data = grid_mesh.bulk_data(); stk::mesh::MetaData& fem_meta = grid_mesh.fem_meta(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestHexFixture.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestHexFixture.cpp index e32918a8bfea..f19701ad969c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestHexFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestHexFixture.cpp @@ -59,7 +59,7 @@ TEST( UnitTestHexFixture, elem_ids_1d_x ) const unsigned NX = 3; const unsigned NY = 1; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -73,7 +73,7 @@ TEST( UnitTestHexFixture, elem_ids_3d_x ) const unsigned NX = 3; const unsigned NY = 3; const unsigned NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -87,7 +87,7 @@ TEST( UnitTestHexFixture, elem_ids_1d_y ) const unsigned NX = 1; const unsigned NY = 3; const unsigned NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture 
hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -101,7 +101,7 @@ TEST( UnitTestHexFixture, elem_ids_3d_y ) const unsigned NX = 3; const unsigned NY = 3; const unsigned NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -115,7 +115,7 @@ TEST( UnitTestHexFixture, elem_ids_1d_z ) const unsigned NX = 1; const unsigned NY = 1; const unsigned NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -129,7 +129,7 @@ TEST( UnitTestHexFixture, elem_ids_3d_z ) const unsigned NX = 3; const unsigned NY = 3; const unsigned NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -143,7 +143,7 @@ TEST( UnitTestHexFixture, elem_ids_3d_diag ) const unsigned NX = 3; const unsigned NY = 3; const unsigned NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.generate_mesh(); EXPECT_EQ( hf.elem_id(0,0,0), 1u ); @@ -180,7 +180,7 @@ TEST( UnitTestHexFixture, trivial_parallel_2 ) } // Create the fixture - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); if (p_rank <= 1) { hf.fill_node_map(parallel_distribution); @@ -231,7 +231,7 @@ TEST( UnitTestHexFixture, disjoint_parallel_psizex1x1 ) } // Create the fixture - 
stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); hf.fill_node_map(parallel_distribution); hf.generate_mesh(parallel_distribution[p_rank]); @@ -300,7 +300,7 @@ TEST( UnitTestHexFixture, disjoint_parallel_4x2x1 ) } // Create the fixture - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); if (p_rank <= 1) { hf.fill_node_map(parallel_distribution); @@ -403,7 +403,7 @@ TEST( UnitTestHexFixture, disjoint_parallel_5x1x1 ) } // Create the fixture - stk::mesh::fixtures::simple_fields::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); + stk::mesh::fixtures::HexFixture hf(MPI_COMM_WORLD,NX,NY,NZ); hf.m_meta.commit(); if (p_rank <= 1) { hf.fill_node_map(parallel_distribution); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestInducedPart.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestInducedPart.cpp index e091150493db..c1f6ecb2575b 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestInducedPart.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestInducedPart.cpp @@ -61,7 +61,7 @@ namespace { // element_ranked_part subset of unranked_superset_part // modification cycle is left uncompleted -class UnitTestInducedPart2D : public stk::unit_test_util::simple_fields::MeshFixture +class UnitTestInducedPart2D : public stk::unit_test_util::MeshFixture { protected: UnitTestInducedPart2D() @@ -116,7 +116,7 @@ class UnitTestInducedPart2D : public stk::unit_test_util::simple_fields::MeshFix }; -class UnitTestInducedPart3D : public stk::unit_test_util::simple_fields::MeshFixture +class UnitTestInducedPart3D : public stk::unit_test_util::MeshFixture { protected: UnitTestInducedPart3D() diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestLocalIds.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestLocalIds.cpp index bd4631ffadc1..99d601b5be5a 100644 --- 
a/packages/stk/stk_unit_tests/stk_mesh/UnitTestLocalIds.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestLocalIds.cpp @@ -15,11 +15,11 @@ #include "stk_io/StkMeshIoBroker.hpp" #include "stk_mesh/baseImpl/elementGraph/BulkDataIdMapper.hpp" -class LocalIds : public stk::unit_test_util::simple_fields::MeshFixture +class LocalIds : public stk::unit_test_util::MeshFixture { protected: LocalIds() - : stk::unit_test_util::simple_fields::MeshFixture(3) + : stk::unit_test_util::MeshFixture(3) {} virtual ~LocalIds() {} }; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshBuilder.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshBuilder.cpp index e980c705ba8d..31923121c62e 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshBuilder.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshBuilder.cpp @@ -192,7 +192,11 @@ TEST(MeshBuilder, bulkdata_add_fmwk_data) builder.set_add_fmwk_data(true); std::shared_ptr bulk = builder.create(); +#ifdef SIERRA_MIGRATION EXPECT_TRUE(bulk->add_fmwk_data()); +#else + EXPECT_FALSE(bulk->add_fmwk_data()); +#endif } TEST(MeshBuilder, bulkdata_add_fmwk_data_false) diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshImplUtils.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshImplUtils.cpp index d1a01423199d..1b195bae873f 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshImplUtils.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshImplUtils.cpp @@ -133,7 +133,7 @@ class ClosureFixture m_meta = &(m_mesh->mesh_meta_data()); std::ostringstream oss; oss << "generated:" << num_x << "x" << num_y << "x" << m_mesh->parallel_size() << "|sideset:xXyYzZ"; - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-i", oss.str()); + std::string exodusFileName = stk::unit_test_util::get_option("-i", oss.str()); stk::io::StkMeshIoBroker exodus_file_reader(communicator); exodus_file_reader.set_bulk_data(*m_mesh); exodus_file_reader.add_mesh_database(exodusFileName, 
stk::io::READ_MESH); @@ -700,7 +700,6 @@ TEST(MeshImplUtils, check_for_connected_nodes) unsigned spatialDim = 2; std::shared_ptr meshPtr = build_mesh(spatialDim, communicator); stk::mesh::MetaData& meta = meshPtr->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part& block_1 = meta.declare_part_with_topology("block_1", stk::topology::QUAD_4_2D); stk::mesh::BulkData& mesh = *meshPtr; mesh.modification_begin(); @@ -741,7 +740,6 @@ TEST(MeshImplUtils, comm_mesh_very_parallel_consistency_nominal) stk::ParallelMachine communicator = MPI_COMM_WORLD; unsigned spatialDim = 2; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& block_1 = meta.declare_part_with_topology("block_1", stk::topology::QUAD_4_2D); stk::unit_test_util::BulkDataTester mesh(meta, communicator); if (mesh.parallel_size() >= 1) { @@ -831,7 +829,6 @@ TEST( MeshImplUtils, test_create_face_for_sideset) return; } stk::io::StkMeshIoBroker fixture(pm); - fixture.use_simple_fields(); fixture.add_mesh_database("generated:1x1x2", stk::io::READ_MESH); fixture.create_input_mesh(); @@ -866,7 +863,6 @@ TEST( MeshImplUtils, test_connect_face_to_other_elements) return; } stk::io::StkMeshIoBroker fixture(pm); - fixture.use_simple_fields(); fixture.add_mesh_database("generated:1x1x2", stk::io::READ_MESH); fixture.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshModLogObserver.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshModLogObserver.cpp index 234bfcff86d6..274dae9065b0 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshModLogObserver.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMeshModLogObserver.cpp @@ -27,7 +27,6 @@ class MeshModLogTest : public ::testing::Test bulk(*bulkPtr), ostrm() { - meta.use_simple_fields(); } void setup(const stk::mesh::EntityKey& key, diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMetaData.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMetaData.cpp index 
ac1766fd5737..cc4b335fcf74 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestMetaData.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestMetaData.cpp @@ -68,9 +68,7 @@ TEST( UnitTestRootTopology, newPartsWithTopologyAfterCommit ) //Test functions in MetaData.cpp const int spatial_dimension = 3; MetaData uncommitted_metadata(spatial_dimension); - uncommitted_metadata.use_simple_fields(); MetaData committed_metadata(spatial_dimension); - committed_metadata.use_simple_fields(); committed_metadata.commit(); @@ -90,7 +88,6 @@ TEST(UnitTestMetaData, superElemTopoDeclarePartWithTopology) { const int spatial_dimension = 3; MetaData meta(spatial_dimension); - meta.use_simple_fields(); unsigned numNodes = 11; stk::topology superTopo = stk::create_superelement_topology(numNodes); Part& part = meta.declare_part_with_topology("super-part", superTopo); @@ -104,11 +101,8 @@ TEST( UnitTestMetaData, testMetaData ) //Test functions in MetaData.cpp const int spatial_dimension = 3; MetaData metadata_committed(spatial_dimension); - metadata_committed.use_simple_fields(); MetaData metadata_not_committed(spatial_dimension); - metadata_not_committed.use_simple_fields(); MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -145,7 +139,6 @@ TEST( UnitTestMetaData, rankHigherThanDefined ) const int spatial_dimension = 3; const std::vector & rank_names = stk::mesh::entity_rank_names(); MetaData metadata(spatial_dimension, rank_names); - metadata.use_simple_fields(); const std::string& i_name2 = metadata.entity_rank_name( stk::topology::EDGE_RANK ); @@ -164,7 +157,6 @@ TEST( UnitTestMetaData, testEntityKeyMapping ) static const size_t spatial_dimension = 3; stk::mesh::MetaData meta ( spatial_dimension ); - meta.use_simple_fields(); stk::mesh::Part & part = meta.declare_part("another part"); stk::mesh::Part & hex_part = 
meta.declare_part_with_topology("elem_part", stk::topology::HEX_8); @@ -228,10 +220,8 @@ TEST( UnitTestMetaData, noEntityTypes ) } TEST( UnitTestMetaData, declare_part_with_rank ) { - //MetaData constructor fails because there are no entity types: const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); metadata.declare_part("foo"); ASSERT_NO_THROW(metadata.declare_part("foo",stk::topology::EDGE_RANK)); ASSERT_NO_THROW(metadata.declare_part("foo",stk::topology::EDGE_RANK)); @@ -249,7 +239,6 @@ TEST( UnitTestMetaData, declare_attribute_no_delete ) const int * singleton = NULL; const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); Part &pa = metadata.declare_part( std::string("a") , stk::topology::NODE_RANK ); metadata.declare_attribute_no_delete( pa, singleton); metadata.commit(); @@ -260,7 +249,6 @@ TEST(UnitTestMetaData, set_mesh_bulk_data ) const int spatial_dimension = 3; MeshBuilder builder(MPI_COMM_WORLD); std::shared_ptr meta = builder.set_spatial_dimension(spatial_dimension).create_meta_data(); - meta->use_simple_fields(); std::shared_ptr bulk1 = builder.create(meta); ASSERT_THROW(builder.create(meta), std::logic_error); @@ -273,7 +261,7 @@ TEST(UnitTestMetaData, set_mesh_bulk_data ) ASSERT_TRUE(&meta->mesh_bulk_data() == bulk2.get()); } -class TestHexMeta : public stk::mesh::fixtures::simple_fields::TestHexFixture {}; +class TestHexMeta : public stk::mesh::fixtures::TestHexFixture {}; TEST_F(TestHexMeta, superset_of_shared_part) { @@ -283,13 +271,13 @@ TEST_F(TestHexMeta, superset_of_shared_part) { stk::mesh::MetaData &meta = get_meta(); - stk::mesh::Part & mysupername = meta.declare_part("my_superset_part_shared"); - meta.declare_part_subset(mysupername, meta.globally_shared_part()); - mysupername.entity_membership_is_parallel_consistent(false); + stk::mesh::Part & mySuperPart = meta.declare_part("my_superset_part_shared"); + meta.declare_part_subset(mySuperPart, 
meta.globally_shared_part()); + mySuperPart.entity_membership_is_parallel_consistent(false); - stk::mesh::Part & mysupernamelocal = meta.declare_part("my_superset_part_local"); - meta.declare_part_subset(mysupernamelocal, meta.locally_owned_part()); - mysupernamelocal.entity_membership_is_parallel_consistent(false); + stk::mesh::Part & mySuperPartLocal = meta.declare_part("my_superset_part_local"); + meta.declare_part_subset(mySuperPartLocal, meta.locally_owned_part()); + mySuperPartLocal.entity_membership_is_parallel_consistent(false); stk::mesh::Part & userpart = meta.declare_part("userpartsubsettest"); stk::mesh::Part & usersuper = meta.declare_part("usersuperset"); @@ -303,28 +291,27 @@ TEST_F(TestHexMeta, superset_of_shared_part) if (expect_supersets_to_work_with_shared_part) { std::cout << "p[" << mesh.parallel_rank() <<"] num nodes stk shared part=" << - stk::mesh::count_selected_entities(meta.globally_shared_part(), - mesh.buckets(stk::topology::NODE_RANK)) << std::endl; + stk::mesh::count_entities(mesh, stk::topology::NODE_RANK, meta.globally_shared_part()) + << std::endl; std::cout << "p[" << mesh.parallel_rank() << "] num nodes in superset of stk shared part=" << - stk::mesh::count_selected_entities(mysupername, - mesh.buckets(stk::topology::NODE_RANK)) << std::endl; + stk::mesh::count_entities(mesh, stk::topology::NODE_RANK, mySuperPart) + << std::endl; std::cout << "p[" << mesh.parallel_rank() <<"] num nodes stk local part=" << - stk::mesh::count_selected_entities(meta.locally_owned_part(), - mesh.buckets(stk::topology::NODE_RANK)) << std::endl; + stk::mesh::count_entities(mesh, stk::topology::NODE_RANK, meta.locally_owned_part()) + << std::endl; std::cout << "p[" << mesh.parallel_rank() << "] num nodes in superset of stk local part=" << - stk::mesh::count_selected_entities(mysupernamelocal, - mesh.buckets(stk::topology::NODE_RANK)) << std::endl; + stk::mesh::count_entities(mesh, stk::topology::NODE_RANK, mySuperPartLocal) + << std::endl; EXPECT_EQ( - 
stk::mesh::count_selected_entities(meta.globally_shared_part(), - mesh.buckets(stk::topology::NODE_RANK)), - stk::mesh::count_selected_entities(mysupername, - mesh.buckets(stk::topology::NODE_RANK))); + stk::mesh::count_entities(mesh, stk::topology::NODE_RANK, meta.globally_shared_part()) + , + stk::mesh::count_entities(mesh, stk::topology::NODE_RANK, mySuperPart)); } EXPECT_EQ(stk::mesh::count_selected_entities(meta.locally_owned_part(), mesh.buckets(stk::topology::NODE_RANK)), - stk::mesh::count_selected_entities(mysupernamelocal, + stk::mesh::count_selected_entities(mySuperPartLocal, mesh.buckets(stk::topology::NODE_RANK))); mesh.modification_begin(); @@ -368,7 +355,6 @@ std::shared_ptr build_mesh(unsigned spatialDim, stk::mesh::MeshBuilder builder(comm); builder.set_spatial_dimension(spatialDim); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.cpp index 8e83c1cb31bf..2ba6af99cd0b 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.cpp @@ -86,7 +86,6 @@ namespace stk { namespace mesh { namespace unit_test { // // const int spatialDim = 3; // stk::mesh::MetaData stkMeshMetaData(spatialDim); -// stkMeshMetaData.use_simple_fields(); // stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); // // std::string exodusFileName = getOption("-i", "generated:1x1x4"); @@ -184,7 +183,6 @@ namespace stk { namespace mesh { namespace unit_test { // // const int spatialDim = 3; // stk::mesh::MetaData stkMeshMetaData(spatialDim); -// stkMeshMetaData.use_simple_fields(); // stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); // // // Elements 1 and 2 on proc 0, Elements 3 and 4 on proc 1 @@ -392,7 +390,6 @@ TEST(BulkDataModificationEnd, 
create_an_edge_and_test_up_to_IR_parallel_create) const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); // Elements 1 and 2 on proc 0, Elements 3 and 4 on proc 1 @@ -528,7 +525,6 @@ TEST(BulkDataModificationEnd, create_a_ghosted_edge_and_test_internal_modificati const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); // Elements 1 and 2 on proc 0, Elements 3 and 4 on proc 1 @@ -632,7 +628,6 @@ TEST(BulkDataModificationEnd, create_a_ghosted_edge_using_only_needed_pieces) const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); // Elements 1 and 2 on proc 0, Elements 3 and 4 on proc 1 @@ -707,7 +702,6 @@ TEST(BulkDataModificationEnd, create_edges) const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); std::string exodusFileName = getOption("-i", "generated:1x1x4"); @@ -735,7 +729,6 @@ TEST(BulkDataModificationEnd, test_invalid_add_node_sharing) const unsigned spatial_dim = 3; stk::mesh::MetaData meta_data(spatial_dim); - meta_data.use_simple_fields(); stk::mesh::Part &node_part = meta_data.get_topology_root_part(stk::topology::NODE); meta_data.commit(); stk::unit_test_util::BulkDataTester mesh(meta_data, MPI_COMM_WORLD); @@ -777,7 +770,6 @@ TEST(BulkDataModificationEnd, create_edges_with_min_map) const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, communicator); // Elements 1 and 
2 on proc 0, Elements 3 and 4 on proc 1 @@ -843,7 +835,6 @@ TEST(ModEndForEntityCreation, DISABLED_promotion_of_ghosted_to_shared) const int spatialDim = 3; stk::mesh::MetaData metaData(spatialDim); - metaData.use_simple_fields(); stk::unit_test_util::BulkDataTester bulkData(metaData, communicator, stk::mesh::BulkData::NO_AUTO_AURA); const int p_rank = bulkData.parallel_rank(); @@ -910,7 +901,6 @@ std::shared_ptr create_mesh(stk::ParallelMachine comm, builder.set_spatial_dimension(spatialDim); builder.set_aura_option(auraOption); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } @@ -928,7 +918,7 @@ TEST(TestModificationEnd, destroySharedNode_twoSharers_deinducePartMembership) 0,0, 1,0, 2,0, 0,1, 1,1, 2,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); bulk.modification_begin(); if (bulk.parallel_rank() == 1) { @@ -970,7 +960,7 @@ TEST(TestModificationEnd, destroySharedNode_threeSharers_deinducePartMembership) 0,1, 1,1, 2,1, 0,2, 1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); bulk.modification_begin(); if (bulk.parallel_rank() == 2) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.hpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.hpp index 2f2c884cf0c8..5d0835527624 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationEnd.hpp @@ -137,7 +137,6 @@ void populateBulkDataWithFile(const std::string& exodusFileName, MPI_Comm commun // The order of the following 
lines in {} are important { stk::io::StkMeshIoBroker exodusFileReader(communicator); - exodusFileReader.use_simple_fields(); // Inform STK IO which STK Mesh objects to populate later exodusFileReader.set_bulk_data(bulkData); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationSummary.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationSummary.cpp index d586239b8284..aebcebed794b 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationSummary.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestModificationSummary.cpp @@ -21,7 +21,6 @@ TEST(ModificationSummary, testString) if (numprocs == 1 ) { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x3"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestParallelGraphInfo.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestParallelGraphInfo.cpp index 5092ffd1cb6f..fead4f0c0f00 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestParallelGraphInfo.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestParallelGraphInfo.cpp @@ -21,10 +21,10 @@ class ElemElemGraphTester : public stk::mesh::ElemElemGraph stk::mesh::impl::ParallelGraphInfo& get_parallel_info() { return m_parallelInfoForGraphEdges.get_parallel_graph_info(); } }; -class ParallelGraphUpdate : public stk::unit_test_util::simple_fields::MeshFixture +class ParallelGraphUpdate : public stk::unit_test_util::MeshFixture { public: - ParallelGraphUpdate() : stk::unit_test_util::simple_fields::MeshFixture() + ParallelGraphUpdate() : stk::unit_test_util::MeshFixture() { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); airPart = &get_meta().declare_part_with_topology("air", stk::topology::HEXAHEDRON_8); @@ -293,8 +293,8 @@ TEST_F(ParallelGraphUpdate, deleteBothShellElementsOnParallelEdge) 
0,0,2, 1,0,2, 1,1,2, 0,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); get_bulk().initialize_face_adjacent_element_graph(); stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1u); @@ -348,8 +348,8 @@ TEST_F(ParallelGraphUpdate, createAefA_FromScratch) 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); get_bulk().initialize_face_adjacent_element_graph(); const stk::mesh::ElemElemGraph &graph = get_bulk().get_face_adjacent_element_graph(); @@ -386,8 +386,8 @@ TEST_F(ParallelGraphUpdate, createAefA_FromAA) 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); get_bulk().initialize_face_adjacent_element_graph(); const stk::mesh::ElemElemGraph &graph = get_bulk().get_face_adjacent_element_graph(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPart.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPart.cpp index 4ffa969fbd61..b3058cc701d1 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPart.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPart.cpp @@ -55,7 +55,6 @@ TEST(UnitTestPart, testUnit) { const int spatial_dimension = 3; MetaData m(spatial_dimension); - m.use_simple_fields(); PartRepository partRepo(&m); PartRepository partRepo2(&m); PartRepository partRepo3(&m); @@ -129,7 +128,6 
@@ TEST(UnitTestPart, testPartVector) { const int spatial_dimension = 3; MetaData m(spatial_dimension); - m.use_simple_fields(); PartRepository partRepo(&m); Part * const pa = partRepo.declare_part( std::string("a") , stk::topology::NODE_RANK ); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAfterCommit.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAfterCommit.cpp index dc0889240213..0dc20c9b9984 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAfterCommit.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAfterCommit.cpp @@ -132,7 +132,6 @@ TEST(UnitTestPartsAfterCommit, FieldsAndSelectors) stk::ParallelMachine communicator = MPI_COMM_WORLD; stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x4"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -169,7 +168,6 @@ TEST(UnitTestPartsAfterCommit, PartInduction) stk::ParallelMachine communicator = MPI_COMM_WORLD; stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x4"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -231,7 +229,6 @@ TEST(UnitTestPartsAfterCommit, SelectorOps) if(numProcs == 1) { stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x1"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAlias.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAlias.cpp index 3ffa7145f6d7..2a094df373e5 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAlias.cpp +++ 
b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartAlias.cpp @@ -66,7 +66,6 @@ TEST( UnitTestPartAlias, noAliasForDefaultCreatedPart ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -79,7 +78,6 @@ TEST( UnitTestPartAlias, getAliasForAliasedPart ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -96,7 +94,6 @@ TEST( UnitTestPartAlias, partAliasesAreUnique ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -111,7 +108,6 @@ TEST( UnitTestPartAlias, partAliasReregistration ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -129,7 +125,6 @@ TEST( UnitTestPartAlias, multiplePartAliasRegistration ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -155,7 +150,6 @@ TEST( UnitTestPartAlias, partAliasesAreCaseSensitive ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -175,7 +169,6 @@ TEST( UnitTestPartAlias, deletePartAliasExact ) { const int spatial_dimension = 3; MetaData 
metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -216,7 +209,6 @@ TEST( UnitTestPartAlias, deletePartAliasCaseInsensitive ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; Part &pa = metadata.declare_part( std::string("a") , node_rank ); @@ -244,7 +236,6 @@ TEST( UnitTestPartAlias, getPartFromAlias ) { const int spatial_dimension = 3; MetaData metadata(spatial_dimension); - metadata.use_simple_fields(); stk::mesh::EntityRank node_rank = stk::topology::NODE_RANK; std::string partName_a("a"); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartRepository.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartRepository.cpp index 3f2cb5887168..de0c002e4013 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartRepository.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartRepository.cpp @@ -86,7 +86,6 @@ UnitTestPartRepository::UnitTestPartRepository() part_2_A (partRepo_2.declare_part("A",stk::topology::NODE_RANK) ), singleton(nullptr) { - meta.use_simple_fields(); meta.commit(); } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartToBucket.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartToBucket.cpp index 9192b029277b..c6af356486a0 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartToBucket.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartToBucket.cpp @@ -59,7 +59,6 @@ TEST(PartToBucket, hex) return; } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x1"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -110,7 +109,6 @@ TEST(PartToBucket, hexWithSingleSideset) return; } 
stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x1|sideset:X"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -173,7 +171,6 @@ TEST(PartToBucket, hexWithTwoSidesets) return; } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x1|sideset:XY"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -239,7 +236,6 @@ TEST(PartToBucket, twoHex) return; } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); const std::string generatedMeshSpecification = "generated:1x1x2"; stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -391,7 +387,6 @@ void runTwoHexParallelBucketTests(const std::string &generatedMeshSpecification, return; } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); stkMeshIoBroker.add_mesh_database(generatedMeshSpecification, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); stkMeshIoBroker.populate_bulk_data(); @@ -480,7 +475,6 @@ TEST(PartToBucket, hexWithThreeSidesets) return; } stk::io::StkMeshIoBroker stkMeshIoBroker(communicator); - stkMeshIoBroker.use_simple_fields(); //generated-mesh 'sideset:xYz' syntax adds face surfaces on 3 sides of the mesh, //(minimum 'x' side, maximum 'y' side, minimum 'z' side) diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartitions.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartitions.cpp index d165abae2f42..1569f62adc60 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartitions.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestPartitions.cpp @@ -48,6 +48,7 @@ #include 
"stk_mesh/base/FieldBase.hpp" // for field_data #include "stk_mesh/base/MetaData.hpp" // for MetaData #include "stk_mesh/baseImpl/BucketRepository.hpp" // for BucketRepository, etc +#include "stk_mesh/baseImpl/GlobalIdEntitySorter.hpp" // for BucketRepository, etc #include "stk_topology/topology.hpp" // for topology, etc #include "stk_unit_test_utils/stk_mesh_fixtures/SelectorFixture.hpp" // for SelectorFixture @@ -63,7 +64,7 @@ struct ReverseSorter : public stk::mesh::EntitySorterBase } }; -using stk::mesh::fixtures::simple_fields::SelectorFixture; +using stk::mesh::fixtures::SelectorFixture; // Borrow a lot from UnitTestSelector. Bulk up the SelectorFixture to have parts diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRelation.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRelation.cpp index d71ddbaabf6c..4af38ec06b38 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRelation.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRelation.cpp @@ -34,6 +34,7 @@ #include // for runtime_error #include // for BulkData +#include #include // for MetaData, entity_rank_names #include // for ParallelMachine, etc #include @@ -64,9 +65,9 @@ using stk::mesh::EntityId; using stk::mesh::MetaData; using stk::mesh::BulkData; using stk::mesh::Ghosting; -using stk::mesh::fixtures::simple_fields::BoxFixture; -using stk::mesh::fixtures::simple_fields::HexFixture; -using stk::mesh::fixtures::simple_fields::RingFixture; +using stk::mesh::fixtures::BoxFixture; +using stk::mesh::fixtures::HexFixture; +using stk::mesh::fixtures::RingFixture; using stk::unit_test_util::build_mesh; namespace { @@ -372,7 +373,7 @@ TEST(UnitTestingOfRelation, testDoubleDeclareOfRelation) edge = mesh.declare_element_side(elem, local_side_id, sides_parts); stk::topology elem_top = mesh.bucket(elem).topology(); - stk::mesh::Permutation perm1 = mesh.find_permutation( + stk::mesh::Permutation perm1 = stk::mesh::find_permutation(mesh, elem_top, nodes.data(), 
elem_top.side_topology(local_side_id), side_nodes.data(), local_side_id); ASSERT_TRUE(perm1 != stk::mesh::Permutation::INVALID_PERMUTATION); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.cpp index c62a225be299..fe6fab3d8b39 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.cpp @@ -55,7 +55,7 @@ using stk::mesh::Selector; using stk::mesh::Entity; using stk::mesh::EntityKey; using stk::mesh::EntityProc; -using stk::mesh::fixtures::simple_fields::RingFixture; +using stk::mesh::fixtures::RingFixture; namespace { diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.hpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.hpp index 400d46c79d3c..63c98c94b179 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRingFixture.hpp @@ -45,7 +45,7 @@ namespace unit_test { /** * TODO - Document what this does */ -void test_shift_ring( stk::mesh::fixtures::simple_fields::RingFixture& ring); +void test_shift_ring( stk::mesh::fixtures::RingFixture& ring); } // namespace unit_test } // namespace stk diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRootTopology.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRootTopology.cpp index d568565bc862..f3c34c3094ad 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestRootTopology.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestRootTopology.cpp @@ -85,7 +85,6 @@ TEST(UnitTestRootTopology, TestRootTopologyPartGetters) { const int spatial_dim = 3; stk::mesh::MetaData meta(spatial_dim); - meta.use_simple_fields(); meta.commit(); for (unsigned i = 0; i < num_test_topologies; ++i) @@ -122,7 +121,6 @@ TEST(UnitTestRootTopology, TestRootTopologySubsets) { const int spatial_dim = 3; stk::mesh::MetaData meta(spatial_dim); - meta.use_simple_fields(); for (unsigned 
i = 0; i < num_test_topologies; ++i) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestSelector.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestSelector.cpp index 447d3ebc9c64..71aed117df7c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestSelector.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestSelector.cpp @@ -60,7 +60,7 @@ namespace stk { namespace mesh { class Bucket; } } namespace { -using stk::mesh::fixtures::simple_fields::SelectorFixture; +using stk::mesh::fixtures::SelectorFixture; void testSelectorWithBuckets(const SelectorFixture &selectorFixture, const stk::mesh::Selector &selector, bool gold_shouldEntityBeInSelector[]); @@ -182,7 +182,6 @@ TEST(Verify, selectorEmptyDuringMeshMod) std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& bulk = *bulkPtr; stk::mesh::MetaData& meta = bulk.mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part& block1 = meta.declare_part_with_topology("block_1", stk::topology::HEX_8); meta.commit(); @@ -794,7 +793,6 @@ std::shared_ptr create_mesh(stk::ParallelMachine comm, stk::mesh::MeshBuilder builder(comm); builder.set_spatial_dimension(spatialDim); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestSidePolarity.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestSidePolarity.cpp index 1941e0527cd5..0e1a02a40e06 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestSidePolarity.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestSidePolarity.cpp @@ -23,7 +23,7 @@ #include "stk_mesh/baseImpl/elementGraph/ElemElemGraphImpl.hpp" #include "stk_mesh/baseImpl/elementGraph/GraphEdgeData.hpp" -class TestTextMesh : public stk::unit_test_util::simple_fields::MeshFixture +class TestTextMesh : public stk::unit_test_util::MeshFixture { protected: TestTextMesh() @@ -65,7 +65,7 @@ TEST_F(TestQuad4, createNodeOrderingAndTestPolarity) if (get_bulk().parallel_size() == 1) { 
inititalize_2D_mesh(); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2}; @@ -90,7 +90,7 @@ TEST_F(TestQuad9, createNodeOrderingAndTestPolarity) if (get_bulk().parallel_size() == 1) { inititalize_2D_mesh(); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 5}; @@ -115,7 +115,7 @@ TEST_F(TestTri3, createNodeOrderingAndTestPolarity) if (get_bulk().parallel_size() == 1) { inititalize_2D_mesh(); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2}; @@ -140,7 +140,7 @@ TEST_F(TestTri6, createNodeOrderingAndTestPolarity) if (get_bulk().parallel_size() == 1) { inititalize_2D_mesh(); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 4}; @@ -164,7 +164,7 @@ TEST_F(TestHex8, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 6, 5}; @@ -188,7 +188,7 @@ TEST_F(TestHex20, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,HEX_20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 6, 5, 9, 14, 17, 13}; @@ -212,7 +212,7 @@ TEST_F(TestPyramid5, 
createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,PYRAMID_5,1,2,3,4,5"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 5}; @@ -248,7 +248,7 @@ TEST_F(TestPyramid13, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,PYRAMID_13,1,2,3,4,5,6,7,8,9,10,11,12,13"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 5, 6, 11, 10}; @@ -284,7 +284,7 @@ TEST_F(TestTet4, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,TET_4,1,2,3,4"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 4}; @@ -308,7 +308,7 @@ TEST_F(TestTet10, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,TET_10,1,2,3,4,5,6,7,8,9,10"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 4, 5, 9, 8}; @@ -332,7 +332,7 @@ TEST_F(TestWedge6, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,WEDGE_6,1,2,3,4,5,6"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 5, 4}; @@ -368,7 +368,7 @@ TEST_F(TestWedge15, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,WEDGE_15,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15"; if (get_bulk().parallel_size() == 1) { - 
stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 5, 4, 7, 11, 13, 10}; @@ -405,7 +405,7 @@ TEST_F(TestShellLine2, createNodeOrderingAndTestPolarity) if (get_bulk().parallel_size() == 1) { inititalize_2D_mesh(); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2}; @@ -430,7 +430,7 @@ TEST_F(TestShellLine3, createNodeOrderingAndTestPolarity) if (get_bulk().parallel_size() == 1) { inititalize_2D_mesh(); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; @@ -454,7 +454,7 @@ TEST_F(TestShellTri3, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,SHELL_TRI_3,1,2,3"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; @@ -490,7 +490,7 @@ TEST_F(TestShellTri6, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,SHELL_TRI_6,1,2,3,4,5,6"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 3, 4, 5, 6}; @@ -526,7 +526,7 @@ TEST_F(TestShellQuad4, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,SHELL_QUAD_4,1,2,3,4"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2}; @@ -562,7 +562,7 @@ TEST_F(TestShellQuad8, 
createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,SHELL_QUAD_8,1,2,3,4,5,6,7,8"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; @@ -598,7 +598,7 @@ TEST_F(TestShellQuad9, createNodeOrderingAndTestPolarity) std::string meshDesc = "0,1,SHELL_QUAD_9,1,2,3,4,5,6,7,8,9"; if (get_bulk().parallel_size() == 1) { - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); { stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestSideSet.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestSideSet.cpp index 49bca3074470..02a3a3f3d0b4 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestSideSet.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestSideSet.cpp @@ -197,14 +197,13 @@ TEST(SkinBoundary, check_interior_shared_shell_side) .set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA) .create(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::Part& boundaryPart = meta.declare_part("boundaryPart", meta.side_rank()); std::string meshDesc = "1,10098,TET_4, 1,2,3,4, block_1\n" "0,320234,SHELL_TRI_3, 1,3,4, block_2|sideset:name=surface_1; data=10098,3"; std::vector coords = {0,0,0, 0,1,0, 1,0,0, 1,1,1}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulkPtr, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulkPtr, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::mesh::Entity sharedSide = bulkPtr->get_entity(meta.side_rank(), 100983); ASSERT_TRUE(bulkPtr->is_valid(sharedSide)); @@ -226,7 +225,7 @@ TEST(SkinBoundary, check_interior_shared_shell_side) EXPECT_TRUE(stk::mesh::check_interior_block_boundary_sides(*bulkPtr, 
meta.universal_part(), boundaryPart)); } -class TestSideSet : public stk::unit_test_util::simple_fields::MeshFixture +class TestSideSet : public stk::unit_test_util::MeshFixture { protected: void setup_2_block_mesh() @@ -275,7 +274,7 @@ TEST_F(TestSideSet, createSideSetsSpanningMultipleBlocks) stk::mesh::create_bulkdata_sidesets(get_bulk()); EXPECT_EQ(2u, get_bulk().get_number_of_sidesets()); - stk::unit_test_util::simple_fields::delete_mesh(filename); + stk::unit_test_util::delete_mesh(filename); } } @@ -404,7 +403,7 @@ void create_sideset_observer(stk::mesh::BulkData& bulk, stk::mesh::Selector acti } } -class SideSetModification : public stk::unit_test_util::simple_fields::MeshFixture +class SideSetModification : public stk::unit_test_util::MeshFixture { protected: stk::mesh::Entity create_hex_solo_side(const stk::mesh::PartVector& parts) @@ -1163,7 +1162,7 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_EdgeSides) 1,0,0, 1,1,0, 1,2,0, 2,0,0, 2,1,0, 2,2,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::io::write_mesh("shellq4_edge_sides.g", *bulk); } @@ -1181,7 +1180,7 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_FullExteriorSkin) 1,0,0, 1,1,0, 1,2,0, 2,0,0, 2,1,0, 2,2,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::io::write_mesh("shellq4_full_exterior_skin.g", *bulk); } @@ -1214,7 +1213,7 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_8_EdgeSides) /*10*/3,0,0, /*12*/3,2,0, /*13*/4,0,0, /*14*/4,1,0, /*15*/4,2,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::io::write_mesh("shellq8_2blk_face_edge_sides.g", *bulk); } @@ -1241,7 +1240,7 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_tri_3_EdgeSides) 1,0,0, 1,1,0, 2,0,0, 2,1,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::io::write_mesh("shellt3_edge_sides.g", *bulk); } @@ -1259,12 +1258,12 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_tri_3_FullExteriorSkin) 1,0,0, 1,1,0, 2,0,0, 2,1,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::io::write_mesh("shellt3_full_exterior_skin.g", *bulk); } -class InternalSideSet : public stk::unit_test_util::simple_fields::MeshFixture +class InternalSideSet : public stk::unit_test_util::MeshFixture { protected: void setup_internal_sideset_test() @@ -1283,8 +1282,8 @@ class InternalSideSet : public stk::unit_test_util::simple_fields::MeshFixture stk::mesh::Part& sidesetPart = get_meta().declare_part(sidesetName, stk::topology::FACE_RANK); get_meta().set_part_id(sidesetPart, 100u); - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); create_sideset(get_bulk(), sidesetName, "block_1"); std::vector sidesets = get_bulk().get_sidesets(); @@ -1360,7 +1359,7 @@ TEST_F(InternalSideSet, 
maintainSingleSidedAfterParallelCreationOnProcessorBound } } -class ParallelCoincidence : public stk::unit_test_util::simple_fields::MeshFixture +class ParallelCoincidence : public stk::unit_test_util::MeshFixture { public: struct ParallelCoincidenceEntry @@ -1393,8 +1392,8 @@ class ParallelCoincidence : public stk::unit_test_util::simple_fields::MeshFixtu 0,0,1, 1,0,1, 1,1,1, 0,1,1, 0,0,2, 1,0,2, 1,1,2, 0,1,2}; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void setup_non_coincident_mesh() @@ -1408,8 +1407,8 @@ class ParallelCoincidence : public stk::unit_test_util::simple_fields::MeshFixtu 0,0,1, 1,0,1, 1,1,1, 0,1,1, 0,0,2, 1,0,2, 1,1,2, 0,1,2}; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void test_parallel_coincidence(const std::vector& expectedValues) @@ -1498,7 +1497,6 @@ TEST(Skinning, createSidesForShellQuad4Block) { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); - bulk->mesh_meta_data().use_simple_fields(); //shell-quad-4 mesh: // 6 // 3*----*----*9 @@ -1521,7 +1519,7 @@ TEST(Skinning, createSidesForShellQuad4Block) 1,0,0, 1,1,0, 1,2,0, 2,0,0, 2,1,0, 2,2,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::mesh::create_exposed_block_boundary_sides(*bulk, 
bulk->mesh_meta_data().universal_part(), stk::mesh::PartVector{&skinPart}); EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, skinPart)); @@ -1531,7 +1529,6 @@ TEST(Skinning, createSidesForShellQuad8Block) { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); - bulk->mesh_meta_data().use_simple_fields(); //shell-quad-8 mesh: // 6 12 // 3*----*----*9----*----*15 @@ -1555,7 +1552,7 @@ TEST(Skinning, createSidesForShellQuad8Block) /*10*/3,0,0, /*12*/3,2,0, /*13*/4,0,0, /*14*/4,1,0, /*15*/4,2,0}; - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::mesh::create_exposed_block_boundary_sides(*bulk, bulk->mesh_meta_data().universal_part(), stk::mesh::PartVector{&skinPart}); EXPECT_EQ(4u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, skinPart)); diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestStkTextMesh.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestStkTextMesh.cpp index 57b8eff4344a..e6eee14b2bef 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestStkTextMesh.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestStkTextMesh.cpp @@ -230,7 +230,7 @@ void test_get_mesh_spec(unsigned blockCountToDist, const std::vector& for(unsigned i = 0; i < numProcs.size(); i++) { unsigned procCount = numProcs[i]; std::vector procs; - stk::unit_test_util::simple_fields::get_block_proc_distribution(blockCountToDist, procCount, procs); + stk::unit_test_util::get_block_proc_distribution(blockCountToDist, procCount, procs); EXPECT_EQ(expectedDist[i].size(), procs.size()); for(unsigned j = 0; j < procs.size(); j++) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestTextMeshFixture.hpp 
b/packages/stk/stk_unit_tests/stk_mesh/UnitTestTextMeshFixture.hpp index fba73c944c41..2c81f6f0fc43 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestTextMeshFixture.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestTextMeshFixture.hpp @@ -19,7 +19,7 @@ namespace { -class TestTextMesh : public stk::unit_test_util::simple_fields::TextMeshFixture +class TestTextMesh : public stk::unit_test_util::TextMeshFixture { protected: TestTextMesh() : TextMeshFixture(3) @@ -28,7 +28,7 @@ class TestTextMesh : public stk::unit_test_util::simple_fields::TextMeshFixture } }; -class TestTextMeshAura : public stk::unit_test_util::simple_fields::TextMeshFixture +class TestTextMeshAura : public stk::unit_test_util::TextMeshFixture { protected: TestTextMeshAura() : TextMeshFixture(3) @@ -37,7 +37,7 @@ class TestTextMeshAura : public stk::unit_test_util::simple_fields::TextMeshFixt } }; -class TestTextMesh2d : public stk::unit_test_util::simple_fields::TextMeshFixture +class TestTextMesh2d : public stk::unit_test_util::TextMeshFixture { protected: TestTextMesh2d() : TextMeshFixture(2) @@ -46,7 +46,7 @@ class TestTextMesh2d : public stk::unit_test_util::simple_fields::TextMeshFixtur } }; -class TestTextMeshAura2d : public stk::unit_test_util::simple_fields::TextMeshFixture +class TestTextMeshAura2d : public stk::unit_test_util::TextMeshFixture { protected: TestTextMeshAura2d() : TextMeshFixture(2) @@ -55,7 +55,7 @@ class TestTextMeshAura2d : public stk::unit_test_util::simple_fields::TextMeshFi } }; -class TestTextMesh1d : public stk::unit_test_util::simple_fields::TextMeshFixture +class TestTextMesh1d : public stk::unit_test_util::TextMeshFixture { protected: TestTextMesh1d() : TextMeshFixture(1) @@ -64,7 +64,7 @@ class TestTextMesh1d : public stk::unit_test_util::simple_fields::TextMeshFixtur } }; -class TestTextMeshGraph : public stk::unit_test_util::simple_fields::TextMeshFixture +class TestTextMeshGraph : public stk::unit_test_util::TextMeshFixture { protected: 
TestTextMeshGraph() : TextMeshFixture(3) {} diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestTopology.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestTopology.cpp index 902bc37a465d..0b1d645e9e86 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestTopology.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestTopology.cpp @@ -35,6 +35,7 @@ #include // for NULL #include // for runtime_error #include // for BulkData +#include #include // for Entity #include // for declare_element, etc #include // for get_cell_topology, MetaData @@ -131,7 +132,6 @@ TopologyHelpersTestingFixture::TopologyHelpersTestingFixture(ParallelMachine pm) psize(bulk.parallel_size()), prank(bulk.parallel_rank()) { - meta.use_simple_fields(); meta.commit(); } @@ -465,7 +465,7 @@ int check_permutation_given(stk::mesh::BulkData& mesh, stk::mesh::Entity elem, u } // Indeed, find_permutation computes what was stored! - stk::mesh::Permutation perm = mesh.find_permutation(elem_topo, elem_nodes, face_topo, face_nodes.data(), face_ord); + stk::mesh::Permutation perm = stk::mesh::find_permutation(mesh, elem_topo, elem_nodes, face_topo, face_nodes.data(), face_ord); EXPECT_EQ(perm, claimed_permutation); return innermost_hits; } @@ -478,7 +478,7 @@ TEST (stkTopologyFunctions, use_permutations_Hex_2x1x1) const size_t NY = 1; const size_t NZ = 1; - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -527,7 +527,6 @@ TEST (stkTopologyFunctions, use_permutations_Hex_2x1x1) void test_side_creation(unsigned *gold_side_ids,unsigned local_side_id) { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); std::string name = "generated:1x1x1"; stkMeshIoBroker.add_mesh_database(name, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -562,7 +561,6 @@ void test_side_creation(unsigned 
*gold_side_ids,unsigned local_side_id) void test_side_creation_with_permutation(unsigned *gold_side_ids,unsigned local_side_id, stk::mesh::Permutation perm) { stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); std::string name = "generated:1x1x1"; stkMeshIoBroker.add_mesh_database(name, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -655,7 +653,6 @@ TEST(stkTopologyFunctions, check_permutations_for_Hex_1x1x1) }; stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); std::string name = "generated:1x1x1"; stkMeshIoBroker.add_mesh_database(name, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -722,7 +719,6 @@ TEST(stkTopologyFunctions, permutation_consistency_check_3d) unsigned local_side_id = 1; // nodes 2,4,8,5 are nodes of face 'local_side_id' of a 1x1x1 hex element (generated) stk::io::StkMeshIoBroker stkMeshIoBroker(MPI_COMM_WORLD); - stkMeshIoBroker.use_simple_fields(); std::string name = "generated:1x1x1"; stkMeshIoBroker.add_mesh_database(name, stk::io::READ_MESH); stkMeshIoBroker.create_input_mesh(); @@ -766,7 +762,6 @@ TEST(stkTopologyFunctions, check_permutation_consistency_parallel) unsigned gold_side_ids[4] = {5,6,8,7}; stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::unit_test_util::BulkDataFaceSharingTester mesh(meta, MPI_COMM_WORLD); const std::string generatedMeshSpec = "generated:1x1x2"; @@ -845,10 +840,10 @@ TEST(stkTopologyFunctions, permutation_consistency_check_2d) EXPECT_NO_THROW(mesh.modification_end()); - EXPECT_TRUE(mesh.check_permutation(Quad9, sides[0], 0, perm)) << "for side 0"; - EXPECT_TRUE(mesh.check_permutation(Quad9, sides[1], 1, perm)) << "for side 1"; - EXPECT_TRUE(mesh.check_permutation(Quad9, sides[2], 2, perm)) << "for side 2"; - EXPECT_TRUE(mesh.check_permutation(Quad9, sides[3], 3, perm)) << "for side 3"; + EXPECT_TRUE(stk::mesh::check_permutation(mesh, Quad9, sides[0], 0, perm)) << "for side 0"; + 
EXPECT_TRUE(stk::mesh::check_permutation(mesh, Quad9, sides[1], 1, perm)) << "for side 1"; + EXPECT_TRUE(stk::mesh::check_permutation(mesh, Quad9, sides[2], 2, perm)) << "for side 2"; + EXPECT_TRUE(stk::mesh::check_permutation(mesh, Quad9, sides[3], 3, perm)) << "for side 3"; EXPECT_TRUE(stk::mesh::impl::check_permutations_on_all(mesh)); } @@ -864,11 +859,11 @@ struct SuperTopologySideData std::vector permPerProc; }; -class SuperTopologies : public stk::unit_test_util::simple_fields::MeshFixture +class SuperTopologies : public stk::unit_test_util::MeshFixture { protected: SuperTopologies(int dim) - : stk::unit_test_util::simple_fields::MeshFixture(dim) + : stk::unit_test_util::MeshFixture(dim) { } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestVisitAura.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestVisitAura.cpp index 0c6055965326..6c3e1f72f594 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestVisitAura.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestVisitAura.cpp @@ -47,7 +47,7 @@ namespace using EntitySet = std::set; -class VisitAura : public stk::unit_test_util::simple_fields::MeshFixture +class VisitAura : public stk::unit_test_util::MeshFixture { public: VisitAura() diff --git a/packages/stk/stk_unit_tests/stk_mesh/change_parts/CustomGhostEntities.cpp b/packages/stk/stk_unit_tests/stk_mesh/change_parts/CustomGhostEntities.cpp index 83cd36dfeeb6..fa09a25f7e86 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/change_parts/CustomGhostEntities.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/change_parts/CustomGhostEntities.cpp @@ -12,11 +12,11 @@ namespace { -class CustomGhostEntities: public stk::unit_test_util::simple_fields::MeshFixture +class CustomGhostEntities: public stk::unit_test_util::MeshFixture { protected: CustomGhostEntities() - : stk::unit_test_util::simple_fields::MeshFixture(), + : stk::unit_test_util::MeshFixture(), myPart(nullptr) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestChangeEntityOwnerKeepUnaffectedCustomGhost.cpp b/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestChangeEntityOwnerKeepUnaffectedCustomGhost.cpp index 136dff231260..61a3c7536031 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestChangeEntityOwnerKeepUnaffectedCustomGhost.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestChangeEntityOwnerKeepUnaffectedCustomGhost.cpp @@ -60,7 +60,6 @@ class ChangeEntityOwnerTest : public ::testing::Test builder.set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA); bulk = builder.create(); meta = &(bulk->mesh_meta_data()); - meta->use_simple_fields(); create_1_beam_per_proc(); custom_ghost_node2_to_proc2(); } diff --git a/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestKeepCustomGhostAfterLossOfSharing.cpp b/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestKeepCustomGhostAfterLossOfSharing.cpp index 378e8fb5ceb0..576b0445342a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestKeepCustomGhostAfterLossOfSharing.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestKeepCustomGhostAfterLossOfSharing.cpp @@ -55,7 +55,6 @@ class ParticleCustomGhostTester : public ::testing::Test builder.set_aura_option(auraOption); bulk = builder.create(); meta = &(bulk->mesh_meta_data()); - meta->use_simple_fields(); createParts(); } diff --git a/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestRemoveNeededRecvGhost.cpp b/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestRemoveNeededRecvGhost.cpp index eeb6df6e60b3..d41770d3328c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestRemoveNeededRecvGhost.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/custom_ghosting/UnitTestRemoveNeededRecvGhost.cpp @@ -60,7 +60,6 @@ class CustomGhostTest : public ::testing::Test builder.set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA); bulk = 
builder.create(); meta = &(bulk->mesh_meta_data()); - meta->use_simple_fields(); create_2_beams_per_proc(); custom_ghost_beam1_to_proc1(); } diff --git a/packages/stk/stk_unit_tests/stk_mesh/edge_creation/UnitTestEdgeConnection.cpp b/packages/stk/stk_unit_tests/stk_mesh/edge_creation/UnitTestEdgeConnection.cpp index 1750d3d9f269..97d23bc0510d 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/edge_creation/UnitTestEdgeConnection.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/edge_creation/UnitTestEdgeConnection.cpp @@ -179,14 +179,14 @@ TEST(StkEdgeIo, ParallelWriteMesh) std::string filename = "output.exo"; { - std::string meshDesc = stk::unit_test_util::simple_fields::get_many_block_mesh_desc(2, 2); - std::vector coords = stk::unit_test_util::simple_fields::get_many_block_coordinates(2); + std::string meshDesc = stk::unit_test_util::get_many_block_mesh_desc(2, 2); + std::vector coords = stk::unit_test_util::get_many_block_coordinates(2); std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); stk::mesh::Part* part = &meta.declare_part_with_topology("edgeBlock", stk::topology::LINE_2); stk::io::put_edge_block_io_part_attribute(*part); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::mesh::create_edges(bulk, meta.universal_part(), part); const stk::mesh::BucketVector& buckets = bulk.buckets(stk::topology::EDGE_RANK); @@ -227,8 +227,8 @@ TEST(StkEdgeIo, ParallelWriteMeshWithFace) std::string filename = "output.exo"; { - std::string meshDesc = stk::unit_test_util::simple_fields::get_many_block_mesh_desc(2, 2); - std::vector coords = stk::unit_test_util::simple_fields::get_many_block_coordinates(2); + std::string meshDesc = stk::unit_test_util::get_many_block_mesh_desc(2, 2); 
+ std::vector coords = stk::unit_test_util::get_many_block_coordinates(2); std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_WORLD); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk = *bulkPtr; @@ -236,7 +236,7 @@ TEST(StkEdgeIo, ParallelWriteMeshWithFace) stk::mesh::Part* facePart = &meta.declare_part_with_topology("faceBlock", stk::topology::QUAD_4); stk::io::put_edge_block_io_part_attribute(*edgePart); stk::io::put_io_part_attribute(*facePart); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coords)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); stk::mesh::PartVector faceParts = {facePart}; stk::mesh::create_interior_block_boundary_sides(bulk, meta.universal_part(), faceParts); diff --git a/packages/stk/stk_unit_tests/stk_mesh/entitySorting/UnitTestEntitySorting.cpp b/packages/stk/stk_unit_tests/stk_mesh/entitySorting/UnitTestEntitySorting.cpp index 2f46948985ac..01fd3d1596d4 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/entitySorting/UnitTestEntitySorting.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/entitySorting/UnitTestEntitySorting.cpp @@ -76,7 +76,7 @@ class EntityCoordSorter : public stk::mesh::EntitySorterBase }; -class EntitySortingFixture : public stk::unit_test_util::simple_fields::MeshFixture +class EntitySortingFixture : public stk::unit_test_util::MeshFixture { protected: diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/FaceCreatorFixture.hpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/FaceCreatorFixture.hpp index 837b69842e75..73aa81731fc9 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/FaceCreatorFixture.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/FaceCreatorFixture.hpp @@ -40,7 +40,7 @@ #include #include -class FaceCreatorFixture : public stk::unit_test_util::simple_fields::MeshFixture +class 
FaceCreatorFixture : public stk::unit_test_util::MeshFixture { protected: @@ -106,7 +106,7 @@ class FaceCreatorFixture : public stk::unit_test_util::simple_fields::MeshFixtur { unsigned id = get_bulk().parallel_rank()+1; stk::topology side_topology = get_bulk().bucket(element).topology().side_topology(); - stk::mesh::Entity side = stk::unit_test_util::simple_fields::declare_element_side_with_nodes(get_bulk(), element, nodes_of_face, id, get_meta().get_topology_root_part(side_topology)); + stk::mesh::Entity side = stk::unit_test_util::declare_element_side_with_nodes(get_bulk(), element, nodes_of_face, id, get_meta().get_topology_root_part(side_topology)); EXPECT_TRUE(get_bulk().is_valid(side)); } diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/TwQuads2DTwoProcsElemGraph.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/TwQuads2DTwoProcsElemGraph.cpp index 4cf838e9eb19..dfa1faf37767 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/TwQuads2DTwoProcsElemGraph.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/TwQuads2DTwoProcsElemGraph.cpp @@ -13,7 +13,7 @@ namespace void convert_quad_fixture_to_my_bulk_data_flavor(unsigned numX, unsigned numY, stk::mesh::BulkData& bulkData) { - stk::mesh::fixtures::simple_fields::QuadFixture fixture(bulkData.parallel(), numX, numY, false); + stk::mesh::fixtures::QuadFixture fixture(bulkData.parallel(), numX, numY, false); stk::mesh::Field &coordField = fixture.m_meta.declare_field(stk::topology::NODE_RANK, "model_coordinates"); stk::mesh::put_field_on_mesh(coordField, fixture.m_meta.universal_part(), fixture.m_meta.spatial_dimension(), nullptr); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphAddElements.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphAddElements.cpp index c4e90d3d56ae..95432e9cf869 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphAddElements.cpp +++ 
b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphAddElements.cpp @@ -12,7 +12,7 @@ namespace { -class ElemGraphAddElementsToEmptyGraphTester : public stk::unit_test_util::simple_fields::MeshTestFixture +class ElemGraphAddElementsToEmptyGraphTester : public stk::unit_test_util::MeshTestFixture { protected: ElemGraphAddElementsToEmptyGraphTester() : elementGraph(nullptr) { } @@ -94,7 +94,7 @@ struct NodeSharingInfo void setup_node_sharing(stk::mesh::BulkData &mesh, const std::vector & sharedNodes); -class ElemGraphAddElementsToExistingGraphTester : public stk::unit_test_util::simple_fields::MeshTestFixture +class ElemGraphAddElementsToExistingGraphTester : public stk::unit_test_util::MeshTestFixture { protected: ElemGraphAddElementsToExistingGraphTester() : elementGraph(nullptr), hexPart(nullptr) { } diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphDeleteElements.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphDeleteElements.cpp index 11b0b07bb5c0..71efa9eebc9d 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphDeleteElements.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/ElementGraphDeleteElements.cpp @@ -9,7 +9,7 @@ namespace { -class ElemGraphDeleteElementsTester : public stk::unit_test_util::simple_fields::MeshFixture +class ElemGraphDeleteElementsTester : public stk::unit_test_util::MeshFixture { protected: ElemGraphDeleteElementsTester() : elementGraph(nullptr) { } diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp index dd9b148a2091..5f4f5d8447f2 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp @@ 
-975,7 +975,6 @@ TEST(ElementSide, get_or_create_element_side_with_permutation) { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& new_faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4); stk::mesh::PartVector face_parts {&new_faces_part}; stk::io::put_io_part_attribute(new_faces_part); @@ -1109,7 +1108,6 @@ TEST(ElementGraph, create_faces_using_element_graph_parallel) { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& new_faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4); stk::io::put_io_part_attribute(new_faces_part); BulkDataElementGraphTester bulkData(meta, comm); @@ -1174,7 +1172,6 @@ TEST(ElementGraph, create_faces_using_element_graph_parallel_block_membership) { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& new_faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4); stk::mesh::Part& block_2 = meta.declare_part_with_topology("block_2", stk::topology::HEX_8); @@ -1250,16 +1247,15 @@ TEST(ElementGraph, compare_performance_create_faces) { stk::ParallelMachine comm = MPI_COMM_WORLD; - int xdim = stk::unit_test_util::simple_fields::get_command_line_option("--xdim", 3); + int xdim = stk::unit_test_util::get_command_line_option("--xdim", 3); int ydim = xdim; int zdim = xdim * stk::parallel_machine_size(comm); - std::string filename = stk::unit_test_util::simple_fields::get_name_of_generated_mesh(xdim, ydim, zdim, "|nodeset:zZ"); + std::string filename = stk::unit_test_util::get_name_of_generated_mesh(xdim, ydim, zdim, "|nodeset:zZ"); { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4); stk::io::put_io_part_attribute(faces_part); BulkDataElementGraphTester 
bulkData(meta, comm); @@ -1284,7 +1280,6 @@ TEST(ElementGraph, compare_performance_create_faces) { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); bool force_no_induce = true; stk::mesh::Part& faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4, force_no_induce); stk::io::put_io_part_attribute(faces_part); @@ -1323,7 +1318,6 @@ TEST(ElementGraph, make_items_inactive) unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4); stk::mesh::PartVector boundary_mesh_parts { &faces_part }; stk::io::put_io_part_attribute(faces_part); @@ -1335,7 +1329,7 @@ TEST(ElementGraph, make_items_inactive) stk::io::fill_mesh("generated:1x1x4", bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); ElemElemGraphTester graph(bulkData); @@ -1395,16 +1389,15 @@ TEST(ElementGraph, test_element_death) if(stk::parallel_machine_size(comm) <= 2) { //IO error when this is <4. 
Shared face being attached to the wrong element - int xdim = stk::unit_test_util::simple_fields::get_command_line_option("--zdim", 4); + int xdim = stk::unit_test_util::get_command_line_option("--zdim", 4); int ydim = xdim; int zdim = xdim; // * stk::parallel_machine_size(comm); - std::string filename = stk::unit_test_util::simple_fields::get_name_of_generated_mesh(xdim, ydim, zdim, "|nodeset:zZ"); + std::string filename = stk::unit_test_util::get_name_of_generated_mesh(xdim, ydim, zdim, "|nodeset:zZ"); { unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& faces_part = meta.declare_part_with_topology("surface_5", stk::topology::QUAD_4); stk::mesh::PartVector boundary_mesh_parts { &faces_part }; stk::io::put_io_part_attribute(faces_part); @@ -1419,7 +1412,7 @@ TEST(ElementGraph, test_element_death) stk::mesh::Part& block_1 = *meta.get_part("block_1"); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); std::ostringstream os; os << "Proc id: " << bulkData.parallel_rank() << std::endl; @@ -1439,7 +1432,7 @@ TEST(ElementGraph, test_element_death) for(int i = 0; i < num_time_steps; ++i) { stk::mesh::EntityVector killedElements = get_killed_elements(bulkData, i, active); - stk::unit_test_util::simple_fields::move_killed_elements_out_of_parts(bulkData, killedElements, {&block_1, &active}); + stk::unit_test_util::move_killed_elements_out_of_parts(bulkData, killedElements, {&block_1, &active}); stk::mesh::impl::ParallelSelectedInfo remoteActiveSelector; stk::mesh::impl::populate_selected_value_for_remote_elements(bulkData, elementGraph, active, remoteActiveSelector); @@ -1490,9 +1483,9 @@ class ElementDeathRestartTest void load_without_restart() { initializeObjects(); - std::string filename = stk::unit_test_util::simple_fields::get_name_of_generated_mesh(1, 1, 2, "|sideset:zZ"); + std::string filename = 
stk::unit_test_util::get_name_of_generated_mesh(1, 1, 2, "|sideset:zZ"); stk::io::fill_mesh(filename, *bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(*bulkData, *activePart); + stk::unit_test_util::put_mesh_into_part(*bulkData, *activePart); } void read_restart_file(const std::string& filename) @@ -1528,7 +1521,6 @@ class ElementDeathRestartTest builder.set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA); bulkData = builder.create(); metaData = &(bulkData->mesh_meta_data()); - metaData->use_simple_fields(); stk::io::put_io_part_attribute(metaData->universal_part()); deathStatusField = &metaData->declare_field(stk::topology::ELEM_RANK,deathStatusFieldName); stk::mesh::put_field_on_mesh(*deathStatusField,metaData->universal_part(), nullptr); @@ -1554,7 +1546,7 @@ class ElementDeathRestartTest { elementsToKill.push_back(element); } - stk::unit_test_util::simple_fields::move_killed_elements_out_of_parts(*bulkData, elementsToKill, activePartVector); + stk::unit_test_util::move_killed_elements_out_of_parts(*bulkData, elementsToKill, activePartVector); stk::mesh::impl::ParallelSelectedInfo remoteActiveSelector; stk::mesh::impl::populate_selected_value_for_remote_elements(*bulkData, bulkData->get_face_adjacent_element_graph(), *activePart, remoteActiveSelector); @@ -1731,7 +1723,7 @@ TEST(ElementDeath, test_element_death_with_restart) elementDeathTest.kill_element(2); elementDeathTest.verify_mesh_after_killing_element_2(); } - stk::unit_test_util::simple_fields::delete_mesh("elemDeathRestartFile.exo"); + stk::unit_test_util::delete_mesh("elemDeathRestartFile.exo"); } } @@ -4118,7 +4110,7 @@ void test_add_element_to_graph_with_element_death(stk::mesh::BulkData::Automatic test_add_elements_to_pre_existing_graph_and_mesh(bulkData); stk::mesh::ElemElemGraph &graph = bulkData.get_face_adjacent_element_graph(); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); 
stk::mesh::EntityVector deactivated_elems; @@ -4227,7 +4219,7 @@ void test_delete_element_from_graph_with_element_death(stk::mesh::BulkData::Auto stk::mesh::ElemElemGraph &graph = bulkData.get_face_adjacent_element_graph(); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); stk::mesh::EntityVector deactivated_elems; @@ -4697,7 +4689,6 @@ TEST(ElemGraph, test_id_reservation) unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); BulkDataElementGraphTester bulkData(meta, comm); stk::io::fill_mesh("generated:1x1x4", bulkData); @@ -4812,11 +4803,10 @@ TEST(ElemGraph, test_initial_graph_creation_with_deactivated_elements) if(stk::parallel_machine_size(comm)==2) { stk::mesh::MetaData meta(3); - meta.use_simple_fields(); stk::mesh::Part &activePart = meta.declare_part("active"); BulkDataElementGraphTester bulkData(meta, comm); stk::io::fill_mesh("generated:1x1x4", bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, activePart); + stk::unit_test_util::put_mesh_into_part(bulkData, activePart); bulkData.modification_begin(); if(bulkData.parallel_rank() == 0) diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp index b98ebbaa37f0..e836d09b0327 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp @@ -65,7 +65,7 @@ void expect_elem_connected_to_remote_elem_id_via_side(const stk::mesh::BulkData& ASSERT_TRUE(foundRemotelyConnectedId) << "elem " << elemId << " expected remote elem " << connectedId; } -class ElemGraphChangeOwner : public stk::unit_test_util::simple_fields::MeshTestFixture +class 
ElemGraphChangeOwner : public stk::unit_test_util::MeshTestFixture { protected: void expect_initial_graph_correct() @@ -499,7 +499,6 @@ std::shared_ptr build_mesh(stk::ParallelMachine comm, builder.set_entity_rank_names(entityRankNames); builder.set_aura_option(auraOption); std::shared_ptr bulk = builder.create(); - bulk->mesh_meta_data().use_simple_fields(); return bulk; } @@ -610,7 +609,7 @@ void change_entity_owner_then_death_hex_test_2_procs(bool aura_on) stk::io::fill_mesh("generated:1x1x4", bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); std::vector counts; stk::mesh::count_entities(bulkData.mesh_meta_data().locally_owned_part(), bulkData, counts); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp index bec34afd74c2..beea53050de9 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp @@ -131,9 +131,8 @@ class UpdateElemElemGraphTest : public ::testing::Test activePart(meta.declare_part("active")), boundaryPart(meta.declare_part("boundary")) { - meta.use_simple_fields(); stk::io::fill_mesh("generated:1x1x4", bulk); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulk, activePart); + stk::unit_test_util::put_mesh_into_part(bulk, activePart); } void add_element_to_deactivate_on_proc(int elementId, int procRankToKillElementsOn, stk::mesh::EntityVector &elementsDeactivated) diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemGraphMultipleSharedSides.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemGraphMultipleSharedSides.cpp index 
3d9fe19c9201..57140a918594 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemGraphMultipleSharedSides.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemGraphMultipleSharedSides.cpp @@ -3,7 +3,7 @@ namespace { -class ElemGraph_TwoElemTwoSharedSide : public simple_fields::TwoElemTwoSharedSideTester {}; +class ElemGraph_TwoElemTwoSharedSide : public TwoElemTwoSharedSideTester {}; TEST_F(ElemGraph_TwoElemTwoSharedSide, elem_death) { @@ -21,7 +21,7 @@ TEST_F(ElemGraph_TwoElemTwoSharedSide, double_kissing_hexes) } } -class ElemGraph_TwoElemThreeSharedSide : public simple_fields::TwoElemThreeSharedSideTester {}; +class ElemGraph_TwoElemThreeSharedSide : public TwoElemThreeSharedSideTester {}; TEST_F(ElemGraph_TwoElemThreeSharedSide, triple_kissing_hexes) { @@ -31,7 +31,7 @@ TEST_F(ElemGraph_TwoElemThreeSharedSide, triple_kissing_hexes) } } -class ElemGraph_TwoElemThreeSharedSideNoAura : public simple_fields::TwoElemThreeSharedSideNoAuraTester {}; +class ElemGraph_TwoElemThreeSharedSideNoAura : public TwoElemThreeSharedSideNoAuraTester {}; TEST_F(ElemGraph_TwoElemThreeSharedSideNoAura, triple_kissing_hexes) { @@ -41,7 +41,7 @@ TEST_F(ElemGraph_TwoElemThreeSharedSideNoAura, triple_kissing_hexes) } } -class ElemGraph_TwoElem2dTwoSharedSide : public simple_fields::TwoElem2dTwoSharedSideTester {}; +class ElemGraph_TwoElem2dTwoSharedSide : public TwoElem2dTwoSharedSideTester {}; TEST_F(ElemGraph_TwoElem2dTwoSharedSide, double_kissing_quads) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp index 665685aaddaf..487036273f6a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp @@ -101,14 +101,14 @@ TEST(ElementDeath, 
replicate_random_death_test) stk::mesh::PartVector boundary_mesh_parts {&faces_part, &death_1_part}; stk::mesh::Part& active = meta.declare_part("active"); - stk::unit_test_util::simple_fields::generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp("2x2x1", bulkData, "cyclic"); + stk::unit_test_util::generate_mesh_from_serial_spec_and_load_in_parallel_with_auto_decomp("2x2x1", bulkData, "cyclic"); stk::mesh::create_faces(bulkData); std::vector mesh_counts; stk::mesh::comm_mesh_counts(bulkData, mesh_counts); ASSERT_EQ(20u, mesh_counts[stk::topology::FACE_RANK]); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); boundary_mesh_parts.push_back(&active); @@ -181,7 +181,7 @@ TEST(ElementDeath, keep_faces_after_element_death_after_calling_create_faces) stk::mesh::create_faces(bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); stk::mesh::ElemElemGraph graph(bulkData); @@ -311,7 +311,7 @@ TEST(ElementDeath, keep_faces_after_element_death_without_calling_create_faces) stk::io::fill_mesh("generated:1x1x4", bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); stk::mesh::ElemElemGraph &graph = bulkData.get_face_adjacent_element_graph(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestSideIdPool.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestSideIdPool.cpp index 6792f95c26dc..621b42254ff7 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestSideIdPool.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestSideIdPool.cpp @@ -20,7 +20,7 @@ class SideIdPoolTester : public stk::mesh::SideIdPool }; -class SideIdPoolInitialIdsTest : public 
stk::unit_test_util::simple_fields::MeshFixture +class SideIdPoolInitialIdsTest : public stk::unit_test_util::MeshFixture { protected: SideIdPoolInitialIdsTest() diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp index f0ee6ac6414a..0136648ed7be 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp @@ -39,7 +39,9 @@ class BulkDataTester : public stk::mesh::BulkData void set_sorting_by_face() { +#ifdef SIERRA_MIGRATION m_shouldSortFacesByNodeIds = true; +#endif } }; @@ -97,7 +99,6 @@ TEST(ElementDeath, compare_death_and_skin_mesh) unsigned spatialDim = 3; stk::mesh::MetaData meta(spatialDim); - meta.use_simple_fields(); stk::mesh::Part& skin = meta.declare_part_with_topology("skin", stk::topology::QUAD_4); stk::io::put_io_part_attribute(skin); BulkDataTester bulkData(meta, comm); @@ -105,7 +106,7 @@ TEST(ElementDeath, compare_death_and_skin_mesh) stk::mesh::Part& active = meta.declare_part("active"); // can't specify rank, because it gets checked against size of rank_names stk::io::fill_mesh("generated:1x1x4", bulkData); - stk::unit_test_util::simple_fields::put_mesh_into_part(bulkData, active); + stk::unit_test_util::put_mesh_into_part(bulkData, active); ElemGraphTestUtils::skin_boundary(bulkData, active, {&skin, &active}); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/mesh_diagnostics/UnitTestRuleThreeViolation.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/mesh_diagnostics/UnitTestRuleThreeViolation.cpp index eafc340b222f..13e20e369f7e 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/mesh_diagnostics/UnitTestRuleThreeViolation.cpp +++ 
b/packages/stk/stk_unit_tests/stk_mesh/face_creation/mesh_diagnostics/UnitTestRuleThreeViolation.cpp @@ -10,7 +10,7 @@ namespace { -class UnitTestRuleThreeViolation : public stk::unit_test_util::simple_fields::MeshFixture +class UnitTestRuleThreeViolation : public stk::unit_test_util::MeshFixture { protected: stk::mesh::EntityVector get_shared_nodes_of_element(stk::mesh::Entity elem) diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinIrregular.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinIrregular.cpp index 4d1515247b99..d39261133a17 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinIrregular.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinIrregular.cpp @@ -69,7 +69,6 @@ TEST( UnitTestSkin, SkinPocket) std::shared_ptr bulkPtr = builder.create(); stk::mesh::BulkData& bulk_data = *bulkPtr; stk::mesh::MetaData& fem_meta = bulk_data.mesh_meta_data(); - fem_meta.use_simple_fields(); stk::mesh::Part & hex_part = fem_meta.declare_part_with_topology( "hex_part", stk::topology::HEX_8 ); const EntityRank element_rank = stk::topology::ELEMENT_RANK; const EntityRank side_rank = fem_meta.side_rank(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMesh.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMesh.cpp index 45235054f5dc..a96d558fc098 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMesh.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMesh.cpp @@ -133,7 +133,6 @@ void test_skin_mesh_with_hexes(stk::mesh::BulkData::AutomaticAuraOption autoAura std::shared_ptr meshPtr = build_mesh(spatialDim, MPI_COMM_WORLD, autoAuraOption); stk::mesh::BulkData& mesh = *meshPtr; stk::mesh::MetaData& meta = mesh.mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::EntityRank side_rank = meta.side_rank(); @@ 
-225,7 +224,7 @@ void test_skin_mesh_with_tets(stk::mesh::BulkData::AutomaticAuraOption autoAuraO const size_t NZ = 2; // fixture generates six tets from NXxNYxNZ hex - stk::mesh::fixtures::simple_fields::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ, autoAuraOption); + stk::mesh::fixtures::TetFixture fixture( MPI_COMM_WORLD, NX, NY, NZ, autoAuraOption); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -344,7 +343,6 @@ void test_skin_mesh_with_wedge(stk::mesh::BulkData::AutomaticAuraOption autoAura std::shared_ptr meshPtr = build_mesh(spatialDim, pm, autoAuraOption); stk::mesh::BulkData& mesh = *meshPtr; stk::mesh::MetaData& meta = mesh.mesh_meta_data(); - meta.use_simple_fields(); const int p_rank = mesh.parallel_rank(); stk::mesh::EntityRank side_rank = meta.side_rank(); @@ -529,7 +527,6 @@ void test_skin_mesh_with_pyramid(stk::mesh::BulkData::AutomaticAuraOption autoAu std::shared_ptr meshPtr = build_mesh(spatialDim, pm, autoAuraOption); stk::mesh::BulkData& mesh = *meshPtr; stk::mesh::MetaData& meta = mesh.mesh_meta_data(); - meta.use_simple_fields(); const int p_rank = mesh.parallel_rank(); stk::mesh::EntityRank side_rank = meta.side_rank(); @@ -691,7 +688,6 @@ void test_skin_hybrid_mesh(stk::mesh::BulkData::AutomaticAuraOption autoAuraOpti std::shared_ptr meshPtr = build_mesh(spatialDim, pm, autoAuraOption); stk::mesh::BulkData& mesh = *meshPtr; stk::mesh::MetaData& meta = mesh.mesh_meta_data(); - meta.use_simple_fields(); const int p_rank = mesh.parallel_rank(); stk::mesh::EntityRank side_rank = meta.side_rank(); @@ -862,7 +858,6 @@ void test_2_hex_2_block(stk::mesh::BulkData::AutomaticAuraOption autoAuraOption) std::shared_ptr meshPtr = build_mesh(spatialDim, MPI_COMM_WORLD, autoAuraOption); stk::mesh::BulkData& mesh = *meshPtr; stk::mesh::MetaData& meta = mesh.mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::EntityRank side_rank = meta.side_rank(); @@ -919,7 +914,6 @@ void 
test_2_hex_2_block_with_second_selector(stk::mesh::BulkData::AutomaticAuraO std::shared_ptr meshPtr = build_mesh(spatialDim, MPI_COMM_WORLD, autoAuraOption); stk::mesh::BulkData& mesh = *meshPtr; stk::mesh::MetaData& meta = mesh.mesh_meta_data(); - meta.use_simple_fields(); stk::mesh::EntityRank side_rank = meta.side_rank(); @@ -971,7 +965,7 @@ TEST( SkinMesh, test_2_hex_2_block_with_second_selector_without_aura) void test_quad_2D_skin_with_aura_option (bool auraOn) { const unsigned X = 5, Y = 5; - stk::mesh::fixtures::simple_fields::QuadFixture fixture(MPI_COMM_WORLD, X, Y, auraOn); + stk::mesh::fixtures::QuadFixture fixture(MPI_COMM_WORLD, X, Y, auraOn); stk::mesh::EntityRank side_rank = fixture.m_meta.side_rank(); @@ -1043,7 +1037,7 @@ TEST( SkinMesh, SimpleQuad) test_quad_2D_skin_with_aura_option(false); } -class TextSkinMesh : public stk::unit_test_util::simple_fields::MeshFixture +class TextSkinMesh : public stk::unit_test_util::MeshFixture { public: void setup_three_wedges_2p() @@ -1060,8 +1054,8 @@ class TextSkinMesh : public stk::unit_test_util::simple_fields::MeshFixture 0,0,1, 1,0,1, 2,0,1, 0,1,1, 1,1,1, }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } void setup_three_wedges_with_sideset_2p() @@ -1078,8 +1072,8 @@ class TextSkinMesh : public stk::unit_test_util::simple_fields::MeshFixture 0,0,1, 1,0,1, 2,0,1, 0,1,1, 1,1,1, }; - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); } }; diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCoincidentElements.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCoincidentElements.cpp index a4b3bd6b4961..9859095f4384 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCoincidentElements.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCoincidentElements.cpp @@ -29,7 +29,7 @@ int get_other_proc(MPI_Comm comm) // void create_exposed_boundary_sides(BulkData &bulkData, const Selector& blocksToSkin, Part& partToPutSidesInto) -class CoincidentElements: public stk::unit_test_util::simple_fields::MeshTestFixture +class CoincidentElements: public stk::unit_test_util::MeshTestFixture { protected: void make_coincident_element_mesh(unsigned numElemsToCreate, const stk::mesh::EntityIdVector &nodes, stk::mesh::Part &part) diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateEdges.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateEdges.cpp index 4872e3f07793..92893f8dbf13 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateEdges.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateEdges.cpp @@ -52,7 +52,7 @@ using stk::mesh::MetaData; TEST ( UnitTestCreateEdges, Quad_2x2 ) { - stk::mesh::fixtures::simple_fields::QuadFixture fixture( MPI_COMM_WORLD, 2, 2); + stk::mesh::fixtures::QuadFixture fixture( MPI_COMM_WORLD, 2, 2); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -97,7 +97,7 @@ TEST( UnitTestCreateEdges , testSkinAndCreateEdges3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateFaces.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateFaces.cpp index b0b64c44b2af..915dc5e11bdf 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateFaces.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshCreateFaces.cpp @@ -84,7 +84,7 @@ TEST( UnitTestCreateFaces , testSkinAndCreateFaces3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); @@ -164,7 +164,7 @@ TEST( UnitTestCreateFaces , testCreateFacesThenSkin3x3x3 ) const size_t NY = 3; const size_t NZ = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); + stk::mesh::fixtures::HexFixture fixture( MPI_COMM_WORLD, NX, NY, NZ); fixture.m_meta.commit(); fixture.generate_mesh(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshDegenerate.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshDegenerate.cpp index 1ac644a38c2d..512538bb03e4 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshDegenerate.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshDegenerate.cpp @@ -59,15 +59,15 @@ TEST(ElementGraph, degenerate_mesh) { std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA); stk::mesh::MetaData& meta_data = bulkPtr->mesh_meta_data(); - stk::mesh::fixtures::simple_fields::VectorFieldType & node_coord = + stk::mesh::fixtures::VectorFieldType & node_coord = meta_data.declare_field(stk::topology::NODE_RANK, "coordinates"); stk::mesh::put_field_on_mesh(node_coord, meta_data.universal_part(), 3, nullptr); - stk::mesh::fixtures::simple_fields::degenerate_mesh_meta_data(meta_data, node_coord); + 
stk::mesh::fixtures::degenerate_mesh_meta_data(meta_data, node_coord); meta_data.commit(); stk::mesh::BulkData& bulk_data = *bulkPtr; - stk::mesh::fixtures::simple_fields::degenerate_mesh_bulk_data(bulk_data, node_coord); + stk::mesh::fixtures::degenerate_mesh_bulk_data(bulk_data, node_coord); if(stk::parallel_machine_rank(comm) == 0) { stk::io::write_mesh(fileName, bulk_data); @@ -78,7 +78,7 @@ TEST(ElementGraph, degenerate_mesh) stk::mesh::BulkData& bulk_data = *bulkPtr; stk::mesh::Part &skin = meta_data.declare_part("skin", meta_data.side_rank()); stk::io::put_io_part_attribute(skin); - stk::unit_test_util::simple_fields::read_from_serial_file_and_decompose(fileName, bulk_data, "RIB"); + stk::unit_test_util::read_from_serial_file_and_decompose(fileName, bulk_data, "RIB"); unlink(fileName.c_str()); EXPECT_NO_FATAL_FAILURE(ElemGraphTestUtils::skin_boundary(bulk_data, meta_data.locally_owned_part(), {&skin})); std::vector mesh_counts; diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshExposedBoundary.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshExposedBoundary.cpp index 54208ff2e833..92c773e999bc 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshExposedBoundary.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshExposedBoundary.cpp @@ -28,7 +28,6 @@ TEST(ElementGraph, skin_exposed_boundary) stk::mesh::MeshBuilder builder(comm); builder.set_spatial_dimension(spatialDim); std::shared_ptr bulkPtr = builder.create(); - bulkPtr->mesh_meta_data().use_simple_fields(); stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); make_2_hex_mesh_with_element1_inactive(*bulkPtr); stk::mesh::PartVector skin_parts = get_skin_parts(meta); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshHeterogenous.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshHeterogenous.cpp index a027cf2f35e6..9bccf193c252 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshHeterogenous.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshHeterogenous.cpp @@ -60,15 +60,15 @@ TEST(ElementGraph, heterogeneous_mesh) std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA); stk::mesh::MetaData& meta_data = bulkPtr->mesh_meta_data(); stk::mesh::BulkData& bulk_data = *bulkPtr; - stk::mesh::fixtures::simple_fields::VectorFieldType & node_coord = + stk::mesh::fixtures::VectorFieldType & node_coord = meta_data.declare_field(stk::topology::NODE_RANK, "coordinates"); stk::mesh::put_field_on_mesh( node_coord , meta_data.universal_part() , 3, nullptr); stk::io::set_field_output_type(node_coord, stk::io::FieldOutputType::VECTOR_3D); - stk::mesh::fixtures::simple_fields::heterogeneous_mesh_meta_data( meta_data , node_coord ); + stk::mesh::fixtures::heterogeneous_mesh_meta_data( meta_data , node_coord ); meta_data.commit(); - stk::mesh::fixtures::simple_fields::heterogeneous_mesh_bulk_data( bulk_data , node_coord ); + stk::mesh::fixtures::heterogeneous_mesh_bulk_data( bulk_data , node_coord ); if (stk::parallel_machine_rank(comm) == 0) { stk::io::write_mesh(fileName, bulk_data); @@ -92,7 +92,7 @@ TEST(ElementGraph, heterogeneous_mesh) stk::mesh::Part& skin = meta_data.declare_part("skin", meta_data.side_rank()); - stk::unit_test_util::simple_fields::read_from_serial_file_and_decompose(fileName, bulk_data, "RIB"); + stk::unit_test_util::read_from_serial_file_and_decompose(fileName, bulk_data, "RIB"); unlink(fileName.c_str()); EXPECT_NO_FATAL_FAILURE(ElemGraphTestUtils::skin_boundary(bulk_data, meta_data.locally_owned_part(), {&skin})); std::vector mesh_counts; diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshMultipleSharedSides.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshMultipleSharedSides.cpp index dbe0c3dfb648..127e82f82ee4 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshMultipleSharedSides.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshMultipleSharedSides.cpp @@ -9,7 +9,7 @@ namespace { -class SkinMesh_TwoElemTwoSharedSide : public simple_fields::TwoElemTwoSharedSideTester {}; +class SkinMesh_TwoElemTwoSharedSide : public TwoElemTwoSharedSideTester {}; TEST_F(SkinMesh_TwoElemTwoSharedSide, skin_mesh) { @@ -34,7 +34,7 @@ TEST_F(SkinMesh_TwoElemTwoSharedSide, skin_one_hex) } -class SkinMesh_TwoElemThreeSharedSide : public simple_fields::TwoElemThreeSharedSideTester {}; +class SkinMesh_TwoElemThreeSharedSide : public TwoElemThreeSharedSideTester {}; TEST_F(SkinMesh_TwoElemThreeSharedSide, skin_mesh) { @@ -58,7 +58,7 @@ TEST_F(SkinMesh_TwoElemThreeSharedSide, skin_one_hex) } } -class SkinMesh_TwoElemThreeSharedSideNoAura : public simple_fields::TwoElemThreeSharedSideNoAuraTester {}; +class SkinMesh_TwoElemThreeSharedSideNoAura : public TwoElemThreeSharedSideNoAuraTester {}; TEST_F(SkinMesh_TwoElemThreeSharedSideNoAura, skin_mesh) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshShell.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshShell.cpp index cd645ed95ac4..e2b848744690 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshShell.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshShell.cpp @@ -20,7 +20,7 @@ TEST(ElementGraph, two_wedge_sandwich_with_quad_shell) std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_SELF, stk::mesh::BulkData::NO_AUTO_AURA); stk::mesh::MetaData& meta_data = bulkPtr->mesh_meta_data(); 
stk::mesh::BulkData& bulk_data = *bulkPtr; - stk::mesh::fixtures::simple_fields::VectorFieldType & node_coord = + stk::mesh::fixtures::VectorFieldType & node_coord = meta_data.declare_field(stk::topology::NODE_RANK, "coordinates"); stk::mesh::put_field_on_mesh(node_coord, meta_data.universal_part(), 3, nullptr); @@ -85,7 +85,7 @@ TEST(ElementGraph, two_wedge_sandwich_with_quad_shell) stk::mesh::BulkData& bulk_data = *bulkPtr; stk::mesh::Part &skin = meta_data.declare_part("skin", meta_data.side_rank()); stk::io::put_io_part_attribute(skin); - stk::unit_test_util::simple_fields::read_from_serial_file_and_decompose(fileName, bulk_data, "RIB"); + stk::unit_test_util::read_from_serial_file_and_decompose(fileName, bulk_data, "RIB"); unlink(fileName.c_str()); EXPECT_NO_FATAL_FAILURE(ElemGraphTestUtils::skin_boundary(bulk_data, meta_data.locally_owned_part(), {&skin})); std::vector mesh_counts; diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshSkinPart.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshSkinPart.cpp index 805a7704f0b0..a91a65d01945 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshSkinPart.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinMeshSkinPart.cpp @@ -215,7 +215,7 @@ TEST(ElementGraph, skin_part_3_blocks_2D) { const unsigned X = 4, Y = 1; bool auraOn = true; - stk::mesh::fixtures::simple_fields::QuadFixture fixture(comm, X, Y, auraOn); + stk::mesh::fixtures::QuadFixture fixture(comm, X, Y, auraOn); stk::mesh::MetaData & meta = fixture.m_meta; stk::mesh::Part& skin = meta.declare_part_with_topology("skin", stk::topology::LINE_2); diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinning.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinning.cpp index 92222f58cd24..3cb61b7ac4b7 100644 --- 
a/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinning.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/skin_mesh/UnitTestSkinning.cpp @@ -77,7 +77,7 @@ void UnitTestStkMeshSkinning::test_skinning() return; } - stk::mesh::fixtures::simple_fields::GridFixture grid_mesh(MPI_COMM_WORLD); + stk::mesh::fixtures::GridFixture grid_mesh(MPI_COMM_WORLD); stk::mesh::BulkData& bulk_data = grid_mesh.bulk_data(); stk::mesh::MetaData& fem_meta = grid_mesh.fem_meta(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.cpp index d4ec3ca3a0fa..3bf404d2c7c1 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.cpp @@ -59,12 +59,16 @@ void extract_warning(std::string & stdoutString, int numExpectedOccurrences, con { std::vector warningLines = split_lines(stdoutString); std::string newStdoutString; +#if defined(STK_USE_DEVICE_MESH) int numFound = 0; +#endif for (const std::string & line : warningLines) { const size_t loc = line.find(warningString); if (loc != std::string::npos) { +#if defined(STK_USE_DEVICE_MESH) ++numFound; +#endif } else { newStdoutString += line + "\n"; diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp index 61304a0bbb54..cb07bb52eb81 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp @@ -70,7 +70,7 @@ struct EntityIdAddRemovePart { std::string removePart; }; -class NgpDebugFieldSyncFixture : public stk::unit_test_util::simple_fields::MeshFixture +class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture { public: template diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldAsyncTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldAsyncTest.cpp index f0422b79a1be..cedea4660e95 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldAsyncTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldAsyncTest.cpp @@ -64,7 +64,7 @@ #define TEST_ONLY_ON_CUDA(testname) testname #endif -class NgpAsyncDeepCopyFixture : public stk::unit_test_util::simple_fields::MeshFixture +class NgpAsyncDeepCopyFixture : public stk::unit_test_util::MeshFixture { public: NgpAsyncDeepCopyFixture() @@ -99,23 +99,23 @@ class NgpAsyncDeepCopyFixture : public stk::unit_test_util::simple_fields::MeshF void setup_multi_block_mesh_with_field_per_block() { - std::string meshDesc = stk::unit_test_util::simple_fields::get_many_block_mesh_desc(m_numBlocks); - std::vector coordinates = stk::unit_test_util::simple_fields::get_many_block_coordinates(m_numBlocks); + std::string meshDesc = stk::unit_test_util::get_many_block_mesh_desc(m_numBlocks); + std::vector coordinates = stk::unit_test_util::get_many_block_coordinates(m_numBlocks); setup_field_per_block(); - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); construct_ngp_fields(); } void setup_multi_block_mesh_with_fields_on_all_blocks() { - std::string meshDesc = stk::unit_test_util::simple_fields::get_many_block_mesh_desc(m_numBlocks); - std::vector coordinates = stk::unit_test_util::simple_fields::get_many_block_coordinates(m_numBlocks); + std::string meshDesc = stk::unit_test_util::get_many_block_mesh_desc(m_numBlocks); + std::vector coordinates = stk::unit_test_util::get_many_block_coordinates(m_numBlocks); setup_fields_on_all_blocks(); - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); construct_ngp_fields(); } diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldTestUtils.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldTestUtils.hpp index f71907d1a327..08329bcac769 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldTestUtils.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpFieldTestUtils.hpp @@ -14,13 +14,20 @@ template void check_field_data_on_device(stk::mesh::NgpMesh& ngpMesh, stk::mesh::NgpField& ngpField, const stk::mesh::Selector& selector, - T expectedValue) + T expectedValue, + int component = -1, + T componentValue = 0) { stk::mesh::for_each_entity_run(ngpMesh, ngpField.get_rank(), selector, KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entity) { const int numComponents = ngpField.get_num_components_per_entity(entity); for(int i=0; i void check_field_data_on_host(const stk::mesh::HostMesh& stkMesh, const stk::mesh::FieldBase& stkField, const stk::mesh::Selector& selector, - T expectedValue) + T expectedValue, + int component = -1, + T componentValue = 0) { stk::mesh::for_each_entity_run(stkMesh, stkField.entity_rank(), selector, [&](const stk::mesh::FastMeshIndex& fastMeshIndex) { @@ -38,7 +47,12 @@ void check_field_data_on_host(const stk::mesh::HostMesh& stkMesh, const int numComponents = stk::mesh::field_scalars_per_entity(stkField, entity); const T* fieldData = reinterpret_cast(stk::mesh::field_data(stkField, entity)); for(int i=0; i void check_field_data_on_host(const stk::mesh::BulkData& stkMesh, const stk::mesh::FieldBase& stkField, const stk::mesh::Selector& selector, - T expectedValue) + T expectedValue, + int component = -1, + T componentValue = 0) { stk::mesh::for_each_entity_run(stkMesh, stkField.entity_rank(), selector, [&](const stk::mesh::BulkData& bulk, const stk::mesh::Entity entity) 
{ const int numComponents = stk::mesh::field_scalars_per_entity(stkField, entity); const T* fieldData = reinterpret_cast(stk::mesh::field_data(stkField, entity)); for(int i=0; i & deviceGoldValues = stk::mesh::get_updated_ngp_field(goldValues); const bool useSimpleDefault = true; - bool useSimple = stk::unit_test_util::simple_fields::get_command_line_option("-s", useSimpleDefault); + bool useSimple = stk::unit_test_util::get_command_line_option("-s", useSimpleDefault); const int numIterationsDefault = 1; - int numIterations = stk::unit_test_util::simple_fields::get_command_line_option("-n", numIterationsDefault); + int numIterations = stk::unit_test_util::get_command_line_option("-n", numIterationsDefault); for (int i = 0; i < numIterations; ++i) { if (useSimple) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpUnitTestUtils.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpUnitTestUtils.hpp index 9a83d04ed6ce..1b623df4984b 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpUnitTestUtils.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpUnitTestUtils.hpp @@ -36,8 +36,8 @@ DualViewType create_dualview(const std::string& name, unsigned size) inline void setup_mesh_4hex_4block(stk::mesh::BulkData& bulk, unsigned bucketCapacity) { - std::string meshDesc = stk::unit_test_util::simple_fields::get_many_block_mesh_desc(4); - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshDesc); + std::string meshDesc = stk::unit_test_util::get_many_block_mesh_desc(4); + stk::unit_test_util::setup_text_mesh(bulk, meshDesc); } inline void setup_mesh_3hex_3block(stk::mesh::BulkData& bulk, unsigned bucketCapacity) @@ -45,7 +45,7 @@ inline void setup_mesh_3hex_3block(stk::mesh::BulkData& bulk, unsigned bucketCap std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_2\n" "0,3,HEX_8,9,10,11,12,13,14,15,16,block_3"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshDesc); + 
stk::unit_test_util::setup_text_mesh(bulk, meshDesc); } inline void setup_mesh_3hex_2block(stk::mesh::BulkData& bulk, unsigned bucketCapacity) @@ -53,14 +53,14 @@ inline void setup_mesh_3hex_2block(stk::mesh::BulkData& bulk, unsigned bucketCap std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n" "0,3,HEX_8,9,10,11,12,13,14,15,16,block_3"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshDesc); + stk::unit_test_util::setup_text_mesh(bulk, meshDesc); } inline void setup_mesh_2hex_2block(stk::mesh::BulkData& bulk, unsigned bucketCapacity) { std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_2"; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, meshDesc); + stk::unit_test_util::setup_text_mesh(bulk, meshDesc); } struct CheckPartMembership { diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/TestNgpMeshUpdate.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/TestNgpMeshUpdate.cpp index dce5b47cc818..f0f9eb33e4a4 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/TestNgpMeshUpdate.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/TestNgpMeshUpdate.cpp @@ -12,7 +12,7 @@ namespace { -class UpdateNgpMesh : public stk::unit_test_util::simple_fields::MeshFixture +class UpdateNgpMesh : public stk::unit_test_util::MeshFixture { public: void setup_test_mesh() @@ -20,7 +20,7 @@ class UpdateNgpMesh : public stk::unit_test_util::simple_fields::MeshFixture setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); extraPart = &get_meta().declare_part("extraPart"); std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } const stk::mesh::Part* extraPart = nullptr; }; @@ -88,7 +88,7 @@ TEST_F(UpdateNgpMesh, OnlyOneDeviceMesh_TwoExternal) #endif } -class BucketLayoutModification : public stk::unit_test_util::simple_fields::MeshFixture 
+class BucketLayoutModification : public stk::unit_test_util::MeshFixture { public: void setup_mesh_3hex_3block(unsigned bucketCapacity) diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp index 0579ace9a30c..78a62cadccb6 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp @@ -213,11 +213,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture for (unsigned i = 0; i < bucketIds.size(); ++i) { const stk::mesh::NgpMesh::BucketType & bucket = ngpMesh.get_bucket(rank, bucketIds.device_get(i)); for (unsigned j = 0; j < bucket.size(); ++j) { -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - stk::mesh::NgpMesh::MeshIndex index{&bucket, static_cast(j)}; -#else stk::mesh::NgpMesh::MeshIndex index{bucket.bucket_id(), static_cast(j)}; -#endif ngpField(index, component) = value; } } diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp index 05a80fd183da..8c7e854e1c9e 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp @@ -72,11 +72,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture for (unsigned i = 0; i < bucketIds.size(); ++i) { const stk::mesh::NgpMesh::BucketType & bucket = ngpMesh.get_bucket(rank, bucketIds.device_get(i)); for (unsigned j = 0; j < bucket.size(); ++j) { -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after May 2024 - stk::mesh::NgpMesh::MeshIndex index{&bucket, static_cast(j)}; -#else stk::mesh::NgpMesh::MeshIndex index{bucket.bucket_id(), static_cast(j)}; -#endif ngpField(index, component) = value; } } diff --git 
a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp index e785a696ac82..23fba07f811f 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp @@ -59,15 +59,15 @@ namespace ngp_field_blas_test { -class NgpFieldBLAS : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldBLAS : public stk::unit_test_util::MeshFixture { public: NgpFieldBLAS() { - setup_two_fields_five_hex_three_block_mesh(); + setup_three_fields_five_hex_three_block_mesh(); } - void setup_two_fields_five_hex_three_block_mesh() + void setup_three_fields_five_hex_three_block_mesh() { const unsigned numStates = 1; const unsigned bucketCapacity = 2; @@ -75,6 +75,7 @@ class NgpFieldBLAS : public stk::unit_test_util::simple_fields::MeshFixture stkField1 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField1", numStates); stkField2 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField2", numStates); + stkField3 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField3", numStates); stk::mesh::Part& block1 = get_meta().declare_part_with_topology("block_1", stk::topology::HEX_8); stk::mesh::Part& block2 = get_meta().declare_part_with_topology("block_2", stk::topology::HEX_8); @@ -92,12 +93,18 @@ class NgpFieldBLAS : public stk::unit_test_util::simple_fields::MeshFixture const std::vector init4(numComponent2, -2); stk::mesh::put_field_on_mesh(*stkField2, block2, numComponent2, init4.data()); + const std::vector init5(numComponent1, -1); + stk::mesh::put_field_on_mesh(*stkField3, block1, numComponent1, init5.data()); + + const std::vector init6(numComponent2, -2); + stk::mesh::put_field_on_mesh(*stkField3, block2, numComponent2, init6.data()); + const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n" 
"0,3,HEX_8,9,13,14,15,16,17,18,19,block_2\n" "0,4,HEX_8,9,20,21,22,23,24,25,26,block_2\n" "0,5,HEX_8,9,27,28,29,30,31,32,33,block_3"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); EXPECT_FALSE(stkField1->need_sync_to_host()); } @@ -106,6 +113,7 @@ class NgpFieldBLAS : public stk::unit_test_util::simple_fields::MeshFixture const int numComponent2 = 3; stk::mesh::Field* stkField1 = nullptr; stk::mesh::Field* stkField2 = nullptr; + stk::mesh::Field* stkField3 = nullptr; }; #ifdef STK_USE_DEVICE_MESH @@ -115,23 +123,22 @@ TEST_F(NgpFieldBLAS, field_fill_device) if (get_parallel_size() != 1) { GTEST_SKIP(); } const double myConstantValue = 55.5; + stk::mesh::field_fill(myConstantValue, *stkField1, stk::ngp::ExecSpace()); -#ifdef STK_ENABLE_GPU EXPECT_TRUE(stkField1->need_sync_to_host()); -#else - EXPECT_TRUE(stkField1->need_sync_to_device()); - stkField1->sync_to_device(); -#endif - stk::mesh::Selector selector(*stkField1); -#ifdef STK_ENABLE_GPU auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); stk::mesh::NgpField& ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); + + stk::mesh::Selector selector(*stkField1); ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField1, selector, myConstantValue); -#else + + const double initialValue = -1; + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, initialValue); + + stkField1->sync_to_host(); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue); -#endif } TEST_F(NgpFieldBLAS, field_fill_selector_device) @@ -143,36 +150,51 @@ TEST_F(NgpFieldBLAS, field_fill_selector_device) stk::mesh::Selector selector = block2; stk::mesh::field_fill(myConstantValue, *stkField1, selector, stk::ngp::ExecSpace()); -#ifdef STK_ENABLE_GPU EXPECT_TRUE(stkField1->need_sync_to_host()); -#else - EXPECT_TRUE(stkField1->need_sync_to_device()); - 
stkField1->sync_to_device(); -#endif -#ifdef STK_ENABLE_GPU auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); stk::mesh::NgpField& ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField1, selector, myConstantValue); -#else + + stkField1->sync_to_host(); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue); -#endif } -TEST_F(NgpFieldBLAS, field_fill_host_ngp) +TEST_F(NgpFieldBLAS, field_fill_component_selector_device) { if (get_parallel_size() != 1) { GTEST_SKIP(); } - stk::mesh::NgpField& ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); + constexpr int component = 1; + constexpr double myConstantComponentValue = 15.5; + constexpr double myConstantValue = 55.5; + stk::mesh::Part& block2 = *get_meta().get_part("block_2"); + stk::mesh::Selector selector = block2; + stk::mesh::field_fill(myConstantValue, *stkField1, selector, stk::ngp::ExecSpace()); + stk::mesh::field_fill(myConstantComponentValue, *stkField1, component, selector, stk::ngp::ExecSpace()); + + EXPECT_TRUE(stkField1->need_sync_to_host()); + auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - ngpField1.set_all(ngpMesh, 97.9); + stk::mesh::NgpField& ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); + ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField1, selector, myConstantValue, component, myConstantComponentValue); - EXPECT_TRUE(ngpField1.need_sync_to_host()); + stkField1->sync_to_host(); + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue, component, myConstantComponentValue); +} + +TEST_F(NgpFieldBLAS, field_fill_host_ngp) +{ + if (get_parallel_size() != 1) { GTEST_SKIP(); } const double myConstantValue = 55.5; stk::mesh::field_fill(myConstantValue, *stkField1, stk::ngp::HostExecSpace{}); +#ifdef STK_ENABLE_GPU EXPECT_TRUE(stkField1->need_sync_to_device()); +#else + 
EXPECT_TRUE(stkField1->need_sync_to_host()); + stkField1->sync_to_host(); +#endif stk::mesh::Selector selector(*stkField1); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue); @@ -223,6 +245,7 @@ TEST_F(NgpFieldBLAS, field_fill_device_with_host_build) stk::mesh::field_fill(myConstantValue, *stkField1, stk::ngp::HostExecSpace{}, MarkModOnDevice); EXPECT_TRUE(stkField1->need_sync_to_host()); + stkField1->sync_to_host(); stk::mesh::Selector selector(*stkField1); auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); @@ -241,27 +264,22 @@ TEST_F(NgpFieldBLAS, field_copy_device) stk::mesh::field_fill(myConstantValue, *stkField1, stk::ngp::ExecSpace()); stk::mesh::field_copy(*stkField1, *stkField2, stk::ngp::ExecSpace()); -#ifdef STK_ENABLE_GPU EXPECT_TRUE(stkField1->need_sync_to_host()); EXPECT_TRUE(stkField2->need_sync_to_host()); -#else - EXPECT_TRUE(stkField1->need_sync_to_device()); - EXPECT_TRUE(stkField2->need_sync_to_device()); - stkField1->sync_to_device(); - stkField2->sync_to_device(); -#endif stk::mesh::Selector selector(*stkField1); -#ifdef STK_ENABLE_GPU + auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); auto ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); auto ngpField2 = stk::mesh::get_updated_ngp_field(*stkField2); ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField1, selector, myConstantValue); ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField2, selector, myConstantValue); -#else + + stkField1->sync_to_host(); + stkField2->sync_to_host(); + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField2, selector, myConstantValue); -#endif } TEST_F(NgpFieldBLAS, field_copy_selector_device) @@ -277,19 +295,11 @@ TEST_F(NgpFieldBLAS, field_copy_selector_device) stk::mesh::field_fill(myBlock2Value, *stkField1, selector, stk::ngp::ExecSpace()); 
stk::mesh::field_copy(*stkField1, *stkField2, selector, stk::ngp::ExecSpace()); -#ifdef STK_ENABLE_GPU EXPECT_TRUE(stkField1->need_sync_to_host()); EXPECT_TRUE(stkField2->need_sync_to_host()); -#else - EXPECT_TRUE(stkField1->need_sync_to_device()); - EXPECT_TRUE(stkField2->need_sync_to_device()); - stkField1->sync_to_device(); - stkField2->sync_to_device(); -#endif stk::mesh::Selector notBlock2(*stkField1); notBlock2 -= selector; -#ifdef STK_ENABLE_GPU auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); auto ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); auto ngpField2 = stk::mesh::get_updated_ngp_field(*stkField2); @@ -297,12 +307,14 @@ TEST_F(NgpFieldBLAS, field_copy_selector_device) ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField2, notBlock2, myConstantValue); ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField1, selector, myBlock2Value); ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField2, selector, myBlock2Value); -#else + + stkField1->sync_to_host(); + stkField2->sync_to_host(); + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, notBlock2, myConstantValue); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField2, notBlock2, myConstantValue); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myBlock2Value); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField2, selector, myBlock2Value); -#endif } TEST_F(NgpFieldBLAS, field_copy_host_ngp) @@ -313,8 +325,16 @@ TEST_F(NgpFieldBLAS, field_copy_host_ngp) stk::mesh::field_fill(myConstantValue, *stkField1, stk::ngp::HostExecSpace{}); stk::mesh::field_copy(*stkField1, *stkField2, stk::ngp::HostExecSpace{}); +#ifdef STK_ENABLE_GPU EXPECT_TRUE(stkField1->need_sync_to_device()); EXPECT_TRUE(stkField2->need_sync_to_device()); +#else + EXPECT_TRUE(stkField1->need_sync_to_host()); + EXPECT_TRUE(stkField2->need_sync_to_host()); + stkField1->sync_to_host(); + 
stkField2->sync_to_host(); +#endif + stk::mesh::Selector selector(*stkField1); ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue); @@ -386,5 +406,24 @@ TEST_F(NgpFieldBLAS, field_copy_device_with_host_build) #endif +TEST_F(NgpFieldBLAS, field_axpbyz) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + + stk::mesh::field_fill(3.0, *stkField1, stk::ngp::ExecSpace()); + stk::mesh::field_fill(10.0, *stkField2, stk::ngp::ExecSpace()); + + double alpha = 2.0; + double beta = 5.0; + stk::mesh::Selector selectRule(*stkField1); + + stk::mesh::field_axpbyz(get_bulk(), alpha, *stkField1, beta, *stkField2, *stkField3, selectRule, stk::ngp::ExecSpace()); + + stkField3->sync_to_host(); + stk::mesh::Selector selector(*stkField3); + constexpr double expectedValue = 56.0; + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField3, selector, expectedValue); +} + } diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp index 9263d7397e5a..8016a1a04583 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -62,7 +63,7 @@ namespace ngp_field_test { -class NgpFieldFixture : public stk::unit_test_util::simple_fields::MeshFixture +class NgpFieldFixture : public stk::unit_test_util::MeshFixture { public: template @@ -87,7 +88,7 @@ class NgpFieldFixture : public stk::unit_test_util::simple_fields::MeshFixture stk::mesh::put_field_on_mesh(stkField, block, 1, &init1); const std::string meshDesc = "0,1,SHELL_QUAD_4,1,2,5,6,block_1\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_two_field_two_element_mesh() @@ -110,7 +111,7 @@ class NgpFieldFixture : public 
stk::unit_test_util::simple_fields::MeshFixture const std::string meshDesc = "0,1,SHELL_QUAD_4,1,2,5,6,block_1\n" "0,2,SHELL_QUAD_4,2,3,4,5,block_2\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_two_variable_fields_two_element_mesh() @@ -136,7 +137,7 @@ class NgpFieldFixture : public stk::unit_test_util::simple_fields::MeshFixture const std::string meshDesc = "0,1,SHELL_QUAD_4,1,2,5,6,block_1\n" "0,2,SHELL_QUAD_4,2,3,4,5,block_2\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } template @@ -169,7 +170,47 @@ class NgpFieldFixture : public stk::unit_test_util::simple_fields::MeshFixture "0,3,HEX_8,9,13,14,15,16,17,18,19,block_2\n" "0,4,HEX_8,9,20,21,22,23,24,25,26,block_2\n" "0,5,HEX_8,9,27,28,29,30,31,32,33,block_3"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); + } + + template + void setup_three_fields_five_hex_three_block_mesh(const int numComponent1, const int numComponent2, const int numStates = 1) + { + const unsigned bucketCapacity = 2; + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA, bucketCapacity, bucketCapacity); + + stk::mesh::Field& stkField1 = get_meta().declare_field(stk::topology::NODE_RANK, "variableLengthField1", numStates); + stk::mesh::Field& stkField2 = get_meta().declare_field(stk::topology::NODE_RANK, "variableLengthField2", numStates); + stk::mesh::Field& stkField3 = get_meta().declare_field(stk::topology::NODE_RANK, "variableLengthField3", numStates); + + stk::mesh::Part& block1 = get_meta().declare_part_with_topology("block_1", stk::topology::HEX_8); + stk::mesh::Part& block2 = get_meta().declare_part_with_topology("block_2", stk::topology::HEX_8); + get_meta().declare_part_with_topology("block_3", stk::topology::HEX_8); + + const std::vector 
init1(numComponent1, -1); + stk::mesh::put_field_on_mesh(stkField1, block1, numComponent1, init1.data()); + + const std::vector init2(numComponent2, -2); + stk::mesh::put_field_on_mesh(stkField1, block2, numComponent2, init2.data()); + + const std::vector init3(numComponent1, -1); + stk::mesh::put_field_on_mesh(stkField2, block1, numComponent1, init3.data()); + + const std::vector init4(numComponent2, -2); + stk::mesh::put_field_on_mesh(stkField2, block2, numComponent2, init4.data()); + + const std::vector init5(numComponent1, 0); + stk::mesh::put_field_on_mesh(stkField3, block1, numComponent1, init5.data()); + + const std::vector init6(numComponent2, 0); + stk::mesh::put_field_on_mesh(stkField3, block2, numComponent2, init6.data()); + + const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" + "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n" + "0,3,HEX_8,9,13,14,15,16,17,18,19,block_2\n" + "0,4,HEX_8,9,20,21,22,23,24,25,26,block_2\n" + "0,5,HEX_8,9,27,28,29,30,31,32,33,block_3"; + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void setup_two_element_mesh_field_on_each_element(stk::mesh::Field& intField, stk::mesh::Field& doubleField) @@ -184,7 +225,7 @@ class NgpFieldFixture : public stk::unit_test_util::simple_fields::MeshFixture const std::string meshDesc = "0,1,SHELL_QUAD_4,1,2,4,3,block_1\n" "0,2,SHELL_QUAD_4,2,5,6,4,block_2\n"; - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); + stk::unit_test_util::setup_text_mesh(get_bulk(), meshDesc); } void add_3rd_element_to_2hex_3block_mesh() @@ -660,15 +701,8 @@ class OptimizedNgpFieldFixture : public NgpFieldFixture { add_element_and_place_in_block(newBlockName); - stk::mesh::EntityVector nodes; stk::mesh::Entity element = get_bulk().get_entity(stk::topology::ELEMENT_RANK, 1); - unsigned numNodes = get_bulk().num_nodes(element); - - fill_nodes(element, numNodes, nodes); - - for(unsigned i = 0; i < numNodes; i++) { - get_bulk().destroy_relation(element, nodes[i], i); - } + 
stk::mesh::destroy_relations(get_bulk(), element, stk::topology::NODE_RANK); get_bulk().destroy_entity(element); } @@ -1017,11 +1051,11 @@ TEST_F(NgpFieldFixture, noFieldDataTest) const unsigned bucketCapacity = 1; setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA, bucketCapacity, bucketCapacity); - std::string meshDesc = stk::unit_test_util::simple_fields::get_many_block_mesh_desc(numBlocks); - std::vector coordinates = stk::unit_test_util::simple_fields::get_many_block_coordinates(numBlocks); + std::string meshDesc = stk::unit_test_util::get_many_block_mesh_desc(numBlocks); + std::vector coordinates = stk::unit_test_util::get_many_block_coordinates(numBlocks); stk::mesh::Field& field = get_meta().declare_field(stk::topology::ELEM_RANK, "", 1); - stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_NO_THROW(stk::mesh::get_updated_ngp_field(field)); } @@ -1140,7 +1174,7 @@ TEST_F(NgpFieldFixture, DeviceField_set_all_after_modified_on_host) auto stkField1 = get_meta().get_field(stk::topology::ELEM_RANK, "variableLengthField1"); EXPECT_FALSE(stkField1->need_sync_to_host()); - EXPECT_TRUE(stkField1->need_sync_to_device()); + EXPECT_FALSE(stkField1->need_sync_to_device()); stk::mesh::NgpField ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); @@ -1166,8 +1200,8 @@ TEST_F(NgpFieldFixture, blas_field_copy_device_to_device) EXPECT_FALSE(stkField1->need_sync_to_host()); EXPECT_FALSE(stkField2->need_sync_to_host()); - EXPECT_TRUE(stkField1->need_sync_to_device()); - EXPECT_TRUE(stkField2->need_sync_to_device()); + EXPECT_FALSE(stkField1->need_sync_to_device()); + EXPECT_FALSE(stkField2->need_sync_to_device()); const double myConstantValue = 97.9; diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpMultiStateFieldTests.cpp 
b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpMultiStateFieldTests.cpp index baf4a89d8818..78f708dc9517 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpMultiStateFieldTests.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpMultiStateFieldTests.cpp @@ -94,7 +94,7 @@ void delete_class_on_device(ClassWithNgpField* devicePtr) Kokkos::kokkos_free(static_cast(devicePtr)); } -class NgpMultiStateFieldTest : public stk::mesh::fixtures::simple_fields::TestHexFixture +class NgpMultiStateFieldTest : public stk::mesh::fixtures::TestHexFixture { public: diff --git a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_interface.cpp b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_interface.cpp index 93584ff57856..f5ab60e3d226 100644 --- a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_interface.cpp +++ b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_interface.cpp @@ -1,4 +1,4 @@ -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA #include "stk_middle_mesh_util/constants.hpp" #include "stk_middle_mesh_util/stk_interface.hpp" @@ -59,8 +59,6 @@ TEST(StkInterface, twoToThree) auto& metaData = bulkDataPtr->mesh_meta_data(); auto& metaData2 = bulkData2Ptr->mesh_meta_data(); - metaData.use_simple_fields(); - metaData2.use_simple_fields(); read_stk_mesh(fnameOut, bulkData); read_stk_mesh(fnameOut2, bulkData2); diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp index 361d0a4bfb68..774322452b83 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp @@ -46,6 +46,7 @@ #include #include #include +#include "stk_search/SearchMethod.hpp" namespace std { template @@ -126,14 +127,13 @@ void expect_search_results(int num_procs, int proc_id, const SearchResults& sea } } -template +template void 
test_coarse_search_for_algorithm_with_views(stk::search::SearchMethod algorithm, MPI_Comm comm) { int num_procs = stk::parallel_machine_size(comm); int proc_id = stk::parallel_machine_rank(comm); using HostSpace = Kokkos::DefaultHostExecutionSpace; - using ExecSpace = Kokkos::DefaultExecutionSpace; using BoxType = stk::search::Box; using PointType = stk::search::Point; using BoxIdentProcType = stk::search::BoxIdentProc; @@ -213,6 +213,71 @@ void test_coarse_search_for_algorithm(stk::search::SearchMethod algorithm, MPI_C expect_search_results(num_procs, proc_id, searchResults); } +#ifdef KOKKOS_ENABLE_CUDA +void test_coarse_search_with_non_default_view(stk::search::SearchMethod algorithm, MPI_Comm comm) +{ + using Box = stk::search::Box; + using IdentProc = stk::search::IdentProc; + using BoxIdentProc = stk::search::BoxIdentProc; + using ViewType1 = Kokkos::View; + using ViewType2 = Kokkos::View; + Kokkos::Cuda execSpace{}; + + int myrank = stk::parallel_machine_rank(comm); + ViewType1 domainView("domainView", 1); + ViewType2 rangeView("rangeView", 1); + auto createBoxes = KOKKOS_LAMBDA(int idx) + { + Box box(2*myrank, 0, 0, 2*myrank+1, 1, 1); + domainView(idx) = BoxIdentProc{box, IdentProc(0, myrank)}; + rangeView(idx) = BoxIdentProc{box, IdentProc(0, myrank)}; + }; + + Kokkos::parallel_for("create_boxes", 1, createBoxes); + + Kokkos::View*, Kokkos::CudaSpace> results; + stk::search::coarse_search(domainView, rangeView, algorithm, comm, results, execSpace); + + auto resultsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, results); + + EXPECT_EQ(resultsHost.size(), 1U); + EXPECT_EQ(resultsHost(0).domainIdentProc, IdentProc(0, myrank)); + EXPECT_EQ(resultsHost(0).rangeIdentProc, IdentProc(0, myrank)); +} + +void test_local_coarse_search_with_non_default_view(stk::search::SearchMethod algorithm) +{ + using Box = stk::search::Box; + using Ident = int; + using BoxIdent = stk::search::BoxIdent; + using ViewType1 = Kokkos::View; + using ViewType2 = 
Kokkos::View; + Kokkos::Cuda execSpace{}; + + ViewType1 domainView("domainView", 1); + ViewType2 rangeView("rangeView", 1); + auto createBoxes = KOKKOS_LAMBDA(int idx) + { + Box box(0, 0, 0, 1, 1, 1); + domainView(idx) = BoxIdent{box, Ident(0) }; + rangeView(idx) = BoxIdent{box, Ident(0)}; + }; + + Kokkos::parallel_for("create_boxes", 1, createBoxes); + + Kokkos::View*, Kokkos::CudaSpace> results; + stk::search::local_coarse_search(domainView, rangeView, algorithm, results, execSpace); + + auto resultsHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, results); + + EXPECT_EQ(resultsHost.size(), 1U); + EXPECT_EQ(resultsHost(0).domainIdent, Ident(0)); + EXPECT_EQ(resultsHost(0).rangeIdent, Ident(0)); +} + + +#endif + TEST(stk_search, coarseSearchDoubleBoxes_KDTREE) { test_coarse_search_for_algorithm(stk::search::KDTREE, MPI_COMM_WORLD); @@ -226,7 +291,8 @@ TEST(stk_search, coarseSearchFloatBoxes_KDTREE) TEST(CoarseSearchCorrectness, coarseSearchDoubleBoxes_MORTON_LBVH) { test_coarse_search_for_algorithm(stk::search::MORTON_LBVH, MPI_COMM_WORLD); - test_coarse_search_for_algorithm_with_views(stk::search::MORTON_LBVH, MPI_COMM_WORLD); + test_coarse_search_for_algorithm_with_views(stk::search::MORTON_LBVH, MPI_COMM_WORLD); + test_coarse_search_for_algorithm_with_views(stk::search::MORTON_LBVH, MPI_COMM_WORLD); } TEST(CoarseSearchCorrectness, coarseSearchFloatBoxes_MORTON_LBVH) @@ -241,7 +307,9 @@ TEST(CoarseSearchCorrectness, coarseSearchDoubleBoxes_ARBORX) GTEST_SKIP(); #endif test_coarse_search_for_algorithm(stk::search::ARBORX, MPI_COMM_WORLD); - test_coarse_search_for_algorithm_with_views(stk::search::ARBORX, MPI_COMM_WORLD); + test_coarse_search_for_algorithm_with_views(stk::search::ARBORX, MPI_COMM_WORLD); + test_coarse_search_for_algorithm_with_views(stk::search::ARBORX, MPI_COMM_WORLD); + } TEST(CoarseSearchCorrectness, coarseSearchFloatBoxes_ARBORX) @@ -366,6 +434,50 @@ TEST(stk_search, Local_CoarseSearchFloatBoxes_KDTREE) 
host_local_test_coarse_search_for_algorithm(stk::search::KDTREE); } +template +void local_test_coarse_search_for_algorithm_with_views(stk::search::SearchMethod algorithm) +{ + using BoxType = stk::search::Box; + using PointType = stk::search::Point; + using BoxIdentType = stk::search::BoxIdent; + using HostSpace = Kokkos::DefaultHostExecutionSpace; + + auto domain = Kokkos::View("domain box-ident", 2); + auto range = Kokkos::View("range box-ident", 2); + + auto domainHost = Kokkos::create_mirror_view(HostSpace{}, domain); + auto rangeHost = Kokkos::create_mirror_view(HostSpace{}, range); + + + domainHost(0) = {BoxType(PointType(0.1, 0.0, 0.0), PointType(0.9, 1.0, 1.0)), 0}; + domainHost(1) = {BoxType(PointType(0.1, 2.0, 0.0), PointType(0.9, 3.0, 1.0)), 1}; + rangeHost(0) = {BoxType(PointType(0.6, 0.5, 0.0), PointType(1.4, 1.5, 1.0)), 2}; + rangeHost(1) = {BoxType(PointType(0.6, 2.5, 0.0), PointType(1.4, 3.5, 1.0)), 3}; + + Kokkos::deep_copy(domain, domainHost); + Kokkos::deep_copy(range, rangeHost); + auto intersections = Kokkos::View("intersections", 0); + + stk::search::local_coarse_search(domain, range, algorithm, intersections); + + auto hostIntersections = Kokkos::create_mirror_view(HostSpace{}, intersections); + Kokkos::deep_copy(hostIntersections, intersections); + + local_expect_search_results(hostIntersections); +} + +TEST(stk_search, Local_CoarseSearchWithViews_MORTON_LBVH) +{ + local_test_coarse_search_for_algorithm_with_views(stk::search::MORTON_LBVH); + local_test_coarse_search_for_algorithm_with_views(stk::search::MORTON_LBVH); +} + +TEST(stk_search, Local_CoarseSearchWithViews_ARBORX) +{ + local_test_coarse_search_for_algorithm_with_views(stk::search::ARBORX); + local_test_coarse_search_for_algorithm_with_views(stk::search::ARBORX); +} + std::pair build_range_boxes_and_nested_domain_boxes(int num_procs, int proc_id, int sizeParam=1) { @@ -935,4 +1047,26 @@ TEST(CoarseSearchScaling, forDeterminingSharingLinearAdjacentCase_ARBORX) 
test_coarse_search_for_determining_sharing_linear_adjacent_case(stk::search::ARBORX, 1000); } +#ifdef KOKKOS_ENABLE_CUDA +TEST(CoarseSearch, nonDefaultView_MORTON_LBVH) +{ + test_coarse_search_with_non_default_view(stk::search::MORTON_LBVH, stk::parallel_machine_world()); +} + +TEST(CoarseSearch, nonDefaultView_ARBORX) +{ + test_coarse_search_with_non_default_view(stk::search::ARBORX, stk::parallel_machine_world()); +} + +TEST(LocalCoarseSearch, nonDefaultView_MORTON_LBVH) +{ + test_local_coarse_search_with_non_default_view(stk::search::MORTON_LBVH); +} + +TEST(LocalCoarseSearch, nonDefaultView_ARBORX) +{ + test_local_coarse_search_with_non_default_view(stk::search::ARBORX); +} +#endif + } //namespace diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchBoxOverlappingEightSurroundingBoxes.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchBoxOverlappingEightSurroundingBoxes.cpp index 000024eca37d..5d461353f473 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchBoxOverlappingEightSurroundingBoxes.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchBoxOverlappingEightSurroundingBoxes.cpp @@ -57,20 +57,20 @@ void runBoxOverlappingEightSurroundingBoxes(stk::search::SearchMethod searchMeth std::vector> boxVector1; if (procId == 0) { - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 0, 0, radius, 1, procId)); - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(1, 0, 0, radius, 2, procId)); - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(2, 0, 0, radius, 3, procId)); - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 1, 0, radius, 4, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, 0, 0, radius, 1, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(1, 0, 0, radius, 2, procId)); + 
boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(2, 0, 0, radius, 3, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, 1, 0, radius, 4, procId)); //skip middle one - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(2, 1, 0, radius, 6, procId)); - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 2, 0, radius, 7, procId)); - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(1, 2, 0, radius, 8, procId)); - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(2, 2, 0, radius, 9, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(2, 1, 0, radius, 6, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, 2, 0, radius, 7, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(1, 2, 0, radius, 8, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(2, 2, 0, radius, 9, procId)); } std::vector> boxVector2; if (procId == numProc-1) { - boxVector2.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(1, 1, 0, radius, 5, procId)); + boxVector2.push_back(stk::unit_test_util::generateBoundingVolume(1, 1, 0, radius, 5, procId)); } std::vector< std::pair > boxIdPairResults; @@ -105,15 +105,15 @@ void device_runBoxOverlappingEightSurroundingBoxes(stk::search::SearchMethod sea Kokkos::resize(Kokkos::WithoutInitializing, domain, 8); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { - domain[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 0, 0, radius, 1, procId); - domain[1] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(1, 0, 0, radius, 2, procId); - domain[2] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(2, 0, 0, radius, 3, procId); - domain[3] = 
stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 1, 0, radius, 4, procId); + domain[0] = stk::unit_test_util::device_generateBoxIdentProc(0, 0, 0, radius, 1, procId); + domain[1] = stk::unit_test_util::device_generateBoxIdentProc(1, 0, 0, radius, 2, procId); + domain[2] = stk::unit_test_util::device_generateBoxIdentProc(2, 0, 0, radius, 3, procId); + domain[3] = stk::unit_test_util::device_generateBoxIdentProc(0, 1, 0, radius, 4, procId); // Skip middle box - domain[4] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(2, 1, 0, radius, 6, procId); - domain[5] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(1, 2, 0, radius, 7, procId); - domain[6] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(2, 2, 0, radius, 8, procId); - domain[7] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 2, 0, radius, 9, procId); + domain[4] = stk::unit_test_util::device_generateBoxIdentProc(2, 1, 0, radius, 6, procId); + domain[5] = stk::unit_test_util::device_generateBoxIdentProc(1, 2, 0, radius, 7, procId); + domain[6] = stk::unit_test_util::device_generateBoxIdentProc(2, 2, 0, radius, 8, procId); + domain[7] = stk::unit_test_util::device_generateBoxIdentProc(0, 2, 0, radius, 9, procId); }); } @@ -122,7 +122,7 @@ void device_runBoxOverlappingEightSurroundingBoxes(stk::search::SearchMethod sea Kokkos::resize(Kokkos::WithoutInitializing, range, 1); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { - range[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(1, 1, 0, radius, 5, procId); + range[0] = stk::unit_test_util::device_generateBoxIdentProc(1, 1, 0, radius, 5, procId); }); } @@ -442,26 +442,26 @@ void host_local_runBoxOverlappingEightSurroundingBoxes( std::vector domain; std::vector range; - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 0, 0, 
radius, 1))); - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(1, 0, 0, radius, 2))); - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(2, 0, 0, radius, 3))); - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 1, 0, radius, 4))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(0, 0, 0, radius, 1))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(1, 0, 0, radius, 2))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(2, 0, 0, radius, 3))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(0, 1, 0, radius, 4))); // Skip middle box - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(2, 1, 0, radius, 6))); - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(1, 2, 0, radius, 7))); - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(2, 2, 0, radius, 8))); - domain.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 2, 0, radius, 9))); - - range.push_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(1, 1, 0, radius, 5))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(2, 1, 0, radius, 6))); + 
domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(1, 2, 0, radius, 7))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(2, 2, 0, radius, 8))); + domain.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(0, 2, 0, radius, 9))); + + range.push_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(1, 1, 0, radius, 5))); LocalSearchResults intersections; @@ -482,17 +482,17 @@ void device_local_runBoxOverlappingEightSurroundingBoxes(stk::search::SearchMeth Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { - domain[0] = stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 0, 0, radius, 1); - domain[1] = stk::unit_test_util::simple_fields::device_generateBoxIdent(1, 0, 0, radius, 2); - domain[2] = stk::unit_test_util::simple_fields::device_generateBoxIdent(2, 0, 0, radius, 3); - domain[3] = stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 1, 0, radius, 4); + domain[0] = stk::unit_test_util::device_generateBoxIdent(0, 0, 0, radius, 1); + domain[1] = stk::unit_test_util::device_generateBoxIdent(1, 0, 0, radius, 2); + domain[2] = stk::unit_test_util::device_generateBoxIdent(2, 0, 0, radius, 3); + domain[3] = stk::unit_test_util::device_generateBoxIdent(0, 1, 0, radius, 4); // Skip middle box - domain[4] = stk::unit_test_util::simple_fields::device_generateBoxIdent(2, 1, 0, radius, 6); - domain[5] = stk::unit_test_util::simple_fields::device_generateBoxIdent(1, 2, 0, radius, 7); - domain[6] = stk::unit_test_util::simple_fields::device_generateBoxIdent(2, 2, 0, radius, 8); - domain[7] = stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 2, 0, radius, 9); + domain[4] = stk::unit_test_util::device_generateBoxIdent(2, 1, 0, radius, 6); + domain[5] = stk::unit_test_util::device_generateBoxIdent(1, 2, 0, radius, 
7); + domain[6] = stk::unit_test_util::device_generateBoxIdent(2, 2, 0, radius, 8); + domain[7] = stk::unit_test_util::device_generateBoxIdent(0, 2, 0, radius, 9); - range[0] = stk::unit_test_util::simple_fields::device_generateBoxIdent(1, 1, 0, radius, 5); + range[0] = stk::unit_test_util::device_generateBoxIdent(1, 1, 0, radius, 5); }); auto intersections = Kokkos::View("intersections", 0); diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchLineOfBoundingBoxes.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchLineOfBoundingBoxes.cpp index 5f03f69ce3e1..70e45a0eb8cb 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchLineOfBoundingBoxes.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchLineOfBoundingBoxes.cpp @@ -69,13 +69,13 @@ void runLineOfBoundingBoxes(stk::search::SearchMethod searchMethod, enum Axis ax switch(axis) { case xDim: - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(paramCoord, 0, 0, radius, 1, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(paramCoord, 0, 0, radius, 1, procId)); break; case yDim: - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, paramCoord, 0, radius, 1, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, paramCoord, 0, radius, 1, procId)); break; case zDim: - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 0, paramCoord, radius, 1, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, 0, paramCoord, radius, 1, procId)); break; } } @@ -83,13 +83,13 @@ void runLineOfBoundingBoxes(stk::search::SearchMethod searchMethod, enum Axis ax switch(axis) { case xDim: - boxVector2.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(paramCoord, 0, 0, radius, 1, procId)); + boxVector2.push_back(stk::unit_test_util::generateBoundingVolume(paramCoord, 0, 0, radius, 1, 
procId)); break; case yDim: - boxVector2.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, paramCoord, 0, radius, 1, procId)); + boxVector2.push_back(stk::unit_test_util::generateBoundingVolume(0, paramCoord, 0, radius, 1, procId)); break; case zDim: - boxVector2.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 0, paramCoord, radius, 1, procId)); + boxVector2.push_back(stk::unit_test_util::generateBoundingVolume(0, 0, paramCoord, radius, 1, procId)); break; } } @@ -137,13 +137,13 @@ void device_runLineOfBoundingBoxes(stk::search::SearchMethod searchMethod, enum switch(axis) { case xDim: - domain[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(paramCoord, 0, 0, radius, 1, procId); + domain[0] = stk::unit_test_util::device_generateBoxIdentProc(paramCoord, 0, 0, radius, 1, procId); break; case yDim: - domain[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, paramCoord, 0, radius, 1, procId); + domain[0] = stk::unit_test_util::device_generateBoxIdentProc(0, paramCoord, 0, radius, 1, procId); break; case zDim: - domain[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 0, paramCoord, radius, 1, procId); + domain[0] = stk::unit_test_util::device_generateBoxIdentProc(0, 0, paramCoord, radius, 1, procId); break; } }); @@ -155,13 +155,13 @@ void device_runLineOfBoundingBoxes(stk::search::SearchMethod searchMethod, enum switch(axis) { case xDim: - range[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(paramCoord, 0, 0, radius, 1, procId); + range[0] = stk::unit_test_util::device_generateBoxIdentProc(paramCoord, 0, 0, radius, 1, procId); break; case yDim: - range[0] = stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, paramCoord, 0, radius, 1, procId); + range[0] = stk::unit_test_util::device_generateBoxIdentProc(0, paramCoord, 0, radius, 1, procId); break; case zDim: - range[0] = 
stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 0, paramCoord, radius, 1, procId); + range[0] = stk::unit_test_util::device_generateBoxIdentProc(0, 0, paramCoord, radius, 1, procId); break; } }); diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp index 039f0038652e..a742aeb80f42 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp @@ -56,12 +56,12 @@ void runTwoBoxTest(stk::search::SearchMethod searchMethod, const double distance std::vector> boxVector1; if (procId == 0) { - boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 0, 0, boxSize/2, 1, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, 0, 0, boxSize/2, 1, procId)); } std::vector> boxVector2; if (procId == numProcs-1) { - boxVector2.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(distanceBetweenBoxCenters, 0, 0, boxSize/2, 2, procId)); + boxVector2.push_back(stk::unit_test_util::generateBoundingVolume(distanceBetweenBoxCenters, 0, 0, boxSize/2, 2, procId)); } SearchResults boxIdPairResults; @@ -89,7 +89,7 @@ void device_runTwoBoxTest(stk::search::SearchMethod searchMethod, const double d Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { domain[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 0, 0, boxSize/2, 1, procId); + stk::unit_test_util::device_generateBoxIdentProc(0, 0, 0, boxSize/2, 1, procId); }); } @@ -97,7 +97,7 @@ void device_runTwoBoxTest(stk::search::SearchMethod searchMethod, const double d Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { range[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdentProc(distanceBetweenBoxCenters, 0, 0, + 
stk::unit_test_util::device_generateBoxIdentProc(distanceBetweenBoxCenters, 0, 0, boxSize/2, 2, procId); }); } @@ -243,11 +243,11 @@ void host_local_runTwoBoxTest(stk::search::SearchMethod searchMethod, StkBoxIdentVector domain; StkBoxIdentVector range; - domain.emplace_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 0, 0, boxSize / 2, 1))); + domain.emplace_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(0, 0, 0, boxSize / 2, 1))); - range.emplace_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent( + range.emplace_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent( distanceBetweenBoxCenters, 0, 0, boxSize / 2, 2))); LocalSearchResults intersections; @@ -271,10 +271,10 @@ void device_local_runTwoBoxTest(stk::search::SearchMethod searchMethod, const do Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { domain[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 0, 0, boxSize/2, 1); + stk::unit_test_util::device_generateBoxIdent(0, 0, 0, boxSize/2, 1); range[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdent(distanceBetweenBoxCenters, 0, 0, + stk::unit_test_util::device_generateBoxIdent(distanceBetweenBoxCenters, 0, 0, boxSize/2, 2); }); diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoSpheres.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoSpheres.cpp index 63d2b780db33..b998f7ad0d83 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoSpheres.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoSpheres.cpp @@ -56,12 +56,12 @@ void runTwoSpheresTest(stk::search::SearchMethod searchMethod, const double dist std::vector> boxVector1; if (procId == 0) { - 
boxVector1.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(0, 0, 0, radius, 1, procId)); + boxVector1.push_back(stk::unit_test_util::generateBoundingVolume(0, 0, 0, radius, 1, procId)); } std::vector> boxVector2; if (procId == numProcs-1) { - boxVector2.push_back(stk::unit_test_util::simple_fields::generateBoundingVolume(distanceBetweenSphereCenters, 0, 0, radius, 2, procId)); + boxVector2.push_back(stk::unit_test_util::generateBoundingVolume(distanceBetweenSphereCenters, 0, 0, radius, 2, procId)); } SearchResults boxIdPairResults; @@ -89,7 +89,7 @@ void device_runTwoSpheresTest(stk::search::SearchMethod searchMethod, const doub Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { domain[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdentProc(0, 0, 0, radius, 1, procId); + stk::unit_test_util::device_generateBoxIdentProc(0, 0, 0, radius, 1, procId); }); } @@ -98,7 +98,7 @@ void device_runTwoSpheresTest(stk::search::SearchMethod searchMethod, const doub KOKKOS_LAMBDA(const unsigned & i) { const double axisOffset = distanceBetweenSphereCenters / sqrt(2.0); range[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdentProc(axisOffset, axisOffset, 0, + stk::unit_test_util::device_generateBoxIdentProc(axisOffset, axisOffset, 0, radius, 2, procId); }); } @@ -244,12 +244,12 @@ void host_local_runTwoSpheresTest(stk::search::SearchMethod searchMethod, std::vector domain; std::vector range; - domain.emplace_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 0, 0, radius, 1))); + domain.emplace_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent(0, 0, 0, radius, 1))); const double axisOffset = distanceBetweenSphereCenters / sqrt(2.0); - range.emplace_back(stk::unit_test_util::simple_fields::box_ident_to_pair( - stk::unit_test_util::simple_fields::device_generateBoxIdent( + 
range.emplace_back(stk::unit_test_util::box_ident_to_pair( + stk::unit_test_util::device_generateBoxIdent( axisOffset, axisOffset, 0, radius, 2))); LocalSearchResults intersections; @@ -273,11 +273,11 @@ void device_local_runTwoSpheresTest(stk::search::SearchMethod searchMethod, cons Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const unsigned & i) { domain[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdent(0, 0, 0, radius, 1); + stk::unit_test_util::device_generateBoxIdent(0, 0, 0, radius, 1); const double axisOffset = distanceBetweenSphereCenters / sqrt(2.0); range[0] = - stk::unit_test_util::simple_fields::device_generateBoxIdent(axisOffset, axisOffset, 0, + stk::unit_test_util::device_generateBoxIdent(axisOffset, axisOffset, 0, radius, 2); }); diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestDeviceMPIUtils.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestDeviceMPIUtils.cpp new file mode 100644 index 000000000000..97d804b6107e --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestDeviceMPIUtils.cpp @@ -0,0 +1,318 @@ +#include "gtest/gtest.h" +#include "stk_util/parallel/Parallel.hpp" +#include "stk_search/DeviceMPIUtils.hpp" +#include "Kokkos_Sort.hpp" + +namespace { +using ExecutionSpace = Kokkos::DefaultExecutionSpace; +using DeviceBuffers = stk::search::impl::DeviceMPIBuffers; +using DeviceBufferAppender = stk::search::impl::DeviceMPIBufferAppender; +using DeviceDataExchanger = stk::search::impl::DeviceDataExchangeUnknownPattern; + +template +void set_on_device(ViewType viewDevice, const std::vector& vals) +{ + STK_ThrowRequire(viewDevice.extent(0) == vals.size()); + auto viewHost = Kokkos::create_mirror_view(viewDevice); + for (size_t i=0; i < vals.size(); ++i) + { + viewHost(i) = vals[i]; + } + + Kokkos::deep_copy(viewDevice, viewHost); +} + +} + +TEST(DeviceMPIBuffers, ConstructorSizeOnly) +{ + DeviceBuffers buffers(3); + EXPECT_EQ(buffers.bufferSizes.extent(0), 3u); + 
EXPECT_EQ(buffers.buffers.extent(0), 0u); +} + + +TEST(DeviceMPIBuffers, ConstructorSizesAndBuffersGiven) +{ + DeviceBuffers::BufferSizesView bufferSizes("buffer_sizes", 3); + set_on_device(bufferSizes, {5, 2, 4}); + + DeviceBuffers::BufferView bufferData("buffers", 5+2+4); + DeviceBuffers buffers(bufferSizes, bufferData); + + EXPECT_EQ(buffers.bufferSizes.extent(0), 3u); + EXPECT_EQ(buffers.buffers.extent(0), 5u + 2u + 4u); +} + +namespace { +void test_device_buffers_set_data() +{ + DeviceBuffers::BufferSizesView bufferSizes("buffer_sizes", 3); + set_on_device(bufferSizes, {5, 2, 4}); + + int totalSize = 5 + 2 + 4; + DeviceBuffers::BufferView bufferData("buffers", totalSize); + DeviceBuffers buffers(bufferSizes, bufferData); + + Kokkos::RangePolicy policy(0, totalSize); + auto setFunc = KOKKOS_LAMBDA(int i) + { + buffers.buffers(i) = i; + }; + + Kokkos::parallel_for("set_buffer_vals", policy, setFunc); + + auto buffersHost = Kokkos::create_mirror_view_and_copy(ExecutionSpace{}, buffers.buffers); + for (int i=0; i < totalSize; ++i) + { + EXPECT_EQ(buffersHost(i), i); + } +} +} + +TEST(DeviceMPIBuffers, SetData) +{ + test_device_buffers_set_data(); +} + +namespace { +template +struct DeviceAppenderUser2Ranks +{ + DeviceAppenderUser2Ranks(int numEntriesOnRank1, ExecutionSpace execSpace) : + m_numEntriesOnRank1(numEntriesOnRank1), + appender(2, execSpace) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const + { + int rank = i < m_numEntriesOnRank1 ? 
0 : 1; + appender.push_back(rank, i); + } + + int m_numEntriesOnRank1; + DeviceBufferAppender appender; +}; +} + +TEST(DeviceMPIBufferAppender, push_back) +{ + Kokkos::DefaultHostExecutionSpace hostSpace{}; + ExecutionSpace execSpace{}; + int numRanks = 2; + Kokkos::View bufferExpectedSizes("expected_buffer_sizes", numRanks); + set_on_device(bufferExpectedSizes, {3, 4}); + auto bufferExpectedSizesHost = Kokkos::create_mirror_view_and_copy(hostSpace, bufferExpectedSizes); + int totalSize = bufferExpectedSizesHost(0) + bufferExpectedSizesHost(1); + + Kokkos::RangePolicy<> policy(execSpace, 0, totalSize); + DeviceAppenderUser2Ranks setValuesFunc(bufferExpectedSizesHost(0), execSpace); + + Kokkos::parallel_for("size_buffers", policy, setValuesFunc); + execSpace.fence(); + auto bufferSizesHost = Kokkos::create_mirror_view_and_copy(hostSpace, setValuesFunc.appender.getBuffers().bufferSizes); + EXPECT_EQ(bufferSizesHost(0), bufferExpectedSizesHost(0)); + EXPECT_EQ(bufferSizesHost(1), bufferExpectedSizesHost(1)); + EXPECT_EQ(setValuesFunc.appender.getBuffers().buffers.extent(0), 0u); + setValuesFunc.appender.allocate_buffers(); + EXPECT_EQ(setValuesFunc.appender.getBuffers().buffers.extent(0), size_t(totalSize)); + + Kokkos::parallel_for("fill_buffers", policy, setValuesFunc); + execSpace.fence(); + DeviceBuffers deviceBuffers = setValuesFunc.appender.getBuffers(); + + auto buffersHost = Kokkos::create_mirror_view_and_copy(hostSpace, deviceBuffers.buffers); + auto rank0Buffer = Kokkos::subview(buffersHost, std::pair(0, bufferExpectedSizesHost(0))); + auto rank1Buffer = Kokkos::subview(buffersHost, std::pair(bufferExpectedSizesHost(0), buffersHost.extent(0))); + // the order the values get put into the buffer are undefined, sort them for testing + Kokkos::sort(rank0Buffer); + Kokkos::sort(rank1Buffer); + + for (size_t i=0; i < rank0Buffer.extent(0); ++i) + { + EXPECT_EQ(rank0Buffer(i), int(i)); + } + + for (size_t i=0; i < rank1Buffer.extent(0); ++i) + { + 
EXPECT_EQ(rank1Buffer(i), int(i + bufferExpectedSizesHost(0))); + } +} + +void test_device_data_exchange_all_to_all(int numValuesPerRank) +{ + using HostSpace = Kokkos::DefaultHostExecutionSpace; + + HostSpace hostSpace{}; + ExecutionSpace execSpace{}; + MPI_Comm comm = stk::parallel_machine_world(); + int commRank = stk::parallel_machine_rank(comm); + int commSize = stk::parallel_machine_size(comm); + + DeviceBuffers sendBuffers(commSize); + Kokkos::resize(sendBuffers.buffers, commSize*numValuesPerRank); + + Kokkos::RangePolicy<> setValuesPolicy(execSpace, 0, commSize*numValuesPerRank); + auto setValuesFunc = KOKKOS_LAMBDA(int i) { sendBuffers.buffers(i) = i + numValuesPerRank * commSize * commRank; }; + Kokkos::parallel_for("set_send_values", setValuesPolicy, setValuesFunc); + + Kokkos::RangePolicy<> setSizesPolicy(execSpace, 0, commSize); + auto setSizesFunc = KOKKOS_LAMBDA(int i) { sendBuffers.bufferSizes(i) = numValuesPerRank; }; + Kokkos::parallel_for("set_send_sizes", setSizesPolicy, setSizesFunc); + + DeviceDataExchanger exchanger(sendBuffers, execSpace, comm); + DeviceBuffers recvBuffers = exchanger.communicate(); + + auto recvSizesHost = Kokkos::create_mirror_view_and_copy(hostSpace, recvBuffers.bufferSizes); + auto recvBuffersHost = Kokkos::create_mirror_view_and_copy(hostSpace, recvBuffers.buffers); + + EXPECT_EQ(recvSizesHost.extent(0), size_t(commSize)); + EXPECT_EQ(recvBuffersHost.extent(0), size_t(commSize * numValuesPerRank)); + + int idx = 0; + for (int senderRank=0; senderRank < commSize; ++senderRank) + { + EXPECT_EQ(recvSizesHost(senderRank), numValuesPerRank); + int offset = numValuesPerRank * commSize * senderRank + commRank * numValuesPerRank; + for (int i=0; i < numValuesPerRank; ++i) + { + EXPECT_EQ(recvBuffersHost(idx++), i + offset); + } + } +} + +void test_device_data_exchange_all_to_all_skip_sender(int numValuesPerRank, int senderRankToSkip) +{ + using HostSpace = Kokkos::DefaultHostExecutionSpace; + + HostSpace hostSpace{}; + 
ExecutionSpace execSpace{}; + MPI_Comm comm = stk::parallel_machine_world(); + int commRank = stk::parallel_machine_rank(comm); + int commSize = stk::parallel_machine_size(comm); + + DeviceBuffers sendBuffers(commSize); + Kokkos::resize(sendBuffers.buffers, commSize*numValuesPerRank); + + if (commRank != senderRankToSkip) + { + Kokkos::RangePolicy<> setValuesPolicy(execSpace, 0, commSize*numValuesPerRank); + auto setValuesFunc = KOKKOS_LAMBDA(int i) { sendBuffers.buffers(i) = i + numValuesPerRank * commSize * commRank; }; + Kokkos::parallel_for("set_send_values", setValuesPolicy, setValuesFunc); + + Kokkos::RangePolicy<> setSizesPolicy(execSpace, 0, commSize); + auto setSizesFunc = KOKKOS_LAMBDA(int i) { sendBuffers.bufferSizes(i) = numValuesPerRank; }; + Kokkos::parallel_for("set_send_sizes", setSizesPolicy, setSizesFunc); + } + + DeviceDataExchanger exchanger(sendBuffers, execSpace, comm); + DeviceBuffers recvBuffers = exchanger.communicate(); + + auto recvSizesHost = Kokkos::create_mirror_view_and_copy(hostSpace, recvBuffers.bufferSizes); + auto recvBuffersHost = Kokkos::create_mirror_view_and_copy(hostSpace, recvBuffers.buffers); + + EXPECT_EQ(recvSizesHost.extent(0), size_t(commSize)); + EXPECT_EQ(recvBuffersHost.extent(0), size_t((commSize-1) * numValuesPerRank)); + + int idx = 0; + for (int senderRank=0; senderRank < commSize; ++senderRank) + { + if (senderRank == senderRankToSkip) + { + EXPECT_EQ(recvSizesHost(senderRank), 0); + continue; + } + + EXPECT_EQ(recvSizesHost(senderRank), numValuesPerRank); + int offset = numValuesPerRank * commSize * senderRank + commRank * numValuesPerRank; + for (int i=0; i < numValuesPerRank; ++i) + { + EXPECT_EQ(recvBuffersHost(idx++), i + offset); + } + } +} + + +void test_device_data_exchange_all_to_all_skip_receiver(int numValuesPerRank, int receiverToSkip) +{ + using HostSpace = Kokkos::DefaultHostExecutionSpace; + + HostSpace hostSpace{}; + ExecutionSpace execSpace{}; + MPI_Comm comm = stk::parallel_machine_world(); + 
int commRank = stk::parallel_machine_rank(comm); + int commSize = stk::parallel_machine_size(comm); + + DeviceBuffers sendBuffers(commSize); + Kokkos::resize(sendBuffers.buffers, commSize*numValuesPerRank); + + Kokkos::RangePolicy<> setValuesPolicy(execSpace, 0, (commSize - 1)*numValuesPerRank); + auto setValuesFunc = KOKKOS_LAMBDA(int i) { sendBuffers.buffers(i) = i + numValuesPerRank * commSize * commRank; }; + Kokkos::parallel_for("set_send_values", setValuesPolicy, setValuesFunc); + + Kokkos::RangePolicy<> setSizesPolicy(execSpace, 0, commSize); + auto setSizesFunc = KOKKOS_LAMBDA(int rank) { sendBuffers.bufferSizes(rank) = rank == receiverToSkip ? 0 : numValuesPerRank;}; + Kokkos::parallel_for("set_send_sizes", setSizesPolicy, setSizesFunc); + + DeviceDataExchanger exchanger(sendBuffers, execSpace, comm); + DeviceBuffers recvBuffers = exchanger.communicate(); + + auto recvSizesHost = Kokkos::create_mirror_view_and_copy(hostSpace, recvBuffers.bufferSizes); + auto recvBuffersHost = Kokkos::create_mirror_view_and_copy(hostSpace, recvBuffers.buffers); + + EXPECT_EQ(recvSizesHost.extent(0), size_t(commSize)); + + if (commRank == receiverToSkip) + { + for (int senderRank = 0; senderRank < commSize; ++senderRank) + { + EXPECT_EQ(recvSizesHost(senderRank), 0); + } + EXPECT_EQ(recvBuffersHost.extent(0), 0U); + + return; + } else + { + EXPECT_EQ(recvBuffersHost.extent(0), size_t(commSize * numValuesPerRank)); + + int idx = 0; + for (int senderRank=0; senderRank < commSize; ++senderRank) + { + EXPECT_EQ(recvSizesHost(senderRank), numValuesPerRank); + int offset = numValuesPerRank * commSize * senderRank; + if (commRank < receiverToSkip) + { + offset += commRank * numValuesPerRank; + } else + { + offset += (commRank - 1) * numValuesPerRank; + } + for (int i=0; i < numValuesPerRank; ++i) + { + EXPECT_EQ(recvBuffersHost(idx++), i + offset); + } + } + } +} + + +TEST(DeviceDataExchangeUnknownPattern, AllToAllSmallMsg) +{ + test_device_data_exchange_all_to_all(4); +} + 
+TEST(DeviceDataExchangeUnknownPattern, AllToAllLargeMsg) +{ + test_device_data_exchange_all_to_all(20 * 1024 * 1024 / sizeof(int)); +} + +TEST(DeviceDataExchangeUnknownPattern, AllToAllSkipSender) +{ + test_device_data_exchange_all_to_all_skip_sender(4, 0); +} + +TEST(DeviceDataExchangeUnknownPattern, AllToAllSkipReceiver) +{ + test_device_data_exchange_all_to_all_skip_receiver(4, 0); +} \ No newline at end of file diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestHelperTraits.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestHelperTraits.cpp new file mode 100644 index 000000000000..e0b6f0bd027c --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestHelperTraits.cpp @@ -0,0 +1,317 @@ +#include "gtest/gtest.h" +#include "stk_search/HelperTraits.hpp" +#include "stk_search/BoxIdent.hpp" +#include "stk_search/IdentProc.hpp" +#include "stk_search/Box.hpp" + +namespace { +using BoxIdentType = stk::search::BoxIdent, int>; +using BoxIdentProcType = stk::search::BoxIdentProc, stk::search::IdentProc>; +using IdentIntersectionType = stk::search::IdentIntersection; +using IdentProcIntersectionType = stk::search::IdentProcIntersection, stk::search::IdentProc>; + +template +struct Foo +{ +}; + +using FooType = Foo; +} + +TEST(HelperTraits, RemoveCVRef) +{ + static_assert(std::is_same_v, int>); + static_assert(std::is_same_v, int>); + static_assert(std::is_same_v, int>); + static_assert(std::is_same_v, int>); +} + +TEST(HelperTraits, ValueTypeOrVoid) +{ + static_assert(std::is_same_v>, int>); + static_assert(std::is_same_v&>, int>); + static_assert(std::is_same_v>, int>); + static_assert(std::is_same_v&>, int>); + + static_assert(std::is_same_v, void>); + static_assert(std::is_same_v, void>); + static_assert(std::is_same_v, void>); + static_assert(std::is_same_v, void>); +} + +TEST(HelperTraits, IsBoxIdent) +{ + static_assert(stk::search::is_box_ident_v); + static_assert(stk::search::is_box_ident_v); + static_assert(stk::search::is_box_ident_v); + 
static_assert(stk::search::is_box_ident_v); + + static_assert(!stk::search::is_box_ident_v); + static_assert(!stk::search::is_box_ident_v); + static_assert(!stk::search::is_box_ident_v); + static_assert(!stk::search::is_box_ident_v); + + static_assert(!stk::search::is_box_ident_v); + static_assert(!stk::search::is_box_ident_v); + static_assert(!stk::search::is_box_ident_v); + static_assert(!stk::search::is_box_ident_v); +} + +TEST(HelperTraits, IsBoxIdentProc) +{ + static_assert(stk::search::is_box_ident_proc_v); + static_assert(stk::search::is_box_ident_proc_v); + static_assert(stk::search::is_box_ident_proc_v); + static_assert(stk::search::is_box_ident_proc_v); + + static_assert(!stk::search::is_box_ident_proc_v); + static_assert(!stk::search::is_box_ident_proc_v); + static_assert(!stk::search::is_box_ident_proc_v); + static_assert(!stk::search::is_box_ident_proc_v); + + static_assert(!stk::search::is_box_ident_proc_v); + static_assert(!stk::search::is_box_ident_proc_v); + static_assert(!stk::search::is_box_ident_proc_v); + static_assert(!stk::search::is_box_ident_proc_v); +} + +TEST(HelperTraits, IsBoxIdentContainerSTL) +{ + static_assert(std::is_same_v>, BoxIdentType>); + static_assert(std::is_same_v, BoxIdentType>); + static_assert(stk::search::is_box_ident_container_v>); + static_assert(stk::search::is_box_ident_container_v&>); + static_assert(stk::search::is_box_ident_container_v>); + static_assert(stk::search::is_box_ident_container_v&>); + + static_assert(!stk::search::is_box_ident_container_v>); + static_assert(!stk::search::is_box_ident_container_v&>); + static_assert(!stk::search::is_box_ident_container_v>); + static_assert(!stk::search::is_box_ident_container_v&>); + + static_assert(!stk::search::is_box_ident_container_v); + static_assert(!stk::search::is_box_ident_container_v); + static_assert(!stk::search::is_box_ident_container_v); + static_assert(!stk::search::is_box_ident_container_v); +} + +TEST(HelperTraits, IsBoxIdentContainerView) +{ + 
static_assert(stk::search::is_box_ident_container_v>); + static_assert(stk::search::is_box_ident_container_v>); + + static_assert(stk::search::is_box_ident_container_v>); + static_assert(stk::search::is_box_ident_container_v>); + + static_assert(stk::search::is_box_ident_container_v&>); + static_assert(stk::search::is_box_ident_container_v&>); + + static_assert(stk::search::is_box_ident_container_v&>); + static_assert(stk::search::is_box_ident_container_v&>); + + + static_assert(!stk::search::is_box_ident_container_v>); + static_assert(!stk::search::is_box_ident_container_v>); + + static_assert(!stk::search::is_box_ident_container_v>); + static_assert(!stk::search::is_box_ident_container_v>); + + static_assert(!stk::search::is_box_ident_container_v&>); + static_assert(!stk::search::is_box_ident_container_v&>); + + static_assert(!stk::search::is_box_ident_container_v&>); + static_assert(!stk::search::is_box_ident_container_v&>); +} + +TEST(HelperTraits, IsBoxIdentProcContainerSTL) +{ + static_assert(std::is_same_v>, BoxIdentProcType>); + static_assert(std::is_same_v, BoxIdentProcType>); + static_assert(stk::search::is_box_ident_proc_container_v>); + static_assert(stk::search::is_box_ident_proc_container_v&>); + static_assert(stk::search::is_box_ident_proc_container_v>); + static_assert(stk::search::is_box_ident_proc_container_v&>); + + static_assert(!stk::search::is_box_ident_proc_container_v>); + static_assert(!stk::search::is_box_ident_proc_container_v&>); + static_assert(!stk::search::is_box_ident_proc_container_v>); + static_assert(!stk::search::is_box_ident_proc_container_v&>); + + static_assert(!stk::search::is_box_ident_proc_container_v); + static_assert(!stk::search::is_box_ident_proc_container_v); + static_assert(!stk::search::is_box_ident_proc_container_v); + static_assert(!stk::search::is_box_ident_proc_container_v); +} + + +TEST(HelperTraits, IsBoxIdentProcContainerView) +{ + static_assert(stk::search::is_box_ident_proc_container_v>); + 
static_assert(stk::search::is_box_ident_proc_container_v>); + + static_assert(stk::search::is_box_ident_proc_container_v>); + static_assert(stk::search::is_box_ident_proc_container_v>); + + static_assert(stk::search::is_box_ident_proc_container_v&>); + static_assert(stk::search::is_box_ident_proc_container_v&>); + + static_assert(stk::search::is_box_ident_proc_container_v&>); + static_assert(stk::search::is_box_ident_proc_container_v&>); + + + static_assert(!stk::search::is_box_ident_proc_container_v>); + static_assert(!stk::search::is_box_ident_proc_container_v>); + + static_assert(!stk::search::is_box_ident_proc_container_v>); + static_assert(!stk::search::is_box_ident_proc_container_v>); + + static_assert(!stk::search::is_box_ident_proc_container_v&>); + static_assert(!stk::search::is_box_ident_proc_container_v&>); + + static_assert(!stk::search::is_box_ident_proc_container_v&>); + static_assert(!stk::search::is_box_ident_proc_container_v&>); +} + +TEST(HelperTraits, IsIdentIntersection) +{ + static_assert(stk::search::is_ident_intersection_v); + static_assert(stk::search::is_ident_intersection_v); + static_assert(stk::search::is_ident_intersection_v); + static_assert(stk::search::is_ident_intersection_v); + + static_assert(!stk::search::is_ident_intersection_v); + static_assert(!stk::search::is_ident_intersection_v); + static_assert(!stk::search::is_ident_intersection_v); + static_assert(!stk::search::is_ident_intersection_v); +} + +TEST(HelperTraits, IsIdentIntersectionContainerSTL) +{ + static_assert(stk::search::is_ident_intersection_container_v>); + static_assert(stk::search::is_ident_intersection_container_v&>); + static_assert(stk::search::is_ident_intersection_container_v>); + static_assert(stk::search::is_ident_intersection_container_v&>); + + static_assert(!stk::search::is_ident_intersection_container_v>); + static_assert(!stk::search::is_ident_intersection_container_v&>); + static_assert(!stk::search::is_ident_intersection_container_v>); + 
static_assert(!stk::search::is_ident_intersection_container_v&>); +} + +TEST(HelperTraits, IsIdentProcIntersection) +{ + static_assert(stk::search::is_ident_proc_intersection_v); + static_assert(stk::search::is_ident_proc_intersection_v); + static_assert(stk::search::is_ident_proc_intersection_v); + static_assert(stk::search::is_ident_proc_intersection_v); + + static_assert(!stk::search::is_ident_proc_intersection_v); + static_assert(!stk::search::is_ident_proc_intersection_v); + static_assert(!stk::search::is_ident_proc_intersection_v); + static_assert(!stk::search::is_ident_proc_intersection_v); +} + +TEST(HelperTraits, IsIdentIntersectionContainerView) +{ + static_assert(stk::search::is_ident_intersection_container_v>); + static_assert(stk::search::is_ident_intersection_container_v>); + + static_assert(stk::search::is_ident_intersection_container_v&>); + static_assert(stk::search::is_ident_intersection_container_v&>); + + static_assert(stk::search::is_ident_intersection_container_v>); + static_assert(stk::search::is_ident_intersection_container_v>); + + static_assert(stk::search::is_ident_intersection_container_v&>); + static_assert(stk::search::is_ident_intersection_container_v&>); + + + static_assert(!stk::search::is_ident_intersection_container_v>); + static_assert(!stk::search::is_ident_intersection_container_v>); + + static_assert(!stk::search::is_ident_intersection_container_v&>); + static_assert(!stk::search::is_ident_intersection_container_v&>); + + static_assert(!stk::search::is_ident_intersection_container_v>); + static_assert(!stk::search::is_ident_intersection_container_v>); + + static_assert(!stk::search::is_ident_intersection_container_v&>); + static_assert(!stk::search::is_ident_intersection_container_v&>); +} + +TEST(HelperTraits, IsIdentProcIntersectionContainerSTL) +{ + static_assert(stk::search::is_ident_proc_intersection_container_v>); + static_assert(stk::search::is_ident_proc_intersection_container_v&>); + 
static_assert(stk::search::is_ident_proc_intersection_container_v>); + static_assert(stk::search::is_ident_proc_intersection_container_v&>); + + static_assert(!stk::search::is_ident_proc_intersection_container_v>); + static_assert(!stk::search::is_ident_proc_intersection_container_v&>); + static_assert(!stk::search::is_ident_proc_intersection_container_v>); + static_assert(!stk::search::is_ident_proc_intersection_container_v&>); +} + +TEST(HelperTraits, IsIdentProcIntersectionContainerView) +{ + static_assert(stk::search::is_ident_proc_intersection_container_v>); + static_assert(stk::search::is_ident_proc_intersection_container_v>); + + static_assert(stk::search::is_ident_proc_intersection_container_v&>); + static_assert(stk::search::is_ident_proc_intersection_container_v&>); + + static_assert(stk::search::is_ident_proc_intersection_container_v>); + static_assert(stk::search::is_ident_proc_intersection_container_v>); + + static_assert(stk::search::is_ident_proc_intersection_container_v&>); + static_assert(stk::search::is_ident_proc_intersection_container_v&>); + + + static_assert(!stk::search::is_ident_proc_intersection_container_v>); + static_assert(!stk::search::is_ident_proc_intersection_container_v>); + + static_assert(!stk::search::is_ident_proc_intersection_container_v&>); + static_assert(!stk::search::is_ident_proc_intersection_container_v&>); + + static_assert(!stk::search::is_ident_proc_intersection_container_v>); + static_assert(!stk::search::is_ident_proc_intersection_container_v>); + + static_assert(!stk::search::is_ident_proc_intersection_container_v&>); + static_assert(!stk::search::is_ident_proc_intersection_container_v&>); +} + +TEST(HelperTraits, IsModifiable) +{ + static_assert(stk::search::is_modifiable_v); + static_assert(stk::search::is_modifiable_v); + static_assert(!stk::search::is_modifiable_v); + static_assert(!stk::search::is_modifiable_v); +} + +TEST(HelperTraits, IsModifiableView) +{ + static_assert(stk::search::is_modifiable_view_v>); + 
static_assert(stk::search::is_modifiable_view_v&>); + static_assert(stk::search::is_modifiable_view_v>); + static_assert(stk::search::is_modifiable_view_v&>); + + static_assert(!stk::search::is_modifiable_view_v>); + static_assert(!stk::search::is_modifiable_view_v&>); + static_assert(!stk::search::is_modifiable_view_v>); + static_assert(!stk::search::is_modifiable_view_v&>); +} + +TEST(HelperTraits, ViewUsableFrom) +{ + stk::search::check_view_is_usable_from>(); + stk::search::check_view_is_usable_from>(); + stk::search::check_view_is_usable_from>(); + stk::search::check_view_is_usable_from>(); + +#ifdef KOKKOS_ENABLE_CUDA + stk::search::check_view_is_usable_from>(); + stk::search::check_view_is_usable_from>(); + stk::search::check_view_is_usable_from>(); + stk::search::check_view_is_usable_from>(); +#endif +} diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestParallelConsistencyUtils.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestParallelConsistencyUtils.cpp new file mode 100644 index 000000000000..b8060bf2a823 --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestParallelConsistencyUtils.cpp @@ -0,0 +1,395 @@ +#include "gtest/gtest.h" +#include "stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp" +#include "stk_search/Box.hpp" +#include "stk_search/BoxIdent.hpp" +#include "stk_search/IdentProc.hpp" +#include "stk_util/parallel/Parallel.hpp" + +namespace { +using BoxType = stk::search::Box; +using IdentProcType = stk::search::IdentProc; +using BoxIdentProcType = stk::search::BoxIdentProc; +using ExecutionSpace = Kokkos::DefaultExecutionSpace; +using BoxIdentProcView = Kokkos::View; + +using HostExecutionSpace = Kokkos::DefaultHostExecutionSpace; +using BoxIdentProcViewHost = Kokkos::View; +} + +TEST(ParallelConsistencyUtils, ProcBoundingBoxView1Proc) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 1) + { + GTEST_SKIP(); + } + + ExecutionSpace execSpace{}; + + int myrank = 
stk::parallel_machine_rank(stk::parallel_machine_world()); + Kokkos::View boxIdentProcs("box_ident_procs", 2); + double delta = 0.5; + double x0 = delta * myrank; + boxIdentProcs(0) = BoxIdentProcType{BoxType(x0, 0, 0, x0 + delta, delta, delta), IdentProcType(0, myrank)}; + boxIdentProcs(1) = BoxIdentProcType{BoxType(x0, delta, 0, x0 + delta, 2*delta, delta), IdentProcType(1, myrank)}; + + Kokkos::View procBoxes = stk::search::gather_all_processor_superset_domain_boxes(boxIdentProcs, execSpace, stk::parallel_machine_world()); + + auto procBoxesHost = Kokkos::create_mirror_view_and_copy(execSpace, procBoxes); + + EXPECT_EQ(procBoxesHost.extent(0), 1u); + EXPECT_EQ(procBoxesHost(0), BoxType(0, 0, 0, delta, 2*delta, delta)); +} + +TEST(ParallelConsistencyUtils, ProcBoundingBoxView1ProcSphere) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 1) + { + GTEST_SKIP(); + } + + using BoxType = stk::search::Sphere; + using BoxIdentProcType = stk::search::BoxIdentProc; + + ExecutionSpace execSpace{}; + + int myrank = stk::parallel_machine_rank(stk::parallel_machine_world()); + Kokkos::View boxIdentProcs("box_ident_procs", 2); + boxIdentProcs(0) = BoxIdentProcType{BoxType({0, 0, 0}, 1), IdentProcType(0, myrank)}; + boxIdentProcs(1) = BoxIdentProcType{BoxType({1, 0, 0}, 1), IdentProcType(1, myrank)}; + + Kokkos::View*, ExecutionSpace> procBoxes = stk::search::gather_all_processor_superset_domain_boxes(boxIdentProcs, execSpace, stk::parallel_machine_world()); + + auto procBoxesHost = Kokkos::create_mirror_view_and_copy(execSpace, procBoxes); + + EXPECT_EQ(procBoxesHost.extent(0), 1u); + EXPECT_EQ(procBoxesHost(0), stk::search::Box(-1, -1, -1, 2, 1, 1)); +} + +TEST(ParallelConsistencyUtils, ProcBoundingBoxView2Proc) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 2) + { + GTEST_SKIP(); + } + + ExecutionSpace execSpace{}; + + int myrank = stk::parallel_machine_rank(stk::parallel_machine_world()); + Kokkos::View 
boxIdentProcs("box_ident_procs", 2); + double delta = 0.5; + double x0 = delta * myrank; + boxIdentProcs(0) = BoxIdentProcType{BoxType(x0, 0, 0, x0 + delta, delta, delta), IdentProcType(0, myrank)}; + boxIdentProcs(1) = BoxIdentProcType{BoxType(x0, delta, 0, x0 + delta, 2*delta, delta), IdentProcType(1, myrank)}; + + Kokkos::View procBoxes = stk::search::gather_all_processor_superset_domain_boxes(boxIdentProcs, execSpace, stk::parallel_machine_world()); + + auto procBoxesHost = Kokkos::create_mirror_view_and_copy(execSpace, procBoxes); + + EXPECT_EQ(procBoxesHost.extent(0), 2u); + EXPECT_EQ(procBoxesHost(0), BoxType(0, 0, 0, delta, 2*delta, delta)); + EXPECT_EQ(procBoxesHost(1), BoxType(delta, 0, 0, 2*delta, 2*delta, delta)); +} + +TEST(ParallelConsistencyUtils, ProcBoundingBoxView4Proc) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 4) + { + GTEST_SKIP(); + } + + ExecutionSpace execSpace{}; + + int myrank = stk::parallel_machine_rank(stk::parallel_machine_world()); + int i = myrank % 2; + int j = myrank / 2; + Kokkos::View boxIdentProcs("box_ident_procs", 1); + + double delta = 0.5; + double x0 = delta * i; + double y0 = delta * j; + boxIdentProcs(0) = BoxIdentProcType{BoxType(x0, y0, 0, x0 + delta, y0 + delta, delta), IdentProcType(0, myrank)}; + + Kokkos::View procBoxes = stk::search::gather_all_processor_superset_domain_boxes(boxIdentProcs, execSpace, stk::parallel_machine_world()); + + auto procBoxesHost = Kokkos::create_mirror_view_and_copy(execSpace, procBoxes); + + EXPECT_EQ(procBoxesHost.extent(0), 4u); + EXPECT_EQ(procBoxesHost(0), BoxType(0, 0, 0, delta, delta, delta)); + EXPECT_EQ(procBoxesHost(1), BoxType(delta, 0, 0, 2*delta, delta, delta)); + EXPECT_EQ(procBoxesHost(2), BoxType(0, delta, 0, delta, 2*delta, delta)); + EXPECT_EQ(procBoxesHost(3), BoxType(delta, delta, 0, 2*delta, 2*delta, delta)); +} + +TEST(ParallelConsistencyUtils, ExtendRangeWithRemoteBoxesLocal) +{ + if 
(stk::parallel_machine_size(stk::parallel_machine_world()) != 1) + { + GTEST_SKIP(); + } + + BoxIdentProcView domainBoxes("domain_boxes", 1); + BoxIdentProcView rangeBoxes("range_boxes", 2); + + auto domainBoxesHost = Kokkos::create_mirror_view(Kokkos::DefaultHostExecutionSpace{}, domainBoxes); + auto rangeBoxesHost = Kokkos::create_mirror_view(Kokkos::DefaultHostExecutionSpace{}, rangeBoxes); + + domainBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 1, 1, 1), IdentProcType(0, 0)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 0.5, 1, 1), IdentProcType(0, 0)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.5, 0, 0, 1, 1, 1), IdentProcType(1, 0)}; + + Kokkos::deep_copy(domainBoxes, domainBoxesHost); + Kokkos::deep_copy(rangeBoxes, rangeBoxesHost); + + auto [extendedRange, remoteIdents] = stk::search::morton_extend_local_range_with_remote_boxes_that_might_intersect( + domainBoxes, rangeBoxes, ExecutionSpace{}, stk::parallel_machine_world()); + + EXPECT_EQ(extendedRange.extent(0), 2U); + EXPECT_EQ(remoteIdents.extent(0), 0U); +} + +TEST(ParallelConsistencyUtils, ExtendRangeWithRemoteBoxes2Procs) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 2) + { + GTEST_SKIP(); + } + + BoxIdentProcView domainBoxes("domain_boxes", 1); + BoxIdentProcView rangeBoxes("range_boxes", 2); + + auto domainBoxesHost = Kokkos::create_mirror_view(Kokkos::DefaultHostExecutionSpace{}, domainBoxes); + auto rangeBoxesHost = Kokkos::create_mirror_view(Kokkos::DefaultHostExecutionSpace{}, rangeBoxes); + + int myrank = stk::parallel_machine_rank(stk::parallel_machine_world());; + double delta_x = 0.5; + double x0 = delta_x * myrank; + domainBoxesHost(0) = BoxIdentProcType{BoxType(x0, 0, 0, x0 + delta_x, 1, 1), IdentProcType(0, 0)}; + + if (myrank == 0) + { + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 0.4, 1, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.4, 0, 0, 0.6, 1, 1), IdentProcType(1, myrank)}; + } else + { + 
rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.6, 0, 0, 1, 1, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.4, 0, 0, 0.7, 1, 1), IdentProcType(1, myrank)}; + } + + Kokkos::deep_copy(domainBoxes, domainBoxesHost); + Kokkos::deep_copy(rangeBoxes, rangeBoxesHost); + + auto [extendedRange, remoteIdentProcs] = stk::search::morton_extend_local_range_with_remote_boxes_that_might_intersect( + domainBoxes, rangeBoxes, ExecutionSpace{}, stk::parallel_machine_world()); + + EXPECT_EQ(extendedRange.extent(0), 3U); + EXPECT_EQ(remoteIdentProcs.extent(0), 1U); + + auto extendedRangeHost = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace{}, extendedRange); + auto remoteIdentProcsHost = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace{}, remoteIdentProcs); + + if (myrank == 0) + { + EXPECT_EQ(extendedRangeHost(0), rangeBoxesHost(0).box); + + EXPECT_EQ(extendedRangeHost(1), rangeBoxesHost(1).box); + + EXPECT_EQ(extendedRangeHost(2), BoxType(0.4, 0, 0, 0.7, 1, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 1)); + } else + { + EXPECT_EQ(extendedRangeHost(0), rangeBoxesHost(0).box); + + EXPECT_EQ(extendedRangeHost(1), rangeBoxesHost(1).box); + + EXPECT_EQ(extendedRangeHost(2), BoxType(0.4, 0, 0, 0.6, 1, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + } +} + +TEST(ParallelConsistencyUtils, ExtendRangeWithRemoteBoxes4Procs) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 4) + { + GTEST_SKIP(); + } + + BoxIdentProcView domainBoxes("domain_boxes", 1); + BoxIdentProcView rangeBoxes("range_boxes", 2); + + auto domainBoxesHost = Kokkos::create_mirror_view(Kokkos::DefaultHostExecutionSpace{}, domainBoxes); + auto rangeBoxesHost = Kokkos::create_mirror_view(Kokkos::DefaultHostExecutionSpace{}, rangeBoxes); + + int myrank = stk::parallel_machine_rank(stk::parallel_machine_world());; + + if (myrank == 0) + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 0.5, 
0.5, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 0.4, 0.4, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.3, 0.4, 0, 0.6, 0.6, 1), IdentProcType(1, myrank)}; + } else if (myrank == 1) + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0.5, 0, 0, 1.0, 0.5, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.6, 0.0, 0, 1.0, 0.4, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.2, 0.4, 0, 0.6, 0.6, 1), IdentProcType(1, myrank)}; + } else if (myrank == 2) + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0.0, 0.5, 0, 0.5, 1.0, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.0, 0.6, 0, 0.4, 1.0, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.4, 0.4, 0, 0.7, 0.6, 1), IdentProcType(1, myrank)}; + } else + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0.5, 0.5, 0, 1.0, 1.0, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.6, 0.6, 0, 1.0, 1.0, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.4, 0.4, 0, 0.8, 0.6, 1), IdentProcType(1, myrank)}; + } + + Kokkos::deep_copy(domainBoxes, domainBoxesHost); + Kokkos::deep_copy(rangeBoxes, rangeBoxesHost); + + auto [extendedRange, remoteIdentProcs] = stk::search::morton_extend_local_range_with_remote_boxes_that_might_intersect( + domainBoxes, rangeBoxes, ExecutionSpace{}, stk::parallel_machine_world()); + + EXPECT_EQ(extendedRange.extent(0), 5U); + EXPECT_EQ(remoteIdentProcs.extent(0), 3U); + + auto extendedRangeHost = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace{}, extendedRange); + auto remoteIdentProcsHost = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace{}, remoteIdentProcs); + + EXPECT_EQ(extendedRangeHost(0), rangeBoxesHost(0).box); + EXPECT_EQ(extendedRangeHost(1), rangeBoxesHost(1).box); + if (myrank == 0) + 
{ + EXPECT_EQ(extendedRangeHost(2), BoxType(0.2, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 1)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.4, 0.4, 0, 0.7, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 2)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.8, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 3)); + } else if (myrank == 1) + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.3, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.4, 0.4, 0, 0.7, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 2)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.8, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 3)); + } else if (myrank == 2) + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.3, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.2, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 1)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.8, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 3)); + } else + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.3, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.2, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 1)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.7, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 2)); + } +} + +TEST(ParallelConsistencyUtils, ExtendRangeWithRemoteBoxes4ProcsHostSpace) +{ + if (stk::parallel_machine_size(stk::parallel_machine_world()) != 4) + { + GTEST_SKIP(); + } + + BoxIdentProcViewHost domainBoxesHost("domain_boxes", 1); + BoxIdentProcViewHost rangeBoxesHost("range_boxes", 2); + + int myrank = 
stk::parallel_machine_rank(stk::parallel_machine_world());; + + if (myrank == 0) + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 0.5, 0.5, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0, 0, 0, 0.4, 0.4, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.3, 0.4, 0, 0.6, 0.6, 1), IdentProcType(1, myrank)}; + } else if (myrank == 1) + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0.5, 0, 0, 1.0, 0.5, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.6, 0.0, 0, 1.0, 0.4, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.2, 0.4, 0, 0.6, 0.6, 1), IdentProcType(1, myrank)}; + } else if (myrank == 2) + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0.0, 0.5, 0, 0.5, 1.0, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.0, 0.6, 0, 0.4, 1.0, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.4, 0.4, 0, 0.7, 0.6, 1), IdentProcType(1, myrank)}; + } else + { + domainBoxesHost(0) = BoxIdentProcType{BoxType(0.5, 0.5, 0, 1.0, 1.0, 1), IdentProcType(0, myrank)}; + + rangeBoxesHost(0) = BoxIdentProcType{BoxType(0.6, 0.6, 0, 1.0, 1.0, 1), IdentProcType(0, myrank)}; + rangeBoxesHost(1) = BoxIdentProcType{BoxType(0.4, 0.4, 0, 0.8, 0.6, 1), IdentProcType(1, myrank)}; + } + + auto [extendedRangeHost, remoteIdentProcsHost] = stk::search::morton_extend_local_range_with_remote_boxes_that_might_intersect( + domainBoxesHost, rangeBoxesHost, HostExecutionSpace{}, stk::parallel_machine_world()); + + EXPECT_EQ(extendedRangeHost.extent(0), 5U); + EXPECT_EQ(remoteIdentProcsHost.extent(0), 3U); + + EXPECT_EQ(extendedRangeHost(0), rangeBoxesHost(0).box); + EXPECT_EQ(extendedRangeHost(1), rangeBoxesHost(1).box); + if (myrank == 0) + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.2, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 1)); + + EXPECT_EQ(extendedRangeHost(3), 
BoxType(0.4, 0.4, 0, 0.7, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 2)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.8, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 3)); + } else if (myrank == 1) + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.3, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.4, 0.4, 0, 0.7, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 2)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.8, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 3)); + } else if (myrank == 2) + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.3, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.2, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 1)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.8, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 3)); + } else + { + EXPECT_EQ(extendedRangeHost(2), BoxType(0.3, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(0), IdentProcType(1, 0)); + + EXPECT_EQ(extendedRangeHost(3), BoxType(0.2, 0.4, 0, 0.6, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(1), IdentProcType(1, 1)); + + EXPECT_EQ(extendedRangeHost(4), BoxType(0.4, 0.4, 0, 0.7, 0.6, 1)); + EXPECT_EQ(remoteIdentProcsHost(2), IdentProcType(1, 2)); + } +} diff --git a/packages/stk/stk_unit_tests/stk_search_util/UnitTestSearchWithPeriodicBC.cpp b/packages/stk/stk_unit_tests/stk_search_util/UnitTestSearchWithPeriodicBC.cpp index 0389873f4b90..d3b733120cfa 100644 --- a/packages/stk/stk_unit_tests/stk_search_util/UnitTestSearchWithPeriodicBC.cpp +++ b/packages/stk/stk_unit_tests/stk_search_util/UnitTestSearchWithPeriodicBC.cpp @@ -41,7 +41,7 @@ #include #include -typedef stk::mesh::fixtures::simple_fields::HexFixture::CoordFieldType CoordFieldType; +typedef 
stk::mesh::fixtures::HexFixture::CoordFieldType CoordFieldType; typedef stk::mesh::GetCoordinates CoordinateFunctor; typedef stk::mesh::PeriodicBoundarySearch PeriodicSearch; @@ -255,7 +255,7 @@ void check_gold_three_way_multiperiodic( const SearchPairVector & search_results } void check_single_periodic_assembly(const stk::mesh::BulkData & bulk_data, - const stk::mesh::fixtures::simple_fields::HexFixture & fixture, + const stk::mesh::fixtures::HexFixture & fixture, const stk::mesh::Field & volField, unsigned x, unsigned y, @@ -367,7 +367,7 @@ TEST(CoarseSearch, PeriodicBC) { const unsigned x = 3, y = 3, z = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, x, y, z); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, x, y, z); stk::mesh::BulkData & bulk_data = fixture.m_bulk_data; stk::mesh::MetaData & meta_data = fixture.m_meta; @@ -431,7 +431,7 @@ TEST(CoarseSearch, PeriodicBC) void assign_to_parts_for_two_way(const unsigned x, const unsigned y, const unsigned z, - stk::mesh::fixtures::simple_fields::HexFixture &fixture, + stk::mesh::fixtures::HexFixture &fixture, stk::mesh::BulkData &bulk_data, stk::mesh::PartVector &side_0_parts, stk::mesh::PartVector &side_1_parts, @@ -475,7 +475,7 @@ TEST(CoarseSearch, TwoWayMultiPeriodicBC) const unsigned x = 3, y = 3, z = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, x, y, z); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, x, y, z); stk::mesh::BulkData & bulk_data = fixture.m_bulk_data; stk::mesh::MetaData & meta_data = fixture.m_meta; @@ -560,7 +560,7 @@ TEST(CoarseSearch, TwoWayMultiPeriodicBC) } void assign_to_parts_for_three_way(const unsigned x, const unsigned y, const unsigned z, - stk::mesh::fixtures::simple_fields::HexFixture &fixture, + stk::mesh::fixtures::HexFixture &fixture, stk::mesh::BulkData &bulk_data, stk::mesh::PartVector &side_0_parts, stk::mesh::PartVector &side_1_parts, @@ -592,7 +592,7 @@ TEST(CoarseSearch, 
ThreeWayMultiPeriodicBC) { const unsigned x = 3, y = 3, z = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, x, y, z); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, x, y, z); stk::mesh::BulkData & bulk_data = fixture.m_bulk_data; stk::mesh::MetaData & meta_data = fixture.m_meta; @@ -677,7 +677,7 @@ TEST(CoarseSearch, MultiPeriodicBCDisallowRotational) { const unsigned x = 3, y = 3, z = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, x, y, z); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, x, y, z); stk::mesh::BulkData & bulk_data = fixture.m_bulk_data; stk::mesh::MetaData & meta_data = fixture.m_meta; @@ -734,7 +734,7 @@ TEST(CoarseSearch, RotationalPeriodicBC) { const unsigned x = 3, y = 3, z = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, x, y, z); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, x, y, z); stk::mesh::BulkData & bulk_data = fixture.m_bulk_data; stk::mesh::MetaData & meta_data = fixture.m_meta; @@ -751,7 +751,7 @@ TEST(CoarseSearch, RotationalPeriodicBC) const double rotationAngle = -TWO_PI/4.0; const double rotationAxis[3] = {0.0, 0.0, 1.0}; const double axisLocation[3] = {0.0, 0.0, 0.0}; - stk::mesh::fixtures::simple_fields::CylindricalCoordinateMapping coordMap(1.0, rotationAngle, 4); + stk::mesh::fixtures::CylindricalCoordinateMapping coordMap(1.0, rotationAngle, 4); fixture.generate_mesh(coordMap); stk::mesh::PartVector independent_parts(1,&side_0); @@ -812,7 +812,7 @@ TEST(CoarseSearch, OffsetRotationalPeriodicBC) { const unsigned x = 3, y = 3, z = 3; - stk::mesh::fixtures::simple_fields::HexFixture fixture(MPI_COMM_WORLD, x, y, z); + stk::mesh::fixtures::HexFixture fixture(MPI_COMM_WORLD, x, y, z); stk::mesh::BulkData & bulk_data = fixture.m_bulk_data; stk::mesh::MetaData & meta_data = fixture.m_meta; diff --git a/packages/stk/stk_unit_tests/stk_simd/UnitTestPrintSimdInfo.cpp 
b/packages/stk/stk_unit_tests/stk_simd/UnitTestPrintSimdInfo.cpp index 000f845bfc5d..cd65bafadf1e 100644 --- a/packages/stk/stk_unit_tests/stk_simd/UnitTestPrintSimdInfo.cpp +++ b/packages/stk/stk_unit_tests/stk_simd/UnitTestPrintSimdInfo.cpp @@ -46,11 +46,11 @@ TEST( PrintSimdInfo, printTypes ) { std::cout << "Datatype stored by stk::simd::Float is "; stk::simd::Float f; - stk::unit_test_util::simple_fields::print_type(f._data); + stk::unit_test_util::print_type(f._data); std::cout << "Datatype stored by stk::simd::Double is "; stk::simd::Double d; - stk::unit_test_util::simple_fields::print_type(d._data); + stk::unit_test_util::print_type(d._data); } #ifdef STK_ENABLE_GPU diff --git a/packages/stk/stk_unit_tests/stk_tools/blockIdQuery.cpp b/packages/stk/stk_unit_tests/stk_tools/blockIdQuery.cpp index 41df2416e9bb..7602fc64169b 100644 --- a/packages/stk/stk_unit_tests/stk_tools/blockIdQuery.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/blockIdQuery.cpp @@ -7,17 +7,17 @@ namespace { -class StkToolsB : public stk::unit_test_util::simple_fields::MeshFixture {}; +class StkToolsB : public stk::unit_test_util::MeshFixture {}; TEST_F(StkToolsB, GetBlockIdsForSpecifiedSideset) { const std::string unNamed = "mesh not specified"; - const std::string meshName = stk::unit_test_util::simple_fields::get_option("-i", unNamed); + const std::string meshName = stk::unit_test_util::get_option("-i", unNamed); STK_ThrowRequireMsg(meshName!=unNamed, "Please specify mesh with -i option."); setup_mesh(meshName, stk::mesh::BulkData::NO_AUTO_AURA); int invalidSideset = -1; - int sideset = stk::unit_test_util::simple_fields::get_command_line_option("-s", invalidSideset); + int sideset = stk::unit_test_util::get_command_line_option("-s", invalidSideset); STK_ThrowRequireMsg(sideset!=invalidSideset, "Please specify sideset with -s."); std::string sidesetName = "surface_" + std::to_string(sideset); diff --git a/packages/stk/stk_unit_tests/stk_tools/block_extractor/UnitTestExtractBlocks.cpp 
b/packages/stk/stk_unit_tests/stk_tools/block_extractor/UnitTestExtractBlocks.cpp index 2d2ef4b10735..960032e80e7f 100644 --- a/packages/stk/stk_unit_tests/stk_tools/block_extractor/UnitTestExtractBlocks.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/block_extractor/UnitTestExtractBlocks.cpp @@ -43,7 +43,7 @@ namespace { using stk::unit_test_util::build_mesh; -class MeshWithTwoBlocks : public stk::unit_test_util::simple_fields::MeshFixture +class MeshWithTwoBlocks : public stk::unit_test_util::MeshFixture { protected: void switch_half_mesh_to_part(stk::mesh::Part &addPart, stk::mesh::Part &removePart) @@ -171,7 +171,7 @@ TEST_F(MeshWithTwoBlocks, getOneBlockAndOneNodeset) } -class MeshWithOneBlock : public stk::unit_test_util::simple_fields::MeshFixture +class MeshWithOneBlock : public stk::unit_test_util::MeshFixture { }; diff --git a/packages/stk/stk_unit_tests/stk_tools/createMesh.cpp b/packages/stk/stk_unit_tests/stk_tools/createMesh.cpp index 59c5b84fa9b6..d6ce3405ffa8 100644 --- a/packages/stk/stk_unit_tests/stk_tools/createMesh.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/createMesh.cpp @@ -11,7 +11,7 @@ namespace { -class StkToolsA : public stk::unit_test_util::simple_fields::MeshFixture +class StkToolsA : public stk::unit_test_util::MeshFixture { }; @@ -19,7 +19,7 @@ class StkToolsA : public stk::unit_test_util::simple_fields::MeshFixture TEST_F(StkToolsA, WriteTextMeshDescFromExodusFile) { const std::string unNamed = "mesh not specified"; - const std::string meshName = stk::unit_test_util::simple_fields::get_option("-i", unNamed); + const std::string meshName = stk::unit_test_util::get_option("-i", unNamed); STK_ThrowRequireMsg(meshName!=unNamed, "Please specify mesh with -i option."); setup_mesh(meshName, stk::mesh::BulkData::NO_AUTO_AURA); diff --git a/packages/stk/stk_unit_tests/stk_tools/elementExtractor.cpp b/packages/stk/stk_unit_tests/stk_tools/elementExtractor.cpp index afb8b5e85e86..ddf3ac50d5f7 100644 --- 
a/packages/stk/stk_unit_tests/stk_tools/elementExtractor.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/elementExtractor.cpp @@ -8,18 +8,18 @@ namespace { -class StkToolsC : public stk::unit_test_util::simple_fields::MeshFixture +class StkToolsC : public stk::unit_test_util::MeshFixture {}; TEST_F(StkToolsC, DeleteMeshExceptSpecifiedElems) { const std::string unNamed = "mesh not specified"; - const std::string meshName = stk::unit_test_util::simple_fields::get_option("-i", unNamed); + const std::string meshName = stk::unit_test_util::get_option("-i", unNamed); STK_ThrowRequireMsg(meshName!=unNamed, "Please specify mesh with -i option."); setup_mesh(meshName, stk::mesh::BulkData::NO_AUTO_AURA); std::string invalidElemId = "-1"; - std::string inputElemIds = stk::unit_test_util::simple_fields::get_command_line_option("-e", invalidElemId); + std::string inputElemIds = stk::unit_test_util::get_command_line_option("-e", invalidElemId); STK_ThrowRequireMsg(inputElemIds != invalidElemId, "Please specify element list with -e."); std::set elemIdsToKeep; @@ -86,18 +86,18 @@ void stk_determine_centroid(const unsigned spatial_dim, stk::mesh::Entity elemen TEST_F(StkToolsC, DeleteMeshExceptWithinBoundingBox) { const std::string unNamed = "mesh not specified"; - const std::string inputMeshName = stk::unit_test_util::simple_fields::get_option("-i", unNamed); + const std::string inputMeshName = stk::unit_test_util::get_option("-i", unNamed); STK_ThrowRequireMsg(inputMeshName!=unNamed, "Please specify mesh with -i option."); setup_mesh(inputMeshName, stk::mesh::BulkData::NO_AUTO_AURA); - const std::string outputMeshName = stk::unit_test_util::simple_fields::get_option("-o", "modified.g"); + const std::string outputMeshName = stk::unit_test_util::get_option("-o", "modified.g"); - double xLo = stk::unit_test_util::simple_fields::get_command_line_option("-x", std::numeric_limits::lowest()); - double xHi = stk::unit_test_util::simple_fields::get_command_line_option("-X", 
std::numeric_limits::max()); - double yLo = stk::unit_test_util::simple_fields::get_command_line_option("-y", std::numeric_limits::lowest()); - double yHi = stk::unit_test_util::simple_fields::get_command_line_option("-Y", std::numeric_limits::max()); - double zLo = stk::unit_test_util::simple_fields::get_command_line_option("-z", std::numeric_limits::lowest()); - double zHi = stk::unit_test_util::simple_fields::get_command_line_option("-Z", std::numeric_limits::max()); + double xLo = stk::unit_test_util::get_command_line_option("-x", std::numeric_limits::lowest()); + double xHi = stk::unit_test_util::get_command_line_option("-X", std::numeric_limits::max()); + double yLo = stk::unit_test_util::get_command_line_option("-y", std::numeric_limits::lowest()); + double yHi = stk::unit_test_util::get_command_line_option("-Y", std::numeric_limits::max()); + double zLo = stk::unit_test_util::get_command_line_option("-z", std::numeric_limits::lowest()); + double zHi = stk::unit_test_util::get_command_line_option("-Z", std::numeric_limits::max()); const stk::mesh::BucketVector &buckets = get_bulk().get_buckets(stk::topology::ELEM_RANK, get_meta().locally_owned_part()); const stk::mesh::FieldBase * coordinates = get_meta().coordinate_field(); @@ -126,12 +126,12 @@ TEST_F(StkToolsC, DeleteMeshExceptWithinBoundingBox) TEST_F(StkToolsC, FlipElementConnectivity) { const std::string unNamed = "mesh not specified"; - const std::string meshName = stk::unit_test_util::simple_fields::get_option("-i", unNamed); + const std::string meshName = stk::unit_test_util::get_option("-i", unNamed); STK_ThrowRequireMsg(meshName!=unNamed, "Please specify mesh with -i option."); setup_mesh(meshName, stk::mesh::BulkData::NO_AUTO_AURA); int invalidBlockId = -1; - int inputBlockId = stk::unit_test_util::simple_fields::get_command_line_option("-b", invalidBlockId); + int inputBlockId = stk::unit_test_util::get_command_line_option("-b", invalidBlockId); STK_ThrowRequireMsg(inputBlockId!=invalidBlockId, 
"Please specify block with -b."); std::ostringstream os; diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestBulkDataClone.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestBulkDataClone.cpp index 9289bad0b015..cb378f0abe1c 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestBulkDataClone.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestBulkDataClone.cpp @@ -239,14 +239,14 @@ void expect_superset_sharing(const stk::mesh::BulkData& oldBulk, stk::mesh::Enti ); } -class CloningMesh : public stk::unit_test_util::simple_fields::MeshFixture +class CloningMesh : public stk::unit_test_util::MeshFixture { protected: const char *get_mesh_spec_for_1x1x8_with_sideset() const {return "generated:1x1x8|sideset:x";} stk::mesh::BulkData::AutomaticAuraOption get_no_auto_aura_option() const {return stk::mesh::BulkData::NO_AUTO_AURA;} CloningMesh() - : stk::unit_test_util::simple_fields::MeshFixture(3, {"node", "edge", "face", "element", "constraint"}) + : stk::unit_test_util::MeshFixture(3, {"node", "edge", "face", "element", "constraint"}) { } void create_constraints() diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestCloneIntoCommSelf.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestCloneIntoCommSelf.cpp index 3787818a3334..95a730370c33 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestCloneIntoCommSelf.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestCloneIntoCommSelf.cpp @@ -74,7 +74,6 @@ void testSubMesh(stk::unit_test_util::BulkDataTester &oldBulkData, stk::mesh::Se { const stk::mesh::MetaData &oldMetaData = oldBulkData.mesh_meta_data(); stk::mesh::MetaData newMetaData; - newMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester newBulkData(newMetaData, MPI_COMM_SELF); stk::tools::copy_mesh(oldBulkData, select, newBulkData); @@ -131,7 +130,6 @@ TEST(CloningParallelMesh, destinationHasMpiCommSelf_destinationHasAuraEntities) 
std::string exodusFileName = "generated:1x1x4|sideset:xXyYzZ|nodeset:xXyYzZ"; const int spatialDim = 3; stk::mesh::MetaData stkMeshMetaData(spatialDim); - stkMeshMetaData.use_simple_fields(); stk::unit_test_util::BulkDataTester stkMeshBulkData(stkMeshMetaData, comm); stk::io::StkMeshIoBroker exodusFileReader(comm); diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestMeshClone.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestMeshClone.cpp index 7a07b903aace..65549c860105 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestMeshClone.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_clone/UnitTestMeshClone.cpp @@ -264,7 +264,7 @@ void expect_equal_entity_counts(stk::mesh::BulkData& oldBulk, stk::mesh::BulkDat EXPECT_EQ(oldCount, newCount); } -class MeshClone : public stk::unit_test_util::simple_fields::MeshFixture +class MeshClone : public stk::unit_test_util::MeshFixture { public: MeshClone() @@ -368,7 +368,6 @@ TEST_F(MeshClone, copyOnlyMeta) initialize_mesh_with_parts_and_fields(); stk::mesh::MetaData newMeta; - newMeta.use_simple_fields(); stk::tools::copy_meta_with_io_attributes(get_meta(), newMeta); expect_equal_meta_datas(get_meta(), newMeta); @@ -450,7 +449,6 @@ TEST_F(MeshClone, copyMeshWithOrphanNodesOnOwnedSide) TEST(MetaDataSize, sizeChanges_needToUpdateCopyMesh) { stk::mesh::MetaData meta; - meta.use_simple_fields(); EXPECT_GE(632u, sizeof(meta)) << "Size of MetaData changed. 
Does mesh copying capability need to be updated?"; } #endif diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp index 1fbc11ee682e..ed5756880b08 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp @@ -116,7 +116,7 @@ stk::mesh::PartVector setup_mesh_1block_1quad(stk::mesh::BulkData& bulk) stk::mesh::Part & block1 = create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4_2D, "block_1", 1); std::string meshDesc = "0,1,QUAD_4_2D,1,2,4,3,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -127,7 +127,7 @@ stk::mesh::PartVector setup_mesh_1block_2quad(stk::mesh::BulkData& bulk) std::string meshDesc = "0,1,QUAD_4_2D,1,2,4,3,block_1\n" "0,2,QUAD_4_2D,2,5,6,4,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -138,7 +138,7 @@ stk::mesh::PartVector setup_mesh_1block_2quad_1node_hinge(stk::mesh::BulkData& b std::string meshDesc = "0,1,QUAD_4_2D,1,2,4,3,block_1\n" "0,2,QUAD_4_2D,7,5,6,4,block_1"; std::vector coordinates = { 0,0, (1-EPS),0, 0,1, 1,1, 2,0, 2,1, (1+EPS),0 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + 
stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -150,7 +150,7 @@ stk::mesh::PartVector setup_mesh_2block_2quad_1node_hinge(stk::mesh::BulkData& b std::string meshDesc = "0,1,QUAD_4_2D,1,2,4,3,block_1\n" "0,2,QUAD_4_2D,7,5,6,4,block_2"; std::vector coordinates = { 0,0, (1-EPS),0, 0,1, 1,1, 2,0, 2,1, (1+EPS),0 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2}; } @@ -161,7 +161,7 @@ stk::mesh::PartVector setup_mesh_1block_2quad_2hinge(stk::mesh::BulkData& bulk) std::string meshDesc = "0,1,QUAD_4_2D,3,4,1,2,block_1\n" "0,2,QUAD_4_2D,2,6,4,5,block_1"; std::vector coordinates = { 0,2, 2,1, 1,2, 2,3, 3,2, 4,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -173,7 +173,7 @@ stk::mesh::PartVector setup_mesh_2block_2quad_2hinge(stk::mesh::BulkData& bulk) std::string meshDesc = "0,1,QUAD_4_2D,3,4,1,2,block_1\n" "0,2,QUAD_4_2D,2,6,4,5,block_2"; std::vector coordinates = { 0,2, 2,1, 1,2, 2,3, 3,2, 4,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2}; } @@ -185,7 +185,7 @@ stk::mesh::PartVector setup_mesh_1block_3quad_1hinge(stk::mesh::BulkData& bulk) "0,2,QUAD_4_2D,2,5,6,4,block_1\n" "0,3,QUAD_4_2D,4,7,8,9,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,(1-EPS), 2,(1+EPS), 
2,2, 1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -197,7 +197,7 @@ stk::mesh::PartVector setup_mesh_1block_3quad_1hinge_linear_stack(stk::mesh::Bul "0,2,QUAD_4_2D,2,5,6,4,block_1\n" "0,3,QUAD_4_2D,7,8,9,6,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, (2-EPS),0, 2,1, (2+EPS),0, 3,0, 3,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -211,7 +211,7 @@ stk::mesh::PartVector setup_mesh_3block_3quad_1hinge_linear_stack(stk::mesh::Bul "0,2,QUAD_4_2D,2,5,6,4,block_2\n" "0,3,QUAD_4_2D,7,8,9,6,block_3"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, (2-EPS),0, 2,1, (2+EPS),0, 3,0, 3,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3}; } @@ -225,7 +225,7 @@ stk::mesh::PartVector setup_mesh_1block_4quad_bowtie_1hinge(stk::mesh::BulkData& "0,4,QUAD_4_2D,4,11,12,13,block_1"; std::vector coordinates = { 0,0, (1-EPS),0, 0,(1-EPS), 1,1, (1+EPS),0, 2,0, 2,(1-EPS), 2,(1+EPS), 2,2, (1+EPS),2, (1-EPS),2, 0,2, 0,(1+EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -238,7 +238,7 @@ stk::mesh::PartVector 
setup_mesh_1block_4quad_2hinge(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,6,7,8,9,block_1\n" "0,4,QUAD_4_2D,3,9,8,10,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,(1-EPS), 2,0, 2,1, 2,2, 1,2, 1,(1+EPS), 0,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -254,7 +254,7 @@ stk::mesh::PartVector setup_mesh_4block_4quad_2hinge(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,6,7,8,9,block_3\n" "0,4,QUAD_4_2D,3,9,8,10,block_4"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,(1-EPS), 2,0, 2,1, 2,2, 1,2, 1,(1+EPS), 0,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -267,7 +267,7 @@ stk::mesh::PartVector setup_mesh_1block_4quad_4hinge(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,6,9,10,8,block_1\n" "0,4,QUAD_4_2D,3,11,10,12,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,1, 1,1, 1,1, 2,2, 1,2, 1,1, 0,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -283,7 +283,7 @@ stk::mesh::PartVector setup_mesh_4block_4quad_4hinge(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,6,9,10,8,block_3\n" "0,4,QUAD_4_2D,3,11,10,12,block_4"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,1, 1,1, 1,1, 2,2, 1,2, 1,1, 0,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, 
coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -296,7 +296,7 @@ stk::mesh::PartVector setup_mesh_1block_4quad_pacman(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,4,7,8,9,block_1\n" "0,4,QUAD_4_2D,3,4,9,10,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,(1-EPS), 2,(1+EPS), 2,2, 1,2, 0,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -312,7 +312,7 @@ stk::mesh::PartVector setup_mesh_4block_4quad_pacman(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,4,7,8,9,block_3\n" "0,4,QUAD_4_2D,3,4,9,10,block_4"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,(1-EPS), 2,(1+EPS), 2,2, 1,2, 0,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -325,7 +325,7 @@ stk::mesh::PartVector setup_mesh_1block_4quad_1hinge(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,7,8,9,4,block_1\n" "0,4,QUAD_4_2D,4,9,10,11,block_1"; std::vector coordinates = { 0,0, (1-EPS),0, 0,(1-EPS), 1,1, (1+EPS),0, 2,0, 2,1, 2,2, 1,2, 0,2, 0,(1+EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -341,7 +341,7 @@ stk::mesh::PartVector setup_mesh_4block_4quad_1hinge(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,7,8,9,4,block_3\n" 
"0,4,QUAD_4_2D,4,9,10,11,block_4"; std::vector coordinates = { 0,0, (1-EPS),0, 0,(1-EPS), 1,1, (1+EPS),0, 2,0, 2,1, 2,2, 1,2, 0,2, 0,(1+EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -356,7 +356,7 @@ stk::mesh::PartVector setup_mesh_2block_3quad_2tri_1hinge(stk::mesh::BulkData& b "0,4,QUAD_4_2D,7,8,9,4,block_2\n" "0,5,QUAD_4_2D,4,9,10,11,block_2"; std::vector coordinates = { 0,0, (1-EPS),0, 0,(1-EPS), 1,1, (1+EPS),0, 2,0, 2,1, 2,2, 1,2, 0,2, 0,(1+EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2}; } @@ -374,7 +374,7 @@ stk::mesh::PartVector setup_mesh_5block_3quad_2tri_1hinge(stk::mesh::BulkData& b "0,4,QUAD_4_2D,7,8,9,4,block_4\n" "0,5,QUAD_4_2D,4,9,10,11,block_5"; std::vector coordinates = { 0,0, (1-EPS),0, 0,(1-EPS), 1,1, (1+EPS),0, 2,0, 2,1, 2,2, 1,2, 0,2, 0,(1+EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4,&block5}; } @@ -385,7 +385,7 @@ stk::mesh::PartVector setup_mesh_1block_1hex(stk::mesh::BulkData& bulk) std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1"; std::vector coordinates = { 0,0,0, 1,0,0, 1,1,0, 0,1,0, 0,0,1, 1,0,1, 1,1,1, 0,1,1}; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + 
stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -401,7 +401,7 @@ stk::mesh::PartVector setup_mesh_1block_2hex(stk::mesh::BulkData& bulk) 0,0,2, 1,0,2, 1,1,2, 0,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -418,7 +418,7 @@ stk::mesh::PartVector setup_mesh_1block_2hex_1node_hinge(stk::mesh::BulkData& bu 1,0,2, 1,1,2, 0,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -434,7 +434,7 @@ stk::mesh::PartVector setup_mesh_1block_2hex_2node_hinge(stk::mesh::BulkData& bu 1,1,0, (0.5+EPS),(0.5+EPS),0, 1,EPS,1, 1,1,1, 0,(1+EPS),1, (0.5+EPS),(0.5+EPS),1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -452,7 +452,7 @@ stk::mesh::PartVector setup_mesh_1block_3hex_1node_hinge(stk::mesh::BulkData& bu (1+EPS),0,0, 2,0,0, 2,1,0, (1+EPS),1,0, 2,0,1, 2,1,1, (1+EPS),1,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -467,7 +467,7 @@ stk::mesh::PartVector setup_mesh_1block_2hex_face_test(stk::mesh::BulkData& bulk 2,0,0, 2,1,0, 2,0,1, 2,1,1 }; - 
stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -495,7 +495,7 @@ stk::mesh::PartVector setup_mesh_1block_8hex_flower_1node_hinge(stk::mesh::BulkD 0,(1+EPS),(1+EPS), (1-EPS),2,(1+EPS), 0,2,(1+EPS), 0,(1+EPS),2, (1-EPS),(1+EPS),2, (1-EPS),2,2, 0,2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -511,7 +511,7 @@ stk::mesh::PartVector setup_mesh_1block_2tet_1node_hinge(stk::mesh::BulkData& bu 1,1,0, 0,2,0, 0,1,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -526,7 +526,7 @@ stk::mesh::PartVector setup_mesh_1block_2hex_1edge_hinge(stk::mesh::BulkData& bu 2,0,0, 2,0,-1, 1,0,-1, 2,1,0, 2,1,-1, 1,1,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -542,7 +542,7 @@ stk::mesh::PartVector setup_mesh_2block_2hex_1edge_hinge(stk::mesh::BulkData& bu 2,0,0, 2,0,-1, 1,0,-1, 2,1,0, 2,1,-1, 1,1,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, 
coordinates)); return {&block1,&block2}; } @@ -559,7 +559,7 @@ stk::mesh::PartVector setup_mesh_1block_3hex_1edge_hinge(stk::mesh::BulkData& bu 0,(1+EPS),0, 1,(1+EPS),0, 1,2,0, 0,2,0, 1,2,1, 0,2,1, }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -578,7 +578,7 @@ stk::mesh::PartVector setup_mesh_3block_3hex_1edge_hinge(stk::mesh::BulkData& bu 0,(1+EPS),0, 1,(1+EPS),0, 1,2,0, 0,2,0, 1,2,1, 0,2,1, }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3}; } @@ -595,7 +595,7 @@ stk::mesh::PartVector setup_mesh_1block_3hex_1node_hinge_1edge_hinge(stk::mesh:: 2,(1+EPS),0, 2,(1+EPS),-1, 1,(1+EPS),-1, 1,2,0, 2,2,0, 2,2,-1, 1,2,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -614,7 +614,7 @@ stk::mesh::PartVector setup_mesh_3block_3hex_1node_hinge_1edge_hinge(stk::mesh:: 2,(1+EPS),0, 2,(1+EPS),-1, 1,(1+EPS),-1, 1,2,0, 2,2,0, 2,2,-1, 1,2,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3}; } @@ -631,7 +631,7 @@ stk::mesh::PartVector setup_mesh_1block_3hex_1node_hinge_1edge_hinge2(stk::mesh: 1,(1+EPS),0, 2,(1+EPS),-1, 
1,(1+EPS),-1, 1,2,0, 2,2,0, 2,2,-1, 1,2,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -650,7 +650,7 @@ stk::mesh::PartVector setup_mesh_3block_3hex_1node_hinge_1edge_hinge2(stk::mesh: 1,(1+EPS),0, 2,(1+EPS),-1, 1,(1+EPS),-1, 1,2,0, 2,2,0, 2,2,-1, 1,2,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3}; } @@ -667,7 +667,7 @@ stk::mesh::PartVector setup_mesh_1block_3hex_1node_hinge_1edge_hinge3(stk::mesh: 1,(1+EPS),0, 2,(1+EPS),0, 1,(1+EPS),-1, 1,2,0, 2,2,0, 2,2,-1, 1,2,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -686,7 +686,7 @@ stk::mesh::PartVector setup_mesh_3block_3hex_1node_hinge_1edge_hinge3(stk::mesh: 1,(1+EPS),0, 2,(1+EPS),0, 1,(1+EPS),-1, 1,2,0, 2,2,0, 2,2,-1, 1,2,-1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3}; } @@ -705,7 +705,7 @@ stk::mesh::PartVector setup_mesh_1block_4hex_bowtie_1edge_hinge(stk::mesh::BulkD 2,2,1, (1+EPS),2,1, (1-EPS),2,1, 0,2,1, 0,(1+EPS),1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -727,7 +727,7 @@ stk::mesh::PartVector setup_mesh_4block_4hex_bowtie_1edge_hinge(stk::mesh::BulkD 2,2,1, (1+EPS),2,1, (1-EPS),2,1, 0,2,1, 0,(1+EPS),1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -746,7 +746,7 @@ stk::mesh::PartVector setup_mesh_1block_two_by_two_hex_2edge_hinge(stk::mesh::Bu 2,2,0, 2,2,-1, 1,2,-1, }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -768,7 +768,7 @@ stk::mesh::PartVector setup_mesh_4block_two_by_two_hex_2edge_hinge(stk::mesh::Bu 2,2,0, 2,2,-1, 1,2,-1, }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -786,7 +786,7 @@ stk::mesh::PartVector setup_mesh_1block_four_hex_one_edge_one_node_hinge(stk::me 0,2,1, -1,2,1, -1,(1+EPS),1, 0,1,2, 0,2,2, -1,2,2, -1,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -807,7 +807,7 @@ 
stk::mesh::PartVector setup_mesh_4block_four_hex_one_edge_one_node_hinge(stk::me 0,2,1, -1,2,1, -1,(1+EPS),1, 0,1,2, 0,2,2, -1,2,2, -1,1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -826,7 +826,7 @@ stk::mesh::PartVector setup_mesh_1block_four_hex_2node_hinge(stk::mesh::BulkData 1,3,1, 0,3,1, 0,(2+EPS),1, 1,2,2, 1,3,2, 0,3,2, 0,2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -848,7 +848,7 @@ stk::mesh::PartVector setup_mesh_4block_four_hex_2node_hinge(stk::mesh::BulkData 1,3,1, 0,3,1, 0,(2+EPS),1, 1,2,2, 1,3,2, 0,3,2, 0,2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -868,7 +868,7 @@ stk::mesh::PartVector setup_mesh_1block_four_hex_2node_one_edge_hinge_manual(stk (1-EPS),1,0, (1-EPS),2,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -963,7 +963,7 @@ stk::mesh::PartVector setup_mesh_1block_four_hex_2node_one_edge_hinge(stk::mesh: (1-EPS),1,0, (1-EPS),2,0 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -987,7 +987,7 @@ stk::mesh::PartVector setup_mesh_1block_eight_tri_1node_hinge(stk::mesh::BulkDat 2,(2-EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1}; } @@ -1006,7 +1006,7 @@ stk::mesh::PartVector setup_mesh_4block_4quad_bowtie_1hinge(stk::mesh::BulkData& 0,0, (1-EPS),0, 0,(1-EPS), 1,1, (1+EPS),0, 2,0, 2,(1-EPS), 2,(1+EPS), 2,2, (1+EPS),2, (1-EPS),2, 0,2, 0,(1+EPS) }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3,&block4}; } @@ -1020,7 +1020,7 @@ stk::mesh::PartVector setup_mesh_3block_3quad_1hinge(stk::mesh::BulkData& bulk) "0,2,QUAD_4_2D,2,5,6,4,block_2\n" "0,3,QUAD_4_2D,4,7,8,9,block_3"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,(1-EPS), 2,(1+EPS), 2,2, 1,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2,&block3}; } @@ -1055,7 +1055,7 @@ void print_hinge_info(const stk::mesh::BulkData& bulk, bool is_debug() { - return stk::unit_test_util::simple_fields::has_option("--debug"); + return stk::unit_test_util::has_option("--debug"); } // Common Decompositions @@ -1425,7 +1425,7 @@ bool check_orphaned_nodes(stk::mesh::BulkData 
& bulk) void output_mesh(stk::mesh::BulkData & bulk, const std::string & fileName) { - std::string writeOutput = stk::unit_test_util::simple_fields::get_option("--output", "off"); + std::string writeOutput = stk::unit_test_util::get_option("--output", "off"); if (writeOutput == "on") { stk::io::write_mesh(fileName, bulk); } @@ -1439,7 +1439,7 @@ void output_mesh(stk::mesh::BulkData & bulk) int get_debug_level() { - int level = stk::unit_test_util::simple_fields::get_command_line_option("--debug", 0); + int level = stk::unit_test_util::get_command_line_option("--debug", 0); return std::max(level, 0); } @@ -1450,7 +1450,7 @@ stk::mesh::PartVector setup_mesh_2block_1quad(stk::mesh::BulkData& bulk) std::string meshDesc = "0,1,QUAD_4_2D,1,2,4,3,block_1"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(4u, get_num_total_nodes(bulk)); @@ -1465,7 +1465,7 @@ stk::mesh::PartVector setup_mesh_2block_2quad_only_on_proc_0(stk::mesh::BulkData std::string meshDesc = "0,1,QUAD_4_2D,1,2,4,3,block_1\n" "0,2,QUAD_4_2D,2,5,6,4,block_2"; std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,0, 2,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); return {&block1,&block2}; } @@ -1485,7 +1485,7 @@ stk::mesh::PartVector setup_mesh_2block_2quad(stk::mesh::BulkData& bulk) "1,2,QUAD_4_2D,2,3,6,5,block_2"; } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, 
stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(2u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(6u, get_num_total_nodes(bulk)); @@ -1508,7 +1508,7 @@ stk::mesh::PartVector setup_mesh_2block_2quad_reversed(stk::mesh::BulkData& bulk "1,2,QUAD_4_2D,2,3,6,5,block_1"; } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(2u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(6u, get_num_total_nodes(bulk)); @@ -1564,7 +1564,7 @@ stk::mesh::PartVector setup_mesh_2block_4quad_corner(stk::mesh::BulkData& bulk, } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(3u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -1646,7 +1646,7 @@ stk::mesh::PartVector setup_mesh_2block_4quad_swappedCorner(stk::mesh::BulkData& } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(3u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -1778,7 +1778,7 
@@ void setup_mesh_3block_4quad_base(stk::mesh::BulkData& bulk, stk::mesh::PartVect } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(4u, get_num_intersecting_nodes(bulk, blocks)); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -1835,7 +1835,7 @@ stk::mesh::PartVector setup_mesh_3block_4quad_reverse_ordinal(stk::mesh::BulkDat std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(4u, get_num_intersecting_nodes(bulk, {&vl, &radax, &lateral})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -1906,7 +1906,7 @@ stk::mesh::PartVector setup_mesh_3block_4quad_keepLowerRight(stk::mesh::BulkData } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(4u, get_num_intersecting_nodes(bulk, {&block1, &block2, &block3})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -1952,7 +1952,7 @@ stk::mesh::PartVector setup_mesh_2block_4quad_checkerboard(stk::mesh::BulkData& } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + 
stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(5u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -1999,7 +1999,7 @@ stk::mesh::PartVector setup_mesh_3block_4quad_checkerboard(stk::mesh::BulkData& } std::vector coordinates = { 0,0, 1,0, 2,0, 0,1, 1,1, 2,1, 0,2, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(5u, get_num_intersecting_nodes(bulk, {&block1, &block2, &block3})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -2023,7 +2023,7 @@ stk::mesh::PartVector setup_mesh_2block_2quad_diagonal(stk::mesh::BulkData& bulk } std::vector coordinates = { 0,0, 1,0, 0,1, 1,1, 2,1, 1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(1u, get_num_intersecting_nodes(bulk, {&block1, &block2})); EXPECT_EQ(7u, get_num_total_nodes(bulk)); @@ -2042,7 +2042,7 @@ stk::mesh::PartVector setup_mesh_3block_4quad_bowtie(stk::mesh::BulkData& bulk) "0,3,QUAD_4_2D,8,6,11,10,block_3\n" "0,4,QUAD_4_2D,6,9,13,12,block_1"; std::vector coordinates = { 0,0, 0.9,0, 1.1,0, 2,0, 0,0.9, 1,1, 2,0.9, 0,1.1, 2,1.1, 0,2, 0.9,2, 1.1,2, 2,2 }; - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ( 1u, get_num_intersecting_nodes(bulk, {&block1, &block2, &block3})); EXPECT_EQ(13u, get_num_total_nodes(bulk)); @@ 
-2191,7 +2191,7 @@ stk::mesh::PartVector setup_mesh_4block_4quad(stk::mesh::BulkData& bulk, unsigne break; } - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(5u, get_num_intersecting_nodes(bulk, {&block1, &block2, &block3, &block4})); EXPECT_EQ(9u, get_num_total_nodes(bulk)); @@ -2235,7 +2235,7 @@ stk::mesh::PartVector setup_mesh_6block_6quad(stk::mesh::BulkData& bulk) break; } - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(8u, get_num_intersecting_nodes(bulk, {&block1, &block2, &block3, &block4, &block5, &block6})); EXPECT_EQ(12u, get_num_total_nodes(bulk)); @@ -2288,7 +2288,7 @@ stk::mesh::PartVector setup_mesh_9block_9quad(stk::mesh::BulkData& bulk) break; } - stk::unit_test_util::simple_fields::setup_text_mesh(bulk, stk::unit_test_util::simple_fields::get_full_text_mesh_desc(meshDesc, coordinates)); + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); EXPECT_EQ(12u, get_num_intersecting_nodes(bulk, {&block1, &block2, &block3, &block4, &block5, &block6, &block7, &block8, &block9})); EXPECT_EQ(16u, get_num_total_nodes(bulk)); diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestCustomAura.hpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestCustomAura.hpp index cfc6f452d8d2..1c640dcb955e 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestCustomAura.hpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestCustomAura.hpp @@ -40,7 +40,7 @@ namespace aura_unit_tests { -class FourQuadShellsInSequenceFixture : public 
stk::unit_test_util::simple_fields::MeshFixture { +class FourQuadShellsInSequenceFixture : public stk::unit_test_util::MeshFixture { public: FourQuadShellsInSequenceFixture() { reset_mesh(); @@ -81,8 +81,8 @@ class FourQuadShellsInSequenceFixture : public stk::unit_test_util::simple_field "2,3,SHELL_QUAD_4,3,4,9,8,block_1\n" "3,4,SHELL_QUAD_4,4,5,10,9,block_1"; } - stk::unit_test_util::simple_fields::setup_text_mesh( - get_bulk(), stk::unit_test_util::simple_fields::get_full_text_mesh_desc(mesh_description, coordinates)); + stk::unit_test_util::setup_text_mesh( + get_bulk(), stk::unit_test_util::get_full_text_mesh_desc(mesh_description, coordinates)); } void print_local_node_comm(const int rank); diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDetectHinge.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDetectHinge.cpp index d55321052b75..691aaacbc084 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDetectHinge.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDetectHinge.cpp @@ -781,8 +781,8 @@ TEST(DetectHinge3D, inputFile) { std::shared_ptr bulkPtr = build_mesh(3,MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; - std::string inputFileName = stk::unit_test_util::simple_fields::get_option("--inputFile", ""); - bool nodesOnly = stk::unit_test_util::simple_fields::has_option("--nodesOnly"); + std::string inputFileName = stk::unit_test_util::get_option("--inputFile", ""); + bool nodesOnly = stk::unit_test_util::has_option("--nodesOnly"); if(!inputFileName.empty()) { double startTime = stk::wall_time(); @@ -792,7 +792,7 @@ TEST(DetectHinge3D, inputFile) stk::tools::impl::HingeEdgeVector hingeEdges; std::string blockList = ""; - std::string inputBlockList = stk::unit_test_util::simple_fields::get_command_line_option("--blockList", blockList); + std::string inputBlockList = stk::unit_test_util::get_command_line_option("--blockList", blockList); std::vector blocksToDetect = 
stk::split_csv_string(inputBlockList); for (std::string & blockToDetect : blocksToDetect) { @@ -1732,7 +1732,7 @@ TEST(SnipHinge, inputFile) { std::shared_ptr bulkPtr = build_mesh(3,MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; - std::string inputFileName = stk::unit_test_util::simple_fields::get_option("--inputFile", ""); + std::string inputFileName = stk::unit_test_util::get_option("--inputFile", ""); if(!inputFileName.empty()) { double startTime = stk::wall_time(); stk::io::fill_mesh(inputFileName, bulk); diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp index cc963e4019e5..02b4d526130f 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp @@ -28,10 +28,10 @@ using stk::unit_test_util::build_mesh; -class TestDisconnectBlocks2D : public stk::unit_test_util::simple_fields::MeshFixture +class TestDisconnectBlocks2D : public stk::unit_test_util::MeshFixture { protected: - TestDisconnectBlocks2D() : stk::unit_test_util::simple_fields::MeshFixture(2) + TestDisconnectBlocks2D() : stk::unit_test_util::MeshFixture(2) { setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); } @@ -58,8 +58,7 @@ TEST_F(TestDisconnectBlocks2D, disconnect_user_block_1block_1quad_empty_part) stk::tools::BlockPairVector blocksToDisconnect; blocksToDisconnect.emplace_back(blocks[0], &block2); - stk::tools::DisconnectBlocksOption option(stk::tools::DISCONNECT_LOCAL, stk::tools::SNIP_ALL_HINGES); - EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(get_bulk(), blocksToDisconnect, option)); + EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(get_bulk(), blocksToDisconnect, stk::tools::SNIP_ALL_HINGES)); } TEST_F(TestDisconnectBlocks2D, disconnect_2block_1quad) @@ -96,9 +95,7 @@ TEST_F(TestDisconnectBlocks2D, 
disconnect_user_blocks_2block_2quad_in_reverse_bl stk::tools::BlockPairVector blocksToDisconnect; blocksToDisconnect.emplace_back(blocks[1], blocks[0]); - stk::tools::DisconnectBlocksOption option(stk::tools::DISCONNECT_LOCAL, stk::tools::SNIP_ALL_HINGES); - - EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(get_bulk(), blocksToDisconnect, option)); + EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(get_bulk(), blocksToDisconnect, stk::tools::SNIP_ALL_HINGES)); } TEST_F(TestDisconnectBlocks2D, disconnect_2block_2quad_updateGraph) @@ -640,10 +637,10 @@ TEST_F(TestReconnectList2D, reconnect_9block_9quad_permutation2) } -class TestDisconnectBlocks : public stk::unit_test_util::simple_fields::MeshFixture +class TestDisconnectBlocks : public stk::unit_test_util::MeshFixture { protected: - TestDisconnectBlocks() : stk::unit_test_util::simple_fields::MeshFixture(3) + TestDisconnectBlocks() : stk::unit_test_util::MeshFixture(3) { setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); } @@ -803,7 +800,7 @@ TEST(DisconnectBlocks, input_mesh) std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_WORLD, stk::mesh::BulkData::NO_AUTO_AURA); stk::mesh::BulkData& bulk = *bulkPtr; - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-f", ""); + std::string exodusFileName = stk::unit_test_util::get_option("-f", ""); if (exodusFileName.empty()) return; stk::io::fill_mesh_with_auto_decomp(exodusFileName, bulk); @@ -1705,13 +1702,13 @@ void test_connection_pairs(stk::mesh::BulkData& bulk, stk::mesh::PartVector& all void test_user_block_disconnect(stk::mesh::BulkData& bulk, BlockConnectionVector& blockPairConnectionsToDisconnect, unsigned expectedFinalCommonNodeCount, - stk::tools::DisconnectBlocksOption disconnectOptions = stk::tools::DisconnectBlocksOption()) + stk::tools::SnipOption snipOption = stk::tools::PRESERVE_INITIAL_HINGES) { stk::mesh::PartVector allBlocksInMesh; stk::tools::impl::get_all_blocks_in_mesh(bulk, allBlocksInMesh); stk::tools::BlockPairVector 
blockPairsToDisconnect = convert_connection_vector_to_pair_vector(bulk, blockPairConnectionsToDisconnect); - stk::tools::disconnect_user_blocks(bulk, blockPairsToDisconnect, disconnectOptions); + stk::tools::disconnect_user_blocks(bulk, blockPairsToDisconnect, snipOption); test_connection_pairs(bulk, allBlocksInMesh, blockPairConnectionsToDisconnect, expectedFinalCommonNodeCount); } @@ -1929,10 +1926,10 @@ TEST(TestDisconnectInputFile, input_mesh) std::shared_ptr bulkPtr = build_mesh(3, MPI_COMM_WORLD); stk::mesh::BulkData& bulk = *bulkPtr; - std::string exodusFileName = stk::unit_test_util::simple_fields::get_option("-exoFile", ""); + std::string exodusFileName = stk::unit_test_util::get_option("-exoFile", ""); if (exodusFileName.empty()) return; - std::string disconnectBlockFile = stk::unit_test_util::simple_fields::get_option("-blockFile", ""); + std::string disconnectBlockFile = stk::unit_test_util::get_option("-blockFile", ""); if (disconnectBlockFile.empty()) return; stk::io::fill_mesh(exodusFileName, bulk); @@ -1968,7 +1965,7 @@ TEST(TestDisconnectInputFile, input_mesh) double meshReadTime = stk::wall_time(); std::cout << "Starting disconnect block sequence" << std::endl; - stk::tools::disconnect_user_blocks(bulk, disconnectBlockVec, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_LOCAL, stk::tools::PRESERVE_INITIAL_HINGES)); + stk::tools::disconnect_user_blocks(bulk, disconnectBlockVec, stk::tools::PRESERVE_INITIAL_HINGES); double disconnectTime = stk::wall_time(); @@ -2002,7 +1999,7 @@ TEST_F(TestDisconnectUserBlocks, disconnect_user_blocks_preserve_snip_option_4bl stk::mesh::PartVector blocks = setup_mesh_4block_4hex(get_bulk()); BlockConnectionVector blockPairsToDisconnectVector{BlockConnection("block_2","block_1",0), BlockConnection("block_2","block_4",0), BlockConnection("block_2","block_3",0)}; - test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 6u, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_GLOBAL, 
stk::tools::PRESERVE_INITIAL_HINGES)); + test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 6u, stk::tools::PRESERVE_INITIAL_HINGES); } } @@ -2012,7 +2009,7 @@ TEST_F(TestDisconnectUserBlocks, disconnect_user_blocks_snip_all_hinges_snip_opt stk::mesh::PartVector blocks = setup_mesh_4block_4hex(get_bulk()); BlockConnectionVector blockPairsToDisconnectVector{BlockConnection("block_2","block_1",0), BlockConnection("block_2","block_4",0)}; - test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 6u, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_GLOBAL, stk::tools::SNIP_ALL_HINGES)); + test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 6u, stk::tools::SNIP_ALL_HINGES); } } @@ -2022,7 +2019,7 @@ TEST_F(TestDisconnectUserBlocks2D, disconnect_user_blocks_2block_3quad_1hinge) stk::mesh::PartVector blocks = setup_mesh_3block_3quad_1hinge_linear_stack(get_bulk()); BlockConnectionVector blockPairsToDisconnectVector{BlockConnection("block_1","block_3",0)}; - test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 3u, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_LOCAL, stk::tools::PRESERVE_INITIAL_HINGES)); + test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 3u, stk::tools::PRESERVE_INITIAL_HINGES); } } @@ -2032,7 +2029,7 @@ TEST_F(TestDisconnectUserBlocks2D, disconnect_user_blocks_3blocks_4quad_3proc) stk::mesh::PartVector blocks = setup_mesh_3block_4quad_keepLowerRight(get_bulk(), 1); BlockConnectionVector blockPairsToDisconnectVector{BlockConnection("block_1","block_3",0), BlockConnection("block_2","block_3",0)}; - test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 2u, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_LOCAL, stk::tools::PRESERVE_INITIAL_HINGES)); + test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 2u, stk::tools::PRESERVE_INITIAL_HINGES); } } @@ -2043,7 +2040,7 @@ TEST_F(TestDisconnectUserBlocks2D, 
disconnect_user_blocks_3blocks_4quad_custom_o stk::mesh::PartVector blocks = setup_mesh_3block_4quad_reverse_ordinal(get_bulk()); BlockConnectionVector blockPairsToDisconnectVector{BlockConnection("vl","lateral",0), BlockConnection("radax","lateral",0)}; - test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 2u, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_LOCAL, stk::tools::PRESERVE_INITIAL_HINGES)); + test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 2u, stk::tools::PRESERVE_INITIAL_HINGES); } @@ -2070,7 +2067,7 @@ TEST_F(TestBlockPairCreation, test_block_pair_creation) void create_ngs_jtd_sub_mesh(stk::mesh::BulkData& bulk) { - stk::unit_test_util::simple_fields::ConstructedMesh data(3); + stk::unit_test_util::ConstructedMesh data(3); data.set_x_coordinates({ 2.69902090331971, 2.66623140978201, 2.75874573101832, 2.69902090331971, 2.69659806972163, 2.68053633989566, 2.66870203682596, 2.7137889539581, @@ -2101,7 +2098,7 @@ void create_ngs_jtd_sub_mesh(stk::mesh::BulkData& bulk) {30822787, 1}, {30824958, 1}, {26778235, 1}, {30822789, 1}, {30162567, 0}, {27186527, 1}, {29861241, 1}, {29528157, 0}, {30162566, 0}} ); - stk::unit_test_util::simple_fields::ConstructedElementBlock block1(stk::topology::TET_4, "block_1", 202, { {2, 8, 9, 10}, + stk::unit_test_util::ConstructedElementBlock block1(stk::topology::TET_4, "block_1", 202, { {2, 8, 9, 10}, {10, 8, 9, 6}, {5, 15, 16, 8}, {2, 8, 10, 5}, @@ -2114,13 +2111,13 @@ void create_ngs_jtd_sub_mesh(stk::mesh::BulkData& bulk) {6, 8, 7, 10} }); data.add_elem_block(block1); - stk::unit_test_util::simple_fields::ConstructedElementBlock block2(stk::topology::TET_4, "block_2", 223, { {15, 12, 8, 6}, + stk::unit_test_util::ConstructedElementBlock block2(stk::topology::TET_4, "block_2", 223, { {15, 12, 8, 6}, {8, 13, 9, 6}, {12, 16, 15, 8}, {8, 12, 13, 6} }); data.add_elem_block(block2); - stk::unit_test_util::simple_fields::ConstructedElementBlock block3(stk::topology::TET_4, 
"block_3", 245, { {2, 8, 4, 1}, + stk::unit_test_util::ConstructedElementBlock block3(stk::topology::TET_4, "block_3", 245, { {2, 8, 4, 1}, {2, 8, 1, 9}, {8, 11, 3, 4}, {8, 12, 11, 4}, @@ -2145,9 +2142,8 @@ TEST(TestNGSDisconnect, jtd_sub_mesh) stk::mesh::Part* block2 = meta.get_part("block_2"); stk::mesh::Part* block3 = meta.get_part("block_3"); - stk::tools::DisconnectBlocksOption disconnectOption(stk::tools::DISCONNECT_LOCAL, stk::tools::PRESERVE_INITIAL_HINGES); stk::tools::BlockPairVector disconnectPairs{{block1, block3}, {block2, block3}}; - EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(bulk, disconnectPairs, disconnectOption)); + EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(bulk, disconnectPairs, stk::tools::PRESERVE_INITIAL_HINGES)); } void create_2_tet10s_in_2_blocks_sharing_6_nodes(stk::mesh::BulkData &bulk) @@ -2253,10 +2249,10 @@ void expect_one_face_in_sideset_with_adjacent_blocks( } } -class TestDisconnectWithSidesets : public stk::unit_test_util::simple_fields::MeshFixture +class TestDisconnectWithSidesets : public stk::unit_test_util::MeshFixture { protected: - TestDisconnectWithSidesets() : stk::unit_test_util::simple_fields::MeshFixture(3) + TestDisconnectWithSidesets() : stk::unit_test_util::MeshFixture(3) { setup_empty_mesh(stk::mesh::BulkData::AUTO_AURA); } @@ -2405,7 +2401,7 @@ TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_internalSidesets) auto block1 = blocks[0]; auto block2 = blocks[1]; - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); @@ -2426,9 +2422,9 @@ TEST_F(TestDisconnectWithSidesets, fourHexesInThreeBlocks_with_internalSidesets) auto block2 = blocks[1]; auto block3 = blocks[2]; - 
stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}, + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}, stk::tools::BlockPair{block1, block3}, - stk::tools::BlockPair{block2, block3}}); + stk::tools::BlockPair{block2, block3}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block2, block3})); @@ -2448,7 +2444,7 @@ TEST_F(TestDisconnectWithSidesets, twoTet4InTwoBlocks_with_internalSidesets) auto block1 = blocks[0]; auto block2 = blocks[1]; - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); @@ -2468,9 +2464,9 @@ TEST_F(TestDisconnectWithSidesets, threeTet4InFourBlocks_with_internalSidesets) auto block2 = blocks[1]; auto block3 = blocks[2]; - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}, + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}, stk::tools::BlockPair{block1, block3}, - stk::tools::BlockPair{block2, block3}}); + stk::tools::BlockPair{block2, block3}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block2, block3})); @@ -2491,7 +2487,7 @@ TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_internalAndExternalS auto block1 = blocks[0]; auto block2 = blocks[1]; - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}}, 
stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); @@ -2511,7 +2507,7 @@ TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_dualInternalAndExter auto block1 = blocks[0]; auto block2 = blocks[1]; - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); @@ -2543,7 +2539,7 @@ TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_internalSidesetAndEm EXPECT_EQ(0u, numFacesInSurface2); EXPECT_EQ(0u, numFacesInSurface3); - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); @@ -2583,7 +2579,7 @@ TEST_F(TestDisconnectWithSidesets, ALRBC) unsigned numFacesInSurface1 = stk::mesh::count_selected_entities(*surface1, get_bulk().buckets(stk::topology::FACE_RANK)); EXPECT_EQ(1u, numFacesInSurface1); - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}, stk::tools::BlockPair{block2, block3}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}, stk::tools::BlockPair{block2, block3}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block2, block3})); @@ -2625,7 +2621,7 @@ TEST_F(TestDisconnectWithSidesets, ALRB) unsigned numFacesInSurface1 = 
stk::mesh::count_selected_entities(*surface1, get_bulk().buckets(stk::topology::FACE_RANK)); EXPECT_EQ(1u, numFacesInSurface1); - stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + stk::tools::disconnect_user_blocks(bulk, stk::tools::BlockPairVector{stk::tools::BlockPair{block1, block2}}, stk::tools::PRESERVE_INITIAL_HINGES); EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestMechanism.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestMechanism.cpp index 288da9111feb..2d86b2e813d3 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestMechanism.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestMechanism.cpp @@ -654,7 +654,7 @@ void test_clusters(const stk::mesh::BulkData& bulk, const std::vector bulk = build_mesh(dim,MPI_COMM_WORLD); - stk::unit_test_util::simple_fields::setup_text_mesh(*bulk, meshDesc); + stk::unit_test_util::setup_text_mesh(*bulk, meshDesc); MeshTraverser bfs(*bulk); test_clusters(*bulk, bfs.get_clusters(), goldIds); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_hex.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_hex.cpp index f0e038ff400a..6f8a22079e68 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_hex.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_hex.cpp @@ -212,6 +212,7 @@ void check_hex_8_on_device() stk::topology t = stk::topology::HEX_8; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(8u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -376,6 +377,7 @@ void check_hex_20_on_device() const stk::topology t = stk::topology::HEX_20; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(20u, numNodes); 
Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -539,6 +541,7 @@ void check_hex_27_on_device() const stk::topology t = stk::topology::HEX_27; const unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(27u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_node.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_node.cpp index 05a27c0cdd74..c0c0790e2900 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_node.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_node.cpp @@ -112,6 +112,7 @@ void check_node_on_device() NGP_EXPECT_EQ(t.face_topology(0), stk::topology::INVALID_TOPOLOGY); constexpr unsigned numNodes = 1; // Node actually has 0 nodes, but zero-length arrays are not allowed + NGP_EXPECT_EQ(1u, numNodes);//silly suppression of "unused" compiler warning check_lexicographical_smallest_permutation_ngp(t, goldPermutationNodeOrdinals); }); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_particle.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_particle.cpp index 16bab646603f..ac4506ecf3aa 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_particle.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_a/unit_test_particle.cpp @@ -88,6 +88,7 @@ void check_particle_on_device() stk::topology t = stk::topology::PARTICLE; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(1u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_pyramid.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_pyramid.cpp index 142994a3881e..cfda10a6538a 100644 --- 
a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_pyramid.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_pyramid.cpp @@ -125,6 +125,7 @@ void check_pyramid_5_on_device() stk::topology t = stk::topology::PYRAMID_5; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(5u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -262,6 +263,7 @@ void check_pyramid_13_on_device() stk::topology t = stk::topology::PYRAMID_13; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(13u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -399,6 +401,7 @@ void check_pyramid_14_on_device() stk::topology t = stk::topology::PYRAMID_14; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(14u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_quad.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_quad.cpp index 830df685a0b6..2e182fb4bc86 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_quad.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_quad.cpp @@ -114,6 +114,7 @@ void check_quad_4_on_device() stk::topology t = stk::topology::QUAD_4; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(4u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -238,6 +239,7 @@ void check_quad_6_on_device() stk::topology t = stk::topology::QUAD_6; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(6u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -366,6 +368,7 @@ void check_quad_8_on_device() stk::topology t = 
stk::topology::QUAD_8; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(8u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -486,6 +489,7 @@ void check_quad_9_on_device() stk::topology t = stk::topology::QUAD_9; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(9u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_spring.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_spring.cpp index a310255e45fe..3fe750a7c1bd 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_spring.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_spring.cpp @@ -100,6 +100,7 @@ void check_spring2_on_device() stk::topology t = stk::topology::SPRING_2; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(2u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -206,6 +207,7 @@ void check_spring3_on_device() stk::topology t = stk::topology::SPRING_3; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(3u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_tet.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_tet.cpp index e7ec85376b85..898d98b7afd6 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_tet.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_b/unit_test_tet.cpp @@ -130,6 +130,7 @@ void check_tet_4_on_device() stk::topology t = stk::topology::TET_4; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(4u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), 
KOKKOS_LAMBDA(const int i) { @@ -259,6 +260,7 @@ void check_tet_8_on_device() stk::topology t = stk::topology::TET_8; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(8u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -399,6 +401,7 @@ void check_tet_10_on_device() stk::topology t = stk::topology::TET_10; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(10u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -539,6 +542,7 @@ void check_tet_11_on_device() stk::topology t = stk::topology::TET_11; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(11u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp index 8634b82145e5..8ed599943345 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp @@ -127,6 +127,7 @@ void check_shell_quad_4_on_device() stk::topology t = stk::topology::SHELL_QUAD_4; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(4u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -265,6 +266,7 @@ void check_shell_quad_8_on_device() stk::topology t = stk::topology::SHELL_QUAD_8; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(8u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -403,6 +405,7 @@ void check_shell_quad_9_on_device() stk::topology t = stk::topology::SHELL_QUAD_9; constexpr unsigned numNodes = 
stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(9u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -458,4 +461,4 @@ NGP_TEST(stk_topology_ngp, shell_quad_9) check_shell_quad_9_on_device(); } -} \ No newline at end of file +} diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_side_beam.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_side_beam.cpp index 10b408564fc0..92278a1591dc 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_side_beam.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_side_beam.cpp @@ -101,6 +101,7 @@ void check_shell_side_beam_2_on_device() stk::topology t = stk::topology::SHELL_SIDE_BEAM_2; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(2u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -207,6 +208,7 @@ void check_shell_side_beam_3_on_device() stk::topology t = stk::topology::SHELL_SIDE_BEAM_3; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(3u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp index bb144cff0d06..5bf526b6ec65 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp @@ -123,6 +123,7 @@ void check_shell_tri_3_on_device() stk::topology t = stk::topology::SHELL_TRI_3; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(3u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -256,6 +257,7 @@ void check_shell_tri_4_on_device() stk::topology t 
= stk::topology::SHELL_TRI_4; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(4u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -389,6 +391,7 @@ void check_shell_tri_6_on_device() stk::topology t = stk::topology::SHELL_TRI_6; constexpr unsigned numNodes = stk::topology_detail::topology_data::num_nodes; + EXPECT_EQ(6u, numNodes); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 1), KOKKOS_LAMBDA(const int i) { @@ -447,4 +450,4 @@ NGP_TEST(stk_topology_ngp, shell_tri_6) check_shell_tri_6_on_device(); } -} \ No newline at end of file +} diff --git a/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestLeastSquares.cpp b/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestLeastSquares.cpp index 88986d089225..23d7718d733a 100644 --- a/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestLeastSquares.cpp +++ b/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestLeastSquares.cpp @@ -75,14 +75,14 @@ namespace { -class LeastSquaresTester : public stk::unit_test_util::simple_fields::MeshFixtureNoTest, public ::testing::Test { +class LeastSquaresTester : public stk::unit_test_util::MeshFixtureNoTest, public ::testing::Test { public: LeastSquaresTester() - : stk::unit_test_util::simple_fields::MeshFixtureNoTest(3) + : stk::unit_test_util::MeshFixtureNoTest(3) { - m_scaleFactorX = stk::unit_test_util::simple_fields::get_command_line_option("-sx", 1.0); - m_scaleFactorY = stk::unit_test_util::simple_fields::get_command_line_option("-sy", 1.0); - m_stretchFactorZ = stk::unit_test_util::simple_fields::get_command_line_option("-sz", 1.0); + m_scaleFactorX = stk::unit_test_util::get_command_line_option("-sx", 1.0); + m_scaleFactorY = stk::unit_test_util::get_command_line_option("-sy", 1.0); + m_stretchFactorZ = stk::unit_test_util::get_command_line_option("-sz", 1.0); setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); } @@ -140,9 +140,9 @@ class LeastSquaresTester : public 
stk::unit_test_util::simple_fields::MeshFixtur void generate_stretched_mesh(stk::mesh::BulkData& bulk, unsigned numElemPerDim) { - double scaleFactorX = stk::unit_test_util::simple_fields::get_command_line_option("-sx", 1.0); - double scaleFactorY = stk::unit_test_util::simple_fields::get_command_line_option("-sy", 1.0); - double stretchFactorZ = stk::unit_test_util::simple_fields::get_command_line_option("-sz", 1.0); + double scaleFactorX = stk::unit_test_util::get_command_line_option("-sx", 1.0); + double scaleFactorY = stk::unit_test_util::get_command_line_option("-sy", 1.0); + double stretchFactorZ = stk::unit_test_util::get_command_line_option("-sz", 1.0); ASSERT_TRUE(scaleFactorX >= 0.0); ASSERT_TRUE(scaleFactorY >= 0.0); diff --git a/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestPatch.cpp b/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestPatch.cpp index 730d8e0bf9fa..bc5862dfe8af 100644 --- a/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestPatch.cpp +++ b/packages/stk/stk_unit_tests/stk_transfer_util/UnitTestPatch.cpp @@ -71,10 +71,10 @@ namespace { -class PatchTester : public stk::unit_test_util::simple_fields::MeshFixtureNoTest, public ::testing::Test { +class PatchTester : public stk::unit_test_util::MeshFixtureNoTest, public ::testing::Test { public: PatchTester() - : stk::unit_test_util::simple_fields::MeshFixtureNoTest(3) + : stk::unit_test_util::MeshFixtureNoTest(3) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); } diff --git a/packages/stk/stk_unit_tests/stk_util/algorithmTimerTest.cpp b/packages/stk/stk_unit_tests/stk_util/algorithmTimerTest.cpp index eb8406610325..faa598886ac8 100644 --- a/packages/stk/stk_unit_tests/stk_util/algorithmTimerTest.cpp +++ b/packages/stk/stk_unit_tests/stk_util/algorithmTimerTest.cpp @@ -7,9 +7,9 @@ TEST(RunTimer, runs) { - const size_t numWork = stk::unit_test_util::simple_fields::get_command_line_option("-work", 10000); - const double tolerance = 
stk::unit_test_util::simple_fields::get_command_line_option("-tol", 1e-6); - const size_t minRuns = stk::unit_test_util::simple_fields::get_command_line_option("-min", 10000); + const size_t numWork = stk::unit_test_util::get_command_line_option("-work", 10000); + const double tolerance = stk::unit_test_util::get_command_line_option("-tol", 1e-6); + const size_t minRuns = stk::unit_test_util::get_command_line_option("-min", 10000); ASSERT_TRUE(tolerance > 0); stk::ParallelMachine comm = MPI_COMM_WORLD; diff --git a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestTimer.cpp b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestTimer.cpp index ff9bd467f561..223a8fdc18d6 100644 --- a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestTimer.cpp +++ b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestTimer.cpp @@ -47,7 +47,7 @@ #include // for sleep_for #include // for vector -using stk::unit_test_util::simple_fields::get_command_line_option; +using stk::unit_test_util::get_command_line_option; enum { TIMER_DOMAIN = 0x00001000, ///< Enable domain timers @@ -330,7 +330,7 @@ TEST(UnitTestTimer, YuugeNumberOfTimers) stk::diag::TimeBlock root_time_block(rootTimer); unsigned numTimers = 100; - numTimers = stk::unit_test_util::simple_fields::get_command_line_option("-numTimers", numTimers); + numTimers = stk::unit_test_util::get_command_line_option("-numTimers", numTimers); { static std::vector lap_timers; for (unsigned i = 0; i < numTimers; ++i) diff --git a/packages/stk/stk_unit_tests/stk_util/util/UnitTestMCSR.cpp b/packages/stk/stk_unit_tests/stk_util/util/UnitTestMCSR.cpp index 482530395e9d..a05f63fd4268 100644 --- a/packages/stk/stk_unit_tests/stk_util/util/UnitTestMCSR.cpp +++ b/packages/stk/stk_unit_tests/stk_util/util/UnitTestMCSR.cpp @@ -55,6 +55,14 @@ TEST( MCSR, basic) #endif } +TEST(MCSR, find_sorted_insertion_index_empty) +{ + std::vector items; + stk::util::IndexRange indices(0,0); + const int item = 42; + EXPECT_EQ(0, 
stk::util::find_sorted_insertion_index(items, indices, item)); +} + TEST(MCSR, addItem) { constexpr unsigned numRows = 3; diff --git a/packages/stk/stk_util/Jamfile b/packages/stk/stk_util/Jamfile index 83c798152441..8f5faafa5e5f 100644 --- a/packages/stk/stk_util/Jamfile +++ b/packages/stk/stk_util/Jamfile @@ -55,14 +55,18 @@ if $(RTenv-arg) = "user" { project votd : requirements $(sierra-warnings) + STK_SHOW_DEPRECATED_WARNINGS STK_HIDE_DEPRECATED_CODE - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM shared:SIERRA_DLOPEN_ENABLED darwin:_GNU_SOURCE=1 $(stk_util-root-inc) : usage-requirements + STK_SHOW_DEPRECATED_WARNINGS STK_HIDE_DEPRECATED_CODE - STK_BUILT_IN_SIERRA + STK_BUILT_FOR_SIERRA + STK_BUILT_WITH_BJAM shared:SIERRA_DLOPEN_ENABLED $(stk_util-root-inc) : build-dir $(stk_util-builddir) diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index 9ec38172e154..e78c59ef4158 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. 
-#define STK_VERSION 5190401 +#define STK_VERSION 5210300 namespace stk diff --git a/packages/stk/stk_util/stk_util/command_line/CMakeLists.txt b/packages/stk/stk_util/stk_util/command_line/CMakeLists.txt index 87edd6860fa1..23cd3f23bc96 100644 --- a/packages/stk/stk_util/stk_util/command_line/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/command_line/CMakeLists.txt @@ -59,5 +59,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_util/command_line/) IF(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_util_command_line DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_util_command_line EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() diff --git a/packages/stk/stk_util/stk_util/diag/CMakeLists.txt b/packages/stk/stk_util/stk_util/diag/CMakeLists.txt index 1c11624d638d..2a946ed06bdb 100644 --- a/packages/stk/stk_util/stk_util/diag/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/diag/CMakeLists.txt @@ -59,5 +59,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_util/diag/) IF(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_util_diag DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_util_diag EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() diff --git a/packages/stk/stk_util/stk_util/environment/CMakeLists.txt b/packages/stk/stk_util/stk_util/environment/CMakeLists.txt index 6cdd6713e863..8a62a9d893b6 100644 --- a/packages/stk/stk_util/stk_util/environment/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/environment/CMakeLists.txt @@ -59,5 +59,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_util/environment/) IF(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_util_env DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_util_env EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() diff --git a/packages/stk/stk_util/stk_util/ngp/CMakeLists.txt b/packages/stk/stk_util/stk_util/ngp/CMakeLists.txt index 
4958f365f4b9..c9faa65a6102 100644 --- a/packages/stk/stk_util/stk_util/ngp/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/ngp/CMakeLists.txt @@ -46,6 +46,8 @@ ELSE() $ $ ) + + INSTALL(TARGETS stk_util_ngp EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() INSTALL(FILES ${HEADERS} DESTINATION diff --git a/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt b/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt index ced0f931ec3f..729bc81c16d4 100644 --- a/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt @@ -71,7 +71,7 @@ ELSE() add_library(stk_util_parallel ${SOURCES}) IF(STK_HAS_MPI) - target_link_libraries(stk_util_parallel PUBLIC ${MPI_LIBRARIES}) + target_link_libraries(stk_util_parallel PUBLIC MPI::MPI_CXX) message("MPI_LIBRARIES: ${MPI_LIBRARIES}") ENDIF() @@ -87,5 +87,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_util/parallel/) IF(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_util_parallel DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_util_parallel EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() diff --git a/packages/stk/stk_util/stk_util/parallel/CommBuffer.cpp b/packages/stk/stk_util/stk_util/parallel/CommBuffer.cpp index 17e3d393f782..9ebc9eb8e992 100644 --- a/packages/stk/stk_util/stk_util/parallel/CommBuffer.cpp +++ b/packages/stk/stk_util/stk_util/parallel/CommBuffer.cpp @@ -29,6 +29,7 @@ void CommBuffer::set_buffer_ptrs(unsigned char* begin, unsigned char* ptr, unsig m_beg = begin; m_ptr = ptr; m_end = end; + m_offset = static_cast(m_ptr-m_beg); } -} \ No newline at end of file +} diff --git a/packages/stk/stk_util/stk_util/parallel/CommBuffer.hpp b/packages/stk/stk_util/stk_util/parallel/CommBuffer.hpp index e4a8dc80be4b..b39e29cb0598 100644 --- a/packages/stk/stk_util/stk_util/parallel/CommBuffer.hpp +++ b/packages/stk/stk_util/stk_util/parallel/CommBuffer.hpp @@ -134,7 +134,7 @@ class CommBuffer { /** 
Pointer to base of buffer. */ void * buffer() const ; - CommBuffer() : m_beg(nullptr), m_ptr(nullptr), m_end(nullptr) { } + CommBuffer() : m_beg(nullptr), m_ptr(nullptr), m_end(nullptr), m_offset(0) { } void set_buffer_ptrs(unsigned char* begin, unsigned char* ptr, unsigned char* end); @@ -151,13 +151,14 @@ class CommBuffer { ucharp m_beg ; ucharp m_ptr ; ucharp m_end ; + unsigned m_offset; }; //---------------------------------------------------------------------- //---------------------------------------------------------------------- // Inlined template implementations for the CommBuffer -template +template struct CommBufferAlign { static size_t align( size_t i ) { i %= N ; return i ? ( N - i ) : 0 ; } }; @@ -171,20 +172,22 @@ template inline CommBuffer &CommBuffer::pack( const T & value ) { - if (std::is_same::value) { + if constexpr (std::is_same_v) { return pack(value); } - enum { Size = sizeof(T) }; - size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); + static constexpr auto Size = sizeof(T); if ( m_beg ) { + size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); +//std::cout<<"m_beg: "<<(void*)m_beg<<", m_ptr: "<<(void*)m_ptr<<"m_ptr-m_beg: "<(m_ptr); + T *tmp = reinterpret_cast(m_ptr); *tmp = value ; m_ptr = reinterpret_cast( ++tmp ); } else { - m_ptr += nalign + Size ; + size_t nalign = CommBufferAlign::align( m_offset ); + m_offset += nalign + Size ; } return *this; } @@ -238,9 +241,9 @@ template inline CommBuffer &CommBuffer::pack( const T * value , size_t number ) { - enum { Size = sizeof(T) }; - size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); + static constexpr auto Size = sizeof(T); if ( m_beg ) { + size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); if ( m_end < m_ptr + nalign + number * Size ) { pack_overflow(); } while ( nalign ) { --nalign ; *m_ptr = 0 ; ++m_ptr ; } T * tmp = reinterpret_cast(m_ptr); @@ -248,7 +251,8 @@ CommBuffer &CommBuffer::pack( const T * value , size_t number ) m_ptr = reinterpret_cast( tmp ); } else { 
- m_ptr += nalign + number * Size ; + size_t nalign = CommBufferAlign::align( m_offset ); + m_offset += nalign + number * Size ; } return *this; } @@ -257,8 +261,13 @@ template inline CommBuffer &CommBuffer::skip( size_t number ) { - enum { Size = sizeof(T) }; - m_ptr += CommBufferAlign::align( m_ptr - m_beg ) + Size * number ; + static constexpr auto Size = sizeof(T); + if ( m_beg ) { + m_ptr += CommBufferAlign::align( m_ptr - m_beg ) + Size * number ; + } + else { + m_offset += CommBufferAlign::align( m_offset ) + Size * number ; + } if ( m_beg && m_end < m_ptr ) { unpack_overflow(); } return *this; } @@ -276,10 +285,10 @@ template inline CommBuffer &CommBuffer::unpack( T & value ) { - if (std::is_same::value) { + if constexpr (std::is_same_v) { return unpack(value); } - enum { Size = sizeof(T) }; + static constexpr auto Size = sizeof(T); const size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); T * tmp = reinterpret_cast( m_ptr + nalign ); value = *tmp ; @@ -318,8 +327,7 @@ CommBuffer &CommBuffer::unpack( std::map & value ) size_t ns; unpack(ns); - for (size_t i = 0; i < ns; ++i) - { + for (size_t i = 0; i < ns; ++i) { K key; unpack(key); @@ -350,7 +358,7 @@ template inline CommBuffer &CommBuffer::unpack( T * value , size_t number ) { - enum { Size = sizeof(T) }; + static constexpr auto Size = sizeof(T); const size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); T * tmp = reinterpret_cast( m_ptr + nalign ); while ( number ) { --number ; *value = *tmp ; ++tmp ; ++value ; } @@ -403,7 +411,7 @@ template inline CommBuffer &CommBuffer::peek( T * value , size_t number ) { - enum { Size = sizeof(T) }; + static constexpr auto Size = sizeof(T); const size_t nalign = CommBufferAlign::align( m_ptr - m_beg ); T * tmp = reinterpret_cast( m_ptr + nalign ); while ( number ) { --number ; *value = *tmp ; ++tmp ; ++value ; } @@ -421,11 +429,11 @@ size_t CommBuffer::capacity() const inline size_t CommBuffer::size() const -{ return m_ptr - m_beg ; } +{ return m_beg ? 
static_cast(m_ptr - m_beg) : static_cast(m_offset) ; } inline void CommBuffer::set_size(size_t newsize_bytes) -{ m_beg = nullptr; m_ptr = nullptr; m_ptr += newsize_bytes ; m_end = nullptr; } +{ m_beg = nullptr; m_ptr = nullptr; m_offset = newsize_bytes ; m_end = nullptr; } inline ptrdiff_t CommBuffer::remaining() const diff --git a/packages/stk/stk_util/stk_util/registry/CMakeLists.txt b/packages/stk/stk_util/stk_util/registry/CMakeLists.txt index c741aa6d17c2..db205b4e81c1 100644 --- a/packages/stk/stk_util/stk_util/registry/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/registry/CMakeLists.txt @@ -59,5 +59,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_util/registry/) IF(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_util_registry DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_util_registry EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index e4eba23b5543..931e4d275339 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. 
-#define STK_VERSION_STRING "5.19.4-573-gfdf674ff" +#define STK_VERSION_STRING "5.21.3-99-g30df4ff9" #endif namespace stk { diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.hpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.hpp index 863e25be9201..bd7963d9efd2 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.hpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.hpp @@ -251,7 +251,7 @@ std::string get_version(const std::string& executableName); } // namespace stk -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA #undef VERSION // Nice, Trilinos leaves us this gem namespace sierra { @@ -259,7 +259,7 @@ namespace sierra { typedef stk::ProductRegistry ProductRegistry; } // namespace sierra -#endif // STK_BUILT_IN_SIERRA +#endif // STK_BUILT_FOR_SIERRA #endif // STK_UTIL_REGISTRY_PRODUCTREGISTRY_HPP diff --git a/packages/stk/stk_util/stk_util/stk_config.h b/packages/stk/stk_util/stk_util/stk_config.h index a49674f41a0f..26a9c6b8394c 100644 --- a/packages/stk/stk_util/stk_util/stk_config.h +++ b/packages/stk/stk_util/stk_util/stk_config.h @@ -35,7 +35,7 @@ #ifndef stk_util_config_h #define stk_util_config_h -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_WITH_BJAM #define STK_HAS_MPI #define STK_HAS_ARBORX diff --git a/packages/stk/stk_util/stk_util/util/BlasLapack.hpp b/packages/stk/stk_util/stk_util/util/BlasLapack.hpp index b74c59af9ada..69cb3ac89628 100644 --- a/packages/stk/stk_util/stk_util/util/BlasLapack.hpp +++ b/packages/stk/stk_util/stk_util/util/BlasLapack.hpp @@ -37,7 +37,7 @@ #include "stk_util/util/Fortran.hpp" -#ifdef STK_BUILT_IN_SIERRA +#ifdef STK_BUILT_FOR_SIERRA #include #endif @@ -71,7 +71,7 @@ void SIERRA_FORTRAN(dtrsm)(const char *side, const char *uplo, const char *trans const int *m, const int *n, const double *alpha, const double *a, const int *lda, double *b, const int *ldb); -#if !defined(_MKL_LAPACK_H_) && !defined(STK_BUILT_IN_SIERRA) +#if !defined(_MKL_LAPACK_H_) && 
!defined(STK_BUILT_FOR_SIERRA) void SIERRA_FORTRAN(dgels)(const char* trans, const int* m, const int* n, const int* nrhs, double* a, const int* lda, double* b, diff --git a/packages/stk/stk_util/stk_util/util/CMakeLists.txt b/packages/stk/stk_util/stk_util/util/CMakeLists.txt index 7dfc917393b9..7a488c2b4f24 100644 --- a/packages/stk/stk_util/stk_util/util/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/util/CMakeLists.txt @@ -46,7 +46,16 @@ IF(HAVE_STK_Trilinos) ELSE() add_library(stk_util_util ${SOURCES}) + if(STK_BUILT_FOR_SIERRA) + find_package(SierraLapack REQUIRED) + target_link_libraries(stk_util_util PUBLIC SierraLapack::sierra_blas_lapack) + else() + find_package(BLAS REQUIRED) + target_link_libraries(stk_util_util PUBLIC BLAS::BLAS) + endif() + target_link_libraries(stk_util_util PUBLIC Kokkos::kokkos) + target_compile_definitions(stk_util_util PUBLIC FORTRAN_ONE_UNDERSCORE) ENDIF() target_include_directories(stk_util_util PUBLIC @@ -62,6 +71,5 @@ INSTALL(FILES ${HEADERS} DESTINATION ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}/stk_util/util/) IF(NOT HAVE_STK_Trilinos) - INSTALL(TARGETS stk_util_util DESTINATION ${STK_INSTALL_LIBDIR}) + INSTALL(TARGETS stk_util_util EXPORT stkTargets DESTINATION ${STK_INSTALL_LIBDIR}) ENDIF() - diff --git a/packages/stk/stk_util/stk_util/util/MCSR.hpp b/packages/stk/stk_util/stk_util/util/MCSR.hpp index d9f9aeaf302e..68c1934e48fb 100644 --- a/packages/stk/stk_util/stk_util/util/MCSR.hpp +++ b/packages/stk/stk_util/stk_util/util/MCSR.hpp @@ -47,6 +47,23 @@ namespace stk { namespace util { +using IndexRange = std::pair; + +template +int find_sorted_insertion_index(const std::vector& items, const IndexRange& indices, const T& item) +{ + const T* begItems = items.data()+indices.first; + const T* endItems = items.data()+indices.second; + const T* it = std::lower_bound(begItems, endItems, item); + if (it != endItems) { + if (*it==item) { + return -1; + } + return indices.first + (it - begItems); + } + return indices.second; +} + 
template class MCSR { @@ -230,8 +247,6 @@ class MCSR } private: - using IndexRange = std::pair; - bool is_valid(const T& item) { return item != m_invalidItem; @@ -249,7 +264,7 @@ class MCSR IndexRange& indices = m_offsets[row]; if (indices.second >= numItems) { - int insertIdx = find_sorted_insertion_index(indices, item); + int insertIdx = find_sorted_insertion_index(m_items, indices, item); if (insertIdx < 0) { return false; } @@ -265,7 +280,7 @@ class MCSR return didInsert; } else { - const int insertIdx = find_sorted_insertion_index(indices, item); + const int insertIdx = find_sorted_insertion_index(m_items, indices, item); if (insertIdx < 0) { return false; } @@ -279,20 +294,6 @@ class MCSR return false; } - int find_sorted_insertion_index(const IndexRange& indices, const T& item) - { - const T* begItems = &m_items[indices.first]; - const T* endItems = &m_items[indices.second]; - const T* it = std::lower_bound(begItems, endItems, item); - if (it != endItems) { - if (*it==item) { - return -1; - } - return indices.first + (it - begItems); - } - return indices.second; - } - bool insert_item_at_index(IndexRange& indices, int insertIdx, const T& item) { if (insertIdx < 0) { @@ -311,7 +312,7 @@ class MCSR bool insert_item_into_sorted_range(IndexRange& indices, const T& item) { - return insert_item_at_index(indices, find_sorted_insertion_index(indices, item), item); + return insert_item_at_index(indices, find_sorted_insertion_index(m_items, indices, item), item); } unsigned move_items_to_end(unsigned row) diff --git a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp index 7e09ad6b4344..567e4f875024 100644 --- a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp +++ b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp @@ -53,7 +53,7 @@ class NgpVector NgpVector(const std::string &n, size_t s) : mSize(s), deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), - 
hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, HostSpace(), deviceVals)) + hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) { } NgpVector(size_t s) : NgpVector(get_default_name(), s) @@ -151,7 +151,7 @@ class NgpVector } protected: - typedef Kokkos::View DeviceType; + typedef Kokkos::View DeviceType; typedef typename DeviceType::HostMirror HostType; virtual DeviceType get_new_vals_of_size(size_t s) diff --git a/packages/stk/stk_util/stk_util/util/concat_variable_name.cpp b/packages/stk/stk_util/stk_util/util/concat_variable_name.cpp index e96875819612..a6fecf922be9 100644 --- a/packages/stk/stk_util/stk_util/util/concat_variable_name.cpp +++ b/packages/stk/stk_util/stk_util/util/concat_variable_name.cpp @@ -36,29 +36,28 @@ #include // for size_t namespace stk { - namespace util { - bool concat_variable_name(const std::string& first_string, - const std::string& second_string, - std::string& concat_string) { - int num_left_paren_first_string = 0; - int num_right_paren_first_string = 0; - int num_left_paren_second_string = 0; - int num_right_paren_second_string = 0; - for(size_t ifirst=0; ifirst Date: Mon, 12 Aug 2024 21:39:50 -0600 Subject: [PATCH 26/37] ifpack2 : add option to use Zoltan2 for belos-test --- packages/ifpack2/test/belos/build_problem.hpp | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/packages/ifpack2/test/belos/build_problem.hpp b/packages/ifpack2/test/belos/build_problem.hpp index f4e699d0894a..46d51a9417e8 100644 --- a/packages/ifpack2/test/belos/build_problem.hpp +++ b/packages/ifpack2/test/belos/build_problem.hpp @@ -58,6 +58,12 @@ #include "BelosLinearProblem.hpp" #include "BelosTpetraAdapter.hpp" +#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) +# include "Zoltan2_PartitioningProblem.hpp" +# include "Zoltan2_XpetraCrsMatrixAdapter.hpp" +# include "Zoltan2_XpetraMultiVectorAdapter.hpp" +#endif + #include "read_matrix.hpp" #include "build_precond.hpp" @@ 
-101,7 +107,11 @@ build_problem (Teuchos::ParameterList& test_params, std::string hb_file("not specified"); Ifpack2::getParameter(test_params, "hb_file", hb_file); bool useMatrixWithConstGraph = false; + bool useZoltan2 = false; + bool useParMETIS = false; Ifpack2::getParameter(test_params, "Use matrix with const graph", useMatrixWithConstGraph); + Ifpack2::getParameter(test_params, "Use Zoltan2", useZoltan2); + Ifpack2::getParameter(test_params, "Use ParMetis", useParMETIS); if (mm_file != "not specified") { if (comm->getRank() == 0) { @@ -222,6 +232,61 @@ build_problem (Teuchos::ParameterList& test_params, x->putScalar (STS::zero ()); } + if (useZoltan2) { +#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + // Create an input adapter for the Tpetra matrix. + Zoltan2::XpetraCrsMatrixAdapter + zoltan_matrix(A); + + // Specify partitioning parameters + Teuchos::ParameterList zoltan_params; + zoltan_params.set("partitioning_approach", "partition"); + // + if (useParMETIS) { + if (comm->getRank() == 0) { + std::cout << "Using Zoltan2(ParMETIS)" << std::endl; + } + zoltan_params.set("algorithm", "parmetis"); + zoltan_params.set("symmetrize_input", "transpose"); + zoltan_params.set("partitioning_objective", "minimize_cut_edge_weight"); + } else { + if (comm->getRank() == 0) { + std::cout << "Using Zoltan2(HyperGraph)" << std::endl; + } + zoltan_params.set("algorithm", "phg"); + } + + // Create and solve partitioning problem + Zoltan2::PartitioningProblem> + problem(&zoltan_matrix, &zoltan_params); + problem.solve(); + + // Redistribute matrix + RCP zoltan_A; + zoltan_matrix.applyPartitioningSolution (*A, zoltan_A, problem.getSolution()); + // Set it as coefficient matrix + A = zoltan_A; + + // Redistribute RHS + RCP zoltan_b; + Zoltan2::XpetraMultiVectorAdapter adapterRHS(rcpFromRef (*b)); + adapterRHS.applyPartitioningSolution (*b, zoltan_b, problem.getSolution()); + // Set it as RHS + b = zoltan_b; + + // Redistribute Sol + RCP zoltan_x; + 
Zoltan2::XpetraMultiVectorAdapter adapterSol(rcpFromRef (*x)); + adapterSol.applyPartitioningSolution (*x, zoltan_x, problem.getSolution()); + // Set it as Sol + x = zoltan_x; +#else + TEUCHOS_TEST_FOR_EXCEPTION( + useZoltan2, std::invalid_argument, + "Both Xpetra and Zoltan2 are neeeded to usee Zoltan2."); +#endif + } + Teuchos::RCP< BLinProb > problem; Teuchos::RCP borderedA; if (nullVec == Teuchos::null) { From 32b3639841c4e335a6dcdfc3d80b7bd28e51ad8a Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 13 Aug 2024 10:28:04 -0600 Subject: [PATCH 27/37] Teko: Build DiagonalPreconditionerFactory for Tpetra Signed-off-by: Christian Glusa --- packages/teko/src/CMakeLists.txt | 2 -- packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp | 7 ++++++- packages/teko/src/Teko_PreconditionerFactory.cpp | 4 +--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/teko/src/CMakeLists.txt b/packages/teko/src/CMakeLists.txt index 555dfb499ba9..d8b016add0da 100644 --- a/packages/teko/src/CMakeLists.txt +++ b/packages/teko/src/CMakeLists.txt @@ -55,13 +55,11 @@ IF(NOT TEKO_HAVE_EPETRA) LIST(REMOVE_ITEM SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/NS/Teko_ALOperator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Teko_DiagonalPreconditionerOp.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Teko_DiagonalPreconditionerFactory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Teko_ProbingPreconditionerFactory.cpp ) LIST(REMOVE_ITEM HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/NS/Teko_ALOperator.hpp ${CMAKE_CURRENT_SOURCE_DIR}/Teko_DiagonalPreconditionerOp.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/Teko_DiagonalPreconditionerFactory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/Teko_ProbingPreconditionerFactory.hpp ) ELSE() diff --git a/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp b/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp index 5f9b0745a59f..c52bf7ac4740 100644 --- a/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp +++ b/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp @@ -9,12 +9,16 @@ #include 
"Teko_DiagonalPreconditionerFactory.hpp" #include "Teko_DiagonalPreconditionerOp.hpp" +#ifdef TEKO_HAVE_EPETRA #include "Thyra_get_Epetra_Operator.hpp" #include "Epetra_CrsMatrix.h" #include "EpetraExt_PointToBlockDiagPermute.h" +#endif #include "Teko_TpetraHelpers.hpp" +#ifdef TEKO_HAVE_EPETRA #include "Thyra_EpetraLinearOp.hpp" +#endif #include "Thyra_TpetraLinearOp.hpp" using Teuchos::rcp; @@ -40,7 +44,7 @@ LinearOp DiagonalPreconditionerFactory::buildPreconditionerOperator( TEUCHOS_TEST_FOR_EXCEPTION(TpetraHelpers::isTpetraLinearOp(lo), std::runtime_error, "BlkDiag not implemented for Tpetra operators"); - +#ifdef TEKO_HAVE_EPETRA // Get the underlying Epetra_CrsMatrix, if we have one Teuchos::RCP eo = Thyra::get_Epetra_Operator(*lo); TEUCHOS_ASSERT(eo != Teuchos::null); @@ -63,6 +67,7 @@ LinearOp DiagonalPreconditionerFactory::buildPreconditionerOperator( // Build the LinearOp object (NTS: swapping the range and domain) // LinearOp MyOp = Teuchos::rcp(new // DiagonalPreconditionerOp(MyState.BDP_,lo->domain(),lo->range())); +#endif } return getInvDiagonalOp(lo, diagonalType_); diff --git a/packages/teko/src/Teko_PreconditionerFactory.cpp b/packages/teko/src/Teko_PreconditionerFactory.cpp index f344b637ad11..290e92bf1ef3 100644 --- a/packages/teko/src/Teko_PreconditionerFactory.cpp +++ b/packages/teko/src/Teko_PreconditionerFactory.cpp @@ -23,8 +23,8 @@ #include "Teko_IterativePreconditionerFactory.hpp" #include "Teko_DiagnosticPreconditionerFactory.hpp" #include "Teko_DiagonallyScaledPreconditionerFactory.hpp" -#ifdef TEKO_HAVE_EPETRA #include "Teko_DiagonalPreconditionerFactory.hpp" +#ifdef TEKO_HAVE_EPETRA #include "Teko_ProbingPreconditionerFactory.hpp" #endif #include "Teko_IdentityPreconditionerFactory.hpp" @@ -263,10 +263,8 @@ void PreconditionerFactory::initializePrecFactoryBuilder() { clone = rcp(new AutoClone()); precFactoryBuilder_.addClone("Iterative Preconditioner", clone); -#ifdef TEKO_HAVE_EPETRA clone = rcp(new AutoClone()); 
precFactoryBuilder_.addClone("Explicit Diagonal Preconditioner", clone); -#endif clone = rcp(new AutoClone()); precFactoryBuilder_.addClone("Diagnostic Inverse", clone); From e05307e92b58117755cfc62fff409a17bff6df13 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 13 Aug 2024 12:01:35 -0600 Subject: [PATCH 28/37] Ifpack2 : typo --- packages/ifpack2/test/belos/build_problem.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ifpack2/test/belos/build_problem.hpp b/packages/ifpack2/test/belos/build_problem.hpp index 46d51a9417e8..f104a2117512 100644 --- a/packages/ifpack2/test/belos/build_problem.hpp +++ b/packages/ifpack2/test/belos/build_problem.hpp @@ -283,7 +283,7 @@ build_problem (Teuchos::ParameterList& test_params, #else TEUCHOS_TEST_FOR_EXCEPTION( useZoltan2, std::invalid_argument, - "Both Xpetra and Zoltan2 are neeeded to usee Zoltan2."); + "Both Xpetra and Zoltan2 are needed to usee Zoltan2."); #endif } From b92ec436c20110d34e1b329130410cf57e9c9974 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 13 Aug 2024 12:26:43 -0600 Subject: [PATCH 29/37] Ifpack2 : add Zoltan2 to TEST_OPTIONAL_DEP_PACKAGES --- packages/ifpack2/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ifpack2/cmake/Dependencies.cmake b/packages/ifpack2/cmake/Dependencies.cmake index dc5b40abf678..ae074cb404d1 100644 --- a/packages/ifpack2/cmake/Dependencies.cmake +++ b/packages/ifpack2/cmake/Dependencies.cmake @@ -1,7 +1,7 @@ SET(LIB_REQUIRED_DEP_PACKAGES Belos Teuchos Tpetra KokkosKernels) SET(LIB_OPTIONAL_DEP_PACKAGES Xpetra Zoltan2Core ThyraTpetraAdapters Amesos2 ShyLU_NodeBasker ShyLU_NodeHTS ShyLU_NodeFastILU) SET(TEST_REQUIRED_DEP_PACKAGES Belos Galeri) -SET(TEST_OPTIONAL_DEP_PACKAGES Amesos2 ShyLU_NodeHTS ML AztecOO Epetra) +SET(TEST_OPTIONAL_DEP_PACKAGES Amesos2 ShyLU_NodeHTS ML AztecOO Epetra Zoltan2Core) SET(LIB_REQUIRED_DEP_TPLS) SET(LIB_OPTIONAL_DEP_TPLS HYPRE Cholmod Lemon METIS MPI) 
SET(TEST_REQUIRED_DEP_TPLS) From 849dba7cb60bed0c4e67ec5d57fb4de4ea48b288 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 13 Aug 2024 12:36:34 -0600 Subject: [PATCH 30/37] ifpack2 : more typo --- packages/ifpack2/test/belos/build_problem.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ifpack2/test/belos/build_problem.hpp b/packages/ifpack2/test/belos/build_problem.hpp index f104a2117512..ead116362817 100644 --- a/packages/ifpack2/test/belos/build_problem.hpp +++ b/packages/ifpack2/test/belos/build_problem.hpp @@ -283,7 +283,7 @@ build_problem (Teuchos::ParameterList& test_params, #else TEUCHOS_TEST_FOR_EXCEPTION( useZoltan2, std::invalid_argument, - "Both Xpetra and Zoltan2 are needed to usee Zoltan2."); + "Both Xpetra and Zoltan2 are needed to use Zoltan2."); #endif } From cec419e8f89e169980dadcec51beb4fc9efbd788 Mon Sep 17 00:00:00 2001 From: Kim Liegeois Date: Thu, 1 Aug 2024 09:10:20 -0600 Subject: [PATCH 31/37] add an option to use LID --- .../src/Ifpack2_BlockRelaxation_def.hpp | 20 +++++++++++-------- .../src/Ifpack2_BlockTriDiContainer_def.hpp | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp index 4d27076edb9a..a21900bbf30f 100644 --- a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp @@ -150,6 +150,7 @@ getValidParameters () const validParams->set("partitioner: print level", false); validParams->set("partitioner: explicit convert to BlockCrs", false); validParams->set("partitioner: checkBlockConsistency", true); + validParams->set("partitioner: use LIDs", true); return validParams; } @@ -612,20 +613,23 @@ initialize () bool use_explicit_conversion = List_.isParameter("partitioner: explicit convert to BlockCrs") && List_.get("partitioner: explicit convert to BlockCrs"); TEUCHOS_TEST_FOR_EXCEPT_MSG (use_explicit_conversion && 
block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); + bool use_LID = !List_.isParameter("partitioner: use LIDs") || List_.get("partitioner: use LIDs"); + bool check_block_consistency = !List_.isParameter("partitioner: checkBlockConsistency") || List_.get("partitioner: checkBlockConsistency"); + + if ( (use_LID || !use_explicit_conversion) && check_block_consistency ) { + if ( !A_->getGraph ()->getImporter().is_null()) { + TEUCHOS_TEST_FOR_EXCEPT_MSG + (!Tpetra::Import_Util::checkBlockConsistency(*(A_->getGraph ()->getColMap()), block_size), + "The pointwise graph of the input matrix A pointwise is not consistent with block_size."); + } + } if(use_explicit_conversion) { - A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(A_), block_size, false); + A_bcrs = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(A_), block_size, use_LID); A_ = A_bcrs; hasBlockCrsMatrix_ = true; graph = A_->getGraph (); } else { - if ( !List_.isParameter("partitioner: checkBlockConsistency") || List_.get("partitioner: checkBlockConsistency")) { - if ( !A_->getGraph ()->getImporter().is_null()) { - TEUCHOS_TEST_FOR_EXCEPT_MSG - (!Tpetra::Import_Util::checkBlockConsistency(*(A_->getGraph ()->getColMap()), block_size), - "The pointwise graph of the input matrix A pointwise is not consistent with block_size."); - } - } graph = Tpetra::getBlockCrsGraph(*Teuchos::rcp_dynamic_cast(A_), block_size, true); } IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp index c98b5e564d81..0b51690c08a1 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp @@ -74,7 +74,7 @@ namespace Ifpack2 { (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); { IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix"); - 
impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size, false); + impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size, true); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } } From 5280ceb4d033aab991d225e21797a6597d5b0e08 Mon Sep 17 00:00:00 2001 From: Kim Liegeois Date: Wed, 7 Aug 2024 06:26:40 -0600 Subject: [PATCH 32/37] Reduces loop_over_local_elements timer by adding a test that runs on device and that is most likely true --- .../src/Ifpack2_BlockTriDiContainer_impl.hpp | 83 ++++++++++++++----- 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp index 5842854402be..8029d9034556 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp @@ -854,6 +854,28 @@ namespace Ifpack2 { impl_scalar_type_2d_view_tpetra getRemoteMultiVectorLocalView() const { return remote_multivector; } }; + template + struct are_same_struct { + ViewType1 keys1; + ViewType2 keys2; + + are_same_struct(ViewType1 keys1_, ViewType2 keys2_) : keys1(keys1_), keys2(keys2_) {} + KOKKOS_INLINE_FUNCTION + void operator()(int i, unsigned int& count) const { + if (keys1(i) != keys2(i)) count++; + } + }; + + template + bool are_same (ViewType1 keys1, ViewType2 keys2) { + unsigned int are_same_ = 0; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, keys1.extent(0)), + are_same_struct(keys1, keys2), + are_same_); + return are_same_==0; + } + /// /// setup async importer /// @@ -882,30 +904,45 @@ namespace Ifpack2 { const auto column_map = g.getColMap(); std::vector gids; + Kokkos::View column_map_global_iD_last; bool separate_remotes = true, found_first = false, need_owned_permutation = false; { IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::loop_over_local_elements"); - // This 
loop is relatively expensive - for (size_t i=0;igetLocalNumElements();++i) { - const global_ordinal_type gid = column_map->getGlobalElement(i); - if (!domain_map->isNodeGlobalElement(gid)) { - found_first = true; - gids.push_back(gid); - } else if (found_first) { - separate_remotes = false; - break; - } - if (!need_owned_permutation && - domain_map->getLocalElement(gid) != static_cast(i)) { - // The owned part of the domain and column maps are different - // orderings. We *could* do a super efficient impl of this case in the - // num_sweeps > 1 case by adding complexity to PermuteAndRepack. But, - // really, if a caller cares about speed, they wouldn't make different - // local permutations like this. So we punt on the best impl and go for - // a pretty good one: the permutation is done in place in - // compute_b_minus_Rx for the pure-owned part of the MVP. The only cost - // is the presumably worse memory access pattern of the input vector. - need_owned_permutation = true; + + auto column_map_global_iD = column_map->getMyGlobalIndicesDevice(); + auto domain_map_global_iD = domain_map->getMyGlobalIndicesDevice(); + + if(are_same(domain_map_global_iD, column_map_global_iD)) { + // this should be the most likely path + separate_remotes = true; + need_owned_permutation = false; + + column_map_global_iD_last = Kokkos::subview(column_map_global_iD, + Kokkos::pair(domain_map_global_iD.extent(0), column_map_global_iD.extent(0))); + } + else { + // This loop is relatively expensive + for (size_t i=0;igetLocalNumElements();++i) { + const global_ordinal_type gid = column_map->getGlobalElement(i); + if (!domain_map->isNodeGlobalElement(gid)) { + found_first = true; + gids.push_back(gid); + } else if (found_first) { + separate_remotes = false; + break; + } + if (!found_first && !need_owned_permutation && + domain_map->getLocalElement(gid) != static_cast(i)) { + // The owned part of the domain and column maps are different + // orderings. 
We *could* do a super efficient impl of this case in the + // num_sweeps > 1 case by adding complexity to PermuteAndRepack. But, + // really, if a caller cares about speed, they wouldn't make different + // local permutations like this. So we punt on the best impl and go for + // a pretty good one: the permutation is done in place in + // compute_b_minus_Rx for the pure-owned part of the MVP. The only cost + // is the presumably worse memory access pattern of the input vector. + need_owned_permutation = true; + } } } IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) @@ -915,7 +952,9 @@ namespace Ifpack2 { IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::separate_remotes"); const auto invalid = Teuchos::OrdinalTraits::invalid(); const auto parsimonious_col_map - = Teuchos::rcp(new tpetra_map_type(invalid, gids.data(), gids.size(), 0, domain_map->getComm())); + = need_owned_permutation ? + Teuchos::rcp(new tpetra_map_type(invalid, gids.data(), gids.size(), 0, domain_map->getComm())): + Teuchos::rcp(new tpetra_map_type(invalid, column_map_global_iD_last, 0, domain_map->getComm())); if (parsimonious_col_map->getGlobalNumElements() > 0) { // make the importer only if needed. 
local_ordinal_type_1d_view dm2cm; From 9aef81c153fd200028e374052b3b81cc6d22f27b Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 13 Aug 2024 13:51:03 -0600 Subject: [PATCH 33/37] MueLu: Fix CoalesceDropFactory_kokkos unit test w/ Cuda UVM Signed-off-by: Christian Glusa --- .../muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp index ad40c6f8e821..c856c755ec50 100644 --- a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp @@ -973,7 +973,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, Scalar, Local if (useKokkos) { auto graph = fineLevel.Get>("Graph", dropFact.get()); auto boundaryNodes_d = graph->GetBoundaryNodeMap(); - boundaryNodes = Kokkos::create_mirror_view(boundaryNodes_d); + boundaryNodes = Kokkos::View("boundaryNodes_host", boundaryNodes_d.extent(0)); Kokkos::deep_copy(boundaryNodes, boundaryNodes_d); } else { auto graph = fineLevel.Get>("Graph", dropFact.get()); From e8d359b0d05604cd2fe1f3198fbbbdb9403045cd Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Tue, 13 Aug 2024 16:04:17 -0600 Subject: [PATCH 34/37] Panzer: add point evaluator Recovers the old point evaluator, gets it running on cuda and adds unit tests. 
--- .../example/main_driver/CMakeLists.txt | 9 + .../main_driver/energy-ss-point-calc.xml | 297 ++++++++++++++++++ .../example/main_driver/main_driver.cpp | 165 +++++++--- .../Panzer_ResponseScatterEvaluator_Probe.hpp | 10 +- ...er_ResponseScatterEvaluator_Probe_impl.hpp | 166 +++++----- 5 files changed, 527 insertions(+), 120 deletions(-) create mode 100644 packages/panzer/adapters-stk/example/main_driver/energy-ss-point-calc.xml diff --git a/packages/panzer/adapters-stk/example/main_driver/CMakeLists.txt b/packages/panzer/adapters-stk/example/main_driver/CMakeLists.txt index d2da3b73d4dc..0ed82da7ca7d 100644 --- a/packages/panzer/adapters-stk/example/main_driver/CMakeLists.txt +++ b/packages/panzer/adapters-stk/example/main_driver/CMakeLists.txt @@ -20,6 +20,7 @@ TRIBITS_COPY_FILES_TO_BINARY_DIR(main_driver_files SOURCE_FILES energy-ss.xml energy-ss-tp.xml + energy-ss-point-calc.xml energy-ss-tp-delay-prec.xml energy-ss-loca-eigenvalue.xml energy-ss-blocked.xml @@ -56,6 +57,14 @@ TRIBITS_ADD_ADVANCED_TEST( PASS_REGULAR_EXPRESSION "panzer::MainDriver run completed." ) + TRIBITS_ADD_ADVANCED_TEST( + main_driver_energy-ss-point-calc + TEST_0 EXEC main_driver + ARGS --i=energy-ss-point-calc.xml --exodus-io-num-procs=1 --point-calc + PASS_REGULAR_EXPRESSION "panzer::MainDriver run completed." 
+ NUM_MPI_PROCS 4 + ) + TRIBITS_ADD_ADVANCED_TEST( main_driver_energy-ss-tp-delay-prec TEST_0 EXEC main_driver diff --git a/packages/panzer/adapters-stk/example/main_driver/energy-ss-point-calc.xml b/packages/panzer/adapters-stk/example/main_driver/energy-ss-point-calc.xml new file mode 100644 index 000000000000..c7b2807e1321 --- /dev/null +++ b/packages/panzer/adapters-stk/example/main_driver/energy-ss-point-calc.xml @@ -0,0 +1,297 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/packages/panzer/adapters-stk/example/main_driver/main_driver.cpp b/packages/panzer/adapters-stk/example/main_driver/main_driver.cpp index 66b8a3ad7957..3dbcbcb1daa7 100644 --- a/packages/panzer/adapters-stk/example/main_driver/main_driver.cpp +++ b/packages/panzer/adapters-stk/example/main_driver/main_driver.cpp @@ -44,6 +44,8 @@ #endif #include "user_app_ResponseEvaluatorFactory_HOFlux.hpp" +#include "Panzer_ResponseEvaluatorFactory_Probe.hpp" + #include #include @@ -72,30 +74,32 @@ int main(int argc, char *argv[]) try { const auto stackedTimer = Teuchos::rcp(new Teuchos::StackedTimer("Panzer Main Driver")); Teuchos::TimeMonitor::setStackedTimer(stackedTimer); - + Teuchos::RCP > comm = Teuchos::DefaultComm::getComm(); - + // Parse the command line arguments std::string input_file_name = "user_app.xml"; int exodus_io_num_procs = 0; bool pauseToAttachOn = false; bool fluxCalculation = false; + bool pointCalculation = false; bool 
printTimers = false; bool printInputPL = false; { Teuchos::CommandLineProcessor clp; - + clp.setOption("i", &input_file_name, "User_App input xml filename"); clp.setOption("exodus-io-num-procs", &exodus_io_num_procs, "Number of processes that can access the file system at the same time to read their portion of a sliced exodus file in parallel. Defaults to 0 - implies all processes for the run can access the file system at the same time."); clp.setOption("pause-to-attach","disable-pause-to-attach", &pauseToAttachOn, "Call pause to attach, default is off."); clp.setOption("flux-calc","disable-flux-calc", &fluxCalculation, "Enable the flux calculation."); + clp.setOption("point-calc","disable-point-calc", &pointCalculation, "Enable the probe evaluator unit test."); clp.setOption("time","no-time", &printTimers, "Print the timing information."); clp.setOption("pl","no-pl", &printTimers, "Print the input ParameterList at the start of the run."); - - Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = + + Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv,&std::cerr); - - TEUCHOS_TEST_FOR_EXCEPTION(parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL, + + TEUCHOS_TEST_FOR_EXCEPTION(parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL, std::runtime_error, "Failed to parse command line!"); } @@ -109,23 +113,23 @@ int main(int argc, char *argv[]) // Parse the input file and broadcast to other processes Teuchos::RCP input_params = Teuchos::rcp(new Teuchos::ParameterList("User_App Parameters")); Teuchos::updateParametersFromXmlFileAndBroadcast(input_file_name, input_params.ptr(), *comm); - + if (printInputPL) *out << *input_params << std::endl; Teuchos::ParameterList solver_factories = input_params->sublist("Solver Factories"); input_params->remove("Solver Factories"); - + // Add in the application specific equation set factory Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); // 
Add in the application specific closure model factory user_app::MyModelFactory_TemplateBuilder cm_builder; - panzer::ClosureModelFactory_TemplateManager cm_factory; + panzer::ClosureModelFactory_TemplateManager cm_factory; cm_factory.buildObjects(cm_builder); // Add in the application specific bc factory - user_app::BCFactory bc_factory; + user_app::BCFactory bc_factory; // Create the global data Teuchos::RCP global_data = panzer::createGlobalData(); @@ -152,13 +156,12 @@ int main(int argc, char *argv[]) me_factory.setParameterList(input_params); me_factory.buildObjects(comm,global_data,eqset_factory,bc_factory,cm_factory); - // add a volume response functional for each field + // add a volume response functional for each field for(Teuchos::ParameterList::ConstIterator itr=responses.begin();itr!=responses.end();++itr) { const std::string name = responses.name(itr); TEUCHOS_ASSERT(responses.entry(itr).isList()); Teuchos::ParameterList & lst = Teuchos::getValue(responses.entry(itr)); - // parameterize the builder panzer::FunctionalResponse_Builder builder; builder.comm = MPI_COMM_WORLD; // good enough @@ -166,18 +169,18 @@ int main(int argc, char *argv[]) builder.requiresCellIntegral = lst.isType("Requires Cell Integral") ? 
lst.get("Requires Cell Integral"): false; builder.quadPointField = lst.get("Field Name"); - // add the respone + // add the response std::vector eblocks; panzer::StringTokenizer(eblocks,lst.get("Element Blocks"),",",true); - + std::vector wkst_descs; - for(std::size_t i=0;i nof; { nof = Teuchos::rcp(new user_app::NOXObserverFactory(stkIOResponseLibrary)); - - Teuchos::RCP observers_to_build = + + Teuchos::RCP observers_to_build = Teuchos::parameterList(solver_factories.sublist("NOX Observers")); - + nof->setParameterList(observers_to_build); } @@ -208,9 +211,9 @@ int main(int argc, char *argv[]) #else solver = me_factory.buildResponseOnlyModelEvaluator(physics,global_data,nof.ptr()); #endif - } + } } - + // setup outputs to mesh on the stkIOResponseLibrary //////////////////////////////////////////////////////////////// @@ -222,7 +225,7 @@ int main(int argc, char *argv[]) { Teuchos::ParameterList user_data(input_params->sublist("User Data")); user_data.set("Workset Size",input_params->sublist("Assembly").get("Workset Size")); - + stkIOResponseLibrary->buildResponseEvaluators(physicsBlocks, cm_factory, input_params->sublist("Closure Models"), @@ -232,35 +235,35 @@ int main(int argc, char *argv[]) // setup outputs to mesh on the fluxResponseLibrary //////////////////////////////////////////////////////////////// - Teuchos::RCP > fluxResponseLibrary + Teuchos::RCP > fluxResponseLibrary = Teuchos::rcp(new panzer::ResponseLibrary); if(fluxCalculation) { fluxResponseLibrary->initialize(*rLibrary); - + // build high-order flux response { user_app::HOFluxResponse_Builder builder; builder.comm = MPI_COMM_WORLD; builder.cubatureDegree = 2; - + std::vector sidesets; sidesets.push_back(panzer::sidesetVolumeDescriptor("eblock-0_0","left")); - + fluxResponseLibrary->addResponse("HO-Flux",sidesets,builder); } - + { Teuchos::ParameterList user_data(input_params->sublist("User Data")); user_data.set("Workset Size",input_params->sublist("Assembly").get("Workset Size")); - + 
fluxResponseLibrary->buildResponseEvaluators(physicsBlocks, *eqset_factory, cm_factory, input_params->sublist("Closure Models"), user_data); } - + { Teuchos::RCP > resp = Teuchos::rcp_dynamic_cast >(fluxResponseLibrary->getResponse("HO-Flux"),true); @@ -269,12 +272,55 @@ int main(int argc, char *argv[]) resp->setVector(vec); } } - + + // setup outputs for the point calculation + //////////////////////////////////////////////////////////////// + + Teuchos::RCP > pointResponseLibrary + = Teuchos::rcp(new panzer::ResponseLibrary); + + if(pointCalculation) { + pointResponseLibrary->initialize(*rLibrary); + + { + panzer::ProbeResponse_Builder builder; + builder.comm = MPI_COMM_WORLD; + builder.point = Teuchos::Array{0.5,0.5}; // Bottom + builder.cubatureDegree = 2; + builder.fieldName = "TEMPERATURE"; + builder.applyDirichletToDerivative = false; + + std::vector descriptors; + descriptors.push_back(panzer::WorksetDescriptor("eblock-0_0")); + + pointResponseLibrary->addResponse("Value In Middle",descriptors,builder); + } + + { + Teuchos::ParameterList user_data(input_params->sublist("User Data")); + user_data.set("Workset Size",input_params->sublist("Assembly").get("Workset Size")); + + pointResponseLibrary->buildResponseEvaluators(physicsBlocks, + *eqset_factory, + cm_factory, + input_params->sublist("Closure Models"), + user_data); + } + + { + Teuchos::RCP > resp + = Teuchos::rcp_dynamic_cast >(pointResponseLibrary->getResponse("Value In Middle"),true); + + const auto vec = Thyra::createMember(*resp->getVectorSpace(),"Value In Middle Response Thyra Vector"); + resp->setVector(vec); + } + } + //////////////////////////////////////////////////////////////// - + // solve the system { - + // Set inputs Thyra::ModelEvaluatorBase::InArgs inArgs = solver->createInArgs(); const Thyra::ModelEvaluatorBase::InArgs inArgsNominal = solver->getNominalValues(); @@ -282,7 +328,7 @@ int main(int argc, char *argv[]) // Set outputs Thyra::ModelEvaluatorBase::OutArgs outArgs = 
solver->createOutArgs(); - // Solution vector is returned as extra respons vector + // Solution vector is returned as extra response vector Teuchos::RCP > gx = Thyra::createMember(*physics->get_x_space()); for(int i=0;i respOutArgs = physics->createOutArgs(); TEUCHOS_ASSERT(physics->Ng()==respOutArgs.Ng()); - + respInArgs.set_x(gx); - + // set up response out args for(int i=0;i > response = Thyra::createMember(*physics->get_g_space(i)); respOutArgs.set_g(i,response); } - + // Now, solve the problem and return the responses physics->evalModel(respInArgs, respOutArgs); - + // loop over out args for printing for(int i=0;i > response = respOutArgs.get_g(i); @@ -322,6 +368,7 @@ int main(int argc, char *argv[]) } if(fluxCalculation) { + stackedTimer->start("Flux Response Calculation"); // initialize the assembly container panzer::AssemblyEngineInArgs ae_inargs; ae_inargs.container_ = linObjFactory->buildLinearObjContainer(); @@ -351,6 +398,46 @@ int main(int argc, char *argv[]) *out << " " << currentRespName << " = " << resp->value << std::endl; } + stackedTimer->stop("Flux Response Calculation"); + } + + if(pointCalculation) { + stackedTimer->start("Point Value Response Calculation"); + // initialize the assembly container + panzer::AssemblyEngineInArgs ae_inargs; + ae_inargs.container_ = linObjFactory->buildLinearObjContainer(); + ae_inargs.ghostedContainer_ = linObjFactory->buildGhostedLinearObjContainer(); + ae_inargs.alpha = 0.0; + ae_inargs.beta = 1.0; + ae_inargs.evaluate_transient_terms = false; + + // initialize the ghosted container + linObjFactory->initializeGhostedContainer(panzer::LinearObjContainer::X,*ae_inargs.ghostedContainer_); + + const Teuchos::RCP> thGlobalContainer + = Teuchos::rcp_dynamic_cast>(ae_inargs.container_,true); + thGlobalContainer->set_x_th(gx); + + // evaluate current on contacts + pointResponseLibrary->addResponsesToInArgs(ae_inargs); + pointResponseLibrary->evaluate(ae_inargs); + + // output current values + *out << "\nPoint Values: 
\n"; + { + std::string currentRespName = "Value In Middle"; + + Teuchos::RCP > resp + = Teuchos::rcp_dynamic_cast >(pointResponseLibrary->getResponse(currentRespName),true); + + // Linear problem with analytic solution + const double gold_value = 0.5; + const double tol = 1.0e-8; + *out << " " << currentRespName << " = " << resp->value << ", error = " << fabs(resp->value - gold_value) << ", tol = " << tol << std::endl; + TEUCHOS_ASSERT(fabs(resp->value - gold_value) < tol); + } + + stackedTimer->stop("Point Value Response Calculation"); } } @@ -383,7 +470,7 @@ int main(int argc, char *argv[]) *out << "************ Caught Exception: End Error Report ************" << std::endl; status = -1; } - + // Teuchos::TimeMonitor::summarize(*out,false,true,false); #ifdef Panzer_BUILD_PAPI_SUPPORT diff --git a/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe.hpp b/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe.hpp index 9fad9bfb3444..acd020e9a31a 100644 --- a/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe.hpp +++ b/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe.hpp @@ -80,8 +80,14 @@ class ResponseScatterEvaluator_ProbeBase : void evaluateFields(typename Traits::EvalData d); + void postRegistrationSetup(typename Traits::SetupData, + PHX::FieldManager&); + void preEvaluate(typename Traits::PreEvalData d); + // Should be protected, but is public for cuda lambda support + bool findCellAndComputeBasisValues(typename Traits::EvalData d); + protected: typedef typename EvalT::ScalarT ScalarT; @@ -98,11 +104,11 @@ class ResponseScatterEvaluator_ProbeBase : PHX::MDField field_; // holds field values Teuchos::RCP scatterObj_; + bool haveProbe_; int cellIndex_; + size_t workset_id_; size_t num_basis, num_dim; Kokkos::DynRankView basis_values_; - - bool computeBasisValues(typename Traits::EvalData d); }; /** This class handles calculation of a DOF at a single point in space 
diff --git a/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe_impl.hpp b/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe_impl.hpp index 68e957fd2444..aaed5dc08fa2 100644 --- a/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe_impl.hpp +++ b/packages/panzer/disc-fe/src/responses/Panzer_ResponseScatterEvaluator_Probe_impl.hpp @@ -55,7 +55,9 @@ ResponseScatterEvaluator_ProbeBase( , topology_(ir.topology) , globalIndexer_(indexer) , scatterObj_(probeScatter) - , cellIndex_(0) + , haveProbe_(false) + , cellIndex_(-1) + , workset_id_(0) { using Teuchos::RCP; using Teuchos::rcp; @@ -82,6 +84,18 @@ ResponseScatterEvaluator_ProbeBase( this->setName(n); } +template +void ResponseScatterEvaluator_ProbeBase:: +postRegistrationSetup(typename Traits::SetupData sd, + PHX::FieldManager& ) +{ + for (const auto& workset : *sd.worksets_) { + this->findCellAndComputeBasisValues(workset); + if (haveProbe_) + break; + } +} + template void ResponseScatterEvaluator_ProbeBase:: preEvaluate(typename Traits::PreEvalData d) @@ -93,69 +107,72 @@ preEvaluate(typename Traits::PreEvalData d) true); } - template bool ResponseScatterEvaluator_ProbeBase:: -computeBasisValues(typename Traits::EvalData d) +findCellAndComputeBasisValues(typename Traits::EvalData d) { - typedef Intrepid2::CellTools CTD; - typedef Intrepid2::FunctionSpaceTools FST; - - const int num_points = 1; // Always a single point in this evaluator! - Kokkos::DynRankView inCell("inCell", this->wda(d).cell_node_coordinates.extent_int(0), num_points); - Kokkos::DynRankView physical_points_cell("physical_points_cell", this->wda(d).cell_node_coordinates.extent_int(0), num_points, num_dim); - for (panzer::index_t cell(0); cell < d.num_cells; ++cell) - for (size_t dim=0; dimwda(d).cell_node_coordinates.get_view(), - *topology_, - tol); + // This evaluator needs to run on host until checkPointwiseInclusion + // is moved to device. 
+ using HostSpace = Kokkos::DefaultHostExecutionSpace; + using CTD = Intrepid2::CellTools; + using FST = Intrepid2::FunctionSpaceTools; // Find which cell contains our point - cellIndex_ = -1; - bool haveProbe = false; - for (index_t cell=0; cell(d.num_cells); ++cell) { - // CTD::checkPointwiseInclusion(inCell, - // physical_points_cell, - // this->wda(d).cell_vertex_coordinates, - // *topology_, - // cell); + const int num_points = 1; + Kokkos::DynRankView inCell("inCell", this->wda(d).cell_node_coordinates.extent_int(0), num_points); + Kokkos::DynRankView physical_points_cell("physical_points_cell", this->wda(d).cell_node_coordinates.extent_int(0), num_points, num_dim); + auto tmp_point = point_; + { + Kokkos::MDRangePolicy> policy({0,0},{d.num_cells,static_cast(num_dim)}); + Kokkos::parallel_for("copy node coords",policy,[&](const int cell, const int dim){ + physical_points_cell(cell,0,dim) = tmp_point[dim]; + }); + HostSpace().fence(); + + auto cell_coords = this->wda(d).cell_node_coordinates.get_view(); + auto cell_coords_host = Kokkos::create_mirror_view(cell_coords); + Kokkos::deep_copy(cell_coords_host, cell_coords); + + const double tol = 1.0e-12; + CTD::checkPointwiseInclusion(inCell, + physical_points_cell, + cell_coords_host, + *topology_, + tol); + } + for (index_t cell=0; cell(d.num_cells); ++cell) { if (inCell(cell,0) == 1) { cellIndex_ = cell; - haveProbe = true; + workset_id_ = d.getIdentifier(); + haveProbe_ = true; break; } } // If no cell does, we're done - if (!haveProbe) { + if (!haveProbe_) { return false; } // Map point to reference frame const size_t num_nodes = this->wda(d).cell_node_coordinates.extent(1); - Kokkos::DynRankView cell_coords( - "cell_coords", 1, num_nodes, num_dim); // Cell, Basis, Dim + Kokkos::DynRankView cell_coords("cell_coords", 1, int(num_nodes), int(num_dim)); // + auto cnc_host = Kokkos::create_mirror_view(this->wda(d).cell_node_coordinates.get_view()); + 
Kokkos::deep_copy(cnc_host,this->wda(d).cell_node_coordinates.get_view()); for (size_t i=0; iwda(d).cell_node_coordinates(cellIndex_,i,j); + cell_coords(0,i,j) = cnc_host(cellIndex_,i,j); } } - Kokkos::DynRankView physical_points( - "physical_points", 1, 1, num_dim); // Cell, Point, Dim + Kokkos::DynRankView physical_points("physical_points", 1, 1, num_dim); // for (size_t i=0; i reference_points( - "reference_points", 1, 1, num_dim); // Cell, Point, Dim - CTD::mapToReferenceFrame(reference_points, physical_points, cell_coords, - *topology_); - Kokkos::DynRankView reference_points_cell( - "reference_points_cell", 1, num_dim); // Point, Dim + + Kokkos::DynRankView reference_points("reference_points", 1, 1, num_dim); // + CTD::mapToReferenceFrame(reference_points, physical_points, cell_coords, *topology_); + + Kokkos::DynRankView reference_points_cell("reference_points_cell", 1, num_dim); // for (size_t i=0; igetElementSpace() == PureBasis::HGRAD) { // Evaluate basis at reference values - Kokkos::DynRankView - ref_basis_values("ref_basis_values", num_basis, 1); // Basis, Point - basis_->getIntrepid2Basis()->getValues(ref_basis_values, - reference_points_cell, - Intrepid2::OPERATOR_VALUE); + Kokkos::DynRankView ref_basis_values("ref_basis_values", num_basis, 1); // + basis_->getIntrepid2Basis()->getValues(ref_basis_values, + reference_points_cell, + Intrepid2::OPERATOR_VALUE); // Apply transformation to physical frame - FST::HGRADtransformVALUE(basis_values_, ref_basis_values); - + auto basis_values_host = Kokkos::create_mirror_view(basis_values_); + FST::HGRADtransformVALUE(basis_values_host, ref_basis_values); + Kokkos::deep_copy(basis_values_,basis_values_host); } else if (basis_->getElementSpace() == PureBasis::HCURL || basis_->getElementSpace() == PureBasis::HDIV) { // Evaluate basis at reference values - Kokkos::DynRankView ref_basis_values( - "ref_basis_values", num_basis, 1, num_dim); // Basis, Point, Dim - 
basis_->getIntrepid2Basis()->getValues(ref_basis_values, - reference_points_cell, - Intrepid2::OPERATOR_VALUE); + Kokkos::DynRankView ref_basis_values("ref_basis_values", num_basis, 1, num_dim); // + basis_->getIntrepid2Basis()->getValues(ref_basis_values, + reference_points_cell, + Intrepid2::OPERATOR_VALUE); // Apply transformation to physical frame - Kokkos::DynRankView jac - ("jac", 1, 1, num_dim, num_dim); // Cell, Point, Dim, Dim + Kokkos::DynRankView jac("jac", 1, 1, num_dim, num_dim); // CTD::setJacobian(jac, reference_points, cell_coords, *topology_); - Kokkos::DynRankView basis_values_vec( - "basis_values_vec", 1, num_basis, 1, num_dim); // Cell, Basis, Point, Dim + Kokkos::DynRankView basis_values_vec("basis_values_vec", 1, num_basis, 1, num_dim); // if (basis_->getElementSpace() == PureBasis::HCURL) { - Kokkos::DynRankView jac_inv( - "jac_inv", 1, 1, num_dim, num_dim); // Cell, Point, Dim, Dim + Kokkos::DynRankView jac_inv("jac_inv", 1, 1, num_dim, num_dim); // CTD::setJacobianInv(jac_inv, jac); FST::HCURLtransformVALUE(basis_values_vec, jac_inv, ref_basis_values); } else { - Kokkos::DynRankView jac_det( - "jac_det", 1, 1); // Cell Point + Kokkos::DynRankView jac_det("jac_det", 1, 1); // CTD::setJacobianDet(jac_det, jac); FST::HDIVtransformVALUE(basis_values_vec, jac, jac_det, ref_basis_values); @@ -210,14 +222,12 @@ computeBasisValues(typename Traits::EvalData d) globalIndexer_->getElementOrientation(cellIndex_, orientation); std::string blockId = this->wda(d).block_id; int fieldNum = globalIndexer_->getFieldNum(fieldName_); - const std::vector & elmtOffset = - globalIndexer_->getGIDFieldOffsets(blockId,fieldNum); + const std::vector & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum); // Extract component of basis for (size_t i=0; i void ResponseScatterEvaluator_ProbeBase:: evaluateFields(typename Traits::EvalData d) { - // Compute basis values at point - const bool haveProbe = computeBasisValues(d); + using HostSpace = 
Kokkos::DefaultHostExecutionSpace; - if (!haveProbe) + if ( !haveProbe_ || + (haveProbe_ && d.getIdentifier() != workset_id_) ) return; - // Get field coefficients for cell - Kokkos::DynRankView::type,PHX::Device> field_coeffs = - Kokkos::createDynRankView(field_.get_static_view(), "field_val", - 1, num_basis); // Cell, Basis - for (size_t i=0; i::type,PHX::Device> field_val = - Kokkos::createDynRankView(field_coeffs, "field_val", 1, 1); // Cell, Point - Intrepid2::FunctionSpaceTools::evaluate( - field_val, field_coeffs, basis_values_); + auto field_coeffs_host = Kokkos::create_mirror_view(field_.get_view()); + Kokkos::deep_copy(field_coeffs_host,field_.get_view()); + + auto field_coeffs_host_subview = Kokkos::subview(field_coeffs_host,std::pair(cellIndex_,cellIndex_+1),Kokkos::ALL); + + auto field_val = Kokkos::createDynRankViewWithType>(field_coeffs_host, "field_val_at_point", 1, 1); // + + auto basis_values_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),basis_values_); + + Intrepid2::FunctionSpaceTools::evaluate(field_val, field_coeffs_host_subview, basis_values_host); responseObj_->value = field_val(0,0); responseObj_->have_probe = true; } From 169ed1b64f74ff263e911a42f2164f8d71ed0a3f Mon Sep 17 00:00:00 2001 From: Kim Liegeois Date: Wed, 14 Aug 2024 09:26:02 -0600 Subject: [PATCH 35/37] Use global_indices_array_device_type to deduce the type of the subview --- .../ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp index 8029d9034556..1394549df0d7 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp @@ -890,6 +890,7 @@ namespace Ifpack2 { using local_ordinal_type_1d_view = typename impl_type::local_ordinal_type_1d_view; using crs_matrix_type = typename 
impl_type::tpetra_crs_matrix_type; using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type; + using global_indices_array_device_type = Kokkos::View; auto A_crs = Teuchos::rcp_dynamic_cast(A); auto A_bcrs = Teuchos::rcp_dynamic_cast(A); @@ -904,13 +905,15 @@ namespace Ifpack2 { const auto column_map = g.getColMap(); std::vector gids; - Kokkos::View column_map_global_iD_last; + + Kokkos::Subview> column_map_global_iD_last; + bool separate_remotes = true, found_first = false, need_owned_permutation = false; { IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::loop_over_local_elements"); - auto column_map_global_iD = column_map->getMyGlobalIndicesDevice(); - auto domain_map_global_iD = domain_map->getMyGlobalIndicesDevice(); + global_indices_array_device_type column_map_global_iD = column_map->getMyGlobalIndicesDevice(); + global_indices_array_device_type domain_map_global_iD = domain_map->getMyGlobalIndicesDevice(); if(are_same(domain_map_global_iD, column_map_global_iD)) { // this should be the most likely path @@ -918,7 +921,7 @@ namespace Ifpack2 { need_owned_permutation = false; column_map_global_iD_last = Kokkos::subview(column_map_global_iD, - Kokkos::pair(domain_map_global_iD.extent(0), column_map_global_iD.extent(0))); + std::pair(domain_map_global_iD.extent(0), column_map_global_iD.extent(0))); } else { // This loop is relatively expensive From 34eb6323c54350abee13c567089eca7604d24ad6 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Wed, 14 Aug 2024 11:09:00 -0600 Subject: [PATCH 36/37] Teko: Silence warning Signed-off-by: Christian Glusa --- packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp b/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp index c52bf7ac4740..55209ecd32eb 100644 --- a/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp +++ 
b/packages/teko/src/Teko_DiagonalPreconditionerFactory.cpp @@ -39,12 +39,12 @@ RCP DiagonalPreconditionerFactory::buildPreconditionerState LinearOp DiagonalPreconditionerFactory::buildPreconditionerOperator( LinearOp& lo, PreconditionerState& state) const { if (diagonalType_ == BlkDiag) { - // Sanity check the state - DiagonalPrecondState& MyState = Teuchos::dyn_cast(state); - TEUCHOS_TEST_FOR_EXCEPTION(TpetraHelpers::isTpetraLinearOp(lo), std::runtime_error, "BlkDiag not implemented for Tpetra operators"); #ifdef TEKO_HAVE_EPETRA + // Sanity check the state + DiagonalPrecondState& MyState = Teuchos::dyn_cast(state); + // Get the underlying Epetra_CrsMatrix, if we have one Teuchos::RCP eo = Thyra::get_Epetra_Operator(*lo); TEUCHOS_ASSERT(eo != Teuchos::null); From 6b4104df4e6fd55d36f7e26b5f46e806881e0753 Mon Sep 17 00:00:00 2001 From: Kim Liegeois Date: Wed, 14 Aug 2024 15:56:37 -0600 Subject: [PATCH 37/37] Use TEUCHOS_FUNC_TIME_MONITOR_DIFF to avoid -Werror=shadow when using nested timers --- .../Ifpack2_BlockComputeResidualVector.hpp | 6 +- packages/ifpack2/src/Ifpack2_BlockHelper.hpp | 6 +- .../src/Ifpack2_BlockHelper_Timers.hpp | 8 +-- .../src/Ifpack2_BlockRelaxation_def.hpp | 16 +++--- .../src/Ifpack2_BlockTriDiContainer_def.hpp | 38 ++++++------- .../src/Ifpack2_BlockTriDiContainer_impl.hpp | 56 +++++++++---------- .../src/Tpetra_BlockCrsMatrix_Helpers_def.hpp | 14 ++--- 7 files changed, 72 insertions(+), 72 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp b/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp index eff2119a1075..7fa19c1c1c3b 100644 --- a/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockComputeResidualVector.hpp @@ -790,7 +790,7 @@ namespace Ifpack2 { const MultiVectorLocalViewTypeB &b_, const MultiVectorLocalViewTypeX &x_) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", 
execution_space); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", ComputeResidual0, execution_space); y = y_; b = b_; x = x_; if constexpr (is_device::value) { @@ -818,7 +818,7 @@ namespace Ifpack2 { const MultiVectorLocalViewTypeX &x_, const MultiVectorLocalViewTypeX_Remote &x_remote_) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", execution_space); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", ComputeResidual0, execution_space); b = b_; x = x_; x_remote = x_remote_; if constexpr (is_device::value) { @@ -892,7 +892,7 @@ namespace Ifpack2 { const MultiVectorLocalViewTypeX_Remote &x_remote_, const bool compute_owned) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", execution_space); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDi::ComputeResidual::", ComputeResidual0, execution_space); b = b_; x = x_; x_remote = x_remote_; if constexpr (is_device::value) { diff --git a/packages/ifpack2/src/Ifpack2_BlockHelper.hpp b/packages/ifpack2/src/Ifpack2_BlockHelper.hpp index 5681e7dee0ea..59046103e9d3 100644 --- a/packages/ifpack2/src/Ifpack2_BlockHelper.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockHelper.hpp @@ -396,7 +396,7 @@ namespace Ifpack2 { void ireduce(const int sweep, const bool force = false) { if ( ! force && sweep % sweep_step_) return; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce", Ireduce); work_[1] = work_[0]; #ifdef HAVE_IFPACK2_MPI @@ -425,7 +425,7 @@ namespace Ifpack2 { // early return if (sweep <= 0) return false; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone", CheckDone); TEUCHOS_ASSERT(sweep >= 1); if ( ! 
force && (sweep - 1) % sweep_step_) return false; @@ -473,7 +473,7 @@ namespace Ifpack2 { void reduceVector(const ConstUnmanaged::impl_scalar_type_1d_view> zz, /* */ typename BlockHelperDetails::ImplType::magnitude_type *vals) { IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector", ReduceVector); using impl_type = BlockHelperDetails::ImplType; using local_ordinal_type = typename impl_type::local_ordinal_type; diff --git a/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp b/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp index a42039d921ce..0e269c340acd 100644 --- a/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockHelper_Timers.hpp @@ -16,18 +16,18 @@ namespace Ifpack2 { namespace BlockHelperDetails { #if defined(HAVE_IFPACK2_BLOCKTRIDICONTAINER_TIMERS) -#define IFPACK2_BLOCKHELPER_TIMER(label) TEUCHOS_FUNC_TIME_MONITOR(label); +#define IFPACK2_BLOCKHELPER_TIMER(label, varname) TEUCHOS_FUNC_TIME_MONITOR_DIFF(label, varname); #define IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) execution_space().fence(); #define IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE() Kokkos::DefaultExecutionSpace().fence(); #else -#define IFPACK2_BLOCKHELPER_TIMER(label) +#define IFPACK2_BLOCKHELPER_TIMER(label, varname) #define IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) #define IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE() #endif -#define IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE(label, execution_space) \ +#define IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE(label, varname, execution_space) \ IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) \ - IFPACK2_BLOCKHELPER_TIMER(label) + IFPACK2_BLOCKHELPER_TIMER(label, varname) } // namespace BlockHelperDetails diff --git a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp index a21900bbf30f..a8a1ad69554b 100644 --- 
a/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockRelaxation_def.hpp @@ -608,7 +608,7 @@ initialize () Teuchos::RCP graph = A_->getGraph (); if(!hasBlockCrsMatrix_ && List_.isParameter("relaxation: container") && List_.get("relaxation: container") == "BlockTriDi" ) { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::convertToBlockCrsMatrix"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::convertToBlockCrsMatrix", convertToBlockCrsMatrix); int block_size = List_.get("partitioner: block size"); bool use_explicit_conversion = List_.isParameter("partitioner: explicit convert to BlockCrs") && List_.get("partitioner: explicit convert to BlockCrs"); TEUCHOS_TEST_FOR_EXCEPT_MSG @@ -645,22 +645,22 @@ initialize () Partitioner_ = Teuchos::null; if (PartitionerType_ == "linear") { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::linear"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::linear", linear); Partitioner_ = rcp (new Ifpack2::LinearPartitioner (graph)); IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else if (PartitionerType_ == "line") { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::line"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::line", line); Partitioner_ = rcp (new Ifpack2::LinePartitioner (graph)); IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else if (PartitionerType_ == "user") { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::user"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::user", user); Partitioner_ = rcp (new Ifpack2::Details::UserPartitioner (graph ) ); IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } else if (PartitionerType_ == "zoltan2") { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::zoltan2"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::zoltan2", zoltan2); #if defined(HAVE_IFPACK2_ZOLTAN2) if 
(graph->getComm ()->getSize () == 1) { // Only one MPI, so call zoltan2 with global graph @@ -688,7 +688,7 @@ initialize () // need to partition the graph of A { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::Partitioner"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::Partitioner", Partitioner); Partitioner_->setParameters (List_); Partitioner_->compute (); IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); @@ -714,7 +714,7 @@ initialize () // Extract the submatrices { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::ExtractSubmatricesStructure"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::ExtractSubmatricesStructure", ExtractSubmatricesStructure); ExtractSubmatricesStructure (); IFPACK2_BLOCKHELPER_TIMER_DEFAULT_FENCE(); } @@ -746,7 +746,7 @@ initialize () // only needed when Schwarz combine mode is ADD as opposed to ZERO (which is RAS) if (schwarzCombineMode_ == "ADD") { - IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::ADD"); + IFPACK2_BLOCKHELPER_TIMER("Ifpack2::BlockRelaxation::initialize::ADD", ADD); typedef Tpetra::MultiVector< typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type,typename MatrixType::node_type> scMV; Teuchos::RCP theImport = A_->getGraph()->getImporter(); if (!theImport.is_null()) { diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp index 0b51690c08a1..0848805647b2 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_def.hpp @@ -53,11 +53,11 @@ namespace Ifpack2 { const int block_size, const bool explicitConversion) { - IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", typename BlockHelperDetails::ImplType::execution_space); + IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", initInternal, 
typename BlockHelperDetails::ImplType::execution_space); // create pointer of impl { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl", createImpl); impl_ = Teuchos::rcp(new BlockTriDiContainerDetails::ImplObject()); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } @@ -66,14 +66,14 @@ namespace Ifpack2 { // using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type; { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA", setA); if (explicitConversion) { impl_->A = Teuchos::rcp_dynamic_cast(matrix); if (impl_->A.is_null()) { TEUCHOS_TEST_FOR_EXCEPT_MSG (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs."); { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix); impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast(matrix), block_size, true); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } @@ -90,7 +90,7 @@ namespace Ifpack2 { if (useSeqMethod) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod); if (importer.is_null()) // there is no given importer, then create one impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter(impl_->A); else @@ -99,7 +99,7 @@ namespace Ifpack2 { } else { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter); //Leave tpetra_importer null even if user provided an importer. 
//It is not used in the performant codepath (!useSeqMethod) impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter(impl_->A); @@ -115,12 +115,12 @@ namespace Ifpack2 { impl_->overlap_communication_and_computation = overlapCommAndComp; { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ", createZ); impl_->Z = typename impl_type::tpetra_multivector_type(); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW", createW); impl_->W = typename impl_type::impl_scalar_type_1d_view(); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) } @@ -133,7 +133,7 @@ namespace Ifpack2 { BlockTriDiContainer ::clearInternal () { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal", clearInternal); using impl_type = BlockHelperDetails::ImplType; using part_interface_type = BlockHelperDetails::PartInterface; using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags; @@ -165,7 +165,7 @@ namespace Ifpack2 { bool pointIndexed) : Container(matrix, partitions, pointIndexed), partitions_(partitions) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer); const bool useSeqMethod = false; const bool overlapCommAndComp = false; initInternal(matrix, importer, overlapCommAndComp, useSeqMethod); @@ -185,7 +185,7 @@ namespace Ifpack2 { const bool explicitConversion) : Container(matrix, partitions, false), partitions_(partitions) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer); initInternal(matrix, 
Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion); n_subparts_per_part_ = n_subparts_per_part; block_size_ = block_size; @@ -214,7 +214,7 @@ namespace Ifpack2 { BlockTriDiContainer ::initialize () { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize", initialize); this->IsInitialized_ = true; { auto bA = Teuchos::rcp_dynamic_cast(impl_->A); @@ -222,7 +222,7 @@ namespace Ifpack2 { TEUCHOS_TEST_FOR_EXCEPT_MSG (block_size_ == -1, "A pointwise matrix and block_size = -1 were given as inputs."); { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph); auto A = Teuchos::rcp_dynamic_cast(impl_->A); impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_, true); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) @@ -234,7 +234,7 @@ namespace Ifpack2 { } { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager); impl_->part_interface = BlockTriDiContainerDetails::createPartInterface(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_); impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags(impl_->part_interface); impl_->norm_manager = BlockHelperDetails::NormManager(impl_->A->getComm()); @@ -262,7 +262,7 @@ namespace Ifpack2 { BlockTriDiContainer ::compute () { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute); this->IsComputed_ = false; if (!this->isInitialized()) this->initialize(); @@ -282,7 +282,7 @@ namespace Ifpack2 { BlockTriDiContainer ::clearBlocks () { - 
IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks", clearBlocks); clearInternal(); this->IsInitialized_ = false; this->IsComputed_ = false; @@ -296,7 +296,7 @@ namespace Ifpack2 { ::applyInverseJacobi (const mv_type& X, mv_type& Y, scalar_type dampingFactor, bool zeroStartingSolution, int numSweeps) const { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi); const magnitude_type tol = Kokkos::ArithTraits::zero(); const int check_tol_every = 1; @@ -331,7 +331,7 @@ namespace Ifpack2 { BlockTriDiContainer ::compute (const ComputeParameters& in) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute); this->IsComputed_ = false; if (!this->isInitialized()) this->initialize(); @@ -362,7 +362,7 @@ namespace Ifpack2 { ::applyInverseJacobi (const mv_type& X, mv_type& Y, const ApplyParameters& in) const { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi); int r_val = 0; { r_val = BlockTriDiContainerDetails::applyInverseJacobi diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp index 1394549df0d7..256400e1470f 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp @@ -161,7 +161,7 @@ namespace Ifpack2 { template typename Teuchos::RCP::tpetra_import_type> createBlockCrsTpetraImporter(const Teuchos::RCP::tpetra_row_matrix_type> &A) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::CreateBlockCrsTpetraImporter"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::CreateBlockCrsTpetraImporter", CreateBlockCrsTpetraImporter); using impl_type = BlockHelperDetails::ImplType; 
using tpetra_map_type = typename impl_type::tpetra_map_type; using tpetra_mv_type = typename impl_type::tpetra_block_multivector_type; @@ -523,7 +523,7 @@ namespace Ifpack2 { } void asyncSendRecvVar1(const impl_scalar_type_2d_view_tpetra &mv) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::AsyncSendRecv"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::AsyncSendRecv", AsyncSendRecv); #ifdef HAVE_IFPACK2_MPI // constants and reallocate data buffers if necessary @@ -612,7 +612,7 @@ namespace Ifpack2 { } void syncRecvVar1() { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::SyncRecv"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::SyncRecv", SyncRecv); #ifdef HAVE_IFPACK2_MPI // 0. wait for receive async. for (local_ordinal_type i=0;i(pids.recv.extent(0));++i) { @@ -719,7 +719,7 @@ namespace Ifpack2 { /// standard comm /// void asyncSendRecvVar0(const impl_scalar_type_2d_view_tpetra &mv) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::AsyncSendRecv"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::AsyncSendRecv", AsyncSendRecv); #ifdef HAVE_IFPACK2_MPI // constants and reallocate data buffers if necessary @@ -790,7 +790,7 @@ namespace Ifpack2 { } void syncRecvVar0() { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::SyncRecv"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::AsyncableImport::SyncRecv", SyncRecv); #ifdef HAVE_IFPACK2_MPI // receive async. 
for (local_ordinal_type i=0,iend=pids.recv.extent(0);i Teuchos::RCP > createBlockCrsAsyncImporter(const Teuchos::RCP::tpetra_row_matrix_type> &A) { - IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter"); + IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter", createBlockCrsAsyncImporter); using impl_type = BlockHelperDetails::ImplType; using tpetra_map_type = typename impl_type::tpetra_map_type; using local_ordinal_type = typename impl_type::local_ordinal_type; @@ -910,7 +910,7 @@ namespace Ifpack2 { bool separate_remotes = true, found_first = false, need_owned_permutation = false; { - IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::loop_over_local_elements"); + IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::loop_over_local_elements", loop_over_local_elements); global_indices_array_device_type column_map_global_iD = column_map->getMyGlobalIndicesDevice(); global_indices_array_device_type domain_map_global_iD = domain_map->getMyGlobalIndicesDevice(); @@ -952,7 +952,7 @@ namespace Ifpack2 { } if (separate_remotes) { - IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::separate_remotes"); + IFPACK2_BLOCKHELPER_TIMER("createBlockCrsAsyncImporter::separate_remotes", separate_remotes); const auto invalid = Teuchos::OrdinalTraits::invalid(); const auto parsimonious_col_map = need_owned_permutation ? 
@@ -1044,7 +1044,7 @@ namespace Ifpack2 { const Teuchos::RCP::tpetra_crs_graph_type> &G, const Teuchos::Array::local_ordinal_type> > &partitions, const typename BlockHelperDetails::ImplType::local_ordinal_type n_subparts_per_part_in) { - IFPACK2_BLOCKHELPER_TIMER("createPartInterface"); + IFPACK2_BLOCKHELPER_TIMER("createPartInterface", createPartInterface); using impl_type = BlockHelperDetails::ImplType; using local_ordinal_type = typename impl_type::local_ordinal_type; using local_ordinal_type_1d_view = typename impl_type::local_ordinal_type_1d_view; @@ -1179,7 +1179,7 @@ namespace Ifpack2 { local_ordinal_type pack_nrows = 0; local_ordinal_type pack_nrows_sub = 0; if (jacobi) { - IFPACK2_BLOCKHELPER_TIMER("compute part indices (Jacobi)"); + IFPACK2_BLOCKHELPER_TIMER("compute part indices (Jacobi)", Jacobi); for (local_ordinal_type ip=0;ip::execution_space) } else { - IFPACK2_BLOCKHELPER_TIMER("compute part indices"); + IFPACK2_BLOCKHELPER_TIMER("compute part indices", indices); for (local_ordinal_type ip=0;ipsize(); @@ -1484,7 +1484,7 @@ namespace Ifpack2 { Kokkos::deep_copy(interf.rowidx2part, rowidx2part); { // Fill packptr. 
- IFPACK2_BLOCKHELPER_TIMER("Fill packptr"); + IFPACK2_BLOCKHELPER_TIMER("Fill packptr", packptr0); local_ordinal_type npacks = ceil(float(nparts)/vector_length) * (part2packrowidx0_sub.extent(1)-1); npacks = 0; for (local_ordinal_type ip=1;ip<=nparts;++ip) //n_sub_parts_and_schur @@ -1618,7 +1618,7 @@ namespace Ifpack2 { template BlockTridiags createBlockTridiags(const BlockHelperDetails::PartInterface &interf) { - IFPACK2_BLOCKHELPER_TIMER("createBlockTridiags"); + IFPACK2_BLOCKHELPER_TIMER("createBlockTridiags", createBlockTridiags0); using impl_type = BlockHelperDetails::ImplType; using execution_space = typename impl_type::execution_space; using local_ordinal_type = typename impl_type::local_ordinal_type; @@ -1865,7 +1865,7 @@ namespace Ifpack2 { BlockTridiags &btdm, BlockHelperDetails::AmD &amd, const bool overlap_communication_and_computation) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::SymbolicPhase"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::SymbolicPhase", SymbolicPhase); using impl_type = BlockHelperDetails::ImplType; @@ -3469,7 +3469,7 @@ namespace Ifpack2 { #ifdef IFPACK2_BLOCKTRIDICONTAINER_USE_PRINTF printf("Start ExtractAndFactorizeSubLineTag\n"); #endif - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ExtractAndFactorizeSubLineTag"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ExtractAndFactorizeSubLineTag", ExtractAndFactorizeSubLineTag0); Kokkos::TeamPolicy policy(packindices_sub.extent(0), team_size, vector_loop_size); @@ -3500,7 +3500,7 @@ namespace Ifpack2 { write5DMultiVectorValuesToFile(part2packrowidx0_sub.extent(0), e_scalar_values, "e_scalar_values_before_extract.mm"); { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ExtractBCDTag"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ExtractBCDTag", ExtractBCDTag0); Kokkos::TeamPolicy policy(packindices_schur.extent(0)*packindices_schur.extent(1), team_size, vector_loop_size); @@ -3519,7 +3519,7 @@ namespace Ifpack2 { #endif 
write5DMultiVectorValuesToFile(part2packrowidx0_sub.extent(0), e_scalar_values, "e_scalar_values_after_extract.mm"); { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ComputeETag"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ComputeETag", ComputeETag0); Kokkos::TeamPolicy policy(packindices_sub.extent(0), team_size, vector_loop_size); @@ -3539,7 +3539,7 @@ namespace Ifpack2 { #ifdef IFPACK2_BLOCKTRIDICONTAINER_USE_PRINTF printf("Start ComputeSchurTag\n"); #endif - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ComputeSchurTag"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::ComputeSchurTag", ComputeSchurTag0); writeBTDValuesToFile(part2packrowidx0_sub.extent(0), scalar_values_schur, "before_schur.mm"); Kokkos::TeamPolicy policy(packindices_schur.extent(0)*packindices_schur.extent(1), team_size, vector_loop_size); @@ -3558,7 +3558,7 @@ namespace Ifpack2 { #ifdef IFPACK2_BLOCKTRIDICONTAINER_USE_PRINTF printf("Start FactorizeSchurTag\n"); #endif - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::FactorizeSchurTag"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::FactorizeSchurTag", FactorizeSchurTag0); Kokkos::TeamPolicy policy(packindices_schur.extent(0), team_size, vector_loop_size); policy.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch)); @@ -3587,7 +3587,7 @@ namespace Ifpack2 { const BlockHelperDetails::PartInterface &interf, BlockTridiags &btdm, const typename BlockHelperDetails::ImplType::magnitude_type tiny) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase", NumericPhase); ExtractAndFactorizeTridiags function(btdm, interf, A, G, tiny); function.run(); IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType::execution_space) @@ -3712,7 +3712,7 @@ namespace Ifpack2 { void run(const const_impl_scalar_type_2d_view_tpetra &scalar_multivector_) { IFPACK2_BLOCKTRIDICONTAINER_PROFILER_REGION_BEGIN; - 
IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::MultiVectorConverter"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::MultiVectorConverter", MultiVectorConverter0); scalar_multivector = scalar_multivector_; if constexpr (BlockHelperDetails::is_device::value) { @@ -4696,7 +4696,7 @@ namespace Ifpack2 { void run(const impl_scalar_type_2d_view_tpetra &Y, const impl_scalar_type_1d_view &Z) { IFPACK2_BLOCKTRIDICONTAINER_PROFILER_REGION_BEGIN; - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::SolveTridiags"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::SolveTridiags", SolveTridiags); /// set vectors this->Y_scalar_multivector = Y; @@ -4747,7 +4747,7 @@ namespace Ifpack2 { policy, *this); \ } \ { \ - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorSubLineTag"); \ + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorSubLineTag", SingleVectorSubLineTag0); \ write4DMultiVectorValuesToFile(part2packrowidx0_sub.extent(0), X_internal_scalar_values, "x_scalar_values_before_SingleVectorSubLineTag.mm"); \ Kokkos::TeamPolicy > \ policy(packindices_sub.extent(0), team_size, vector_loop_size); \ @@ -4759,7 +4759,7 @@ namespace Ifpack2 { IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) \ } \ { \ - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorApplyCTag"); \ + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorApplyCTag", SingleVectorApplyCTag0); \ write4DMultiVectorValuesToFile(part2packrowidx0_sub.extent(0), X_internal_scalar_values, "x_scalar_values_before_SingleVectorApplyCTag.mm"); \ Kokkos::TeamPolicy > \ policy(packindices_sub.extent(0), team_size, vector_loop_size); \ @@ -4771,7 +4771,7 @@ namespace Ifpack2 { IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) \ } \ { \ - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorSchurTag"); \ + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorSchurTag", SingleVectorSchurTag0); \ 
write4DMultiVectorValuesToFile(part2packrowidx0_sub.extent(0), X_internal_scalar_values, "x_scalar_values_before_SingleVectorSchurTag.mm"); \ Kokkos::TeamPolicy > \ policy(packindices_schur.extent(0), team_size, vector_loop_size); \ @@ -4783,7 +4783,7 @@ namespace Ifpack2 { IFPACK2_BLOCKHELPER_TIMER_FENCE(execution_space) \ } \ { \ - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorApplyETag"); \ + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi::SingleVectorApplyETag", SingleVectorApplyETag0); \ write4DMultiVectorValuesToFile(part2packrowidx0_sub.extent(0), X_internal_scalar_values, "x_scalar_values_before_SingleVectorApplyETag.mm"); \ Kokkos::TeamPolicy > \ policy(packindices_sub.extent(0), team_size, vector_loop_size); \ @@ -4862,7 +4862,7 @@ namespace Ifpack2 { const int max_num_sweeps, const typename BlockHelperDetails::ImplType::magnitude_type tol, const int check_tol_every) { - IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi"); + IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ApplyInverseJacobi", ApplyInverseJacobi); using impl_type = BlockHelperDetails::ImplType; using node_memory_space = typename impl_type::node_memory_space; diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp index 807707a3ea03..3b25ec72b89a 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp @@ -265,7 +265,7 @@ namespace Tpetra { Teuchos::RCP > getBlockCrsGraph(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph"); + TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph", getBlockCrsGraph0); /* ASSUMPTIONS: @@ -299,7 +299,7 @@ namespace Tpetra { const map_type &pointRangeMap = *(pointMatrix.getRangeMap()); { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph::createMeshMaps"); + 
TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::createMeshMaps", getBlockCrsGraph1); meshRowMap = createMeshMap(blockSize, pointRowMap, use_LID); meshColMap = createMeshMap(blockSize, pointColMap, use_LID); meshDomainMap = createMeshMap(blockSize, pointDomainMap, use_LID); @@ -318,7 +318,7 @@ namespace Tpetra { const offset_type bs2 = blockSize * blockSize; if (use_LID) { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph::LID"); + TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::LID", getBlockCrsGraph2); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; @@ -352,7 +352,7 @@ namespace Tpetra { Kokkos::DefaultExecutionSpace().fence(); } else { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::getBlockCrsGraph::GID"); + TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::GID", getBlockCrsGraph3); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; @@ -406,7 +406,7 @@ namespace Tpetra { Teuchos::RCP > convertToBlockCrsMatrix(const Tpetra::CrsMatrix& pointMatrix, const LO &blockSize, bool use_LID) { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix"); + TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix", convertToBlockCrsMatrix0); /* ASSUMPTIONS: @@ -439,7 +439,7 @@ namespace Tpetra { auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID); if (use_LID) { - TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::LID"); + TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix::LID", convertToBlockCrsMatrix1); auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice(); auto pointRowptr = pointLocalGraph.row_map; auto pointColind = pointLocalGraph.entries; @@ -471,7 +471,7 @@ namespace Tpetra { Kokkos::DefaultExecutionSpace().fence(); } else { - 
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::GID"); + TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix::GID", convertToBlockCrsMatrix2); auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap(); auto localPointColMap = pointMatrix.getColMap()->getLocalMap();